From: Roger Shimizu Date: Mon, 27 Jun 2022 17:04:55 +0000 (+0100) Subject: Import android-platform-external-boringssl_13~preview2-7.debian.tar.xz X-Git-Tag: archive/raspbian/13_preview2-7+rpi1~1^2~4^2 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=4bdab0ea5e40f14a8693a85ec9103a9390a3a95a;p=android-platform-external-boringssl.git Import android-platform-external-boringssl_13~preview2-7.debian.tar.xz [dgit import tarball android-platform-external-boringssl 13~preview2-7 android-platform-external-boringssl_13~preview2-7.debian.tar.xz] --- 4bdab0ea5e40f14a8693a85ec9103a9390a3a95a diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 0000000..514c702 --- /dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,14 @@ +pages: + image: registry.salsa.debian.org/salsa-ci-team/ci-image-git-buildpackage + stage: deploy + artifacts: + paths: + - public + expire_in: 1 day + except: + - tags + - pristine-tar + - upstream + script: + - gitlab-ci-git-buildpackage-all + - gitlab-ci-aptly diff --git a/README.Debian b/README.Debian new file mode 100644 index 0000000..fd5a11c --- /dev/null +++ b/README.Debian @@ -0,0 +1,21 @@ +This library is a version of Google's internal OpenSSL fork called +BoringSSL. This is not the official BoringSSL release, it is the +version of BoringSSL that is maintained as part of the Android SDK and +OS. These packages should never be used for anything but the parts of +the Android SDK that require them. + +We package these chunks separately because we believe it makes it +easier to maintain. Security updates can happen only in the +particular package rather than having to build the whole Android SDK +together as one giant source tree. + +Upstream is already good at providing security fixes for all of the +various bits, and they maintain quite a few stable releases in +parallel. Security maintenance for the Android SDK packages will +mostly be a matter of just including any new patch versions +(i.e. 8.1.0_r14 vs 8.1.0_r15). + +For more info: +https://lists.debian.org/debian-security/2016/05/msg00038.html + + -- Hans-Christoph Steiner , Wed, 28 Feb 2018 11:51:35 +0100 diff --git a/android-boringssl.docs b/android-boringssl.docs new file mode 100644 index 0000000..a67d2fc --- /dev/null +++ b/android-boringssl.docs @@ -0,0 +1,3 @@ +NOTICE +src/INCORPORATING.md +src/README.md diff --git a/android-boringssl.install b/android-boringssl.install new file mode 100644 index 0000000..07836ed --- /dev/null +++ b/android-boringssl.install @@ -0,0 +1 @@ +debian/out/bssl-tool usr/bin diff --git a/android-boringssl.lintian-overrides b/android-boringssl.lintian-overrides new file mode 100644 index 0000000..7f810ab --- /dev/null +++ b/android-boringssl.lintian-overrides @@ -0,0 +1,12 @@ +# Executables from the Android SDK normally each statically link in all +# of the libs. That means each executable includes a full copy of all +# the libs, which is not how Debian works. These libs need to be +# dynamically linked in so that just the library can be updated when +# there is a security fix, instead of requiring all of the executables +# be recompiled. That said, these libraries do not have a stable +# interface and were never intended to be used by any other project, +# only with the internal, Android SDK executables.
Therefore, the +# Android SDK executables use private libraries located in +# /usr/lib/${DEB_HOST_MULTIARCH}/android + +custom-library-search-path diff --git a/android-libboringssl-dev.install b/android-libboringssl-dev.install new file mode 100755 index 0000000..0777c48 --- /dev/null +++ b/android-libboringssl-dev.install @@ -0,0 +1,5 @@ +#!/usr/bin/dh-exec + +src/include/openssl usr/include/android +debian/out/libcrypto.so usr/lib/${DEB_HOST_MULTIARCH}/android +debian/out/libssl.so usr/lib/${DEB_HOST_MULTIARCH}/android diff --git a/android-libboringssl.docs b/android-libboringssl.docs new file mode 100644 index 0000000..b41e947 --- /dev/null +++ b/android-libboringssl.docs @@ -0,0 +1,3 @@ +NOTICE +src/INCORPORATING.md +src/README.md \ No newline at end of file diff --git a/android-libboringssl.install b/android-libboringssl.install new file mode 100755 index 0000000..ff8628b --- /dev/null +++ b/android-libboringssl.install @@ -0,0 +1,4 @@ +#!/usr/bin/dh-exec + +debian/out/libcrypto.so.* usr/lib/${DEB_HOST_MULTIARCH}/android +debian/out/libssl.so.* usr/lib/${DEB_HOST_MULTIARCH}/android diff --git a/android-libboringssl.lintian-overrides b/android-libboringssl.lintian-overrides new file mode 100644 index 0000000..7f810ab --- /dev/null +++ b/android-libboringssl.lintian-overrides @@ -0,0 +1,12 @@ +# Executables from the Android SDK normally each statically link in all +# of the libs. That means each executable includes a full copy of all +# the libs, which is not how Debian works. These libs need to be +# dynamically linked in so that just the library can be updated when +# there is a security fix, instead of requiring all of the executables +# be recompiled. That said, these libraries do not have a stable +# interface and were never intended to be used by any other project, +# only with the internal, Android SDK executables. Therefore, the +# Android SDK executables use private libraries located in +# /usr/lib/${DEB_HOST_MULTIARCH}/android + +custom-library-search-path diff --git a/changelog b/changelog new file mode 100644 index 0000000..7a31a6e --- /dev/null +++ b/changelog @@ -0,0 +1,228 @@ +android-platform-external-boringssl (13~preview2-7) unstable; urgency=medium + + * Team upload. + * [again] Use lld as linker on available platforms. + + -- Roger Shimizu Tue, 28 Jun 2022 02:04:55 +0900 + +android-platform-external-boringssl (13~preview2-6) unstable; urgency=medium + + * Team upload. + * Use lld as linker on available platforms. + * debian/patches/0[12]: Add patch description. + * d/source/lintian-overrides: Adapt new rule to source filename. + * Add debian/upstream/metadata. + + -- Roger Shimizu Mon, 27 Jun 2022 19:38:58 +0900 + +android-platform-external-boringssl (13~preview2-5) unstable; urgency=medium + + * Team upload. + * debian/*.mk: Fix ftbfs for mips*el. + + -- Roger Shimizu Sun, 19 Jun 2022 02:14:20 +0900 + +android-platform-external-boringssl (13~preview2-4) unstable; urgency=medium + + * Team upload. + * Add patch from upstream tag platform-tools-33.0.1. + * Move -pie from debian/rules to debian/*.mk executable build. + * [ubuntu] debian/rules: ignore dh_dwz error. + * Use lld as linker when available. + + -- Roger Shimizu Sun, 19 Jun 2022 00:21:10 +0900 + +android-platform-external-boringssl (13~preview2-3) unstable; urgency=medium + + * Team upload. + * d/rules: Move common CPPFLAGS from d/*.mk to d/rules + * d/control: Move android-libboringssl-dev from Architecture: all to + each arch being supported.
+ + -- Roger Shimizu Mon, 13 Jun 2022 00:52:06 +0900 +android-platform-external-boringssl (13~preview2-2) unstable; urgency=medium + + * Team upload. + * debian/*.mk: Fix ftbfs for armel. + + -- Roger Shimizu Tue, 07 Jun 2022 18:35:21 +0900 +android-platform-external-boringssl (13~preview2-1) unstable; urgency=medium + + * Team upload. + * debian/*.mk: Using the "gnu11" variant means we don't need _XOPEN_SOURCE. + Additionally, using C11 makes the faster refcount implementation + available. This setting is from upstream. + + -- Roger Shimizu Mon, 06 Jun 2022 21:09:55 +0900 + +android-platform-external-boringssl (13~preview2-1~exp1) unstable; urgency=medium + + * Team upload. + * New upstream version 13~preview2 + * debian/patches: Refresh patches. + * debian/sources.mk: Update by script. + + -- Roger Shimizu Wed, 01 Jun 2022 04:00:31 +0900 + +android-platform-external-boringssl (12.1.0+r5-2) unstable; urgency=medium + + * Team upload. + * debian/tests/control: Limit architecture. + + -- Roger Shimizu Tue, 31 May 2022 01:41:52 +0900 + +android-platform-external-boringssl (12.1.0+r5-1) unstable; urgency=medium + + * Team upload. + * Upload to unstable. + * debian/rules: Build and test only for -arch build. + + -- Roger Shimizu Mon, 30 May 2022 19:06:02 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp10) experimental; urgency=medium + + * Team upload. + * debian/crypto_test.mk: Fallback to gcc for mips64el. + Thanks to Adrian Bunk for fixing this test for mips64el. + + -- Roger Shimizu Sun, 29 May 2022 16:50:06 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp9) experimental; urgency=medium + + * Team upload. + * debian/control: Update Depends version. + * debian/tests/control: Add autopkgtest test. + + -- Roger Shimizu Sat, 28 May 2022 18:38:29 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp8) experimental; urgency=medium + + * Team upload. + * debian/rules: + - Disable building bssl-tools for Hurd. + - Add -pie to LDFLAGS to enhance the hardening. + * debian/control: + - Add android-boringssl package to include the tool. + * debian/copyright & debian/source/lintian-overrides: + - Adapt with new upstream. + + -- Roger Shimizu Thu, 26 May 2022 01:00:02 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp7) experimental; urgency=medium + + * Team upload. + * debian/tool_test.mk and debian/rules: + - Add bssl-tool to build. + * debian/rules: + - Still run failing test for mips64el, just ignore the result. + + -- Roger Shimizu Mon, 23 May 2022 21:01:59 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp6) experimental; urgency=medium + + * Team upload. + * Disable crypto_test for mips64el temporarily. + * Split test_support as an independent library. + * d/rules: + - Make dependency driven makefile rules. + + -- Roger Shimizu Mon, 16 May 2022 23:23:41 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp5) experimental; urgency=medium + + * Team upload. + * Update eureka.mk and source it in debian/*.mk + * d/{crypto,ssl}_test.mk: + - Link with atomic for armel. + * d/patches: + - Update 01 patch to fix x32. + + -- Roger Shimizu Mon, 16 May 2022 02:25:59 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp4) experimental; urgency=medium + + * Team upload. + * debian/patches: + - Update patch to fix sh4 and x32. + * Add debian/{crypto,ssl}_test.mk to test built libraries. + * d/lib{crypto,ssl}.mk: + - Import source list from eureka.mk.
+ + -- Roger Shimizu Sun, 15 May 2022 19:24:19 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp3) experimental; urgency=medium + + * Team upload. + * debian/control: Add all little endian Arch, to check the buildd + result. + + -- Roger Shimizu Sun, 15 May 2022 03:27:01 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp2) experimental; urgency=medium + + * Team upload. + * Try to build on new Arch (little endian): ia64, riscv64, sh4, x32 + + -- Roger Shimizu Sun, 15 May 2022 01:59:06 +0900 + +android-platform-external-boringssl (12.1.0+r5-1~exp1) experimental; urgency=medium + + * New upstream version 12.1.0+r5 + * debian/control: + - Fix multiarch issues. + - Add ppc64el support. + * debian/rules: + - Use clang as default compiler. + + -- Roger Shimizu Sat, 14 May 2022 02:09:14 +0900 + +android-platform-external-boringssl (10.0.0+r36-2~exp1) experimental; urgency=medium + + * Team upload. + + [ Hans-Christoph Steiner ] + * gitlab-ci: exclude tags, pristine-tar, upstream + + [ Roger Shimizu ] + * debian/control: + - Add mips*el to build. + + -- Roger Shimizu Mon, 11 Jan 2021 03:31:07 +0900 + +android-platform-external-boringssl (10.0.0+r36-1) unstable; urgency=medium + + * Team upload + * New upstream version + * Upstream (10.0.0+r36) (Closes: #933865) + + -- Dhyey Patel Mon, 23 Nov 2020 12:14:17 +0100 + +android-platform-external-boringssl (8.1.0+r23-3) unstable; urgency=medium + + [ Kai-Chung Yan (殷啟聰) ] + * d/copyright: Refer to the Apache-2.0 in the commons-licenses + + [ Roger Shimizu ] + * d/watch: Update rule to get new upstream version + + [ Hans-Christoph Steiner ] + * fix adb crashes on startup on armhf (Closes: #933865) + + -- Hans-Christoph Steiner Thu, 08 Oct 2020 19:35:08 +0200 + +android-platform-external-boringssl (8.1.0+r23-2) unstable; urgency=medium + + * Update d/copyright: + * Cover all copyright holders (Closes: #905820) + * Point the Source to AOSP + * Standards-Version => 4.2.1 + + -- Kai-Chung Yan Fri, 21 Sep 2018 16:43:18 +0800 + +android-platform-external-boringssl (8.1.0+r23-1) unstable; urgency=medium + + * Initial release (Closes: #823933) + + -- Kai-Chung Yan Mon, 28 May 2018 19:53:05 +0200 diff --git a/clean b/clean new file mode 100644 index 0000000..92a7cdc --- /dev/null +++ b/clean @@ -0,0 +1 @@ +debian/out/ \ No newline at end of file diff --git a/compiler_test.mk b/compiler_test.mk new file mode 100644 index 0000000..889c2a0 --- /dev/null +++ b/compiler_test.mk @@ -0,0 +1,27 @@ +NAME = compiler_test + +SOURCES = \ + src/crypto/compiler_test.cc \ + src/crypto/test/test_util.cc \ + +OBJECTS = $(SOURCES:.cc=.o) + +CXXFLAGS += -std=gnu++2a +CPPFLAGS += \ + -Isrc/include \ + +LDFLAGS += \ + -lgtest \ + -lpthread \ + -pie + +ifneq ($(filter mipsel mips64el,$(DEB_HOST_ARCH)),) + LDFLAGS += -Wl,-z,notext +endif + +build: $(OBJECTS) /usr/lib/$(DEB_HOST_MULTIARCH)/libgtest_main.a + mkdir -p debian/out + $(CXX) $^ -o debian/out/$(NAME) $(LDFLAGS) + +$(OBJECTS): %.o: %.cc + $(CXX) -c -o $@ $< $(CXXFLAGS) $(CPPFLAGS) diff --git a/control b/control new file mode 100644 index 0000000..ce92bff --- /dev/null +++ b/control @@ -0,0 +1,89 @@ +Source: android-platform-external-boringssl +Section: libs +Priority: optional +Maintainer: Android Tools Maintainers +Uploaders: Kai-Chung Yan +Build-Depends: + clang [amd64 i386 armel armhf arm64 mipsel mips64el ppc64el riscv64], + debhelper-compat (= 12), + dh-exec, + libgtest-dev, + lld [amd64 i386 armel armhf arm64 mipsel mips64el ppc64el], +Standards-Version: 4.5.0 +Rules-Requires-Root: no +Vcs-Git: 
https://salsa.debian.org/android-tools-team/android-platform-external-boringssl.git +Vcs-Browser: https://salsa.debian.org/android-tools-team/android-platform-external-boringssl +Homepage: https://android.googlesource.com/platform/external/boringssl + +Package: android-libboringssl +Architecture: armel armhf arm64 amd64 i386 ppc64el mipsel mips64el hurd-i386 ia64 kfreebsd-amd64 kfreebsd-i386 riscv64 sh4 x32 +Multi-Arch: same +Depends: ${misc:Depends}, ${shlibs:Depends} +Description: Google's internal fork of OpenSSL for the Android SDK + The Android SDK builds against a static version of BoringSSL, + Google's internal fork of OpenSSL. This package should never be used + for anything but Android SDK packages that already depend on it. + . + BoringSSL arose because Google used OpenSSL for many years in various + ways and, over time, built up a large number of patches that were + maintained while tracking upstream OpenSSL. As Google’s product + portfolio became more complex, more copies of OpenSSL sprung up and + the effort involved in maintaining all these patches in multiple + places was growing steadily. + . + This is the Android AOSP fork of BoringSSL which is designed to be + used by Android and its SDK. BoringSSL is only ever statically linked + into apps, and pinned to a commit version. Upstream has no official + releases of BoringSSL on its own, so it must be included separately + for each project that uses it. + +Package: android-libboringssl-dev +Section: libdevel +Architecture: armel armhf arm64 amd64 i386 ppc64el mipsel mips64el hurd-i386 ia64 kfreebsd-amd64 kfreebsd-i386 riscv64 sh4 x32 +Multi-Arch: foreign +Depends: ${misc:Depends}, + android-libboringssl (= ${binary:Version}), +Description: Google's internal fork of OpenSSL for the Android SDK - devel + The Android SDK builds against a static version of BoringSSL, + Google's internal fork of OpenSSL. This package should never be used + for anything but Android SDK packages that already depend on it. + . + BoringSSL arose because Google used OpenSSL for many years in various + ways and, over time, built up a large number of patches that were + maintained while tracking upstream OpenSSL. As Google’s product + portfolio became more complex, more copies of OpenSSL sprung up and + the effort involved in maintaining all these patches in multiple + places was growing steadily. + . + This is the Android AOSP fork of BoringSSL which is designed to be + used by Android and its SDK. BoringSSL is only ever statically linked + into apps, and pinned to a commit version. Upstream has no official + releases of BoringSSL on its own, so it must be included separately + for each project that uses it. + . + This package contains the development files. + +Package: android-boringssl +Section: utils +Architecture: armel armhf arm64 amd64 i386 ppc64el mipsel mips64el ia64 kfreebsd-amd64 kfreebsd-i386 riscv64 sh4 x32 +Multi-Arch: foreign +Depends: ${misc:Depends}, ${shlibs:Depends} +Description: Google's internal fork of OpenSSL for the Android SDK - tool + The Android SDK builds against a static version of BoringSSL, + Google's internal fork of OpenSSL. This package should never be used + for anything but Android SDK packages that already depend on it. + . + BoringSSL arose because Google used OpenSSL for many years in various + ways and, over time, built up a large number of patches that were + maintained while tracking upstream OpenSSL. 
As Google’s product + portfolio became more complex, more copies of OpenSSL sprung up and + the effort involved in maintaining all these patches in multiple + places was growing steadily. + . + This is the Android AOSP fork of BoringSSL which is designed to be + used by Android and its SDK. BoringSSL is only ever statically linked + into apps, and pinned to a commit version. Upstream has no official + releases of BoringSSL on its own, so it must be included separately + for each project that uses it. + . + This package contains the boringssl command line tool. diff --git a/copyright b/copyright new file mode 100644 index 0000000..4d06b4b --- /dev/null +++ b/copyright @@ -0,0 +1,435 @@ +Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0 +Source: https://android.googlesource.com/platform/external/boringssl +Upstream-Name: BoringSSL + +Files: * +Copyright: 1998-2011, The OpenSSL Project +License: OpenSSL and SSLeay + +Files: src/crypto/asn1/* + src/crypto/base64/base64.c + src/crypto/bio/* + src/crypto/bn_extra/* + src/crypto/buf/* + src/crypto/cipher_extra/cipher_extra.c + src/crypto/cipher_extra/derive_key.c + src/crypto/cipher_extra/e_null.c + src/crypto/cipher_extra/e_rc2.c + src/crypto/cipher_extra/e_rc4.c + src/crypto/cipher_extra/internal.h + src/crypto/conf/conf.c + src/crypto/conf/conf_def.h + src/crypto/cpu-intel.c + src/crypto/digest_extra/digest_extra.c + src/crypto/dsa/dsa_test.cc + src/crypto/dsa/dsa.c + src/crypto/err/err.c + src/crypto/evp/evp_asn1.c + src/crypto/evp/evp_ctx.c + src/crypto/evp/evp.c + src/crypto/evp/internal.h + src/crypto/evp/sign.c + src/crypto/ex_data.c + src/crypto/fipsmodule/bn/* + src/crypto/fipsmodule/cipher/cipher.c + src/crypto/fipsmodule/cipher/e_des.c + src/crypto/fipsmodule/cipher/internal.h + src/crypto/fipsmodule/des/des.c + src/crypto/fipsmodule/des/internal.h + src/crypto/fipsmodule/digest/digest.c + src/crypto/fipsmodule/digest/digests.c + src/crypto/fipsmodule/digest/internal.h + src/crypto/fipsmodule/hmac/hmac.c + src/crypto/fipsmodule/md4/md4.c + src/crypto/fipsmodule/md5/md5.c + src/crypto/fipsmodule/rsa/internal.h + src/crypto/fipsmodule/rsa/rsa_impl.c + src/crypto/fipsmodule/rsa/rsa.c + src/crypto/fipsmodule/sha/sha1-altivec.c + src/crypto/fipsmodule/sha/sha1.c + src/crypto/fipsmodule/sha/sha256.c + src/crypto/fipsmodule/sha/sha512.c + src/crypto/hmac_extra/* + src/crypto/internal.h + src/crypto/lhash/lhash.c + src/crypto/mem.c + src/crypto/obj/* + src/crypto/pem/* + src/crypto/rc4/rc4.c + src/crypto/rsa_extra/* + src/crypto/stack/stack.c + src/crypto/thread.c + src/crypto/x509/* + src/decrepit/bio/base64_bio.c + src/decrepit/blowfish/blowfish.c + src/decrepit/cast/cast_tables.c + src/decrepit/cast/cast.c + src/decrepit/cast/internal.h + src/decrepit/des/cfb64ede.c + src/decrepit/macros.h + src/decrepit/rc4/rc4_decrepit.c + src/decrepit/ripemd/internal.h + src/decrepit/ripemd/ripemd.c + src/decrepit/rsa/rsa_decrepit.c + src/decrepit/ssl/ssl_decrepit.c + src/include/openssl/asn1.h + src/include/openssl/base64.h + src/include/openssl/bio.h + src/include/openssl/blowfish.h + src/include/openssl/bn.h + src/include/openssl/buf.h + src/include/openssl/cast.h + src/include/openssl/cipher.h + src/include/openssl/conf.h + src/include/openssl/cpu.h + src/include/openssl/des.h + src/include/openssl/dh.h + src/include/openssl/digest.h + src/include/openssl/dsa.h + src/include/openssl/err.h + src/include/openssl/evp.h + src/include/openssl/ex_data.h + src/include/openssl/hmac.h + src/include/openssl/lhash.h + 
src/include/openssl/md4.h + src/include/openssl/md5.h + src/include/openssl/mem.h + src/include/openssl/nid.h + src/include/openssl/obj.h + src/include/openssl/pem.h + src/include/openssl/rc4.h + src/include/openssl/ripemd.h + src/include/openssl/rsa.h + src/include/openssl/sha.h + src/include/openssl/ssl.h + src/include/openssl/ssl3.h + src/include/openssl/stack.h + src/include/openssl/thread.h + src/include/openssl/tls1.h + src/include/openssl/type_check.h + src/include/openssl/x509_vfy.h + src/include/openssl/x509.h + src/ssl/* +Copyright: 1995-1998, Eric Young +License: SSLeay + +Files: crypto_test_data.cc + err_data.c + rules.mk + src/.clang-format + src/.github/* + src/.gitignore + src/BUILDING.md + src/CMakeLists.txt + src/codereview.settings + src/CONTRIBUTING.md + src/crypto/asn1/asn1_test.cc + src/crypto/base64/base64_test.cc + src/crypto/bio/bio_test.cc + src/crypto/bio/socket_helper.c + src/crypto/bn_extra/bn_asn1.c + src/crypto/bytestring/* + src/crypto/chacha/* + src/crypto/cipher_extra/aead_test.cc + src/crypto/cipher_extra/e_aesctrhmac.c + src/crypto/cipher_extra/e_aesgcmsiv.c + src/crypto/cipher_extra/e_chacha20poly1305.c + src/crypto/cipher_extra/e_tls.c + src/crypto/cipher_extra/test/nist_cavp/make_cavp.go + src/crypto/cmac/cmac_test.cc + src/crypto/compiler_test.cc + src/crypto/conf/internal.h + src/crypto/cpu-aarch64-linux.c + src/crypto/cpu-arm-linux.c + src/crypto/cpu-arm.c + src/crypto/cpu-ppc64le.c + src/crypto/crypto.c + src/crypto/curve25519/* + src/crypto/digest_extra/digest_test.cc + src/crypto/engine/* + src/crypto/err/err_data_generate.go + src/crypto/err/err_test.cc + src/crypto/evp/evp_extra_test.cc + src/crypto/evp/p_ed25519_asn1.c + src/crypto/evp/p_ed25519.c + src/crypto/evp/pbkdf_test.cc + src/crypto/evp/scrypt_test.cc + src/crypto/fipsmodule/aes/aes_test.cc + src/crypto/fipsmodule/aes/internal.h + src/crypto/fipsmodule/bcm.c + src/crypto/fipsmodule/bn/check_bn_tests.go + src/crypto/fipsmodule/cipher/aead.c + src/crypto/fipsmodule/delocate.h + src/crypto/fipsmodule/ec/ec_test.cc + src/crypto/fipsmodule/ec/p224-64.c + src/crypto/fipsmodule/ec/p256-x86_64_test.cc + src/crypto/fipsmodule/is_fips.c + src/crypto/fipsmodule/modes/polyval.c + src/crypto/fipsmodule/rand/* + src/crypto/hkdf/* + src/crypto/lhash/lhash_test.cc + src/crypto/obj/obj_test.cc + src/crypto/obj/objects.go + src/crypto/pkcs7/* + src/crypto/pkcs8/pkcs12_test.cc + src/crypto/pkcs8/pkcs8_test.cc + src/crypto/poly1305/* + src/crypto/pool/* + src/crypto/rand_extra/* + src/crypto/refcount_*.c + src/crypto/test/* + src/crypto/thread_*.c + src/crypto/x509/internal.h + src/crypto/x509/x509_test.cc + src/decrepit/evp/* + src/decrepit/obj/* + src/decrepit/ripemd/* + src/decrepit/x509/* + src/FUZZING.md + src/include/openssl/aead.h + src/include/openssl/asn1_mac.h + src/include/openssl/buffer.h + src/include/openssl/bytestring.h + src/include/openssl/chacha.h + src/include/openssl/cmac.h + src/include/openssl/crypto.h + src/include/openssl/curve25519.h + src/include/openssl/dtls1.h + src/include/openssl/engine.h + src/include/openssl/hkdf.h + src/include/openssl/is_boringssl.h + src/include/openssl/obj_mac.h + src/include/openssl/objects.h + src/include/openssl/opensslconf.h + src/include/openssl/opensslv.h + src/include/openssl/ossl_typ.h + src/include/openssl/pkcs12.h + src/include/openssl/pkcs7.h + src/include/openssl/poly1305.h + src/include/openssl/pool.h + src/include/openssl/rand.h + src/include/openssl/safestack.h + src/include/openssl/srtp.h + src/INCORPORATING.md + src/PORTING.md + 
src/README.md + src/ssl/CMakeLists.txt + src/ssl/ssl_aead_ctx.cc + src/ssl/ssl_buffer.cc + src/ssl/ssl_test.cc + src/ssl/ssl_versions.cc + src/ssl/test/* + src/ssl/tls13_both.cc + src/ssl/tls13_client.cc + src/ssl/tls13_enc.cc + src/ssl/tls13_server.cc + src/STYLE.md + src/tool/* + src/util/* +Copyright: 2014-2017, Google Inc. +License: ISC + +Files: src/crypto/cipher_extra/asm/aes128gcmsiv-x86_64.pl +Copyright: 2017, Shay Gueron + 2017, Google Inc +License: ISC + +Files: sources.bp + sources.mk +Copyright: 2017, The Android Open Source Project +License: Apache-2.0 + +Files: src/ssl/test/runner/*.go +Copyright: 2012-2016, The Go Authors +License: BSD-3-clause + +Files: src/util/bot/go/* + src/util/bot/vs_toolchain.py +Copyright: 2014, The Chromium Authors +License: BSD-3-clause + +Files: src/crypto/fipsmodule/bn/asm/rsaz-avx2.pl + src/crypto/fipsmodule/bn/rsaz_exp.c + src/crypto/fipsmodule/bn/rsaz_exp.h +Copyright: 2012, Intel Corporation +License: BSD-3-clause + +Files: src/crypto/fipsmodule/ec/asm/p256-x86_64-asm.pl + src/crypto/fipsmodule/ec/p256-x86_64-table.h + src/crypto/fipsmodule/ec/p256-x86_64.* +Copyright: 2014-2015, Intel Corporation +License: ISC + +Files: src/crypto/x509v3/v3_pci.c + src/crypto/x509v3/v3_pcia.c +Copyright: 2004, Kungliga Tekniska Högskolan +License: BSD-3-clause + +Files: src/include/openssl/ecdh.h +Copyright: 2002, Sun Microsystems + 2000-2002, The OpenSSL Project +License: OpenSSL + +Files: src/crypto/fipsmodule/bn/montgomery_inv.c +Copyright: 2016, Brian Smith +License: ISC + +Files: src/crypto/cipher_extra/asm/chacha20_poly1305_x86_64.pl +Copyright: 2015, CloudFlare Ltd +License: ISC + +Files: debian/* +Copyright: 2016, Kai-Chung Yan +License: MIT + +License: OpenSSL + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. All advertising materials mentioning features or use of this + software must display the following acknowledgment: + "This product includes software developed by the OpenSSL Project + for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + endorse or promote products derived from this software without + prior written permission. For written permission, please contact + openssl-core@openssl.org. + 5. Products derived from this software may not be called "OpenSSL" + nor may "OpenSSL" appear in their names without prior written + permission of the OpenSSL Project. + 6. Redistributions of any form whatsoever must retain the following + acknowledgment: + "This product includes software developed by the OpenSSL Project + for use in the OpenSSL Toolkit (http://www.openssl.org/)" + . + THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE OpenSSL PROJECT OR + ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + OF THE POSSIBILITY OF SUCH DAMAGE. + +License: ISC + Permission to use, copy, modify, and/or distribute this software for any + purpose with or without fee is hereby granted, provided that the above + copyright notice and this permission notice appear in all copies. + . + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY + SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + +License: MIT + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + . + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + . + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. + +License: SSLeay + This library is free for commercial and non-commercial use as long as + the following conditions are adhered to. The following conditions + apply to all code found in this distribution, be it the RC4, RSA, + lhash, DES, etc., code; not just the SSL code. The SSL documentation + included with this distribution is covered by the same copyright terms + except that the holder is Tim Hudson (tjh@cryptsoft.com). + . + Copyright remains Eric Young's, and as such any Copyright notices in + the code are not to be removed. + If this package is used in a product, Eric Young should be given attribution + as the author of the parts of the library used. + This can be in the form of a textual message at program startup or + in documentation (online or textual) provided with the package. + . + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the copyright + notice, this list of conditions and the following disclaimer. + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: + "This product includes cryptographic software written by + Eric Young (eay@cryptsoft.com)" + The word 'cryptographic' can be left out if the routines from the library + being used are not cryptographic related :-). + 4. If you include any Windows specific code (or a derivative thereof) from + the apps directory (application code) you must include an acknowledgement: + "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + . + THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + SUCH DAMAGE. + . + The licence and distribution terms for any publically available version or + derivative of this code cannot be changed. i.e. this code cannot simply be + copied and put under another distribution licence + [including the GNU General Public Licence.] + +License: Apache-2.0 + On Debian systems, the full text of the Apache License, Version 2.0 + can be found in the file `/usr/share/common-licenses/Apache-2.0'. + +License: BSD-3-clause + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + . + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + . + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/crypto_test.mk b/crypto_test.mk new file mode 100644 index 0000000..a89f9eb --- /dev/null +++ b/crypto_test.mk @@ -0,0 +1,43 @@ +include debian/sources.mk + +NAME = crypto_test + +SOURCES = $(crypto_test_sources) +OBJECTS = $(SOURCES:.cc=.o) + +# src/crypto/pkcs8/pkcs12_test.cc:37:19: error: ISO C++20 does not permit initialization of char array with UTF-8 string literal +CXXFLAGS += -std=gnu++2a +CPPFLAGS += \ + -Isrc/include \ + +LDFLAGS += \ + -Ldebian/out \ + -Wl,-rpath=/usr/lib/$(DEB_HOST_MULTIARCH)/android \ + -lcrypto \ + -lgtest \ + -lpthread \ + -lssl \ + -ltest_support \ + -pie + +ifneq ($(filter mipsel mips64el,$(DEB_HOST_ARCH)),) + LDFLAGS += -Wl,-z,notext +endif + +# -latomic should be the last library specified +# https://github.com/android/ndk/issues/589 +ifeq ($(DEB_HOST_ARCH), armel) + LDFLAGS += -latomic +endif + +# clang built crypto_test binary crashes on mips64el +# so fallback to gcc as workaround +ifeq ($(DEB_HOST_ARCH), mips64el) + CXX = g++ +endif + +build: $(OBJECTS) + $(CXX) $^ -o debian/out/$(NAME) $(LDFLAGS) + +$(OBJECTS): %.o: %.cc + $(CXX) -c -o $@ $< $(CXXFLAGS) $(CPPFLAGS) diff --git a/gbp.conf b/gbp.conf new file mode 100644 index 0000000..5474c60 --- /dev/null +++ b/gbp.conf @@ -0,0 +1,3 @@ +[DEFAULT] +pristine-tar = True +sign-tags = True diff --git a/libcrypto.mk b/libcrypto.mk new file mode 100644 index 0000000..35523d3 --- /dev/null +++ b/libcrypto.mk @@ -0,0 +1,59 @@ +include debian/sources.mk + +NAME = libcrypto +SOURCES = $(crypto_sources) + +amd64_SOURCES = $(linux_x86_64_sources) +arm64_SOURCES = $(linux_aarch64_sources) +armel_SOURCES = $(linux_arm_sources) +armhf_SOURCES = $(linux_arm_sources) +i386_SOURCES = $(linux_x86_sources) +ppc64el_SOURCES = $(linux_ppc64le_sources) +hurd-i386_SOURCES = $(linux_x86_sources) +kfreebsd-i386_SOURCES = $(linux_x86_sources) +kfreebsd-amd64_SOURCES = $(linux_x86_64_sources) +x32_SOURCES = $(linux_x86_64_sources) + +SOURCES += $($(DEB_HOST_ARCH)_SOURCES) + +SOURCES_C = $(filter %.c,$(SOURCES)) +OBJECTS_C = $(SOURCES_C:.c=.o) +SOURCES_ASSEMBLY = $(filter %.S,$(SOURCES)) +OBJECTS_ASSEMBLY = $(SOURCES_ASSEMBLY:.S=.o) + +CFLAGS += -std=gnu11 \ + -fvisibility=hidden \ + -Wa,--noexecstack # Fixes `shlib-with-executable-stack`, see `src/util/BUILD.toplevel` + +CPPFLAGS += \ + -Isrc/crypto \ + -Isrc/include \ + +LDFLAGS += \ + -Wl,-soname,$(NAME).so.0 \ + -lpthread \ + -shared \ + +ifneq ($(filter mipsel mips64el,$(DEB_HOST_ARCH)),) + LDFLAGS += -Wl,-z,notext +endif + +# -latomic should be the last library specified +# https://github.com/android/ndk/issues/589 +# Use gcc instead of clang for assembly on armel +CC_ASSEMBLY = $(CC) +ifeq ($(DEB_HOST_ARCH), armel) + LDFLAGS += -latomic + CC_ASSEMBLY = gcc +endif + +build: $(OBJECTS_C) $(OBJECTS_ASSEMBLY) + mkdir -p debian/out + $(CC) $^ -o debian/out/$(NAME).so.0 $(LDFLAGS) + ln -sf $(NAME).so.0 debian/out/$(NAME).so + +$(OBJECTS_C): %.o: %.c + $(CC) -c -o $@ $< $(CFLAGS) $(CPPFLAGS) + +$(OBJECTS_ASSEMBLY): %.o: %.S + $(CC_ASSEMBLY) -c -o $@ $< $(CFLAGS) $(CPPFLAGS) diff --git a/libssl.mk b/libssl.mk new file mode 100644 index 0000000..dd00b87 --- /dev/null +++ b/libssl.mk @@ -0,0 +1,26 @@ +include debian/sources.mk + +NAME = libssl +SOURCES = $(ssl_sources) + +OBJECTS = $(SOURCES:.cc=.o) + +CXXFLAGS += -std=gnu++2a \ + -fvisibility=hidden \ + +CPPFLAGS += \ + -Isrc/include \ + +LDFLAGS += \ + -Ldebian/out \ + -Wl,-rpath=/usr/lib/$(DEB_HOST_MULTIARCH)/android \ + -Wl,-soname,$(NAME).so.0 \ + -lcrypto \ + -shared \ + +build: $(OBJECTS) + $(CXX) $^ -o 
debian/out/$(NAME).so.0 $(LDFLAGS) + ln -sf $(NAME).so.0 debian/out/$(NAME).so + +$(OBJECTS): %.o: %.cc + $(CXX) -c -o $@ $< $(CXXFLAGS) $(CPPFLAGS) diff --git a/libtest_support.mk b/libtest_support.mk new file mode 100644 index 0000000..99137a5 --- /dev/null +++ b/libtest_support.mk @@ -0,0 +1,21 @@ +include debian/sources.mk + +NAME = libtest_support +SOURCES = $(test_support_sources) + +OBJECTS = $(SOURCES:.cc=.o) + +CXXFLAGS += -std=gnu++2a +CPPFLAGS += \ + -Isrc/include \ + +LDFLAGS += \ + -Wl,-soname,$(NAME).so.0 \ + -shared \ + +build: $(OBJECTS) + $(CXX) $^ -o debian/out/$(NAME).so.0 $(LDFLAGS) + ln -sf $(NAME).so.0 debian/out/$(NAME).so + +$(OBJECTS): %.o: %.cc + $(CXX) -c -o $@ $< $(CXXFLAGS) $(CPPFLAGS) diff --git a/patches/01-Add-new-Arch-ia64-riscv64-sh4-x32.patch b/patches/01-Add-new-Arch-ia64-riscv64-sh4-x32.patch new file mode 100644 index 0000000..f19c423 --- /dev/null +++ b/patches/01-Add-new-Arch-ia64-riscv64-sh4-x32.patch @@ -0,0 +1,27 @@ +Description: Support to build on little endian systems: ia64, riscv64, sh4, and x32 +Forwarded: https://boringssl-review.googlesource.com/c/boringssl/+/52965 +--- a/src/include/openssl/base.h ++++ b/src/include/openssl/base.h +@@ -84,7 +84,7 @@ extern "C" { + #endif + + +-#if defined(__x86_64) || defined(_M_AMD64) || defined(_M_X64) ++#if (defined(__x86_64) && defined(__LP64__)) || defined(_M_AMD64) || defined(_M_X64) + #define OPENSSL_64_BIT + #define OPENSSL_X86_64 + #elif defined(__x86) || defined(__i386) || defined(__i386__) || defined(_M_IX86) +@@ -109,6 +109,13 @@ extern "C" { + #define OPENSSL_64_BIT + #elif defined(__riscv) && __SIZEOF_POINTER__ == 4 + #define OPENSSL_32_BIT ++#elif defined(__ia64__) ++#define OPENSSL_64_BIT ++#elif defined(__x86_64__) && defined(__ILP32__) // x32 ++#define OPENSSL_32_BIT ++#define OPENSSL_X86_64 ++#elif defined(__sh__) ++#define OPENSSL_32_BIT + #elif defined(__pnacl__) + #define OPENSSL_32_BIT + #define OPENSSL_PNACL diff --git a/patches/02-sources-mk.patch b/patches/02-sources-mk.patch new file mode 100644 index 0000000..6c06161 --- /dev/null +++ b/patches/02-sources-mk.patch @@ -0,0 +1,14 @@ +Description: Update debian/sources.mk +Forwarded: not-needed +--- a/src/util/generate_build_files.py ++++ b/src/util/generate_build_files.py +@@ -315,6 +315,9 @@ class Eureka(object): + self.PrintVariableSection(makefile, 'crypto_sources', files['crypto']) + self.PrintVariableSection(makefile, 'ssl_sources', files['ssl']) + self.PrintVariableSection(makefile, 'tool_sources', files['tool']) ++ self.PrintVariableSection(makefile, 'test_support_sources', files['test_support']) ++ self.PrintVariableSection(makefile, 'crypto_test_sources', files['crypto_test']) ++ self.PrintVariableSection(makefile, 'ssl_test_sources', files['ssl_test']) + + for ((osname, arch), asm_files) in asm_outputs: + if osname != 'linux': diff --git a/patches/Sync-to-81502beeddc5f116d44d0898c.patch b/patches/Sync-to-81502beeddc5f116d44d0898c.patch new file mode 100644 index 0000000..e1c419b --- /dev/null +++ b/patches/Sync-to-81502beeddc5f116d44d0898c.patch @@ -0,0 +1,174804 @@ +From: Pete Bentley +Date: Mon, 28 Feb 2022 19:23:25 +0000 +Subject: Sync to 81502beeddc5f116d44d0898c6c4a33057198db8 + +This includes the following changes: + +https://boringssl.googlesource.com/boringssl/+log/345c86b1cfcc478a71a9a71f0206893fd16ae912..81502beeddc5f116d44d0898c6c4a33057198db8 + +* Linkify RFCs in more places in the docs. +* Make FFDH self tests lazy. +* Make ECC self tests lazy. +* HPKE is now RFC 9180. 
+* Include the policy document for the most recent FIPS validation. +* Check static CPU capabilities on x86. +Update-Note: This CL may break build environments that incorrectly mark +some instruction as statically available. This is unlikely to happen +with vector instructions like AVX, where the compiler could freely emit +them anyway. However, instructions like AES-NI might be set incorrectly. +* Align rsaz_avx2_preferred with x86_64-mont5.pl. +* Enable SHA-NI optimizations for SHA-256. +* Update Intel SDE. +* Include the EKU extension in bssl server's self-signed certs. +* Don't call a non-test file *test.h. +* Make RSA self-test lazy. +* Add link to new Android FIPS certificate. +* delocate: handle a new output form in Clang 13. +* Drop, now unused, KAT value. +* Drop CAVP code. +* Break FIPS tests differently. +* Don't forget hmac.h in self_check.h. +* Perform SHA-$x and HMAC KAT before integrity check. +* Add a couple of spaces to `check_test`. +* Split FIPS KATs into fast and slow groups. +* Move DES out of the FIPS module. +* acvp: don't send the Authorization header when renewing tokens +* Support Bazel's test-sharding protocol. +* Simply CMake assembly source selection. +* Rename generated assembly from 'mac' or 'ios' to 'apple' +Update-Note: References to 'mac' or 'ios' source lists in downstream +builds should be renamed to 'apple'. +* Build aarch64 assembly for macOS in the bazel build. +* Fix OPENSSL_NO_ASM definition in bazel. +* Use @platforms in Bazel rules. +* Record ClientHelloInner values in msg_callback. +* Fold ssl_decode_client_hello_inner into ssl_client_hello_decrypt. +* Explicitly reject self-referential ech_outer_extensions. +* Simpler square-root computation for Ed25519 +* Condition split handshake tests on Linux in CMake. +* Implement PEM_read_bio_DHparams with the macro. +* Limit _XOPEN_SOURCE to Linux. +Update-Note: It's possible this will break yet another obscure UNIX. +Hopefully we can eventually find a combination that works? 
+* Fix Unicode strings for C++20 + +Test: atest CtsLibcoreTestCases CtsLibcoreOkHttpTestCases +Change-Id: I177c2a06d3d85ea4912e4f657caa370363966c33 +--- + Android.bp | 49 +- + BORINGSSL_REVISION | 2 +- + BUILD.generated.bzl | 129 +- + BUILD.generated_tests.bzl | 3 +- + android-sources.cmake | 120 +- + apple-aarch64/crypto/chacha/chacha-armv8.S | 1992 +++++ + apple-aarch64/crypto/fipsmodule/aesv8-armx64.S | 799 ++ + apple-aarch64/crypto/fipsmodule/armv8-mont.S | 1433 ++++ + apple-aarch64/crypto/fipsmodule/ghash-neon-armv8.S | 343 + + apple-aarch64/crypto/fipsmodule/ghashv8-armx64.S | 573 ++ + apple-aarch64/crypto/fipsmodule/sha1-armv8.S | 1235 +++ + apple-aarch64/crypto/fipsmodule/sha256-armv8.S | 1212 +++ + apple-aarch64/crypto/fipsmodule/sha512-armv8.S | 1614 ++++ + apple-aarch64/crypto/fipsmodule/vpaes-armv8.S | 1232 +++ + apple-aarch64/crypto/test/trampoline-armv8.S | 758 ++ + apple-arm/crypto/chacha/chacha-armv4.S | 1498 ++++ + apple-arm/crypto/fipsmodule/aesv8-armx32.S | 809 ++ + apple-arm/crypto/fipsmodule/armv4-mont.S | 982 +++ + apple-arm/crypto/fipsmodule/bsaes-armv7.S | 1536 ++++ + apple-arm/crypto/fipsmodule/ghash-armv4.S | 258 + + apple-arm/crypto/fipsmodule/ghashv8-armx32.S | 260 + + apple-arm/crypto/fipsmodule/sha1-armv4-large.S | 1518 ++++ + apple-arm/crypto/fipsmodule/sha256-armv4.S | 2846 +++++++ + apple-arm/crypto/fipsmodule/sha512-armv4.S | 1899 +++++ + apple-arm/crypto/fipsmodule/vpaes-armv7.S | 1265 +++ + apple-arm/crypto/test/trampoline-armv4.S | 376 + + apple-x86/crypto/chacha/chacha-x86.S | 974 +++ + apple-x86/crypto/fipsmodule/aesni-x86.S | 2476 ++++++ + apple-x86/crypto/fipsmodule/bn-586.S | 988 +++ + apple-x86/crypto/fipsmodule/co-586.S | 1257 +++ + apple-x86/crypto/fipsmodule/ghash-ssse3-x86.S | 289 + + apple-x86/crypto/fipsmodule/ghash-x86.S | 323 + + apple-x86/crypto/fipsmodule/md5-586.S | 685 ++ + apple-x86/crypto/fipsmodule/sha1-586.S | 3805 +++++++++ + apple-x86/crypto/fipsmodule/sha256-586.S | 5568 ++++++++++++ + apple-x86/crypto/fipsmodule/sha512-586.S | 2838 +++++++ + apple-x86/crypto/fipsmodule/vpaes-x86.S | 681 ++ + apple-x86/crypto/fipsmodule/x86-mont.S | 485 ++ + apple-x86/crypto/test/trampoline-x86.S | 169 + + apple-x86_64/crypto/chacha/chacha-x86_64.S | 1625 ++++ + .../crypto/cipher_extra/aes128gcmsiv-x86_64.S | 3068 +++++++ + .../crypto/cipher_extra/chacha20_poly1305_x86_64.S | 8878 ++++++++++++++++++++ + apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S | 850 ++ + apple-x86_64/crypto/fipsmodule/aesni-x86_64.S | 2503 ++++++ + .../crypto/fipsmodule/ghash-ssse3-x86_64.S | 426 + + apple-x86_64/crypto/fipsmodule/ghash-x86_64.S | 1125 +++ + apple-x86_64/crypto/fipsmodule/md5-x86_64.S | 696 ++ + apple-x86_64/crypto/fipsmodule/p256-x86_64-asm.S | 4467 ++++++++++ + .../crypto/fipsmodule/p256_beeu-x86_64-asm.S | 328 + + apple-x86_64/crypto/fipsmodule/rdrand-x86_64.S | 62 + + apple-x86_64/crypto/fipsmodule/rsaz-avx2.S | 1748 ++++ + apple-x86_64/crypto/fipsmodule/sha1-x86_64.S | 5466 ++++++++++++ + apple-x86_64/crypto/fipsmodule/sha256-x86_64.S | 4182 +++++++++ + apple-x86_64/crypto/fipsmodule/sha512-x86_64.S | 2990 +++++++ + apple-x86_64/crypto/fipsmodule/vpaes-x86_64.S | 1130 +++ + apple-x86_64/crypto/fipsmodule/x86_64-mont.S | 1256 +++ + apple-x86_64/crypto/fipsmodule/x86_64-mont5.S | 3788 +++++++++ + apple-x86_64/crypto/test/trampoline-x86_64.S | 513 ++ + err_data.c | 540 +- + eureka.mk | 2 + + linux-x86_64/crypto/fipsmodule/sha256-x86_64.S | 211 + + mac-x86/crypto/chacha/chacha-x86.S | 974 --- + mac-x86/crypto/fipsmodule/aesni-x86.S | 2476 ------ + 
mac-x86/crypto/fipsmodule/bn-586.S | 988 --- + mac-x86/crypto/fipsmodule/co-586.S | 1257 --- + mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S | 289 - + mac-x86/crypto/fipsmodule/ghash-x86.S | 323 - + mac-x86/crypto/fipsmodule/md5-586.S | 685 -- + mac-x86/crypto/fipsmodule/sha1-586.S | 3805 --------- + mac-x86/crypto/fipsmodule/sha256-586.S | 5568 ------------ + mac-x86/crypto/fipsmodule/sha512-586.S | 2838 ------- + mac-x86/crypto/fipsmodule/vpaes-x86.S | 681 -- + mac-x86/crypto/fipsmodule/x86-mont.S | 485 -- + mac-x86/crypto/test/trampoline-x86.S | 169 - + mac-x86_64/crypto/chacha/chacha-x86_64.S | 1625 ---- + .../crypto/cipher_extra/aes128gcmsiv-x86_64.S | 3068 ------- + .../crypto/cipher_extra/chacha20_poly1305_x86_64.S | 8878 -------------------- + mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S | 850 -- + mac-x86_64/crypto/fipsmodule/aesni-x86_64.S | 2503 ------ + mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S | 426 - + mac-x86_64/crypto/fipsmodule/ghash-x86_64.S | 1125 --- + mac-x86_64/crypto/fipsmodule/md5-x86_64.S | 696 -- + mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S | 4467 ---------- + .../crypto/fipsmodule/p256_beeu-x86_64-asm.S | 328 - + mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S | 62 - + mac-x86_64/crypto/fipsmodule/rsaz-avx2.S | 1748 ---- + mac-x86_64/crypto/fipsmodule/sha1-x86_64.S | 5466 ------------ + mac-x86_64/crypto/fipsmodule/sha256-x86_64.S | 3971 --------- + mac-x86_64/crypto/fipsmodule/sha512-x86_64.S | 2990 ------- + mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S | 1130 --- + mac-x86_64/crypto/fipsmodule/x86_64-mont.S | 1256 --- + mac-x86_64/crypto/fipsmodule/x86_64-mont5.S | 3788 --------- + mac-x86_64/crypto/test/trampoline-x86_64.S | 513 -- + sources.bp | 2 + + sources.mk | 2 + + src/.gitignore | 4 +- + src/CMakeLists.txt | 13 +- + src/crypto/CMakeLists.txt | 2 + + src/crypto/cipher_extra/e_aesgcmsiv.c | 11 +- + src/crypto/cipher_extra/e_des.c | 258 + + src/crypto/cipher_extra/internal.h | 3 +- + src/crypto/curve25519/curve25519.c | 20 +- + src/crypto/des/des.c | 784 ++ + src/crypto/des/internal.h | 238 + + src/crypto/err/ssl.errordata | 2 +- + src/crypto/fipsmodule/FIPS.md | 33 +- + src/crypto/fipsmodule/aes/internal.h | 8 +- + src/crypto/fipsmodule/bcm.c | 21 +- + src/crypto/fipsmodule/bn/rsaz_exp.h | 13 +- + src/crypto/fipsmodule/cipher/e_des.c | 237 - + src/crypto/fipsmodule/des/des.c | 784 -- + src/crypto/fipsmodule/des/internal.h | 238 - + src/crypto/fipsmodule/dh/dh.c | 14 +- + src/crypto/fipsmodule/dh/internal.h | 36 + + src/crypto/fipsmodule/ec/ec.c | 12 +- + src/crypto/fipsmodule/ec/ec_key.c | 8 +- + src/crypto/fipsmodule/ec/internal.h | 7 + + src/crypto/fipsmodule/ec/p256-x86_64.c | 2 +- + src/crypto/fipsmodule/ec/p256-x86_64_test.cc | 2 +- + src/crypto/fipsmodule/ecdh/ecdh.c | 3 + + src/crypto/fipsmodule/ecdsa/ecdsa.c | 15 +- + src/crypto/fipsmodule/ecdsa/internal.h | 6 + + src/crypto/fipsmodule/modes/gcm.c | 10 +- + src/crypto/fipsmodule/modes/gcm_test.cc | 4 +- + src/crypto/fipsmodule/modes/internal.h | 4 - + ...ingCrypto-Android-Security-Policy-20210319.docx | Bin 0 -> 156793 bytes + src/crypto/fipsmodule/rand/internal.h | 5 +- + src/crypto/fipsmodule/rand/rand.c | 10 +- + src/crypto/fipsmodule/rsa/internal.h | 22 + + src/crypto/fipsmodule/rsa/rsa.c | 56 +- + src/crypto/fipsmodule/rsa/rsa_impl.c | 19 +- + src/crypto/fipsmodule/self_check/self_check.c | 933 +- + src/crypto/fipsmodule/sha/asm/sha512-x86_64.pl | 19 +- + src/crypto/hpke/hpke.c | 4 +- + src/crypto/hpke/translate_test_vectors.py | 2 +- + src/crypto/hrss/hrss.c | 3 +- + 
src/crypto/hrss/hrss_test.cc | 3 +- + src/crypto/impl_dispatch_test.cc | 6 +- + src/crypto/internal.h | 176 +- + src/crypto/pem/pem_all.c | 8 +- + src/crypto/pem/pem_pkey.c | 36 - + src/crypto/pkcs8/pkcs12_test.cc | 4 +- + src/decrepit/des/cfb64ede.c | 2 +- + src/include/openssl/hpke.h | 2 +- + src/include/openssl/ssl.h | 10 +- + src/include/openssl/ssl3.h | 1 + + src/ssl/encrypted_client_hello.cc | 43 +- + src/ssl/handshake_client.cc | 2 +- + src/ssl/handshake_server.cc | 21 +- + src/ssl/internal.h | 16 +- + src/ssl/test/CMakeLists.txt | 2 +- + src/ssl/test/runner/hpke/hpke.go | 2 +- + src/ssl/test/runner/runner.go | 121 +- + src/ssl/test/runner/sharding.go | 77 + + src/ssl/test/test_config.cc | 18 +- + src/ssl/tls13_server.cc | 20 +- + src/tool/server.cc | 44 +- + src/util/BUILD.toplevel | 109 +- + src/util/bot/DEPS | 8 +- + src/util/bot/UPDATING | 6 +- + src/util/bot/extract.py | 2 + + src/util/bot/sde-linux64.tar.bz2.sha1 | 1 - + src/util/bot/sde-linux64.tar.xz.sha1 | 1 + + src/util/bot/sde-win32.tar.bz2.sha1 | 1 - + src/util/bot/sde-win32.tar.xz.sha1 | 1 + + src/util/doc.go | 29 +- + src/util/fipstools/CMakeLists.txt | 12 + + src/util/fipstools/acvp/acvptool/acvp/acvp.go | 10 +- + src/util/fipstools/break-kat.go | 89 + + src/util/fipstools/break-tests-android.sh | 117 - + src/util/fipstools/break-tests.sh | 53 - + src/util/fipstools/cavp/CMakeLists.txt | 42 - + src/util/fipstools/cavp/cavp_aes_gcm_test.cc | 166 - + src/util/fipstools/cavp/cavp_aes_test.cc | 225 - + src/util/fipstools/cavp/cavp_ctr_drbg_test.cc | 106 - + .../fipstools/cavp/cavp_ecdsa2_keypair_test.cc | 92 - + src/util/fipstools/cavp/cavp_ecdsa2_pkv_test.cc | 66 - + src/util/fipstools/cavp/cavp_ecdsa2_siggen_test.cc | 123 - + src/util/fipstools/cavp/cavp_ecdsa2_sigver_test.cc | 84 - + src/util/fipstools/cavp/cavp_hmac_test.cc | 106 - + src/util/fipstools/cavp/cavp_kas_test.cc | 156 - + src/util/fipstools/cavp/cavp_keywrap_test.cc | 166 - + src/util/fipstools/cavp/cavp_main.cc | 73 - + src/util/fipstools/cavp/cavp_rsa2_keygen_test.cc | 93 - + src/util/fipstools/cavp/cavp_rsa2_siggen_test.cc | 128 - + src/util/fipstools/cavp/cavp_rsa2_sigver_test.cc | 125 - + src/util/fipstools/cavp/cavp_sha_monte_test.cc | 103 - + src/util/fipstools/cavp/cavp_sha_test.cc | 97 - + src/util/fipstools/cavp/cavp_tdes_test.cc | 336 - + src/util/fipstools/cavp/cavp_test_util.cc | 220 - + src/util/fipstools/cavp/cavp_test_util.h | 76 - + src/util/fipstools/cavp/cavp_tlskdf_test.cc | 113 - + src/util/fipstools/cavp/run_cavp.go | 592 -- + src/util/fipstools/cavp/test_fips.c | 309 - + src/util/fipstools/delocate/delocate.peg | 2 +- + src/util/fipstools/delocate/delocate.peg.go | 3455 ++++---- + .../fipstools/delocate/testdata/x86_64-Basic/in.s | 1 + + .../fipstools/delocate/testdata/x86_64-Basic/out.s | 1 + + src/util/fipstools/test-break-kat.sh | 40 + + src/util/fipstools/test_fips.c | 309 + + src/util/generate_build_files.py | 16 +- + win-x86_64/crypto/fipsmodule/sha256-x86_64.asm | 274 + + 202 files changed, 95752 insertions(+), 73426 deletions(-) + create mode 100644 apple-aarch64/crypto/chacha/chacha-armv8.S + create mode 100644 apple-aarch64/crypto/fipsmodule/aesv8-armx64.S + create mode 100644 apple-aarch64/crypto/fipsmodule/armv8-mont.S + create mode 100644 apple-aarch64/crypto/fipsmodule/ghash-neon-armv8.S + create mode 100644 apple-aarch64/crypto/fipsmodule/ghashv8-armx64.S + create mode 100644 apple-aarch64/crypto/fipsmodule/sha1-armv8.S + create mode 100644 apple-aarch64/crypto/fipsmodule/sha256-armv8.S + create mode 100644 
apple-aarch64/crypto/fipsmodule/sha512-armv8.S + create mode 100644 apple-aarch64/crypto/fipsmodule/vpaes-armv8.S + create mode 100644 apple-aarch64/crypto/test/trampoline-armv8.S + create mode 100644 apple-arm/crypto/chacha/chacha-armv4.S + create mode 100644 apple-arm/crypto/fipsmodule/aesv8-armx32.S + create mode 100644 apple-arm/crypto/fipsmodule/armv4-mont.S + create mode 100644 apple-arm/crypto/fipsmodule/bsaes-armv7.S + create mode 100644 apple-arm/crypto/fipsmodule/ghash-armv4.S + create mode 100644 apple-arm/crypto/fipsmodule/ghashv8-armx32.S + create mode 100644 apple-arm/crypto/fipsmodule/sha1-armv4-large.S + create mode 100644 apple-arm/crypto/fipsmodule/sha256-armv4.S + create mode 100644 apple-arm/crypto/fipsmodule/sha512-armv4.S + create mode 100644 apple-arm/crypto/fipsmodule/vpaes-armv7.S + create mode 100644 apple-arm/crypto/test/trampoline-armv4.S + create mode 100644 apple-x86/crypto/chacha/chacha-x86.S + create mode 100644 apple-x86/crypto/fipsmodule/aesni-x86.S + create mode 100644 apple-x86/crypto/fipsmodule/bn-586.S + create mode 100644 apple-x86/crypto/fipsmodule/co-586.S + create mode 100644 apple-x86/crypto/fipsmodule/ghash-ssse3-x86.S + create mode 100644 apple-x86/crypto/fipsmodule/ghash-x86.S + create mode 100644 apple-x86/crypto/fipsmodule/md5-586.S + create mode 100644 apple-x86/crypto/fipsmodule/sha1-586.S + create mode 100644 apple-x86/crypto/fipsmodule/sha256-586.S + create mode 100644 apple-x86/crypto/fipsmodule/sha512-586.S + create mode 100644 apple-x86/crypto/fipsmodule/vpaes-x86.S + create mode 100644 apple-x86/crypto/fipsmodule/x86-mont.S + create mode 100644 apple-x86/crypto/test/trampoline-x86.S + create mode 100644 apple-x86_64/crypto/chacha/chacha-x86_64.S + create mode 100644 apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S + create mode 100644 apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/aesni-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/ghash-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/md5-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/p256-x86_64-asm.S + create mode 100644 apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S + create mode 100644 apple-x86_64/crypto/fipsmodule/rdrand-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/rsaz-avx2.S + create mode 100644 apple-x86_64/crypto/fipsmodule/sha1-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/sha256-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/sha512-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/vpaes-x86_64.S + create mode 100644 apple-x86_64/crypto/fipsmodule/x86_64-mont.S + create mode 100644 apple-x86_64/crypto/fipsmodule/x86_64-mont5.S + create mode 100644 apple-x86_64/crypto/test/trampoline-x86_64.S + delete mode 100644 mac-x86/crypto/chacha/chacha-x86.S + delete mode 100644 mac-x86/crypto/fipsmodule/aesni-x86.S + delete mode 100644 mac-x86/crypto/fipsmodule/bn-586.S + delete mode 100644 mac-x86/crypto/fipsmodule/co-586.S + delete mode 100644 mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S + delete mode 100644 mac-x86/crypto/fipsmodule/ghash-x86.S + delete mode 100644 mac-x86/crypto/fipsmodule/md5-586.S + delete mode 100644 mac-x86/crypto/fipsmodule/sha1-586.S + delete mode 100644 mac-x86/crypto/fipsmodule/sha256-586.S + delete mode 100644 
mac-x86/crypto/fipsmodule/sha512-586.S + delete mode 100644 mac-x86/crypto/fipsmodule/vpaes-x86.S + delete mode 100644 mac-x86/crypto/fipsmodule/x86-mont.S + delete mode 100644 mac-x86/crypto/test/trampoline-x86.S + delete mode 100644 mac-x86_64/crypto/chacha/chacha-x86_64.S + delete mode 100644 mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S + delete mode 100644 mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/aesni-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/ghash-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/md5-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/rsaz-avx2.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/sha1-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/sha256-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/sha512-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/x86_64-mont.S + delete mode 100644 mac-x86_64/crypto/fipsmodule/x86_64-mont5.S + delete mode 100644 mac-x86_64/crypto/test/trampoline-x86_64.S + create mode 100644 src/crypto/cipher_extra/e_des.c + create mode 100644 src/crypto/des/des.c + create mode 100644 src/crypto/des/internal.h + delete mode 100644 src/crypto/fipsmodule/cipher/e_des.c + delete mode 100644 src/crypto/fipsmodule/des/des.c + delete mode 100644 src/crypto/fipsmodule/des/internal.h + create mode 100644 src/crypto/fipsmodule/dh/internal.h + create mode 100644 src/crypto/fipsmodule/policydocs/BoringCrypto-Android-Security-Policy-20210319.docx + create mode 100644 src/ssl/test/runner/sharding.go + delete mode 100644 src/util/bot/sde-linux64.tar.bz2.sha1 + create mode 100644 src/util/bot/sde-linux64.tar.xz.sha1 + delete mode 100644 src/util/bot/sde-win32.tar.bz2.sha1 + create mode 100644 src/util/bot/sde-win32.tar.xz.sha1 + create mode 100644 src/util/fipstools/CMakeLists.txt + create mode 100644 src/util/fipstools/break-kat.go + delete mode 100644 src/util/fipstools/break-tests-android.sh + delete mode 100644 src/util/fipstools/break-tests.sh + delete mode 100644 src/util/fipstools/cavp/CMakeLists.txt + delete mode 100644 src/util/fipstools/cavp/cavp_aes_gcm_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_aes_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_ctr_drbg_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_ecdsa2_keypair_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_ecdsa2_pkv_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_ecdsa2_siggen_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_ecdsa2_sigver_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_hmac_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_kas_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_keywrap_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_main.cc + delete mode 100644 src/util/fipstools/cavp/cavp_rsa2_keygen_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_rsa2_siggen_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_rsa2_sigver_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_sha_monte_test.cc + delete mode 
100644 src/util/fipstools/cavp/cavp_sha_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_tdes_test.cc + delete mode 100644 src/util/fipstools/cavp/cavp_test_util.cc + delete mode 100644 src/util/fipstools/cavp/cavp_test_util.h + delete mode 100644 src/util/fipstools/cavp/cavp_tlskdf_test.cc + delete mode 100644 src/util/fipstools/cavp/run_cavp.go + delete mode 100644 src/util/fipstools/cavp/test_fips.c + create mode 100644 src/util/fipstools/test-break-kat.sh + create mode 100644 src/util/fipstools/test_fips.c + +diff --git a/Android.bp b/Android.bp +index 416bfd8..7ee3395 100644 +--- a/Android.bp ++++ b/Android.bp +@@ -421,53 +421,6 @@ cc_binary { + }, + } + +-// Used for CAVP testing for FIPS certification. +-// Not installed on devices by default. +-cc_binary { +- name: "cavp", +- host_supported: true, +- srcs: [ +- "src/util/fipstools/cavp/cavp_aes_gcm_test.cc", +- "src/util/fipstools/cavp/cavp_aes_test.cc", +- "src/util/fipstools/cavp/cavp_ctr_drbg_test.cc", +- "src/util/fipstools/cavp/cavp_ecdsa2_keypair_test.cc", +- "src/util/fipstools/cavp/cavp_ecdsa2_pkv_test.cc", +- "src/util/fipstools/cavp/cavp_ecdsa2_siggen_test.cc", +- "src/util/fipstools/cavp/cavp_ecdsa2_sigver_test.cc", +- "src/util/fipstools/cavp/cavp_hmac_test.cc", +- "src/util/fipstools/cavp/cavp_kas_test.cc", +- "src/util/fipstools/cavp/cavp_keywrap_test.cc", +- "src/util/fipstools/cavp/cavp_main.cc", +- "src/util/fipstools/cavp/cavp_rsa2_keygen_test.cc", +- "src/util/fipstools/cavp/cavp_rsa2_siggen_test.cc", +- "src/util/fipstools/cavp/cavp_rsa2_sigver_test.cc", +- "src/util/fipstools/cavp/cavp_sha_monte_test.cc", +- "src/util/fipstools/cavp/cavp_sha_test.cc", +- "src/util/fipstools/cavp/cavp_tdes_test.cc", +- "src/util/fipstools/cavp/cavp_test_util.cc", +- "src/util/fipstools/cavp/cavp_tlskdf_test.cc", +- ], +- target: { +- android: { +- compile_multilib: "both", +- }, +- }, +- multilib: { +- lib32: { +- suffix: "32", +- }, +- }, +- +- shared_libs: [ +- "libcrypto", +- ], +- +- defaults: [ +- "boringssl_test_support_sources", +- "boringssl_flags", +- ], +-} +- + // Used for ACVP testing for FIPS certification. + // Not installed on devices by default. 
+ cc_binary { +@@ -584,6 +537,6 @@ cc_binary { + "libcrypto", + ], + srcs: [ +- "src/util/fipstools/cavp/test_fips.c", ++ "src/util/fipstools/test_fips.c", + ], + } +diff --git a/BORINGSSL_REVISION b/BORINGSSL_REVISION +index 9fc401a..95a1efc 100644 +--- a/BORINGSSL_REVISION ++++ b/BORINGSSL_REVISION +@@ -1 +1 @@ +-345c86b1cfcc478a71a9a71f0206893fd16ae912 ++81502beeddc5f116d44d0898c6c4a33057198db8 +diff --git a/BUILD.generated.bzl b/BUILD.generated.bzl +index 8621d9b..bf9efa7 100644 +--- a/BUILD.generated.bzl ++++ b/BUILD.generated.bzl +@@ -37,8 +37,6 @@ fips_fragments = [ + "src/crypto/fipsmodule/cipher/aead.c", + "src/crypto/fipsmodule/cipher/cipher.c", + "src/crypto/fipsmodule/cipher/e_aes.c", +- "src/crypto/fipsmodule/cipher/e_des.c", +- "src/crypto/fipsmodule/des/des.c", + "src/crypto/fipsmodule/dh/check.c", + "src/crypto/fipsmodule/dh/dh.c", + "src/crypto/fipsmodule/digest/digest.c", +@@ -218,6 +216,7 @@ crypto_internal_headers = [ + "src/crypto/cpu_arm_linux.h", + "src/crypto/curve25519/curve25519_tables.h", + "src/crypto/curve25519/internal.h", ++ "src/crypto/des/internal.h", + "src/crypto/dsa/internal.h", + "src/crypto/ec_extra/internal.h", + "src/crypto/err/internal.h", +@@ -227,7 +226,7 @@ crypto_internal_headers = [ + "src/crypto/fipsmodule/bn/rsaz_exp.h", + "src/crypto/fipsmodule/cipher/internal.h", + "src/crypto/fipsmodule/delocate.h", +- "src/crypto/fipsmodule/des/internal.h", ++ "src/crypto/fipsmodule/dh/internal.h", + "src/crypto/fipsmodule/digest/internal.h", + "src/crypto/fipsmodule/digest/md32_common.h", + "src/crypto/fipsmodule/ec/internal.h", +@@ -320,6 +319,7 @@ crypto_sources = [ + "src/crypto/cipher_extra/e_aesctrhmac.c", + "src/crypto/cipher_extra/e_aesgcmsiv.c", + "src/crypto/cipher_extra/e_chacha20poly1305.c", ++ "src/crypto/cipher_extra/e_des.c", + "src/crypto/cipher_extra/e_null.c", + "src/crypto/cipher_extra/e_rc2.c", + "src/crypto/cipher_extra/e_rc4.c", +@@ -338,6 +338,7 @@ crypto_sources = [ + "src/crypto/crypto.c", + "src/crypto/curve25519/curve25519.c", + "src/crypto/curve25519/spake25519.c", ++ "src/crypto/des/des.c", + "src/crypto/dh_extra/dh_asn1.c", + "src/crypto/dh_extra/params.c", + "src/crypto/digest_extra/digest_extra.c", +@@ -520,31 +521,69 @@ tool_headers = [ + "src/tool/transport_common.h", + ] + +-crypto_sources_ios_aarch64 = [ +- "ios-aarch64/crypto/chacha/chacha-armv8.S", +- "ios-aarch64/crypto/fipsmodule/aesv8-armx64.S", +- "ios-aarch64/crypto/fipsmodule/armv8-mont.S", +- "ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S", +- "ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S", +- "ios-aarch64/crypto/fipsmodule/sha1-armv8.S", +- "ios-aarch64/crypto/fipsmodule/sha256-armv8.S", +- "ios-aarch64/crypto/fipsmodule/sha512-armv8.S", +- "ios-aarch64/crypto/fipsmodule/vpaes-armv8.S", +- "ios-aarch64/crypto/test/trampoline-armv8.S", ++crypto_sources_apple_aarch64 = [ ++ "apple-aarch64/crypto/chacha/chacha-armv8.S", ++ "apple-aarch64/crypto/fipsmodule/aesv8-armx64.S", ++ "apple-aarch64/crypto/fipsmodule/armv8-mont.S", ++ "apple-aarch64/crypto/fipsmodule/ghash-neon-armv8.S", ++ "apple-aarch64/crypto/fipsmodule/ghashv8-armx64.S", ++ "apple-aarch64/crypto/fipsmodule/sha1-armv8.S", ++ "apple-aarch64/crypto/fipsmodule/sha256-armv8.S", ++ "apple-aarch64/crypto/fipsmodule/sha512-armv8.S", ++ "apple-aarch64/crypto/fipsmodule/vpaes-armv8.S", ++ "apple-aarch64/crypto/test/trampoline-armv8.S", + ] + +-crypto_sources_ios_arm = [ +- "ios-arm/crypto/chacha/chacha-armv4.S", +- "ios-arm/crypto/fipsmodule/aesv8-armx32.S", +- "ios-arm/crypto/fipsmodule/armv4-mont.S", +- 
"ios-arm/crypto/fipsmodule/bsaes-armv7.S", +- "ios-arm/crypto/fipsmodule/ghash-armv4.S", +- "ios-arm/crypto/fipsmodule/ghashv8-armx32.S", +- "ios-arm/crypto/fipsmodule/sha1-armv4-large.S", +- "ios-arm/crypto/fipsmodule/sha256-armv4.S", +- "ios-arm/crypto/fipsmodule/sha512-armv4.S", +- "ios-arm/crypto/fipsmodule/vpaes-armv7.S", +- "ios-arm/crypto/test/trampoline-armv4.S", ++crypto_sources_apple_arm = [ ++ "apple-arm/crypto/chacha/chacha-armv4.S", ++ "apple-arm/crypto/fipsmodule/aesv8-armx32.S", ++ "apple-arm/crypto/fipsmodule/armv4-mont.S", ++ "apple-arm/crypto/fipsmodule/bsaes-armv7.S", ++ "apple-arm/crypto/fipsmodule/ghash-armv4.S", ++ "apple-arm/crypto/fipsmodule/ghashv8-armx32.S", ++ "apple-arm/crypto/fipsmodule/sha1-armv4-large.S", ++ "apple-arm/crypto/fipsmodule/sha256-armv4.S", ++ "apple-arm/crypto/fipsmodule/sha512-armv4.S", ++ "apple-arm/crypto/fipsmodule/vpaes-armv7.S", ++ "apple-arm/crypto/test/trampoline-armv4.S", ++] ++ ++crypto_sources_apple_x86 = [ ++ "apple-x86/crypto/chacha/chacha-x86.S", ++ "apple-x86/crypto/fipsmodule/aesni-x86.S", ++ "apple-x86/crypto/fipsmodule/bn-586.S", ++ "apple-x86/crypto/fipsmodule/co-586.S", ++ "apple-x86/crypto/fipsmodule/ghash-ssse3-x86.S", ++ "apple-x86/crypto/fipsmodule/ghash-x86.S", ++ "apple-x86/crypto/fipsmodule/md5-586.S", ++ "apple-x86/crypto/fipsmodule/sha1-586.S", ++ "apple-x86/crypto/fipsmodule/sha256-586.S", ++ "apple-x86/crypto/fipsmodule/sha512-586.S", ++ "apple-x86/crypto/fipsmodule/vpaes-x86.S", ++ "apple-x86/crypto/fipsmodule/x86-mont.S", ++ "apple-x86/crypto/test/trampoline-x86.S", ++] ++ ++crypto_sources_apple_x86_64 = [ ++ "apple-x86_64/crypto/chacha/chacha-x86_64.S", ++ "apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S", ++ "apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/aesni-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/ghash-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/md5-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/p256-x86_64-asm.S", ++ "apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S", ++ "apple-x86_64/crypto/fipsmodule/rdrand-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/rsaz-avx2.S", ++ "apple-x86_64/crypto/fipsmodule/sha1-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/sha256-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/sha512-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/vpaes-x86_64.S", ++ "apple-x86_64/crypto/fipsmodule/x86_64-mont.S", ++ "apple-x86_64/crypto/fipsmodule/x86_64-mont5.S", ++ "apple-x86_64/crypto/test/trampoline-x86_64.S", + ] + + crypto_sources_linux_aarch64 = [ +@@ -621,44 +660,6 @@ crypto_sources_linux_x86_64 = [ + "src/crypto/hrss/asm/poly_rq_mul.S", + ] + +-crypto_sources_mac_x86 = [ +- "mac-x86/crypto/chacha/chacha-x86.S", +- "mac-x86/crypto/fipsmodule/aesni-x86.S", +- "mac-x86/crypto/fipsmodule/bn-586.S", +- "mac-x86/crypto/fipsmodule/co-586.S", +- "mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S", +- "mac-x86/crypto/fipsmodule/ghash-x86.S", +- "mac-x86/crypto/fipsmodule/md5-586.S", +- "mac-x86/crypto/fipsmodule/sha1-586.S", +- "mac-x86/crypto/fipsmodule/sha256-586.S", +- "mac-x86/crypto/fipsmodule/sha512-586.S", +- "mac-x86/crypto/fipsmodule/vpaes-x86.S", +- "mac-x86/crypto/fipsmodule/x86-mont.S", +- "mac-x86/crypto/test/trampoline-x86.S", +-] +- +-crypto_sources_mac_x86_64 = [ +- "mac-x86_64/crypto/chacha/chacha-x86_64.S", +- "mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S", +- 
"mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S", +- "mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/aesni-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/ghash-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/md5-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S", +- "mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S", +- "mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/rsaz-avx2.S", +- "mac-x86_64/crypto/fipsmodule/sha1-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/sha256-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/sha512-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S", +- "mac-x86_64/crypto/fipsmodule/x86_64-mont.S", +- "mac-x86_64/crypto/fipsmodule/x86_64-mont5.S", +- "mac-x86_64/crypto/test/trampoline-x86_64.S", +-] +- + crypto_sources_win_aarch64 = [ + "win-aarch64/crypto/chacha/chacha-armv8.S", + "win-aarch64/crypto/fipsmodule/aesv8-armx64.S", +diff --git a/BUILD.generated_tests.bzl b/BUILD.generated_tests.bzl +index 532ca40..51a5ea4 100644 +--- a/BUILD.generated_tests.bzl ++++ b/BUILD.generated_tests.bzl +@@ -12,6 +12,7 @@ test_support_sources = [ + "src/crypto/cpu_arm_linux.h", + "src/crypto/curve25519/curve25519_tables.h", + "src/crypto/curve25519/internal.h", ++ "src/crypto/des/internal.h", + "src/crypto/dsa/internal.h", + "src/crypto/ec_extra/internal.h", + "src/crypto/err/internal.h", +@@ -21,7 +22,7 @@ test_support_sources = [ + "src/crypto/fipsmodule/bn/rsaz_exp.h", + "src/crypto/fipsmodule/cipher/internal.h", + "src/crypto/fipsmodule/delocate.h", +- "src/crypto/fipsmodule/des/internal.h", ++ "src/crypto/fipsmodule/dh/internal.h", + "src/crypto/fipsmodule/digest/internal.h", + "src/crypto/fipsmodule/digest/md32_common.h", + "src/crypto/fipsmodule/ec/internal.h", +diff --git a/android-sources.cmake b/android-sources.cmake +index 841eed9..15079b3 100644 +--- a/android-sources.cmake ++++ b/android-sources.cmake +@@ -75,6 +75,7 @@ set(crypto_sources + ${BORINGSSL_ROOT}src/crypto/cipher_extra/e_aesctrhmac.c + ${BORINGSSL_ROOT}src/crypto/cipher_extra/e_aesgcmsiv.c + ${BORINGSSL_ROOT}src/crypto/cipher_extra/e_chacha20poly1305.c ++ ${BORINGSSL_ROOT}src/crypto/cipher_extra/e_des.c + ${BORINGSSL_ROOT}src/crypto/cipher_extra/e_null.c + ${BORINGSSL_ROOT}src/crypto/cipher_extra/e_rc2.c + ${BORINGSSL_ROOT}src/crypto/cipher_extra/e_rc4.c +@@ -93,6 +94,7 @@ set(crypto_sources + ${BORINGSSL_ROOT}src/crypto/crypto.c + ${BORINGSSL_ROOT}src/crypto/curve25519/curve25519.c + ${BORINGSSL_ROOT}src/crypto/curve25519/spake25519.c ++ ${BORINGSSL_ROOT}src/crypto/des/des.c + ${BORINGSSL_ROOT}src/crypto/dh_extra/dh_asn1.c + ${BORINGSSL_ROOT}src/crypto/dh_extra/params.c + ${BORINGSSL_ROOT}src/crypto/digest_extra/digest_extra.c +@@ -388,30 +390,66 @@ set(ssl_test_sources + ${BORINGSSL_ROOT}src/ssl/ssl_c_test.c + ${BORINGSSL_ROOT}src/ssl/ssl_test.cc + ) +-set(crypto_sources_ios_aarch64 +- ${BORINGSSL_ROOT}ios-aarch64/crypto/chacha/chacha-armv8.S +- ${BORINGSSL_ROOT}ios-aarch64/crypto/fipsmodule/aesv8-armx64.S +- ${BORINGSSL_ROOT}ios-aarch64/crypto/fipsmodule/armv8-mont.S +- ${BORINGSSL_ROOT}ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S +- ${BORINGSSL_ROOT}ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S +- ${BORINGSSL_ROOT}ios-aarch64/crypto/fipsmodule/sha1-armv8.S +- ${BORINGSSL_ROOT}ios-aarch64/crypto/fipsmodule/sha256-armv8.S +- ${BORINGSSL_ROOT}ios-aarch64/crypto/fipsmodule/sha512-armv8.S +- ${BORINGSSL_ROOT}ios-aarch64/crypto/fipsmodule/vpaes-armv8.S +- 
${BORINGSSL_ROOT}ios-aarch64/crypto/test/trampoline-armv8.S ++set(crypto_sources_apple_aarch64 ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/chacha/chacha-armv8.S ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/fipsmodule/aesv8-armx64.S ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/fipsmodule/armv8-mont.S ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/fipsmodule/ghash-neon-armv8.S ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/fipsmodule/ghashv8-armx64.S ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/fipsmodule/sha1-armv8.S ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/fipsmodule/sha256-armv8.S ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/fipsmodule/sha512-armv8.S ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/fipsmodule/vpaes-armv8.S ++ ${BORINGSSL_ROOT}apple-aarch64/crypto/test/trampoline-armv8.S + ) +-set(crypto_sources_ios_arm +- ${BORINGSSL_ROOT}ios-arm/crypto/chacha/chacha-armv4.S +- ${BORINGSSL_ROOT}ios-arm/crypto/fipsmodule/aesv8-armx32.S +- ${BORINGSSL_ROOT}ios-arm/crypto/fipsmodule/armv4-mont.S +- ${BORINGSSL_ROOT}ios-arm/crypto/fipsmodule/bsaes-armv7.S +- ${BORINGSSL_ROOT}ios-arm/crypto/fipsmodule/ghash-armv4.S +- ${BORINGSSL_ROOT}ios-arm/crypto/fipsmodule/ghashv8-armx32.S +- ${BORINGSSL_ROOT}ios-arm/crypto/fipsmodule/sha1-armv4-large.S +- ${BORINGSSL_ROOT}ios-arm/crypto/fipsmodule/sha256-armv4.S +- ${BORINGSSL_ROOT}ios-arm/crypto/fipsmodule/sha512-armv4.S +- ${BORINGSSL_ROOT}ios-arm/crypto/fipsmodule/vpaes-armv7.S +- ${BORINGSSL_ROOT}ios-arm/crypto/test/trampoline-armv4.S ++set(crypto_sources_apple_arm ++ ${BORINGSSL_ROOT}apple-arm/crypto/chacha/chacha-armv4.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/fipsmodule/aesv8-armx32.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/fipsmodule/armv4-mont.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/fipsmodule/bsaes-armv7.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/fipsmodule/ghash-armv4.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/fipsmodule/ghashv8-armx32.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/fipsmodule/sha1-armv4-large.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/fipsmodule/sha256-armv4.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/fipsmodule/sha512-armv4.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/fipsmodule/vpaes-armv7.S ++ ${BORINGSSL_ROOT}apple-arm/crypto/test/trampoline-armv4.S ++) ++set(crypto_sources_apple_x86 ++ ${BORINGSSL_ROOT}apple-x86/crypto/chacha/chacha-x86.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/aesni-x86.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/bn-586.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/co-586.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/ghash-ssse3-x86.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/ghash-x86.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/md5-586.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/sha1-586.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/sha256-586.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/sha512-586.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/vpaes-x86.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/fipsmodule/x86-mont.S ++ ${BORINGSSL_ROOT}apple-x86/crypto/test/trampoline-x86.S ++) ++set(crypto_sources_apple_x86_64 ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/chacha/chacha-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/aesni-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/ghash-x86_64.S ++ 
${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/md5-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/p256-x86_64-asm.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/rdrand-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/rsaz-avx2.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/sha1-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/sha256-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/sha512-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/vpaes-x86_64.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/x86_64-mont.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/fipsmodule/x86_64-mont5.S ++ ${BORINGSSL_ROOT}apple-x86_64/crypto/test/trampoline-x86_64.S + ) + set(crypto_sources_linux_aarch64 + ${BORINGSSL_ROOT}linux-aarch64/crypto/chacha/chacha-armv8.S +@@ -482,42 +520,6 @@ set(crypto_sources_linux_x86_64 + ${BORINGSSL_ROOT}linux-x86_64/crypto/test/trampoline-x86_64.S + ${BORINGSSL_ROOT}src/crypto/hrss/asm/poly_rq_mul.S + ) +-set(crypto_sources_mac_x86 +- ${BORINGSSL_ROOT}mac-x86/crypto/chacha/chacha-x86.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/aesni-x86.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/bn-586.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/co-586.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/ghash-x86.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/md5-586.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/sha1-586.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/sha256-586.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/sha512-586.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/vpaes-x86.S +- ${BORINGSSL_ROOT}mac-x86/crypto/fipsmodule/x86-mont.S +- ${BORINGSSL_ROOT}mac-x86/crypto/test/trampoline-x86.S +-) +-set(crypto_sources_mac_x86_64 +- ${BORINGSSL_ROOT}mac-x86_64/crypto/chacha/chacha-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/aesni-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/ghash-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/md5-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/rsaz-avx2.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/sha1-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/sha256-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/sha512-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/x86_64-mont.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/fipsmodule/x86_64-mont5.S +- ${BORINGSSL_ROOT}mac-x86_64/crypto/test/trampoline-x86_64.S +-) + set(crypto_sources_win_aarch64 + ${BORINGSSL_ROOT}win-aarch64/crypto/chacha/chacha-armv8.S + ${BORINGSSL_ROOT}win-aarch64/crypto/fipsmodule/aesv8-armx64.S +diff --git a/apple-aarch64/crypto/chacha/chacha-armv8.S b/apple-aarch64/crypto/chacha/chacha-armv8.S +new file mode 100644 +index 0000000..dd992a2 +--- /dev/null ++++ b/apple-aarch64/crypto/chacha/chacha-armv8.S +@@ -0,0 +1,1992 @@ ++// This file is 
generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++ ++.private_extern _OPENSSL_armcap_P ++ ++.section __TEXT,__const ++ ++.align 5 ++Lsigma: ++.quad 0x3320646e61707865,0x6b20657479622d32 // endian-neutral ++Lone: ++.long 1,0,0,0 ++.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++ ++.text ++ ++.globl _ChaCha20_ctr32 ++.private_extern _ChaCha20_ctr32 ++ ++.align 5 ++_ChaCha20_ctr32: ++ AARCH64_VALID_CALL_TARGET ++ cbz x2,Labort ++#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 ++ adrp x5,:pg_hi21_nc:_OPENSSL_armcap_P ++#else ++ adrp x5,_OPENSSL_armcap_P@PAGE ++#endif ++ cmp x2,#192 ++ b.lo Lshort ++ ldr w17,[x5,_OPENSSL_armcap_P@PAGEOFF] ++ tst w17,#ARMV7_NEON ++ b.ne ChaCha20_neon ++ ++Lshort: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-96]! ++ add x29,sp,#0 ++ ++ adrp x5,Lsigma@PAGE ++ add x5,x5,Lsigma@PAGEOFF ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ sub sp,sp,#64 ++ ++ ldp x22,x23,[x5] // load sigma ++ ldp x24,x25,[x3] // load key ++ ldp x26,x27,[x3,#16] ++ ldp x28,x30,[x4] // load counter ++#ifdef __AARCH64EB__ ++ ror x24,x24,#32 ++ ror x25,x25,#32 ++ ror x26,x26,#32 ++ ror x27,x27,#32 ++ ror x28,x28,#32 ++ ror x30,x30,#32 ++#endif ++ ++Loop_outer: ++ mov w5,w22 // unpack key block ++ lsr x6,x22,#32 ++ mov w7,w23 ++ lsr x8,x23,#32 ++ mov w9,w24 ++ lsr x10,x24,#32 ++ mov w11,w25 ++ lsr x12,x25,#32 ++ mov w13,w26 ++ lsr x14,x26,#32 ++ mov w15,w27 ++ lsr x16,x27,#32 ++ mov w17,w28 ++ lsr x19,x28,#32 ++ mov w20,w30 ++ lsr x21,x30,#32 ++ ++ mov x4,#10 ++ subs x2,x2,#64 ++Loop: ++ sub x4,x4,#1 ++ add w5,w5,w9 ++ add w6,w6,w10 ++ add w7,w7,w11 ++ add w8,w8,w12 ++ eor w17,w17,w5 ++ eor w19,w19,w6 ++ eor w20,w20,w7 ++ eor w21,w21,w8 ++ ror w17,w17,#16 ++ ror w19,w19,#16 ++ ror w20,w20,#16 ++ ror w21,w21,#16 ++ add w13,w13,w17 ++ add w14,w14,w19 ++ add w15,w15,w20 ++ add w16,w16,w21 ++ eor w9,w9,w13 ++ eor w10,w10,w14 ++ eor w11,w11,w15 ++ eor w12,w12,w16 ++ ror w9,w9,#20 ++ ror w10,w10,#20 ++ ror w11,w11,#20 ++ ror w12,w12,#20 ++ add w5,w5,w9 ++ add w6,w6,w10 ++ add w7,w7,w11 ++ add w8,w8,w12 ++ eor w17,w17,w5 ++ eor w19,w19,w6 ++ eor w20,w20,w7 ++ eor w21,w21,w8 ++ ror w17,w17,#24 ++ ror w19,w19,#24 ++ ror w20,w20,#24 ++ ror w21,w21,#24 ++ add w13,w13,w17 ++ add w14,w14,w19 ++ add w15,w15,w20 ++ add w16,w16,w21 ++ eor w9,w9,w13 ++ eor w10,w10,w14 ++ eor w11,w11,w15 ++ eor w12,w12,w16 ++ ror w9,w9,#25 ++ ror w10,w10,#25 ++ ror w11,w11,#25 ++ ror w12,w12,#25 ++ add w5,w5,w10 ++ add w6,w6,w11 ++ add w7,w7,w12 ++ add w8,w8,w9 ++ eor w21,w21,w5 ++ eor w17,w17,w6 ++ eor w19,w19,w7 ++ eor w20,w20,w8 ++ ror w21,w21,#16 ++ ror w17,w17,#16 ++ ror w19,w19,#16 ++ ror w20,w20,#16 ++ add w15,w15,w21 ++ add w16,w16,w17 ++ add w13,w13,w19 ++ add w14,w14,w20 ++ eor w10,w10,w15 ++ eor w11,w11,w16 ++ eor w12,w12,w13 ++ eor w9,w9,w14 ++ ror w10,w10,#20 ++ ror w11,w11,#20 ++ ror w12,w12,#20 ++ ror w9,w9,#20 ++ add w5,w5,w10 ++ add w6,w6,w11 ++ add w7,w7,w12 ++ add w8,w8,w9 ++ eor w21,w21,w5 ++ eor w17,w17,w6 ++ eor w19,w19,w7 ++ eor w20,w20,w8 ++ 
ror w21,w21,#24 ++ ror w17,w17,#24 ++ ror w19,w19,#24 ++ ror w20,w20,#24 ++ add w15,w15,w21 ++ add w16,w16,w17 ++ add w13,w13,w19 ++ add w14,w14,w20 ++ eor w10,w10,w15 ++ eor w11,w11,w16 ++ eor w12,w12,w13 ++ eor w9,w9,w14 ++ ror w10,w10,#25 ++ ror w11,w11,#25 ++ ror w12,w12,#25 ++ ror w9,w9,#25 ++ cbnz x4,Loop ++ ++ add w5,w5,w22 // accumulate key block ++ add x6,x6,x22,lsr#32 ++ add w7,w7,w23 ++ add x8,x8,x23,lsr#32 ++ add w9,w9,w24 ++ add x10,x10,x24,lsr#32 ++ add w11,w11,w25 ++ add x12,x12,x25,lsr#32 ++ add w13,w13,w26 ++ add x14,x14,x26,lsr#32 ++ add w15,w15,w27 ++ add x16,x16,x27,lsr#32 ++ add w17,w17,w28 ++ add x19,x19,x28,lsr#32 ++ add w20,w20,w30 ++ add x21,x21,x30,lsr#32 ++ ++ b.lo Ltail ++ ++ add x5,x5,x6,lsl#32 // pack ++ add x7,x7,x8,lsl#32 ++ ldp x6,x8,[x1,#0] // load input ++ add x9,x9,x10,lsl#32 ++ add x11,x11,x12,lsl#32 ++ ldp x10,x12,[x1,#16] ++ add x13,x13,x14,lsl#32 ++ add x15,x15,x16,lsl#32 ++ ldp x14,x16,[x1,#32] ++ add x17,x17,x19,lsl#32 ++ add x20,x20,x21,lsl#32 ++ ldp x19,x21,[x1,#48] ++ add x1,x1,#64 ++#ifdef __AARCH64EB__ ++ rev x5,x5 ++ rev x7,x7 ++ rev x9,x9 ++ rev x11,x11 ++ rev x13,x13 ++ rev x15,x15 ++ rev x17,x17 ++ rev x20,x20 ++#endif ++ eor x5,x5,x6 ++ eor x7,x7,x8 ++ eor x9,x9,x10 ++ eor x11,x11,x12 ++ eor x13,x13,x14 ++ eor x15,x15,x16 ++ eor x17,x17,x19 ++ eor x20,x20,x21 ++ ++ stp x5,x7,[x0,#0] // store output ++ add x28,x28,#1 // increment counter ++ stp x9,x11,[x0,#16] ++ stp x13,x15,[x0,#32] ++ stp x17,x20,[x0,#48] ++ add x0,x0,#64 ++ ++ b.hi Loop_outer ++ ++ ldp x19,x20,[x29,#16] ++ add sp,sp,#64 ++ ldp x21,x22,[x29,#32] ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldp x29,x30,[sp],#96 ++ AARCH64_VALIDATE_LINK_REGISTER ++Labort: ++ ret ++ ++.align 4 ++Ltail: ++ add x2,x2,#64 ++Less_than_64: ++ sub x0,x0,#1 ++ add x1,x1,x2 ++ add x0,x0,x2 ++ add x4,sp,x2 ++ neg x2,x2 ++ ++ add x5,x5,x6,lsl#32 // pack ++ add x7,x7,x8,lsl#32 ++ add x9,x9,x10,lsl#32 ++ add x11,x11,x12,lsl#32 ++ add x13,x13,x14,lsl#32 ++ add x15,x15,x16,lsl#32 ++ add x17,x17,x19,lsl#32 ++ add x20,x20,x21,lsl#32 ++#ifdef __AARCH64EB__ ++ rev x5,x5 ++ rev x7,x7 ++ rev x9,x9 ++ rev x11,x11 ++ rev x13,x13 ++ rev x15,x15 ++ rev x17,x17 ++ rev x20,x20 ++#endif ++ stp x5,x7,[sp,#0] ++ stp x9,x11,[sp,#16] ++ stp x13,x15,[sp,#32] ++ stp x17,x20,[sp,#48] ++ ++Loop_tail: ++ ldrb w10,[x1,x2] ++ ldrb w11,[x4,x2] ++ add x2,x2,#1 ++ eor w10,w10,w11 ++ strb w10,[x0,x2] ++ cbnz x2,Loop_tail ++ ++ stp xzr,xzr,[sp,#0] ++ stp xzr,xzr,[sp,#16] ++ stp xzr,xzr,[sp,#32] ++ stp xzr,xzr,[sp,#48] ++ ++ ldp x19,x20,[x29,#16] ++ add sp,sp,#64 ++ ldp x21,x22,[x29,#32] ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldp x29,x30,[sp],#96 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++ ++.align 5 ++ChaCha20_neon: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-96]! 
++ add x29,sp,#0 ++ ++ adrp x5,Lsigma@PAGE ++ add x5,x5,Lsigma@PAGEOFF ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ cmp x2,#512 ++ b.hs L512_or_more_neon ++ ++ sub sp,sp,#64 ++ ++ ldp x22,x23,[x5] // load sigma ++ ld1 {v24.4s},[x5],#16 ++ ldp x24,x25,[x3] // load key ++ ldp x26,x27,[x3,#16] ++ ld1 {v25.4s,v26.4s},[x3] ++ ldp x28,x30,[x4] // load counter ++ ld1 {v27.4s},[x4] ++ ld1 {v31.4s},[x5] ++#ifdef __AARCH64EB__ ++ rev64 v24.4s,v24.4s ++ ror x24,x24,#32 ++ ror x25,x25,#32 ++ ror x26,x26,#32 ++ ror x27,x27,#32 ++ ror x28,x28,#32 ++ ror x30,x30,#32 ++#endif ++ add v27.4s,v27.4s,v31.4s // += 1 ++ add v28.4s,v27.4s,v31.4s ++ add v29.4s,v28.4s,v31.4s ++ shl v31.4s,v31.4s,#2 // 1 -> 4 ++ ++Loop_outer_neon: ++ mov w5,w22 // unpack key block ++ lsr x6,x22,#32 ++ mov v0.16b,v24.16b ++ mov w7,w23 ++ lsr x8,x23,#32 ++ mov v4.16b,v24.16b ++ mov w9,w24 ++ lsr x10,x24,#32 ++ mov v16.16b,v24.16b ++ mov w11,w25 ++ mov v1.16b,v25.16b ++ lsr x12,x25,#32 ++ mov v5.16b,v25.16b ++ mov w13,w26 ++ mov v17.16b,v25.16b ++ lsr x14,x26,#32 ++ mov v3.16b,v27.16b ++ mov w15,w27 ++ mov v7.16b,v28.16b ++ lsr x16,x27,#32 ++ mov v19.16b,v29.16b ++ mov w17,w28 ++ mov v2.16b,v26.16b ++ lsr x19,x28,#32 ++ mov v6.16b,v26.16b ++ mov w20,w30 ++ mov v18.16b,v26.16b ++ lsr x21,x30,#32 ++ ++ mov x4,#10 ++ subs x2,x2,#256 ++Loop_neon: ++ sub x4,x4,#1 ++ add v0.4s,v0.4s,v1.4s ++ add w5,w5,w9 ++ add v4.4s,v4.4s,v5.4s ++ add w6,w6,w10 ++ add v16.4s,v16.4s,v17.4s ++ add w7,w7,w11 ++ eor v3.16b,v3.16b,v0.16b ++ add w8,w8,w12 ++ eor v7.16b,v7.16b,v4.16b ++ eor w17,w17,w5 ++ eor v19.16b,v19.16b,v16.16b ++ eor w19,w19,w6 ++ rev32 v3.8h,v3.8h ++ eor w20,w20,w7 ++ rev32 v7.8h,v7.8h ++ eor w21,w21,w8 ++ rev32 v19.8h,v19.8h ++ ror w17,w17,#16 ++ add v2.4s,v2.4s,v3.4s ++ ror w19,w19,#16 ++ add v6.4s,v6.4s,v7.4s ++ ror w20,w20,#16 ++ add v18.4s,v18.4s,v19.4s ++ ror w21,w21,#16 ++ eor v20.16b,v1.16b,v2.16b ++ add w13,w13,w17 ++ eor v21.16b,v5.16b,v6.16b ++ add w14,w14,w19 ++ eor v22.16b,v17.16b,v18.16b ++ add w15,w15,w20 ++ ushr v1.4s,v20.4s,#20 ++ add w16,w16,w21 ++ ushr v5.4s,v21.4s,#20 ++ eor w9,w9,w13 ++ ushr v17.4s,v22.4s,#20 ++ eor w10,w10,w14 ++ sli v1.4s,v20.4s,#12 ++ eor w11,w11,w15 ++ sli v5.4s,v21.4s,#12 ++ eor w12,w12,w16 ++ sli v17.4s,v22.4s,#12 ++ ror w9,w9,#20 ++ add v0.4s,v0.4s,v1.4s ++ ror w10,w10,#20 ++ add v4.4s,v4.4s,v5.4s ++ ror w11,w11,#20 ++ add v16.4s,v16.4s,v17.4s ++ ror w12,w12,#20 ++ eor v20.16b,v3.16b,v0.16b ++ add w5,w5,w9 ++ eor v21.16b,v7.16b,v4.16b ++ add w6,w6,w10 ++ eor v22.16b,v19.16b,v16.16b ++ add w7,w7,w11 ++ ushr v3.4s,v20.4s,#24 ++ add w8,w8,w12 ++ ushr v7.4s,v21.4s,#24 ++ eor w17,w17,w5 ++ ushr v19.4s,v22.4s,#24 ++ eor w19,w19,w6 ++ sli v3.4s,v20.4s,#8 ++ eor w20,w20,w7 ++ sli v7.4s,v21.4s,#8 ++ eor w21,w21,w8 ++ sli v19.4s,v22.4s,#8 ++ ror w17,w17,#24 ++ add v2.4s,v2.4s,v3.4s ++ ror w19,w19,#24 ++ add v6.4s,v6.4s,v7.4s ++ ror w20,w20,#24 ++ add v18.4s,v18.4s,v19.4s ++ ror w21,w21,#24 ++ eor v20.16b,v1.16b,v2.16b ++ add w13,w13,w17 ++ eor v21.16b,v5.16b,v6.16b ++ add w14,w14,w19 ++ eor v22.16b,v17.16b,v18.16b ++ add w15,w15,w20 ++ ushr v1.4s,v20.4s,#25 ++ add w16,w16,w21 ++ ushr v5.4s,v21.4s,#25 ++ eor w9,w9,w13 ++ ushr v17.4s,v22.4s,#25 ++ eor w10,w10,w14 ++ sli v1.4s,v20.4s,#7 ++ eor w11,w11,w15 ++ sli v5.4s,v21.4s,#7 ++ eor w12,w12,w16 ++ sli v17.4s,v22.4s,#7 ++ ror w9,w9,#25 ++ ext v2.16b,v2.16b,v2.16b,#8 ++ ror w10,w10,#25 ++ ext v6.16b,v6.16b,v6.16b,#8 ++ ror w11,w11,#25 ++ ext v18.16b,v18.16b,v18.16b,#8 ++ ror 
w12,w12,#25 ++ ext v3.16b,v3.16b,v3.16b,#12 ++ ext v7.16b,v7.16b,v7.16b,#12 ++ ext v19.16b,v19.16b,v19.16b,#12 ++ ext v1.16b,v1.16b,v1.16b,#4 ++ ext v5.16b,v5.16b,v5.16b,#4 ++ ext v17.16b,v17.16b,v17.16b,#4 ++ add v0.4s,v0.4s,v1.4s ++ add w5,w5,w10 ++ add v4.4s,v4.4s,v5.4s ++ add w6,w6,w11 ++ add v16.4s,v16.4s,v17.4s ++ add w7,w7,w12 ++ eor v3.16b,v3.16b,v0.16b ++ add w8,w8,w9 ++ eor v7.16b,v7.16b,v4.16b ++ eor w21,w21,w5 ++ eor v19.16b,v19.16b,v16.16b ++ eor w17,w17,w6 ++ rev32 v3.8h,v3.8h ++ eor w19,w19,w7 ++ rev32 v7.8h,v7.8h ++ eor w20,w20,w8 ++ rev32 v19.8h,v19.8h ++ ror w21,w21,#16 ++ add v2.4s,v2.4s,v3.4s ++ ror w17,w17,#16 ++ add v6.4s,v6.4s,v7.4s ++ ror w19,w19,#16 ++ add v18.4s,v18.4s,v19.4s ++ ror w20,w20,#16 ++ eor v20.16b,v1.16b,v2.16b ++ add w15,w15,w21 ++ eor v21.16b,v5.16b,v6.16b ++ add w16,w16,w17 ++ eor v22.16b,v17.16b,v18.16b ++ add w13,w13,w19 ++ ushr v1.4s,v20.4s,#20 ++ add w14,w14,w20 ++ ushr v5.4s,v21.4s,#20 ++ eor w10,w10,w15 ++ ushr v17.4s,v22.4s,#20 ++ eor w11,w11,w16 ++ sli v1.4s,v20.4s,#12 ++ eor w12,w12,w13 ++ sli v5.4s,v21.4s,#12 ++ eor w9,w9,w14 ++ sli v17.4s,v22.4s,#12 ++ ror w10,w10,#20 ++ add v0.4s,v0.4s,v1.4s ++ ror w11,w11,#20 ++ add v4.4s,v4.4s,v5.4s ++ ror w12,w12,#20 ++ add v16.4s,v16.4s,v17.4s ++ ror w9,w9,#20 ++ eor v20.16b,v3.16b,v0.16b ++ add w5,w5,w10 ++ eor v21.16b,v7.16b,v4.16b ++ add w6,w6,w11 ++ eor v22.16b,v19.16b,v16.16b ++ add w7,w7,w12 ++ ushr v3.4s,v20.4s,#24 ++ add w8,w8,w9 ++ ushr v7.4s,v21.4s,#24 ++ eor w21,w21,w5 ++ ushr v19.4s,v22.4s,#24 ++ eor w17,w17,w6 ++ sli v3.4s,v20.4s,#8 ++ eor w19,w19,w7 ++ sli v7.4s,v21.4s,#8 ++ eor w20,w20,w8 ++ sli v19.4s,v22.4s,#8 ++ ror w21,w21,#24 ++ add v2.4s,v2.4s,v3.4s ++ ror w17,w17,#24 ++ add v6.4s,v6.4s,v7.4s ++ ror w19,w19,#24 ++ add v18.4s,v18.4s,v19.4s ++ ror w20,w20,#24 ++ eor v20.16b,v1.16b,v2.16b ++ add w15,w15,w21 ++ eor v21.16b,v5.16b,v6.16b ++ add w16,w16,w17 ++ eor v22.16b,v17.16b,v18.16b ++ add w13,w13,w19 ++ ushr v1.4s,v20.4s,#25 ++ add w14,w14,w20 ++ ushr v5.4s,v21.4s,#25 ++ eor w10,w10,w15 ++ ushr v17.4s,v22.4s,#25 ++ eor w11,w11,w16 ++ sli v1.4s,v20.4s,#7 ++ eor w12,w12,w13 ++ sli v5.4s,v21.4s,#7 ++ eor w9,w9,w14 ++ sli v17.4s,v22.4s,#7 ++ ror w10,w10,#25 ++ ext v2.16b,v2.16b,v2.16b,#8 ++ ror w11,w11,#25 ++ ext v6.16b,v6.16b,v6.16b,#8 ++ ror w12,w12,#25 ++ ext v18.16b,v18.16b,v18.16b,#8 ++ ror w9,w9,#25 ++ ext v3.16b,v3.16b,v3.16b,#4 ++ ext v7.16b,v7.16b,v7.16b,#4 ++ ext v19.16b,v19.16b,v19.16b,#4 ++ ext v1.16b,v1.16b,v1.16b,#12 ++ ext v5.16b,v5.16b,v5.16b,#12 ++ ext v17.16b,v17.16b,v17.16b,#12 ++ cbnz x4,Loop_neon ++ ++ add w5,w5,w22 // accumulate key block ++ add v0.4s,v0.4s,v24.4s ++ add x6,x6,x22,lsr#32 ++ add v4.4s,v4.4s,v24.4s ++ add w7,w7,w23 ++ add v16.4s,v16.4s,v24.4s ++ add x8,x8,x23,lsr#32 ++ add v2.4s,v2.4s,v26.4s ++ add w9,w9,w24 ++ add v6.4s,v6.4s,v26.4s ++ add x10,x10,x24,lsr#32 ++ add v18.4s,v18.4s,v26.4s ++ add w11,w11,w25 ++ add v3.4s,v3.4s,v27.4s ++ add x12,x12,x25,lsr#32 ++ add w13,w13,w26 ++ add v7.4s,v7.4s,v28.4s ++ add x14,x14,x26,lsr#32 ++ add w15,w15,w27 ++ add v19.4s,v19.4s,v29.4s ++ add x16,x16,x27,lsr#32 ++ add w17,w17,w28 ++ add v1.4s,v1.4s,v25.4s ++ add x19,x19,x28,lsr#32 ++ add w20,w20,w30 ++ add v5.4s,v5.4s,v25.4s ++ add x21,x21,x30,lsr#32 ++ add v17.4s,v17.4s,v25.4s ++ ++ b.lo Ltail_neon ++ ++ add x5,x5,x6,lsl#32 // pack ++ add x7,x7,x8,lsl#32 ++ ldp x6,x8,[x1,#0] // load input ++ add x9,x9,x10,lsl#32 ++ add x11,x11,x12,lsl#32 ++ ldp x10,x12,[x1,#16] ++ add x13,x13,x14,lsl#32 ++ add x15,x15,x16,lsl#32 ++ ldp x14,x16,[x1,#32] ++ add 
x17,x17,x19,lsl#32 ++ add x20,x20,x21,lsl#32 ++ ldp x19,x21,[x1,#48] ++ add x1,x1,#64 ++#ifdef __AARCH64EB__ ++ rev x5,x5 ++ rev x7,x7 ++ rev x9,x9 ++ rev x11,x11 ++ rev x13,x13 ++ rev x15,x15 ++ rev x17,x17 ++ rev x20,x20 ++#endif ++ ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 ++ eor x5,x5,x6 ++ eor x7,x7,x8 ++ eor x9,x9,x10 ++ eor x11,x11,x12 ++ eor x13,x13,x14 ++ eor v0.16b,v0.16b,v20.16b ++ eor x15,x15,x16 ++ eor v1.16b,v1.16b,v21.16b ++ eor x17,x17,x19 ++ eor v2.16b,v2.16b,v22.16b ++ eor x20,x20,x21 ++ eor v3.16b,v3.16b,v23.16b ++ ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 ++ ++ stp x5,x7,[x0,#0] // store output ++ add x28,x28,#4 // increment counter ++ stp x9,x11,[x0,#16] ++ add v27.4s,v27.4s,v31.4s // += 4 ++ stp x13,x15,[x0,#32] ++ add v28.4s,v28.4s,v31.4s ++ stp x17,x20,[x0,#48] ++ add v29.4s,v29.4s,v31.4s ++ add x0,x0,#64 ++ ++ st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 ++ ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 ++ ++ eor v4.16b,v4.16b,v20.16b ++ eor v5.16b,v5.16b,v21.16b ++ eor v6.16b,v6.16b,v22.16b ++ eor v7.16b,v7.16b,v23.16b ++ st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 ++ ++ eor v16.16b,v16.16b,v0.16b ++ eor v17.16b,v17.16b,v1.16b ++ eor v18.16b,v18.16b,v2.16b ++ eor v19.16b,v19.16b,v3.16b ++ st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 ++ ++ b.hi Loop_outer_neon ++ ++ ldp x19,x20,[x29,#16] ++ add sp,sp,#64 ++ ldp x21,x22,[x29,#32] ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldp x29,x30,[sp],#96 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++Ltail_neon: ++ add x2,x2,#256 ++ cmp x2,#64 ++ b.lo Less_than_64 ++ ++ add x5,x5,x6,lsl#32 // pack ++ add x7,x7,x8,lsl#32 ++ ldp x6,x8,[x1,#0] // load input ++ add x9,x9,x10,lsl#32 ++ add x11,x11,x12,lsl#32 ++ ldp x10,x12,[x1,#16] ++ add x13,x13,x14,lsl#32 ++ add x15,x15,x16,lsl#32 ++ ldp x14,x16,[x1,#32] ++ add x17,x17,x19,lsl#32 ++ add x20,x20,x21,lsl#32 ++ ldp x19,x21,[x1,#48] ++ add x1,x1,#64 ++#ifdef __AARCH64EB__ ++ rev x5,x5 ++ rev x7,x7 ++ rev x9,x9 ++ rev x11,x11 ++ rev x13,x13 ++ rev x15,x15 ++ rev x17,x17 ++ rev x20,x20 ++#endif ++ eor x5,x5,x6 ++ eor x7,x7,x8 ++ eor x9,x9,x10 ++ eor x11,x11,x12 ++ eor x13,x13,x14 ++ eor x15,x15,x16 ++ eor x17,x17,x19 ++ eor x20,x20,x21 ++ ++ stp x5,x7,[x0,#0] // store output ++ add x28,x28,#4 // increment counter ++ stp x9,x11,[x0,#16] ++ stp x13,x15,[x0,#32] ++ stp x17,x20,[x0,#48] ++ add x0,x0,#64 ++ b.eq Ldone_neon ++ sub x2,x2,#64 ++ cmp x2,#64 ++ b.lo Less_than_128 ++ ++ ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 ++ eor v0.16b,v0.16b,v20.16b ++ eor v1.16b,v1.16b,v21.16b ++ eor v2.16b,v2.16b,v22.16b ++ eor v3.16b,v3.16b,v23.16b ++ st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 ++ b.eq Ldone_neon ++ sub x2,x2,#64 ++ cmp x2,#64 ++ b.lo Less_than_192 ++ ++ ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 ++ eor v4.16b,v4.16b,v20.16b ++ eor v5.16b,v5.16b,v21.16b ++ eor v6.16b,v6.16b,v22.16b ++ eor v7.16b,v7.16b,v23.16b ++ st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 ++ b.eq Ldone_neon ++ sub x2,x2,#64 ++ ++ st1 {v16.16b,v17.16b,v18.16b,v19.16b},[sp] ++ b Last_neon ++ ++Less_than_128: ++ st1 {v0.16b,v1.16b,v2.16b,v3.16b},[sp] ++ b Last_neon ++Less_than_192: ++ st1 {v4.16b,v5.16b,v6.16b,v7.16b},[sp] ++ b Last_neon ++ ++.align 4 ++Last_neon: ++ sub x0,x0,#1 ++ add x1,x1,x2 ++ add x0,x0,x2 ++ add x4,sp,x2 ++ neg x2,x2 ++ ++Loop_tail_neon: ++ ldrb w10,[x1,x2] ++ ldrb w11,[x4,x2] ++ add x2,x2,#1 ++ eor w10,w10,w11 ++ strb w10,[x0,x2] ++ cbnz x2,Loop_tail_neon ++ ++ stp xzr,xzr,[sp,#0] ++ stp xzr,xzr,[sp,#16] ++ stp xzr,xzr,[sp,#32] ++ stp xzr,xzr,[sp,#48] ++ 
++Ldone_neon: ++ ldp x19,x20,[x29,#16] ++ add sp,sp,#64 ++ ldp x21,x22,[x29,#32] ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldp x29,x30,[sp],#96 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++.align 5 ++ChaCha20_512_neon: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-96]! ++ add x29,sp,#0 ++ ++ adrp x5,Lsigma@PAGE ++ add x5,x5,Lsigma@PAGEOFF ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ ++L512_or_more_neon: ++ sub sp,sp,#128+64 ++ ++ ldp x22,x23,[x5] // load sigma ++ ld1 {v24.4s},[x5],#16 ++ ldp x24,x25,[x3] // load key ++ ldp x26,x27,[x3,#16] ++ ld1 {v25.4s,v26.4s},[x3] ++ ldp x28,x30,[x4] // load counter ++ ld1 {v27.4s},[x4] ++ ld1 {v31.4s},[x5] ++#ifdef __AARCH64EB__ ++ rev64 v24.4s,v24.4s ++ ror x24,x24,#32 ++ ror x25,x25,#32 ++ ror x26,x26,#32 ++ ror x27,x27,#32 ++ ror x28,x28,#32 ++ ror x30,x30,#32 ++#endif ++ add v27.4s,v27.4s,v31.4s // += 1 ++ stp q24,q25,[sp,#0] // off-load key block, invariant part ++ add v27.4s,v27.4s,v31.4s // not typo ++ str q26,[sp,#32] ++ add v28.4s,v27.4s,v31.4s ++ add v29.4s,v28.4s,v31.4s ++ add v30.4s,v29.4s,v31.4s ++ shl v31.4s,v31.4s,#2 // 1 -> 4 ++ ++ stp d8,d9,[sp,#128+0] // meet ABI requirements ++ stp d10,d11,[sp,#128+16] ++ stp d12,d13,[sp,#128+32] ++ stp d14,d15,[sp,#128+48] ++ ++ sub x2,x2,#512 // not typo ++ ++Loop_outer_512_neon: ++ mov v0.16b,v24.16b ++ mov v4.16b,v24.16b ++ mov v8.16b,v24.16b ++ mov v12.16b,v24.16b ++ mov v16.16b,v24.16b ++ mov v20.16b,v24.16b ++ mov v1.16b,v25.16b ++ mov w5,w22 // unpack key block ++ mov v5.16b,v25.16b ++ lsr x6,x22,#32 ++ mov v9.16b,v25.16b ++ mov w7,w23 ++ mov v13.16b,v25.16b ++ lsr x8,x23,#32 ++ mov v17.16b,v25.16b ++ mov w9,w24 ++ mov v21.16b,v25.16b ++ lsr x10,x24,#32 ++ mov v3.16b,v27.16b ++ mov w11,w25 ++ mov v7.16b,v28.16b ++ lsr x12,x25,#32 ++ mov v11.16b,v29.16b ++ mov w13,w26 ++ mov v15.16b,v30.16b ++ lsr x14,x26,#32 ++ mov v2.16b,v26.16b ++ mov w15,w27 ++ mov v6.16b,v26.16b ++ lsr x16,x27,#32 ++ add v19.4s,v3.4s,v31.4s // +4 ++ mov w17,w28 ++ add v23.4s,v7.4s,v31.4s // +4 ++ lsr x19,x28,#32 ++ mov v10.16b,v26.16b ++ mov w20,w30 ++ mov v14.16b,v26.16b ++ lsr x21,x30,#32 ++ mov v18.16b,v26.16b ++ stp q27,q28,[sp,#48] // off-load key block, variable part ++ mov v22.16b,v26.16b ++ str q29,[sp,#80] ++ ++ mov x4,#5 ++ subs x2,x2,#512 ++Loop_upper_neon: ++ sub x4,x4,#1 ++ add v0.4s,v0.4s,v1.4s ++ add w5,w5,w9 ++ add v4.4s,v4.4s,v5.4s ++ add w6,w6,w10 ++ add v8.4s,v8.4s,v9.4s ++ add w7,w7,w11 ++ add v12.4s,v12.4s,v13.4s ++ add w8,w8,w12 ++ add v16.4s,v16.4s,v17.4s ++ eor w17,w17,w5 ++ add v20.4s,v20.4s,v21.4s ++ eor w19,w19,w6 ++ eor v3.16b,v3.16b,v0.16b ++ eor w20,w20,w7 ++ eor v7.16b,v7.16b,v4.16b ++ eor w21,w21,w8 ++ eor v11.16b,v11.16b,v8.16b ++ ror w17,w17,#16 ++ eor v15.16b,v15.16b,v12.16b ++ ror w19,w19,#16 ++ eor v19.16b,v19.16b,v16.16b ++ ror w20,w20,#16 ++ eor v23.16b,v23.16b,v20.16b ++ ror w21,w21,#16 ++ rev32 v3.8h,v3.8h ++ add w13,w13,w17 ++ rev32 v7.8h,v7.8h ++ add w14,w14,w19 ++ rev32 v11.8h,v11.8h ++ add w15,w15,w20 ++ rev32 v15.8h,v15.8h ++ add w16,w16,w21 ++ rev32 v19.8h,v19.8h ++ eor w9,w9,w13 ++ rev32 v23.8h,v23.8h ++ eor w10,w10,w14 ++ add v2.4s,v2.4s,v3.4s ++ eor w11,w11,w15 ++ add v6.4s,v6.4s,v7.4s ++ eor w12,w12,w16 ++ add v10.4s,v10.4s,v11.4s ++ ror w9,w9,#20 ++ add v14.4s,v14.4s,v15.4s ++ ror w10,w10,#20 ++ add v18.4s,v18.4s,v19.4s ++ ror w11,w11,#20 ++ add v22.4s,v22.4s,v23.4s ++ ror w12,w12,#20 ++ eor v24.16b,v1.16b,v2.16b ++ add w5,w5,w9 ++ eor 
v25.16b,v5.16b,v6.16b ++ add w6,w6,w10 ++ eor v26.16b,v9.16b,v10.16b ++ add w7,w7,w11 ++ eor v27.16b,v13.16b,v14.16b ++ add w8,w8,w12 ++ eor v28.16b,v17.16b,v18.16b ++ eor w17,w17,w5 ++ eor v29.16b,v21.16b,v22.16b ++ eor w19,w19,w6 ++ ushr v1.4s,v24.4s,#20 ++ eor w20,w20,w7 ++ ushr v5.4s,v25.4s,#20 ++ eor w21,w21,w8 ++ ushr v9.4s,v26.4s,#20 ++ ror w17,w17,#24 ++ ushr v13.4s,v27.4s,#20 ++ ror w19,w19,#24 ++ ushr v17.4s,v28.4s,#20 ++ ror w20,w20,#24 ++ ushr v21.4s,v29.4s,#20 ++ ror w21,w21,#24 ++ sli v1.4s,v24.4s,#12 ++ add w13,w13,w17 ++ sli v5.4s,v25.4s,#12 ++ add w14,w14,w19 ++ sli v9.4s,v26.4s,#12 ++ add w15,w15,w20 ++ sli v13.4s,v27.4s,#12 ++ add w16,w16,w21 ++ sli v17.4s,v28.4s,#12 ++ eor w9,w9,w13 ++ sli v21.4s,v29.4s,#12 ++ eor w10,w10,w14 ++ add v0.4s,v0.4s,v1.4s ++ eor w11,w11,w15 ++ add v4.4s,v4.4s,v5.4s ++ eor w12,w12,w16 ++ add v8.4s,v8.4s,v9.4s ++ ror w9,w9,#25 ++ add v12.4s,v12.4s,v13.4s ++ ror w10,w10,#25 ++ add v16.4s,v16.4s,v17.4s ++ ror w11,w11,#25 ++ add v20.4s,v20.4s,v21.4s ++ ror w12,w12,#25 ++ eor v24.16b,v3.16b,v0.16b ++ add w5,w5,w10 ++ eor v25.16b,v7.16b,v4.16b ++ add w6,w6,w11 ++ eor v26.16b,v11.16b,v8.16b ++ add w7,w7,w12 ++ eor v27.16b,v15.16b,v12.16b ++ add w8,w8,w9 ++ eor v28.16b,v19.16b,v16.16b ++ eor w21,w21,w5 ++ eor v29.16b,v23.16b,v20.16b ++ eor w17,w17,w6 ++ ushr v3.4s,v24.4s,#24 ++ eor w19,w19,w7 ++ ushr v7.4s,v25.4s,#24 ++ eor w20,w20,w8 ++ ushr v11.4s,v26.4s,#24 ++ ror w21,w21,#16 ++ ushr v15.4s,v27.4s,#24 ++ ror w17,w17,#16 ++ ushr v19.4s,v28.4s,#24 ++ ror w19,w19,#16 ++ ushr v23.4s,v29.4s,#24 ++ ror w20,w20,#16 ++ sli v3.4s,v24.4s,#8 ++ add w15,w15,w21 ++ sli v7.4s,v25.4s,#8 ++ add w16,w16,w17 ++ sli v11.4s,v26.4s,#8 ++ add w13,w13,w19 ++ sli v15.4s,v27.4s,#8 ++ add w14,w14,w20 ++ sli v19.4s,v28.4s,#8 ++ eor w10,w10,w15 ++ sli v23.4s,v29.4s,#8 ++ eor w11,w11,w16 ++ add v2.4s,v2.4s,v3.4s ++ eor w12,w12,w13 ++ add v6.4s,v6.4s,v7.4s ++ eor w9,w9,w14 ++ add v10.4s,v10.4s,v11.4s ++ ror w10,w10,#20 ++ add v14.4s,v14.4s,v15.4s ++ ror w11,w11,#20 ++ add v18.4s,v18.4s,v19.4s ++ ror w12,w12,#20 ++ add v22.4s,v22.4s,v23.4s ++ ror w9,w9,#20 ++ eor v24.16b,v1.16b,v2.16b ++ add w5,w5,w10 ++ eor v25.16b,v5.16b,v6.16b ++ add w6,w6,w11 ++ eor v26.16b,v9.16b,v10.16b ++ add w7,w7,w12 ++ eor v27.16b,v13.16b,v14.16b ++ add w8,w8,w9 ++ eor v28.16b,v17.16b,v18.16b ++ eor w21,w21,w5 ++ eor v29.16b,v21.16b,v22.16b ++ eor w17,w17,w6 ++ ushr v1.4s,v24.4s,#25 ++ eor w19,w19,w7 ++ ushr v5.4s,v25.4s,#25 ++ eor w20,w20,w8 ++ ushr v9.4s,v26.4s,#25 ++ ror w21,w21,#24 ++ ushr v13.4s,v27.4s,#25 ++ ror w17,w17,#24 ++ ushr v17.4s,v28.4s,#25 ++ ror w19,w19,#24 ++ ushr v21.4s,v29.4s,#25 ++ ror w20,w20,#24 ++ sli v1.4s,v24.4s,#7 ++ add w15,w15,w21 ++ sli v5.4s,v25.4s,#7 ++ add w16,w16,w17 ++ sli v9.4s,v26.4s,#7 ++ add w13,w13,w19 ++ sli v13.4s,v27.4s,#7 ++ add w14,w14,w20 ++ sli v17.4s,v28.4s,#7 ++ eor w10,w10,w15 ++ sli v21.4s,v29.4s,#7 ++ eor w11,w11,w16 ++ ext v2.16b,v2.16b,v2.16b,#8 ++ eor w12,w12,w13 ++ ext v6.16b,v6.16b,v6.16b,#8 ++ eor w9,w9,w14 ++ ext v10.16b,v10.16b,v10.16b,#8 ++ ror w10,w10,#25 ++ ext v14.16b,v14.16b,v14.16b,#8 ++ ror w11,w11,#25 ++ ext v18.16b,v18.16b,v18.16b,#8 ++ ror w12,w12,#25 ++ ext v22.16b,v22.16b,v22.16b,#8 ++ ror w9,w9,#25 ++ ext v3.16b,v3.16b,v3.16b,#12 ++ ext v7.16b,v7.16b,v7.16b,#12 ++ ext v11.16b,v11.16b,v11.16b,#12 ++ ext v15.16b,v15.16b,v15.16b,#12 ++ ext v19.16b,v19.16b,v19.16b,#12 ++ ext v23.16b,v23.16b,v23.16b,#12 ++ ext v1.16b,v1.16b,v1.16b,#4 ++ ext v5.16b,v5.16b,v5.16b,#4 ++ ext v9.16b,v9.16b,v9.16b,#4 ++ ext v13.16b,v13.16b,v13.16b,#4 ++ ext 
v17.16b,v17.16b,v17.16b,#4 ++ ext v21.16b,v21.16b,v21.16b,#4 ++ add v0.4s,v0.4s,v1.4s ++ add w5,w5,w9 ++ add v4.4s,v4.4s,v5.4s ++ add w6,w6,w10 ++ add v8.4s,v8.4s,v9.4s ++ add w7,w7,w11 ++ add v12.4s,v12.4s,v13.4s ++ add w8,w8,w12 ++ add v16.4s,v16.4s,v17.4s ++ eor w17,w17,w5 ++ add v20.4s,v20.4s,v21.4s ++ eor w19,w19,w6 ++ eor v3.16b,v3.16b,v0.16b ++ eor w20,w20,w7 ++ eor v7.16b,v7.16b,v4.16b ++ eor w21,w21,w8 ++ eor v11.16b,v11.16b,v8.16b ++ ror w17,w17,#16 ++ eor v15.16b,v15.16b,v12.16b ++ ror w19,w19,#16 ++ eor v19.16b,v19.16b,v16.16b ++ ror w20,w20,#16 ++ eor v23.16b,v23.16b,v20.16b ++ ror w21,w21,#16 ++ rev32 v3.8h,v3.8h ++ add w13,w13,w17 ++ rev32 v7.8h,v7.8h ++ add w14,w14,w19 ++ rev32 v11.8h,v11.8h ++ add w15,w15,w20 ++ rev32 v15.8h,v15.8h ++ add w16,w16,w21 ++ rev32 v19.8h,v19.8h ++ eor w9,w9,w13 ++ rev32 v23.8h,v23.8h ++ eor w10,w10,w14 ++ add v2.4s,v2.4s,v3.4s ++ eor w11,w11,w15 ++ add v6.4s,v6.4s,v7.4s ++ eor w12,w12,w16 ++ add v10.4s,v10.4s,v11.4s ++ ror w9,w9,#20 ++ add v14.4s,v14.4s,v15.4s ++ ror w10,w10,#20 ++ add v18.4s,v18.4s,v19.4s ++ ror w11,w11,#20 ++ add v22.4s,v22.4s,v23.4s ++ ror w12,w12,#20 ++ eor v24.16b,v1.16b,v2.16b ++ add w5,w5,w9 ++ eor v25.16b,v5.16b,v6.16b ++ add w6,w6,w10 ++ eor v26.16b,v9.16b,v10.16b ++ add w7,w7,w11 ++ eor v27.16b,v13.16b,v14.16b ++ add w8,w8,w12 ++ eor v28.16b,v17.16b,v18.16b ++ eor w17,w17,w5 ++ eor v29.16b,v21.16b,v22.16b ++ eor w19,w19,w6 ++ ushr v1.4s,v24.4s,#20 ++ eor w20,w20,w7 ++ ushr v5.4s,v25.4s,#20 ++ eor w21,w21,w8 ++ ushr v9.4s,v26.4s,#20 ++ ror w17,w17,#24 ++ ushr v13.4s,v27.4s,#20 ++ ror w19,w19,#24 ++ ushr v17.4s,v28.4s,#20 ++ ror w20,w20,#24 ++ ushr v21.4s,v29.4s,#20 ++ ror w21,w21,#24 ++ sli v1.4s,v24.4s,#12 ++ add w13,w13,w17 ++ sli v5.4s,v25.4s,#12 ++ add w14,w14,w19 ++ sli v9.4s,v26.4s,#12 ++ add w15,w15,w20 ++ sli v13.4s,v27.4s,#12 ++ add w16,w16,w21 ++ sli v17.4s,v28.4s,#12 ++ eor w9,w9,w13 ++ sli v21.4s,v29.4s,#12 ++ eor w10,w10,w14 ++ add v0.4s,v0.4s,v1.4s ++ eor w11,w11,w15 ++ add v4.4s,v4.4s,v5.4s ++ eor w12,w12,w16 ++ add v8.4s,v8.4s,v9.4s ++ ror w9,w9,#25 ++ add v12.4s,v12.4s,v13.4s ++ ror w10,w10,#25 ++ add v16.4s,v16.4s,v17.4s ++ ror w11,w11,#25 ++ add v20.4s,v20.4s,v21.4s ++ ror w12,w12,#25 ++ eor v24.16b,v3.16b,v0.16b ++ add w5,w5,w10 ++ eor v25.16b,v7.16b,v4.16b ++ add w6,w6,w11 ++ eor v26.16b,v11.16b,v8.16b ++ add w7,w7,w12 ++ eor v27.16b,v15.16b,v12.16b ++ add w8,w8,w9 ++ eor v28.16b,v19.16b,v16.16b ++ eor w21,w21,w5 ++ eor v29.16b,v23.16b,v20.16b ++ eor w17,w17,w6 ++ ushr v3.4s,v24.4s,#24 ++ eor w19,w19,w7 ++ ushr v7.4s,v25.4s,#24 ++ eor w20,w20,w8 ++ ushr v11.4s,v26.4s,#24 ++ ror w21,w21,#16 ++ ushr v15.4s,v27.4s,#24 ++ ror w17,w17,#16 ++ ushr v19.4s,v28.4s,#24 ++ ror w19,w19,#16 ++ ushr v23.4s,v29.4s,#24 ++ ror w20,w20,#16 ++ sli v3.4s,v24.4s,#8 ++ add w15,w15,w21 ++ sli v7.4s,v25.4s,#8 ++ add w16,w16,w17 ++ sli v11.4s,v26.4s,#8 ++ add w13,w13,w19 ++ sli v15.4s,v27.4s,#8 ++ add w14,w14,w20 ++ sli v19.4s,v28.4s,#8 ++ eor w10,w10,w15 ++ sli v23.4s,v29.4s,#8 ++ eor w11,w11,w16 ++ add v2.4s,v2.4s,v3.4s ++ eor w12,w12,w13 ++ add v6.4s,v6.4s,v7.4s ++ eor w9,w9,w14 ++ add v10.4s,v10.4s,v11.4s ++ ror w10,w10,#20 ++ add v14.4s,v14.4s,v15.4s ++ ror w11,w11,#20 ++ add v18.4s,v18.4s,v19.4s ++ ror w12,w12,#20 ++ add v22.4s,v22.4s,v23.4s ++ ror w9,w9,#20 ++ eor v24.16b,v1.16b,v2.16b ++ add w5,w5,w10 ++ eor v25.16b,v5.16b,v6.16b ++ add w6,w6,w11 ++ eor v26.16b,v9.16b,v10.16b ++ add w7,w7,w12 ++ eor v27.16b,v13.16b,v14.16b ++ add w8,w8,w9 ++ eor v28.16b,v17.16b,v18.16b ++ eor w21,w21,w5 ++ eor 
v29.16b,v21.16b,v22.16b ++ eor w17,w17,w6 ++ ushr v1.4s,v24.4s,#25 ++ eor w19,w19,w7 ++ ushr v5.4s,v25.4s,#25 ++ eor w20,w20,w8 ++ ushr v9.4s,v26.4s,#25 ++ ror w21,w21,#24 ++ ushr v13.4s,v27.4s,#25 ++ ror w17,w17,#24 ++ ushr v17.4s,v28.4s,#25 ++ ror w19,w19,#24 ++ ushr v21.4s,v29.4s,#25 ++ ror w20,w20,#24 ++ sli v1.4s,v24.4s,#7 ++ add w15,w15,w21 ++ sli v5.4s,v25.4s,#7 ++ add w16,w16,w17 ++ sli v9.4s,v26.4s,#7 ++ add w13,w13,w19 ++ sli v13.4s,v27.4s,#7 ++ add w14,w14,w20 ++ sli v17.4s,v28.4s,#7 ++ eor w10,w10,w15 ++ sli v21.4s,v29.4s,#7 ++ eor w11,w11,w16 ++ ext v2.16b,v2.16b,v2.16b,#8 ++ eor w12,w12,w13 ++ ext v6.16b,v6.16b,v6.16b,#8 ++ eor w9,w9,w14 ++ ext v10.16b,v10.16b,v10.16b,#8 ++ ror w10,w10,#25 ++ ext v14.16b,v14.16b,v14.16b,#8 ++ ror w11,w11,#25 ++ ext v18.16b,v18.16b,v18.16b,#8 ++ ror w12,w12,#25 ++ ext v22.16b,v22.16b,v22.16b,#8 ++ ror w9,w9,#25 ++ ext v3.16b,v3.16b,v3.16b,#4 ++ ext v7.16b,v7.16b,v7.16b,#4 ++ ext v11.16b,v11.16b,v11.16b,#4 ++ ext v15.16b,v15.16b,v15.16b,#4 ++ ext v19.16b,v19.16b,v19.16b,#4 ++ ext v23.16b,v23.16b,v23.16b,#4 ++ ext v1.16b,v1.16b,v1.16b,#12 ++ ext v5.16b,v5.16b,v5.16b,#12 ++ ext v9.16b,v9.16b,v9.16b,#12 ++ ext v13.16b,v13.16b,v13.16b,#12 ++ ext v17.16b,v17.16b,v17.16b,#12 ++ ext v21.16b,v21.16b,v21.16b,#12 ++ cbnz x4,Loop_upper_neon ++ ++ add w5,w5,w22 // accumulate key block ++ add x6,x6,x22,lsr#32 ++ add w7,w7,w23 ++ add x8,x8,x23,lsr#32 ++ add w9,w9,w24 ++ add x10,x10,x24,lsr#32 ++ add w11,w11,w25 ++ add x12,x12,x25,lsr#32 ++ add w13,w13,w26 ++ add x14,x14,x26,lsr#32 ++ add w15,w15,w27 ++ add x16,x16,x27,lsr#32 ++ add w17,w17,w28 ++ add x19,x19,x28,lsr#32 ++ add w20,w20,w30 ++ add x21,x21,x30,lsr#32 ++ ++ add x5,x5,x6,lsl#32 // pack ++ add x7,x7,x8,lsl#32 ++ ldp x6,x8,[x1,#0] // load input ++ add x9,x9,x10,lsl#32 ++ add x11,x11,x12,lsl#32 ++ ldp x10,x12,[x1,#16] ++ add x13,x13,x14,lsl#32 ++ add x15,x15,x16,lsl#32 ++ ldp x14,x16,[x1,#32] ++ add x17,x17,x19,lsl#32 ++ add x20,x20,x21,lsl#32 ++ ldp x19,x21,[x1,#48] ++ add x1,x1,#64 ++#ifdef __AARCH64EB__ ++ rev x5,x5 ++ rev x7,x7 ++ rev x9,x9 ++ rev x11,x11 ++ rev x13,x13 ++ rev x15,x15 ++ rev x17,x17 ++ rev x20,x20 ++#endif ++ eor x5,x5,x6 ++ eor x7,x7,x8 ++ eor x9,x9,x10 ++ eor x11,x11,x12 ++ eor x13,x13,x14 ++ eor x15,x15,x16 ++ eor x17,x17,x19 ++ eor x20,x20,x21 ++ ++ stp x5,x7,[x0,#0] // store output ++ add x28,x28,#1 // increment counter ++ mov w5,w22 // unpack key block ++ lsr x6,x22,#32 ++ stp x9,x11,[x0,#16] ++ mov w7,w23 ++ lsr x8,x23,#32 ++ stp x13,x15,[x0,#32] ++ mov w9,w24 ++ lsr x10,x24,#32 ++ stp x17,x20,[x0,#48] ++ add x0,x0,#64 ++ mov w11,w25 ++ lsr x12,x25,#32 ++ mov w13,w26 ++ lsr x14,x26,#32 ++ mov w15,w27 ++ lsr x16,x27,#32 ++ mov w17,w28 ++ lsr x19,x28,#32 ++ mov w20,w30 ++ lsr x21,x30,#32 ++ ++ mov x4,#5 ++Loop_lower_neon: ++ sub x4,x4,#1 ++ add v0.4s,v0.4s,v1.4s ++ add w5,w5,w9 ++ add v4.4s,v4.4s,v5.4s ++ add w6,w6,w10 ++ add v8.4s,v8.4s,v9.4s ++ add w7,w7,w11 ++ add v12.4s,v12.4s,v13.4s ++ add w8,w8,w12 ++ add v16.4s,v16.4s,v17.4s ++ eor w17,w17,w5 ++ add v20.4s,v20.4s,v21.4s ++ eor w19,w19,w6 ++ eor v3.16b,v3.16b,v0.16b ++ eor w20,w20,w7 ++ eor v7.16b,v7.16b,v4.16b ++ eor w21,w21,w8 ++ eor v11.16b,v11.16b,v8.16b ++ ror w17,w17,#16 ++ eor v15.16b,v15.16b,v12.16b ++ ror w19,w19,#16 ++ eor v19.16b,v19.16b,v16.16b ++ ror w20,w20,#16 ++ eor v23.16b,v23.16b,v20.16b ++ ror w21,w21,#16 ++ rev32 v3.8h,v3.8h ++ add w13,w13,w17 ++ rev32 v7.8h,v7.8h ++ add w14,w14,w19 ++ rev32 v11.8h,v11.8h ++ add w15,w15,w20 ++ rev32 v15.8h,v15.8h ++ add w16,w16,w21 ++ rev32 v19.8h,v19.8h ++ eor 
w9,w9,w13 ++ rev32 v23.8h,v23.8h ++ eor w10,w10,w14 ++ add v2.4s,v2.4s,v3.4s ++ eor w11,w11,w15 ++ add v6.4s,v6.4s,v7.4s ++ eor w12,w12,w16 ++ add v10.4s,v10.4s,v11.4s ++ ror w9,w9,#20 ++ add v14.4s,v14.4s,v15.4s ++ ror w10,w10,#20 ++ add v18.4s,v18.4s,v19.4s ++ ror w11,w11,#20 ++ add v22.4s,v22.4s,v23.4s ++ ror w12,w12,#20 ++ eor v24.16b,v1.16b,v2.16b ++ add w5,w5,w9 ++ eor v25.16b,v5.16b,v6.16b ++ add w6,w6,w10 ++ eor v26.16b,v9.16b,v10.16b ++ add w7,w7,w11 ++ eor v27.16b,v13.16b,v14.16b ++ add w8,w8,w12 ++ eor v28.16b,v17.16b,v18.16b ++ eor w17,w17,w5 ++ eor v29.16b,v21.16b,v22.16b ++ eor w19,w19,w6 ++ ushr v1.4s,v24.4s,#20 ++ eor w20,w20,w7 ++ ushr v5.4s,v25.4s,#20 ++ eor w21,w21,w8 ++ ushr v9.4s,v26.4s,#20 ++ ror w17,w17,#24 ++ ushr v13.4s,v27.4s,#20 ++ ror w19,w19,#24 ++ ushr v17.4s,v28.4s,#20 ++ ror w20,w20,#24 ++ ushr v21.4s,v29.4s,#20 ++ ror w21,w21,#24 ++ sli v1.4s,v24.4s,#12 ++ add w13,w13,w17 ++ sli v5.4s,v25.4s,#12 ++ add w14,w14,w19 ++ sli v9.4s,v26.4s,#12 ++ add w15,w15,w20 ++ sli v13.4s,v27.4s,#12 ++ add w16,w16,w21 ++ sli v17.4s,v28.4s,#12 ++ eor w9,w9,w13 ++ sli v21.4s,v29.4s,#12 ++ eor w10,w10,w14 ++ add v0.4s,v0.4s,v1.4s ++ eor w11,w11,w15 ++ add v4.4s,v4.4s,v5.4s ++ eor w12,w12,w16 ++ add v8.4s,v8.4s,v9.4s ++ ror w9,w9,#25 ++ add v12.4s,v12.4s,v13.4s ++ ror w10,w10,#25 ++ add v16.4s,v16.4s,v17.4s ++ ror w11,w11,#25 ++ add v20.4s,v20.4s,v21.4s ++ ror w12,w12,#25 ++ eor v24.16b,v3.16b,v0.16b ++ add w5,w5,w10 ++ eor v25.16b,v7.16b,v4.16b ++ add w6,w6,w11 ++ eor v26.16b,v11.16b,v8.16b ++ add w7,w7,w12 ++ eor v27.16b,v15.16b,v12.16b ++ add w8,w8,w9 ++ eor v28.16b,v19.16b,v16.16b ++ eor w21,w21,w5 ++ eor v29.16b,v23.16b,v20.16b ++ eor w17,w17,w6 ++ ushr v3.4s,v24.4s,#24 ++ eor w19,w19,w7 ++ ushr v7.4s,v25.4s,#24 ++ eor w20,w20,w8 ++ ushr v11.4s,v26.4s,#24 ++ ror w21,w21,#16 ++ ushr v15.4s,v27.4s,#24 ++ ror w17,w17,#16 ++ ushr v19.4s,v28.4s,#24 ++ ror w19,w19,#16 ++ ushr v23.4s,v29.4s,#24 ++ ror w20,w20,#16 ++ sli v3.4s,v24.4s,#8 ++ add w15,w15,w21 ++ sli v7.4s,v25.4s,#8 ++ add w16,w16,w17 ++ sli v11.4s,v26.4s,#8 ++ add w13,w13,w19 ++ sli v15.4s,v27.4s,#8 ++ add w14,w14,w20 ++ sli v19.4s,v28.4s,#8 ++ eor w10,w10,w15 ++ sli v23.4s,v29.4s,#8 ++ eor w11,w11,w16 ++ add v2.4s,v2.4s,v3.4s ++ eor w12,w12,w13 ++ add v6.4s,v6.4s,v7.4s ++ eor w9,w9,w14 ++ add v10.4s,v10.4s,v11.4s ++ ror w10,w10,#20 ++ add v14.4s,v14.4s,v15.4s ++ ror w11,w11,#20 ++ add v18.4s,v18.4s,v19.4s ++ ror w12,w12,#20 ++ add v22.4s,v22.4s,v23.4s ++ ror w9,w9,#20 ++ eor v24.16b,v1.16b,v2.16b ++ add w5,w5,w10 ++ eor v25.16b,v5.16b,v6.16b ++ add w6,w6,w11 ++ eor v26.16b,v9.16b,v10.16b ++ add w7,w7,w12 ++ eor v27.16b,v13.16b,v14.16b ++ add w8,w8,w9 ++ eor v28.16b,v17.16b,v18.16b ++ eor w21,w21,w5 ++ eor v29.16b,v21.16b,v22.16b ++ eor w17,w17,w6 ++ ushr v1.4s,v24.4s,#25 ++ eor w19,w19,w7 ++ ushr v5.4s,v25.4s,#25 ++ eor w20,w20,w8 ++ ushr v9.4s,v26.4s,#25 ++ ror w21,w21,#24 ++ ushr v13.4s,v27.4s,#25 ++ ror w17,w17,#24 ++ ushr v17.4s,v28.4s,#25 ++ ror w19,w19,#24 ++ ushr v21.4s,v29.4s,#25 ++ ror w20,w20,#24 ++ sli v1.4s,v24.4s,#7 ++ add w15,w15,w21 ++ sli v5.4s,v25.4s,#7 ++ add w16,w16,w17 ++ sli v9.4s,v26.4s,#7 ++ add w13,w13,w19 ++ sli v13.4s,v27.4s,#7 ++ add w14,w14,w20 ++ sli v17.4s,v28.4s,#7 ++ eor w10,w10,w15 ++ sli v21.4s,v29.4s,#7 ++ eor w11,w11,w16 ++ ext v2.16b,v2.16b,v2.16b,#8 ++ eor w12,w12,w13 ++ ext v6.16b,v6.16b,v6.16b,#8 ++ eor w9,w9,w14 ++ ext v10.16b,v10.16b,v10.16b,#8 ++ ror w10,w10,#25 ++ ext v14.16b,v14.16b,v14.16b,#8 ++ ror w11,w11,#25 ++ ext v18.16b,v18.16b,v18.16b,#8 ++ ror w12,w12,#25 ++ ext 
v22.16b,v22.16b,v22.16b,#8 ++ ror w9,w9,#25 ++ ext v3.16b,v3.16b,v3.16b,#12 ++ ext v7.16b,v7.16b,v7.16b,#12 ++ ext v11.16b,v11.16b,v11.16b,#12 ++ ext v15.16b,v15.16b,v15.16b,#12 ++ ext v19.16b,v19.16b,v19.16b,#12 ++ ext v23.16b,v23.16b,v23.16b,#12 ++ ext v1.16b,v1.16b,v1.16b,#4 ++ ext v5.16b,v5.16b,v5.16b,#4 ++ ext v9.16b,v9.16b,v9.16b,#4 ++ ext v13.16b,v13.16b,v13.16b,#4 ++ ext v17.16b,v17.16b,v17.16b,#4 ++ ext v21.16b,v21.16b,v21.16b,#4 ++ add v0.4s,v0.4s,v1.4s ++ add w5,w5,w9 ++ add v4.4s,v4.4s,v5.4s ++ add w6,w6,w10 ++ add v8.4s,v8.4s,v9.4s ++ add w7,w7,w11 ++ add v12.4s,v12.4s,v13.4s ++ add w8,w8,w12 ++ add v16.4s,v16.4s,v17.4s ++ eor w17,w17,w5 ++ add v20.4s,v20.4s,v21.4s ++ eor w19,w19,w6 ++ eor v3.16b,v3.16b,v0.16b ++ eor w20,w20,w7 ++ eor v7.16b,v7.16b,v4.16b ++ eor w21,w21,w8 ++ eor v11.16b,v11.16b,v8.16b ++ ror w17,w17,#16 ++ eor v15.16b,v15.16b,v12.16b ++ ror w19,w19,#16 ++ eor v19.16b,v19.16b,v16.16b ++ ror w20,w20,#16 ++ eor v23.16b,v23.16b,v20.16b ++ ror w21,w21,#16 ++ rev32 v3.8h,v3.8h ++ add w13,w13,w17 ++ rev32 v7.8h,v7.8h ++ add w14,w14,w19 ++ rev32 v11.8h,v11.8h ++ add w15,w15,w20 ++ rev32 v15.8h,v15.8h ++ add w16,w16,w21 ++ rev32 v19.8h,v19.8h ++ eor w9,w9,w13 ++ rev32 v23.8h,v23.8h ++ eor w10,w10,w14 ++ add v2.4s,v2.4s,v3.4s ++ eor w11,w11,w15 ++ add v6.4s,v6.4s,v7.4s ++ eor w12,w12,w16 ++ add v10.4s,v10.4s,v11.4s ++ ror w9,w9,#20 ++ add v14.4s,v14.4s,v15.4s ++ ror w10,w10,#20 ++ add v18.4s,v18.4s,v19.4s ++ ror w11,w11,#20 ++ add v22.4s,v22.4s,v23.4s ++ ror w12,w12,#20 ++ eor v24.16b,v1.16b,v2.16b ++ add w5,w5,w9 ++ eor v25.16b,v5.16b,v6.16b ++ add w6,w6,w10 ++ eor v26.16b,v9.16b,v10.16b ++ add w7,w7,w11 ++ eor v27.16b,v13.16b,v14.16b ++ add w8,w8,w12 ++ eor v28.16b,v17.16b,v18.16b ++ eor w17,w17,w5 ++ eor v29.16b,v21.16b,v22.16b ++ eor w19,w19,w6 ++ ushr v1.4s,v24.4s,#20 ++ eor w20,w20,w7 ++ ushr v5.4s,v25.4s,#20 ++ eor w21,w21,w8 ++ ushr v9.4s,v26.4s,#20 ++ ror w17,w17,#24 ++ ushr v13.4s,v27.4s,#20 ++ ror w19,w19,#24 ++ ushr v17.4s,v28.4s,#20 ++ ror w20,w20,#24 ++ ushr v21.4s,v29.4s,#20 ++ ror w21,w21,#24 ++ sli v1.4s,v24.4s,#12 ++ add w13,w13,w17 ++ sli v5.4s,v25.4s,#12 ++ add w14,w14,w19 ++ sli v9.4s,v26.4s,#12 ++ add w15,w15,w20 ++ sli v13.4s,v27.4s,#12 ++ add w16,w16,w21 ++ sli v17.4s,v28.4s,#12 ++ eor w9,w9,w13 ++ sli v21.4s,v29.4s,#12 ++ eor w10,w10,w14 ++ add v0.4s,v0.4s,v1.4s ++ eor w11,w11,w15 ++ add v4.4s,v4.4s,v5.4s ++ eor w12,w12,w16 ++ add v8.4s,v8.4s,v9.4s ++ ror w9,w9,#25 ++ add v12.4s,v12.4s,v13.4s ++ ror w10,w10,#25 ++ add v16.4s,v16.4s,v17.4s ++ ror w11,w11,#25 ++ add v20.4s,v20.4s,v21.4s ++ ror w12,w12,#25 ++ eor v24.16b,v3.16b,v0.16b ++ add w5,w5,w10 ++ eor v25.16b,v7.16b,v4.16b ++ add w6,w6,w11 ++ eor v26.16b,v11.16b,v8.16b ++ add w7,w7,w12 ++ eor v27.16b,v15.16b,v12.16b ++ add w8,w8,w9 ++ eor v28.16b,v19.16b,v16.16b ++ eor w21,w21,w5 ++ eor v29.16b,v23.16b,v20.16b ++ eor w17,w17,w6 ++ ushr v3.4s,v24.4s,#24 ++ eor w19,w19,w7 ++ ushr v7.4s,v25.4s,#24 ++ eor w20,w20,w8 ++ ushr v11.4s,v26.4s,#24 ++ ror w21,w21,#16 ++ ushr v15.4s,v27.4s,#24 ++ ror w17,w17,#16 ++ ushr v19.4s,v28.4s,#24 ++ ror w19,w19,#16 ++ ushr v23.4s,v29.4s,#24 ++ ror w20,w20,#16 ++ sli v3.4s,v24.4s,#8 ++ add w15,w15,w21 ++ sli v7.4s,v25.4s,#8 ++ add w16,w16,w17 ++ sli v11.4s,v26.4s,#8 ++ add w13,w13,w19 ++ sli v15.4s,v27.4s,#8 ++ add w14,w14,w20 ++ sli v19.4s,v28.4s,#8 ++ eor w10,w10,w15 ++ sli v23.4s,v29.4s,#8 ++ eor w11,w11,w16 ++ add v2.4s,v2.4s,v3.4s ++ eor w12,w12,w13 ++ add v6.4s,v6.4s,v7.4s ++ eor w9,w9,w14 ++ add v10.4s,v10.4s,v11.4s ++ ror w10,w10,#20 ++ add 
v14.4s,v14.4s,v15.4s ++ ror w11,w11,#20 ++ add v18.4s,v18.4s,v19.4s ++ ror w12,w12,#20 ++ add v22.4s,v22.4s,v23.4s ++ ror w9,w9,#20 ++ eor v24.16b,v1.16b,v2.16b ++ add w5,w5,w10 ++ eor v25.16b,v5.16b,v6.16b ++ add w6,w6,w11 ++ eor v26.16b,v9.16b,v10.16b ++ add w7,w7,w12 ++ eor v27.16b,v13.16b,v14.16b ++ add w8,w8,w9 ++ eor v28.16b,v17.16b,v18.16b ++ eor w21,w21,w5 ++ eor v29.16b,v21.16b,v22.16b ++ eor w17,w17,w6 ++ ushr v1.4s,v24.4s,#25 ++ eor w19,w19,w7 ++ ushr v5.4s,v25.4s,#25 ++ eor w20,w20,w8 ++ ushr v9.4s,v26.4s,#25 ++ ror w21,w21,#24 ++ ushr v13.4s,v27.4s,#25 ++ ror w17,w17,#24 ++ ushr v17.4s,v28.4s,#25 ++ ror w19,w19,#24 ++ ushr v21.4s,v29.4s,#25 ++ ror w20,w20,#24 ++ sli v1.4s,v24.4s,#7 ++ add w15,w15,w21 ++ sli v5.4s,v25.4s,#7 ++ add w16,w16,w17 ++ sli v9.4s,v26.4s,#7 ++ add w13,w13,w19 ++ sli v13.4s,v27.4s,#7 ++ add w14,w14,w20 ++ sli v17.4s,v28.4s,#7 ++ eor w10,w10,w15 ++ sli v21.4s,v29.4s,#7 ++ eor w11,w11,w16 ++ ext v2.16b,v2.16b,v2.16b,#8 ++ eor w12,w12,w13 ++ ext v6.16b,v6.16b,v6.16b,#8 ++ eor w9,w9,w14 ++ ext v10.16b,v10.16b,v10.16b,#8 ++ ror w10,w10,#25 ++ ext v14.16b,v14.16b,v14.16b,#8 ++ ror w11,w11,#25 ++ ext v18.16b,v18.16b,v18.16b,#8 ++ ror w12,w12,#25 ++ ext v22.16b,v22.16b,v22.16b,#8 ++ ror w9,w9,#25 ++ ext v3.16b,v3.16b,v3.16b,#4 ++ ext v7.16b,v7.16b,v7.16b,#4 ++ ext v11.16b,v11.16b,v11.16b,#4 ++ ext v15.16b,v15.16b,v15.16b,#4 ++ ext v19.16b,v19.16b,v19.16b,#4 ++ ext v23.16b,v23.16b,v23.16b,#4 ++ ext v1.16b,v1.16b,v1.16b,#12 ++ ext v5.16b,v5.16b,v5.16b,#12 ++ ext v9.16b,v9.16b,v9.16b,#12 ++ ext v13.16b,v13.16b,v13.16b,#12 ++ ext v17.16b,v17.16b,v17.16b,#12 ++ ext v21.16b,v21.16b,v21.16b,#12 ++ cbnz x4,Loop_lower_neon ++ ++ add w5,w5,w22 // accumulate key block ++ ldp q24,q25,[sp,#0] ++ add x6,x6,x22,lsr#32 ++ ldp q26,q27,[sp,#32] ++ add w7,w7,w23 ++ ldp q28,q29,[sp,#64] ++ add x8,x8,x23,lsr#32 ++ add v0.4s,v0.4s,v24.4s ++ add w9,w9,w24 ++ add v4.4s,v4.4s,v24.4s ++ add x10,x10,x24,lsr#32 ++ add v8.4s,v8.4s,v24.4s ++ add w11,w11,w25 ++ add v12.4s,v12.4s,v24.4s ++ add x12,x12,x25,lsr#32 ++ add v16.4s,v16.4s,v24.4s ++ add w13,w13,w26 ++ add v20.4s,v20.4s,v24.4s ++ add x14,x14,x26,lsr#32 ++ add v2.4s,v2.4s,v26.4s ++ add w15,w15,w27 ++ add v6.4s,v6.4s,v26.4s ++ add x16,x16,x27,lsr#32 ++ add v10.4s,v10.4s,v26.4s ++ add w17,w17,w28 ++ add v14.4s,v14.4s,v26.4s ++ add x19,x19,x28,lsr#32 ++ add v18.4s,v18.4s,v26.4s ++ add w20,w20,w30 ++ add v22.4s,v22.4s,v26.4s ++ add x21,x21,x30,lsr#32 ++ add v19.4s,v19.4s,v31.4s // +4 ++ add x5,x5,x6,lsl#32 // pack ++ add v23.4s,v23.4s,v31.4s // +4 ++ add x7,x7,x8,lsl#32 ++ add v3.4s,v3.4s,v27.4s ++ ldp x6,x8,[x1,#0] // load input ++ add v7.4s,v7.4s,v28.4s ++ add x9,x9,x10,lsl#32 ++ add v11.4s,v11.4s,v29.4s ++ add x11,x11,x12,lsl#32 ++ add v15.4s,v15.4s,v30.4s ++ ldp x10,x12,[x1,#16] ++ add v19.4s,v19.4s,v27.4s ++ add x13,x13,x14,lsl#32 ++ add v23.4s,v23.4s,v28.4s ++ add x15,x15,x16,lsl#32 ++ add v1.4s,v1.4s,v25.4s ++ ldp x14,x16,[x1,#32] ++ add v5.4s,v5.4s,v25.4s ++ add x17,x17,x19,lsl#32 ++ add v9.4s,v9.4s,v25.4s ++ add x20,x20,x21,lsl#32 ++ add v13.4s,v13.4s,v25.4s ++ ldp x19,x21,[x1,#48] ++ add v17.4s,v17.4s,v25.4s ++ add x1,x1,#64 ++ add v21.4s,v21.4s,v25.4s ++ ++#ifdef __AARCH64EB__ ++ rev x5,x5 ++ rev x7,x7 ++ rev x9,x9 ++ rev x11,x11 ++ rev x13,x13 ++ rev x15,x15 ++ rev x17,x17 ++ rev x20,x20 ++#endif ++ ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 ++ eor x5,x5,x6 ++ eor x7,x7,x8 ++ eor x9,x9,x10 ++ eor x11,x11,x12 ++ eor x13,x13,x14 ++ eor v0.16b,v0.16b,v24.16b ++ eor x15,x15,x16 ++ eor v1.16b,v1.16b,v25.16b ++ eor x17,x17,x19 ++ 
eor v2.16b,v2.16b,v26.16b ++ eor x20,x20,x21 ++ eor v3.16b,v3.16b,v27.16b ++ ld1 {v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64 ++ ++ stp x5,x7,[x0,#0] // store output ++ add x28,x28,#7 // increment counter ++ stp x9,x11,[x0,#16] ++ stp x13,x15,[x0,#32] ++ stp x17,x20,[x0,#48] ++ add x0,x0,#64 ++ st1 {v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64 ++ ++ ld1 {v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64 ++ eor v4.16b,v4.16b,v24.16b ++ eor v5.16b,v5.16b,v25.16b ++ eor v6.16b,v6.16b,v26.16b ++ eor v7.16b,v7.16b,v27.16b ++ st1 {v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64 ++ ++ ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 ++ eor v8.16b,v8.16b,v0.16b ++ ldp q24,q25,[sp,#0] ++ eor v9.16b,v9.16b,v1.16b ++ ldp q26,q27,[sp,#32] ++ eor v10.16b,v10.16b,v2.16b ++ eor v11.16b,v11.16b,v3.16b ++ st1 {v8.16b,v9.16b,v10.16b,v11.16b},[x0],#64 ++ ++ ld1 {v8.16b,v9.16b,v10.16b,v11.16b},[x1],#64 ++ eor v12.16b,v12.16b,v4.16b ++ eor v13.16b,v13.16b,v5.16b ++ eor v14.16b,v14.16b,v6.16b ++ eor v15.16b,v15.16b,v7.16b ++ st1 {v12.16b,v13.16b,v14.16b,v15.16b},[x0],#64 ++ ++ ld1 {v12.16b,v13.16b,v14.16b,v15.16b},[x1],#64 ++ eor v16.16b,v16.16b,v8.16b ++ eor v17.16b,v17.16b,v9.16b ++ eor v18.16b,v18.16b,v10.16b ++ eor v19.16b,v19.16b,v11.16b ++ st1 {v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64 ++ ++ shl v0.4s,v31.4s,#1 // 4 -> 8 ++ eor v20.16b,v20.16b,v12.16b ++ eor v21.16b,v21.16b,v13.16b ++ eor v22.16b,v22.16b,v14.16b ++ eor v23.16b,v23.16b,v15.16b ++ st1 {v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64 ++ ++ add v27.4s,v27.4s,v0.4s // += 8 ++ add v28.4s,v28.4s,v0.4s ++ add v29.4s,v29.4s,v0.4s ++ add v30.4s,v30.4s,v0.4s ++ ++ b.hs Loop_outer_512_neon ++ ++ adds x2,x2,#512 ++ ushr v0.4s,v31.4s,#2 // 4 -> 1 ++ ++ ldp d8,d9,[sp,#128+0] // meet ABI requirements ++ ldp d10,d11,[sp,#128+16] ++ ldp d12,d13,[sp,#128+32] ++ ldp d14,d15,[sp,#128+48] ++ ++ stp q24,q31,[sp,#0] // wipe off-load area ++ stp q24,q31,[sp,#32] ++ stp q24,q31,[sp,#64] ++ ++ b.eq Ldone_512_neon ++ ++ cmp x2,#192 ++ sub v27.4s,v27.4s,v0.4s // -= 1 ++ sub v28.4s,v28.4s,v0.4s ++ sub v29.4s,v29.4s,v0.4s ++ add sp,sp,#128 ++ b.hs Loop_outer_neon ++ ++ eor v25.16b,v25.16b,v25.16b ++ eor v26.16b,v26.16b,v26.16b ++ eor v27.16b,v27.16b,v27.16b ++ eor v28.16b,v28.16b,v28.16b ++ eor v29.16b,v29.16b,v29.16b ++ eor v30.16b,v30.16b,v30.16b ++ b Loop_outer ++ ++Ldone_512_neon: ++ ldp x19,x20,[x29,#16] ++ add sp,sp,#128+64 ++ ldp x21,x22,[x29,#32] ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldp x29,x30,[sp],#96 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-aarch64/crypto/fipsmodule/aesv8-armx64.S b/apple-aarch64/crypto/fipsmodule/aesv8-armx64.S +new file mode 100644 +index 0000000..50d7dea +--- /dev/null ++++ b/apple-aarch64/crypto/fipsmodule/aesv8-armx64.S +@@ -0,0 +1,799 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
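The ChaCha20 file that ends just above interleaves one scalar block with several NEON block lanes; the scalar ror #16/#20/#24/#25 and the vector rev32 and ushr/sli pairs are the four quarter-round rotations (left by 16, 12, 8 and 7). For orientation only, and not part of the imported file, here is a minimal C sketch of that quarter round as specified in RFC 8439:

#include <stdint.h>

/* Rotate a 32-bit word left by n bits. In the assembly above this is
 * ror #(32-n) on the scalar path and ushr #(32-n) followed by sli #n
 * (or rev32 for n == 16) on the NEON path. */
static inline uint32_t rotl32(uint32_t v, int n) {
  return (v << n) | (v >> (32 - n));
}

/* One ChaCha20 quarter round (RFC 8439). The generated code runs this on
 * four scalar state words and on several NEON vectors in parallel. */
static void chacha_quarter_round(uint32_t *a, uint32_t *b, uint32_t *c,
                                 uint32_t *d) {
  *a += *b; *d ^= *a; *d = rotl32(*d, 16);
  *c += *d; *b ^= *c; *b = rotl32(*b, 12);
  *a += *b; *d ^= *a; *d = rotl32(*d, 8);
  *c += *d; *b ^= *c; *b = rotl32(*b, 7);
}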
++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++#if __ARM_MAX_ARCH__>=7 ++.text ++ ++.section __TEXT,__const ++.align 5 ++Lrcon: ++.long 0x01,0x01,0x01,0x01 ++.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat ++.long 0x1b,0x1b,0x1b,0x1b ++ ++.text ++ ++.globl _aes_hw_set_encrypt_key ++.private_extern _aes_hw_set_encrypt_key ++ ++.align 5 ++_aes_hw_set_encrypt_key: ++Lenc_key: ++ // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. ++ AARCH64_VALID_CALL_TARGET ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ mov x3,#-1 ++ cmp x0,#0 ++ b.eq Lenc_key_abort ++ cmp x2,#0 ++ b.eq Lenc_key_abort ++ mov x3,#-2 ++ cmp w1,#128 ++ b.lt Lenc_key_abort ++ cmp w1,#256 ++ b.gt Lenc_key_abort ++ tst w1,#0x3f ++ b.ne Lenc_key_abort ++ ++ adrp x3,Lrcon@PAGE ++ add x3,x3,Lrcon@PAGEOFF ++ cmp w1,#192 ++ ++ eor v0.16b,v0.16b,v0.16b ++ ld1 {v3.16b},[x0],#16 ++ mov w1,#8 // reuse w1 ++ ld1 {v1.4s,v2.4s},[x3],#32 ++ ++ b.lt Loop128 ++ b.eq L192 ++ b L256 ++ ++.align 4 ++Loop128: ++ tbl v6.16b,{v3.16b},v2.16b ++ ext v5.16b,v0.16b,v3.16b,#12 ++ st1 {v3.4s},[x2],#16 ++ aese v6.16b,v0.16b ++ subs w1,w1,#1 ++ ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v6.16b,v6.16b,v1.16b ++ eor v3.16b,v3.16b,v5.16b ++ shl v1.16b,v1.16b,#1 ++ eor v3.16b,v3.16b,v6.16b ++ b.ne Loop128 ++ ++ ld1 {v1.4s},[x3] ++ ++ tbl v6.16b,{v3.16b},v2.16b ++ ext v5.16b,v0.16b,v3.16b,#12 ++ st1 {v3.4s},[x2],#16 ++ aese v6.16b,v0.16b ++ ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v6.16b,v6.16b,v1.16b ++ eor v3.16b,v3.16b,v5.16b ++ shl v1.16b,v1.16b,#1 ++ eor v3.16b,v3.16b,v6.16b ++ ++ tbl v6.16b,{v3.16b},v2.16b ++ ext v5.16b,v0.16b,v3.16b,#12 ++ st1 {v3.4s},[x2],#16 ++ aese v6.16b,v0.16b ++ ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v6.16b,v6.16b,v1.16b ++ eor v3.16b,v3.16b,v5.16b ++ eor v3.16b,v3.16b,v6.16b ++ st1 {v3.4s},[x2] ++ add x2,x2,#0x50 ++ ++ mov w12,#10 ++ b Ldone ++ ++.align 4 ++L192: ++ ld1 {v4.8b},[x0],#8 ++ movi v6.16b,#8 // borrow v6.16b ++ st1 {v3.4s},[x2],#16 ++ sub v2.16b,v2.16b,v6.16b // adjust the mask ++ ++Loop192: ++ tbl v6.16b,{v4.16b},v2.16b ++ ext v5.16b,v0.16b,v3.16b,#12 ++ st1 {v4.8b},[x2],#8 ++ aese v6.16b,v0.16b ++ subs w1,w1,#1 ++ ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v3.16b,v3.16b,v5.16b ++ ++ dup v5.4s,v3.s[3] ++ eor v5.16b,v5.16b,v4.16b ++ eor v6.16b,v6.16b,v1.16b ++ ext v4.16b,v0.16b,v4.16b,#12 ++ shl v1.16b,v1.16b,#1 ++ eor v4.16b,v4.16b,v5.16b ++ eor v3.16b,v3.16b,v6.16b ++ eor v4.16b,v4.16b,v6.16b ++ st1 {v3.4s},[x2],#16 ++ b.ne Loop192 ++ ++ mov w12,#12 ++ add x2,x2,#0x20 ++ b Ldone ++ ++.align 4 ++L256: ++ ld1 {v4.16b},[x0] ++ mov w1,#7 ++ mov w12,#14 ++ st1 {v3.4s},[x2],#16 ++ ++Loop256: ++ tbl v6.16b,{v4.16b},v2.16b ++ ext v5.16b,v0.16b,v3.16b,#12 ++ st1 {v4.4s},[x2],#16 ++ aese v6.16b,v0.16b ++ subs w1,w1,#1 ++ ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v3.16b,v3.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v6.16b,v6.16b,v1.16b ++ eor v3.16b,v3.16b,v5.16b ++ shl v1.16b,v1.16b,#1 ++ eor 
v3.16b,v3.16b,v6.16b ++ st1 {v3.4s},[x2],#16 ++ b.eq Ldone ++ ++ dup v6.4s,v3.s[3] // just splat ++ ext v5.16b,v0.16b,v4.16b,#12 ++ aese v6.16b,v0.16b ++ ++ eor v4.16b,v4.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v4.16b,v4.16b,v5.16b ++ ext v5.16b,v0.16b,v5.16b,#12 ++ eor v4.16b,v4.16b,v5.16b ++ ++ eor v4.16b,v4.16b,v6.16b ++ b Loop256 ++ ++Ldone: ++ str w12,[x2] ++ mov x3,#0 ++ ++Lenc_key_abort: ++ mov x0,x3 // return value ++ ldr x29,[sp],#16 ++ ret ++ ++ ++.globl _aes_hw_set_decrypt_key ++.private_extern _aes_hw_set_decrypt_key ++ ++.align 5 ++_aes_hw_set_decrypt_key: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ bl Lenc_key ++ ++ cmp x0,#0 ++ b.ne Ldec_key_abort ++ ++ sub x2,x2,#240 // restore original x2 ++ mov x4,#-16 ++ add x0,x2,x12,lsl#4 // end of key schedule ++ ++ ld1 {v0.4s},[x2] ++ ld1 {v1.4s},[x0] ++ st1 {v0.4s},[x0],x4 ++ st1 {v1.4s},[x2],#16 ++ ++Loop_imc: ++ ld1 {v0.4s},[x2] ++ ld1 {v1.4s},[x0] ++ aesimc v0.16b,v0.16b ++ aesimc v1.16b,v1.16b ++ st1 {v0.4s},[x0],x4 ++ st1 {v1.4s},[x2],#16 ++ cmp x0,x2 ++ b.hi Loop_imc ++ ++ ld1 {v0.4s},[x2] ++ aesimc v0.16b,v0.16b ++ st1 {v0.4s},[x0] ++ ++ eor x0,x0,x0 // return value ++Ldec_key_abort: ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++.globl _aes_hw_encrypt ++.private_extern _aes_hw_encrypt ++ ++.align 5 ++_aes_hw_encrypt: ++ AARCH64_VALID_CALL_TARGET ++ ldr w3,[x2,#240] ++ ld1 {v0.4s},[x2],#16 ++ ld1 {v2.16b},[x0] ++ sub w3,w3,#2 ++ ld1 {v1.4s},[x2],#16 ++ ++Loop_enc: ++ aese v2.16b,v0.16b ++ aesmc v2.16b,v2.16b ++ ld1 {v0.4s},[x2],#16 ++ subs w3,w3,#2 ++ aese v2.16b,v1.16b ++ aesmc v2.16b,v2.16b ++ ld1 {v1.4s},[x2],#16 ++ b.gt Loop_enc ++ ++ aese v2.16b,v0.16b ++ aesmc v2.16b,v2.16b ++ ld1 {v0.4s},[x2] ++ aese v2.16b,v1.16b ++ eor v2.16b,v2.16b,v0.16b ++ ++ st1 {v2.16b},[x1] ++ ret ++ ++.globl _aes_hw_decrypt ++.private_extern _aes_hw_decrypt ++ ++.align 5 ++_aes_hw_decrypt: ++ AARCH64_VALID_CALL_TARGET ++ ldr w3,[x2,#240] ++ ld1 {v0.4s},[x2],#16 ++ ld1 {v2.16b},[x0] ++ sub w3,w3,#2 ++ ld1 {v1.4s},[x2],#16 ++ ++Loop_dec: ++ aesd v2.16b,v0.16b ++ aesimc v2.16b,v2.16b ++ ld1 {v0.4s},[x2],#16 ++ subs w3,w3,#2 ++ aesd v2.16b,v1.16b ++ aesimc v2.16b,v2.16b ++ ld1 {v1.4s},[x2],#16 ++ b.gt Loop_dec ++ ++ aesd v2.16b,v0.16b ++ aesimc v2.16b,v2.16b ++ ld1 {v0.4s},[x2] ++ aesd v2.16b,v1.16b ++ eor v2.16b,v2.16b,v0.16b ++ ++ st1 {v2.16b},[x1] ++ ret ++ ++.globl _aes_hw_cbc_encrypt ++.private_extern _aes_hw_cbc_encrypt ++ ++.align 5 ++_aes_hw_cbc_encrypt: ++ // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. ++ AARCH64_VALID_CALL_TARGET ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ subs x2,x2,#16 ++ mov x8,#16 ++ b.lo Lcbc_abort ++ csel x8,xzr,x8,eq ++ ++ cmp w5,#0 // en- or decrypting? ++ ldr w5,[x3,#240] ++ and x2,x2,#-16 ++ ld1 {v6.16b},[x4] ++ ld1 {v0.16b},[x0],x8 ++ ++ ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
++ sub w5,w5,#6 ++ add x7,x3,x5,lsl#4 // pointer to last 7 round keys ++ sub w5,w5,#2 ++ ld1 {v18.4s,v19.4s},[x7],#32 ++ ld1 {v20.4s,v21.4s},[x7],#32 ++ ld1 {v22.4s,v23.4s},[x7],#32 ++ ld1 {v7.4s},[x7] ++ ++ add x7,x3,#32 ++ mov w6,w5 ++ b.eq Lcbc_dec ++ ++ cmp w5,#2 ++ eor v0.16b,v0.16b,v6.16b ++ eor v5.16b,v16.16b,v7.16b ++ b.eq Lcbc_enc128 ++ ++ ld1 {v2.4s,v3.4s},[x7] ++ add x7,x3,#16 ++ add x6,x3,#16*4 ++ add x12,x3,#16*5 ++ aese v0.16b,v16.16b ++ aesmc v0.16b,v0.16b ++ add x14,x3,#16*6 ++ add x3,x3,#16*7 ++ b Lenter_cbc_enc ++ ++.align 4 ++Loop_cbc_enc: ++ aese v0.16b,v16.16b ++ aesmc v0.16b,v0.16b ++ st1 {v6.16b},[x1],#16 ++Lenter_cbc_enc: ++ aese v0.16b,v17.16b ++ aesmc v0.16b,v0.16b ++ aese v0.16b,v2.16b ++ aesmc v0.16b,v0.16b ++ ld1 {v16.4s},[x6] ++ cmp w5,#4 ++ aese v0.16b,v3.16b ++ aesmc v0.16b,v0.16b ++ ld1 {v17.4s},[x12] ++ b.eq Lcbc_enc192 ++ ++ aese v0.16b,v16.16b ++ aesmc v0.16b,v0.16b ++ ld1 {v16.4s},[x14] ++ aese v0.16b,v17.16b ++ aesmc v0.16b,v0.16b ++ ld1 {v17.4s},[x3] ++ nop ++ ++Lcbc_enc192: ++ aese v0.16b,v16.16b ++ aesmc v0.16b,v0.16b ++ subs x2,x2,#16 ++ aese v0.16b,v17.16b ++ aesmc v0.16b,v0.16b ++ csel x8,xzr,x8,eq ++ aese v0.16b,v18.16b ++ aesmc v0.16b,v0.16b ++ aese v0.16b,v19.16b ++ aesmc v0.16b,v0.16b ++ ld1 {v16.16b},[x0],x8 ++ aese v0.16b,v20.16b ++ aesmc v0.16b,v0.16b ++ eor v16.16b,v16.16b,v5.16b ++ aese v0.16b,v21.16b ++ aesmc v0.16b,v0.16b ++ ld1 {v17.4s},[x7] // re-pre-load rndkey[1] ++ aese v0.16b,v22.16b ++ aesmc v0.16b,v0.16b ++ aese v0.16b,v23.16b ++ eor v6.16b,v0.16b,v7.16b ++ b.hs Loop_cbc_enc ++ ++ st1 {v6.16b},[x1],#16 ++ b Lcbc_done ++ ++.align 5 ++Lcbc_enc128: ++ ld1 {v2.4s,v3.4s},[x7] ++ aese v0.16b,v16.16b ++ aesmc v0.16b,v0.16b ++ b Lenter_cbc_enc128 ++Loop_cbc_enc128: ++ aese v0.16b,v16.16b ++ aesmc v0.16b,v0.16b ++ st1 {v6.16b},[x1],#16 ++Lenter_cbc_enc128: ++ aese v0.16b,v17.16b ++ aesmc v0.16b,v0.16b ++ subs x2,x2,#16 ++ aese v0.16b,v2.16b ++ aesmc v0.16b,v0.16b ++ csel x8,xzr,x8,eq ++ aese v0.16b,v3.16b ++ aesmc v0.16b,v0.16b ++ aese v0.16b,v18.16b ++ aesmc v0.16b,v0.16b ++ aese v0.16b,v19.16b ++ aesmc v0.16b,v0.16b ++ ld1 {v16.16b},[x0],x8 ++ aese v0.16b,v20.16b ++ aesmc v0.16b,v0.16b ++ aese v0.16b,v21.16b ++ aesmc v0.16b,v0.16b ++ aese v0.16b,v22.16b ++ aesmc v0.16b,v0.16b ++ eor v16.16b,v16.16b,v5.16b ++ aese v0.16b,v23.16b ++ eor v6.16b,v0.16b,v7.16b ++ b.hs Loop_cbc_enc128 ++ ++ st1 {v6.16b},[x1],#16 ++ b Lcbc_done ++.align 5 ++Lcbc_dec: ++ ld1 {v18.16b},[x0],#16 ++ subs x2,x2,#32 // bias ++ add w6,w5,#2 ++ orr v3.16b,v0.16b,v0.16b ++ orr v1.16b,v0.16b,v0.16b ++ orr v19.16b,v18.16b,v18.16b ++ b.lo Lcbc_dec_tail ++ ++ orr v1.16b,v18.16b,v18.16b ++ ld1 {v18.16b},[x0],#16 ++ orr v2.16b,v0.16b,v0.16b ++ orr v3.16b,v1.16b,v1.16b ++ orr v19.16b,v18.16b,v18.16b ++ ++Loop3x_cbc_dec: ++ aesd v0.16b,v16.16b ++ aesimc v0.16b,v0.16b ++ aesd v1.16b,v16.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v16.16b ++ aesimc v18.16b,v18.16b ++ ld1 {v16.4s},[x7],#16 ++ subs w6,w6,#2 ++ aesd v0.16b,v17.16b ++ aesimc v0.16b,v0.16b ++ aesd v1.16b,v17.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v17.16b ++ aesimc v18.16b,v18.16b ++ ld1 {v17.4s},[x7],#16 ++ b.gt Loop3x_cbc_dec ++ ++ aesd v0.16b,v16.16b ++ aesimc v0.16b,v0.16b ++ aesd v1.16b,v16.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v16.16b ++ aesimc v18.16b,v18.16b ++ eor v4.16b,v6.16b,v7.16b ++ subs x2,x2,#0x30 ++ eor v5.16b,v2.16b,v7.16b ++ csel x6,x2,x6,lo // x6, w6, is zero at this point ++ aesd v0.16b,v17.16b ++ aesimc v0.16b,v0.16b ++ aesd v1.16b,v17.16b ++ aesimc v1.16b,v1.16b ++ aesd 
v18.16b,v17.16b ++ aesimc v18.16b,v18.16b ++ eor v17.16b,v3.16b,v7.16b ++ add x0,x0,x6 // x0 is adjusted in such way that ++ // at exit from the loop v1.16b-v18.16b ++ // are loaded with last "words" ++ orr v6.16b,v19.16b,v19.16b ++ mov x7,x3 ++ aesd v0.16b,v20.16b ++ aesimc v0.16b,v0.16b ++ aesd v1.16b,v20.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v20.16b ++ aesimc v18.16b,v18.16b ++ ld1 {v2.16b},[x0],#16 ++ aesd v0.16b,v21.16b ++ aesimc v0.16b,v0.16b ++ aesd v1.16b,v21.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v21.16b ++ aesimc v18.16b,v18.16b ++ ld1 {v3.16b},[x0],#16 ++ aesd v0.16b,v22.16b ++ aesimc v0.16b,v0.16b ++ aesd v1.16b,v22.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v22.16b ++ aesimc v18.16b,v18.16b ++ ld1 {v19.16b},[x0],#16 ++ aesd v0.16b,v23.16b ++ aesd v1.16b,v23.16b ++ aesd v18.16b,v23.16b ++ ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] ++ add w6,w5,#2 ++ eor v4.16b,v4.16b,v0.16b ++ eor v5.16b,v5.16b,v1.16b ++ eor v18.16b,v18.16b,v17.16b ++ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] ++ st1 {v4.16b},[x1],#16 ++ orr v0.16b,v2.16b,v2.16b ++ st1 {v5.16b},[x1],#16 ++ orr v1.16b,v3.16b,v3.16b ++ st1 {v18.16b},[x1],#16 ++ orr v18.16b,v19.16b,v19.16b ++ b.hs Loop3x_cbc_dec ++ ++ cmn x2,#0x30 ++ b.eq Lcbc_done ++ nop ++ ++Lcbc_dec_tail: ++ aesd v1.16b,v16.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v16.16b ++ aesimc v18.16b,v18.16b ++ ld1 {v16.4s},[x7],#16 ++ subs w6,w6,#2 ++ aesd v1.16b,v17.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v17.16b ++ aesimc v18.16b,v18.16b ++ ld1 {v17.4s},[x7],#16 ++ b.gt Lcbc_dec_tail ++ ++ aesd v1.16b,v16.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v16.16b ++ aesimc v18.16b,v18.16b ++ aesd v1.16b,v17.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v17.16b ++ aesimc v18.16b,v18.16b ++ aesd v1.16b,v20.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v20.16b ++ aesimc v18.16b,v18.16b ++ cmn x2,#0x20 ++ aesd v1.16b,v21.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v21.16b ++ aesimc v18.16b,v18.16b ++ eor v5.16b,v6.16b,v7.16b ++ aesd v1.16b,v22.16b ++ aesimc v1.16b,v1.16b ++ aesd v18.16b,v22.16b ++ aesimc v18.16b,v18.16b ++ eor v17.16b,v3.16b,v7.16b ++ aesd v1.16b,v23.16b ++ aesd v18.16b,v23.16b ++ b.eq Lcbc_dec_one ++ eor v5.16b,v5.16b,v1.16b ++ eor v17.16b,v17.16b,v18.16b ++ orr v6.16b,v19.16b,v19.16b ++ st1 {v5.16b},[x1],#16 ++ st1 {v17.16b},[x1],#16 ++ b Lcbc_done ++ ++Lcbc_dec_one: ++ eor v5.16b,v5.16b,v18.16b ++ orr v6.16b,v19.16b,v19.16b ++ st1 {v5.16b},[x1],#16 ++ ++Lcbc_done: ++ st1 {v6.16b},[x4] ++Lcbc_abort: ++ ldr x29,[sp],#16 ++ ret ++ ++.globl _aes_hw_ctr32_encrypt_blocks ++.private_extern _aes_hw_ctr32_encrypt_blocks ++ ++.align 5 ++_aes_hw_ctr32_encrypt_blocks: ++ // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. ++ AARCH64_VALID_CALL_TARGET ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ ldr w5,[x3,#240] ++ ++ ldr w8, [x4, #12] ++ ld1 {v0.4s},[x4] ++ ++ ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
++ sub w5,w5,#4 ++ mov x12,#16 ++ cmp x2,#2 ++ add x7,x3,x5,lsl#4 // pointer to last 5 round keys ++ sub w5,w5,#2 ++ ld1 {v20.4s,v21.4s},[x7],#32 ++ ld1 {v22.4s,v23.4s},[x7],#32 ++ ld1 {v7.4s},[x7] ++ add x7,x3,#32 ++ mov w6,w5 ++ csel x12,xzr,x12,lo ++ ++ // ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are ++ // affected by silicon errata #1742098 [0] and #1655431 [1], ++ // respectively, where the second instruction of an aese/aesmc ++ // instruction pair may execute twice if an interrupt is taken right ++ // after the first instruction consumes an input register of which a ++ // single 32-bit lane has been updated the last time it was modified. ++ // ++ // This function uses a counter in one 32-bit lane. The vmov lines ++ // could write to v1.16b and v18.16b directly, but that trips this bugs. ++ // We write to v6.16b and copy to the final register as a workaround. ++ // ++ // [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice ++ // [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice ++#ifndef __AARCH64EB__ ++ rev w8, w8 ++#endif ++ add w10, w8, #1 ++ orr v6.16b,v0.16b,v0.16b ++ rev w10, w10 ++ mov v6.s[3],w10 ++ add w8, w8, #2 ++ orr v1.16b,v6.16b,v6.16b ++ b.ls Lctr32_tail ++ rev w12, w8 ++ mov v6.s[3],w12 ++ sub x2,x2,#3 // bias ++ orr v18.16b,v6.16b,v6.16b ++ b Loop3x_ctr32 ++ ++.align 4 ++Loop3x_ctr32: ++ aese v0.16b,v16.16b ++ aesmc v0.16b,v0.16b ++ aese v1.16b,v16.16b ++ aesmc v1.16b,v1.16b ++ aese v18.16b,v16.16b ++ aesmc v18.16b,v18.16b ++ ld1 {v16.4s},[x7],#16 ++ subs w6,w6,#2 ++ aese v0.16b,v17.16b ++ aesmc v0.16b,v0.16b ++ aese v1.16b,v17.16b ++ aesmc v1.16b,v1.16b ++ aese v18.16b,v17.16b ++ aesmc v18.16b,v18.16b ++ ld1 {v17.4s},[x7],#16 ++ b.gt Loop3x_ctr32 ++ ++ aese v0.16b,v16.16b ++ aesmc v4.16b,v0.16b ++ aese v1.16b,v16.16b ++ aesmc v5.16b,v1.16b ++ ld1 {v2.16b},[x0],#16 ++ add w9,w8,#1 ++ aese v18.16b,v16.16b ++ aesmc v18.16b,v18.16b ++ ld1 {v3.16b},[x0],#16 ++ rev w9,w9 ++ aese v4.16b,v17.16b ++ aesmc v4.16b,v4.16b ++ aese v5.16b,v17.16b ++ aesmc v5.16b,v5.16b ++ ld1 {v19.16b},[x0],#16 ++ mov x7,x3 ++ aese v18.16b,v17.16b ++ aesmc v17.16b,v18.16b ++ aese v4.16b,v20.16b ++ aesmc v4.16b,v4.16b ++ aese v5.16b,v20.16b ++ aesmc v5.16b,v5.16b ++ eor v2.16b,v2.16b,v7.16b ++ add w10,w8,#2 ++ aese v17.16b,v20.16b ++ aesmc v17.16b,v17.16b ++ eor v3.16b,v3.16b,v7.16b ++ add w8,w8,#3 ++ aese v4.16b,v21.16b ++ aesmc v4.16b,v4.16b ++ aese v5.16b,v21.16b ++ aesmc v5.16b,v5.16b ++ // Note the logic to update v0.16b, v1.16b, and v1.16b is written to work ++ // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in ++ // 32-bit mode. See the comment above. 
++ eor v19.16b,v19.16b,v7.16b ++ mov v6.s[3], w9 ++ aese v17.16b,v21.16b ++ aesmc v17.16b,v17.16b ++ orr v0.16b,v6.16b,v6.16b ++ rev w10,w10 ++ aese v4.16b,v22.16b ++ aesmc v4.16b,v4.16b ++ mov v6.s[3], w10 ++ rev w12,w8 ++ aese v5.16b,v22.16b ++ aesmc v5.16b,v5.16b ++ orr v1.16b,v6.16b,v6.16b ++ mov v6.s[3], w12 ++ aese v17.16b,v22.16b ++ aesmc v17.16b,v17.16b ++ orr v18.16b,v6.16b,v6.16b ++ subs x2,x2,#3 ++ aese v4.16b,v23.16b ++ aese v5.16b,v23.16b ++ aese v17.16b,v23.16b ++ ++ eor v2.16b,v2.16b,v4.16b ++ ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] ++ st1 {v2.16b},[x1],#16 ++ eor v3.16b,v3.16b,v5.16b ++ mov w6,w5 ++ st1 {v3.16b},[x1],#16 ++ eor v19.16b,v19.16b,v17.16b ++ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] ++ st1 {v19.16b},[x1],#16 ++ b.hs Loop3x_ctr32 ++ ++ adds x2,x2,#3 ++ b.eq Lctr32_done ++ cmp x2,#1 ++ mov x12,#16 ++ csel x12,xzr,x12,eq ++ ++Lctr32_tail: ++ aese v0.16b,v16.16b ++ aesmc v0.16b,v0.16b ++ aese v1.16b,v16.16b ++ aesmc v1.16b,v1.16b ++ ld1 {v16.4s},[x7],#16 ++ subs w6,w6,#2 ++ aese v0.16b,v17.16b ++ aesmc v0.16b,v0.16b ++ aese v1.16b,v17.16b ++ aesmc v1.16b,v1.16b ++ ld1 {v17.4s},[x7],#16 ++ b.gt Lctr32_tail ++ ++ aese v0.16b,v16.16b ++ aesmc v0.16b,v0.16b ++ aese v1.16b,v16.16b ++ aesmc v1.16b,v1.16b ++ aese v0.16b,v17.16b ++ aesmc v0.16b,v0.16b ++ aese v1.16b,v17.16b ++ aesmc v1.16b,v1.16b ++ ld1 {v2.16b},[x0],x12 ++ aese v0.16b,v20.16b ++ aesmc v0.16b,v0.16b ++ aese v1.16b,v20.16b ++ aesmc v1.16b,v1.16b ++ ld1 {v3.16b},[x0] ++ aese v0.16b,v21.16b ++ aesmc v0.16b,v0.16b ++ aese v1.16b,v21.16b ++ aesmc v1.16b,v1.16b ++ eor v2.16b,v2.16b,v7.16b ++ aese v0.16b,v22.16b ++ aesmc v0.16b,v0.16b ++ aese v1.16b,v22.16b ++ aesmc v1.16b,v1.16b ++ eor v3.16b,v3.16b,v7.16b ++ aese v0.16b,v23.16b ++ aese v1.16b,v23.16b ++ ++ cmp x2,#1 ++ eor v2.16b,v2.16b,v0.16b ++ eor v3.16b,v3.16b,v1.16b ++ st1 {v2.16b},[x1],#16 ++ b.eq Lctr32_done ++ st1 {v3.16b},[x1] ++ ++Lctr32_done: ++ ldr x29,[sp],#16 ++ ret ++ ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-aarch64/crypto/fipsmodule/armv8-mont.S b/apple-aarch64/crypto/fipsmodule/armv8-mont.S +new file mode 100644 +index 0000000..2493ae0 +--- /dev/null ++++ b/apple-aarch64/crypto/fipsmodule/armv8-mont.S +@@ -0,0 +1,1433 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++.text ++ ++.globl _bn_mul_mont ++.private_extern _bn_mul_mont ++ ++.align 5 ++_bn_mul_mont: ++ AARCH64_SIGN_LINK_REGISTER ++ tst x5,#7 ++ b.eq __bn_sqr8x_mont ++ tst x5,#3 ++ b.eq __bn_mul4x_mont ++Lmul_mont: ++ stp x29,x30,[sp,#-64]! ++ add x29,sp,#0 ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ ++ ldr x9,[x2],#8 // bp[0] ++ sub x22,sp,x5,lsl#3 ++ ldp x7,x8,[x1],#16 // ap[0..1] ++ lsl x5,x5,#3 ++ ldr x4,[x4] // *n0 ++ and x22,x22,#-16 // ABI says so ++ ldp x13,x14,[x3],#16 // np[0..1] ++ ++ mul x6,x7,x9 // ap[0]*bp[0] ++ sub x21,x5,#16 // j=num-2 ++ umulh x7,x7,x9 ++ mul x10,x8,x9 // ap[1]*bp[0] ++ umulh x11,x8,x9 ++ ++ mul x15,x6,x4 // "tp[0]"*n0 ++ mov sp,x22 // alloca ++ ++ // (*) mul x12,x13,x15 // np[0]*m1 ++ umulh x13,x13,x15 ++ mul x16,x14,x15 // np[1]*m1 ++ // (*) adds x12,x12,x6 // discarded ++ // (*) As for removal of first multiplication and addition ++ // instructions. 
The outcome of first addition is ++ // guaranteed to be zero, which leaves two computationally ++ // significant outcomes: it either carries or not. Then ++ // question is when does it carry? Is there alternative ++ // way to deduce it? If you follow operations, you can ++ // observe that condition for carry is quite simple: ++ // x6 being non-zero. So that carry can be calculated ++ // by adding -1 to x6. That's what next instruction does. ++ subs xzr,x6,#1 // (*) ++ umulh x17,x14,x15 ++ adc x13,x13,xzr ++ cbz x21,L1st_skip ++ ++L1st: ++ ldr x8,[x1],#8 ++ adds x6,x10,x7 ++ sub x21,x21,#8 // j-- ++ adc x7,x11,xzr ++ ++ ldr x14,[x3],#8 ++ adds x12,x16,x13 ++ mul x10,x8,x9 // ap[j]*bp[0] ++ adc x13,x17,xzr ++ umulh x11,x8,x9 ++ ++ adds x12,x12,x6 ++ mul x16,x14,x15 // np[j]*m1 ++ adc x13,x13,xzr ++ umulh x17,x14,x15 ++ str x12,[x22],#8 // tp[j-1] ++ cbnz x21,L1st ++ ++L1st_skip: ++ adds x6,x10,x7 ++ sub x1,x1,x5 // rewind x1 ++ adc x7,x11,xzr ++ ++ adds x12,x16,x13 ++ sub x3,x3,x5 // rewind x3 ++ adc x13,x17,xzr ++ ++ adds x12,x12,x6 ++ sub x20,x5,#8 // i=num-1 ++ adcs x13,x13,x7 ++ ++ adc x19,xzr,xzr // upmost overflow bit ++ stp x12,x13,[x22] ++ ++Louter: ++ ldr x9,[x2],#8 // bp[i] ++ ldp x7,x8,[x1],#16 ++ ldr x23,[sp] // tp[0] ++ add x22,sp,#8 ++ ++ mul x6,x7,x9 // ap[0]*bp[i] ++ sub x21,x5,#16 // j=num-2 ++ umulh x7,x7,x9 ++ ldp x13,x14,[x3],#16 ++ mul x10,x8,x9 // ap[1]*bp[i] ++ adds x6,x6,x23 ++ umulh x11,x8,x9 ++ adc x7,x7,xzr ++ ++ mul x15,x6,x4 ++ sub x20,x20,#8 // i-- ++ ++ // (*) mul x12,x13,x15 // np[0]*m1 ++ umulh x13,x13,x15 ++ mul x16,x14,x15 // np[1]*m1 ++ // (*) adds x12,x12,x6 ++ subs xzr,x6,#1 // (*) ++ umulh x17,x14,x15 ++ cbz x21,Linner_skip ++ ++Linner: ++ ldr x8,[x1],#8 ++ adc x13,x13,xzr ++ ldr x23,[x22],#8 // tp[j] ++ adds x6,x10,x7 ++ sub x21,x21,#8 // j-- ++ adc x7,x11,xzr ++ ++ adds x12,x16,x13 ++ ldr x14,[x3],#8 ++ adc x13,x17,xzr ++ ++ mul x10,x8,x9 // ap[j]*bp[i] ++ adds x6,x6,x23 ++ umulh x11,x8,x9 ++ adc x7,x7,xzr ++ ++ mul x16,x14,x15 // np[j]*m1 ++ adds x12,x12,x6 ++ umulh x17,x14,x15 ++ str x12,[x22,#-16] // tp[j-1] ++ cbnz x21,Linner ++ ++Linner_skip: ++ ldr x23,[x22],#8 // tp[j] ++ adc x13,x13,xzr ++ adds x6,x10,x7 ++ sub x1,x1,x5 // rewind x1 ++ adc x7,x11,xzr ++ ++ adds x12,x16,x13 ++ sub x3,x3,x5 // rewind x3 ++ adcs x13,x17,x19 ++ adc x19,xzr,xzr ++ ++ adds x6,x6,x23 ++ adc x7,x7,xzr ++ ++ adds x12,x12,x6 ++ adcs x13,x13,x7 ++ adc x19,x19,xzr // upmost overflow bit ++ stp x12,x13,[x22,#-16] ++ ++ cbnz x20,Louter ++ ++ // Final step. We see if result is larger than modulus, and ++ // if it is, subtract the modulus. But comparison implies ++ // subtraction. So we subtract modulus, see if it borrowed, ++ // and conditionally copy original value. ++ ldr x23,[sp] // tp[0] ++ add x22,sp,#8 ++ ldr x14,[x3],#8 // np[0] ++ subs x21,x5,#8 // j=num-1 and clear borrow ++ mov x1,x0 ++Lsub: ++ sbcs x8,x23,x14 // tp[j]-np[j] ++ ldr x23,[x22],#8 ++ sub x21,x21,#8 // j-- ++ ldr x14,[x3],#8 ++ str x8,[x1],#8 // rp[j]=tp[j]-np[j] ++ cbnz x21,Lsub ++ ++ sbcs x8,x23,x14 ++ sbcs x19,x19,xzr // did it borrow? ++ str x8,[x1],#8 // rp[num-1] ++ ++ ldr x23,[sp] // tp[0] ++ add x22,sp,#8 ++ ldr x8,[x0],#8 // rp[0] ++ sub x5,x5,#8 // num-- ++ nop ++Lcond_copy: ++ sub x5,x5,#8 // num-- ++ csel x14,x23,x8,lo // did it borrow? 
++ ldr x23,[x22],#8 ++ ldr x8,[x0],#8 ++ str xzr,[x22,#-16] // wipe tp ++ str x14,[x0,#-16] ++ cbnz x5,Lcond_copy ++ ++ csel x14,x23,x8,lo ++ str xzr,[x22,#-8] // wipe tp ++ str x14,[x0,#-8] ++ ++ ldp x19,x20,[x29,#16] ++ mov sp,x29 ++ ldp x21,x22,[x29,#32] ++ mov x0,#1 ++ ldp x23,x24,[x29,#48] ++ ldr x29,[sp],#64 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++.align 5 ++__bn_sqr8x_mont: ++ // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to ++ // only from bn_mul_mont which has already signed the return address. ++ cmp x1,x2 ++ b.ne __bn_mul4x_mont ++Lsqr8x_mont: ++ stp x29,x30,[sp,#-128]! ++ add x29,sp,#0 ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ stp x0,x3,[sp,#96] // offload rp and np ++ ++ ldp x6,x7,[x1,#8*0] ++ ldp x8,x9,[x1,#8*2] ++ ldp x10,x11,[x1,#8*4] ++ ldp x12,x13,[x1,#8*6] ++ ++ sub x2,sp,x5,lsl#4 ++ lsl x5,x5,#3 ++ ldr x4,[x4] // *n0 ++ mov sp,x2 // alloca ++ sub x27,x5,#8*8 ++ b Lsqr8x_zero_start ++ ++Lsqr8x_zero: ++ sub x27,x27,#8*8 ++ stp xzr,xzr,[x2,#8*0] ++ stp xzr,xzr,[x2,#8*2] ++ stp xzr,xzr,[x2,#8*4] ++ stp xzr,xzr,[x2,#8*6] ++Lsqr8x_zero_start: ++ stp xzr,xzr,[x2,#8*8] ++ stp xzr,xzr,[x2,#8*10] ++ stp xzr,xzr,[x2,#8*12] ++ stp xzr,xzr,[x2,#8*14] ++ add x2,x2,#8*16 ++ cbnz x27,Lsqr8x_zero ++ ++ add x3,x1,x5 ++ add x1,x1,#8*8 ++ mov x19,xzr ++ mov x20,xzr ++ mov x21,xzr ++ mov x22,xzr ++ mov x23,xzr ++ mov x24,xzr ++ mov x25,xzr ++ mov x26,xzr ++ mov x2,sp ++ str x4,[x29,#112] // offload n0 ++ ++ // Multiply everything but a[i]*a[i] ++.align 4 ++Lsqr8x_outer_loop: ++ // a[1]a[0] (i) ++ // a[2]a[0] ++ // a[3]a[0] ++ // a[4]a[0] ++ // a[5]a[0] ++ // a[6]a[0] ++ // a[7]a[0] ++ // a[2]a[1] (ii) ++ // a[3]a[1] ++ // a[4]a[1] ++ // a[5]a[1] ++ // a[6]a[1] ++ // a[7]a[1] ++ // a[3]a[2] (iii) ++ // a[4]a[2] ++ // a[5]a[2] ++ // a[6]a[2] ++ // a[7]a[2] ++ // a[4]a[3] (iv) ++ // a[5]a[3] ++ // a[6]a[3] ++ // a[7]a[3] ++ // a[5]a[4] (v) ++ // a[6]a[4] ++ // a[7]a[4] ++ // a[6]a[5] (vi) ++ // a[7]a[5] ++ // a[7]a[6] (vii) ++ ++ mul x14,x7,x6 // lo(a[1..7]*a[0]) (i) ++ mul x15,x8,x6 ++ mul x16,x9,x6 ++ mul x17,x10,x6 ++ adds x20,x20,x14 // t[1]+lo(a[1]*a[0]) ++ mul x14,x11,x6 ++ adcs x21,x21,x15 ++ mul x15,x12,x6 ++ adcs x22,x22,x16 ++ mul x16,x13,x6 ++ adcs x23,x23,x17 ++ umulh x17,x7,x6 // hi(a[1..7]*a[0]) ++ adcs x24,x24,x14 ++ umulh x14,x8,x6 ++ adcs x25,x25,x15 ++ umulh x15,x9,x6 ++ adcs x26,x26,x16 ++ umulh x16,x10,x6 ++ stp x19,x20,[x2],#8*2 // t[0..1] ++ adc x19,xzr,xzr // t[8] ++ adds x21,x21,x17 // t[2]+lo(a[1]*a[0]) ++ umulh x17,x11,x6 ++ adcs x22,x22,x14 ++ umulh x14,x12,x6 ++ adcs x23,x23,x15 ++ umulh x15,x13,x6 ++ adcs x24,x24,x16 ++ mul x16,x8,x7 // lo(a[2..7]*a[1]) (ii) ++ adcs x25,x25,x17 ++ mul x17,x9,x7 ++ adcs x26,x26,x14 ++ mul x14,x10,x7 ++ adc x19,x19,x15 ++ ++ mul x15,x11,x7 ++ adds x22,x22,x16 ++ mul x16,x12,x7 ++ adcs x23,x23,x17 ++ mul x17,x13,x7 ++ adcs x24,x24,x14 ++ umulh x14,x8,x7 // hi(a[2..7]*a[1]) ++ adcs x25,x25,x15 ++ umulh x15,x9,x7 ++ adcs x26,x26,x16 ++ umulh x16,x10,x7 ++ adcs x19,x19,x17 ++ umulh x17,x11,x7 ++ stp x21,x22,[x2],#8*2 // t[2..3] ++ adc x20,xzr,xzr // t[9] ++ adds x23,x23,x14 ++ umulh x14,x12,x7 ++ adcs x24,x24,x15 ++ umulh x15,x13,x7 ++ adcs x25,x25,x16 ++ mul x16,x9,x8 // lo(a[3..7]*a[2]) (iii) ++ adcs x26,x26,x17 ++ mul x17,x10,x8 ++ adcs x19,x19,x14 ++ mul x14,x11,x8 ++ adc x20,x20,x15 ++ ++ mul x15,x12,x8 ++ adds x24,x24,x16 ++ mul x16,x13,x8 ++ adcs x25,x25,x17 ++ umulh x17,x9,x8 // hi(a[3..7]*a[2]) ++ adcs 
x26,x26,x14 ++ umulh x14,x10,x8 ++ adcs x19,x19,x15 ++ umulh x15,x11,x8 ++ adcs x20,x20,x16 ++ umulh x16,x12,x8 ++ stp x23,x24,[x2],#8*2 // t[4..5] ++ adc x21,xzr,xzr // t[10] ++ adds x25,x25,x17 ++ umulh x17,x13,x8 ++ adcs x26,x26,x14 ++ mul x14,x10,x9 // lo(a[4..7]*a[3]) (iv) ++ adcs x19,x19,x15 ++ mul x15,x11,x9 ++ adcs x20,x20,x16 ++ mul x16,x12,x9 ++ adc x21,x21,x17 ++ ++ mul x17,x13,x9 ++ adds x26,x26,x14 ++ umulh x14,x10,x9 // hi(a[4..7]*a[3]) ++ adcs x19,x19,x15 ++ umulh x15,x11,x9 ++ adcs x20,x20,x16 ++ umulh x16,x12,x9 ++ adcs x21,x21,x17 ++ umulh x17,x13,x9 ++ stp x25,x26,[x2],#8*2 // t[6..7] ++ adc x22,xzr,xzr // t[11] ++ adds x19,x19,x14 ++ mul x14,x11,x10 // lo(a[5..7]*a[4]) (v) ++ adcs x20,x20,x15 ++ mul x15,x12,x10 ++ adcs x21,x21,x16 ++ mul x16,x13,x10 ++ adc x22,x22,x17 ++ ++ umulh x17,x11,x10 // hi(a[5..7]*a[4]) ++ adds x20,x20,x14 ++ umulh x14,x12,x10 ++ adcs x21,x21,x15 ++ umulh x15,x13,x10 ++ adcs x22,x22,x16 ++ mul x16,x12,x11 // lo(a[6..7]*a[5]) (vi) ++ adc x23,xzr,xzr // t[12] ++ adds x21,x21,x17 ++ mul x17,x13,x11 ++ adcs x22,x22,x14 ++ umulh x14,x12,x11 // hi(a[6..7]*a[5]) ++ adc x23,x23,x15 ++ ++ umulh x15,x13,x11 ++ adds x22,x22,x16 ++ mul x16,x13,x12 // lo(a[7]*a[6]) (vii) ++ adcs x23,x23,x17 ++ umulh x17,x13,x12 // hi(a[7]*a[6]) ++ adc x24,xzr,xzr // t[13] ++ adds x23,x23,x14 ++ sub x27,x3,x1 // done yet? ++ adc x24,x24,x15 ++ ++ adds x24,x24,x16 ++ sub x14,x3,x5 // rewinded ap ++ adc x25,xzr,xzr // t[14] ++ add x25,x25,x17 ++ ++ cbz x27,Lsqr8x_outer_break ++ ++ mov x4,x6 ++ ldp x6,x7,[x2,#8*0] ++ ldp x8,x9,[x2,#8*2] ++ ldp x10,x11,[x2,#8*4] ++ ldp x12,x13,[x2,#8*6] ++ adds x19,x19,x6 ++ adcs x20,x20,x7 ++ ldp x6,x7,[x1,#8*0] ++ adcs x21,x21,x8 ++ adcs x22,x22,x9 ++ ldp x8,x9,[x1,#8*2] ++ adcs x23,x23,x10 ++ adcs x24,x24,x11 ++ ldp x10,x11,[x1,#8*4] ++ adcs x25,x25,x12 ++ mov x0,x1 ++ adcs x26,xzr,x13 ++ ldp x12,x13,[x1,#8*6] ++ add x1,x1,#8*8 ++ //adc x28,xzr,xzr // moved below ++ mov x27,#-8*8 ++ ++ // a[8]a[0] ++ // a[9]a[0] ++ // a[a]a[0] ++ // a[b]a[0] ++ // a[c]a[0] ++ // a[d]a[0] ++ // a[e]a[0] ++ // a[f]a[0] ++ // a[8]a[1] ++ // a[f]a[1]........................ ++ // a[8]a[2] ++ // a[f]a[2]........................ ++ // a[8]a[3] ++ // a[f]a[3]........................ ++ // a[8]a[4] ++ // a[f]a[4]........................ ++ // a[8]a[5] ++ // a[f]a[5]........................ ++ // a[8]a[6] ++ // a[f]a[6]........................ ++ // a[8]a[7] ++ // a[f]a[7]........................ ++Lsqr8x_mul: ++ mul x14,x6,x4 ++ adc x28,xzr,xzr // carry bit, modulo-scheduled ++ mul x15,x7,x4 ++ add x27,x27,#8 ++ mul x16,x8,x4 ++ mul x17,x9,x4 ++ adds x19,x19,x14 ++ mul x14,x10,x4 ++ adcs x20,x20,x15 ++ mul x15,x11,x4 ++ adcs x21,x21,x16 ++ mul x16,x12,x4 ++ adcs x22,x22,x17 ++ mul x17,x13,x4 ++ adcs x23,x23,x14 ++ umulh x14,x6,x4 ++ adcs x24,x24,x15 ++ umulh x15,x7,x4 ++ adcs x25,x25,x16 ++ umulh x16,x8,x4 ++ adcs x26,x26,x17 ++ umulh x17,x9,x4 ++ adc x28,x28,xzr ++ str x19,[x2],#8 ++ adds x19,x20,x14 ++ umulh x14,x10,x4 ++ adcs x20,x21,x15 ++ umulh x15,x11,x4 ++ adcs x21,x22,x16 ++ umulh x16,x12,x4 ++ adcs x22,x23,x17 ++ umulh x17,x13,x4 ++ ldr x4,[x0,x27] ++ adcs x23,x24,x14 ++ adcs x24,x25,x15 ++ adcs x25,x26,x16 ++ adcs x26,x28,x17 ++ //adc x28,xzr,xzr // moved above ++ cbnz x27,Lsqr8x_mul ++ // note that carry flag is guaranteed ++ // to be zero at this point ++ cmp x1,x3 // done yet? 
++ b.eq Lsqr8x_break ++ ++ ldp x6,x7,[x2,#8*0] ++ ldp x8,x9,[x2,#8*2] ++ ldp x10,x11,[x2,#8*4] ++ ldp x12,x13,[x2,#8*6] ++ adds x19,x19,x6 ++ ldr x4,[x0,#-8*8] ++ adcs x20,x20,x7 ++ ldp x6,x7,[x1,#8*0] ++ adcs x21,x21,x8 ++ adcs x22,x22,x9 ++ ldp x8,x9,[x1,#8*2] ++ adcs x23,x23,x10 ++ adcs x24,x24,x11 ++ ldp x10,x11,[x1,#8*4] ++ adcs x25,x25,x12 ++ mov x27,#-8*8 ++ adcs x26,x26,x13 ++ ldp x12,x13,[x1,#8*6] ++ add x1,x1,#8*8 ++ //adc x28,xzr,xzr // moved above ++ b Lsqr8x_mul ++ ++.align 4 ++Lsqr8x_break: ++ ldp x6,x7,[x0,#8*0] ++ add x1,x0,#8*8 ++ ldp x8,x9,[x0,#8*2] ++ sub x14,x3,x1 // is it last iteration? ++ ldp x10,x11,[x0,#8*4] ++ sub x15,x2,x14 ++ ldp x12,x13,[x0,#8*6] ++ cbz x14,Lsqr8x_outer_loop ++ ++ stp x19,x20,[x2,#8*0] ++ ldp x19,x20,[x15,#8*0] ++ stp x21,x22,[x2,#8*2] ++ ldp x21,x22,[x15,#8*2] ++ stp x23,x24,[x2,#8*4] ++ ldp x23,x24,[x15,#8*4] ++ stp x25,x26,[x2,#8*6] ++ mov x2,x15 ++ ldp x25,x26,[x15,#8*6] ++ b Lsqr8x_outer_loop ++ ++.align 4 ++Lsqr8x_outer_break: ++ // Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0] ++ ldp x7,x9,[x14,#8*0] // recall that x14 is &a[0] ++ ldp x15,x16,[sp,#8*1] ++ ldp x11,x13,[x14,#8*2] ++ add x1,x14,#8*4 ++ ldp x17,x14,[sp,#8*3] ++ ++ stp x19,x20,[x2,#8*0] ++ mul x19,x7,x7 ++ stp x21,x22,[x2,#8*2] ++ umulh x7,x7,x7 ++ stp x23,x24,[x2,#8*4] ++ mul x8,x9,x9 ++ stp x25,x26,[x2,#8*6] ++ mov x2,sp ++ umulh x9,x9,x9 ++ adds x20,x7,x15,lsl#1 ++ extr x15,x16,x15,#63 ++ sub x27,x5,#8*4 ++ ++Lsqr4x_shift_n_add: ++ adcs x21,x8,x15 ++ extr x16,x17,x16,#63 ++ sub x27,x27,#8*4 ++ adcs x22,x9,x16 ++ ldp x15,x16,[x2,#8*5] ++ mul x10,x11,x11 ++ ldp x7,x9,[x1],#8*2 ++ umulh x11,x11,x11 ++ mul x12,x13,x13 ++ umulh x13,x13,x13 ++ extr x17,x14,x17,#63 ++ stp x19,x20,[x2,#8*0] ++ adcs x23,x10,x17 ++ extr x14,x15,x14,#63 ++ stp x21,x22,[x2,#8*2] ++ adcs x24,x11,x14 ++ ldp x17,x14,[x2,#8*7] ++ extr x15,x16,x15,#63 ++ adcs x25,x12,x15 ++ extr x16,x17,x16,#63 ++ adcs x26,x13,x16 ++ ldp x15,x16,[x2,#8*9] ++ mul x6,x7,x7 ++ ldp x11,x13,[x1],#8*2 ++ umulh x7,x7,x7 ++ mul x8,x9,x9 ++ umulh x9,x9,x9 ++ stp x23,x24,[x2,#8*4] ++ extr x17,x14,x17,#63 ++ stp x25,x26,[x2,#8*6] ++ add x2,x2,#8*8 ++ adcs x19,x6,x17 ++ extr x14,x15,x14,#63 ++ adcs x20,x7,x14 ++ ldp x17,x14,[x2,#8*3] ++ extr x15,x16,x15,#63 ++ cbnz x27,Lsqr4x_shift_n_add ++ ldp x1,x4,[x29,#104] // pull np and n0 ++ ++ adcs x21,x8,x15 ++ extr x16,x17,x16,#63 ++ adcs x22,x9,x16 ++ ldp x15,x16,[x2,#8*5] ++ mul x10,x11,x11 ++ umulh x11,x11,x11 ++ stp x19,x20,[x2,#8*0] ++ mul x12,x13,x13 ++ umulh x13,x13,x13 ++ stp x21,x22,[x2,#8*2] ++ extr x17,x14,x17,#63 ++ adcs x23,x10,x17 ++ extr x14,x15,x14,#63 ++ ldp x19,x20,[sp,#8*0] ++ adcs x24,x11,x14 ++ extr x15,x16,x15,#63 ++ ldp x6,x7,[x1,#8*0] ++ adcs x25,x12,x15 ++ extr x16,xzr,x16,#63 ++ ldp x8,x9,[x1,#8*2] ++ adc x26,x13,x16 ++ ldp x10,x11,[x1,#8*4] ++ ++ // Reduce by 512 bits per iteration ++ mul x28,x4,x19 // t[0]*n0 ++ ldp x12,x13,[x1,#8*6] ++ add x3,x1,x5 ++ ldp x21,x22,[sp,#8*2] ++ stp x23,x24,[x2,#8*4] ++ ldp x23,x24,[sp,#8*4] ++ stp x25,x26,[x2,#8*6] ++ ldp x25,x26,[sp,#8*6] ++ add x1,x1,#8*8 ++ mov x30,xzr // initial top-most carry ++ mov x2,sp ++ mov x27,#8 ++ ++Lsqr8x_reduction: ++ // (*) mul x14,x6,x28 // lo(n[0-7])*lo(t[0]*n0) ++ mul x15,x7,x28 ++ sub x27,x27,#1 ++ mul x16,x8,x28 ++ str x28,[x2],#8 // put aside t[0]*n0 for tail processing ++ mul x17,x9,x28 ++ // (*) adds xzr,x19,x14 ++ subs xzr,x19,#1 // (*) ++ mul x14,x10,x28 ++ adcs x19,x20,x15 ++ mul x15,x11,x28 ++ adcs x20,x21,x16 ++ mul x16,x12,x28 ++ adcs x21,x22,x17 ++ mul x17,x13,x28 
++ adcs x22,x23,x14 ++ umulh x14,x6,x28 // hi(n[0-7])*lo(t[0]*n0) ++ adcs x23,x24,x15 ++ umulh x15,x7,x28 ++ adcs x24,x25,x16 ++ umulh x16,x8,x28 ++ adcs x25,x26,x17 ++ umulh x17,x9,x28 ++ adc x26,xzr,xzr ++ adds x19,x19,x14 ++ umulh x14,x10,x28 ++ adcs x20,x20,x15 ++ umulh x15,x11,x28 ++ adcs x21,x21,x16 ++ umulh x16,x12,x28 ++ adcs x22,x22,x17 ++ umulh x17,x13,x28 ++ mul x28,x4,x19 // next t[0]*n0 ++ adcs x23,x23,x14 ++ adcs x24,x24,x15 ++ adcs x25,x25,x16 ++ adc x26,x26,x17 ++ cbnz x27,Lsqr8x_reduction ++ ++ ldp x14,x15,[x2,#8*0] ++ ldp x16,x17,[x2,#8*2] ++ mov x0,x2 ++ sub x27,x3,x1 // done yet? ++ adds x19,x19,x14 ++ adcs x20,x20,x15 ++ ldp x14,x15,[x2,#8*4] ++ adcs x21,x21,x16 ++ adcs x22,x22,x17 ++ ldp x16,x17,[x2,#8*6] ++ adcs x23,x23,x14 ++ adcs x24,x24,x15 ++ adcs x25,x25,x16 ++ adcs x26,x26,x17 ++ //adc x28,xzr,xzr // moved below ++ cbz x27,Lsqr8x8_post_condition ++ ++ ldr x4,[x2,#-8*8] ++ ldp x6,x7,[x1,#8*0] ++ ldp x8,x9,[x1,#8*2] ++ ldp x10,x11,[x1,#8*4] ++ mov x27,#-8*8 ++ ldp x12,x13,[x1,#8*6] ++ add x1,x1,#8*8 ++ ++Lsqr8x_tail: ++ mul x14,x6,x4 ++ adc x28,xzr,xzr // carry bit, modulo-scheduled ++ mul x15,x7,x4 ++ add x27,x27,#8 ++ mul x16,x8,x4 ++ mul x17,x9,x4 ++ adds x19,x19,x14 ++ mul x14,x10,x4 ++ adcs x20,x20,x15 ++ mul x15,x11,x4 ++ adcs x21,x21,x16 ++ mul x16,x12,x4 ++ adcs x22,x22,x17 ++ mul x17,x13,x4 ++ adcs x23,x23,x14 ++ umulh x14,x6,x4 ++ adcs x24,x24,x15 ++ umulh x15,x7,x4 ++ adcs x25,x25,x16 ++ umulh x16,x8,x4 ++ adcs x26,x26,x17 ++ umulh x17,x9,x4 ++ adc x28,x28,xzr ++ str x19,[x2],#8 ++ adds x19,x20,x14 ++ umulh x14,x10,x4 ++ adcs x20,x21,x15 ++ umulh x15,x11,x4 ++ adcs x21,x22,x16 ++ umulh x16,x12,x4 ++ adcs x22,x23,x17 ++ umulh x17,x13,x4 ++ ldr x4,[x0,x27] ++ adcs x23,x24,x14 ++ adcs x24,x25,x15 ++ adcs x25,x26,x16 ++ adcs x26,x28,x17 ++ //adc x28,xzr,xzr // moved above ++ cbnz x27,Lsqr8x_tail ++ // note that carry flag is guaranteed ++ // to be zero at this point ++ ldp x6,x7,[x2,#8*0] ++ sub x27,x3,x1 // done yet? ++ sub x16,x3,x5 // rewinded np ++ ldp x8,x9,[x2,#8*2] ++ ldp x10,x11,[x2,#8*4] ++ ldp x12,x13,[x2,#8*6] ++ cbz x27,Lsqr8x_tail_break ++ ++ ldr x4,[x0,#-8*8] ++ adds x19,x19,x6 ++ adcs x20,x20,x7 ++ ldp x6,x7,[x1,#8*0] ++ adcs x21,x21,x8 ++ adcs x22,x22,x9 ++ ldp x8,x9,[x1,#8*2] ++ adcs x23,x23,x10 ++ adcs x24,x24,x11 ++ ldp x10,x11,[x1,#8*4] ++ adcs x25,x25,x12 ++ mov x27,#-8*8 ++ adcs x26,x26,x13 ++ ldp x12,x13,[x1,#8*6] ++ add x1,x1,#8*8 ++ //adc x28,xzr,xzr // moved above ++ b Lsqr8x_tail ++ ++.align 4 ++Lsqr8x_tail_break: ++ ldr x4,[x29,#112] // pull n0 ++ add x27,x2,#8*8 // end of current t[num] window ++ ++ subs xzr,x30,#1 // "move" top-most carry to carry bit ++ adcs x14,x19,x6 ++ adcs x15,x20,x7 ++ ldp x19,x20,[x0,#8*0] ++ adcs x21,x21,x8 ++ ldp x6,x7,[x16,#8*0] // recall that x16 is &n[0] ++ adcs x22,x22,x9 ++ ldp x8,x9,[x16,#8*2] ++ adcs x23,x23,x10 ++ adcs x24,x24,x11 ++ ldp x10,x11,[x16,#8*4] ++ adcs x25,x25,x12 ++ adcs x26,x26,x13 ++ ldp x12,x13,[x16,#8*6] ++ add x1,x16,#8*8 ++ adc x30,xzr,xzr // top-most carry ++ mul x28,x4,x19 ++ stp x14,x15,[x2,#8*0] ++ stp x21,x22,[x2,#8*2] ++ ldp x21,x22,[x0,#8*2] ++ stp x23,x24,[x2,#8*4] ++ ldp x23,x24,[x0,#8*4] ++ cmp x27,x29 // did we hit the bottom? ++ stp x25,x26,[x2,#8*6] ++ mov x2,x0 // slide the window ++ ldp x25,x26,[x0,#8*6] ++ mov x27,#8 ++ b.ne Lsqr8x_reduction ++ ++ // Final step. We see if result is larger than modulus, and ++ // if it is, subtract the modulus. But comparison implies ++ // subtraction. 
So we subtract modulus, see if it borrowed, ++ // and conditionally copy original value. ++ ldr x0,[x29,#96] // pull rp ++ add x2,x2,#8*8 ++ subs x14,x19,x6 ++ sbcs x15,x20,x7 ++ sub x27,x5,#8*8 ++ mov x3,x0 // x0 copy ++ ++Lsqr8x_sub: ++ sbcs x16,x21,x8 ++ ldp x6,x7,[x1,#8*0] ++ sbcs x17,x22,x9 ++ stp x14,x15,[x0,#8*0] ++ sbcs x14,x23,x10 ++ ldp x8,x9,[x1,#8*2] ++ sbcs x15,x24,x11 ++ stp x16,x17,[x0,#8*2] ++ sbcs x16,x25,x12 ++ ldp x10,x11,[x1,#8*4] ++ sbcs x17,x26,x13 ++ ldp x12,x13,[x1,#8*6] ++ add x1,x1,#8*8 ++ ldp x19,x20,[x2,#8*0] ++ sub x27,x27,#8*8 ++ ldp x21,x22,[x2,#8*2] ++ ldp x23,x24,[x2,#8*4] ++ ldp x25,x26,[x2,#8*6] ++ add x2,x2,#8*8 ++ stp x14,x15,[x0,#8*4] ++ sbcs x14,x19,x6 ++ stp x16,x17,[x0,#8*6] ++ add x0,x0,#8*8 ++ sbcs x15,x20,x7 ++ cbnz x27,Lsqr8x_sub ++ ++ sbcs x16,x21,x8 ++ mov x2,sp ++ add x1,sp,x5 ++ ldp x6,x7,[x3,#8*0] ++ sbcs x17,x22,x9 ++ stp x14,x15,[x0,#8*0] ++ sbcs x14,x23,x10 ++ ldp x8,x9,[x3,#8*2] ++ sbcs x15,x24,x11 ++ stp x16,x17,[x0,#8*2] ++ sbcs x16,x25,x12 ++ ldp x19,x20,[x1,#8*0] ++ sbcs x17,x26,x13 ++ ldp x21,x22,[x1,#8*2] ++ sbcs xzr,x30,xzr // did it borrow? ++ ldr x30,[x29,#8] // pull return address ++ stp x14,x15,[x0,#8*4] ++ stp x16,x17,[x0,#8*6] ++ ++ sub x27,x5,#8*4 ++Lsqr4x_cond_copy: ++ sub x27,x27,#8*4 ++ csel x14,x19,x6,lo ++ stp xzr,xzr,[x2,#8*0] ++ csel x15,x20,x7,lo ++ ldp x6,x7,[x3,#8*4] ++ ldp x19,x20,[x1,#8*4] ++ csel x16,x21,x8,lo ++ stp xzr,xzr,[x2,#8*2] ++ add x2,x2,#8*4 ++ csel x17,x22,x9,lo ++ ldp x8,x9,[x3,#8*6] ++ ldp x21,x22,[x1,#8*6] ++ add x1,x1,#8*4 ++ stp x14,x15,[x3,#8*0] ++ stp x16,x17,[x3,#8*2] ++ add x3,x3,#8*4 ++ stp xzr,xzr,[x1,#8*0] ++ stp xzr,xzr,[x1,#8*2] ++ cbnz x27,Lsqr4x_cond_copy ++ ++ csel x14,x19,x6,lo ++ stp xzr,xzr,[x2,#8*0] ++ csel x15,x20,x7,lo ++ stp xzr,xzr,[x2,#8*2] ++ csel x16,x21,x8,lo ++ csel x17,x22,x9,lo ++ stp x14,x15,[x3,#8*0] ++ stp x16,x17,[x3,#8*2] ++ ++ b Lsqr8x_done ++ ++.align 4 ++Lsqr8x8_post_condition: ++ adc x28,xzr,xzr ++ ldr x30,[x29,#8] // pull return address ++ // x19-7,x28 hold result, x6-7 hold modulus ++ subs x6,x19,x6 ++ ldr x1,[x29,#96] // pull rp ++ sbcs x7,x20,x7 ++ stp xzr,xzr,[sp,#8*0] ++ sbcs x8,x21,x8 ++ stp xzr,xzr,[sp,#8*2] ++ sbcs x9,x22,x9 ++ stp xzr,xzr,[sp,#8*4] ++ sbcs x10,x23,x10 ++ stp xzr,xzr,[sp,#8*6] ++ sbcs x11,x24,x11 ++ stp xzr,xzr,[sp,#8*8] ++ sbcs x12,x25,x12 ++ stp xzr,xzr,[sp,#8*10] ++ sbcs x13,x26,x13 ++ stp xzr,xzr,[sp,#8*12] ++ sbcs x28,x28,xzr // did it borrow? ++ stp xzr,xzr,[sp,#8*14] ++ ++ // x6-7 hold result-modulus ++ csel x6,x19,x6,lo ++ csel x7,x20,x7,lo ++ csel x8,x21,x8,lo ++ csel x9,x22,x9,lo ++ stp x6,x7,[x1,#8*0] ++ csel x10,x23,x10,lo ++ csel x11,x24,x11,lo ++ stp x8,x9,[x1,#8*2] ++ csel x12,x25,x12,lo ++ csel x13,x26,x13,lo ++ stp x10,x11,[x1,#8*4] ++ stp x12,x13,[x1,#8*6] ++ ++Lsqr8x_done: ++ ldp x19,x20,[x29,#16] ++ mov sp,x29 ++ ldp x21,x22,[x29,#32] ++ mov x0,#1 ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldr x29,[sp],#128 ++ // x30 is popped earlier ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++.align 5 ++__bn_mul4x_mont: ++ // Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to ++ // only from bn_mul_mont or __bn_mul8x_mont which have already signed the ++ // return address. ++ stp x29,x30,[sp,#-128]! 
++ add x29,sp,#0 ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ ++ sub x26,sp,x5,lsl#3 ++ lsl x5,x5,#3 ++ ldr x4,[x4] // *n0 ++ sub sp,x26,#8*4 // alloca ++ ++ add x10,x2,x5 ++ add x27,x1,x5 ++ stp x0,x10,[x29,#96] // offload rp and &b[num] ++ ++ ldr x24,[x2,#8*0] // b[0] ++ ldp x6,x7,[x1,#8*0] // a[0..3] ++ ldp x8,x9,[x1,#8*2] ++ add x1,x1,#8*4 ++ mov x19,xzr ++ mov x20,xzr ++ mov x21,xzr ++ mov x22,xzr ++ ldp x14,x15,[x3,#8*0] // n[0..3] ++ ldp x16,x17,[x3,#8*2] ++ adds x3,x3,#8*4 // clear carry bit ++ mov x0,xzr ++ mov x28,#0 ++ mov x26,sp ++ ++Loop_mul4x_1st_reduction: ++ mul x10,x6,x24 // lo(a[0..3]*b[0]) ++ adc x0,x0,xzr // modulo-scheduled ++ mul x11,x7,x24 ++ add x28,x28,#8 ++ mul x12,x8,x24 ++ and x28,x28,#31 ++ mul x13,x9,x24 ++ adds x19,x19,x10 ++ umulh x10,x6,x24 // hi(a[0..3]*b[0]) ++ adcs x20,x20,x11 ++ mul x25,x19,x4 // t[0]*n0 ++ adcs x21,x21,x12 ++ umulh x11,x7,x24 ++ adcs x22,x22,x13 ++ umulh x12,x8,x24 ++ adc x23,xzr,xzr ++ umulh x13,x9,x24 ++ ldr x24,[x2,x28] // next b[i] (or b[0]) ++ adds x20,x20,x10 ++ // (*) mul x10,x14,x25 // lo(n[0..3]*t[0]*n0) ++ str x25,[x26],#8 // put aside t[0]*n0 for tail processing ++ adcs x21,x21,x11 ++ mul x11,x15,x25 ++ adcs x22,x22,x12 ++ mul x12,x16,x25 ++ adc x23,x23,x13 // can't overflow ++ mul x13,x17,x25 ++ // (*) adds xzr,x19,x10 ++ subs xzr,x19,#1 // (*) ++ umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0) ++ adcs x19,x20,x11 ++ umulh x11,x15,x25 ++ adcs x20,x21,x12 ++ umulh x12,x16,x25 ++ adcs x21,x22,x13 ++ umulh x13,x17,x25 ++ adcs x22,x23,x0 ++ adc x0,xzr,xzr ++ adds x19,x19,x10 ++ sub x10,x27,x1 ++ adcs x20,x20,x11 ++ adcs x21,x21,x12 ++ adcs x22,x22,x13 ++ //adc x0,x0,xzr ++ cbnz x28,Loop_mul4x_1st_reduction ++ ++ cbz x10,Lmul4x4_post_condition ++ ++ ldp x6,x7,[x1,#8*0] // a[4..7] ++ ldp x8,x9,[x1,#8*2] ++ add x1,x1,#8*4 ++ ldr x25,[sp] // a[0]*n0 ++ ldp x14,x15,[x3,#8*0] // n[4..7] ++ ldp x16,x17,[x3,#8*2] ++ add x3,x3,#8*4 ++ ++Loop_mul4x_1st_tail: ++ mul x10,x6,x24 // lo(a[4..7]*b[i]) ++ adc x0,x0,xzr // modulo-scheduled ++ mul x11,x7,x24 ++ add x28,x28,#8 ++ mul x12,x8,x24 ++ and x28,x28,#31 ++ mul x13,x9,x24 ++ adds x19,x19,x10 ++ umulh x10,x6,x24 // hi(a[4..7]*b[i]) ++ adcs x20,x20,x11 ++ umulh x11,x7,x24 ++ adcs x21,x21,x12 ++ umulh x12,x8,x24 ++ adcs x22,x22,x13 ++ umulh x13,x9,x24 ++ adc x23,xzr,xzr ++ ldr x24,[x2,x28] // next b[i] (or b[0]) ++ adds x20,x20,x10 ++ mul x10,x14,x25 // lo(n[4..7]*a[0]*n0) ++ adcs x21,x21,x11 ++ mul x11,x15,x25 ++ adcs x22,x22,x12 ++ mul x12,x16,x25 ++ adc x23,x23,x13 // can't overflow ++ mul x13,x17,x25 ++ adds x19,x19,x10 ++ umulh x10,x14,x25 // hi(n[4..7]*a[0]*n0) ++ adcs x20,x20,x11 ++ umulh x11,x15,x25 ++ adcs x21,x21,x12 ++ umulh x12,x16,x25 ++ adcs x22,x22,x13 ++ adcs x23,x23,x0 ++ umulh x13,x17,x25 ++ adc x0,xzr,xzr ++ ldr x25,[sp,x28] // next t[0]*n0 ++ str x19,[x26],#8 // result!!! ++ adds x19,x20,x10 ++ sub x10,x27,x1 // done yet? ++ adcs x20,x21,x11 ++ adcs x21,x22,x12 ++ adcs x22,x23,x13 ++ //adc x0,x0,xzr ++ cbnz x28,Loop_mul4x_1st_tail ++ ++ sub x11,x27,x5 // rewinded x1 ++ cbz x10,Lmul4x_proceed ++ ++ ldp x6,x7,[x1,#8*0] ++ ldp x8,x9,[x1,#8*2] ++ add x1,x1,#8*4 ++ ldp x14,x15,[x3,#8*0] ++ ldp x16,x17,[x3,#8*2] ++ add x3,x3,#8*4 ++ b Loop_mul4x_1st_tail ++ ++.align 5 ++Lmul4x_proceed: ++ ldr x24,[x2,#8*4]! // *++b ++ adc x30,x0,xzr ++ ldp x6,x7,[x11,#8*0] // a[0..3] ++ sub x3,x3,x5 // rewind np ++ ldp x8,x9,[x11,#8*2] ++ add x1,x11,#8*4 ++ ++ stp x19,x20,[x26,#8*0] // result!!! 
++ ldp x19,x20,[sp,#8*4] // t[0..3] ++ stp x21,x22,[x26,#8*2] // result!!! ++ ldp x21,x22,[sp,#8*6] ++ ++ ldp x14,x15,[x3,#8*0] // n[0..3] ++ mov x26,sp ++ ldp x16,x17,[x3,#8*2] ++ adds x3,x3,#8*4 // clear carry bit ++ mov x0,xzr ++ ++.align 4 ++Loop_mul4x_reduction: ++ mul x10,x6,x24 // lo(a[0..3]*b[4]) ++ adc x0,x0,xzr // modulo-scheduled ++ mul x11,x7,x24 ++ add x28,x28,#8 ++ mul x12,x8,x24 ++ and x28,x28,#31 ++ mul x13,x9,x24 ++ adds x19,x19,x10 ++ umulh x10,x6,x24 // hi(a[0..3]*b[4]) ++ adcs x20,x20,x11 ++ mul x25,x19,x4 // t[0]*n0 ++ adcs x21,x21,x12 ++ umulh x11,x7,x24 ++ adcs x22,x22,x13 ++ umulh x12,x8,x24 ++ adc x23,xzr,xzr ++ umulh x13,x9,x24 ++ ldr x24,[x2,x28] // next b[i] ++ adds x20,x20,x10 ++ // (*) mul x10,x14,x25 ++ str x25,[x26],#8 // put aside t[0]*n0 for tail processing ++ adcs x21,x21,x11 ++ mul x11,x15,x25 // lo(n[0..3]*t[0]*n0 ++ adcs x22,x22,x12 ++ mul x12,x16,x25 ++ adc x23,x23,x13 // can't overflow ++ mul x13,x17,x25 ++ // (*) adds xzr,x19,x10 ++ subs xzr,x19,#1 // (*) ++ umulh x10,x14,x25 // hi(n[0..3]*t[0]*n0 ++ adcs x19,x20,x11 ++ umulh x11,x15,x25 ++ adcs x20,x21,x12 ++ umulh x12,x16,x25 ++ adcs x21,x22,x13 ++ umulh x13,x17,x25 ++ adcs x22,x23,x0 ++ adc x0,xzr,xzr ++ adds x19,x19,x10 ++ adcs x20,x20,x11 ++ adcs x21,x21,x12 ++ adcs x22,x22,x13 ++ //adc x0,x0,xzr ++ cbnz x28,Loop_mul4x_reduction ++ ++ adc x0,x0,xzr ++ ldp x10,x11,[x26,#8*4] // t[4..7] ++ ldp x12,x13,[x26,#8*6] ++ ldp x6,x7,[x1,#8*0] // a[4..7] ++ ldp x8,x9,[x1,#8*2] ++ add x1,x1,#8*4 ++ adds x19,x19,x10 ++ adcs x20,x20,x11 ++ adcs x21,x21,x12 ++ adcs x22,x22,x13 ++ //adc x0,x0,xzr ++ ++ ldr x25,[sp] // t[0]*n0 ++ ldp x14,x15,[x3,#8*0] // n[4..7] ++ ldp x16,x17,[x3,#8*2] ++ add x3,x3,#8*4 ++ ++.align 4 ++Loop_mul4x_tail: ++ mul x10,x6,x24 // lo(a[4..7]*b[4]) ++ adc x0,x0,xzr // modulo-scheduled ++ mul x11,x7,x24 ++ add x28,x28,#8 ++ mul x12,x8,x24 ++ and x28,x28,#31 ++ mul x13,x9,x24 ++ adds x19,x19,x10 ++ umulh x10,x6,x24 // hi(a[4..7]*b[4]) ++ adcs x20,x20,x11 ++ umulh x11,x7,x24 ++ adcs x21,x21,x12 ++ umulh x12,x8,x24 ++ adcs x22,x22,x13 ++ umulh x13,x9,x24 ++ adc x23,xzr,xzr ++ ldr x24,[x2,x28] // next b[i] ++ adds x20,x20,x10 ++ mul x10,x14,x25 // lo(n[4..7]*t[0]*n0) ++ adcs x21,x21,x11 ++ mul x11,x15,x25 ++ adcs x22,x22,x12 ++ mul x12,x16,x25 ++ adc x23,x23,x13 // can't overflow ++ mul x13,x17,x25 ++ adds x19,x19,x10 ++ umulh x10,x14,x25 // hi(n[4..7]*t[0]*n0) ++ adcs x20,x20,x11 ++ umulh x11,x15,x25 ++ adcs x21,x21,x12 ++ umulh x12,x16,x25 ++ adcs x22,x22,x13 ++ umulh x13,x17,x25 ++ adcs x23,x23,x0 ++ ldr x25,[sp,x28] // next a[0]*n0 ++ adc x0,xzr,xzr ++ str x19,[x26],#8 // result!!! ++ adds x19,x20,x10 ++ sub x10,x27,x1 // done yet? ++ adcs x20,x21,x11 ++ adcs x21,x22,x12 ++ adcs x22,x23,x13 ++ //adc x0,x0,xzr ++ cbnz x28,Loop_mul4x_tail ++ ++ sub x11,x3,x5 // rewinded np? ++ adc x0,x0,xzr ++ cbz x10,Loop_mul4x_break ++ ++ ldp x10,x11,[x26,#8*4] ++ ldp x12,x13,[x26,#8*6] ++ ldp x6,x7,[x1,#8*0] ++ ldp x8,x9,[x1,#8*2] ++ add x1,x1,#8*4 ++ adds x19,x19,x10 ++ adcs x20,x20,x11 ++ adcs x21,x21,x12 ++ adcs x22,x22,x13 ++ //adc x0,x0,xzr ++ ldp x14,x15,[x3,#8*0] ++ ldp x16,x17,[x3,#8*2] ++ add x3,x3,#8*4 ++ b Loop_mul4x_tail ++ ++.align 4 ++Loop_mul4x_break: ++ ldp x12,x13,[x29,#96] // pull rp and &b[num] ++ adds x19,x19,x30 ++ add x2,x2,#8*4 // bp++ ++ adcs x20,x20,xzr ++ sub x1,x1,x5 // rewind ap ++ adcs x21,x21,xzr ++ stp x19,x20,[x26,#8*0] // result!!! ++ adcs x22,x22,xzr ++ ldp x19,x20,[sp,#8*4] // t[0..3] ++ adc x30,x0,xzr ++ stp x21,x22,[x26,#8*2] // result!!! ++ cmp x2,x13 // done yet? 
++ ldp x21,x22,[sp,#8*6] ++ ldp x14,x15,[x11,#8*0] // n[0..3] ++ ldp x16,x17,[x11,#8*2] ++ add x3,x11,#8*4 ++ b.eq Lmul4x_post ++ ++ ldr x24,[x2] ++ ldp x6,x7,[x1,#8*0] // a[0..3] ++ ldp x8,x9,[x1,#8*2] ++ adds x1,x1,#8*4 // clear carry bit ++ mov x0,xzr ++ mov x26,sp ++ b Loop_mul4x_reduction ++ ++.align 4 ++Lmul4x_post: ++ // Final step. We see if result is larger than modulus, and ++ // if it is, subtract the modulus. But comparison implies ++ // subtraction. So we subtract modulus, see if it borrowed, ++ // and conditionally copy original value. ++ mov x0,x12 ++ mov x27,x12 // x0 copy ++ subs x10,x19,x14 ++ add x26,sp,#8*8 ++ sbcs x11,x20,x15 ++ sub x28,x5,#8*4 ++ ++Lmul4x_sub: ++ sbcs x12,x21,x16 ++ ldp x14,x15,[x3,#8*0] ++ sub x28,x28,#8*4 ++ ldp x19,x20,[x26,#8*0] ++ sbcs x13,x22,x17 ++ ldp x16,x17,[x3,#8*2] ++ add x3,x3,#8*4 ++ ldp x21,x22,[x26,#8*2] ++ add x26,x26,#8*4 ++ stp x10,x11,[x0,#8*0] ++ sbcs x10,x19,x14 ++ stp x12,x13,[x0,#8*2] ++ add x0,x0,#8*4 ++ sbcs x11,x20,x15 ++ cbnz x28,Lmul4x_sub ++ ++ sbcs x12,x21,x16 ++ mov x26,sp ++ add x1,sp,#8*4 ++ ldp x6,x7,[x27,#8*0] ++ sbcs x13,x22,x17 ++ stp x10,x11,[x0,#8*0] ++ ldp x8,x9,[x27,#8*2] ++ stp x12,x13,[x0,#8*2] ++ ldp x19,x20,[x1,#8*0] ++ ldp x21,x22,[x1,#8*2] ++ sbcs xzr,x30,xzr // did it borrow? ++ ldr x30,[x29,#8] // pull return address ++ ++ sub x28,x5,#8*4 ++Lmul4x_cond_copy: ++ sub x28,x28,#8*4 ++ csel x10,x19,x6,lo ++ stp xzr,xzr,[x26,#8*0] ++ csel x11,x20,x7,lo ++ ldp x6,x7,[x27,#8*4] ++ ldp x19,x20,[x1,#8*4] ++ csel x12,x21,x8,lo ++ stp xzr,xzr,[x26,#8*2] ++ add x26,x26,#8*4 ++ csel x13,x22,x9,lo ++ ldp x8,x9,[x27,#8*6] ++ ldp x21,x22,[x1,#8*6] ++ add x1,x1,#8*4 ++ stp x10,x11,[x27,#8*0] ++ stp x12,x13,[x27,#8*2] ++ add x27,x27,#8*4 ++ cbnz x28,Lmul4x_cond_copy ++ ++ csel x10,x19,x6,lo ++ stp xzr,xzr,[x26,#8*0] ++ csel x11,x20,x7,lo ++ stp xzr,xzr,[x26,#8*2] ++ csel x12,x21,x8,lo ++ stp xzr,xzr,[x26,#8*3] ++ csel x13,x22,x9,lo ++ stp xzr,xzr,[x26,#8*4] ++ stp x10,x11,[x27,#8*0] ++ stp x12,x13,[x27,#8*2] ++ ++ b Lmul4x_done ++ ++.align 4 ++Lmul4x4_post_condition: ++ adc x0,x0,xzr ++ ldr x1,[x29,#96] // pull rp ++ // x19-3,x0 hold result, x14-7 hold modulus ++ subs x6,x19,x14 ++ ldr x30,[x29,#8] // pull return address ++ sbcs x7,x20,x15 ++ stp xzr,xzr,[sp,#8*0] ++ sbcs x8,x21,x16 ++ stp xzr,xzr,[sp,#8*2] ++ sbcs x9,x22,x17 ++ stp xzr,xzr,[sp,#8*4] ++ sbcs xzr,x0,xzr // did it borrow? ++ stp xzr,xzr,[sp,#8*6] ++ ++ // x6-3 hold result-modulus ++ csel x6,x19,x6,lo ++ csel x7,x20,x7,lo ++ csel x8,x21,x8,lo ++ csel x9,x22,x9,lo ++ stp x6,x7,[x1,#8*0] ++ stp x8,x9,[x1,#8*2] ++ ++Lmul4x_done: ++ ldp x19,x20,[x29,#16] ++ mov sp,x29 ++ ldp x21,x22,[x29,#32] ++ mov x0,#1 ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldr x29,[sp],#128 ++ // x30 is popped earlier ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 4 ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-aarch64/crypto/fipsmodule/ghash-neon-armv8.S b/apple-aarch64/crypto/fipsmodule/ghash-neon-armv8.S +new file mode 100644 +index 0000000..5441afc +--- /dev/null ++++ b/apple-aarch64/crypto/fipsmodule/ghash-neon-armv8.S +@@ -0,0 +1,343 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
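The bn_mul_mont code that ends just above documents two tricks in its comments: the first low-word multiply and add of each reduction step are elided because their result is zero by construction, so only the carry (set exactly when t[0] is non-zero) is recovered via subs xzr,x6,#1; and the final result is corrected by subtracting the modulus, checking the borrow, and conditionally copying the original value back. Below is a small self-contained C check of the first identity, assuming as the code does that n0 equals -n[0]^-1 mod 2^64; the Newton-iteration helper is one standard way to derive such an n0 and is not taken from this package:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* -n^(-1) mod 2^64 for odd n, by Newton iteration. */
static uint64_t neg_inv64(uint64_t n) {
  uint64_t x = n;                  /* n*n == 1 (mod 8), so x inverts n mod 8 */
  for (int i = 0; i < 5; i++) {
    x *= 2 - n * x;                /* each step doubles the number of valid bits */
  }
  return (uint64_t)0 - x;
}

int main(void) {
  uint64_t n_word0 = 0xfffffffffffffc2fULL;  /* any odd modulus word */
  uint64_t n0 = neg_inv64(n_word0);
  for (uint64_t t0 = 0; t0 < 4; t0++) {
    uint64_t m = t0 * n0;                    /* "tp[0]"*n0                    */
    uint64_t lo = m * n_word0;               /* lo(np[0]*m), the elided mul   */
    uint64_t sum = lo + t0;                  /* the elided adds               */
    uint64_t carry = sum < lo;               /* its carry out                 */
    assert(sum == 0);                        /* low word is always zero       */
    assert(carry == (t0 != 0));              /* carry iff t0 != 0, as claimed */
  }
  puts("ok");
  return 0;
}

The same borrow-then-csel pattern described in the final-step comments shows up in the Lcond_copy, Lsqr4x_cond_copy and Lmul4x_cond_copy blocks above.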
++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++.text ++ ++.globl _gcm_init_neon ++.private_extern _gcm_init_neon ++ ++.align 4 ++_gcm_init_neon: ++ AARCH64_VALID_CALL_TARGET ++ // This function is adapted from gcm_init_v8. xC2 is t3. ++ ld1 {v17.2d}, [x1] // load H ++ movi v19.16b, #0xe1 ++ shl v19.2d, v19.2d, #57 // 0xc2.0 ++ ext v3.16b, v17.16b, v17.16b, #8 ++ ushr v18.2d, v19.2d, #63 ++ dup v17.4s, v17.s[1] ++ ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01 ++ ushr v18.2d, v3.2d, #63 ++ sshr v17.4s, v17.4s, #31 // broadcast carry bit ++ and v18.16b, v18.16b, v16.16b ++ shl v3.2d, v3.2d, #1 ++ ext v18.16b, v18.16b, v18.16b, #8 ++ and v16.16b, v16.16b, v17.16b ++ orr v3.16b, v3.16b, v18.16b // H<<<=1 ++ eor v5.16b, v3.16b, v16.16b // twisted H ++ st1 {v5.2d}, [x0] // store Htable[0] ++ ret ++ ++ ++.globl _gcm_gmult_neon ++.private_extern _gcm_gmult_neon ++ ++.align 4 ++_gcm_gmult_neon: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {v3.16b}, [x0] // load Xi ++ ld1 {v5.1d}, [x1], #8 // load twisted H ++ ld1 {v6.1d}, [x1] ++ adrp x9, Lmasks@PAGE // load constants ++ add x9, x9, Lmasks@PAGEOFF ++ ld1 {v24.2d, v25.2d}, [x9] ++ rev64 v3.16b, v3.16b // byteswap Xi ++ ext v3.16b, v3.16b, v3.16b, #8 ++ eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing ++ ++ mov x3, #16 ++ b Lgmult_neon ++ ++ ++.globl _gcm_ghash_neon ++.private_extern _gcm_ghash_neon ++ ++.align 4 ++_gcm_ghash_neon: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {v0.16b}, [x0] // load Xi ++ ld1 {v5.1d}, [x1], #8 // load twisted H ++ ld1 {v6.1d}, [x1] ++ adrp x9, Lmasks@PAGE // load constants ++ add x9, x9, Lmasks@PAGEOFF ++ ld1 {v24.2d, v25.2d}, [x9] ++ rev64 v0.16b, v0.16b // byteswap Xi ++ ext v0.16b, v0.16b, v0.16b, #8 ++ eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing ++ ++Loop_neon: ++ ld1 {v3.16b}, [x2], #16 // load inp ++ rev64 v3.16b, v3.16b // byteswap inp ++ ext v3.16b, v3.16b, v3.16b, #8 ++ eor v3.16b, v3.16b, v0.16b // inp ^= Xi ++ ++Lgmult_neon: ++ // Split the input into v3 and v4. (The upper halves are unused, ++ // so it is okay to leave them alone.) ++ ins v4.d[0], v3.d[1] ++ ext v16.8b, v5.8b, v5.8b, #1 // A1 ++ pmull v16.8h, v16.8b, v3.8b // F = A1*B ++ ext v0.8b, v3.8b, v3.8b, #1 // B1 ++ pmull v0.8h, v5.8b, v0.8b // E = A*B1 ++ ext v17.8b, v5.8b, v5.8b, #2 // A2 ++ pmull v17.8h, v17.8b, v3.8b // H = A2*B ++ ext v19.8b, v3.8b, v3.8b, #2 // B2 ++ pmull v19.8h, v5.8b, v19.8b // G = A*B2 ++ ext v18.8b, v5.8b, v5.8b, #3 // A3 ++ eor v16.16b, v16.16b, v0.16b // L = E + F ++ pmull v18.8h, v18.8b, v3.8b // J = A3*B ++ ext v0.8b, v3.8b, v3.8b, #3 // B3 ++ eor v17.16b, v17.16b, v19.16b // M = G + H ++ pmull v0.8h, v5.8b, v0.8b // I = A*B3 ++ ++ // Here we diverge from the 32-bit version. It computes the following ++ // (instructions reordered for clarity): ++ // ++ // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) ++ // vand $t0#hi, $t0#hi, $k48 ++ // veor $t0#lo, $t0#lo, $t0#hi ++ // ++ // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) ++ // vand $t1#hi, $t1#hi, $k32 ++ // veor $t1#lo, $t1#lo, $t1#hi ++ // ++ // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) ++ // vand $t2#hi, $t2#hi, $k16 ++ // veor $t2#lo, $t2#lo, $t2#hi ++ // ++ // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) ++ // vmov.i64 $t3#hi, #0 ++ // ++ // $kN is a mask with the bottom N bits set. 
AArch64 cannot compute on ++ // upper halves of SIMD registers, so we must split each half into ++ // separate registers. To compensate, we pair computations up and ++ // parallelize. ++ ++ ext v19.8b, v3.8b, v3.8b, #4 // B4 ++ eor v18.16b, v18.16b, v0.16b // N = I + J ++ pmull v19.8h, v5.8b, v19.8b // K = A*B4 ++ ++ // This can probably be scheduled more efficiently. For now, we just ++ // pair up independent instructions. ++ zip1 v20.2d, v16.2d, v17.2d ++ zip1 v22.2d, v18.2d, v19.2d ++ zip2 v21.2d, v16.2d, v17.2d ++ zip2 v23.2d, v18.2d, v19.2d ++ eor v20.16b, v20.16b, v21.16b ++ eor v22.16b, v22.16b, v23.16b ++ and v21.16b, v21.16b, v24.16b ++ and v23.16b, v23.16b, v25.16b ++ eor v20.16b, v20.16b, v21.16b ++ eor v22.16b, v22.16b, v23.16b ++ zip1 v16.2d, v20.2d, v21.2d ++ zip1 v18.2d, v22.2d, v23.2d ++ zip2 v17.2d, v20.2d, v21.2d ++ zip2 v19.2d, v22.2d, v23.2d ++ ++ ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 ++ ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 ++ pmull v0.8h, v5.8b, v3.8b // D = A*B ++ ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 ++ ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 ++ eor v16.16b, v16.16b, v17.16b ++ eor v18.16b, v18.16b, v19.16b ++ eor v0.16b, v0.16b, v16.16b ++ eor v0.16b, v0.16b, v18.16b ++ eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing ++ ext v16.8b, v7.8b, v7.8b, #1 // A1 ++ pmull v16.8h, v16.8b, v3.8b // F = A1*B ++ ext v1.8b, v3.8b, v3.8b, #1 // B1 ++ pmull v1.8h, v7.8b, v1.8b // E = A*B1 ++ ext v17.8b, v7.8b, v7.8b, #2 // A2 ++ pmull v17.8h, v17.8b, v3.8b // H = A2*B ++ ext v19.8b, v3.8b, v3.8b, #2 // B2 ++ pmull v19.8h, v7.8b, v19.8b // G = A*B2 ++ ext v18.8b, v7.8b, v7.8b, #3 // A3 ++ eor v16.16b, v16.16b, v1.16b // L = E + F ++ pmull v18.8h, v18.8b, v3.8b // J = A3*B ++ ext v1.8b, v3.8b, v3.8b, #3 // B3 ++ eor v17.16b, v17.16b, v19.16b // M = G + H ++ pmull v1.8h, v7.8b, v1.8b // I = A*B3 ++ ++ // Here we diverge from the 32-bit version. It computes the following ++ // (instructions reordered for clarity): ++ // ++ // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) ++ // vand $t0#hi, $t0#hi, $k48 ++ // veor $t0#lo, $t0#lo, $t0#hi ++ // ++ // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) ++ // vand $t1#hi, $t1#hi, $k32 ++ // veor $t1#lo, $t1#lo, $t1#hi ++ // ++ // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) ++ // vand $t2#hi, $t2#hi, $k16 ++ // veor $t2#lo, $t2#lo, $t2#hi ++ // ++ // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) ++ // vmov.i64 $t3#hi, #0 ++ // ++ // $kN is a mask with the bottom N bits set. AArch64 cannot compute on ++ // upper halves of SIMD registers, so we must split each half into ++ // separate registers. To compensate, we pair computations up and ++ // parallelize. ++ ++ ext v19.8b, v3.8b, v3.8b, #4 // B4 ++ eor v18.16b, v18.16b, v1.16b // N = I + J ++ pmull v19.8h, v7.8b, v19.8b // K = A*B4 ++ ++ // This can probably be scheduled more efficiently. For now, we just ++ // pair up independent instructions. 
++ zip1 v20.2d, v16.2d, v17.2d ++ zip1 v22.2d, v18.2d, v19.2d ++ zip2 v21.2d, v16.2d, v17.2d ++ zip2 v23.2d, v18.2d, v19.2d ++ eor v20.16b, v20.16b, v21.16b ++ eor v22.16b, v22.16b, v23.16b ++ and v21.16b, v21.16b, v24.16b ++ and v23.16b, v23.16b, v25.16b ++ eor v20.16b, v20.16b, v21.16b ++ eor v22.16b, v22.16b, v23.16b ++ zip1 v16.2d, v20.2d, v21.2d ++ zip1 v18.2d, v22.2d, v23.2d ++ zip2 v17.2d, v20.2d, v21.2d ++ zip2 v19.2d, v22.2d, v23.2d ++ ++ ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 ++ ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 ++ pmull v1.8h, v7.8b, v3.8b // D = A*B ++ ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 ++ ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 ++ eor v16.16b, v16.16b, v17.16b ++ eor v18.16b, v18.16b, v19.16b ++ eor v1.16b, v1.16b, v16.16b ++ eor v1.16b, v1.16b, v18.16b ++ ext v16.8b, v6.8b, v6.8b, #1 // A1 ++ pmull v16.8h, v16.8b, v4.8b // F = A1*B ++ ext v2.8b, v4.8b, v4.8b, #1 // B1 ++ pmull v2.8h, v6.8b, v2.8b // E = A*B1 ++ ext v17.8b, v6.8b, v6.8b, #2 // A2 ++ pmull v17.8h, v17.8b, v4.8b // H = A2*B ++ ext v19.8b, v4.8b, v4.8b, #2 // B2 ++ pmull v19.8h, v6.8b, v19.8b // G = A*B2 ++ ext v18.8b, v6.8b, v6.8b, #3 // A3 ++ eor v16.16b, v16.16b, v2.16b // L = E + F ++ pmull v18.8h, v18.8b, v4.8b // J = A3*B ++ ext v2.8b, v4.8b, v4.8b, #3 // B3 ++ eor v17.16b, v17.16b, v19.16b // M = G + H ++ pmull v2.8h, v6.8b, v2.8b // I = A*B3 ++ ++ // Here we diverge from the 32-bit version. It computes the following ++ // (instructions reordered for clarity): ++ // ++ // veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L) ++ // vand $t0#hi, $t0#hi, $k48 ++ // veor $t0#lo, $t0#lo, $t0#hi ++ // ++ // veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M) ++ // vand $t1#hi, $t1#hi, $k32 ++ // veor $t1#lo, $t1#lo, $t1#hi ++ // ++ // veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N) ++ // vand $t2#hi, $t2#hi, $k16 ++ // veor $t2#lo, $t2#lo, $t2#hi ++ // ++ // veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K) ++ // vmov.i64 $t3#hi, #0 ++ // ++ // $kN is a mask with the bottom N bits set. AArch64 cannot compute on ++ // upper halves of SIMD registers, so we must split each half into ++ // separate registers. To compensate, we pair computations up and ++ // parallelize. ++ ++ ext v19.8b, v4.8b, v4.8b, #4 // B4 ++ eor v18.16b, v18.16b, v2.16b // N = I + J ++ pmull v19.8h, v6.8b, v19.8b // K = A*B4 ++ ++ // This can probably be scheduled more efficiently. For now, we just ++ // pair up independent instructions. 
++ zip1 v20.2d, v16.2d, v17.2d ++ zip1 v22.2d, v18.2d, v19.2d ++ zip2 v21.2d, v16.2d, v17.2d ++ zip2 v23.2d, v18.2d, v19.2d ++ eor v20.16b, v20.16b, v21.16b ++ eor v22.16b, v22.16b, v23.16b ++ and v21.16b, v21.16b, v24.16b ++ and v23.16b, v23.16b, v25.16b ++ eor v20.16b, v20.16b, v21.16b ++ eor v22.16b, v22.16b, v23.16b ++ zip1 v16.2d, v20.2d, v21.2d ++ zip1 v18.2d, v22.2d, v23.2d ++ zip2 v17.2d, v20.2d, v21.2d ++ zip2 v19.2d, v22.2d, v23.2d ++ ++ ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8 ++ ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16 ++ pmull v2.8h, v6.8b, v4.8b // D = A*B ++ ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32 ++ ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24 ++ eor v16.16b, v16.16b, v17.16b ++ eor v18.16b, v18.16b, v19.16b ++ eor v2.16b, v2.16b, v16.16b ++ eor v2.16b, v2.16b, v18.16b ++ ext v16.16b, v0.16b, v2.16b, #8 ++ eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing ++ eor v1.16b, v1.16b, v2.16b ++ eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi ++ ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result ++ // This is a no-op due to the ins instruction below. ++ // ins v2.d[0], v1.d[1] ++ ++ // equivalent of reduction_avx from ghash-x86_64.pl ++ shl v17.2d, v0.2d, #57 // 1st phase ++ shl v18.2d, v0.2d, #62 ++ eor v18.16b, v18.16b, v17.16b // ++ shl v17.2d, v0.2d, #63 ++ eor v18.16b, v18.16b, v17.16b // ++ // Note Xm contains {Xl.d[1], Xh.d[0]}. ++ eor v18.16b, v18.16b, v1.16b ++ ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0] ++ ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1] ++ ++ ushr v18.2d, v0.2d, #1 // 2nd phase ++ eor v2.16b, v2.16b,v0.16b ++ eor v0.16b, v0.16b,v18.16b // ++ ushr v18.2d, v18.2d, #6 ++ ushr v0.2d, v0.2d, #1 // ++ eor v0.16b, v0.16b, v2.16b // ++ eor v0.16b, v0.16b, v18.16b // ++ ++ subs x3, x3, #16 ++ bne Loop_neon ++ ++ rev64 v0.16b, v0.16b // byteswap Xi and write ++ ext v0.16b, v0.16b, v0.16b, #8 ++ st1 {v0.16b}, [x0] ++ ++ ret ++ ++ ++.section __TEXT,__const ++.align 4 ++Lmasks: ++.quad 0x0000ffffffffffff // k48 ++.quad 0x00000000ffffffff // k32 ++.quad 0x000000000000ffff // k16 ++.quad 0x0000000000000000 // k0 ++.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-aarch64/crypto/fipsmodule/ghashv8-armx64.S b/apple-aarch64/crypto/fipsmodule/ghashv8-armx64.S +new file mode 100644 +index 0000000..0ba0cdd +--- /dev/null ++++ b/apple-aarch64/crypto/fipsmodule/ghashv8-armx64.S +@@ -0,0 +1,573 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++#if __ARM_MAX_ARCH__>=7 ++.text ++ ++.globl _gcm_init_v8 ++.private_extern _gcm_init_v8 ++ ++.align 4 ++_gcm_init_v8: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {v17.2d},[x1] //load input H ++ movi v19.16b,#0xe1 ++ shl v19.2d,v19.2d,#57 //0xc2.0 ++ ext v3.16b,v17.16b,v17.16b,#8 ++ ushr v18.2d,v19.2d,#63 ++ dup v17.4s,v17.s[1] ++ ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01 ++ ushr v18.2d,v3.2d,#63 ++ sshr v17.4s,v17.4s,#31 //broadcast carry bit ++ and v18.16b,v18.16b,v16.16b ++ shl v3.2d,v3.2d,#1 ++ ext v18.16b,v18.16b,v18.16b,#8 ++ and v16.16b,v16.16b,v17.16b ++ orr v3.16b,v3.16b,v18.16b //H<<<=1 ++ eor v20.16b,v3.16b,v16.16b //twisted H ++ st1 {v20.2d},[x0],#16 //store Htable[0] ++ ++ //calculate H^2 ++ ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing ++ pmull v0.1q,v20.1d,v20.1d ++ eor v16.16b,v16.16b,v20.16b ++ pmull2 v2.1q,v20.2d,v20.2d ++ pmull v1.1q,v16.1d,v16.1d ++ ++ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ eor v18.16b,v0.16b,v2.16b ++ eor v1.16b,v1.16b,v17.16b ++ eor v1.16b,v1.16b,v18.16b ++ pmull v18.1q,v0.1d,v19.1d //1st phase ++ ++ ins v2.d[0],v1.d[1] ++ ins v1.d[1],v0.d[0] ++ eor v0.16b,v1.16b,v18.16b ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase ++ pmull v0.1q,v0.1d,v19.1d ++ eor v18.16b,v18.16b,v2.16b ++ eor v22.16b,v0.16b,v18.16b ++ ++ ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing ++ eor v17.16b,v17.16b,v22.16b ++ ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed ++ st1 {v21.2d,v22.2d},[x0],#32 //store Htable[1..2] ++ //calculate H^3 and H^4 ++ pmull v0.1q,v20.1d, v22.1d ++ pmull v5.1q,v22.1d,v22.1d ++ pmull2 v2.1q,v20.2d, v22.2d ++ pmull2 v7.1q,v22.2d,v22.2d ++ pmull v1.1q,v16.1d,v17.1d ++ pmull v6.1q,v17.1d,v17.1d ++ ++ ext v16.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ ext v17.16b,v5.16b,v7.16b,#8 ++ eor v18.16b,v0.16b,v2.16b ++ eor v1.16b,v1.16b,v16.16b ++ eor v4.16b,v5.16b,v7.16b ++ eor v6.16b,v6.16b,v17.16b ++ eor v1.16b,v1.16b,v18.16b ++ pmull v18.1q,v0.1d,v19.1d //1st phase ++ eor v6.16b,v6.16b,v4.16b ++ pmull v4.1q,v5.1d,v19.1d ++ ++ ins v2.d[0],v1.d[1] ++ ins v7.d[0],v6.d[1] ++ ins v1.d[1],v0.d[0] ++ ins v6.d[1],v5.d[0] ++ eor v0.16b,v1.16b,v18.16b ++ eor v5.16b,v6.16b,v4.16b ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase ++ ext v4.16b,v5.16b,v5.16b,#8 ++ pmull v0.1q,v0.1d,v19.1d ++ pmull v5.1q,v5.1d,v19.1d ++ eor v18.16b,v18.16b,v2.16b ++ eor v4.16b,v4.16b,v7.16b ++ eor v20.16b, v0.16b,v18.16b //H^3 ++ eor v22.16b,v5.16b,v4.16b //H^4 ++ ++ ext v16.16b,v20.16b, v20.16b,#8 //Karatsuba pre-processing ++ ext v17.16b,v22.16b,v22.16b,#8 ++ eor v16.16b,v16.16b,v20.16b ++ eor v17.16b,v17.16b,v22.16b ++ ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed ++ st1 {v20.2d,v21.2d,v22.2d},[x0] //store Htable[3..5] ++ ret ++ ++.globl _gcm_gmult_v8 ++.private_extern _gcm_gmult_v8 ++ ++.align 4 ++_gcm_gmult_v8: ++ AARCH64_VALID_CALL_TARGET ++ ld1 {v17.2d},[x0] //load Xi ++ movi v19.16b,#0xe1 ++ ld1 {v20.2d,v21.2d},[x1] //load twisted H, ... 
++ shl v19.2d,v19.2d,#57 ++#ifndef __AARCH64EB__ ++ rev64 v17.16b,v17.16b ++#endif ++ ext v3.16b,v17.16b,v17.16b,#8 ++ ++ pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo ++ eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing ++ pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi ++ pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) ++ ++ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ eor v18.16b,v0.16b,v2.16b ++ eor v1.16b,v1.16b,v17.16b ++ eor v1.16b,v1.16b,v18.16b ++ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ++ ++ ins v2.d[0],v1.d[1] ++ ins v1.d[1],v0.d[0] ++ eor v0.16b,v1.16b,v18.16b ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction ++ pmull v0.1q,v0.1d,v19.1d ++ eor v18.16b,v18.16b,v2.16b ++ eor v0.16b,v0.16b,v18.16b ++ ++#ifndef __AARCH64EB__ ++ rev64 v0.16b,v0.16b ++#endif ++ ext v0.16b,v0.16b,v0.16b,#8 ++ st1 {v0.2d},[x0] //write out Xi ++ ++ ret ++ ++.globl _gcm_ghash_v8 ++.private_extern _gcm_ghash_v8 ++ ++.align 4 ++_gcm_ghash_v8: ++ AARCH64_VALID_CALL_TARGET ++ cmp x3,#64 ++ b.hs Lgcm_ghash_v8_4x ++ ld1 {v0.2d},[x0] //load [rotated] Xi ++ //"[rotated]" means that ++ //loaded value would have ++ //to be rotated in order to ++ //make it appear as in ++ //algorithm specification ++ subs x3,x3,#32 //see if x3 is 32 or larger ++ mov x12,#16 //x12 is used as post- ++ //increment for input pointer; ++ //as loop is modulo-scheduled ++ //x12 is zeroed just in time ++ //to preclude overstepping ++ //inp[len], which means that ++ //last block[s] are actually ++ //loaded twice, but last ++ //copy is not processed ++ ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2 ++ movi v19.16b,#0xe1 ++ ld1 {v22.2d},[x1] ++ csel x12,xzr,x12,eq //is it time to zero x12? ++ ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi ++ ld1 {v16.2d},[x2],#16 //load [rotated] I[0] ++ shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant ++#ifndef __AARCH64EB__ ++ rev64 v16.16b,v16.16b ++ rev64 v0.16b,v0.16b ++#endif ++ ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0] ++ b.lo Lodd_tail_v8 //x3 was less than 32 ++ ld1 {v17.2d},[x2],x12 //load [rotated] I[1] ++#ifndef __AARCH64EB__ ++ rev64 v17.16b,v17.16b ++#endif ++ ext v7.16b,v17.16b,v17.16b,#8 ++ eor v3.16b,v3.16b,v0.16b //I[i]^=Xi ++ pmull v4.1q,v20.1d,v7.1d //H·Ii+1 ++ eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing ++ pmull2 v6.1q,v20.2d,v7.2d ++ b Loop_mod2x_v8 ++ ++.align 4 ++Loop_mod2x_v8: ++ ext v18.16b,v3.16b,v3.16b,#8 ++ subs x3,x3,#32 //is there more data? ++ pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo ++ csel x12,xzr,x12,lo //is it time to zero x12? ++ ++ pmull v5.1q,v21.1d,v17.1d ++ eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing ++ pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi ++ eor v0.16b,v0.16b,v4.16b //accumulate ++ pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) ++ ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2] ++ ++ eor v2.16b,v2.16b,v6.16b ++ csel x12,xzr,x12,eq //is it time to zero x12? 
++ eor v1.16b,v1.16b,v5.16b ++ ++ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ eor v18.16b,v0.16b,v2.16b ++ eor v1.16b,v1.16b,v17.16b ++ ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3] ++#ifndef __AARCH64EB__ ++ rev64 v16.16b,v16.16b ++#endif ++ eor v1.16b,v1.16b,v18.16b ++ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ++ ++#ifndef __AARCH64EB__ ++ rev64 v17.16b,v17.16b ++#endif ++ ins v2.d[0],v1.d[1] ++ ins v1.d[1],v0.d[0] ++ ext v7.16b,v17.16b,v17.16b,#8 ++ ext v3.16b,v16.16b,v16.16b,#8 ++ eor v0.16b,v1.16b,v18.16b ++ pmull v4.1q,v20.1d,v7.1d //H·Ii+1 ++ eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction ++ pmull v0.1q,v0.1d,v19.1d ++ eor v3.16b,v3.16b,v18.16b ++ eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing ++ eor v3.16b,v3.16b,v0.16b ++ pmull2 v6.1q,v20.2d,v7.2d ++ b.hs Loop_mod2x_v8 //there was at least 32 more bytes ++ ++ eor v2.16b,v2.16b,v18.16b ++ ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b ++ adds x3,x3,#32 //re-construct x3 ++ eor v0.16b,v0.16b,v2.16b //re-construct v0.16b ++ b.eq Ldone_v8 //is x3 zero? ++Lodd_tail_v8: ++ ext v18.16b,v0.16b,v0.16b,#8 ++ eor v3.16b,v3.16b,v0.16b //inp^=Xi ++ eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi ++ ++ pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo ++ eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing ++ pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi ++ pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi) ++ ++ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ eor v18.16b,v0.16b,v2.16b ++ eor v1.16b,v1.16b,v17.16b ++ eor v1.16b,v1.16b,v18.16b ++ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ++ ++ ins v2.d[0],v1.d[1] ++ ins v1.d[1],v0.d[0] ++ eor v0.16b,v1.16b,v18.16b ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction ++ pmull v0.1q,v0.1d,v19.1d ++ eor v18.16b,v18.16b,v2.16b ++ eor v0.16b,v0.16b,v18.16b ++ ++Ldone_v8: ++#ifndef __AARCH64EB__ ++ rev64 v0.16b,v0.16b ++#endif ++ ext v0.16b,v0.16b,v0.16b,#8 ++ st1 {v0.2d},[x0] //write out Xi ++ ++ ret ++ ++ ++.align 4 ++gcm_ghash_v8_4x: ++Lgcm_ghash_v8_4x: ++ ld1 {v0.2d},[x0] //load [rotated] Xi ++ ld1 {v20.2d,v21.2d,v22.2d},[x1],#48 //load twisted H, ..., H^2 ++ movi v19.16b,#0xe1 ++ ld1 {v26.2d,v27.2d,v28.2d},[x1] //load twisted H^3, ..., H^4 ++ shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant ++ ++ ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 ++#ifndef __AARCH64EB__ ++ rev64 v0.16b,v0.16b ++ rev64 v5.16b,v5.16b ++ rev64 v6.16b,v6.16b ++ rev64 v7.16b,v7.16b ++ rev64 v4.16b,v4.16b ++#endif ++ ext v25.16b,v7.16b,v7.16b,#8 ++ ext v24.16b,v6.16b,v6.16b,#8 ++ ext v23.16b,v5.16b,v5.16b,#8 ++ ++ pmull v29.1q,v20.1d,v25.1d //H·Ii+3 ++ eor v7.16b,v7.16b,v25.16b ++ pmull2 v31.1q,v20.2d,v25.2d ++ pmull v30.1q,v21.1d,v7.1d ++ ++ pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 ++ eor v6.16b,v6.16b,v24.16b ++ pmull2 v24.1q,v22.2d,v24.2d ++ pmull2 v6.1q,v21.2d,v6.2d ++ ++ eor v29.16b,v29.16b,v16.16b ++ eor v31.16b,v31.16b,v24.16b ++ eor v30.16b,v30.16b,v6.16b ++ ++ pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 ++ eor v5.16b,v5.16b,v23.16b ++ pmull2 v23.1q,v26.2d,v23.2d ++ pmull v5.1q,v27.1d,v5.1d ++ ++ eor v29.16b,v29.16b,v7.16b ++ eor v31.16b,v31.16b,v23.16b ++ eor v30.16b,v30.16b,v5.16b ++ ++ subs x3,x3,#128 ++ b.lo Ltail4x ++ ++ b Loop4x ++ ++.align 4 ++Loop4x: ++ eor v16.16b,v4.16b,v0.16b ++ ld1 {v4.2d,v5.2d,v6.2d,v7.2d},[x2],#64 ++ ext v3.16b,v16.16b,v16.16b,#8 ++#ifndef __AARCH64EB__ ++ rev64 v5.16b,v5.16b ++ rev64 v6.16b,v6.16b ++ rev64 v7.16b,v7.16b ++ rev64 v4.16b,v4.16b ++#endif ++ ++ pmull v0.1q,v28.1d,v3.1d 
//H^4·(Xi+Ii) ++ eor v16.16b,v16.16b,v3.16b ++ pmull2 v2.1q,v28.2d,v3.2d ++ ext v25.16b,v7.16b,v7.16b,#8 ++ pmull2 v1.1q,v27.2d,v16.2d ++ ++ eor v0.16b,v0.16b,v29.16b ++ eor v2.16b,v2.16b,v31.16b ++ ext v24.16b,v6.16b,v6.16b,#8 ++ eor v1.16b,v1.16b,v30.16b ++ ext v23.16b,v5.16b,v5.16b,#8 ++ ++ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ eor v18.16b,v0.16b,v2.16b ++ pmull v29.1q,v20.1d,v25.1d //H·Ii+3 ++ eor v7.16b,v7.16b,v25.16b ++ eor v1.16b,v1.16b,v17.16b ++ pmull2 v31.1q,v20.2d,v25.2d ++ eor v1.16b,v1.16b,v18.16b ++ pmull v30.1q,v21.1d,v7.1d ++ ++ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ++ ins v2.d[0],v1.d[1] ++ ins v1.d[1],v0.d[0] ++ pmull v16.1q,v22.1d,v24.1d //H^2·Ii+2 ++ eor v6.16b,v6.16b,v24.16b ++ pmull2 v24.1q,v22.2d,v24.2d ++ eor v0.16b,v1.16b,v18.16b ++ pmull2 v6.1q,v21.2d,v6.2d ++ ++ eor v29.16b,v29.16b,v16.16b ++ eor v31.16b,v31.16b,v24.16b ++ eor v30.16b,v30.16b,v6.16b ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction ++ pmull v0.1q,v0.1d,v19.1d ++ pmull v7.1q,v26.1d,v23.1d //H^3·Ii+1 ++ eor v5.16b,v5.16b,v23.16b ++ eor v18.16b,v18.16b,v2.16b ++ pmull2 v23.1q,v26.2d,v23.2d ++ pmull v5.1q,v27.1d,v5.1d ++ ++ eor v0.16b,v0.16b,v18.16b ++ eor v29.16b,v29.16b,v7.16b ++ eor v31.16b,v31.16b,v23.16b ++ ext v0.16b,v0.16b,v0.16b,#8 ++ eor v30.16b,v30.16b,v5.16b ++ ++ subs x3,x3,#64 ++ b.hs Loop4x ++ ++Ltail4x: ++ eor v16.16b,v4.16b,v0.16b ++ ext v3.16b,v16.16b,v16.16b,#8 ++ ++ pmull v0.1q,v28.1d,v3.1d //H^4·(Xi+Ii) ++ eor v16.16b,v16.16b,v3.16b ++ pmull2 v2.1q,v28.2d,v3.2d ++ pmull2 v1.1q,v27.2d,v16.2d ++ ++ eor v0.16b,v0.16b,v29.16b ++ eor v2.16b,v2.16b,v31.16b ++ eor v1.16b,v1.16b,v30.16b ++ ++ adds x3,x3,#64 ++ b.eq Ldone4x ++ ++ cmp x3,#32 ++ b.lo Lone ++ b.eq Ltwo ++Lthree: ++ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ eor v18.16b,v0.16b,v2.16b ++ eor v1.16b,v1.16b,v17.16b ++ ld1 {v4.2d,v5.2d,v6.2d},[x2] ++ eor v1.16b,v1.16b,v18.16b ++#ifndef __AARCH64EB__ ++ rev64 v5.16b,v5.16b ++ rev64 v6.16b,v6.16b ++ rev64 v4.16b,v4.16b ++#endif ++ ++ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ++ ins v2.d[0],v1.d[1] ++ ins v1.d[1],v0.d[0] ++ ext v24.16b,v6.16b,v6.16b,#8 ++ ext v23.16b,v5.16b,v5.16b,#8 ++ eor v0.16b,v1.16b,v18.16b ++ ++ pmull v29.1q,v20.1d,v24.1d //H·Ii+2 ++ eor v6.16b,v6.16b,v24.16b ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction ++ pmull v0.1q,v0.1d,v19.1d ++ eor v18.16b,v18.16b,v2.16b ++ pmull2 v31.1q,v20.2d,v24.2d ++ pmull v30.1q,v21.1d,v6.1d ++ eor v0.16b,v0.16b,v18.16b ++ pmull v7.1q,v22.1d,v23.1d //H^2·Ii+1 ++ eor v5.16b,v5.16b,v23.16b ++ ext v0.16b,v0.16b,v0.16b,#8 ++ ++ pmull2 v23.1q,v22.2d,v23.2d ++ eor v16.16b,v4.16b,v0.16b ++ pmull2 v5.1q,v21.2d,v5.2d ++ ext v3.16b,v16.16b,v16.16b,#8 ++ ++ eor v29.16b,v29.16b,v7.16b ++ eor v31.16b,v31.16b,v23.16b ++ eor v30.16b,v30.16b,v5.16b ++ ++ pmull v0.1q,v26.1d,v3.1d //H^3·(Xi+Ii) ++ eor v16.16b,v16.16b,v3.16b ++ pmull2 v2.1q,v26.2d,v3.2d ++ pmull v1.1q,v27.1d,v16.1d ++ ++ eor v0.16b,v0.16b,v29.16b ++ eor v2.16b,v2.16b,v31.16b ++ eor v1.16b,v1.16b,v30.16b ++ b Ldone4x ++ ++.align 4 ++Ltwo: ++ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ eor v18.16b,v0.16b,v2.16b ++ eor v1.16b,v1.16b,v17.16b ++ ld1 {v4.2d,v5.2d},[x2] ++ eor v1.16b,v1.16b,v18.16b ++#ifndef __AARCH64EB__ ++ rev64 v5.16b,v5.16b ++ rev64 v4.16b,v4.16b ++#endif ++ ++ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ++ ins v2.d[0],v1.d[1] ++ ins v1.d[1],v0.d[0] ++ ext v23.16b,v5.16b,v5.16b,#8 ++ eor v0.16b,v1.16b,v18.16b ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of 
reduction ++ pmull v0.1q,v0.1d,v19.1d ++ eor v18.16b,v18.16b,v2.16b ++ eor v0.16b,v0.16b,v18.16b ++ ext v0.16b,v0.16b,v0.16b,#8 ++ ++ pmull v29.1q,v20.1d,v23.1d //H·Ii+1 ++ eor v5.16b,v5.16b,v23.16b ++ ++ eor v16.16b,v4.16b,v0.16b ++ ext v3.16b,v16.16b,v16.16b,#8 ++ ++ pmull2 v31.1q,v20.2d,v23.2d ++ pmull v30.1q,v21.1d,v5.1d ++ ++ pmull v0.1q,v22.1d,v3.1d //H^2·(Xi+Ii) ++ eor v16.16b,v16.16b,v3.16b ++ pmull2 v2.1q,v22.2d,v3.2d ++ pmull2 v1.1q,v21.2d,v16.2d ++ ++ eor v0.16b,v0.16b,v29.16b ++ eor v2.16b,v2.16b,v31.16b ++ eor v1.16b,v1.16b,v30.16b ++ b Ldone4x ++ ++.align 4 ++Lone: ++ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ eor v18.16b,v0.16b,v2.16b ++ eor v1.16b,v1.16b,v17.16b ++ ld1 {v4.2d},[x2] ++ eor v1.16b,v1.16b,v18.16b ++#ifndef __AARCH64EB__ ++ rev64 v4.16b,v4.16b ++#endif ++ ++ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ++ ins v2.d[0],v1.d[1] ++ ins v1.d[1],v0.d[0] ++ eor v0.16b,v1.16b,v18.16b ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction ++ pmull v0.1q,v0.1d,v19.1d ++ eor v18.16b,v18.16b,v2.16b ++ eor v0.16b,v0.16b,v18.16b ++ ext v0.16b,v0.16b,v0.16b,#8 ++ ++ eor v16.16b,v4.16b,v0.16b ++ ext v3.16b,v16.16b,v16.16b,#8 ++ ++ pmull v0.1q,v20.1d,v3.1d ++ eor v16.16b,v16.16b,v3.16b ++ pmull2 v2.1q,v20.2d,v3.2d ++ pmull v1.1q,v21.1d,v16.1d ++ ++Ldone4x: ++ ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing ++ eor v18.16b,v0.16b,v2.16b ++ eor v1.16b,v1.16b,v17.16b ++ eor v1.16b,v1.16b,v18.16b ++ ++ pmull v18.1q,v0.1d,v19.1d //1st phase of reduction ++ ins v2.d[0],v1.d[1] ++ ins v1.d[1],v0.d[0] ++ eor v0.16b,v1.16b,v18.16b ++ ++ ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction ++ pmull v0.1q,v0.1d,v19.1d ++ eor v18.16b,v18.16b,v2.16b ++ eor v0.16b,v0.16b,v18.16b ++ ext v0.16b,v0.16b,v0.16b,#8 ++ ++#ifndef __AARCH64EB__ ++ rev64 v0.16b,v0.16b ++#endif ++ st1 {v0.2d},[x0] //write out Xi ++ ++ ret ++ ++.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-aarch64/crypto/fipsmodule/sha1-armv8.S b/apple-aarch64/crypto/fipsmodule/sha1-armv8.S +new file mode 100644 +index 0000000..62ba800 +--- /dev/null ++++ b/apple-aarch64/crypto/fipsmodule/sha1-armv8.S +@@ -0,0 +1,1235 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++.text ++ ++ ++.private_extern _OPENSSL_armcap_P ++.globl _sha1_block_data_order ++.private_extern _sha1_block_data_order ++ ++.align 6 ++_sha1_block_data_order: ++ // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. ++ AARCH64_VALID_CALL_TARGET ++#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 ++ adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P ++#else ++ adrp x16,_OPENSSL_armcap_P@PAGE ++#endif ++ ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF] ++ tst w16,#ARMV8_SHA1 ++ b.ne Lv8_entry ++ ++ stp x29,x30,[sp,#-96]! 
++ add x29,sp,#0 ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ ++ ldp w20,w21,[x0] ++ ldp w22,w23,[x0,#8] ++ ldr w24,[x0,#16] ++ ++Loop: ++ ldr x3,[x1],#64 ++ movz w28,#0x7999 ++ sub x2,x2,#1 ++ movk w28,#0x5a82,lsl#16 ++#ifdef __AARCH64EB__ ++ ror x3,x3,#32 ++#else ++ rev32 x3,x3 ++#endif ++ add w24,w24,w28 // warm it up ++ add w24,w24,w3 ++ lsr x4,x3,#32 ++ ldr x5,[x1,#-56] ++ bic w25,w23,w21 ++ and w26,w22,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ orr w25,w25,w26 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ add w23,w23,w4 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++#ifdef __AARCH64EB__ ++ ror x5,x5,#32 ++#else ++ rev32 x5,x5 ++#endif ++ bic w25,w22,w20 ++ and w26,w21,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ orr w25,w25,w26 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ add w22,w22,w5 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ lsr x6,x5,#32 ++ ldr x7,[x1,#-48] ++ bic w25,w21,w24 ++ and w26,w20,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ orr w25,w25,w26 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ add w21,w21,w6 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++#ifdef __AARCH64EB__ ++ ror x7,x7,#32 ++#else ++ rev32 x7,x7 ++#endif ++ bic w25,w20,w23 ++ and w26,w24,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ orr w25,w25,w26 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ add w20,w20,w7 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ lsr x8,x7,#32 ++ ldr x9,[x1,#-40] ++ bic w25,w24,w22 ++ and w26,w23,w22 ++ ror w27,w21,#27 ++ add w24,w24,w28 // future e+=K ++ orr w25,w25,w26 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror w22,w22,#2 ++ add w24,w24,w8 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++#ifdef __AARCH64EB__ ++ ror x9,x9,#32 ++#else ++ rev32 x9,x9 ++#endif ++ bic w25,w23,w21 ++ and w26,w22,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ orr w25,w25,w26 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ add w23,w23,w9 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ lsr x10,x9,#32 ++ ldr x11,[x1,#-32] ++ bic w25,w22,w20 ++ and w26,w21,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ orr w25,w25,w26 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ add w22,w22,w10 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++#ifdef __AARCH64EB__ ++ ror x11,x11,#32 ++#else ++ rev32 x11,x11 ++#endif ++ bic w25,w21,w24 ++ and w26,w20,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ orr w25,w25,w26 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ add w21,w21,w11 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ lsr x12,x11,#32 ++ ldr x13,[x1,#-24] ++ bic w25,w20,w23 ++ and w26,w24,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ orr w25,w25,w26 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ add w20,w20,w12 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++#ifdef __AARCH64EB__ ++ ror x13,x13,#32 ++#else ++ rev32 x13,x13 ++#endif ++ bic w25,w24,w22 ++ and w26,w23,w22 ++ ror w27,w21,#27 ++ add w24,w24,w28 // future e+=K ++ orr w25,w25,w26 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror w22,w22,#2 ++ add w24,w24,w13 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ lsr x14,x13,#32 ++ ldr x15,[x1,#-16] ++ bic w25,w23,w21 ++ and w26,w22,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ orr w25,w25,w26 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ add w23,w23,w14 // future e+=X[i] ++ add 
w24,w24,w25 // e+=F(b,c,d) ++#ifdef __AARCH64EB__ ++ ror x15,x15,#32 ++#else ++ rev32 x15,x15 ++#endif ++ bic w25,w22,w20 ++ and w26,w21,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ orr w25,w25,w26 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ add w22,w22,w15 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ lsr x16,x15,#32 ++ ldr x17,[x1,#-8] ++ bic w25,w21,w24 ++ and w26,w20,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ orr w25,w25,w26 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ add w21,w21,w16 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++#ifdef __AARCH64EB__ ++ ror x17,x17,#32 ++#else ++ rev32 x17,x17 ++#endif ++ bic w25,w20,w23 ++ and w26,w24,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ orr w25,w25,w26 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ add w20,w20,w17 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ lsr x19,x17,#32 ++ eor w3,w3,w5 ++ bic w25,w24,w22 ++ and w26,w23,w22 ++ ror w27,w21,#27 ++ eor w3,w3,w11 ++ add w24,w24,w28 // future e+=K ++ orr w25,w25,w26 ++ add w20,w20,w27 // e+=rot(a,5) ++ eor w3,w3,w16 ++ ror w22,w22,#2 ++ add w24,w24,w19 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w3,w3,#31 ++ eor w4,w4,w6 ++ bic w25,w23,w21 ++ and w26,w22,w21 ++ ror w27,w20,#27 ++ eor w4,w4,w12 ++ add w23,w23,w28 // future e+=K ++ orr w25,w25,w26 ++ add w24,w24,w27 // e+=rot(a,5) ++ eor w4,w4,w17 ++ ror w21,w21,#2 ++ add w23,w23,w3 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w4,w4,#31 ++ eor w5,w5,w7 ++ bic w25,w22,w20 ++ and w26,w21,w20 ++ ror w27,w24,#27 ++ eor w5,w5,w13 ++ add w22,w22,w28 // future e+=K ++ orr w25,w25,w26 ++ add w23,w23,w27 // e+=rot(a,5) ++ eor w5,w5,w19 ++ ror w20,w20,#2 ++ add w22,w22,w4 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w5,w5,#31 ++ eor w6,w6,w8 ++ bic w25,w21,w24 ++ and w26,w20,w24 ++ ror w27,w23,#27 ++ eor w6,w6,w14 ++ add w21,w21,w28 // future e+=K ++ orr w25,w25,w26 ++ add w22,w22,w27 // e+=rot(a,5) ++ eor w6,w6,w3 ++ ror w24,w24,#2 ++ add w21,w21,w5 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w6,w6,#31 ++ eor w7,w7,w9 ++ bic w25,w20,w23 ++ and w26,w24,w23 ++ ror w27,w22,#27 ++ eor w7,w7,w15 ++ add w20,w20,w28 // future e+=K ++ orr w25,w25,w26 ++ add w21,w21,w27 // e+=rot(a,5) ++ eor w7,w7,w4 ++ ror w23,w23,#2 ++ add w20,w20,w6 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w7,w7,#31 ++ movz w28,#0xeba1 ++ movk w28,#0x6ed9,lsl#16 ++ eor w8,w8,w10 ++ bic w25,w24,w22 ++ and w26,w23,w22 ++ ror w27,w21,#27 ++ eor w8,w8,w16 ++ add w24,w24,w28 // future e+=K ++ orr w25,w25,w26 ++ add w20,w20,w27 // e+=rot(a,5) ++ eor w8,w8,w5 ++ ror w22,w22,#2 ++ add w24,w24,w7 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w8,w8,#31 ++ eor w9,w9,w11 ++ eor w25,w23,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ eor w9,w9,w17 ++ eor w25,w25,w22 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ eor w9,w9,w6 ++ add w23,w23,w8 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w9,w9,#31 ++ eor w10,w10,w12 ++ eor w25,w22,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ eor w10,w10,w19 ++ eor w25,w25,w21 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ eor w10,w10,w7 ++ add w22,w22,w9 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w10,w10,#31 ++ eor w11,w11,w13 ++ eor w25,w21,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ eor w11,w11,w3 ++ eor w25,w25,w20 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ eor w11,w11,w8 ++ add 
w21,w21,w10 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w11,w11,#31 ++ eor w12,w12,w14 ++ eor w25,w20,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ eor w12,w12,w4 ++ eor w25,w25,w24 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ eor w12,w12,w9 ++ add w20,w20,w11 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w12,w12,#31 ++ eor w13,w13,w15 ++ eor w25,w24,w22 ++ ror w27,w21,#27 ++ add w24,w24,w28 // future e+=K ++ eor w13,w13,w5 ++ eor w25,w25,w23 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror w22,w22,#2 ++ eor w13,w13,w10 ++ add w24,w24,w12 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w13,w13,#31 ++ eor w14,w14,w16 ++ eor w25,w23,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ eor w14,w14,w6 ++ eor w25,w25,w22 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ eor w14,w14,w11 ++ add w23,w23,w13 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w14,w14,#31 ++ eor w15,w15,w17 ++ eor w25,w22,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ eor w15,w15,w7 ++ eor w25,w25,w21 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ eor w15,w15,w12 ++ add w22,w22,w14 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w15,w15,#31 ++ eor w16,w16,w19 ++ eor w25,w21,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ eor w16,w16,w8 ++ eor w25,w25,w20 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ eor w16,w16,w13 ++ add w21,w21,w15 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w16,w16,#31 ++ eor w17,w17,w3 ++ eor w25,w20,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ eor w17,w17,w9 ++ eor w25,w25,w24 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ eor w17,w17,w14 ++ add w20,w20,w16 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w17,w17,#31 ++ eor w19,w19,w4 ++ eor w25,w24,w22 ++ ror w27,w21,#27 ++ add w24,w24,w28 // future e+=K ++ eor w19,w19,w10 ++ eor w25,w25,w23 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror w22,w22,#2 ++ eor w19,w19,w15 ++ add w24,w24,w17 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w19,w19,#31 ++ eor w3,w3,w5 ++ eor w25,w23,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ eor w3,w3,w11 ++ eor w25,w25,w22 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ eor w3,w3,w16 ++ add w23,w23,w19 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w3,w3,#31 ++ eor w4,w4,w6 ++ eor w25,w22,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ eor w4,w4,w12 ++ eor w25,w25,w21 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ eor w4,w4,w17 ++ add w22,w22,w3 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w4,w4,#31 ++ eor w5,w5,w7 ++ eor w25,w21,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ eor w5,w5,w13 ++ eor w25,w25,w20 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ eor w5,w5,w19 ++ add w21,w21,w4 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w5,w5,#31 ++ eor w6,w6,w8 ++ eor w25,w20,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ eor w6,w6,w14 ++ eor w25,w25,w24 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ eor w6,w6,w3 ++ add w20,w20,w5 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w6,w6,#31 ++ eor w7,w7,w9 ++ eor w25,w24,w22 ++ ror w27,w21,#27 ++ add w24,w24,w28 // future e+=K ++ eor w7,w7,w15 ++ eor w25,w25,w23 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror w22,w22,#2 ++ eor w7,w7,w4 ++ add w24,w24,w6 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w7,w7,#31 ++ eor w8,w8,w10 ++ eor w25,w23,w21 ++ ror 
w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ eor w8,w8,w16 ++ eor w25,w25,w22 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ eor w8,w8,w5 ++ add w23,w23,w7 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w8,w8,#31 ++ eor w9,w9,w11 ++ eor w25,w22,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ eor w9,w9,w17 ++ eor w25,w25,w21 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ eor w9,w9,w6 ++ add w22,w22,w8 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w9,w9,#31 ++ eor w10,w10,w12 ++ eor w25,w21,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ eor w10,w10,w19 ++ eor w25,w25,w20 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ eor w10,w10,w7 ++ add w21,w21,w9 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w10,w10,#31 ++ eor w11,w11,w13 ++ eor w25,w20,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ eor w11,w11,w3 ++ eor w25,w25,w24 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ eor w11,w11,w8 ++ add w20,w20,w10 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w11,w11,#31 ++ movz w28,#0xbcdc ++ movk w28,#0x8f1b,lsl#16 ++ eor w12,w12,w14 ++ eor w25,w24,w22 ++ ror w27,w21,#27 ++ add w24,w24,w28 // future e+=K ++ eor w12,w12,w4 ++ eor w25,w25,w23 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror w22,w22,#2 ++ eor w12,w12,w9 ++ add w24,w24,w11 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w12,w12,#31 ++ orr w25,w21,w22 ++ and w26,w21,w22 ++ eor w13,w13,w15 ++ ror w27,w20,#27 ++ and w25,w25,w23 ++ add w23,w23,w28 // future e+=K ++ eor w13,w13,w5 ++ add w24,w24,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w21,w21,#2 ++ eor w13,w13,w10 ++ add w23,w23,w12 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w13,w13,#31 ++ orr w25,w20,w21 ++ and w26,w20,w21 ++ eor w14,w14,w16 ++ ror w27,w24,#27 ++ and w25,w25,w22 ++ add w22,w22,w28 // future e+=K ++ eor w14,w14,w6 ++ add w23,w23,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w20,w20,#2 ++ eor w14,w14,w11 ++ add w22,w22,w13 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w14,w14,#31 ++ orr w25,w24,w20 ++ and w26,w24,w20 ++ eor w15,w15,w17 ++ ror w27,w23,#27 ++ and w25,w25,w21 ++ add w21,w21,w28 // future e+=K ++ eor w15,w15,w7 ++ add w22,w22,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w24,w24,#2 ++ eor w15,w15,w12 ++ add w21,w21,w14 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w15,w15,#31 ++ orr w25,w23,w24 ++ and w26,w23,w24 ++ eor w16,w16,w19 ++ ror w27,w22,#27 ++ and w25,w25,w20 ++ add w20,w20,w28 // future e+=K ++ eor w16,w16,w8 ++ add w21,w21,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w23,w23,#2 ++ eor w16,w16,w13 ++ add w20,w20,w15 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w16,w16,#31 ++ orr w25,w22,w23 ++ and w26,w22,w23 ++ eor w17,w17,w3 ++ ror w27,w21,#27 ++ and w25,w25,w24 ++ add w24,w24,w28 // future e+=K ++ eor w17,w17,w9 ++ add w20,w20,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w22,w22,#2 ++ eor w17,w17,w14 ++ add w24,w24,w16 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w17,w17,#31 ++ orr w25,w21,w22 ++ and w26,w21,w22 ++ eor w19,w19,w4 ++ ror w27,w20,#27 ++ and w25,w25,w23 ++ add w23,w23,w28 // future e+=K ++ eor w19,w19,w10 ++ add w24,w24,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w21,w21,#2 ++ eor w19,w19,w15 ++ add w23,w23,w17 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w19,w19,#31 ++ orr w25,w20,w21 ++ and w26,w20,w21 ++ eor w3,w3,w5 ++ ror w27,w24,#27 ++ and w25,w25,w22 ++ add w22,w22,w28 // future e+=K ++ eor w3,w3,w11 ++ add w23,w23,w27 // 
e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w20,w20,#2 ++ eor w3,w3,w16 ++ add w22,w22,w19 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w3,w3,#31 ++ orr w25,w24,w20 ++ and w26,w24,w20 ++ eor w4,w4,w6 ++ ror w27,w23,#27 ++ and w25,w25,w21 ++ add w21,w21,w28 // future e+=K ++ eor w4,w4,w12 ++ add w22,w22,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w24,w24,#2 ++ eor w4,w4,w17 ++ add w21,w21,w3 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w4,w4,#31 ++ orr w25,w23,w24 ++ and w26,w23,w24 ++ eor w5,w5,w7 ++ ror w27,w22,#27 ++ and w25,w25,w20 ++ add w20,w20,w28 // future e+=K ++ eor w5,w5,w13 ++ add w21,w21,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w23,w23,#2 ++ eor w5,w5,w19 ++ add w20,w20,w4 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w5,w5,#31 ++ orr w25,w22,w23 ++ and w26,w22,w23 ++ eor w6,w6,w8 ++ ror w27,w21,#27 ++ and w25,w25,w24 ++ add w24,w24,w28 // future e+=K ++ eor w6,w6,w14 ++ add w20,w20,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w22,w22,#2 ++ eor w6,w6,w3 ++ add w24,w24,w5 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w6,w6,#31 ++ orr w25,w21,w22 ++ and w26,w21,w22 ++ eor w7,w7,w9 ++ ror w27,w20,#27 ++ and w25,w25,w23 ++ add w23,w23,w28 // future e+=K ++ eor w7,w7,w15 ++ add w24,w24,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w21,w21,#2 ++ eor w7,w7,w4 ++ add w23,w23,w6 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w7,w7,#31 ++ orr w25,w20,w21 ++ and w26,w20,w21 ++ eor w8,w8,w10 ++ ror w27,w24,#27 ++ and w25,w25,w22 ++ add w22,w22,w28 // future e+=K ++ eor w8,w8,w16 ++ add w23,w23,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w20,w20,#2 ++ eor w8,w8,w5 ++ add w22,w22,w7 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w8,w8,#31 ++ orr w25,w24,w20 ++ and w26,w24,w20 ++ eor w9,w9,w11 ++ ror w27,w23,#27 ++ and w25,w25,w21 ++ add w21,w21,w28 // future e+=K ++ eor w9,w9,w17 ++ add w22,w22,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w24,w24,#2 ++ eor w9,w9,w6 ++ add w21,w21,w8 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w9,w9,#31 ++ orr w25,w23,w24 ++ and w26,w23,w24 ++ eor w10,w10,w12 ++ ror w27,w22,#27 ++ and w25,w25,w20 ++ add w20,w20,w28 // future e+=K ++ eor w10,w10,w19 ++ add w21,w21,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w23,w23,#2 ++ eor w10,w10,w7 ++ add w20,w20,w9 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w10,w10,#31 ++ orr w25,w22,w23 ++ and w26,w22,w23 ++ eor w11,w11,w13 ++ ror w27,w21,#27 ++ and w25,w25,w24 ++ add w24,w24,w28 // future e+=K ++ eor w11,w11,w3 ++ add w20,w20,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w22,w22,#2 ++ eor w11,w11,w8 ++ add w24,w24,w10 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w11,w11,#31 ++ orr w25,w21,w22 ++ and w26,w21,w22 ++ eor w12,w12,w14 ++ ror w27,w20,#27 ++ and w25,w25,w23 ++ add w23,w23,w28 // future e+=K ++ eor w12,w12,w4 ++ add w24,w24,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w21,w21,#2 ++ eor w12,w12,w9 ++ add w23,w23,w11 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w12,w12,#31 ++ orr w25,w20,w21 ++ and w26,w20,w21 ++ eor w13,w13,w15 ++ ror w27,w24,#27 ++ and w25,w25,w22 ++ add w22,w22,w28 // future e+=K ++ eor w13,w13,w5 ++ add w23,w23,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w20,w20,#2 ++ eor w13,w13,w10 ++ add w22,w22,w12 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w13,w13,#31 ++ orr w25,w24,w20 ++ and w26,w24,w20 ++ eor w14,w14,w16 ++ ror w27,w23,#27 ++ and w25,w25,w21 ++ add w21,w21,w28 // future e+=K ++ eor w14,w14,w6 ++ add w22,w22,w27 // e+=rot(a,5) ++ orr 
w25,w25,w26 ++ ror w24,w24,#2 ++ eor w14,w14,w11 ++ add w21,w21,w13 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w14,w14,#31 ++ orr w25,w23,w24 ++ and w26,w23,w24 ++ eor w15,w15,w17 ++ ror w27,w22,#27 ++ and w25,w25,w20 ++ add w20,w20,w28 // future e+=K ++ eor w15,w15,w7 ++ add w21,w21,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w23,w23,#2 ++ eor w15,w15,w12 ++ add w20,w20,w14 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w15,w15,#31 ++ movz w28,#0xc1d6 ++ movk w28,#0xca62,lsl#16 ++ orr w25,w22,w23 ++ and w26,w22,w23 ++ eor w16,w16,w19 ++ ror w27,w21,#27 ++ and w25,w25,w24 ++ add w24,w24,w28 // future e+=K ++ eor w16,w16,w8 ++ add w20,w20,w27 // e+=rot(a,5) ++ orr w25,w25,w26 ++ ror w22,w22,#2 ++ eor w16,w16,w13 ++ add w24,w24,w15 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w16,w16,#31 ++ eor w17,w17,w3 ++ eor w25,w23,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ eor w17,w17,w9 ++ eor w25,w25,w22 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ eor w17,w17,w14 ++ add w23,w23,w16 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w17,w17,#31 ++ eor w19,w19,w4 ++ eor w25,w22,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ eor w19,w19,w10 ++ eor w25,w25,w21 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ eor w19,w19,w15 ++ add w22,w22,w17 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w19,w19,#31 ++ eor w3,w3,w5 ++ eor w25,w21,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ eor w3,w3,w11 ++ eor w25,w25,w20 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ eor w3,w3,w16 ++ add w21,w21,w19 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w3,w3,#31 ++ eor w4,w4,w6 ++ eor w25,w20,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ eor w4,w4,w12 ++ eor w25,w25,w24 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ eor w4,w4,w17 ++ add w20,w20,w3 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w4,w4,#31 ++ eor w5,w5,w7 ++ eor w25,w24,w22 ++ ror w27,w21,#27 ++ add w24,w24,w28 // future e+=K ++ eor w5,w5,w13 ++ eor w25,w25,w23 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror w22,w22,#2 ++ eor w5,w5,w19 ++ add w24,w24,w4 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w5,w5,#31 ++ eor w6,w6,w8 ++ eor w25,w23,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ eor w6,w6,w14 ++ eor w25,w25,w22 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ eor w6,w6,w3 ++ add w23,w23,w5 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w6,w6,#31 ++ eor w7,w7,w9 ++ eor w25,w22,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ eor w7,w7,w15 ++ eor w25,w25,w21 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ eor w7,w7,w4 ++ add w22,w22,w6 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w7,w7,#31 ++ eor w8,w8,w10 ++ eor w25,w21,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ eor w8,w8,w16 ++ eor w25,w25,w20 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ eor w8,w8,w5 ++ add w21,w21,w7 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w8,w8,#31 ++ eor w9,w9,w11 ++ eor w25,w20,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ eor w9,w9,w17 ++ eor w25,w25,w24 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ eor w9,w9,w6 ++ add w20,w20,w8 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w9,w9,#31 ++ eor w10,w10,w12 ++ eor w25,w24,w22 ++ ror w27,w21,#27 ++ add w24,w24,w28 // future e+=K ++ eor w10,w10,w19 ++ eor w25,w25,w23 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror 
w22,w22,#2 ++ eor w10,w10,w7 ++ add w24,w24,w9 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w10,w10,#31 ++ eor w11,w11,w13 ++ eor w25,w23,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ eor w11,w11,w3 ++ eor w25,w25,w22 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ eor w11,w11,w8 ++ add w23,w23,w10 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w11,w11,#31 ++ eor w12,w12,w14 ++ eor w25,w22,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ eor w12,w12,w4 ++ eor w25,w25,w21 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ eor w12,w12,w9 ++ add w22,w22,w11 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w12,w12,#31 ++ eor w13,w13,w15 ++ eor w25,w21,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ eor w13,w13,w5 ++ eor w25,w25,w20 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ eor w13,w13,w10 ++ add w21,w21,w12 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w13,w13,#31 ++ eor w14,w14,w16 ++ eor w25,w20,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ eor w14,w14,w6 ++ eor w25,w25,w24 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ eor w14,w14,w11 ++ add w20,w20,w13 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ror w14,w14,#31 ++ eor w15,w15,w17 ++ eor w25,w24,w22 ++ ror w27,w21,#27 ++ add w24,w24,w28 // future e+=K ++ eor w15,w15,w7 ++ eor w25,w25,w23 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror w22,w22,#2 ++ eor w15,w15,w12 ++ add w24,w24,w14 // future e+=X[i] ++ add w20,w20,w25 // e+=F(b,c,d) ++ ror w15,w15,#31 ++ eor w16,w16,w19 ++ eor w25,w23,w21 ++ ror w27,w20,#27 ++ add w23,w23,w28 // future e+=K ++ eor w16,w16,w8 ++ eor w25,w25,w22 ++ add w24,w24,w27 // e+=rot(a,5) ++ ror w21,w21,#2 ++ eor w16,w16,w13 ++ add w23,w23,w15 // future e+=X[i] ++ add w24,w24,w25 // e+=F(b,c,d) ++ ror w16,w16,#31 ++ eor w17,w17,w3 ++ eor w25,w22,w20 ++ ror w27,w24,#27 ++ add w22,w22,w28 // future e+=K ++ eor w17,w17,w9 ++ eor w25,w25,w21 ++ add w23,w23,w27 // e+=rot(a,5) ++ ror w20,w20,#2 ++ eor w17,w17,w14 ++ add w22,w22,w16 // future e+=X[i] ++ add w23,w23,w25 // e+=F(b,c,d) ++ ror w17,w17,#31 ++ eor w19,w19,w4 ++ eor w25,w21,w24 ++ ror w27,w23,#27 ++ add w21,w21,w28 // future e+=K ++ eor w19,w19,w10 ++ eor w25,w25,w20 ++ add w22,w22,w27 // e+=rot(a,5) ++ ror w24,w24,#2 ++ eor w19,w19,w15 ++ add w21,w21,w17 // future e+=X[i] ++ add w22,w22,w25 // e+=F(b,c,d) ++ ror w19,w19,#31 ++ ldp w4,w5,[x0] ++ eor w25,w20,w23 ++ ror w27,w22,#27 ++ add w20,w20,w28 // future e+=K ++ eor w25,w25,w24 ++ add w21,w21,w27 // e+=rot(a,5) ++ ror w23,w23,#2 ++ add w20,w20,w19 // future e+=X[i] ++ add w21,w21,w25 // e+=F(b,c,d) ++ ldp w6,w7,[x0,#8] ++ eor w25,w24,w22 ++ ror w27,w21,#27 ++ eor w25,w25,w23 ++ add w20,w20,w27 // e+=rot(a,5) ++ ror w22,w22,#2 ++ ldr w8,[x0,#16] ++ add w20,w20,w25 // e+=F(b,c,d) ++ add w21,w21,w5 ++ add w22,w22,w6 ++ add w20,w20,w4 ++ add w23,w23,w7 ++ add w24,w24,w8 ++ stp w20,w21,[x0] ++ stp w22,w23,[x0,#8] ++ str w24,[x0,#16] ++ cbnz x2,Loop ++ ++ ldp x19,x20,[sp,#16] ++ ldp x21,x22,[sp,#32] ++ ldp x23,x24,[sp,#48] ++ ldp x25,x26,[sp,#64] ++ ldp x27,x28,[sp,#80] ++ ldr x29,[sp],#96 ++ ret ++ ++ ++.align 6 ++sha1_block_armv8: ++ // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. ++ AARCH64_VALID_CALL_TARGET ++Lv8_entry: ++ stp x29,x30,[sp,#-16]! 
++ add x29,sp,#0 ++ ++ adrp x4,Lconst@PAGE ++ add x4,x4,Lconst@PAGEOFF ++ eor v1.16b,v1.16b,v1.16b ++ ld1 {v0.4s},[x0],#16 ++ ld1 {v1.s}[0],[x0] ++ sub x0,x0,#16 ++ ld1 {v16.4s,v17.4s,v18.4s,v19.4s},[x4] ++ ++Loop_hw: ++ ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 ++ sub x2,x2,#1 ++ rev32 v4.16b,v4.16b ++ rev32 v5.16b,v5.16b ++ ++ add v20.4s,v16.4s,v4.4s ++ rev32 v6.16b,v6.16b ++ orr v22.16b,v0.16b,v0.16b // offload ++ ++ add v21.4s,v16.4s,v5.4s ++ rev32 v7.16b,v7.16b ++.long 0x5e280803 //sha1h v3.16b,v0.16b ++.long 0x5e140020 //sha1c v0.16b,v1.16b,v20.4s // 0 ++ add v20.4s,v16.4s,v6.4s ++.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 1 ++.long 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s ++ add v21.4s,v16.4s,v7.4s ++.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b ++.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b ++.long 0x5e280803 //sha1h v3.16b,v0.16b // 2 ++.long 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s ++ add v20.4s,v16.4s,v4.4s ++.long 0x5e281885 //sha1su1 v5.16b,v4.16b ++.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 3 ++.long 0x5e150060 //sha1c v0.16b,v3.16b,v21.4s ++ add v21.4s,v17.4s,v5.4s ++.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b ++.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b ++.long 0x5e280803 //sha1h v3.16b,v0.16b // 4 ++.long 0x5e140040 //sha1c v0.16b,v2.16b,v20.4s ++ add v20.4s,v17.4s,v6.4s ++.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b ++.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 5 ++.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s ++ add v21.4s,v17.4s,v7.4s ++.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b ++.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b ++.long 0x5e280803 //sha1h v3.16b,v0.16b // 6 ++.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s ++ add v20.4s,v17.4s,v4.4s ++.long 0x5e281885 //sha1su1 v5.16b,v4.16b ++.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 7 ++.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s ++ add v21.4s,v17.4s,v5.4s ++.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b ++.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b ++.long 0x5e280803 //sha1h v3.16b,v0.16b // 8 ++.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s ++ add v20.4s,v18.4s,v6.4s ++.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b ++.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 9 ++.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s ++ add v21.4s,v18.4s,v7.4s ++.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b ++.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b ++.long 0x5e280803 //sha1h v3.16b,v0.16b // 10 ++.long 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s ++ add v20.4s,v18.4s,v4.4s ++.long 0x5e281885 //sha1su1 v5.16b,v4.16b ++.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 11 ++.long 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s ++ add v21.4s,v18.4s,v5.4s ++.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b ++.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b ++.long 0x5e280803 //sha1h v3.16b,v0.16b // 12 ++.long 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s ++ add v20.4s,v18.4s,v6.4s ++.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b ++.long 0x5e0630a4 //sha1su0 v4.16b,v5.16b,v6.16b ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 13 ++.long 0x5e152060 //sha1m v0.16b,v3.16b,v21.4s ++ add v21.4s,v19.4s,v7.4s ++.long 0x5e2818e4 //sha1su1 v4.16b,v7.16b ++.long 0x5e0730c5 //sha1su0 v5.16b,v6.16b,v7.16b ++.long 0x5e280803 //sha1h v3.16b,v0.16b // 14 ++.long 0x5e142040 //sha1m v0.16b,v2.16b,v20.4s ++ add v20.4s,v19.4s,v4.4s 
++.long 0x5e281885 //sha1su1 v5.16b,v4.16b ++.long 0x5e0430e6 //sha1su0 v6.16b,v7.16b,v4.16b ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 15 ++.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s ++ add v21.4s,v19.4s,v5.4s ++.long 0x5e2818a6 //sha1su1 v6.16b,v5.16b ++.long 0x5e053087 //sha1su0 v7.16b,v4.16b,v5.16b ++.long 0x5e280803 //sha1h v3.16b,v0.16b // 16 ++.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s ++ add v20.4s,v19.4s,v6.4s ++.long 0x5e2818c7 //sha1su1 v7.16b,v6.16b ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 17 ++.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s ++ add v21.4s,v19.4s,v7.4s ++ ++.long 0x5e280803 //sha1h v3.16b,v0.16b // 18 ++.long 0x5e141040 //sha1p v0.16b,v2.16b,v20.4s ++ ++.long 0x5e280802 //sha1h v2.16b,v0.16b // 19 ++.long 0x5e151060 //sha1p v0.16b,v3.16b,v21.4s ++ ++ add v1.4s,v1.4s,v2.4s ++ add v0.4s,v0.4s,v22.4s ++ ++ cbnz x2,Loop_hw ++ ++ st1 {v0.4s},[x0],#16 ++ st1 {v1.s}[0],[x0] ++ ++ ldr x29,[sp],#16 ++ ret ++ ++.section __TEXT,__const ++.align 6 ++Lconst: ++.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 //K_00_19 ++.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 //K_20_39 ++.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc //K_40_59 ++.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 //K_60_79 ++.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-aarch64/crypto/fipsmodule/sha256-armv8.S b/apple-aarch64/crypto/fipsmodule/sha256-armv8.S +new file mode 100644 +index 0000000..b40b260 +--- /dev/null ++++ b/apple-aarch64/crypto/fipsmodule/sha256-armv8.S +@@ -0,0 +1,1212 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. ++// ++// Licensed under the OpenSSL license (the "License"). You may not use ++// this file except in compliance with the License. You can obtain a copy ++// in the file LICENSE in the source distribution or at ++// https://www.openssl.org/source/license.html ++ ++// ==================================================================== ++// Written by Andy Polyakov for the OpenSSL ++// project. The module is, however, dual licensed under OpenSSL and ++// CRYPTOGAMS licenses depending on where you obtain it. For further ++// details see http://www.openssl.org/~appro/cryptogams/. ++// ++// Permission to use under GPLv2 terms is granted. ++// ==================================================================== ++// ++// SHA256/512 for ARMv8. 
++// ++// Performance in cycles per processed byte and improvement coefficient ++// over code generated with "default" compiler: ++// ++// SHA256-hw SHA256(*) SHA512 ++// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) ++// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) ++// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) ++// Denver 2.01 10.5 (+26%) 6.70 (+8%) ++// X-Gene 20.0 (+100%) 12.8 (+300%(***)) ++// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) ++// Kryo 1.92 17.4 (+30%) 11.2 (+8%) ++// ++// (*) Software SHA256 results are of lesser relevance, presented ++// mostly for informational purposes. ++// (**) The result is a trade-off: it's possible to improve it by ++// 10% (or by 1 cycle per round), but at the cost of 20% loss ++// on Cortex-A53 (or by 4 cycles per round). ++// (***) Super-impressive coefficients over gcc-generated code are ++// indication of some compiler "pathology", most notably code ++// generated with -mgeneral-regs-only is significantly faster ++// and the gap is only 40-90%. ++ ++#ifndef __KERNEL__ ++# include ++#endif ++ ++.text ++ ++ ++.private_extern _OPENSSL_armcap_P ++.globl _sha256_block_data_order ++.private_extern _sha256_block_data_order ++ ++.align 6 ++_sha256_block_data_order: ++ AARCH64_VALID_CALL_TARGET ++#ifndef __KERNEL__ ++#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 ++ adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P ++#else ++ adrp x16,_OPENSSL_armcap_P@PAGE ++#endif ++ ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF] ++ tst w16,#ARMV8_SHA256 ++ b.ne Lv8_entry ++#endif ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-128]! ++ add x29,sp,#0 ++ ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ sub sp,sp,#4*4 ++ ++ ldp w20,w21,[x0] // load context ++ ldp w22,w23,[x0,#2*4] ++ ldp w24,w25,[x0,#4*4] ++ add x2,x1,x2,lsl#6 // end of input ++ ldp w26,w27,[x0,#6*4] ++ adrp x30,LK256@PAGE ++ add x30,x30,LK256@PAGEOFF ++ stp x0,x2,[x29,#96] ++ ++Loop: ++ ldp w3,w4,[x1],#2*4 ++ ldr w19,[x30],#4 // *K++ ++ eor w28,w21,w22 // magic seed ++ str x1,[x29,#112] ++#ifndef __AARCH64EB__ ++ rev w3,w3 // 0 ++#endif ++ ror w16,w24,#6 ++ add w27,w27,w19 // h+=K[i] ++ eor w6,w24,w24,ror#14 ++ and w17,w25,w24 ++ bic w19,w26,w24 ++ add w27,w27,w3 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w20,w21 // a^b, b^c in next round ++ eor w16,w16,w6,ror#11 // Sigma1(e) ++ ror w6,w20,#2 ++ add w27,w27,w17 // h+=Ch(e,f,g) ++ eor w17,w20,w20,ror#9 ++ add w27,w27,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w23,w23,w27 // d+=h ++ eor w28,w28,w21 // Maj(a,b,c) ++ eor w17,w6,w17,ror#13 // Sigma0(a) ++ add w27,w27,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w27,w27,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w4,w4 // 1 ++#endif ++ ldp w5,w6,[x1],#2*4 ++ add w27,w27,w17 // h+=Sigma0(a) ++ ror w16,w23,#6 ++ add w26,w26,w28 // h+=K[i] ++ eor w7,w23,w23,ror#14 ++ and w17,w24,w23 ++ bic w28,w25,w23 ++ add w26,w26,w4 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w27,w20 // a^b, b^c in next round ++ eor w16,w16,w7,ror#11 // Sigma1(e) ++ ror w7,w27,#2 ++ add w26,w26,w17 // h+=Ch(e,f,g) ++ eor w17,w27,w27,ror#9 ++ add w26,w26,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w22,w22,w26 // d+=h ++ eor w19,w19,w20 // Maj(a,b,c) ++ eor w17,w7,w17,ror#13 // Sigma0(a) ++ add w26,w26,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w26,w26,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w5,w5 // 2 ++#endif ++ add w26,w26,w17 // 
h+=Sigma0(a) ++ ror w16,w22,#6 ++ add w25,w25,w19 // h+=K[i] ++ eor w8,w22,w22,ror#14 ++ and w17,w23,w22 ++ bic w19,w24,w22 ++ add w25,w25,w5 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w26,w27 // a^b, b^c in next round ++ eor w16,w16,w8,ror#11 // Sigma1(e) ++ ror w8,w26,#2 ++ add w25,w25,w17 // h+=Ch(e,f,g) ++ eor w17,w26,w26,ror#9 ++ add w25,w25,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w21,w21,w25 // d+=h ++ eor w28,w28,w27 // Maj(a,b,c) ++ eor w17,w8,w17,ror#13 // Sigma0(a) ++ add w25,w25,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w25,w25,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w6,w6 // 3 ++#endif ++ ldp w7,w8,[x1],#2*4 ++ add w25,w25,w17 // h+=Sigma0(a) ++ ror w16,w21,#6 ++ add w24,w24,w28 // h+=K[i] ++ eor w9,w21,w21,ror#14 ++ and w17,w22,w21 ++ bic w28,w23,w21 ++ add w24,w24,w6 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w25,w26 // a^b, b^c in next round ++ eor w16,w16,w9,ror#11 // Sigma1(e) ++ ror w9,w25,#2 ++ add w24,w24,w17 // h+=Ch(e,f,g) ++ eor w17,w25,w25,ror#9 ++ add w24,w24,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w20,w20,w24 // d+=h ++ eor w19,w19,w26 // Maj(a,b,c) ++ eor w17,w9,w17,ror#13 // Sigma0(a) ++ add w24,w24,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w24,w24,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w7,w7 // 4 ++#endif ++ add w24,w24,w17 // h+=Sigma0(a) ++ ror w16,w20,#6 ++ add w23,w23,w19 // h+=K[i] ++ eor w10,w20,w20,ror#14 ++ and w17,w21,w20 ++ bic w19,w22,w20 ++ add w23,w23,w7 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w24,w25 // a^b, b^c in next round ++ eor w16,w16,w10,ror#11 // Sigma1(e) ++ ror w10,w24,#2 ++ add w23,w23,w17 // h+=Ch(e,f,g) ++ eor w17,w24,w24,ror#9 ++ add w23,w23,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w27,w27,w23 // d+=h ++ eor w28,w28,w25 // Maj(a,b,c) ++ eor w17,w10,w17,ror#13 // Sigma0(a) ++ add w23,w23,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w23,w23,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w8,w8 // 5 ++#endif ++ ldp w9,w10,[x1],#2*4 ++ add w23,w23,w17 // h+=Sigma0(a) ++ ror w16,w27,#6 ++ add w22,w22,w28 // h+=K[i] ++ eor w11,w27,w27,ror#14 ++ and w17,w20,w27 ++ bic w28,w21,w27 ++ add w22,w22,w8 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w23,w24 // a^b, b^c in next round ++ eor w16,w16,w11,ror#11 // Sigma1(e) ++ ror w11,w23,#2 ++ add w22,w22,w17 // h+=Ch(e,f,g) ++ eor w17,w23,w23,ror#9 ++ add w22,w22,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w26,w26,w22 // d+=h ++ eor w19,w19,w24 // Maj(a,b,c) ++ eor w17,w11,w17,ror#13 // Sigma0(a) ++ add w22,w22,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w22,w22,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w9,w9 // 6 ++#endif ++ add w22,w22,w17 // h+=Sigma0(a) ++ ror w16,w26,#6 ++ add w21,w21,w19 // h+=K[i] ++ eor w12,w26,w26,ror#14 ++ and w17,w27,w26 ++ bic w19,w20,w26 ++ add w21,w21,w9 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w22,w23 // a^b, b^c in next round ++ eor w16,w16,w12,ror#11 // Sigma1(e) ++ ror w12,w22,#2 ++ add w21,w21,w17 // h+=Ch(e,f,g) ++ eor w17,w22,w22,ror#9 ++ add w21,w21,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w25,w25,w21 // d+=h ++ eor w28,w28,w23 // Maj(a,b,c) ++ eor w17,w12,w17,ror#13 // Sigma0(a) ++ add w21,w21,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w21,w21,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w10,w10 
// 7 ++#endif ++ ldp w11,w12,[x1],#2*4 ++ add w21,w21,w17 // h+=Sigma0(a) ++ ror w16,w25,#6 ++ add w20,w20,w28 // h+=K[i] ++ eor w13,w25,w25,ror#14 ++ and w17,w26,w25 ++ bic w28,w27,w25 ++ add w20,w20,w10 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w21,w22 // a^b, b^c in next round ++ eor w16,w16,w13,ror#11 // Sigma1(e) ++ ror w13,w21,#2 ++ add w20,w20,w17 // h+=Ch(e,f,g) ++ eor w17,w21,w21,ror#9 ++ add w20,w20,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w24,w24,w20 // d+=h ++ eor w19,w19,w22 // Maj(a,b,c) ++ eor w17,w13,w17,ror#13 // Sigma0(a) ++ add w20,w20,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w20,w20,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w11,w11 // 8 ++#endif ++ add w20,w20,w17 // h+=Sigma0(a) ++ ror w16,w24,#6 ++ add w27,w27,w19 // h+=K[i] ++ eor w14,w24,w24,ror#14 ++ and w17,w25,w24 ++ bic w19,w26,w24 ++ add w27,w27,w11 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w20,w21 // a^b, b^c in next round ++ eor w16,w16,w14,ror#11 // Sigma1(e) ++ ror w14,w20,#2 ++ add w27,w27,w17 // h+=Ch(e,f,g) ++ eor w17,w20,w20,ror#9 ++ add w27,w27,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w23,w23,w27 // d+=h ++ eor w28,w28,w21 // Maj(a,b,c) ++ eor w17,w14,w17,ror#13 // Sigma0(a) ++ add w27,w27,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w27,w27,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w12,w12 // 9 ++#endif ++ ldp w13,w14,[x1],#2*4 ++ add w27,w27,w17 // h+=Sigma0(a) ++ ror w16,w23,#6 ++ add w26,w26,w28 // h+=K[i] ++ eor w15,w23,w23,ror#14 ++ and w17,w24,w23 ++ bic w28,w25,w23 ++ add w26,w26,w12 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w27,w20 // a^b, b^c in next round ++ eor w16,w16,w15,ror#11 // Sigma1(e) ++ ror w15,w27,#2 ++ add w26,w26,w17 // h+=Ch(e,f,g) ++ eor w17,w27,w27,ror#9 ++ add w26,w26,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w22,w22,w26 // d+=h ++ eor w19,w19,w20 // Maj(a,b,c) ++ eor w17,w15,w17,ror#13 // Sigma0(a) ++ add w26,w26,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w26,w26,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w13,w13 // 10 ++#endif ++ add w26,w26,w17 // h+=Sigma0(a) ++ ror w16,w22,#6 ++ add w25,w25,w19 // h+=K[i] ++ eor w0,w22,w22,ror#14 ++ and w17,w23,w22 ++ bic w19,w24,w22 ++ add w25,w25,w13 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w26,w27 // a^b, b^c in next round ++ eor w16,w16,w0,ror#11 // Sigma1(e) ++ ror w0,w26,#2 ++ add w25,w25,w17 // h+=Ch(e,f,g) ++ eor w17,w26,w26,ror#9 ++ add w25,w25,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w21,w21,w25 // d+=h ++ eor w28,w28,w27 // Maj(a,b,c) ++ eor w17,w0,w17,ror#13 // Sigma0(a) ++ add w25,w25,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w25,w25,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w14,w14 // 11 ++#endif ++ ldp w15,w0,[x1],#2*4 ++ add w25,w25,w17 // h+=Sigma0(a) ++ str w6,[sp,#12] ++ ror w16,w21,#6 ++ add w24,w24,w28 // h+=K[i] ++ eor w6,w21,w21,ror#14 ++ and w17,w22,w21 ++ bic w28,w23,w21 ++ add w24,w24,w14 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w25,w26 // a^b, b^c in next round ++ eor w16,w16,w6,ror#11 // Sigma1(e) ++ ror w6,w25,#2 ++ add w24,w24,w17 // h+=Ch(e,f,g) ++ eor w17,w25,w25,ror#9 ++ add w24,w24,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w20,w20,w24 // d+=h ++ eor w19,w19,w26 // Maj(a,b,c) ++ eor w17,w6,w17,ror#13 // Sigma0(a) ++ add w24,w24,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // 
*K++, w28 in next round ++ //add w24,w24,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w15,w15 // 12 ++#endif ++ add w24,w24,w17 // h+=Sigma0(a) ++ str w7,[sp,#0] ++ ror w16,w20,#6 ++ add w23,w23,w19 // h+=K[i] ++ eor w7,w20,w20,ror#14 ++ and w17,w21,w20 ++ bic w19,w22,w20 ++ add w23,w23,w15 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w24,w25 // a^b, b^c in next round ++ eor w16,w16,w7,ror#11 // Sigma1(e) ++ ror w7,w24,#2 ++ add w23,w23,w17 // h+=Ch(e,f,g) ++ eor w17,w24,w24,ror#9 ++ add w23,w23,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w27,w27,w23 // d+=h ++ eor w28,w28,w25 // Maj(a,b,c) ++ eor w17,w7,w17,ror#13 // Sigma0(a) ++ add w23,w23,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w23,w23,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w0,w0 // 13 ++#endif ++ ldp w1,w2,[x1] ++ add w23,w23,w17 // h+=Sigma0(a) ++ str w8,[sp,#4] ++ ror w16,w27,#6 ++ add w22,w22,w28 // h+=K[i] ++ eor w8,w27,w27,ror#14 ++ and w17,w20,w27 ++ bic w28,w21,w27 ++ add w22,w22,w0 // h+=X[i] ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w23,w24 // a^b, b^c in next round ++ eor w16,w16,w8,ror#11 // Sigma1(e) ++ ror w8,w23,#2 ++ add w22,w22,w17 // h+=Ch(e,f,g) ++ eor w17,w23,w23,ror#9 ++ add w22,w22,w16 // h+=Sigma1(e) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ add w26,w26,w22 // d+=h ++ eor w19,w19,w24 // Maj(a,b,c) ++ eor w17,w8,w17,ror#13 // Sigma0(a) ++ add w22,w22,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ //add w22,w22,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w1,w1 // 14 ++#endif ++ ldr w6,[sp,#12] ++ add w22,w22,w17 // h+=Sigma0(a) ++ str w9,[sp,#8] ++ ror w16,w26,#6 ++ add w21,w21,w19 // h+=K[i] ++ eor w9,w26,w26,ror#14 ++ and w17,w27,w26 ++ bic w19,w20,w26 ++ add w21,w21,w1 // h+=X[i] ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w22,w23 // a^b, b^c in next round ++ eor w16,w16,w9,ror#11 // Sigma1(e) ++ ror w9,w22,#2 ++ add w21,w21,w17 // h+=Ch(e,f,g) ++ eor w17,w22,w22,ror#9 ++ add w21,w21,w16 // h+=Sigma1(e) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ add w25,w25,w21 // d+=h ++ eor w28,w28,w23 // Maj(a,b,c) ++ eor w17,w9,w17,ror#13 // Sigma0(a) ++ add w21,w21,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ //add w21,w21,w17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev w2,w2 // 15 ++#endif ++ ldr w7,[sp,#0] ++ add w21,w21,w17 // h+=Sigma0(a) ++ str w10,[sp,#12] ++ ror w16,w25,#6 ++ add w20,w20,w28 // h+=K[i] ++ ror w9,w4,#7 ++ and w17,w26,w25 ++ ror w8,w1,#17 ++ bic w28,w27,w25 ++ ror w10,w21,#2 ++ add w20,w20,w2 // h+=X[i] ++ eor w16,w16,w25,ror#11 ++ eor w9,w9,w4,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w21,w22 // a^b, b^c in next round ++ eor w16,w16,w25,ror#25 // Sigma1(e) ++ eor w10,w10,w21,ror#13 ++ add w20,w20,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w8,w8,w1,ror#19 ++ eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) ++ add w20,w20,w16 // h+=Sigma1(e) ++ eor w19,w19,w22 // Maj(a,b,c) ++ eor w17,w10,w21,ror#22 // Sigma0(a) ++ eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) ++ add w3,w3,w12 ++ add w24,w24,w20 // d+=h ++ add w20,w20,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w3,w3,w9 ++ add w20,w20,w17 // h+=Sigma0(a) ++ add w3,w3,w8 ++Loop_16_xx: ++ ldr w8,[sp,#4] ++ str w11,[sp,#0] ++ ror w16,w24,#6 ++ add w27,w27,w19 // h+=K[i] ++ ror w10,w5,#7 ++ and w17,w25,w24 ++ ror w9,w2,#17 ++ bic w19,w26,w24 ++ ror w11,w20,#2 ++ add w27,w27,w3 // h+=X[i] ++ eor w16,w16,w24,ror#11 ++ eor w10,w10,w5,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w20,w21 
// a^b, b^c in next round ++ eor w16,w16,w24,ror#25 // Sigma1(e) ++ eor w11,w11,w20,ror#13 ++ add w27,w27,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w9,w9,w2,ror#19 ++ eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) ++ add w27,w27,w16 // h+=Sigma1(e) ++ eor w28,w28,w21 // Maj(a,b,c) ++ eor w17,w11,w20,ror#22 // Sigma0(a) ++ eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) ++ add w4,w4,w13 ++ add w23,w23,w27 // d+=h ++ add w27,w27,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w4,w4,w10 ++ add w27,w27,w17 // h+=Sigma0(a) ++ add w4,w4,w9 ++ ldr w9,[sp,#8] ++ str w12,[sp,#4] ++ ror w16,w23,#6 ++ add w26,w26,w28 // h+=K[i] ++ ror w11,w6,#7 ++ and w17,w24,w23 ++ ror w10,w3,#17 ++ bic w28,w25,w23 ++ ror w12,w27,#2 ++ add w26,w26,w4 // h+=X[i] ++ eor w16,w16,w23,ror#11 ++ eor w11,w11,w6,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w27,w20 // a^b, b^c in next round ++ eor w16,w16,w23,ror#25 // Sigma1(e) ++ eor w12,w12,w27,ror#13 ++ add w26,w26,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w10,w10,w3,ror#19 ++ eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) ++ add w26,w26,w16 // h+=Sigma1(e) ++ eor w19,w19,w20 // Maj(a,b,c) ++ eor w17,w12,w27,ror#22 // Sigma0(a) ++ eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) ++ add w5,w5,w14 ++ add w22,w22,w26 // d+=h ++ add w26,w26,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w5,w5,w11 ++ add w26,w26,w17 // h+=Sigma0(a) ++ add w5,w5,w10 ++ ldr w10,[sp,#12] ++ str w13,[sp,#8] ++ ror w16,w22,#6 ++ add w25,w25,w19 // h+=K[i] ++ ror w12,w7,#7 ++ and w17,w23,w22 ++ ror w11,w4,#17 ++ bic w19,w24,w22 ++ ror w13,w26,#2 ++ add w25,w25,w5 // h+=X[i] ++ eor w16,w16,w22,ror#11 ++ eor w12,w12,w7,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w26,w27 // a^b, b^c in next round ++ eor w16,w16,w22,ror#25 // Sigma1(e) ++ eor w13,w13,w26,ror#13 ++ add w25,w25,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w11,w11,w4,ror#19 ++ eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) ++ add w25,w25,w16 // h+=Sigma1(e) ++ eor w28,w28,w27 // Maj(a,b,c) ++ eor w17,w13,w26,ror#22 // Sigma0(a) ++ eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) ++ add w6,w6,w15 ++ add w21,w21,w25 // d+=h ++ add w25,w25,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w6,w6,w12 ++ add w25,w25,w17 // h+=Sigma0(a) ++ add w6,w6,w11 ++ ldr w11,[sp,#0] ++ str w14,[sp,#12] ++ ror w16,w21,#6 ++ add w24,w24,w28 // h+=K[i] ++ ror w13,w8,#7 ++ and w17,w22,w21 ++ ror w12,w5,#17 ++ bic w28,w23,w21 ++ ror w14,w25,#2 ++ add w24,w24,w6 // h+=X[i] ++ eor w16,w16,w21,ror#11 ++ eor w13,w13,w8,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w25,w26 // a^b, b^c in next round ++ eor w16,w16,w21,ror#25 // Sigma1(e) ++ eor w14,w14,w25,ror#13 ++ add w24,w24,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w12,w12,w5,ror#19 ++ eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) ++ add w24,w24,w16 // h+=Sigma1(e) ++ eor w19,w19,w26 // Maj(a,b,c) ++ eor w17,w14,w25,ror#22 // Sigma0(a) ++ eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) ++ add w7,w7,w0 ++ add w20,w20,w24 // d+=h ++ add w24,w24,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w7,w7,w13 ++ add w24,w24,w17 // h+=Sigma0(a) ++ add w7,w7,w12 ++ ldr w12,[sp,#4] ++ str w15,[sp,#0] ++ ror w16,w20,#6 ++ add w23,w23,w19 // h+=K[i] ++ ror w14,w9,#7 ++ and w17,w21,w20 ++ ror w13,w6,#17 ++ bic w19,w22,w20 ++ ror w15,w24,#2 ++ add w23,w23,w7 // h+=X[i] ++ eor w16,w16,w20,ror#11 ++ eor w14,w14,w9,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w24,w25 // a^b, b^c in next round 
++ eor w16,w16,w20,ror#25 // Sigma1(e) ++ eor w15,w15,w24,ror#13 ++ add w23,w23,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w13,w13,w6,ror#19 ++ eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) ++ add w23,w23,w16 // h+=Sigma1(e) ++ eor w28,w28,w25 // Maj(a,b,c) ++ eor w17,w15,w24,ror#22 // Sigma0(a) ++ eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) ++ add w8,w8,w1 ++ add w27,w27,w23 // d+=h ++ add w23,w23,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w8,w8,w14 ++ add w23,w23,w17 // h+=Sigma0(a) ++ add w8,w8,w13 ++ ldr w13,[sp,#8] ++ str w0,[sp,#4] ++ ror w16,w27,#6 ++ add w22,w22,w28 // h+=K[i] ++ ror w15,w10,#7 ++ and w17,w20,w27 ++ ror w14,w7,#17 ++ bic w28,w21,w27 ++ ror w0,w23,#2 ++ add w22,w22,w8 // h+=X[i] ++ eor w16,w16,w27,ror#11 ++ eor w15,w15,w10,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w23,w24 // a^b, b^c in next round ++ eor w16,w16,w27,ror#25 // Sigma1(e) ++ eor w0,w0,w23,ror#13 ++ add w22,w22,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w14,w14,w7,ror#19 ++ eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) ++ add w22,w22,w16 // h+=Sigma1(e) ++ eor w19,w19,w24 // Maj(a,b,c) ++ eor w17,w0,w23,ror#22 // Sigma0(a) ++ eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) ++ add w9,w9,w2 ++ add w26,w26,w22 // d+=h ++ add w22,w22,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w9,w9,w15 ++ add w22,w22,w17 // h+=Sigma0(a) ++ add w9,w9,w14 ++ ldr w14,[sp,#12] ++ str w1,[sp,#8] ++ ror w16,w26,#6 ++ add w21,w21,w19 // h+=K[i] ++ ror w0,w11,#7 ++ and w17,w27,w26 ++ ror w15,w8,#17 ++ bic w19,w20,w26 ++ ror w1,w22,#2 ++ add w21,w21,w9 // h+=X[i] ++ eor w16,w16,w26,ror#11 ++ eor w0,w0,w11,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w22,w23 // a^b, b^c in next round ++ eor w16,w16,w26,ror#25 // Sigma1(e) ++ eor w1,w1,w22,ror#13 ++ add w21,w21,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w15,w15,w8,ror#19 ++ eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) ++ add w21,w21,w16 // h+=Sigma1(e) ++ eor w28,w28,w23 // Maj(a,b,c) ++ eor w17,w1,w22,ror#22 // Sigma0(a) ++ eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) ++ add w10,w10,w3 ++ add w25,w25,w21 // d+=h ++ add w21,w21,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w10,w10,w0 ++ add w21,w21,w17 // h+=Sigma0(a) ++ add w10,w10,w15 ++ ldr w15,[sp,#0] ++ str w2,[sp,#12] ++ ror w16,w25,#6 ++ add w20,w20,w28 // h+=K[i] ++ ror w1,w12,#7 ++ and w17,w26,w25 ++ ror w0,w9,#17 ++ bic w28,w27,w25 ++ ror w2,w21,#2 ++ add w20,w20,w10 // h+=X[i] ++ eor w16,w16,w25,ror#11 ++ eor w1,w1,w12,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w21,w22 // a^b, b^c in next round ++ eor w16,w16,w25,ror#25 // Sigma1(e) ++ eor w2,w2,w21,ror#13 ++ add w20,w20,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w0,w0,w9,ror#19 ++ eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) ++ add w20,w20,w16 // h+=Sigma1(e) ++ eor w19,w19,w22 // Maj(a,b,c) ++ eor w17,w2,w21,ror#22 // Sigma0(a) ++ eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) ++ add w11,w11,w4 ++ add w24,w24,w20 // d+=h ++ add w20,w20,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w11,w11,w1 ++ add w20,w20,w17 // h+=Sigma0(a) ++ add w11,w11,w0 ++ ldr w0,[sp,#4] ++ str w3,[sp,#0] ++ ror w16,w24,#6 ++ add w27,w27,w19 // h+=K[i] ++ ror w2,w13,#7 ++ and w17,w25,w24 ++ ror w1,w10,#17 ++ bic w19,w26,w24 ++ ror w3,w20,#2 ++ add w27,w27,w11 // h+=X[i] ++ eor w16,w16,w24,ror#11 ++ eor w2,w2,w13,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w20,w21 // a^b, b^c in next round ++ eor w16,w16,w24,ror#25 // Sigma1(e) 
++ eor w3,w3,w20,ror#13 ++ add w27,w27,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w1,w1,w10,ror#19 ++ eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) ++ add w27,w27,w16 // h+=Sigma1(e) ++ eor w28,w28,w21 // Maj(a,b,c) ++ eor w17,w3,w20,ror#22 // Sigma0(a) ++ eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) ++ add w12,w12,w5 ++ add w23,w23,w27 // d+=h ++ add w27,w27,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w12,w12,w2 ++ add w27,w27,w17 // h+=Sigma0(a) ++ add w12,w12,w1 ++ ldr w1,[sp,#8] ++ str w4,[sp,#4] ++ ror w16,w23,#6 ++ add w26,w26,w28 // h+=K[i] ++ ror w3,w14,#7 ++ and w17,w24,w23 ++ ror w2,w11,#17 ++ bic w28,w25,w23 ++ ror w4,w27,#2 ++ add w26,w26,w12 // h+=X[i] ++ eor w16,w16,w23,ror#11 ++ eor w3,w3,w14,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w27,w20 // a^b, b^c in next round ++ eor w16,w16,w23,ror#25 // Sigma1(e) ++ eor w4,w4,w27,ror#13 ++ add w26,w26,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w2,w2,w11,ror#19 ++ eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) ++ add w26,w26,w16 // h+=Sigma1(e) ++ eor w19,w19,w20 // Maj(a,b,c) ++ eor w17,w4,w27,ror#22 // Sigma0(a) ++ eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) ++ add w13,w13,w6 ++ add w22,w22,w26 // d+=h ++ add w26,w26,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w13,w13,w3 ++ add w26,w26,w17 // h+=Sigma0(a) ++ add w13,w13,w2 ++ ldr w2,[sp,#12] ++ str w5,[sp,#8] ++ ror w16,w22,#6 ++ add w25,w25,w19 // h+=K[i] ++ ror w4,w15,#7 ++ and w17,w23,w22 ++ ror w3,w12,#17 ++ bic w19,w24,w22 ++ ror w5,w26,#2 ++ add w25,w25,w13 // h+=X[i] ++ eor w16,w16,w22,ror#11 ++ eor w4,w4,w15,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w26,w27 // a^b, b^c in next round ++ eor w16,w16,w22,ror#25 // Sigma1(e) ++ eor w5,w5,w26,ror#13 ++ add w25,w25,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w3,w3,w12,ror#19 ++ eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) ++ add w25,w25,w16 // h+=Sigma1(e) ++ eor w28,w28,w27 // Maj(a,b,c) ++ eor w17,w5,w26,ror#22 // Sigma0(a) ++ eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) ++ add w14,w14,w7 ++ add w21,w21,w25 // d+=h ++ add w25,w25,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w14,w14,w4 ++ add w25,w25,w17 // h+=Sigma0(a) ++ add w14,w14,w3 ++ ldr w3,[sp,#0] ++ str w6,[sp,#12] ++ ror w16,w21,#6 ++ add w24,w24,w28 // h+=K[i] ++ ror w5,w0,#7 ++ and w17,w22,w21 ++ ror w4,w13,#17 ++ bic w28,w23,w21 ++ ror w6,w25,#2 ++ add w24,w24,w14 // h+=X[i] ++ eor w16,w16,w21,ror#11 ++ eor w5,w5,w0,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w25,w26 // a^b, b^c in next round ++ eor w16,w16,w21,ror#25 // Sigma1(e) ++ eor w6,w6,w25,ror#13 ++ add w24,w24,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w4,w4,w13,ror#19 ++ eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) ++ add w24,w24,w16 // h+=Sigma1(e) ++ eor w19,w19,w26 // Maj(a,b,c) ++ eor w17,w6,w25,ror#22 // Sigma0(a) ++ eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) ++ add w15,w15,w8 ++ add w20,w20,w24 // d+=h ++ add w24,w24,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w15,w15,w5 ++ add w24,w24,w17 // h+=Sigma0(a) ++ add w15,w15,w4 ++ ldr w4,[sp,#4] ++ str w7,[sp,#0] ++ ror w16,w20,#6 ++ add w23,w23,w19 // h+=K[i] ++ ror w6,w1,#7 ++ and w17,w21,w20 ++ ror w5,w14,#17 ++ bic w19,w22,w20 ++ ror w7,w24,#2 ++ add w23,w23,w15 // h+=X[i] ++ eor w16,w16,w20,ror#11 ++ eor w6,w6,w1,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w24,w25 // a^b, b^c in next round ++ eor w16,w16,w20,ror#25 // Sigma1(e) ++ eor w7,w7,w24,ror#13 ++ add w23,w23,w17 // 
h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w5,w5,w14,ror#19 ++ eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) ++ add w23,w23,w16 // h+=Sigma1(e) ++ eor w28,w28,w25 // Maj(a,b,c) ++ eor w17,w7,w24,ror#22 // Sigma0(a) ++ eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) ++ add w0,w0,w9 ++ add w27,w27,w23 // d+=h ++ add w23,w23,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w0,w0,w6 ++ add w23,w23,w17 // h+=Sigma0(a) ++ add w0,w0,w5 ++ ldr w5,[sp,#8] ++ str w8,[sp,#4] ++ ror w16,w27,#6 ++ add w22,w22,w28 // h+=K[i] ++ ror w7,w2,#7 ++ and w17,w20,w27 ++ ror w6,w15,#17 ++ bic w28,w21,w27 ++ ror w8,w23,#2 ++ add w22,w22,w0 // h+=X[i] ++ eor w16,w16,w27,ror#11 ++ eor w7,w7,w2,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w23,w24 // a^b, b^c in next round ++ eor w16,w16,w27,ror#25 // Sigma1(e) ++ eor w8,w8,w23,ror#13 ++ add w22,w22,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w6,w6,w15,ror#19 ++ eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) ++ add w22,w22,w16 // h+=Sigma1(e) ++ eor w19,w19,w24 // Maj(a,b,c) ++ eor w17,w8,w23,ror#22 // Sigma0(a) ++ eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) ++ add w1,w1,w10 ++ add w26,w26,w22 // d+=h ++ add w22,w22,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w1,w1,w7 ++ add w22,w22,w17 // h+=Sigma0(a) ++ add w1,w1,w6 ++ ldr w6,[sp,#12] ++ str w9,[sp,#8] ++ ror w16,w26,#6 ++ add w21,w21,w19 // h+=K[i] ++ ror w8,w3,#7 ++ and w17,w27,w26 ++ ror w7,w0,#17 ++ bic w19,w20,w26 ++ ror w9,w22,#2 ++ add w21,w21,w1 // h+=X[i] ++ eor w16,w16,w26,ror#11 ++ eor w8,w8,w3,ror#18 ++ orr w17,w17,w19 // Ch(e,f,g) ++ eor w19,w22,w23 // a^b, b^c in next round ++ eor w16,w16,w26,ror#25 // Sigma1(e) ++ eor w9,w9,w22,ror#13 ++ add w21,w21,w17 // h+=Ch(e,f,g) ++ and w28,w28,w19 // (b^c)&=(a^b) ++ eor w7,w7,w0,ror#19 ++ eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) ++ add w21,w21,w16 // h+=Sigma1(e) ++ eor w28,w28,w23 // Maj(a,b,c) ++ eor w17,w9,w22,ror#22 // Sigma0(a) ++ eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) ++ add w2,w2,w11 ++ add w25,w25,w21 // d+=h ++ add w21,w21,w28 // h+=Maj(a,b,c) ++ ldr w28,[x30],#4 // *K++, w19 in next round ++ add w2,w2,w8 ++ add w21,w21,w17 // h+=Sigma0(a) ++ add w2,w2,w7 ++ ldr w7,[sp,#0] ++ str w10,[sp,#12] ++ ror w16,w25,#6 ++ add w20,w20,w28 // h+=K[i] ++ ror w9,w4,#7 ++ and w17,w26,w25 ++ ror w8,w1,#17 ++ bic w28,w27,w25 ++ ror w10,w21,#2 ++ add w20,w20,w2 // h+=X[i] ++ eor w16,w16,w25,ror#11 ++ eor w9,w9,w4,ror#18 ++ orr w17,w17,w28 // Ch(e,f,g) ++ eor w28,w21,w22 // a^b, b^c in next round ++ eor w16,w16,w25,ror#25 // Sigma1(e) ++ eor w10,w10,w21,ror#13 ++ add w20,w20,w17 // h+=Ch(e,f,g) ++ and w19,w19,w28 // (b^c)&=(a^b) ++ eor w8,w8,w1,ror#19 ++ eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) ++ add w20,w20,w16 // h+=Sigma1(e) ++ eor w19,w19,w22 // Maj(a,b,c) ++ eor w17,w10,w21,ror#22 // Sigma0(a) ++ eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) ++ add w3,w3,w12 ++ add w24,w24,w20 // d+=h ++ add w20,w20,w19 // h+=Maj(a,b,c) ++ ldr w19,[x30],#4 // *K++, w28 in next round ++ add w3,w3,w9 ++ add w20,w20,w17 // h+=Sigma0(a) ++ add w3,w3,w8 ++ cbnz w19,Loop_16_xx ++ ++ ldp x0,x2,[x29,#96] ++ ldr x1,[x29,#112] ++ sub x30,x30,#260 // rewind ++ ++ ldp w3,w4,[x0] ++ ldp w5,w6,[x0,#2*4] ++ add x1,x1,#14*4 // advance input pointer ++ ldp w7,w8,[x0,#4*4] ++ add w20,w20,w3 ++ ldp w9,w10,[x0,#6*4] ++ add w21,w21,w4 ++ add w22,w22,w5 ++ add w23,w23,w6 ++ stp w20,w21,[x0] ++ add w24,w24,w7 ++ add w25,w25,w8 ++ stp w22,w23,[x0,#2*4] ++ add w26,w26,w9 ++ add w27,w27,w10 ++ cmp x1,x2 ++ stp w24,w25,[x0,#4*4] ++ stp w26,w27,[x0,#6*4] ++ b.ne 
Loop ++ ++ ldp x19,x20,[x29,#16] ++ add sp,sp,#4*4 ++ ldp x21,x22,[x29,#32] ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldp x29,x30,[sp],#128 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++.section __TEXT,__const ++.align 6 ++ ++LK256: ++.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 ++.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 ++.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 ++.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 ++.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc ++.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da ++.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 ++.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 ++.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 ++.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 ++.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 ++.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 ++.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 ++.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 ++.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 ++.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 ++.long 0 //terminator ++ ++.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++.text ++#ifndef __KERNEL__ ++ ++.align 6 ++sha256_block_armv8: ++Lv8_entry: ++ // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later. ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ ++ ld1 {v0.4s,v1.4s},[x0] ++ adrp x3,LK256@PAGE ++ add x3,x3,LK256@PAGEOFF ++ ++Loop_hw: ++ ld1 {v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64 ++ sub x2,x2,#1 ++ ld1 {v16.4s},[x3],#16 ++ rev32 v4.16b,v4.16b ++ rev32 v5.16b,v5.16b ++ rev32 v6.16b,v6.16b ++ rev32 v7.16b,v7.16b ++ orr v18.16b,v0.16b,v0.16b // offload ++ orr v19.16b,v1.16b,v1.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v4.4s ++.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v5.4s ++.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v6.4s ++.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v7.4s ++.long 0x5e282887 //sha256su0 v7.16b,v4.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v4.4s ++.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v5.4s ++.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s 
++.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v6.4s ++.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v7.4s ++.long 0x5e282887 //sha256su0 v7.16b,v4.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v4.4s ++.long 0x5e2828a4 //sha256su0 v4.16b,v5.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++.long 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v5.4s ++.long 0x5e2828c5 //sha256su0 v5.16b,v6.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++.long 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v6.4s ++.long 0x5e2828e6 //sha256su0 v6.16b,v7.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++.long 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v7.4s ++.long 0x5e282887 //sha256su0 v7.16b,v4.16b ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++.long 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b ++ ld1 {v17.4s},[x3],#16 ++ add v16.4s,v16.4s,v4.4s ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ ++ ld1 {v16.4s},[x3],#16 ++ add v17.4s,v17.4s,v5.4s ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ ++ ld1 {v17.4s},[x3] ++ add v16.4s,v16.4s,v6.4s ++ sub x3,x3,#64*4-16 // rewind ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s ++.long 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s ++ ++ add v17.4s,v17.4s,v7.4s ++ orr v2.16b,v0.16b,v0.16b ++.long 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s ++.long 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s ++ ++ add v0.4s,v0.4s,v18.4s ++ add v1.4s,v1.4s,v19.4s ++ ++ cbnz x2,Loop_hw ++ ++ st1 {v0.4s,v1.4s},[x0] ++ ++ ldr x29,[sp],#16 ++ ret ++ ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-aarch64/crypto/fipsmodule/sha512-armv8.S b/apple-aarch64/crypto/fipsmodule/sha512-armv8.S +new file mode 100644 +index 0000000..b2d366d +--- /dev/null ++++ b/apple-aarch64/crypto/fipsmodule/sha512-armv8.S +@@ -0,0 +1,1614 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++// Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved. ++// ++// Licensed under the OpenSSL license (the "License"). You may not use ++// this file except in compliance with the License. 
You can obtain a copy ++// in the file LICENSE in the source distribution or at ++// https://www.openssl.org/source/license.html ++ ++// ==================================================================== ++// Written by Andy Polyakov for the OpenSSL ++// project. The module is, however, dual licensed under OpenSSL and ++// CRYPTOGAMS licenses depending on where you obtain it. For further ++// details see http://www.openssl.org/~appro/cryptogams/. ++// ++// Permission to use under GPLv2 terms is granted. ++// ==================================================================== ++// ++// SHA256/512 for ARMv8. ++// ++// Performance in cycles per processed byte and improvement coefficient ++// over code generated with "default" compiler: ++// ++// SHA256-hw SHA256(*) SHA512 ++// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**)) ++// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***)) ++// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***)) ++// Denver 2.01 10.5 (+26%) 6.70 (+8%) ++// X-Gene 20.0 (+100%) 12.8 (+300%(***)) ++// Mongoose 2.36 13.0 (+50%) 8.36 (+33%) ++// Kryo 1.92 17.4 (+30%) 11.2 (+8%) ++// ++// (*) Software SHA256 results are of lesser relevance, presented ++// mostly for informational purposes. ++// (**) The result is a trade-off: it's possible to improve it by ++// 10% (or by 1 cycle per round), but at the cost of 20% loss ++// on Cortex-A53 (or by 4 cycles per round). ++// (***) Super-impressive coefficients over gcc-generated code are ++// indication of some compiler "pathology", most notably code ++// generated with -mgeneral-regs-only is significantly faster ++// and the gap is only 40-90%. ++ ++#ifndef __KERNEL__ ++# include ++#endif ++ ++.text ++ ++ ++.private_extern _OPENSSL_armcap_P ++.globl _sha512_block_data_order ++.private_extern _sha512_block_data_order ++ ++.align 6 ++_sha512_block_data_order: ++ AARCH64_VALID_CALL_TARGET ++#ifndef __KERNEL__ ++#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10 ++ adrp x16,:pg_hi21_nc:_OPENSSL_armcap_P ++#else ++ adrp x16,_OPENSSL_armcap_P@PAGE ++#endif ++ ldr w16,[x16,_OPENSSL_armcap_P@PAGEOFF] ++ tst w16,#ARMV8_SHA512 ++ b.ne Lv8_entry ++#endif ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-128]! 
++ add x29,sp,#0 ++ ++ stp x19,x20,[sp,#16] ++ stp x21,x22,[sp,#32] ++ stp x23,x24,[sp,#48] ++ stp x25,x26,[sp,#64] ++ stp x27,x28,[sp,#80] ++ sub sp,sp,#4*8 ++ ++ ldp x20,x21,[x0] // load context ++ ldp x22,x23,[x0,#2*8] ++ ldp x24,x25,[x0,#4*8] ++ add x2,x1,x2,lsl#7 // end of input ++ ldp x26,x27,[x0,#6*8] ++ adrp x30,LK512@PAGE ++ add x30,x30,LK512@PAGEOFF ++ stp x0,x2,[x29,#96] ++ ++Loop: ++ ldp x3,x4,[x1],#2*8 ++ ldr x19,[x30],#8 // *K++ ++ eor x28,x21,x22 // magic seed ++ str x1,[x29,#112] ++#ifndef __AARCH64EB__ ++ rev x3,x3 // 0 ++#endif ++ ror x16,x24,#14 ++ add x27,x27,x19 // h+=K[i] ++ eor x6,x24,x24,ror#23 ++ and x17,x25,x24 ++ bic x19,x26,x24 ++ add x27,x27,x3 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x20,x21 // a^b, b^c in next round ++ eor x16,x16,x6,ror#18 // Sigma1(e) ++ ror x6,x20,#28 ++ add x27,x27,x17 // h+=Ch(e,f,g) ++ eor x17,x20,x20,ror#5 ++ add x27,x27,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x23,x23,x27 // d+=h ++ eor x28,x28,x21 // Maj(a,b,c) ++ eor x17,x6,x17,ror#34 // Sigma0(a) ++ add x27,x27,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x27,x27,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x4,x4 // 1 ++#endif ++ ldp x5,x6,[x1],#2*8 ++ add x27,x27,x17 // h+=Sigma0(a) ++ ror x16,x23,#14 ++ add x26,x26,x28 // h+=K[i] ++ eor x7,x23,x23,ror#23 ++ and x17,x24,x23 ++ bic x28,x25,x23 ++ add x26,x26,x4 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x27,x20 // a^b, b^c in next round ++ eor x16,x16,x7,ror#18 // Sigma1(e) ++ ror x7,x27,#28 ++ add x26,x26,x17 // h+=Ch(e,f,g) ++ eor x17,x27,x27,ror#5 ++ add x26,x26,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x22,x22,x26 // d+=h ++ eor x19,x19,x20 // Maj(a,b,c) ++ eor x17,x7,x17,ror#34 // Sigma0(a) ++ add x26,x26,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x26,x26,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x5,x5 // 2 ++#endif ++ add x26,x26,x17 // h+=Sigma0(a) ++ ror x16,x22,#14 ++ add x25,x25,x19 // h+=K[i] ++ eor x8,x22,x22,ror#23 ++ and x17,x23,x22 ++ bic x19,x24,x22 ++ add x25,x25,x5 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x26,x27 // a^b, b^c in next round ++ eor x16,x16,x8,ror#18 // Sigma1(e) ++ ror x8,x26,#28 ++ add x25,x25,x17 // h+=Ch(e,f,g) ++ eor x17,x26,x26,ror#5 ++ add x25,x25,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x21,x21,x25 // d+=h ++ eor x28,x28,x27 // Maj(a,b,c) ++ eor x17,x8,x17,ror#34 // Sigma0(a) ++ add x25,x25,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x25,x25,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x6,x6 // 3 ++#endif ++ ldp x7,x8,[x1],#2*8 ++ add x25,x25,x17 // h+=Sigma0(a) ++ ror x16,x21,#14 ++ add x24,x24,x28 // h+=K[i] ++ eor x9,x21,x21,ror#23 ++ and x17,x22,x21 ++ bic x28,x23,x21 ++ add x24,x24,x6 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x25,x26 // a^b, b^c in next round ++ eor x16,x16,x9,ror#18 // Sigma1(e) ++ ror x9,x25,#28 ++ add x24,x24,x17 // h+=Ch(e,f,g) ++ eor x17,x25,x25,ror#5 ++ add x24,x24,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x20,x20,x24 // d+=h ++ eor x19,x19,x26 // Maj(a,b,c) ++ eor x17,x9,x17,ror#34 // Sigma0(a) ++ add x24,x24,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x24,x24,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x7,x7 // 4 ++#endif ++ add x24,x24,x17 // h+=Sigma0(a) ++ ror x16,x20,#14 ++ add x23,x23,x19 // h+=K[i] ++ eor x10,x20,x20,ror#23 ++ and x17,x21,x20 ++ bic x19,x22,x20 ++ add 
x23,x23,x7 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x24,x25 // a^b, b^c in next round ++ eor x16,x16,x10,ror#18 // Sigma1(e) ++ ror x10,x24,#28 ++ add x23,x23,x17 // h+=Ch(e,f,g) ++ eor x17,x24,x24,ror#5 ++ add x23,x23,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x27,x27,x23 // d+=h ++ eor x28,x28,x25 // Maj(a,b,c) ++ eor x17,x10,x17,ror#34 // Sigma0(a) ++ add x23,x23,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x23,x23,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x8,x8 // 5 ++#endif ++ ldp x9,x10,[x1],#2*8 ++ add x23,x23,x17 // h+=Sigma0(a) ++ ror x16,x27,#14 ++ add x22,x22,x28 // h+=K[i] ++ eor x11,x27,x27,ror#23 ++ and x17,x20,x27 ++ bic x28,x21,x27 ++ add x22,x22,x8 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x23,x24 // a^b, b^c in next round ++ eor x16,x16,x11,ror#18 // Sigma1(e) ++ ror x11,x23,#28 ++ add x22,x22,x17 // h+=Ch(e,f,g) ++ eor x17,x23,x23,ror#5 ++ add x22,x22,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x26,x26,x22 // d+=h ++ eor x19,x19,x24 // Maj(a,b,c) ++ eor x17,x11,x17,ror#34 // Sigma0(a) ++ add x22,x22,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x22,x22,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x9,x9 // 6 ++#endif ++ add x22,x22,x17 // h+=Sigma0(a) ++ ror x16,x26,#14 ++ add x21,x21,x19 // h+=K[i] ++ eor x12,x26,x26,ror#23 ++ and x17,x27,x26 ++ bic x19,x20,x26 ++ add x21,x21,x9 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x22,x23 // a^b, b^c in next round ++ eor x16,x16,x12,ror#18 // Sigma1(e) ++ ror x12,x22,#28 ++ add x21,x21,x17 // h+=Ch(e,f,g) ++ eor x17,x22,x22,ror#5 ++ add x21,x21,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x25,x25,x21 // d+=h ++ eor x28,x28,x23 // Maj(a,b,c) ++ eor x17,x12,x17,ror#34 // Sigma0(a) ++ add x21,x21,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x21,x21,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x10,x10 // 7 ++#endif ++ ldp x11,x12,[x1],#2*8 ++ add x21,x21,x17 // h+=Sigma0(a) ++ ror x16,x25,#14 ++ add x20,x20,x28 // h+=K[i] ++ eor x13,x25,x25,ror#23 ++ and x17,x26,x25 ++ bic x28,x27,x25 ++ add x20,x20,x10 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x21,x22 // a^b, b^c in next round ++ eor x16,x16,x13,ror#18 // Sigma1(e) ++ ror x13,x21,#28 ++ add x20,x20,x17 // h+=Ch(e,f,g) ++ eor x17,x21,x21,ror#5 ++ add x20,x20,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x24,x24,x20 // d+=h ++ eor x19,x19,x22 // Maj(a,b,c) ++ eor x17,x13,x17,ror#34 // Sigma0(a) ++ add x20,x20,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x20,x20,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x11,x11 // 8 ++#endif ++ add x20,x20,x17 // h+=Sigma0(a) ++ ror x16,x24,#14 ++ add x27,x27,x19 // h+=K[i] ++ eor x14,x24,x24,ror#23 ++ and x17,x25,x24 ++ bic x19,x26,x24 ++ add x27,x27,x11 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x20,x21 // a^b, b^c in next round ++ eor x16,x16,x14,ror#18 // Sigma1(e) ++ ror x14,x20,#28 ++ add x27,x27,x17 // h+=Ch(e,f,g) ++ eor x17,x20,x20,ror#5 ++ add x27,x27,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x23,x23,x27 // d+=h ++ eor x28,x28,x21 // Maj(a,b,c) ++ eor x17,x14,x17,ror#34 // Sigma0(a) ++ add x27,x27,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x27,x27,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x12,x12 // 9 ++#endif ++ ldp x13,x14,[x1],#2*8 ++ add x27,x27,x17 // h+=Sigma0(a) ++ ror x16,x23,#14 ++ add 
x26,x26,x28 // h+=K[i] ++ eor x15,x23,x23,ror#23 ++ and x17,x24,x23 ++ bic x28,x25,x23 ++ add x26,x26,x12 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x27,x20 // a^b, b^c in next round ++ eor x16,x16,x15,ror#18 // Sigma1(e) ++ ror x15,x27,#28 ++ add x26,x26,x17 // h+=Ch(e,f,g) ++ eor x17,x27,x27,ror#5 ++ add x26,x26,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x22,x22,x26 // d+=h ++ eor x19,x19,x20 // Maj(a,b,c) ++ eor x17,x15,x17,ror#34 // Sigma0(a) ++ add x26,x26,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x26,x26,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x13,x13 // 10 ++#endif ++ add x26,x26,x17 // h+=Sigma0(a) ++ ror x16,x22,#14 ++ add x25,x25,x19 // h+=K[i] ++ eor x0,x22,x22,ror#23 ++ and x17,x23,x22 ++ bic x19,x24,x22 ++ add x25,x25,x13 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x26,x27 // a^b, b^c in next round ++ eor x16,x16,x0,ror#18 // Sigma1(e) ++ ror x0,x26,#28 ++ add x25,x25,x17 // h+=Ch(e,f,g) ++ eor x17,x26,x26,ror#5 ++ add x25,x25,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x21,x21,x25 // d+=h ++ eor x28,x28,x27 // Maj(a,b,c) ++ eor x17,x0,x17,ror#34 // Sigma0(a) ++ add x25,x25,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x25,x25,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x14,x14 // 11 ++#endif ++ ldp x15,x0,[x1],#2*8 ++ add x25,x25,x17 // h+=Sigma0(a) ++ str x6,[sp,#24] ++ ror x16,x21,#14 ++ add x24,x24,x28 // h+=K[i] ++ eor x6,x21,x21,ror#23 ++ and x17,x22,x21 ++ bic x28,x23,x21 ++ add x24,x24,x14 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x25,x26 // a^b, b^c in next round ++ eor x16,x16,x6,ror#18 // Sigma1(e) ++ ror x6,x25,#28 ++ add x24,x24,x17 // h+=Ch(e,f,g) ++ eor x17,x25,x25,ror#5 ++ add x24,x24,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x20,x20,x24 // d+=h ++ eor x19,x19,x26 // Maj(a,b,c) ++ eor x17,x6,x17,ror#34 // Sigma0(a) ++ add x24,x24,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x24,x24,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x15,x15 // 12 ++#endif ++ add x24,x24,x17 // h+=Sigma0(a) ++ str x7,[sp,#0] ++ ror x16,x20,#14 ++ add x23,x23,x19 // h+=K[i] ++ eor x7,x20,x20,ror#23 ++ and x17,x21,x20 ++ bic x19,x22,x20 ++ add x23,x23,x15 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x24,x25 // a^b, b^c in next round ++ eor x16,x16,x7,ror#18 // Sigma1(e) ++ ror x7,x24,#28 ++ add x23,x23,x17 // h+=Ch(e,f,g) ++ eor x17,x24,x24,ror#5 ++ add x23,x23,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x27,x27,x23 // d+=h ++ eor x28,x28,x25 // Maj(a,b,c) ++ eor x17,x7,x17,ror#34 // Sigma0(a) ++ add x23,x23,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x23,x23,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x0,x0 // 13 ++#endif ++ ldp x1,x2,[x1] ++ add x23,x23,x17 // h+=Sigma0(a) ++ str x8,[sp,#8] ++ ror x16,x27,#14 ++ add x22,x22,x28 // h+=K[i] ++ eor x8,x27,x27,ror#23 ++ and x17,x20,x27 ++ bic x28,x21,x27 ++ add x22,x22,x0 // h+=X[i] ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x23,x24 // a^b, b^c in next round ++ eor x16,x16,x8,ror#18 // Sigma1(e) ++ ror x8,x23,#28 ++ add x22,x22,x17 // h+=Ch(e,f,g) ++ eor x17,x23,x23,ror#5 ++ add x22,x22,x16 // h+=Sigma1(e) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ add x26,x26,x22 // d+=h ++ eor x19,x19,x24 // Maj(a,b,c) ++ eor x17,x8,x17,ror#34 // Sigma0(a) ++ add x22,x22,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ //add x22,x22,x17 // h+=Sigma0(a) ++#ifndef 
__AARCH64EB__ ++ rev x1,x1 // 14 ++#endif ++ ldr x6,[sp,#24] ++ add x22,x22,x17 // h+=Sigma0(a) ++ str x9,[sp,#16] ++ ror x16,x26,#14 ++ add x21,x21,x19 // h+=K[i] ++ eor x9,x26,x26,ror#23 ++ and x17,x27,x26 ++ bic x19,x20,x26 ++ add x21,x21,x1 // h+=X[i] ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x22,x23 // a^b, b^c in next round ++ eor x16,x16,x9,ror#18 // Sigma1(e) ++ ror x9,x22,#28 ++ add x21,x21,x17 // h+=Ch(e,f,g) ++ eor x17,x22,x22,ror#5 ++ add x21,x21,x16 // h+=Sigma1(e) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ add x25,x25,x21 // d+=h ++ eor x28,x28,x23 // Maj(a,b,c) ++ eor x17,x9,x17,ror#34 // Sigma0(a) ++ add x21,x21,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ //add x21,x21,x17 // h+=Sigma0(a) ++#ifndef __AARCH64EB__ ++ rev x2,x2 // 15 ++#endif ++ ldr x7,[sp,#0] ++ add x21,x21,x17 // h+=Sigma0(a) ++ str x10,[sp,#24] ++ ror x16,x25,#14 ++ add x20,x20,x28 // h+=K[i] ++ ror x9,x4,#1 ++ and x17,x26,x25 ++ ror x8,x1,#19 ++ bic x28,x27,x25 ++ ror x10,x21,#28 ++ add x20,x20,x2 // h+=X[i] ++ eor x16,x16,x25,ror#18 ++ eor x9,x9,x4,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x21,x22 // a^b, b^c in next round ++ eor x16,x16,x25,ror#41 // Sigma1(e) ++ eor x10,x10,x21,ror#34 ++ add x20,x20,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x8,x8,x1,ror#61 ++ eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) ++ add x20,x20,x16 // h+=Sigma1(e) ++ eor x19,x19,x22 // Maj(a,b,c) ++ eor x17,x10,x21,ror#39 // Sigma0(a) ++ eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) ++ add x3,x3,x12 ++ add x24,x24,x20 // d+=h ++ add x20,x20,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x3,x3,x9 ++ add x20,x20,x17 // h+=Sigma0(a) ++ add x3,x3,x8 ++Loop_16_xx: ++ ldr x8,[sp,#8] ++ str x11,[sp,#0] ++ ror x16,x24,#14 ++ add x27,x27,x19 // h+=K[i] ++ ror x10,x5,#1 ++ and x17,x25,x24 ++ ror x9,x2,#19 ++ bic x19,x26,x24 ++ ror x11,x20,#28 ++ add x27,x27,x3 // h+=X[i] ++ eor x16,x16,x24,ror#18 ++ eor x10,x10,x5,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x20,x21 // a^b, b^c in next round ++ eor x16,x16,x24,ror#41 // Sigma1(e) ++ eor x11,x11,x20,ror#34 ++ add x27,x27,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x9,x9,x2,ror#61 ++ eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) ++ add x27,x27,x16 // h+=Sigma1(e) ++ eor x28,x28,x21 // Maj(a,b,c) ++ eor x17,x11,x20,ror#39 // Sigma0(a) ++ eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) ++ add x4,x4,x13 ++ add x23,x23,x27 // d+=h ++ add x27,x27,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x4,x4,x10 ++ add x27,x27,x17 // h+=Sigma0(a) ++ add x4,x4,x9 ++ ldr x9,[sp,#16] ++ str x12,[sp,#8] ++ ror x16,x23,#14 ++ add x26,x26,x28 // h+=K[i] ++ ror x11,x6,#1 ++ and x17,x24,x23 ++ ror x10,x3,#19 ++ bic x28,x25,x23 ++ ror x12,x27,#28 ++ add x26,x26,x4 // h+=X[i] ++ eor x16,x16,x23,ror#18 ++ eor x11,x11,x6,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x27,x20 // a^b, b^c in next round ++ eor x16,x16,x23,ror#41 // Sigma1(e) ++ eor x12,x12,x27,ror#34 ++ add x26,x26,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x10,x10,x3,ror#61 ++ eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) ++ add x26,x26,x16 // h+=Sigma1(e) ++ eor x19,x19,x20 // Maj(a,b,c) ++ eor x17,x12,x27,ror#39 // Sigma0(a) ++ eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) ++ add x5,x5,x14 ++ add x22,x22,x26 // d+=h ++ add x26,x26,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x5,x5,x11 ++ add x26,x26,x17 // h+=Sigma0(a) ++ add x5,x5,x10 ++ ldr x10,[sp,#24] ++ str x13,[sp,#16] ++ ror x16,x22,#14 ++ add x25,x25,x19 // 
h+=K[i] ++ ror x12,x7,#1 ++ and x17,x23,x22 ++ ror x11,x4,#19 ++ bic x19,x24,x22 ++ ror x13,x26,#28 ++ add x25,x25,x5 // h+=X[i] ++ eor x16,x16,x22,ror#18 ++ eor x12,x12,x7,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x26,x27 // a^b, b^c in next round ++ eor x16,x16,x22,ror#41 // Sigma1(e) ++ eor x13,x13,x26,ror#34 ++ add x25,x25,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x11,x11,x4,ror#61 ++ eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) ++ add x25,x25,x16 // h+=Sigma1(e) ++ eor x28,x28,x27 // Maj(a,b,c) ++ eor x17,x13,x26,ror#39 // Sigma0(a) ++ eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) ++ add x6,x6,x15 ++ add x21,x21,x25 // d+=h ++ add x25,x25,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x6,x6,x12 ++ add x25,x25,x17 // h+=Sigma0(a) ++ add x6,x6,x11 ++ ldr x11,[sp,#0] ++ str x14,[sp,#24] ++ ror x16,x21,#14 ++ add x24,x24,x28 // h+=K[i] ++ ror x13,x8,#1 ++ and x17,x22,x21 ++ ror x12,x5,#19 ++ bic x28,x23,x21 ++ ror x14,x25,#28 ++ add x24,x24,x6 // h+=X[i] ++ eor x16,x16,x21,ror#18 ++ eor x13,x13,x8,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x25,x26 // a^b, b^c in next round ++ eor x16,x16,x21,ror#41 // Sigma1(e) ++ eor x14,x14,x25,ror#34 ++ add x24,x24,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x12,x12,x5,ror#61 ++ eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) ++ add x24,x24,x16 // h+=Sigma1(e) ++ eor x19,x19,x26 // Maj(a,b,c) ++ eor x17,x14,x25,ror#39 // Sigma0(a) ++ eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) ++ add x7,x7,x0 ++ add x20,x20,x24 // d+=h ++ add x24,x24,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x7,x7,x13 ++ add x24,x24,x17 // h+=Sigma0(a) ++ add x7,x7,x12 ++ ldr x12,[sp,#8] ++ str x15,[sp,#0] ++ ror x16,x20,#14 ++ add x23,x23,x19 // h+=K[i] ++ ror x14,x9,#1 ++ and x17,x21,x20 ++ ror x13,x6,#19 ++ bic x19,x22,x20 ++ ror x15,x24,#28 ++ add x23,x23,x7 // h+=X[i] ++ eor x16,x16,x20,ror#18 ++ eor x14,x14,x9,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x24,x25 // a^b, b^c in next round ++ eor x16,x16,x20,ror#41 // Sigma1(e) ++ eor x15,x15,x24,ror#34 ++ add x23,x23,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x13,x13,x6,ror#61 ++ eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) ++ add x23,x23,x16 // h+=Sigma1(e) ++ eor x28,x28,x25 // Maj(a,b,c) ++ eor x17,x15,x24,ror#39 // Sigma0(a) ++ eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) ++ add x8,x8,x1 ++ add x27,x27,x23 // d+=h ++ add x23,x23,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x8,x8,x14 ++ add x23,x23,x17 // h+=Sigma0(a) ++ add x8,x8,x13 ++ ldr x13,[sp,#16] ++ str x0,[sp,#8] ++ ror x16,x27,#14 ++ add x22,x22,x28 // h+=K[i] ++ ror x15,x10,#1 ++ and x17,x20,x27 ++ ror x14,x7,#19 ++ bic x28,x21,x27 ++ ror x0,x23,#28 ++ add x22,x22,x8 // h+=X[i] ++ eor x16,x16,x27,ror#18 ++ eor x15,x15,x10,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x23,x24 // a^b, b^c in next round ++ eor x16,x16,x27,ror#41 // Sigma1(e) ++ eor x0,x0,x23,ror#34 ++ add x22,x22,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x14,x14,x7,ror#61 ++ eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) ++ add x22,x22,x16 // h+=Sigma1(e) ++ eor x19,x19,x24 // Maj(a,b,c) ++ eor x17,x0,x23,ror#39 // Sigma0(a) ++ eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) ++ add x9,x9,x2 ++ add x26,x26,x22 // d+=h ++ add x22,x22,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x9,x9,x15 ++ add x22,x22,x17 // h+=Sigma0(a) ++ add x9,x9,x14 ++ ldr x14,[sp,#24] ++ str x1,[sp,#16] ++ ror x16,x26,#14 ++ add x21,x21,x19 // h+=K[i] ++ ror x0,x11,#1 
++ and x17,x27,x26 ++ ror x15,x8,#19 ++ bic x19,x20,x26 ++ ror x1,x22,#28 ++ add x21,x21,x9 // h+=X[i] ++ eor x16,x16,x26,ror#18 ++ eor x0,x0,x11,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x22,x23 // a^b, b^c in next round ++ eor x16,x16,x26,ror#41 // Sigma1(e) ++ eor x1,x1,x22,ror#34 ++ add x21,x21,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x15,x15,x8,ror#61 ++ eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) ++ add x21,x21,x16 // h+=Sigma1(e) ++ eor x28,x28,x23 // Maj(a,b,c) ++ eor x17,x1,x22,ror#39 // Sigma0(a) ++ eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) ++ add x10,x10,x3 ++ add x25,x25,x21 // d+=h ++ add x21,x21,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x10,x10,x0 ++ add x21,x21,x17 // h+=Sigma0(a) ++ add x10,x10,x15 ++ ldr x15,[sp,#0] ++ str x2,[sp,#24] ++ ror x16,x25,#14 ++ add x20,x20,x28 // h+=K[i] ++ ror x1,x12,#1 ++ and x17,x26,x25 ++ ror x0,x9,#19 ++ bic x28,x27,x25 ++ ror x2,x21,#28 ++ add x20,x20,x10 // h+=X[i] ++ eor x16,x16,x25,ror#18 ++ eor x1,x1,x12,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x21,x22 // a^b, b^c in next round ++ eor x16,x16,x25,ror#41 // Sigma1(e) ++ eor x2,x2,x21,ror#34 ++ add x20,x20,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x0,x0,x9,ror#61 ++ eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) ++ add x20,x20,x16 // h+=Sigma1(e) ++ eor x19,x19,x22 // Maj(a,b,c) ++ eor x17,x2,x21,ror#39 // Sigma0(a) ++ eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) ++ add x11,x11,x4 ++ add x24,x24,x20 // d+=h ++ add x20,x20,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x11,x11,x1 ++ add x20,x20,x17 // h+=Sigma0(a) ++ add x11,x11,x0 ++ ldr x0,[sp,#8] ++ str x3,[sp,#0] ++ ror x16,x24,#14 ++ add x27,x27,x19 // h+=K[i] ++ ror x2,x13,#1 ++ and x17,x25,x24 ++ ror x1,x10,#19 ++ bic x19,x26,x24 ++ ror x3,x20,#28 ++ add x27,x27,x11 // h+=X[i] ++ eor x16,x16,x24,ror#18 ++ eor x2,x2,x13,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x20,x21 // a^b, b^c in next round ++ eor x16,x16,x24,ror#41 // Sigma1(e) ++ eor x3,x3,x20,ror#34 ++ add x27,x27,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x1,x1,x10,ror#61 ++ eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) ++ add x27,x27,x16 // h+=Sigma1(e) ++ eor x28,x28,x21 // Maj(a,b,c) ++ eor x17,x3,x20,ror#39 // Sigma0(a) ++ eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) ++ add x12,x12,x5 ++ add x23,x23,x27 // d+=h ++ add x27,x27,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x12,x12,x2 ++ add x27,x27,x17 // h+=Sigma0(a) ++ add x12,x12,x1 ++ ldr x1,[sp,#16] ++ str x4,[sp,#8] ++ ror x16,x23,#14 ++ add x26,x26,x28 // h+=K[i] ++ ror x3,x14,#1 ++ and x17,x24,x23 ++ ror x2,x11,#19 ++ bic x28,x25,x23 ++ ror x4,x27,#28 ++ add x26,x26,x12 // h+=X[i] ++ eor x16,x16,x23,ror#18 ++ eor x3,x3,x14,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x27,x20 // a^b, b^c in next round ++ eor x16,x16,x23,ror#41 // Sigma1(e) ++ eor x4,x4,x27,ror#34 ++ add x26,x26,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x2,x2,x11,ror#61 ++ eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) ++ add x26,x26,x16 // h+=Sigma1(e) ++ eor x19,x19,x20 // Maj(a,b,c) ++ eor x17,x4,x27,ror#39 // Sigma0(a) ++ eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) ++ add x13,x13,x6 ++ add x22,x22,x26 // d+=h ++ add x26,x26,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x13,x13,x3 ++ add x26,x26,x17 // h+=Sigma0(a) ++ add x13,x13,x2 ++ ldr x2,[sp,#24] ++ str x5,[sp,#16] ++ ror x16,x22,#14 ++ add x25,x25,x19 // h+=K[i] ++ ror x4,x15,#1 ++ and x17,x23,x22 ++ ror x3,x12,#19 ++ 
bic x19,x24,x22 ++ ror x5,x26,#28 ++ add x25,x25,x13 // h+=X[i] ++ eor x16,x16,x22,ror#18 ++ eor x4,x4,x15,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x26,x27 // a^b, b^c in next round ++ eor x16,x16,x22,ror#41 // Sigma1(e) ++ eor x5,x5,x26,ror#34 ++ add x25,x25,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x3,x3,x12,ror#61 ++ eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) ++ add x25,x25,x16 // h+=Sigma1(e) ++ eor x28,x28,x27 // Maj(a,b,c) ++ eor x17,x5,x26,ror#39 // Sigma0(a) ++ eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) ++ add x14,x14,x7 ++ add x21,x21,x25 // d+=h ++ add x25,x25,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x14,x14,x4 ++ add x25,x25,x17 // h+=Sigma0(a) ++ add x14,x14,x3 ++ ldr x3,[sp,#0] ++ str x6,[sp,#24] ++ ror x16,x21,#14 ++ add x24,x24,x28 // h+=K[i] ++ ror x5,x0,#1 ++ and x17,x22,x21 ++ ror x4,x13,#19 ++ bic x28,x23,x21 ++ ror x6,x25,#28 ++ add x24,x24,x14 // h+=X[i] ++ eor x16,x16,x21,ror#18 ++ eor x5,x5,x0,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x25,x26 // a^b, b^c in next round ++ eor x16,x16,x21,ror#41 // Sigma1(e) ++ eor x6,x6,x25,ror#34 ++ add x24,x24,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x4,x4,x13,ror#61 ++ eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) ++ add x24,x24,x16 // h+=Sigma1(e) ++ eor x19,x19,x26 // Maj(a,b,c) ++ eor x17,x6,x25,ror#39 // Sigma0(a) ++ eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) ++ add x15,x15,x8 ++ add x20,x20,x24 // d+=h ++ add x24,x24,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x15,x15,x5 ++ add x24,x24,x17 // h+=Sigma0(a) ++ add x15,x15,x4 ++ ldr x4,[sp,#8] ++ str x7,[sp,#0] ++ ror x16,x20,#14 ++ add x23,x23,x19 // h+=K[i] ++ ror x6,x1,#1 ++ and x17,x21,x20 ++ ror x5,x14,#19 ++ bic x19,x22,x20 ++ ror x7,x24,#28 ++ add x23,x23,x15 // h+=X[i] ++ eor x16,x16,x20,ror#18 ++ eor x6,x6,x1,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x24,x25 // a^b, b^c in next round ++ eor x16,x16,x20,ror#41 // Sigma1(e) ++ eor x7,x7,x24,ror#34 ++ add x23,x23,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x5,x5,x14,ror#61 ++ eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) ++ add x23,x23,x16 // h+=Sigma1(e) ++ eor x28,x28,x25 // Maj(a,b,c) ++ eor x17,x7,x24,ror#39 // Sigma0(a) ++ eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) ++ add x0,x0,x9 ++ add x27,x27,x23 // d+=h ++ add x23,x23,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x0,x0,x6 ++ add x23,x23,x17 // h+=Sigma0(a) ++ add x0,x0,x5 ++ ldr x5,[sp,#16] ++ str x8,[sp,#8] ++ ror x16,x27,#14 ++ add x22,x22,x28 // h+=K[i] ++ ror x7,x2,#1 ++ and x17,x20,x27 ++ ror x6,x15,#19 ++ bic x28,x21,x27 ++ ror x8,x23,#28 ++ add x22,x22,x0 // h+=X[i] ++ eor x16,x16,x27,ror#18 ++ eor x7,x7,x2,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x23,x24 // a^b, b^c in next round ++ eor x16,x16,x27,ror#41 // Sigma1(e) ++ eor x8,x8,x23,ror#34 ++ add x22,x22,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x6,x6,x15,ror#61 ++ eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) ++ add x22,x22,x16 // h+=Sigma1(e) ++ eor x19,x19,x24 // Maj(a,b,c) ++ eor x17,x8,x23,ror#39 // Sigma0(a) ++ eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) ++ add x1,x1,x10 ++ add x26,x26,x22 // d+=h ++ add x22,x22,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x1,x1,x7 ++ add x22,x22,x17 // h+=Sigma0(a) ++ add x1,x1,x6 ++ ldr x6,[sp,#24] ++ str x9,[sp,#16] ++ ror x16,x26,#14 ++ add x21,x21,x19 // h+=K[i] ++ ror x8,x3,#1 ++ and x17,x27,x26 ++ ror x7,x0,#19 ++ bic x19,x20,x26 ++ ror x9,x22,#28 ++ add x21,x21,x1 // h+=X[i] ++ 
eor x16,x16,x26,ror#18 ++ eor x8,x8,x3,ror#8 ++ orr x17,x17,x19 // Ch(e,f,g) ++ eor x19,x22,x23 // a^b, b^c in next round ++ eor x16,x16,x26,ror#41 // Sigma1(e) ++ eor x9,x9,x22,ror#34 ++ add x21,x21,x17 // h+=Ch(e,f,g) ++ and x28,x28,x19 // (b^c)&=(a^b) ++ eor x7,x7,x0,ror#61 ++ eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) ++ add x21,x21,x16 // h+=Sigma1(e) ++ eor x28,x28,x23 // Maj(a,b,c) ++ eor x17,x9,x22,ror#39 // Sigma0(a) ++ eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) ++ add x2,x2,x11 ++ add x25,x25,x21 // d+=h ++ add x21,x21,x28 // h+=Maj(a,b,c) ++ ldr x28,[x30],#8 // *K++, x19 in next round ++ add x2,x2,x8 ++ add x21,x21,x17 // h+=Sigma0(a) ++ add x2,x2,x7 ++ ldr x7,[sp,#0] ++ str x10,[sp,#24] ++ ror x16,x25,#14 ++ add x20,x20,x28 // h+=K[i] ++ ror x9,x4,#1 ++ and x17,x26,x25 ++ ror x8,x1,#19 ++ bic x28,x27,x25 ++ ror x10,x21,#28 ++ add x20,x20,x2 // h+=X[i] ++ eor x16,x16,x25,ror#18 ++ eor x9,x9,x4,ror#8 ++ orr x17,x17,x28 // Ch(e,f,g) ++ eor x28,x21,x22 // a^b, b^c in next round ++ eor x16,x16,x25,ror#41 // Sigma1(e) ++ eor x10,x10,x21,ror#34 ++ add x20,x20,x17 // h+=Ch(e,f,g) ++ and x19,x19,x28 // (b^c)&=(a^b) ++ eor x8,x8,x1,ror#61 ++ eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) ++ add x20,x20,x16 // h+=Sigma1(e) ++ eor x19,x19,x22 // Maj(a,b,c) ++ eor x17,x10,x21,ror#39 // Sigma0(a) ++ eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) ++ add x3,x3,x12 ++ add x24,x24,x20 // d+=h ++ add x20,x20,x19 // h+=Maj(a,b,c) ++ ldr x19,[x30],#8 // *K++, x28 in next round ++ add x3,x3,x9 ++ add x20,x20,x17 // h+=Sigma0(a) ++ add x3,x3,x8 ++ cbnz x19,Loop_16_xx ++ ++ ldp x0,x2,[x29,#96] ++ ldr x1,[x29,#112] ++ sub x30,x30,#648 // rewind ++ ++ ldp x3,x4,[x0] ++ ldp x5,x6,[x0,#2*8] ++ add x1,x1,#14*8 // advance input pointer ++ ldp x7,x8,[x0,#4*8] ++ add x20,x20,x3 ++ ldp x9,x10,[x0,#6*8] ++ add x21,x21,x4 ++ add x22,x22,x5 ++ add x23,x23,x6 ++ stp x20,x21,[x0] ++ add x24,x24,x7 ++ add x25,x25,x8 ++ stp x22,x23,[x0,#2*8] ++ add x26,x26,x9 ++ add x27,x27,x10 ++ cmp x1,x2 ++ stp x24,x25,[x0,#4*8] ++ stp x26,x27,[x0,#6*8] ++ b.ne Loop ++ ++ ldp x19,x20,[x29,#16] ++ add sp,sp,#4*8 ++ ldp x21,x22,[x29,#32] ++ ldp x23,x24,[x29,#48] ++ ldp x25,x26,[x29,#64] ++ ldp x27,x28,[x29,#80] ++ ldp x29,x30,[sp],#128 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++.section __TEXT,__const ++.align 6 ++ ++LK512: ++.quad 0x428a2f98d728ae22,0x7137449123ef65cd ++.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc ++.quad 0x3956c25bf348b538,0x59f111f1b605d019 ++.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 ++.quad 0xd807aa98a3030242,0x12835b0145706fbe ++.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 ++.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 ++.quad 0x9bdc06a725c71235,0xc19bf174cf692694 ++.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 ++.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 ++.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 ++.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 ++.quad 0x983e5152ee66dfab,0xa831c66d2db43210 ++.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 ++.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 ++.quad 0x06ca6351e003826f,0x142929670a0e6e70 ++.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 ++.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df ++.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 ++.quad 0x81c2c92e47edaee6,0x92722c851482353b ++.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 ++.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 ++.quad 0xd192e819d6ef5218,0xd69906245565a910 ++.quad 0xf40e35855771202a,0x106aa07032bbd1b8 ++.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 ++.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 ++.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb ++.quad 
0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 ++.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 ++.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec ++.quad 0x90befffa23631e28,0xa4506cebde82bde9 ++.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b ++.quad 0xca273eceea26619c,0xd186b8c721c0c207 ++.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 ++.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 ++.quad 0x113f9804bef90dae,0x1b710b35131c471b ++.quad 0x28db77f523047d84,0x32caab7b40c72493 ++.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c ++.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a ++.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 ++.quad 0 // terminator ++ ++.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++.text ++#ifndef __KERNEL__ ++ ++.align 6 ++sha512_block_armv8: ++Lv8_entry: ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ ++ ld1 {v16.16b,v17.16b,v18.16b,v19.16b},[x1],#64 // load input ++ ld1 {v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64 ++ ++ ld1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // load context ++ adrp x3,LK512@PAGE ++ add x3,x3,LK512@PAGEOFF ++ ++ rev64 v16.16b,v16.16b ++ rev64 v17.16b,v17.16b ++ rev64 v18.16b,v18.16b ++ rev64 v19.16b,v19.16b ++ rev64 v20.16b,v20.16b ++ rev64 v21.16b,v21.16b ++ rev64 v22.16b,v22.16b ++ rev64 v23.16b,v23.16b ++ b Loop_hw ++ ++.align 4 ++Loop_hw: ++ ld1 {v24.2d},[x3],#16 ++ subs x2,x2,#1 ++ sub x4,x1,#128 ++ orr v26.16b,v0.16b,v0.16b // offload ++ orr v27.16b,v1.16b,v1.16b ++ orr v28.16b,v2.16b,v2.16b ++ orr v29.16b,v3.16b,v3.16b ++ csel x1,x1,x4,ne // conditional rewind ++ add v24.2d,v24.2d,v16.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v2.16b,v3.16b,#8 ++ ext v6.16b,v1.16b,v2.16b,#8 ++ add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec08230 //sha512su0 v16.16b,v17.16b ++ ext v7.16b,v20.16b,v21.16b,#8 ++.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b ++.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b ++ add v4.2d,v1.2d,v3.2d // "D + T1" ++.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b ++ add v25.2d,v25.2d,v17.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v4.16b,v2.16b,#8 ++ ext v6.16b,v0.16b,v4.16b,#8 ++ add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08251 //sha512su0 v17.16b,v18.16b ++ ext v7.16b,v21.16b,v22.16b,#8 ++.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b ++.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b ++ add v1.2d,v0.2d,v2.2d // "D + T1" ++.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b ++ add v24.2d,v24.2d,v18.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v1.16b,v4.16b,#8 ++ ext v6.16b,v3.16b,v1.16b,#8 ++ add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec08272 //sha512su0 v18.16b,v19.16b ++ ext v7.16b,v22.16b,v23.16b,#8 ++.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b ++.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b ++ add v0.2d,v3.2d,v4.2d // "D + T1" ++.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b ++ add v25.2d,v25.2d,v19.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v0.16b,v1.16b,#8 ++ ext v6.16b,v2.16b,v0.16b,#8 ++ add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08293 //sha512su0 v19.16b,v20.16b ++ ext v7.16b,v23.16b,v16.16b,#8 ++.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b ++.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b ++ add v3.2d,v2.2d,v1.2d // "D + T1" ++.long 0xce648441 //sha512h2 
v1.16b,v2.16b,v4.16b ++ add v24.2d,v24.2d,v20.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v3.16b,v0.16b,#8 ++ ext v6.16b,v4.16b,v3.16b,#8 ++ add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec082b4 //sha512su0 v20.16b,v21.16b ++ ext v7.16b,v16.16b,v17.16b,#8 ++.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b ++.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b ++ add v2.2d,v4.2d,v0.2d // "D + T1" ++.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b ++ add v25.2d,v25.2d,v21.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v2.16b,v3.16b,#8 ++ ext v6.16b,v1.16b,v2.16b,#8 ++ add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec082d5 //sha512su0 v21.16b,v22.16b ++ ext v7.16b,v17.16b,v18.16b,#8 ++.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b ++.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b ++ add v4.2d,v1.2d,v3.2d // "D + T1" ++.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b ++ add v24.2d,v24.2d,v22.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v4.16b,v2.16b,#8 ++ ext v6.16b,v0.16b,v4.16b,#8 ++ add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec082f6 //sha512su0 v22.16b,v23.16b ++ ext v7.16b,v18.16b,v19.16b,#8 ++.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b ++.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b ++ add v1.2d,v0.2d,v2.2d // "D + T1" ++.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b ++ add v25.2d,v25.2d,v23.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v1.16b,v4.16b,#8 ++ ext v6.16b,v3.16b,v1.16b,#8 ++ add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08217 //sha512su0 v23.16b,v16.16b ++ ext v7.16b,v19.16b,v20.16b,#8 ++.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b ++.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b ++ add v0.2d,v3.2d,v4.2d // "D + T1" ++.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b ++ add v24.2d,v24.2d,v16.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v0.16b,v1.16b,#8 ++ ext v6.16b,v2.16b,v0.16b,#8 ++ add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec08230 //sha512su0 v16.16b,v17.16b ++ ext v7.16b,v20.16b,v21.16b,#8 ++.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b ++.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b ++ add v3.2d,v2.2d,v1.2d // "D + T1" ++.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b ++ add v25.2d,v25.2d,v17.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v3.16b,v0.16b,#8 ++ ext v6.16b,v4.16b,v3.16b,#8 ++ add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08251 //sha512su0 v17.16b,v18.16b ++ ext v7.16b,v21.16b,v22.16b,#8 ++.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b ++.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b ++ add v2.2d,v4.2d,v0.2d // "D + T1" ++.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b ++ add v24.2d,v24.2d,v18.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v2.16b,v3.16b,#8 ++ ext v6.16b,v1.16b,v2.16b,#8 ++ add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec08272 //sha512su0 v18.16b,v19.16b ++ ext v7.16b,v22.16b,v23.16b,#8 ++.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b ++.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b ++ add v4.2d,v1.2d,v3.2d // "D + T1" ++.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b ++ add v25.2d,v25.2d,v19.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v4.16b,v2.16b,#8 ++ ext v6.16b,v0.16b,v4.16b,#8 ++ add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08293 //sha512su0 v19.16b,v20.16b ++ ext 
v7.16b,v23.16b,v16.16b,#8 ++.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b ++.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b ++ add v1.2d,v0.2d,v2.2d // "D + T1" ++.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b ++ add v24.2d,v24.2d,v20.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v1.16b,v4.16b,#8 ++ ext v6.16b,v3.16b,v1.16b,#8 ++ add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec082b4 //sha512su0 v20.16b,v21.16b ++ ext v7.16b,v16.16b,v17.16b,#8 ++.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b ++.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b ++ add v0.2d,v3.2d,v4.2d // "D + T1" ++.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b ++ add v25.2d,v25.2d,v21.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v0.16b,v1.16b,#8 ++ ext v6.16b,v2.16b,v0.16b,#8 ++ add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec082d5 //sha512su0 v21.16b,v22.16b ++ ext v7.16b,v17.16b,v18.16b,#8 ++.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b ++.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b ++ add v3.2d,v2.2d,v1.2d // "D + T1" ++.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b ++ add v24.2d,v24.2d,v22.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v3.16b,v0.16b,#8 ++ ext v6.16b,v4.16b,v3.16b,#8 ++ add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec082f6 //sha512su0 v22.16b,v23.16b ++ ext v7.16b,v18.16b,v19.16b,#8 ++.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b ++.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b ++ add v2.2d,v4.2d,v0.2d // "D + T1" ++.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b ++ add v25.2d,v25.2d,v23.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v2.16b,v3.16b,#8 ++ ext v6.16b,v1.16b,v2.16b,#8 ++ add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08217 //sha512su0 v23.16b,v16.16b ++ ext v7.16b,v19.16b,v20.16b,#8 ++.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b ++.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b ++ add v4.2d,v1.2d,v3.2d // "D + T1" ++.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b ++ add v24.2d,v24.2d,v16.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v4.16b,v2.16b,#8 ++ ext v6.16b,v0.16b,v4.16b,#8 ++ add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec08230 //sha512su0 v16.16b,v17.16b ++ ext v7.16b,v20.16b,v21.16b,#8 ++.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b ++.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b ++ add v1.2d,v0.2d,v2.2d // "D + T1" ++.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b ++ add v25.2d,v25.2d,v17.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v1.16b,v4.16b,#8 ++ ext v6.16b,v3.16b,v1.16b,#8 ++ add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08251 //sha512su0 v17.16b,v18.16b ++ ext v7.16b,v21.16b,v22.16b,#8 ++.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b ++.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b ++ add v0.2d,v3.2d,v4.2d // "D + T1" ++.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b ++ add v24.2d,v24.2d,v18.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v0.16b,v1.16b,#8 ++ ext v6.16b,v2.16b,v0.16b,#8 ++ add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec08272 //sha512su0 v18.16b,v19.16b ++ ext v7.16b,v22.16b,v23.16b,#8 ++.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b ++.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b ++ add v3.2d,v2.2d,v1.2d // "D + T1" ++.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b ++ add v25.2d,v25.2d,v19.2d ++ ld1 {v24.2d},[x3],#16 
++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v3.16b,v0.16b,#8 ++ ext v6.16b,v4.16b,v3.16b,#8 ++ add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08293 //sha512su0 v19.16b,v20.16b ++ ext v7.16b,v23.16b,v16.16b,#8 ++.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b ++.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b ++ add v2.2d,v4.2d,v0.2d // "D + T1" ++.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b ++ add v24.2d,v24.2d,v20.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v2.16b,v3.16b,#8 ++ ext v6.16b,v1.16b,v2.16b,#8 ++ add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec082b4 //sha512su0 v20.16b,v21.16b ++ ext v7.16b,v16.16b,v17.16b,#8 ++.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b ++.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b ++ add v4.2d,v1.2d,v3.2d // "D + T1" ++.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b ++ add v25.2d,v25.2d,v21.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v4.16b,v2.16b,#8 ++ ext v6.16b,v0.16b,v4.16b,#8 ++ add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec082d5 //sha512su0 v21.16b,v22.16b ++ ext v7.16b,v17.16b,v18.16b,#8 ++.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b ++.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b ++ add v1.2d,v0.2d,v2.2d // "D + T1" ++.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b ++ add v24.2d,v24.2d,v22.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v1.16b,v4.16b,#8 ++ ext v6.16b,v3.16b,v1.16b,#8 ++ add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec082f6 //sha512su0 v22.16b,v23.16b ++ ext v7.16b,v18.16b,v19.16b,#8 ++.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b ++.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b ++ add v0.2d,v3.2d,v4.2d // "D + T1" ++.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b ++ add v25.2d,v25.2d,v23.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v0.16b,v1.16b,#8 ++ ext v6.16b,v2.16b,v0.16b,#8 ++ add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08217 //sha512su0 v23.16b,v16.16b ++ ext v7.16b,v19.16b,v20.16b,#8 ++.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b ++.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b ++ add v3.2d,v2.2d,v1.2d // "D + T1" ++.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b ++ add v24.2d,v24.2d,v16.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v3.16b,v0.16b,#8 ++ ext v6.16b,v4.16b,v3.16b,#8 ++ add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec08230 //sha512su0 v16.16b,v17.16b ++ ext v7.16b,v20.16b,v21.16b,#8 ++.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b ++.long 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b ++ add v2.2d,v4.2d,v0.2d // "D + T1" ++.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b ++ add v25.2d,v25.2d,v17.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v2.16b,v3.16b,#8 ++ ext v6.16b,v1.16b,v2.16b,#8 ++ add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08251 //sha512su0 v17.16b,v18.16b ++ ext v7.16b,v21.16b,v22.16b,#8 ++.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b ++.long 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b ++ add v4.2d,v1.2d,v3.2d // "D + T1" ++.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b ++ add v24.2d,v24.2d,v18.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v4.16b,v2.16b,#8 ++ ext v6.16b,v0.16b,v4.16b,#8 ++ add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec08272 //sha512su0 v18.16b,v19.16b ++ ext v7.16b,v22.16b,v23.16b,#8 ++.long 0xce6680a2 //sha512h 
v2.16b,v5.16b,v6.16b ++.long 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b ++ add v1.2d,v0.2d,v2.2d // "D + T1" ++.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b ++ add v25.2d,v25.2d,v19.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v1.16b,v4.16b,#8 ++ ext v6.16b,v3.16b,v1.16b,#8 ++ add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08293 //sha512su0 v19.16b,v20.16b ++ ext v7.16b,v23.16b,v16.16b,#8 ++.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b ++.long 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b ++ add v0.2d,v3.2d,v4.2d // "D + T1" ++.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b ++ add v24.2d,v24.2d,v20.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v0.16b,v1.16b,#8 ++ ext v6.16b,v2.16b,v0.16b,#8 ++ add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec082b4 //sha512su0 v20.16b,v21.16b ++ ext v7.16b,v16.16b,v17.16b,#8 ++.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b ++.long 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b ++ add v3.2d,v2.2d,v1.2d // "D + T1" ++.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b ++ add v25.2d,v25.2d,v21.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v3.16b,v0.16b,#8 ++ ext v6.16b,v4.16b,v3.16b,#8 ++ add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec082d5 //sha512su0 v21.16b,v22.16b ++ ext v7.16b,v17.16b,v18.16b,#8 ++.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b ++.long 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b ++ add v2.2d,v4.2d,v0.2d // "D + T1" ++.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b ++ add v24.2d,v24.2d,v22.2d ++ ld1 {v25.2d},[x3],#16 ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v2.16b,v3.16b,#8 ++ ext v6.16b,v1.16b,v2.16b,#8 ++ add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xcec082f6 //sha512su0 v22.16b,v23.16b ++ ext v7.16b,v18.16b,v19.16b,#8 ++.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b ++.long 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b ++ add v4.2d,v1.2d,v3.2d // "D + T1" ++.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b ++ add v25.2d,v25.2d,v23.2d ++ ld1 {v24.2d},[x3],#16 ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v4.16b,v2.16b,#8 ++ ext v6.16b,v0.16b,v4.16b,#8 ++ add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xcec08217 //sha512su0 v23.16b,v16.16b ++ ext v7.16b,v19.16b,v20.16b,#8 ++.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b ++.long 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b ++ add v1.2d,v0.2d,v2.2d // "D + T1" ++.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b ++ ld1 {v25.2d},[x3],#16 ++ add v24.2d,v24.2d,v16.2d ++ ld1 {v16.16b},[x1],#16 // load next input ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v1.16b,v4.16b,#8 ++ ext v6.16b,v3.16b,v1.16b,#8 ++ add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b ++ rev64 v16.16b,v16.16b ++ add v0.2d,v3.2d,v4.2d // "D + T1" ++.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b ++ ld1 {v24.2d},[x3],#16 ++ add v25.2d,v25.2d,v17.2d ++ ld1 {v17.16b},[x1],#16 // load next input ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v0.16b,v1.16b,#8 ++ ext v6.16b,v2.16b,v0.16b,#8 ++ add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b ++ rev64 v17.16b,v17.16b ++ add v3.2d,v2.2d,v1.2d // "D + T1" ++.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b ++ ld1 {v25.2d},[x3],#16 ++ add v24.2d,v24.2d,v18.2d ++ ld1 {v18.16b},[x1],#16 // load next input ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v3.16b,v0.16b,#8 ++ ext v6.16b,v4.16b,v3.16b,#8 ++ add v0.2d,v0.2d,v24.2d // "T1 + H + 
K512[i]" ++.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b ++ rev64 v18.16b,v18.16b ++ add v2.2d,v4.2d,v0.2d // "D + T1" ++.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b ++ ld1 {v24.2d},[x3],#16 ++ add v25.2d,v25.2d,v19.2d ++ ld1 {v19.16b},[x1],#16 // load next input ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v2.16b,v3.16b,#8 ++ ext v6.16b,v1.16b,v2.16b,#8 ++ add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b ++ rev64 v19.16b,v19.16b ++ add v4.2d,v1.2d,v3.2d // "D + T1" ++.long 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b ++ ld1 {v25.2d},[x3],#16 ++ add v24.2d,v24.2d,v20.2d ++ ld1 {v20.16b},[x1],#16 // load next input ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v4.16b,v2.16b,#8 ++ ext v6.16b,v0.16b,v4.16b,#8 ++ add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b ++ rev64 v20.16b,v20.16b ++ add v1.2d,v0.2d,v2.2d // "D + T1" ++.long 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b ++ ld1 {v24.2d},[x3],#16 ++ add v25.2d,v25.2d,v21.2d ++ ld1 {v21.16b},[x1],#16 // load next input ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v1.16b,v4.16b,#8 ++ ext v6.16b,v3.16b,v1.16b,#8 ++ add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b ++ rev64 v21.16b,v21.16b ++ add v0.2d,v3.2d,v4.2d // "D + T1" ++.long 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b ++ ld1 {v25.2d},[x3],#16 ++ add v24.2d,v24.2d,v22.2d ++ ld1 {v22.16b},[x1],#16 // load next input ++ ext v24.16b,v24.16b,v24.16b,#8 ++ ext v5.16b,v0.16b,v1.16b,#8 ++ ext v6.16b,v2.16b,v0.16b,#8 ++ add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" ++.long 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b ++ rev64 v22.16b,v22.16b ++ add v3.2d,v2.2d,v1.2d // "D + T1" ++.long 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b ++ sub x3,x3,#80*8 // rewind ++ add v25.2d,v25.2d,v23.2d ++ ld1 {v23.16b},[x1],#16 // load next input ++ ext v25.16b,v25.16b,v25.16b,#8 ++ ext v5.16b,v3.16b,v0.16b,#8 ++ ext v6.16b,v4.16b,v3.16b,#8 ++ add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" ++.long 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b ++ rev64 v23.16b,v23.16b ++ add v2.2d,v4.2d,v0.2d // "D + T1" ++.long 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b ++ add v0.2d,v0.2d,v26.2d // accumulate ++ add v1.2d,v1.2d,v27.2d ++ add v2.2d,v2.2d,v28.2d ++ add v3.2d,v3.2d,v29.2d ++ ++ cbnz x2,Loop_hw ++ ++ st1 {v0.2d,v1.2d,v2.2d,v3.2d},[x0] // store context ++ ++ ldr x29,[sp],#16 ++ ret ++ ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-aarch64/crypto/fipsmodule/vpaes-armv8.S b/apple-aarch64/crypto/fipsmodule/vpaes-armv8.S +new file mode 100644 +index 0000000..6dfc25d +--- /dev/null ++++ b/apple-aarch64/crypto/fipsmodule/vpaes-armv8.S +@@ -0,0 +1,1232 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++.section __TEXT,__const ++ ++ ++.align 7 // totally strategic alignment ++_vpaes_consts: ++Lk_mc_forward: // mc_forward ++.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 ++.quad 0x080B0A0904070605, 0x000302010C0F0E0D ++.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 ++.quad 0x000302010C0F0E0D, 0x080B0A0904070605 ++Lk_mc_backward: // mc_backward ++.quad 0x0605040702010003, 0x0E0D0C0F0A09080B ++.quad 0x020100030E0D0C0F, 0x0A09080B06050407 ++.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 ++.quad 0x0A09080B06050407, 0x020100030E0D0C0F ++Lk_sr: // sr ++.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 ++.quad 0x030E09040F0A0500, 0x0B06010C07020D08 ++.quad 0x0F060D040B020900, 0x070E050C030A0108 ++.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 ++ ++// ++// "Hot" constants ++// ++Lk_inv: // inv, inva ++.quad 0x0E05060F0D080180, 0x040703090A0B0C02 ++.quad 0x01040A060F0B0780, 0x030D0E0C02050809 ++Lk_ipt: // input transform (lo, hi) ++.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 ++.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 ++Lk_sbo: // sbou, sbot ++.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 ++.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA ++Lk_sb1: // sb1u, sb1t ++.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF ++.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 ++Lk_sb2: // sb2u, sb2t ++.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A ++.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD ++ ++// ++// Decryption stuff ++// ++Lk_dipt: // decryption input transform ++.quad 0x0F505B040B545F00, 0x154A411E114E451A ++.quad 0x86E383E660056500, 0x12771772F491F194 ++Lk_dsbo: // decryption sbox final output ++.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D ++.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C ++Lk_dsb9: // decryption sbox output *9*u, *9*t ++.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 ++.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 ++Lk_dsbd: // decryption sbox output *D*u, *D*t ++.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 ++.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 ++Lk_dsbb: // decryption sbox output *B*u, *B*t ++.quad 0xD022649296B44200, 0x602646F6B0F2D404 ++.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B ++Lk_dsbe: // decryption sbox output *E*u, *E*t ++.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 ++.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 ++ ++// ++// Key schedule constants ++// ++Lk_dksd: // decryption key schedule: invskew x*D ++.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 ++.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E ++Lk_dksb: // decryption key schedule: invskew x*B ++.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 ++.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 ++Lk_dkse: // decryption key schedule: invskew x*E + 0x63 ++.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 ++.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 ++Lk_dks9: // decryption key schedule: invskew x*9 ++.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC ++.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE ++ ++Lk_rcon: // rcon ++.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 ++ ++Lk_opt: // output transform ++.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 ++.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 ++Lk_deskew: // deskew tables: inverts the sbox's "skew" ++.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A ++.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 ++ ++.byte 
86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,56,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 ++.align 2 ++ ++.align 6 ++ ++.text ++## ++## _aes_preheat ++## ++## Fills register %r10 -> .aes_consts (so you can -fPIC) ++## and %xmm9-%xmm15 as specified below. ++## ++ ++.align 4 ++_vpaes_encrypt_preheat: ++ adrp x10, Lk_inv@PAGE ++ add x10, x10, Lk_inv@PAGEOFF ++ movi v17.16b, #0x0f ++ ld1 {v18.2d,v19.2d}, [x10],#32 // Lk_inv ++ ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64 // Lk_ipt, Lk_sbo ++ ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10] // Lk_sb1, Lk_sb2 ++ ret ++ ++ ++## ++## _aes_encrypt_core ++## ++## AES-encrypt %xmm0. ++## ++## Inputs: ++## %xmm0 = input ++## %xmm9-%xmm15 as in _vpaes_preheat ++## (%rdx) = scheduled keys ++## ++## Output in %xmm0 ++## Clobbers %xmm1-%xmm5, %r9, %r10, %r11, %rax ++## Preserves %xmm6 - %xmm8 so you get some local vectors ++## ++## ++ ++.align 4 ++_vpaes_encrypt_core: ++ mov x9, x2 ++ ldr w8, [x2,#240] // pull rounds ++ adrp x11, Lk_mc_forward@PAGE+16 ++ add x11, x11, Lk_mc_forward@PAGEOFF+16 ++ // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ++ ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key ++ and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 ++ ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 ++ tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 ++ // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi ++ tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 ++ eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 ++ eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 ++ b Lenc_entry ++ ++.align 4 ++Lenc_loop: ++ // middle of middle round ++ add x10, x11, #0x40 ++ tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u ++ ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # Lk_mc_forward[] ++ tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t ++ ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # Lk_mc_backward[] ++ tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B ++ eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A ++ tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B ++ tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C ++ eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D ++ and x11, x11, #~(1<<6) // and $0x30, %r11 # ... 
mod 4 ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D ++ sub w8, w8, #1 // nr-- ++ ++Lenc_entry: ++ // top of round ++ and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k ++ ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i ++ tbl v5.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k ++ eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 ++ cbnz w8, Lenc_loop ++ ++ // middle of last round ++ add x10, x11, #0x80 ++ // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo ++ // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 ++ tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # Lk_sr[] ++ tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 ++ ret ++ ++ ++.globl _vpaes_encrypt ++.private_extern _vpaes_encrypt ++ ++.align 4 ++_vpaes_encrypt: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-16]! 
++ add x29,sp,#0 ++ ++ ld1 {v7.16b}, [x0] ++ bl _vpaes_encrypt_preheat ++ bl _vpaes_encrypt_core ++ st1 {v0.16b}, [x1] ++ ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++ ++.align 4 ++_vpaes_encrypt_2x: ++ mov x9, x2 ++ ldr w8, [x2,#240] // pull rounds ++ adrp x11, Lk_mc_forward@PAGE+16 ++ add x11, x11, Lk_mc_forward@PAGEOFF+16 ++ // vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ++ ld1 {v16.2d}, [x9], #16 // vmovdqu (%r9), %xmm5 # round0 key ++ and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 ++ ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 ++ and v9.16b, v15.16b, v17.16b ++ ushr v8.16b, v15.16b, #4 ++ tbl v1.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm1 ++ tbl v9.16b, {v20.16b}, v9.16b ++ // vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi ++ tbl v2.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm3, %xmm2 ++ tbl v10.16b, {v21.16b}, v8.16b ++ eor v0.16b, v1.16b, v16.16b // vpxor %xmm5, %xmm1, %xmm0 ++ eor v8.16b, v9.16b, v16.16b ++ eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 ++ eor v8.16b, v8.16b, v10.16b ++ b Lenc_2x_entry ++ ++.align 4 ++Lenc_2x_loop: ++ // middle of middle round ++ add x10, x11, #0x40 ++ tbl v4.16b, {v25.16b}, v2.16b // vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u ++ tbl v12.16b, {v25.16b}, v10.16b ++ ld1 {v1.2d}, [x11], #16 // vmovdqa -0x40(%r11,%r10), %xmm1 # Lk_mc_forward[] ++ tbl v0.16b, {v24.16b}, v3.16b // vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t ++ tbl v8.16b, {v24.16b}, v11.16b ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ eor v12.16b, v12.16b, v16.16b ++ tbl v5.16b, {v27.16b}, v2.16b // vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u ++ tbl v13.16b, {v27.16b}, v10.16b ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ eor v8.16b, v8.16b, v12.16b ++ tbl v2.16b, {v26.16b}, v3.16b // vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t ++ tbl v10.16b, {v26.16b}, v11.16b ++ ld1 {v4.2d}, [x10] // vmovdqa (%r11,%r10), %xmm4 # Lk_mc_backward[] ++ tbl v3.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm3 # 0 = B ++ tbl v11.16b, {v8.16b}, v1.16b ++ eor v2.16b, v2.16b, v5.16b // vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A ++ eor v10.16b, v10.16b, v13.16b ++ tbl v0.16b, {v0.16b}, v4.16b // vpshufb %xmm4, %xmm0, %xmm0 # 3 = D ++ tbl v8.16b, {v8.16b}, v4.16b ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B ++ eor v11.16b, v11.16b, v10.16b ++ tbl v4.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C ++ tbl v12.16b, {v11.16b},v1.16b ++ eor v0.16b, v0.16b, v3.16b // vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D ++ eor v8.16b, v8.16b, v11.16b ++ and x11, x11, #~(1<<6) // and $0x30, %r11 # ... 
mod 4 ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D ++ eor v8.16b, v8.16b, v12.16b ++ sub w8, w8, #1 // nr-- ++ ++Lenc_2x_entry: ++ // top of round ++ and v1.16b, v0.16b, v17.16b // vpand %xmm0, %xmm9, %xmm1 # 0 = k ++ ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i ++ and v9.16b, v8.16b, v17.16b ++ ushr v8.16b, v8.16b, #4 ++ tbl v5.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k ++ tbl v13.16b, {v19.16b},v9.16b ++ eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ eor v9.16b, v9.16b, v8.16b ++ tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ tbl v11.16b, {v18.16b},v8.16b ++ tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ tbl v12.16b, {v18.16b},v9.16b ++ eor v3.16b, v3.16b, v5.16b // vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ eor v11.16b, v11.16b, v13.16b ++ eor v4.16b, v4.16b, v5.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ eor v12.16b, v12.16b, v13.16b ++ tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ tbl v10.16b, {v18.16b},v11.16b ++ tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ tbl v11.16b, {v18.16b},v12.16b ++ eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ eor v10.16b, v10.16b, v9.16b ++ eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ eor v11.16b, v11.16b, v8.16b ++ ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm5 ++ cbnz w8, Lenc_2x_loop ++ ++ // middle of last round ++ add x10, x11, #0x80 ++ // vmovdqa -0x60(%r10), %xmm4 # 3 : sbou .Lk_sbo ++ // vmovdqa -0x50(%r10), %xmm0 # 0 : sbot .Lk_sbo+16 ++ tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ tbl v12.16b, {v22.16b}, v10.16b ++ ld1 {v1.2d}, [x10] // vmovdqa 0x40(%r11,%r10), %xmm1 # Lk_sr[] ++ tbl v0.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t ++ tbl v8.16b, {v23.16b}, v11.16b ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ eor v12.16b, v12.16b, v16.16b ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ eor v8.16b, v8.16b, v12.16b ++ tbl v0.16b, {v0.16b},v1.16b // vpshufb %xmm1, %xmm0, %xmm0 ++ tbl v1.16b, {v8.16b},v1.16b ++ ret ++ ++ ++ ++.align 4 ++_vpaes_decrypt_preheat: ++ adrp x10, Lk_inv@PAGE ++ add x10, x10, Lk_inv@PAGEOFF ++ movi v17.16b, #0x0f ++ adrp x11, Lk_dipt@PAGE ++ add x11, x11, Lk_dipt@PAGEOFF ++ ld1 {v18.2d,v19.2d}, [x10],#32 // Lk_inv ++ ld1 {v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64 // Lk_dipt, Lk_dsbo ++ ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64 // Lk_dsb9, Lk_dsbd ++ ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x11] // Lk_dsbb, Lk_dsbe ++ ret ++ ++ ++## ++## Decryption core ++## ++## Same API as encryption core. 
++## ++ ++.align 4 ++_vpaes_decrypt_core: ++ mov x9, x2 ++ ldr w8, [x2,#240] // pull rounds ++ ++ // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo ++ lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 ++ eor x11, x11, #0x30 // xor $0x30, %r11 ++ adrp x10, Lk_sr@PAGE ++ add x10, x10, Lk_sr@PAGEOFF ++ and x11, x11, #0x30 // and $0x30, %r11 ++ add x11, x11, x10 ++ adrp x10, Lk_mc_forward@PAGE+48 ++ add x10, x10, Lk_mc_forward@PAGEOFF+48 ++ ++ ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key ++ and v1.16b, v7.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 ++ ushr v0.16b, v7.16b, #4 // vpsrlb $4, %xmm0, %xmm0 ++ tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 ++ ld1 {v5.2d}, [x10] // vmovdqa Lk_mc_forward+48(%rip), %xmm5 ++ // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi ++ tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 ++ eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 ++ eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 ++ b Ldec_entry ++ ++.align 4 ++Ldec_loop: ++// ++// Inverse mix columns ++// ++ // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u ++ // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t ++ tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u ++ tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t ++ eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 ++ // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu ++ eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt ++ ++ tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu ++ tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu ++ eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt ++ ++ tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu ++ tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu ++ eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet ++ ++ tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu ++ tbl v0.16b, {v0.16b}, v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 ++ eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ sub w8, w8, #1 // sub $1,%rax # nr-- ++ ++Ldec_entry: ++ // top of round ++ and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i ++ tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ tbl v2.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ 
tbl v3.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 ++ cbnz w8, Ldec_loop ++ ++ // middle of last round ++ // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou ++ tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot ++ ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # Lk_sr-Lk_dsbd=-0x160 ++ tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k ++ eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A ++ tbl v0.16b, {v0.16b}, v2.16b // vpshufb %xmm2, %xmm0, %xmm0 ++ ret ++ ++ ++.globl _vpaes_decrypt ++.private_extern _vpaes_decrypt ++ ++.align 4 ++_vpaes_decrypt: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ ++ ld1 {v7.16b}, [x0] ++ bl _vpaes_decrypt_preheat ++ bl _vpaes_decrypt_core ++ st1 {v0.16b}, [x1] ++ ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++// v14-v15 input, v0-v1 output ++ ++.align 4 ++_vpaes_decrypt_2x: ++ mov x9, x2 ++ ldr w8, [x2,#240] // pull rounds ++ ++ // vmovdqa .Lk_dipt(%rip), %xmm2 # iptlo ++ lsl x11, x8, #4 // mov %rax, %r11; shl $4, %r11 ++ eor x11, x11, #0x30 // xor $0x30, %r11 ++ adrp x10, Lk_sr@PAGE ++ add x10, x10, Lk_sr@PAGEOFF ++ and x11, x11, #0x30 // and $0x30, %r11 ++ add x11, x11, x10 ++ adrp x10, Lk_mc_forward@PAGE+48 ++ add x10, x10, Lk_mc_forward@PAGEOFF+48 ++ ++ ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm4 # round0 key ++ and v1.16b, v14.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 ++ ushr v0.16b, v14.16b, #4 // vpsrlb $4, %xmm0, %xmm0 ++ and v9.16b, v15.16b, v17.16b ++ ushr v8.16b, v15.16b, #4 ++ tbl v2.16b, {v20.16b},v1.16b // vpshufb %xmm1, %xmm2, %xmm2 ++ tbl v10.16b, {v20.16b},v9.16b ++ ld1 {v5.2d}, [x10] // vmovdqa Lk_mc_forward+48(%rip), %xmm5 ++ // vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi ++ tbl v0.16b, {v21.16b},v0.16b // vpshufb %xmm0, %xmm1, %xmm0 ++ tbl v8.16b, {v21.16b},v8.16b ++ eor v2.16b, v2.16b, v16.16b // vpxor %xmm4, %xmm2, %xmm2 ++ eor v10.16b, v10.16b, v16.16b ++ eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 ++ eor v8.16b, v8.16b, v10.16b ++ b Ldec_2x_entry ++ ++.align 4 ++Ldec_2x_loop: ++// ++// Inverse mix columns ++// ++ // vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u ++ // vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t ++ tbl v4.16b, {v24.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u ++ tbl v12.16b, {v24.16b}, v10.16b ++ tbl v1.16b, {v25.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t ++ tbl v9.16b, {v25.16b}, v11.16b ++ eor v0.16b, v4.16b, v16.16b // vpxor %xmm4, %xmm0, %xmm0 ++ eor v8.16b, v12.16b, v16.16b ++ // vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu ++ eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ eor v8.16b, v8.16b, v9.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ // vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt ++ ++ tbl v4.16b, {v26.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu ++ tbl v12.16b, {v26.16b}, v10.16b ++ tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ tbl v8.16b, {v8.16b},v5.16b ++ tbl v1.16b, {v27.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt ++ tbl v9.16b, {v27.16b}, v11.16b ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ eor v8.16b, v8.16b, v12.16b ++ // vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu ++ eor v0.16b, v0.16b, v1.16b 
// vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ eor v8.16b, v8.16b, v9.16b ++ // vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt ++ ++ tbl v4.16b, {v28.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu ++ tbl v12.16b, {v28.16b}, v10.16b ++ tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ tbl v8.16b, {v8.16b},v5.16b ++ tbl v1.16b, {v29.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt ++ tbl v9.16b, {v29.16b}, v11.16b ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ eor v8.16b, v8.16b, v12.16b ++ // vmovdqa 0x40(%r10), %xmm4 # 4 : sbeu ++ eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ eor v8.16b, v8.16b, v9.16b ++ // vmovdqa 0x50(%r10), %xmm1 # 0 : sbet ++ ++ tbl v4.16b, {v30.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu ++ tbl v12.16b, {v30.16b}, v10.16b ++ tbl v0.16b, {v0.16b},v5.16b // vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ tbl v8.16b, {v8.16b},v5.16b ++ tbl v1.16b, {v31.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet ++ tbl v9.16b, {v31.16b}, v11.16b ++ eor v0.16b, v0.16b, v4.16b // vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ eor v8.16b, v8.16b, v12.16b ++ ext v5.16b, v5.16b, v5.16b, #12 // vpalignr $12, %xmm5, %xmm5, %xmm5 ++ eor v0.16b, v0.16b, v1.16b // vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ eor v8.16b, v8.16b, v9.16b ++ sub w8, w8, #1 // sub $1,%rax # nr-- ++ ++Ldec_2x_entry: ++ // top of round ++ and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i ++ and v9.16b, v8.16b, v17.16b ++ ushr v8.16b, v8.16b, #4 ++ tbl v2.16b, {v19.16b},v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ tbl v10.16b, {v19.16b},v9.16b ++ eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ eor v9.16b, v9.16b, v8.16b ++ tbl v3.16b, {v18.16b},v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ tbl v11.16b, {v18.16b},v8.16b ++ tbl v4.16b, {v18.16b},v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ tbl v12.16b, {v18.16b},v9.16b ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ eor v11.16b, v11.16b, v10.16b ++ eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ eor v12.16b, v12.16b, v10.16b ++ tbl v2.16b, {v18.16b},v3.16b // vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ tbl v10.16b, {v18.16b},v11.16b ++ tbl v3.16b, {v18.16b},v4.16b // vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ tbl v11.16b, {v18.16b},v12.16b ++ eor v2.16b, v2.16b, v1.16b // vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ eor v10.16b, v10.16b, v9.16b ++ eor v3.16b, v3.16b, v0.16b // vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ eor v11.16b, v11.16b, v8.16b ++ ld1 {v16.2d}, [x9],#16 // vmovdqu (%r9), %xmm0 ++ cbnz w8, Ldec_2x_loop ++ ++ // middle of last round ++ // vmovdqa 0x60(%r10), %xmm4 # 3 : sbou ++ tbl v4.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ tbl v12.16b, {v22.16b}, v10.16b ++ // vmovdqa 0x70(%r10), %xmm1 # 0 : sbot ++ tbl v1.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t ++ tbl v9.16b, {v23.16b}, v11.16b ++ ld1 {v2.2d}, [x11] // vmovdqa -0x160(%r11), %xmm2 # Lk_sr-Lk_dsbd=-0x160 ++ eor v4.16b, v4.16b, v16.16b // vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k ++ eor v12.16b, v12.16b, v16.16b ++ eor v0.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm0 # 0 = A ++ eor v8.16b, v9.16b, v12.16b ++ tbl v0.16b, {v0.16b},v2.16b // vpshufb %xmm2, %xmm0, %xmm0 ++ tbl v1.16b, {v8.16b},v2.16b ++ ret ++ ++######################################################## ++## ## ++## AES key schedule ## ++## ## 
++######################################################## ++ ++.align 4 ++_vpaes_key_preheat: ++ adrp x10, Lk_inv@PAGE ++ add x10, x10, Lk_inv@PAGEOFF ++ movi v16.16b, #0x5b // Lk_s63 ++ adrp x11, Lk_sb1@PAGE ++ add x11, x11, Lk_sb1@PAGEOFF ++ movi v17.16b, #0x0f // Lk_s0F ++ ld1 {v18.2d,v19.2d,v20.2d,v21.2d}, [x10] // Lk_inv, Lk_ipt ++ adrp x10, Lk_dksd@PAGE ++ add x10, x10, Lk_dksd@PAGEOFF ++ ld1 {v22.2d,v23.2d}, [x11] // Lk_sb1 ++ adrp x11, Lk_mc_forward@PAGE ++ add x11, x11, Lk_mc_forward@PAGEOFF ++ ld1 {v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64 // Lk_dksd, Lk_dksb ++ ld1 {v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64 // Lk_dkse, Lk_dks9 ++ ld1 {v8.2d}, [x10] // Lk_rcon ++ ld1 {v9.2d}, [x11] // Lk_mc_forward[0] ++ ret ++ ++ ++ ++.align 4 ++_vpaes_schedule_core: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29, x30, [sp,#-16]! ++ add x29,sp,#0 ++ ++ bl _vpaes_key_preheat // load the tables ++ ++ ld1 {v0.16b}, [x0],#16 // vmovdqu (%rdi), %xmm0 # load key (unaligned) ++ ++ // input transform ++ mov v3.16b, v0.16b // vmovdqa %xmm0, %xmm3 ++ bl _vpaes_schedule_transform ++ mov v7.16b, v0.16b // vmovdqa %xmm0, %xmm7 ++ ++ adrp x10, Lk_sr@PAGE // lea Lk_sr(%rip),%r10 ++ add x10, x10, Lk_sr@PAGEOFF ++ ++ add x8, x8, x10 ++ cbnz w3, Lschedule_am_decrypting ++ ++ // encrypting, output zeroth round key after transform ++ st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) ++ b Lschedule_go ++ ++Lschedule_am_decrypting: ++ // decrypting, output zeroth round key after shiftrows ++ ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 ++ tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 ++ st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) ++ eor x8, x8, #0x30 // xor $0x30, %r8 ++ ++Lschedule_go: ++ cmp w1, #192 // cmp $192, %esi ++ b.hi Lschedule_256 ++ b.eq Lschedule_192 ++ // 128: fall though ++ ++## ++## .schedule_128 ++## ++## 128-bit specific part of key schedule. ++## ++## This schedule is really simple, because all its parts ++## are accomplished by the subroutines. ++## ++Lschedule_128: ++ mov x0, #10 // mov $10, %esi ++ ++Loop_schedule_128: ++ sub x0, x0, #1 // dec %esi ++ bl _vpaes_schedule_round ++ cbz x0, Lschedule_mangle_last ++ bl _vpaes_schedule_mangle // write output ++ b Loop_schedule_128 ++ ++## ++## .aes_schedule_192 ++## ++## 192-bit specific part of key schedule. ++## ++## The main body of this schedule is the same as the 128-bit ++## schedule, but with more smearing. The long, high side is ++## stored in %xmm7 as before, and the short, low side is in ++## the high bits of %xmm6. ++## ++## This schedule is somewhat nastier, however, because each ++## round produces 192 bits of key material, or 1.5 round keys. ++## Therefore, on each cycle we do 2 rounds and produce 3 round ++## keys. 
++## ++.align 4 ++Lschedule_192: ++ sub x0, x0, #8 ++ ld1 {v0.16b}, [x0] // vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) ++ bl _vpaes_schedule_transform // input transform ++ mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save short part ++ eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 # clear 4 ++ ins v6.d[0], v4.d[0] // vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros ++ mov x0, #4 // mov $4, %esi ++ ++Loop_schedule_192: ++ sub x0, x0, #1 // dec %esi ++ bl _vpaes_schedule_round ++ ext v0.16b, v6.16b, v0.16b, #8 // vpalignr $8,%xmm6,%xmm0,%xmm0 ++ bl _vpaes_schedule_mangle // save key n ++ bl _vpaes_schedule_192_smear ++ bl _vpaes_schedule_mangle // save key n+1 ++ bl _vpaes_schedule_round ++ cbz x0, Lschedule_mangle_last ++ bl _vpaes_schedule_mangle // save key n+2 ++ bl _vpaes_schedule_192_smear ++ b Loop_schedule_192 ++ ++## ++## .aes_schedule_256 ++## ++## 256-bit specific part of key schedule. ++## ++## The structure here is very similar to the 128-bit ++## schedule, but with an additional "low side" in ++## %xmm6. The low side's rounds are the same as the ++## high side's, except no rcon and no rotation. ++## ++.align 4 ++Lschedule_256: ++ ld1 {v0.16b}, [x0] // vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) ++ bl _vpaes_schedule_transform // input transform ++ mov x0, #7 // mov $7, %esi ++ ++Loop_schedule_256: ++ sub x0, x0, #1 // dec %esi ++ bl _vpaes_schedule_mangle // output low result ++ mov v6.16b, v0.16b // vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 ++ ++ // high round ++ bl _vpaes_schedule_round ++ cbz x0, Lschedule_mangle_last ++ bl _vpaes_schedule_mangle ++ ++ // low round. swap xmm7 and xmm6 ++ dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 ++ movi v4.16b, #0 ++ mov v5.16b, v7.16b // vmovdqa %xmm7, %xmm5 ++ mov v7.16b, v6.16b // vmovdqa %xmm6, %xmm7 ++ bl _vpaes_schedule_low_round ++ mov v7.16b, v5.16b // vmovdqa %xmm5, %xmm7 ++ ++ b Loop_schedule_256 ++ ++## ++## .aes_schedule_mangle_last ++## ++## Mangler for last round of key schedule ++## Mangles %xmm0 ++## when encrypting, outputs out(%xmm0) ^ 63 ++## when decrypting, outputs unskew(%xmm0) ++## ++## Always called right before return... 
jumps to cleanup and exits ++## ++.align 4 ++Lschedule_mangle_last: ++ // schedule last round key from xmm0 ++ adrp x11, Lk_deskew@PAGE // lea Lk_deskew(%rip),%r11 # prepare to deskew ++ add x11, x11, Lk_deskew@PAGEOFF ++ ++ cbnz w3, Lschedule_mangle_last_dec ++ ++ // encrypting ++ ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10),%xmm1 ++ adrp x11, Lk_opt@PAGE // lea Lk_opt(%rip), %r11 # prepare to output transform ++ add x11, x11, Lk_opt@PAGEOFF ++ add x2, x2, #32 // add $32, %rdx ++ tbl v0.16b, {v0.16b}, v1.16b // vpshufb %xmm1, %xmm0, %xmm0 # output permute ++ ++Lschedule_mangle_last_dec: ++ ld1 {v20.2d,v21.2d}, [x11] // reload constants ++ sub x2, x2, #16 // add $-16, %rdx ++ eor v0.16b, v0.16b, v16.16b // vpxor Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform // output transform ++ st1 {v0.2d}, [x2] // vmovdqu %xmm0, (%rdx) # save last key ++ ++ // cleanup ++ eor v0.16b, v0.16b, v0.16b // vpxor %xmm0, %xmm0, %xmm0 ++ eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 ++ eor v2.16b, v2.16b, v2.16b // vpxor %xmm2, %xmm2, %xmm2 ++ eor v3.16b, v3.16b, v3.16b // vpxor %xmm3, %xmm3, %xmm3 ++ eor v4.16b, v4.16b, v4.16b // vpxor %xmm4, %xmm4, %xmm4 ++ eor v5.16b, v5.16b, v5.16b // vpxor %xmm5, %xmm5, %xmm5 ++ eor v6.16b, v6.16b, v6.16b // vpxor %xmm6, %xmm6, %xmm6 ++ eor v7.16b, v7.16b, v7.16b // vpxor %xmm7, %xmm7, %xmm7 ++ ldp x29, x30, [sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++## ++## .aes_schedule_192_smear ++## ++## Smear the short, low side in the 192-bit key schedule. ++## ++## Inputs: ++## %xmm7: high side, b a x y ++## %xmm6: low side, d c 0 0 ++## %xmm13: 0 ++## ++## Outputs: ++## %xmm6: b+c+d b+c 0 0 ++## %xmm0: b+c+d b+c b a ++## ++ ++.align 4 ++_vpaes_schedule_192_smear: ++ movi v1.16b, #0 ++ dup v0.4s, v7.s[3] ++ ins v1.s[3], v6.s[2] // vpshufd $0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 ++ ins v0.s[0], v7.s[2] // vpshufd $0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a ++ eor v6.16b, v6.16b, v1.16b // vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 ++ eor v1.16b, v1.16b, v1.16b // vpxor %xmm1, %xmm1, %xmm1 ++ eor v6.16b, v6.16b, v0.16b // vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a ++ mov v0.16b, v6.16b // vmovdqa %xmm6, %xmm0 ++ ins v6.d[0], v1.d[0] // vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros ++ ret ++ ++ ++## ++## .aes_schedule_round ++## ++## Runs one main round of the key schedule on %xmm0, %xmm7 ++## ++## Specifically, runs subbytes on the high dword of %xmm0 ++## then rotates it by one byte and xors into the low dword of ++## %xmm7. ++## ++## Adds rcon from low byte of %xmm8, then rotates %xmm8 for ++## next rcon. ++## ++## Smears the dwords of %xmm7 by xoring the low into the ++## second low, result into third, result into highest. ++## ++## Returns results in %xmm7 = %xmm0. ++## Clobbers %xmm1-%xmm4, %r11. ++## ++ ++.align 4 ++_vpaes_schedule_round: ++ // extract rcon from xmm8 ++ movi v4.16b, #0 // vpxor %xmm4, %xmm4, %xmm4 ++ ext v1.16b, v8.16b, v4.16b, #15 // vpalignr $15, %xmm8, %xmm4, %xmm1 ++ ext v8.16b, v8.16b, v8.16b, #15 // vpalignr $15, %xmm8, %xmm8, %xmm8 ++ eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 ++ ++ // rotate ++ dup v0.4s, v0.s[3] // vpshufd $0xFF, %xmm0, %xmm0 ++ ext v0.16b, v0.16b, v0.16b, #1 // vpalignr $1, %xmm0, %xmm0, %xmm0 ++ ++ // fall through... ++ ++ // low round: same as high round, but no rotation and no rcon. 
++_vpaes_schedule_low_round: ++ // smear xmm7 ++ ext v1.16b, v4.16b, v7.16b, #12 // vpslldq $4, %xmm7, %xmm1 ++ eor v7.16b, v7.16b, v1.16b // vpxor %xmm1, %xmm7, %xmm7 ++ ext v4.16b, v4.16b, v7.16b, #8 // vpslldq $8, %xmm7, %xmm4 ++ ++ // subbytes ++ and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 # 1 = i ++ eor v7.16b, v7.16b, v4.16b // vpxor %xmm4, %xmm7, %xmm7 ++ tbl v2.16b, {v19.16b}, v1.16b // vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ eor v1.16b, v1.16b, v0.16b // vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ tbl v3.16b, {v18.16b}, v0.16b // vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ tbl v4.16b, {v18.16b}, v1.16b // vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ eor v7.16b, v7.16b, v16.16b // vpxor Lk_s63(%rip), %xmm7, %xmm7 ++ tbl v3.16b, {v18.16b}, v3.16b // vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak ++ eor v4.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ tbl v2.16b, {v18.16b}, v4.16b // vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak ++ eor v3.16b, v3.16b, v1.16b // vpxor %xmm1, %xmm3, %xmm3 # 2 = io ++ eor v2.16b, v2.16b, v0.16b // vpxor %xmm0, %xmm2, %xmm2 # 3 = jo ++ tbl v4.16b, {v23.16b}, v3.16b // vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou ++ tbl v1.16b, {v22.16b}, v2.16b // vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t ++ eor v1.16b, v1.16b, v4.16b // vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output ++ ++ // add in smeared stuff ++ eor v0.16b, v1.16b, v7.16b // vpxor %xmm7, %xmm1, %xmm0 ++ eor v7.16b, v1.16b, v7.16b // vmovdqa %xmm0, %xmm7 ++ ret ++ ++ ++## ++## .aes_schedule_transform ++## ++## Linear-transform %xmm0 according to tables at (%r11) ++## ++## Requires that %xmm9 = 0x0F0F... as in preheat ++## Output in %xmm0 ++## Clobbers %xmm1, %xmm2 ++## ++ ++.align 4 ++_vpaes_schedule_transform: ++ and v1.16b, v0.16b, v17.16b // vpand %xmm9, %xmm0, %xmm1 ++ ushr v0.16b, v0.16b, #4 // vpsrlb $4, %xmm0, %xmm0 ++ // vmovdqa (%r11), %xmm2 # lo ++ tbl v2.16b, {v20.16b}, v1.16b // vpshufb %xmm1, %xmm2, %xmm2 ++ // vmovdqa 16(%r11), %xmm1 # hi ++ tbl v0.16b, {v21.16b}, v0.16b // vpshufb %xmm0, %xmm1, %xmm0 ++ eor v0.16b, v0.16b, v2.16b // vpxor %xmm2, %xmm0, %xmm0 ++ ret ++ ++ ++## ++## .aes_schedule_mangle ++## ++## Mangle xmm0 from (basis-transformed) standard version ++## to our version. 
++## ++## On encrypt, ++## xor with 0x63 ++## multiply by circulant 0,1,1,1 ++## apply shiftrows transform ++## ++## On decrypt, ++## xor with 0x63 ++## multiply by "inverse mixcolumns" circulant E,B,D,9 ++## deskew ++## apply shiftrows transform ++## ++## ++## Writes out to (%rdx), and increments or decrements it ++## Keeps track of round number mod 4 in %r8 ++## Preserves xmm0 ++## Clobbers xmm1-xmm5 ++## ++ ++.align 4 ++_vpaes_schedule_mangle: ++ mov v4.16b, v0.16b // vmovdqa %xmm0, %xmm4 # save xmm0 for later ++ // vmovdqa .Lk_mc_forward(%rip),%xmm5 ++ cbnz w3, Lschedule_mangle_dec ++ ++ // encrypting ++ eor v4.16b, v0.16b, v16.16b // vpxor Lk_s63(%rip), %xmm0, %xmm4 ++ add x2, x2, #16 // add $16, %rdx ++ tbl v4.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm4 ++ tbl v1.16b, {v4.16b}, v9.16b // vpshufb %xmm5, %xmm4, %xmm1 ++ tbl v3.16b, {v1.16b}, v9.16b // vpshufb %xmm5, %xmm1, %xmm3 ++ eor v4.16b, v4.16b, v1.16b // vpxor %xmm1, %xmm4, %xmm4 ++ ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 ++ eor v3.16b, v3.16b, v4.16b // vpxor %xmm4, %xmm3, %xmm3 ++ ++ b Lschedule_mangle_both ++.align 4 ++Lschedule_mangle_dec: ++ // inverse mix columns ++ // lea .Lk_dksd(%rip),%r11 ++ ushr v1.16b, v4.16b, #4 // vpsrlb $4, %xmm4, %xmm1 # 1 = hi ++ and v4.16b, v4.16b, v17.16b // vpand %xmm9, %xmm4, %xmm4 # 4 = lo ++ ++ // vmovdqa 0x00(%r11), %xmm2 ++ tbl v2.16b, {v24.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 ++ // vmovdqa 0x10(%r11), %xmm3 ++ tbl v3.16b, {v25.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 ++ tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 ++ ++ // vmovdqa 0x20(%r11), %xmm2 ++ tbl v2.16b, {v26.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 ++ eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 ++ // vmovdqa 0x30(%r11), %xmm3 ++ tbl v3.16b, {v27.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 ++ tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 ++ ++ // vmovdqa 0x40(%r11), %xmm2 ++ tbl v2.16b, {v28.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 ++ eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 ++ // vmovdqa 0x50(%r11), %xmm3 ++ tbl v3.16b, {v29.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 ++ eor v3.16b, v3.16b, v2.16b // vpxor %xmm2, %xmm3, %xmm3 ++ ++ // vmovdqa 0x60(%r11), %xmm2 ++ tbl v2.16b, {v30.16b}, v4.16b // vpshufb %xmm4, %xmm2, %xmm2 ++ tbl v3.16b, {v3.16b}, v9.16b // vpshufb %xmm5, %xmm3, %xmm3 ++ // vmovdqa 0x70(%r11), %xmm4 ++ tbl v4.16b, {v31.16b}, v1.16b // vpshufb %xmm1, %xmm4, %xmm4 ++ ld1 {v1.2d}, [x8] // vmovdqa (%r8,%r10), %xmm1 ++ eor v2.16b, v2.16b, v3.16b // vpxor %xmm3, %xmm2, %xmm2 ++ eor v3.16b, v4.16b, v2.16b // vpxor %xmm2, %xmm4, %xmm3 ++ ++ sub x2, x2, #16 // add $-16, %rdx ++ ++Lschedule_mangle_both: ++ tbl v3.16b, {v3.16b}, v1.16b // vpshufb %xmm1, %xmm3, %xmm3 ++ add x8, x8, #48 // add $-16, %r8 ++ and x8, x8, #~(1<<6) // and $0x30, %r8 ++ st1 {v3.2d}, [x2] // vmovdqu %xmm3, (%rdx) ++ ret ++ ++ ++.globl _vpaes_set_encrypt_key ++.private_extern _vpaes_set_encrypt_key ++ ++.align 4 ++_vpaes_set_encrypt_key: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ stp d8,d9,[sp,#-16]! 
// ABI spec says so ++ ++ lsr w9, w1, #5 // shr $5,%eax ++ add w9, w9, #5 // $5,%eax ++ str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ mov w3, #0 // mov $0,%ecx ++ mov x8, #0x30 // mov $0x30,%r8d ++ bl _vpaes_schedule_core ++ eor x0, x0, x0 ++ ++ ldp d8,d9,[sp],#16 ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++.globl _vpaes_set_decrypt_key ++.private_extern _vpaes_set_decrypt_key ++ ++.align 4 ++_vpaes_set_decrypt_key: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ stp d8,d9,[sp,#-16]! // ABI spec says so ++ ++ lsr w9, w1, #5 // shr $5,%eax ++ add w9, w9, #5 // $5,%eax ++ str w9, [x2,#240] // mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ lsl w9, w9, #4 // shl $4,%eax ++ add x2, x2, #16 // lea 16(%rdx,%rax),%rdx ++ add x2, x2, x9 ++ ++ mov w3, #1 // mov $1,%ecx ++ lsr w8, w1, #1 // shr $1,%r8d ++ and x8, x8, #32 // and $32,%r8d ++ eor x8, x8, #32 // xor $32,%r8d # nbits==192?0:32 ++ bl _vpaes_schedule_core ++ ++ ldp d8,d9,[sp],#16 ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++.globl _vpaes_cbc_encrypt ++.private_extern _vpaes_cbc_encrypt ++ ++.align 4 ++_vpaes_cbc_encrypt: ++ AARCH64_SIGN_LINK_REGISTER ++ cbz x2, Lcbc_abort ++ cmp w5, #0 // check direction ++ b.eq vpaes_cbc_decrypt ++ ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ ++ mov x17, x2 // reassign ++ mov x2, x3 // reassign ++ ++ ld1 {v0.16b}, [x4] // load ivec ++ bl _vpaes_encrypt_preheat ++ b Lcbc_enc_loop ++ ++.align 4 ++Lcbc_enc_loop: ++ ld1 {v7.16b}, [x0],#16 // load input ++ eor v7.16b, v7.16b, v0.16b // xor with ivec ++ bl _vpaes_encrypt_core ++ st1 {v0.16b}, [x1],#16 // save output ++ subs x17, x17, #16 ++ b.hi Lcbc_enc_loop ++ ++ st1 {v0.16b}, [x4] // write ivec ++ ++ ldp x29,x30,[sp],#16 ++Lcbc_abort: ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++ ++.align 4 ++vpaes_cbc_decrypt: ++ // Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to ++ // only from vpaes_cbc_encrypt which has already signed the return address. ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ stp d8,d9,[sp,#-16]! // ABI spec says so ++ stp d10,d11,[sp,#-16]! ++ stp d12,d13,[sp,#-16]! ++ stp d14,d15,[sp,#-16]! ++ ++ mov x17, x2 // reassign ++ mov x2, x3 // reassign ++ ld1 {v6.16b}, [x4] // load ivec ++ bl _vpaes_decrypt_preheat ++ tst x17, #16 ++ b.eq Lcbc_dec_loop2x ++ ++ ld1 {v7.16b}, [x0], #16 // load input ++ bl _vpaes_decrypt_core ++ eor v0.16b, v0.16b, v6.16b // xor with ivec ++ orr v6.16b, v7.16b, v7.16b // next ivec value ++ st1 {v0.16b}, [x1], #16 ++ subs x17, x17, #16 ++ b.ls Lcbc_dec_done ++ ++.align 4 ++Lcbc_dec_loop2x: ++ ld1 {v14.16b,v15.16b}, [x0], #32 ++ bl _vpaes_decrypt_2x ++ eor v0.16b, v0.16b, v6.16b // xor with ivec ++ eor v1.16b, v1.16b, v14.16b ++ orr v6.16b, v15.16b, v15.16b ++ st1 {v0.16b,v1.16b}, [x1], #32 ++ subs x17, x17, #32 ++ b.hi Lcbc_dec_loop2x ++ ++Lcbc_dec_done: ++ st1 {v6.16b}, [x4] ++ ++ ldp d14,d15,[sp],#16 ++ ldp d12,d13,[sp],#16 ++ ldp d10,d11,[sp],#16 ++ ldp d8,d9,[sp],#16 ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++.globl _vpaes_ctr32_encrypt_blocks ++.private_extern _vpaes_ctr32_encrypt_blocks ++ ++.align 4 ++_vpaes_ctr32_encrypt_blocks: ++ AARCH64_SIGN_LINK_REGISTER ++ stp x29,x30,[sp,#-16]! ++ add x29,sp,#0 ++ stp d8,d9,[sp,#-16]! // ABI spec says so ++ stp d10,d11,[sp,#-16]! ++ stp d12,d13,[sp,#-16]! ++ stp d14,d15,[sp,#-16]! 
++ ++ cbz x2, Lctr32_done ++ ++ // Note, unlike the other functions, x2 here is measured in blocks, ++ // not bytes. ++ mov x17, x2 ++ mov x2, x3 ++ ++ // Load the IV and counter portion. ++ ldr w6, [x4, #12] ++ ld1 {v7.16b}, [x4] ++ ++ bl _vpaes_encrypt_preheat ++ tst x17, #1 ++ rev w6, w6 // The counter is big-endian. ++ b.eq Lctr32_prep_loop ++ ++ // Handle one block so the remaining block count is even for ++ // _vpaes_encrypt_2x. ++ ld1 {v6.16b}, [x0], #16 // Load input ahead of time ++ bl _vpaes_encrypt_core ++ eor v0.16b, v0.16b, v6.16b // XOR input and result ++ st1 {v0.16b}, [x1], #16 ++ subs x17, x17, #1 ++ // Update the counter. ++ add w6, w6, #1 ++ rev w7, w6 ++ mov v7.s[3], w7 ++ b.ls Lctr32_done ++ ++Lctr32_prep_loop: ++ // _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x ++ // uses v14 and v15. ++ mov v15.16b, v7.16b ++ mov v14.16b, v7.16b ++ add w6, w6, #1 ++ rev w7, w6 ++ mov v15.s[3], w7 ++ ++Lctr32_loop: ++ ld1 {v6.16b,v7.16b}, [x0], #32 // Load input ahead of time ++ bl _vpaes_encrypt_2x ++ eor v0.16b, v0.16b, v6.16b // XOR input and result ++ eor v1.16b, v1.16b, v7.16b // XOR input and result (#2) ++ st1 {v0.16b,v1.16b}, [x1], #32 ++ subs x17, x17, #2 ++ // Update the counter. ++ add w7, w6, #1 ++ add w6, w6, #2 ++ rev w7, w7 ++ mov v14.s[3], w7 ++ rev w7, w6 ++ mov v15.s[3], w7 ++ b.hi Lctr32_loop ++ ++Lctr32_done: ++ ldp d14,d15,[sp],#16 ++ ldp d12,d13,[sp],#16 ++ ldp d10,d11,[sp],#16 ++ ldp d8,d9,[sp],#16 ++ ldp x29,x30,[sp],#16 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-aarch64/crypto/test/trampoline-armv8.S b/apple-aarch64/crypto/test/trampoline-armv8.S +new file mode 100644 +index 0000000..325da9b +--- /dev/null ++++ b/apple-aarch64/crypto/test/trampoline-armv8.S +@@ -0,0 +1,758 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++.text ++ ++// abi_test_trampoline loads callee-saved registers from |state|, calls |func| ++// with |argv|, then saves the callee-saved registers into |state|. It returns ++// the result of |func|. The |unwind| argument is unused. ++// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state, ++// const uint64_t *argv, size_t argc, ++// uint64_t unwind); ++ ++.globl _abi_test_trampoline ++.private_extern _abi_test_trampoline ++.align 4 ++_abi_test_trampoline: ++Labi_test_trampoline_begin: ++ AARCH64_SIGN_LINK_REGISTER ++ // Stack layout (low to high addresses) ++ // x29,x30 (16 bytes) ++ // d8-d15 (64 bytes) ++ // x19-x28 (80 bytes) ++ // x1 (8 bytes) ++ // padding (8 bytes) ++ stp x29, x30, [sp, #-176]! ++ mov x29, sp ++ ++ // Saved callee-saved registers and |state|. ++ stp d8, d9, [sp, #16] ++ stp d10, d11, [sp, #32] ++ stp d12, d13, [sp, #48] ++ stp d14, d15, [sp, #64] ++ stp x19, x20, [sp, #80] ++ stp x21, x22, [sp, #96] ++ stp x23, x24, [sp, #112] ++ stp x25, x26, [sp, #128] ++ stp x27, x28, [sp, #144] ++ str x1, [sp, #160] ++ ++ // Load registers from |state|, with the exception of x29. x29 is the ++ // frame pointer and also callee-saved, but AAPCS64 allows platforms to ++ // mandate that x29 always point to a frame. 
iOS64 does so, which means ++ // we cannot fill x29 with entropy without violating ABI rules ++ // ourselves. x29 is tested separately below. ++ ldp d8, d9, [x1], #16 ++ ldp d10, d11, [x1], #16 ++ ldp d12, d13, [x1], #16 ++ ldp d14, d15, [x1], #16 ++ ldp x19, x20, [x1], #16 ++ ldp x21, x22, [x1], #16 ++ ldp x23, x24, [x1], #16 ++ ldp x25, x26, [x1], #16 ++ ldp x27, x28, [x1], #16 ++ ++ // Move parameters into temporary registers. ++ mov x9, x0 ++ mov x10, x2 ++ mov x11, x3 ++ ++ // Load parameters into registers. ++ cbz x11, Largs_done ++ ldr x0, [x10], #8 ++ subs x11, x11, #1 ++ b.eq Largs_done ++ ldr x1, [x10], #8 ++ subs x11, x11, #1 ++ b.eq Largs_done ++ ldr x2, [x10], #8 ++ subs x11, x11, #1 ++ b.eq Largs_done ++ ldr x3, [x10], #8 ++ subs x11, x11, #1 ++ b.eq Largs_done ++ ldr x4, [x10], #8 ++ subs x11, x11, #1 ++ b.eq Largs_done ++ ldr x5, [x10], #8 ++ subs x11, x11, #1 ++ b.eq Largs_done ++ ldr x6, [x10], #8 ++ subs x11, x11, #1 ++ b.eq Largs_done ++ ldr x7, [x10], #8 ++ ++Largs_done: ++ blr x9 ++ ++ // Reload |state| and store registers. ++ ldr x1, [sp, #160] ++ stp d8, d9, [x1], #16 ++ stp d10, d11, [x1], #16 ++ stp d12, d13, [x1], #16 ++ stp d14, d15, [x1], #16 ++ stp x19, x20, [x1], #16 ++ stp x21, x22, [x1], #16 ++ stp x23, x24, [x1], #16 ++ stp x25, x26, [x1], #16 ++ stp x27, x28, [x1], #16 ++ ++ // |func| is required to preserve x29, the frame pointer. We cannot load ++ // random values into x29 (see comment above), so compare it against the ++ // expected value and zero the field of |state| if corrupted. ++ mov x9, sp ++ cmp x29, x9 ++ b.eq Lx29_ok ++ str xzr, [x1] ++ ++Lx29_ok: ++ // Restore callee-saved registers. ++ ldp d8, d9, [sp, #16] ++ ldp d10, d11, [sp, #32] ++ ldp d12, d13, [sp, #48] ++ ldp d14, d15, [sp, #64] ++ ldp x19, x20, [sp, #80] ++ ldp x21, x22, [sp, #96] ++ ldp x23, x24, [sp, #112] ++ ldp x25, x26, [sp, #128] ++ ldp x27, x28, [sp, #144] ++ ++ ldp x29, x30, [sp], #176 ++ AARCH64_VALIDATE_LINK_REGISTER ++ ret ++ ++ ++.globl _abi_test_clobber_x0 ++.private_extern _abi_test_clobber_x0 ++.align 4 ++_abi_test_clobber_x0: ++ AARCH64_VALID_CALL_TARGET ++ mov x0, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x1 ++.private_extern _abi_test_clobber_x1 ++.align 4 ++_abi_test_clobber_x1: ++ AARCH64_VALID_CALL_TARGET ++ mov x1, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x2 ++.private_extern _abi_test_clobber_x2 ++.align 4 ++_abi_test_clobber_x2: ++ AARCH64_VALID_CALL_TARGET ++ mov x2, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x3 ++.private_extern _abi_test_clobber_x3 ++.align 4 ++_abi_test_clobber_x3: ++ AARCH64_VALID_CALL_TARGET ++ mov x3, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x4 ++.private_extern _abi_test_clobber_x4 ++.align 4 ++_abi_test_clobber_x4: ++ AARCH64_VALID_CALL_TARGET ++ mov x4, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x5 ++.private_extern _abi_test_clobber_x5 ++.align 4 ++_abi_test_clobber_x5: ++ AARCH64_VALID_CALL_TARGET ++ mov x5, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x6 ++.private_extern _abi_test_clobber_x6 ++.align 4 ++_abi_test_clobber_x6: ++ AARCH64_VALID_CALL_TARGET ++ mov x6, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x7 ++.private_extern _abi_test_clobber_x7 ++.align 4 ++_abi_test_clobber_x7: ++ AARCH64_VALID_CALL_TARGET ++ mov x7, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x8 ++.private_extern _abi_test_clobber_x8 ++.align 4 ++_abi_test_clobber_x8: ++ AARCH64_VALID_CALL_TARGET ++ mov x8, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x9 ++.private_extern _abi_test_clobber_x9 ++.align 4 ++_abi_test_clobber_x9: ++ 
AARCH64_VALID_CALL_TARGET ++ mov x9, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x10 ++.private_extern _abi_test_clobber_x10 ++.align 4 ++_abi_test_clobber_x10: ++ AARCH64_VALID_CALL_TARGET ++ mov x10, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x11 ++.private_extern _abi_test_clobber_x11 ++.align 4 ++_abi_test_clobber_x11: ++ AARCH64_VALID_CALL_TARGET ++ mov x11, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x12 ++.private_extern _abi_test_clobber_x12 ++.align 4 ++_abi_test_clobber_x12: ++ AARCH64_VALID_CALL_TARGET ++ mov x12, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x13 ++.private_extern _abi_test_clobber_x13 ++.align 4 ++_abi_test_clobber_x13: ++ AARCH64_VALID_CALL_TARGET ++ mov x13, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x14 ++.private_extern _abi_test_clobber_x14 ++.align 4 ++_abi_test_clobber_x14: ++ AARCH64_VALID_CALL_TARGET ++ mov x14, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x15 ++.private_extern _abi_test_clobber_x15 ++.align 4 ++_abi_test_clobber_x15: ++ AARCH64_VALID_CALL_TARGET ++ mov x15, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x16 ++.private_extern _abi_test_clobber_x16 ++.align 4 ++_abi_test_clobber_x16: ++ AARCH64_VALID_CALL_TARGET ++ mov x16, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x17 ++.private_extern _abi_test_clobber_x17 ++.align 4 ++_abi_test_clobber_x17: ++ AARCH64_VALID_CALL_TARGET ++ mov x17, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x19 ++.private_extern _abi_test_clobber_x19 ++.align 4 ++_abi_test_clobber_x19: ++ AARCH64_VALID_CALL_TARGET ++ mov x19, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x20 ++.private_extern _abi_test_clobber_x20 ++.align 4 ++_abi_test_clobber_x20: ++ AARCH64_VALID_CALL_TARGET ++ mov x20, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x21 ++.private_extern _abi_test_clobber_x21 ++.align 4 ++_abi_test_clobber_x21: ++ AARCH64_VALID_CALL_TARGET ++ mov x21, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x22 ++.private_extern _abi_test_clobber_x22 ++.align 4 ++_abi_test_clobber_x22: ++ AARCH64_VALID_CALL_TARGET ++ mov x22, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x23 ++.private_extern _abi_test_clobber_x23 ++.align 4 ++_abi_test_clobber_x23: ++ AARCH64_VALID_CALL_TARGET ++ mov x23, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x24 ++.private_extern _abi_test_clobber_x24 ++.align 4 ++_abi_test_clobber_x24: ++ AARCH64_VALID_CALL_TARGET ++ mov x24, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x25 ++.private_extern _abi_test_clobber_x25 ++.align 4 ++_abi_test_clobber_x25: ++ AARCH64_VALID_CALL_TARGET ++ mov x25, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x26 ++.private_extern _abi_test_clobber_x26 ++.align 4 ++_abi_test_clobber_x26: ++ AARCH64_VALID_CALL_TARGET ++ mov x26, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x27 ++.private_extern _abi_test_clobber_x27 ++.align 4 ++_abi_test_clobber_x27: ++ AARCH64_VALID_CALL_TARGET ++ mov x27, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x28 ++.private_extern _abi_test_clobber_x28 ++.align 4 ++_abi_test_clobber_x28: ++ AARCH64_VALID_CALL_TARGET ++ mov x28, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_x29 ++.private_extern _abi_test_clobber_x29 ++.align 4 ++_abi_test_clobber_x29: ++ AARCH64_VALID_CALL_TARGET ++ mov x29, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d0 ++.private_extern _abi_test_clobber_d0 ++.align 4 ++_abi_test_clobber_d0: ++ AARCH64_VALID_CALL_TARGET ++ fmov d0, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d1 ++.private_extern _abi_test_clobber_d1 ++.align 4 ++_abi_test_clobber_d1: ++ AARCH64_VALID_CALL_TARGET ++ fmov d1, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d2 
++.private_extern _abi_test_clobber_d2 ++.align 4 ++_abi_test_clobber_d2: ++ AARCH64_VALID_CALL_TARGET ++ fmov d2, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d3 ++.private_extern _abi_test_clobber_d3 ++.align 4 ++_abi_test_clobber_d3: ++ AARCH64_VALID_CALL_TARGET ++ fmov d3, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d4 ++.private_extern _abi_test_clobber_d4 ++.align 4 ++_abi_test_clobber_d4: ++ AARCH64_VALID_CALL_TARGET ++ fmov d4, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d5 ++.private_extern _abi_test_clobber_d5 ++.align 4 ++_abi_test_clobber_d5: ++ AARCH64_VALID_CALL_TARGET ++ fmov d5, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d6 ++.private_extern _abi_test_clobber_d6 ++.align 4 ++_abi_test_clobber_d6: ++ AARCH64_VALID_CALL_TARGET ++ fmov d6, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d7 ++.private_extern _abi_test_clobber_d7 ++.align 4 ++_abi_test_clobber_d7: ++ AARCH64_VALID_CALL_TARGET ++ fmov d7, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d8 ++.private_extern _abi_test_clobber_d8 ++.align 4 ++_abi_test_clobber_d8: ++ AARCH64_VALID_CALL_TARGET ++ fmov d8, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d9 ++.private_extern _abi_test_clobber_d9 ++.align 4 ++_abi_test_clobber_d9: ++ AARCH64_VALID_CALL_TARGET ++ fmov d9, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d10 ++.private_extern _abi_test_clobber_d10 ++.align 4 ++_abi_test_clobber_d10: ++ AARCH64_VALID_CALL_TARGET ++ fmov d10, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d11 ++.private_extern _abi_test_clobber_d11 ++.align 4 ++_abi_test_clobber_d11: ++ AARCH64_VALID_CALL_TARGET ++ fmov d11, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d12 ++.private_extern _abi_test_clobber_d12 ++.align 4 ++_abi_test_clobber_d12: ++ AARCH64_VALID_CALL_TARGET ++ fmov d12, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d13 ++.private_extern _abi_test_clobber_d13 ++.align 4 ++_abi_test_clobber_d13: ++ AARCH64_VALID_CALL_TARGET ++ fmov d13, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d14 ++.private_extern _abi_test_clobber_d14 ++.align 4 ++_abi_test_clobber_d14: ++ AARCH64_VALID_CALL_TARGET ++ fmov d14, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d15 ++.private_extern _abi_test_clobber_d15 ++.align 4 ++_abi_test_clobber_d15: ++ AARCH64_VALID_CALL_TARGET ++ fmov d15, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d16 ++.private_extern _abi_test_clobber_d16 ++.align 4 ++_abi_test_clobber_d16: ++ AARCH64_VALID_CALL_TARGET ++ fmov d16, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d17 ++.private_extern _abi_test_clobber_d17 ++.align 4 ++_abi_test_clobber_d17: ++ AARCH64_VALID_CALL_TARGET ++ fmov d17, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d18 ++.private_extern _abi_test_clobber_d18 ++.align 4 ++_abi_test_clobber_d18: ++ AARCH64_VALID_CALL_TARGET ++ fmov d18, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d19 ++.private_extern _abi_test_clobber_d19 ++.align 4 ++_abi_test_clobber_d19: ++ AARCH64_VALID_CALL_TARGET ++ fmov d19, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d20 ++.private_extern _abi_test_clobber_d20 ++.align 4 ++_abi_test_clobber_d20: ++ AARCH64_VALID_CALL_TARGET ++ fmov d20, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d21 ++.private_extern _abi_test_clobber_d21 ++.align 4 ++_abi_test_clobber_d21: ++ AARCH64_VALID_CALL_TARGET ++ fmov d21, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d22 ++.private_extern _abi_test_clobber_d22 ++.align 4 ++_abi_test_clobber_d22: ++ AARCH64_VALID_CALL_TARGET ++ fmov d22, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d23 ++.private_extern _abi_test_clobber_d23 ++.align 4 ++_abi_test_clobber_d23: ++ 
AARCH64_VALID_CALL_TARGET ++ fmov d23, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d24 ++.private_extern _abi_test_clobber_d24 ++.align 4 ++_abi_test_clobber_d24: ++ AARCH64_VALID_CALL_TARGET ++ fmov d24, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d25 ++.private_extern _abi_test_clobber_d25 ++.align 4 ++_abi_test_clobber_d25: ++ AARCH64_VALID_CALL_TARGET ++ fmov d25, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d26 ++.private_extern _abi_test_clobber_d26 ++.align 4 ++_abi_test_clobber_d26: ++ AARCH64_VALID_CALL_TARGET ++ fmov d26, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d27 ++.private_extern _abi_test_clobber_d27 ++.align 4 ++_abi_test_clobber_d27: ++ AARCH64_VALID_CALL_TARGET ++ fmov d27, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d28 ++.private_extern _abi_test_clobber_d28 ++.align 4 ++_abi_test_clobber_d28: ++ AARCH64_VALID_CALL_TARGET ++ fmov d28, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d29 ++.private_extern _abi_test_clobber_d29 ++.align 4 ++_abi_test_clobber_d29: ++ AARCH64_VALID_CALL_TARGET ++ fmov d29, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d30 ++.private_extern _abi_test_clobber_d30 ++.align 4 ++_abi_test_clobber_d30: ++ AARCH64_VALID_CALL_TARGET ++ fmov d30, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_d31 ++.private_extern _abi_test_clobber_d31 ++.align 4 ++_abi_test_clobber_d31: ++ AARCH64_VALID_CALL_TARGET ++ fmov d31, xzr ++ ret ++ ++ ++.globl _abi_test_clobber_v8_upper ++.private_extern _abi_test_clobber_v8_upper ++.align 4 ++_abi_test_clobber_v8_upper: ++ AARCH64_VALID_CALL_TARGET ++ fmov v8.d[1], xzr ++ ret ++ ++ ++.globl _abi_test_clobber_v9_upper ++.private_extern _abi_test_clobber_v9_upper ++.align 4 ++_abi_test_clobber_v9_upper: ++ AARCH64_VALID_CALL_TARGET ++ fmov v9.d[1], xzr ++ ret ++ ++ ++.globl _abi_test_clobber_v10_upper ++.private_extern _abi_test_clobber_v10_upper ++.align 4 ++_abi_test_clobber_v10_upper: ++ AARCH64_VALID_CALL_TARGET ++ fmov v10.d[1], xzr ++ ret ++ ++ ++.globl _abi_test_clobber_v11_upper ++.private_extern _abi_test_clobber_v11_upper ++.align 4 ++_abi_test_clobber_v11_upper: ++ AARCH64_VALID_CALL_TARGET ++ fmov v11.d[1], xzr ++ ret ++ ++ ++.globl _abi_test_clobber_v12_upper ++.private_extern _abi_test_clobber_v12_upper ++.align 4 ++_abi_test_clobber_v12_upper: ++ AARCH64_VALID_CALL_TARGET ++ fmov v12.d[1], xzr ++ ret ++ ++ ++.globl _abi_test_clobber_v13_upper ++.private_extern _abi_test_clobber_v13_upper ++.align 4 ++_abi_test_clobber_v13_upper: ++ AARCH64_VALID_CALL_TARGET ++ fmov v13.d[1], xzr ++ ret ++ ++ ++.globl _abi_test_clobber_v14_upper ++.private_extern _abi_test_clobber_v14_upper ++.align 4 ++_abi_test_clobber_v14_upper: ++ AARCH64_VALID_CALL_TARGET ++ fmov v14.d[1], xzr ++ ret ++ ++ ++.globl _abi_test_clobber_v15_upper ++.private_extern _abi_test_clobber_v15_upper ++.align 4 ++_abi_test_clobber_v15_upper: ++ AARCH64_VALID_CALL_TARGET ++ fmov v15.d[1], xzr ++ ret ++ ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/chacha/chacha-armv4.S b/apple-arm/crypto/chacha/chacha-armv4.S +new file mode 100644 +index 0000000..cadf2b6 +--- /dev/null ++++ b/apple-arm/crypto/chacha/chacha-armv4.S +@@ -0,0 +1,1498 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both ++@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. ++ ++ ++.text ++#if defined(__thumb2__) || defined(__clang__) ++.syntax unified ++#endif ++#if defined(__thumb2__) ++.thumb ++#else ++.code 32 ++#endif ++ ++#if defined(__thumb2__) || defined(__clang__) ++#define ldrhsb ldrbhs ++#endif ++ ++.align 5 ++Lsigma: ++.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 @ endian-neutral ++Lone: ++.long 1,0,0,0 ++#if __ARM_MAX_ARCH__>=7 ++LOPENSSL_armcap: ++.word OPENSSL_armcap_P-LChaCha20_ctr32 ++#else ++.word -1 ++#endif ++ ++.globl _ChaCha20_ctr32 ++.private_extern _ChaCha20_ctr32 ++#ifdef __thumb2__ ++.thumb_func _ChaCha20_ctr32 ++#endif ++.align 5 ++_ChaCha20_ctr32: ++LChaCha20_ctr32: ++ ldr r12,[sp,#0] @ pull pointer to counter and nonce ++ stmdb sp!,{r0,r1,r2,r4-r11,lr} ++#if __ARM_ARCH__<7 && !defined(__thumb2__) ++ sub r14,pc,#16 @ _ChaCha20_ctr32 ++#else ++ adr r14,LChaCha20_ctr32 ++#endif ++ cmp r2,#0 @ len==0? ++#ifdef __thumb2__ ++ itt eq ++#endif ++ addeq sp,sp,#4*3 ++ beq Lno_data ++#if __ARM_MAX_ARCH__>=7 ++ cmp r2,#192 @ test len ++ bls Lshort ++ ldr r4,[r14,#-32] ++ ldr r4,[r14,r4] ++# ifdef __APPLE__ ++ ldr r4,[r4] ++# endif ++ tst r4,#ARMV7_NEON ++ bne LChaCha20_neon ++Lshort: ++#endif ++ ldmia r12,{r4,r5,r6,r7} @ load counter and nonce ++ sub sp,sp,#4*(16) @ off-load area ++ sub r14,r14,#64 @ Lsigma ++ stmdb sp!,{r4,r5,r6,r7} @ copy counter and nonce ++ ldmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} @ load key ++ ldmia r14,{r0,r1,r2,r3} @ load sigma ++ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} @ copy key ++ stmdb sp!,{r0,r1,r2,r3} @ copy sigma ++ str r10,[sp,#4*(16+10)] @ off-load "rx" ++ str r11,[sp,#4*(16+11)] @ off-load "rx" ++ b Loop_outer_enter ++ ++.align 4 ++Loop_outer: ++ ldmia sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ load key material ++ str r11,[sp,#4*(32+2)] @ save len ++ str r12, [sp,#4*(32+1)] @ save inp ++ str r14, [sp,#4*(32+0)] @ save out ++Loop_outer_enter: ++ ldr r11, [sp,#4*(15)] ++ ldr r12,[sp,#4*(12)] @ modulo-scheduled load ++ ldr r10, [sp,#4*(13)] ++ ldr r14,[sp,#4*(14)] ++ str r11, [sp,#4*(16+15)] ++ mov r11,#10 ++ b Loop ++ ++.align 4 ++Loop: ++ subs r11,r11,#1 ++ add r0,r0,r4 ++ mov r12,r12,ror#16 ++ add r1,r1,r5 ++ mov r10,r10,ror#16 ++ eor r12,r12,r0,ror#16 ++ eor r10,r10,r1,ror#16 ++ add r8,r8,r12 ++ mov r4,r4,ror#20 ++ add r9,r9,r10 ++ mov r5,r5,ror#20 ++ eor r4,r4,r8,ror#20 ++ eor r5,r5,r9,ror#20 ++ add r0,r0,r4 ++ mov r12,r12,ror#24 ++ add r1,r1,r5 ++ mov r10,r10,ror#24 ++ eor r12,r12,r0,ror#24 ++ eor r10,r10,r1,ror#24 ++ add r8,r8,r12 ++ mov r4,r4,ror#25 ++ add r9,r9,r10 ++ mov r5,r5,ror#25 ++ str r10,[sp,#4*(16+13)] ++ ldr r10,[sp,#4*(16+15)] ++ eor r4,r4,r8,ror#25 ++ eor r5,r5,r9,ror#25 ++ str r8,[sp,#4*(16+8)] ++ ldr r8,[sp,#4*(16+10)] ++ add r2,r2,r6 ++ mov r14,r14,ror#16 ++ str r9,[sp,#4*(16+9)] ++ ldr r9,[sp,#4*(16+11)] ++ add r3,r3,r7 ++ mov r10,r10,ror#16 ++ eor r14,r14,r2,ror#16 ++ eor r10,r10,r3,ror#16 ++ add r8,r8,r14 ++ mov r6,r6,ror#20 ++ add r9,r9,r10 ++ mov r7,r7,ror#20 ++ eor r6,r6,r8,ror#20 ++ eor r7,r7,r9,ror#20 ++ add r2,r2,r6 ++ mov r14,r14,ror#24 ++ add r3,r3,r7 ++ mov r10,r10,ror#24 ++ eor r14,r14,r2,ror#24 ++ eor r10,r10,r3,ror#24 ++ add r8,r8,r14 ++ mov r6,r6,ror#25 ++ add 
r9,r9,r10 ++ mov r7,r7,ror#25 ++ eor r6,r6,r8,ror#25 ++ eor r7,r7,r9,ror#25 ++ add r0,r0,r5 ++ mov r10,r10,ror#16 ++ add r1,r1,r6 ++ mov r12,r12,ror#16 ++ eor r10,r10,r0,ror#16 ++ eor r12,r12,r1,ror#16 ++ add r8,r8,r10 ++ mov r5,r5,ror#20 ++ add r9,r9,r12 ++ mov r6,r6,ror#20 ++ eor r5,r5,r8,ror#20 ++ eor r6,r6,r9,ror#20 ++ add r0,r0,r5 ++ mov r10,r10,ror#24 ++ add r1,r1,r6 ++ mov r12,r12,ror#24 ++ eor r10,r10,r0,ror#24 ++ eor r12,r12,r1,ror#24 ++ add r8,r8,r10 ++ mov r5,r5,ror#25 ++ str r10,[sp,#4*(16+15)] ++ ldr r10,[sp,#4*(16+13)] ++ add r9,r9,r12 ++ mov r6,r6,ror#25 ++ eor r5,r5,r8,ror#25 ++ eor r6,r6,r9,ror#25 ++ str r8,[sp,#4*(16+10)] ++ ldr r8,[sp,#4*(16+8)] ++ add r2,r2,r7 ++ mov r10,r10,ror#16 ++ str r9,[sp,#4*(16+11)] ++ ldr r9,[sp,#4*(16+9)] ++ add r3,r3,r4 ++ mov r14,r14,ror#16 ++ eor r10,r10,r2,ror#16 ++ eor r14,r14,r3,ror#16 ++ add r8,r8,r10 ++ mov r7,r7,ror#20 ++ add r9,r9,r14 ++ mov r4,r4,ror#20 ++ eor r7,r7,r8,ror#20 ++ eor r4,r4,r9,ror#20 ++ add r2,r2,r7 ++ mov r10,r10,ror#24 ++ add r3,r3,r4 ++ mov r14,r14,ror#24 ++ eor r10,r10,r2,ror#24 ++ eor r14,r14,r3,ror#24 ++ add r8,r8,r10 ++ mov r7,r7,ror#25 ++ add r9,r9,r14 ++ mov r4,r4,ror#25 ++ eor r7,r7,r8,ror#25 ++ eor r4,r4,r9,ror#25 ++ bne Loop ++ ++ ldr r11,[sp,#4*(32+2)] @ load len ++ ++ str r8, [sp,#4*(16+8)] @ modulo-scheduled store ++ str r9, [sp,#4*(16+9)] ++ str r12,[sp,#4*(16+12)] ++ str r10, [sp,#4*(16+13)] ++ str r14,[sp,#4*(16+14)] ++ ++ @ at this point we have first half of 512-bit result in ++ @ rx and second half at sp+4*(16+8) ++ ++ cmp r11,#64 @ done yet? ++#ifdef __thumb2__ ++ itete lo ++#endif ++ addlo r12,sp,#4*(0) @ shortcut or ... ++ ldrhs r12,[sp,#4*(32+1)] @ ... load inp ++ addlo r14,sp,#4*(0) @ shortcut or ... ++ ldrhs r14,[sp,#4*(32+0)] @ ... load out ++ ++ ldr r8,[sp,#4*(0)] @ load key material ++ ldr r9,[sp,#4*(1)] ++ ++#if __ARM_ARCH__>=6 || !defined(__ARMEB__) ++# if __ARM_ARCH__<7 ++ orr r10,r12,r14 ++ tst r10,#3 @ are input and output aligned? 
++ ldr r10,[sp,#4*(2)] ++ bne Lunaligned ++ cmp r11,#64 @ restore flags ++# else ++ ldr r10,[sp,#4*(2)] ++# endif ++ ldr r11,[sp,#4*(3)] ++ ++ add r0,r0,r8 @ accumulate key material ++ add r1,r1,r9 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhs r8,[r12],#16 @ load input ++ ldrhs r9,[r12,#-12] ++ ++ add r2,r2,r10 ++ add r3,r3,r11 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhs r10,[r12,#-8] ++ ldrhs r11,[r12,#-4] ++# if __ARM_ARCH__>=6 && defined(__ARMEB__) ++ rev r0,r0 ++ rev r1,r1 ++ rev r2,r2 ++ rev r3,r3 ++# endif ++# ifdef __thumb2__ ++ itt hs ++# endif ++ eorhs r0,r0,r8 @ xor with input ++ eorhs r1,r1,r9 ++ add r8,sp,#4*(4) ++ str r0,[r14],#16 @ store output ++# ifdef __thumb2__ ++ itt hs ++# endif ++ eorhs r2,r2,r10 ++ eorhs r3,r3,r11 ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ str r1,[r14,#-12] ++ str r2,[r14,#-8] ++ str r3,[r14,#-4] ++ ++ add r4,r4,r8 @ accumulate key material ++ add r5,r5,r9 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhs r8,[r12],#16 @ load input ++ ldrhs r9,[r12,#-12] ++ add r6,r6,r10 ++ add r7,r7,r11 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhs r10,[r12,#-8] ++ ldrhs r11,[r12,#-4] ++# if __ARM_ARCH__>=6 && defined(__ARMEB__) ++ rev r4,r4 ++ rev r5,r5 ++ rev r6,r6 ++ rev r7,r7 ++# endif ++# ifdef __thumb2__ ++ itt hs ++# endif ++ eorhs r4,r4,r8 ++ eorhs r5,r5,r9 ++ add r8,sp,#4*(8) ++ str r4,[r14],#16 @ store output ++# ifdef __thumb2__ ++ itt hs ++# endif ++ eorhs r6,r6,r10 ++ eorhs r7,r7,r11 ++ str r5,[r14,#-12] ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ str r6,[r14,#-8] ++ add r0,sp,#4*(16+8) ++ str r7,[r14,#-4] ++ ++ ldmia r0,{r0,r1,r2,r3,r4,r5,r6,r7} @ load second half ++ ++ add r0,r0,r8 @ accumulate key material ++ add r1,r1,r9 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhs r8,[r12],#16 @ load input ++ ldrhs r9,[r12,#-12] ++# ifdef __thumb2__ ++ itt hi ++# endif ++ strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it ++ strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it ++ add r2,r2,r10 ++ add r3,r3,r11 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhs r10,[r12,#-8] ++ ldrhs r11,[r12,#-4] ++# if __ARM_ARCH__>=6 && defined(__ARMEB__) ++ rev r0,r0 ++ rev r1,r1 ++ rev r2,r2 ++ rev r3,r3 ++# endif ++# ifdef __thumb2__ ++ itt hs ++# endif ++ eorhs r0,r0,r8 ++ eorhs r1,r1,r9 ++ add r8,sp,#4*(12) ++ str r0,[r14],#16 @ store output ++# ifdef __thumb2__ ++ itt hs ++# endif ++ eorhs r2,r2,r10 ++ eorhs r3,r3,r11 ++ str r1,[r14,#-12] ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ str r2,[r14,#-8] ++ str r3,[r14,#-4] ++ ++ add r4,r4,r8 @ accumulate key material ++ add r5,r5,r9 ++# ifdef __thumb2__ ++ itt hi ++# endif ++ addhi r8,r8,#1 @ next counter value ++ strhi r8,[sp,#4*(12)] @ save next counter value ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhs r8,[r12],#16 @ load input ++ ldrhs r9,[r12,#-12] ++ add r6,r6,r10 ++ add r7,r7,r11 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhs r10,[r12,#-8] ++ ldrhs r11,[r12,#-4] ++# if __ARM_ARCH__>=6 && defined(__ARMEB__) ++ rev r4,r4 ++ rev r5,r5 ++ rev r6,r6 ++ rev r7,r7 ++# endif ++# ifdef __thumb2__ ++ itt hs ++# endif ++ eorhs r4,r4,r8 ++ eorhs r5,r5,r9 ++# ifdef __thumb2__ ++ it ne ++# endif ++ ldrne r8,[sp,#4*(32+2)] @ re-load len ++# ifdef __thumb2__ ++ itt hs ++# endif ++ eorhs r6,r6,r10 ++ eorhs r7,r7,r11 ++ str r4,[r14],#16 @ store output ++ str r5,[r14,#-12] ++# ifdef __thumb2__ ++ it hs ++# endif ++ subhs r11,r8,#64 @ len-=64 ++ str r6,[r14,#-8] ++ str r7,[r14,#-4] ++ bhi Loop_outer ++ ++ beq Ldone ++# if __ARM_ARCH__<7 ++ b Ltail ++ ++.align 4 ++Lunaligned:@ unaligned endian-neutral 
path ++ cmp r11,#64 @ restore flags ++# endif ++#endif ++#if __ARM_ARCH__<7 ++ ldr r11,[sp,#4*(3)] ++ add r0,r0,r8 @ accumulate key material ++ add r1,r1,r9 ++ add r2,r2,r10 ++# ifdef __thumb2__ ++ itete lo ++# endif ++ eorlo r8,r8,r8 @ zero or ... ++ ldrhsb r8,[r12],#16 @ ... load input ++ eorlo r9,r9,r9 ++ ldrhsb r9,[r12,#-12] ++ ++ add r3,r3,r11 ++# ifdef __thumb2__ ++ itete lo ++# endif ++ eorlo r10,r10,r10 ++ ldrhsb r10,[r12,#-8] ++ eorlo r11,r11,r11 ++ ldrhsb r11,[r12,#-4] ++ ++ eor r0,r8,r0 @ xor with input (or zero) ++ eor r1,r9,r1 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-15] @ load more input ++ ldrhsb r9,[r12,#-11] ++ eor r2,r10,r2 ++ strb r0,[r14],#16 @ store output ++ eor r3,r11,r3 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-7] ++ ldrhsb r11,[r12,#-3] ++ strb r1,[r14,#-12] ++ eor r0,r8,r0,lsr#8 ++ strb r2,[r14,#-8] ++ eor r1,r9,r1,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-14] @ load more input ++ ldrhsb r9,[r12,#-10] ++ strb r3,[r14,#-4] ++ eor r2,r10,r2,lsr#8 ++ strb r0,[r14,#-15] ++ eor r3,r11,r3,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-6] ++ ldrhsb r11,[r12,#-2] ++ strb r1,[r14,#-11] ++ eor r0,r8,r0,lsr#8 ++ strb r2,[r14,#-7] ++ eor r1,r9,r1,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-13] @ load more input ++ ldrhsb r9,[r12,#-9] ++ strb r3,[r14,#-3] ++ eor r2,r10,r2,lsr#8 ++ strb r0,[r14,#-14] ++ eor r3,r11,r3,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-5] ++ ldrhsb r11,[r12,#-1] ++ strb r1,[r14,#-10] ++ strb r2,[r14,#-6] ++ eor r0,r8,r0,lsr#8 ++ strb r3,[r14,#-2] ++ eor r1,r9,r1,lsr#8 ++ strb r0,[r14,#-13] ++ eor r2,r10,r2,lsr#8 ++ strb r1,[r14,#-9] ++ eor r3,r11,r3,lsr#8 ++ strb r2,[r14,#-5] ++ strb r3,[r14,#-1] ++ add r8,sp,#4*(4+0) ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ add r0,sp,#4*(16+8) ++ add r4,r4,r8 @ accumulate key material ++ add r5,r5,r9 ++ add r6,r6,r10 ++# ifdef __thumb2__ ++ itete lo ++# endif ++ eorlo r8,r8,r8 @ zero or ... ++ ldrhsb r8,[r12],#16 @ ... 
load input ++ eorlo r9,r9,r9 ++ ldrhsb r9,[r12,#-12] ++ ++ add r7,r7,r11 ++# ifdef __thumb2__ ++ itete lo ++# endif ++ eorlo r10,r10,r10 ++ ldrhsb r10,[r12,#-8] ++ eorlo r11,r11,r11 ++ ldrhsb r11,[r12,#-4] ++ ++ eor r4,r8,r4 @ xor with input (or zero) ++ eor r5,r9,r5 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-15] @ load more input ++ ldrhsb r9,[r12,#-11] ++ eor r6,r10,r6 ++ strb r4,[r14],#16 @ store output ++ eor r7,r11,r7 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-7] ++ ldrhsb r11,[r12,#-3] ++ strb r5,[r14,#-12] ++ eor r4,r8,r4,lsr#8 ++ strb r6,[r14,#-8] ++ eor r5,r9,r5,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-14] @ load more input ++ ldrhsb r9,[r12,#-10] ++ strb r7,[r14,#-4] ++ eor r6,r10,r6,lsr#8 ++ strb r4,[r14,#-15] ++ eor r7,r11,r7,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-6] ++ ldrhsb r11,[r12,#-2] ++ strb r5,[r14,#-11] ++ eor r4,r8,r4,lsr#8 ++ strb r6,[r14,#-7] ++ eor r5,r9,r5,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-13] @ load more input ++ ldrhsb r9,[r12,#-9] ++ strb r7,[r14,#-3] ++ eor r6,r10,r6,lsr#8 ++ strb r4,[r14,#-14] ++ eor r7,r11,r7,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-5] ++ ldrhsb r11,[r12,#-1] ++ strb r5,[r14,#-10] ++ strb r6,[r14,#-6] ++ eor r4,r8,r4,lsr#8 ++ strb r7,[r14,#-2] ++ eor r5,r9,r5,lsr#8 ++ strb r4,[r14,#-13] ++ eor r6,r10,r6,lsr#8 ++ strb r5,[r14,#-9] ++ eor r7,r11,r7,lsr#8 ++ strb r6,[r14,#-5] ++ strb r7,[r14,#-1] ++ add r8,sp,#4*(4+4) ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ ldmia r0,{r0,r1,r2,r3,r4,r5,r6,r7} @ load second half ++# ifdef __thumb2__ ++ itt hi ++# endif ++ strhi r10,[sp,#4*(16+10)] @ copy "rx" ++ strhi r11,[sp,#4*(16+11)] @ copy "rx" ++ add r0,r0,r8 @ accumulate key material ++ add r1,r1,r9 ++ add r2,r2,r10 ++# ifdef __thumb2__ ++ itete lo ++# endif ++ eorlo r8,r8,r8 @ zero or ... ++ ldrhsb r8,[r12],#16 @ ... 
load input ++ eorlo r9,r9,r9 ++ ldrhsb r9,[r12,#-12] ++ ++ add r3,r3,r11 ++# ifdef __thumb2__ ++ itete lo ++# endif ++ eorlo r10,r10,r10 ++ ldrhsb r10,[r12,#-8] ++ eorlo r11,r11,r11 ++ ldrhsb r11,[r12,#-4] ++ ++ eor r0,r8,r0 @ xor with input (or zero) ++ eor r1,r9,r1 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-15] @ load more input ++ ldrhsb r9,[r12,#-11] ++ eor r2,r10,r2 ++ strb r0,[r14],#16 @ store output ++ eor r3,r11,r3 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-7] ++ ldrhsb r11,[r12,#-3] ++ strb r1,[r14,#-12] ++ eor r0,r8,r0,lsr#8 ++ strb r2,[r14,#-8] ++ eor r1,r9,r1,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-14] @ load more input ++ ldrhsb r9,[r12,#-10] ++ strb r3,[r14,#-4] ++ eor r2,r10,r2,lsr#8 ++ strb r0,[r14,#-15] ++ eor r3,r11,r3,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-6] ++ ldrhsb r11,[r12,#-2] ++ strb r1,[r14,#-11] ++ eor r0,r8,r0,lsr#8 ++ strb r2,[r14,#-7] ++ eor r1,r9,r1,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-13] @ load more input ++ ldrhsb r9,[r12,#-9] ++ strb r3,[r14,#-3] ++ eor r2,r10,r2,lsr#8 ++ strb r0,[r14,#-14] ++ eor r3,r11,r3,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-5] ++ ldrhsb r11,[r12,#-1] ++ strb r1,[r14,#-10] ++ strb r2,[r14,#-6] ++ eor r0,r8,r0,lsr#8 ++ strb r3,[r14,#-2] ++ eor r1,r9,r1,lsr#8 ++ strb r0,[r14,#-13] ++ eor r2,r10,r2,lsr#8 ++ strb r1,[r14,#-9] ++ eor r3,r11,r3,lsr#8 ++ strb r2,[r14,#-5] ++ strb r3,[r14,#-1] ++ add r8,sp,#4*(4+8) ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ add r4,r4,r8 @ accumulate key material ++# ifdef __thumb2__ ++ itt hi ++# endif ++ addhi r8,r8,#1 @ next counter value ++ strhi r8,[sp,#4*(12)] @ save next counter value ++ add r5,r5,r9 ++ add r6,r6,r10 ++# ifdef __thumb2__ ++ itete lo ++# endif ++ eorlo r8,r8,r8 @ zero or ... ++ ldrhsb r8,[r12],#16 @ ... 
load input ++ eorlo r9,r9,r9 ++ ldrhsb r9,[r12,#-12] ++ ++ add r7,r7,r11 ++# ifdef __thumb2__ ++ itete lo ++# endif ++ eorlo r10,r10,r10 ++ ldrhsb r10,[r12,#-8] ++ eorlo r11,r11,r11 ++ ldrhsb r11,[r12,#-4] ++ ++ eor r4,r8,r4 @ xor with input (or zero) ++ eor r5,r9,r5 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-15] @ load more input ++ ldrhsb r9,[r12,#-11] ++ eor r6,r10,r6 ++ strb r4,[r14],#16 @ store output ++ eor r7,r11,r7 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-7] ++ ldrhsb r11,[r12,#-3] ++ strb r5,[r14,#-12] ++ eor r4,r8,r4,lsr#8 ++ strb r6,[r14,#-8] ++ eor r5,r9,r5,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-14] @ load more input ++ ldrhsb r9,[r12,#-10] ++ strb r7,[r14,#-4] ++ eor r6,r10,r6,lsr#8 ++ strb r4,[r14,#-15] ++ eor r7,r11,r7,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-6] ++ ldrhsb r11,[r12,#-2] ++ strb r5,[r14,#-11] ++ eor r4,r8,r4,lsr#8 ++ strb r6,[r14,#-7] ++ eor r5,r9,r5,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r8,[r12,#-13] @ load more input ++ ldrhsb r9,[r12,#-9] ++ strb r7,[r14,#-3] ++ eor r6,r10,r6,lsr#8 ++ strb r4,[r14,#-14] ++ eor r7,r11,r7,lsr#8 ++# ifdef __thumb2__ ++ itt hs ++# endif ++ ldrhsb r10,[r12,#-5] ++ ldrhsb r11,[r12,#-1] ++ strb r5,[r14,#-10] ++ strb r6,[r14,#-6] ++ eor r4,r8,r4,lsr#8 ++ strb r7,[r14,#-2] ++ eor r5,r9,r5,lsr#8 ++ strb r4,[r14,#-13] ++ eor r6,r10,r6,lsr#8 ++ strb r5,[r14,#-9] ++ eor r7,r11,r7,lsr#8 ++ strb r6,[r14,#-5] ++ strb r7,[r14,#-1] ++# ifdef __thumb2__ ++ it ne ++# endif ++ ldrne r8,[sp,#4*(32+2)] @ re-load len ++# ifdef __thumb2__ ++ it hs ++# endif ++ subhs r11,r8,#64 @ len-=64 ++ bhi Loop_outer ++ ++ beq Ldone ++#endif ++ ++Ltail: ++ ldr r12,[sp,#4*(32+1)] @ load inp ++ add r9,sp,#4*(0) ++ ldr r14,[sp,#4*(32+0)] @ load out ++ ++Loop_tail: ++ ldrb r10,[r9],#1 @ read buffer on stack ++ ldrb r11,[r12],#1 @ read input ++ subs r8,r8,#1 ++ eor r11,r11,r10 ++ strb r11,[r14],#1 @ store output ++ bne Loop_tail ++ ++Ldone: ++ add sp,sp,#4*(32+3) ++Lno_data: ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} ++ ++#if __ARM_MAX_ARCH__>=7 ++ ++ ++ ++#ifdef __thumb2__ ++.thumb_func ChaCha20_neon ++#endif ++.align 5 ++ChaCha20_neon: ++ ldr r12,[sp,#0] @ pull pointer to counter and nonce ++ stmdb sp!,{r0,r1,r2,r4-r11,lr} ++LChaCha20_neon: ++ adr r14,Lsigma ++ vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI spec says so ++ stmdb sp!,{r0,r1,r2,r3} ++ ++ vld1.32 {q1,q2},[r3] @ load key ++ ldmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} @ load key ++ ++ sub sp,sp,#4*(16+16) ++ vld1.32 {q3},[r12] @ load counter and nonce ++ add r12,sp,#4*8 ++ ldmia r14,{r0,r1,r2,r3} @ load sigma ++ vld1.32 {q0},[r14]! 
@ load sigma ++ vld1.32 {q12},[r14] @ one ++ vst1.32 {q2,q3},[r12] @ copy 1/2key|counter|nonce ++ vst1.32 {q0,q1},[sp] @ copy sigma|1/2key ++ ++ str r10,[sp,#4*(16+10)] @ off-load "rx" ++ str r11,[sp,#4*(16+11)] @ off-load "rx" ++ vshl.i32 d26,d24,#1 @ two ++ vstr d24,[sp,#4*(16+0)] ++ vshl.i32 d28,d24,#2 @ four ++ vstr d26,[sp,#4*(16+2)] ++ vmov q4,q0 ++ vstr d28,[sp,#4*(16+4)] ++ vmov q8,q0 ++ vmov q5,q1 ++ vmov q9,q1 ++ b Loop_neon_enter ++ ++.align 4 ++Loop_neon_outer: ++ ldmia sp,{r0,r1,r2,r3,r4,r5,r6,r7,r8,r9} @ load key material ++ cmp r11,#64*2 @ if len<=64*2 ++ bls Lbreak_neon @ switch to integer-only ++ vmov q4,q0 ++ str r11,[sp,#4*(32+2)] @ save len ++ vmov q8,q0 ++ str r12, [sp,#4*(32+1)] @ save inp ++ vmov q5,q1 ++ str r14, [sp,#4*(32+0)] @ save out ++ vmov q9,q1 ++Loop_neon_enter: ++ ldr r11, [sp,#4*(15)] ++ vadd.i32 q7,q3,q12 @ counter+1 ++ ldr r12,[sp,#4*(12)] @ modulo-scheduled load ++ vmov q6,q2 ++ ldr r10, [sp,#4*(13)] ++ vmov q10,q2 ++ ldr r14,[sp,#4*(14)] ++ vadd.i32 q11,q7,q12 @ counter+2 ++ str r11, [sp,#4*(16+15)] ++ mov r11,#10 ++ add r12,r12,#3 @ counter+3 ++ b Loop_neon ++ ++.align 4 ++Loop_neon: ++ subs r11,r11,#1 ++ vadd.i32 q0,q0,q1 ++ add r0,r0,r4 ++ vadd.i32 q4,q4,q5 ++ mov r12,r12,ror#16 ++ vadd.i32 q8,q8,q9 ++ add r1,r1,r5 ++ veor q3,q3,q0 ++ mov r10,r10,ror#16 ++ veor q7,q7,q4 ++ eor r12,r12,r0,ror#16 ++ veor q11,q11,q8 ++ eor r10,r10,r1,ror#16 ++ vrev32.16 q3,q3 ++ add r8,r8,r12 ++ vrev32.16 q7,q7 ++ mov r4,r4,ror#20 ++ vrev32.16 q11,q11 ++ add r9,r9,r10 ++ vadd.i32 q2,q2,q3 ++ mov r5,r5,ror#20 ++ vadd.i32 q6,q6,q7 ++ eor r4,r4,r8,ror#20 ++ vadd.i32 q10,q10,q11 ++ eor r5,r5,r9,ror#20 ++ veor q12,q1,q2 ++ add r0,r0,r4 ++ veor q13,q5,q6 ++ mov r12,r12,ror#24 ++ veor q14,q9,q10 ++ add r1,r1,r5 ++ vshr.u32 q1,q12,#20 ++ mov r10,r10,ror#24 ++ vshr.u32 q5,q13,#20 ++ eor r12,r12,r0,ror#24 ++ vshr.u32 q9,q14,#20 ++ eor r10,r10,r1,ror#24 ++ vsli.32 q1,q12,#12 ++ add r8,r8,r12 ++ vsli.32 q5,q13,#12 ++ mov r4,r4,ror#25 ++ vsli.32 q9,q14,#12 ++ add r9,r9,r10 ++ vadd.i32 q0,q0,q1 ++ mov r5,r5,ror#25 ++ vadd.i32 q4,q4,q5 ++ str r10,[sp,#4*(16+13)] ++ vadd.i32 q8,q8,q9 ++ ldr r10,[sp,#4*(16+15)] ++ veor q12,q3,q0 ++ eor r4,r4,r8,ror#25 ++ veor q13,q7,q4 ++ eor r5,r5,r9,ror#25 ++ veor q14,q11,q8 ++ str r8,[sp,#4*(16+8)] ++ vshr.u32 q3,q12,#24 ++ ldr r8,[sp,#4*(16+10)] ++ vshr.u32 q7,q13,#24 ++ add r2,r2,r6 ++ vshr.u32 q11,q14,#24 ++ mov r14,r14,ror#16 ++ vsli.32 q3,q12,#8 ++ str r9,[sp,#4*(16+9)] ++ vsli.32 q7,q13,#8 ++ ldr r9,[sp,#4*(16+11)] ++ vsli.32 q11,q14,#8 ++ add r3,r3,r7 ++ vadd.i32 q2,q2,q3 ++ mov r10,r10,ror#16 ++ vadd.i32 q6,q6,q7 ++ eor r14,r14,r2,ror#16 ++ vadd.i32 q10,q10,q11 ++ eor r10,r10,r3,ror#16 ++ veor q12,q1,q2 ++ add r8,r8,r14 ++ veor q13,q5,q6 ++ mov r6,r6,ror#20 ++ veor q14,q9,q10 ++ add r9,r9,r10 ++ vshr.u32 q1,q12,#25 ++ mov r7,r7,ror#20 ++ vshr.u32 q5,q13,#25 ++ eor r6,r6,r8,ror#20 ++ vshr.u32 q9,q14,#25 ++ eor r7,r7,r9,ror#20 ++ vsli.32 q1,q12,#7 ++ add r2,r2,r6 ++ vsli.32 q5,q13,#7 ++ mov r14,r14,ror#24 ++ vsli.32 q9,q14,#7 ++ add r3,r3,r7 ++ vext.8 q2,q2,q2,#8 ++ mov r10,r10,ror#24 ++ vext.8 q6,q6,q6,#8 ++ eor r14,r14,r2,ror#24 ++ vext.8 q10,q10,q10,#8 ++ eor r10,r10,r3,ror#24 ++ vext.8 q1,q1,q1,#4 ++ add r8,r8,r14 ++ vext.8 q5,q5,q5,#4 ++ mov r6,r6,ror#25 ++ vext.8 q9,q9,q9,#4 ++ add r9,r9,r10 ++ vext.8 q3,q3,q3,#12 ++ mov r7,r7,ror#25 ++ vext.8 q7,q7,q7,#12 ++ eor r6,r6,r8,ror#25 ++ vext.8 q11,q11,q11,#12 ++ eor r7,r7,r9,ror#25 ++ vadd.i32 q0,q0,q1 ++ add r0,r0,r5 ++ vadd.i32 q4,q4,q5 ++ mov r10,r10,ror#16 ++ vadd.i32 q8,q8,q9 ++ add 
r1,r1,r6 ++ veor q3,q3,q0 ++ mov r12,r12,ror#16 ++ veor q7,q7,q4 ++ eor r10,r10,r0,ror#16 ++ veor q11,q11,q8 ++ eor r12,r12,r1,ror#16 ++ vrev32.16 q3,q3 ++ add r8,r8,r10 ++ vrev32.16 q7,q7 ++ mov r5,r5,ror#20 ++ vrev32.16 q11,q11 ++ add r9,r9,r12 ++ vadd.i32 q2,q2,q3 ++ mov r6,r6,ror#20 ++ vadd.i32 q6,q6,q7 ++ eor r5,r5,r8,ror#20 ++ vadd.i32 q10,q10,q11 ++ eor r6,r6,r9,ror#20 ++ veor q12,q1,q2 ++ add r0,r0,r5 ++ veor q13,q5,q6 ++ mov r10,r10,ror#24 ++ veor q14,q9,q10 ++ add r1,r1,r6 ++ vshr.u32 q1,q12,#20 ++ mov r12,r12,ror#24 ++ vshr.u32 q5,q13,#20 ++ eor r10,r10,r0,ror#24 ++ vshr.u32 q9,q14,#20 ++ eor r12,r12,r1,ror#24 ++ vsli.32 q1,q12,#12 ++ add r8,r8,r10 ++ vsli.32 q5,q13,#12 ++ mov r5,r5,ror#25 ++ vsli.32 q9,q14,#12 ++ str r10,[sp,#4*(16+15)] ++ vadd.i32 q0,q0,q1 ++ ldr r10,[sp,#4*(16+13)] ++ vadd.i32 q4,q4,q5 ++ add r9,r9,r12 ++ vadd.i32 q8,q8,q9 ++ mov r6,r6,ror#25 ++ veor q12,q3,q0 ++ eor r5,r5,r8,ror#25 ++ veor q13,q7,q4 ++ eor r6,r6,r9,ror#25 ++ veor q14,q11,q8 ++ str r8,[sp,#4*(16+10)] ++ vshr.u32 q3,q12,#24 ++ ldr r8,[sp,#4*(16+8)] ++ vshr.u32 q7,q13,#24 ++ add r2,r2,r7 ++ vshr.u32 q11,q14,#24 ++ mov r10,r10,ror#16 ++ vsli.32 q3,q12,#8 ++ str r9,[sp,#4*(16+11)] ++ vsli.32 q7,q13,#8 ++ ldr r9,[sp,#4*(16+9)] ++ vsli.32 q11,q14,#8 ++ add r3,r3,r4 ++ vadd.i32 q2,q2,q3 ++ mov r14,r14,ror#16 ++ vadd.i32 q6,q6,q7 ++ eor r10,r10,r2,ror#16 ++ vadd.i32 q10,q10,q11 ++ eor r14,r14,r3,ror#16 ++ veor q12,q1,q2 ++ add r8,r8,r10 ++ veor q13,q5,q6 ++ mov r7,r7,ror#20 ++ veor q14,q9,q10 ++ add r9,r9,r14 ++ vshr.u32 q1,q12,#25 ++ mov r4,r4,ror#20 ++ vshr.u32 q5,q13,#25 ++ eor r7,r7,r8,ror#20 ++ vshr.u32 q9,q14,#25 ++ eor r4,r4,r9,ror#20 ++ vsli.32 q1,q12,#7 ++ add r2,r2,r7 ++ vsli.32 q5,q13,#7 ++ mov r10,r10,ror#24 ++ vsli.32 q9,q14,#7 ++ add r3,r3,r4 ++ vext.8 q2,q2,q2,#8 ++ mov r14,r14,ror#24 ++ vext.8 q6,q6,q6,#8 ++ eor r10,r10,r2,ror#24 ++ vext.8 q10,q10,q10,#8 ++ eor r14,r14,r3,ror#24 ++ vext.8 q1,q1,q1,#12 ++ add r8,r8,r10 ++ vext.8 q5,q5,q5,#12 ++ mov r7,r7,ror#25 ++ vext.8 q9,q9,q9,#12 ++ add r9,r9,r14 ++ vext.8 q3,q3,q3,#4 ++ mov r4,r4,ror#25 ++ vext.8 q7,q7,q7,#4 ++ eor r7,r7,r8,ror#25 ++ vext.8 q11,q11,q11,#4 ++ eor r4,r4,r9,ror#25 ++ bne Loop_neon ++ ++ add r11,sp,#32 ++ vld1.32 {q12,q13},[sp] @ load key material ++ vld1.32 {q14,q15},[r11] ++ ++ ldr r11,[sp,#4*(32+2)] @ load len ++ ++ str r8, [sp,#4*(16+8)] @ modulo-scheduled store ++ str r9, [sp,#4*(16+9)] ++ str r12,[sp,#4*(16+12)] ++ str r10, [sp,#4*(16+13)] ++ str r14,[sp,#4*(16+14)] ++ ++ @ at this point we have first half of 512-bit result in ++ @ rx and second half at sp+4*(16+8) ++ ++ ldr r12,[sp,#4*(32+1)] @ load inp ++ ldr r14,[sp,#4*(32+0)] @ load out ++ ++ vadd.i32 q0,q0,q12 @ accumulate key material ++ vadd.i32 q4,q4,q12 ++ vadd.i32 q8,q8,q12 ++ vldr d24,[sp,#4*(16+0)] @ one ++ ++ vadd.i32 q1,q1,q13 ++ vadd.i32 q5,q5,q13 ++ vadd.i32 q9,q9,q13 ++ vldr d26,[sp,#4*(16+2)] @ two ++ ++ vadd.i32 q2,q2,q14 ++ vadd.i32 q6,q6,q14 ++ vadd.i32 q10,q10,q14 ++ vadd.i32 d14,d14,d24 @ counter+1 ++ vadd.i32 d22,d22,d26 @ counter+2 ++ ++ vadd.i32 q3,q3,q15 ++ vadd.i32 q7,q7,q15 ++ vadd.i32 q11,q11,q15 ++ ++ cmp r11,#64*4 ++ blo Ltail_neon ++ ++ vld1.8 {q12,q13},[r12]! @ load input ++ mov r11,sp ++ vld1.8 {q14,q15},[r12]! ++ veor q0,q0,q12 @ xor with input ++ veor q1,q1,q13 ++ vld1.8 {q12,q13},[r12]! ++ veor q2,q2,q14 ++ veor q3,q3,q15 ++ vld1.8 {q14,q15},[r12]! ++ ++ veor q4,q4,q12 ++ vst1.8 {q0,q1},[r14]! @ store output ++ veor q5,q5,q13 ++ vld1.8 {q12,q13},[r12]! ++ veor q6,q6,q14 ++ vst1.8 {q2,q3},[r14]! 
++ veor q7,q7,q15 ++ vld1.8 {q14,q15},[r12]! ++ ++ veor q8,q8,q12 ++ vld1.32 {q0,q1},[r11]! @ load for next iteration ++ veor d25,d25,d25 ++ vldr d24,[sp,#4*(16+4)] @ four ++ veor q9,q9,q13 ++ vld1.32 {q2,q3},[r11] ++ veor q10,q10,q14 ++ vst1.8 {q4,q5},[r14]! ++ veor q11,q11,q15 ++ vst1.8 {q6,q7},[r14]! ++ ++ vadd.i32 d6,d6,d24 @ next counter value ++ vldr d24,[sp,#4*(16+0)] @ one ++ ++ ldmia sp,{r8,r9,r10,r11} @ load key material ++ add r0,r0,r8 @ accumulate key material ++ ldr r8,[r12],#16 @ load input ++ vst1.8 {q8,q9},[r14]! ++ add r1,r1,r9 ++ ldr r9,[r12,#-12] ++ vst1.8 {q10,q11},[r14]! ++ add r2,r2,r10 ++ ldr r10,[r12,#-8] ++ add r3,r3,r11 ++ ldr r11,[r12,#-4] ++# ifdef __ARMEB__ ++ rev r0,r0 ++ rev r1,r1 ++ rev r2,r2 ++ rev r3,r3 ++# endif ++ eor r0,r0,r8 @ xor with input ++ add r8,sp,#4*(4) ++ eor r1,r1,r9 ++ str r0,[r14],#16 @ store output ++ eor r2,r2,r10 ++ str r1,[r14,#-12] ++ eor r3,r3,r11 ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ str r2,[r14,#-8] ++ str r3,[r14,#-4] ++ ++ add r4,r4,r8 @ accumulate key material ++ ldr r8,[r12],#16 @ load input ++ add r5,r5,r9 ++ ldr r9,[r12,#-12] ++ add r6,r6,r10 ++ ldr r10,[r12,#-8] ++ add r7,r7,r11 ++ ldr r11,[r12,#-4] ++# ifdef __ARMEB__ ++ rev r4,r4 ++ rev r5,r5 ++ rev r6,r6 ++ rev r7,r7 ++# endif ++ eor r4,r4,r8 ++ add r8,sp,#4*(8) ++ eor r5,r5,r9 ++ str r4,[r14],#16 @ store output ++ eor r6,r6,r10 ++ str r5,[r14,#-12] ++ eor r7,r7,r11 ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ str r6,[r14,#-8] ++ add r0,sp,#4*(16+8) ++ str r7,[r14,#-4] ++ ++ ldmia r0,{r0,r1,r2,r3,r4,r5,r6,r7} @ load second half ++ ++ add r0,r0,r8 @ accumulate key material ++ ldr r8,[r12],#16 @ load input ++ add r1,r1,r9 ++ ldr r9,[r12,#-12] ++# ifdef __thumb2__ ++ it hi ++# endif ++ strhi r10,[sp,#4*(16+10)] @ copy "rx" while at it ++ add r2,r2,r10 ++ ldr r10,[r12,#-8] ++# ifdef __thumb2__ ++ it hi ++# endif ++ strhi r11,[sp,#4*(16+11)] @ copy "rx" while at it ++ add r3,r3,r11 ++ ldr r11,[r12,#-4] ++# ifdef __ARMEB__ ++ rev r0,r0 ++ rev r1,r1 ++ rev r2,r2 ++ rev r3,r3 ++# endif ++ eor r0,r0,r8 ++ add r8,sp,#4*(12) ++ eor r1,r1,r9 ++ str r0,[r14],#16 @ store output ++ eor r2,r2,r10 ++ str r1,[r14,#-12] ++ eor r3,r3,r11 ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ str r2,[r14,#-8] ++ str r3,[r14,#-4] ++ ++ add r4,r4,r8 @ accumulate key material ++ add r8,r8,#4 @ next counter value ++ add r5,r5,r9 ++ str r8,[sp,#4*(12)] @ save next counter value ++ ldr r8,[r12],#16 @ load input ++ add r6,r6,r10 ++ add r4,r4,#3 @ counter+3 ++ ldr r9,[r12,#-12] ++ add r7,r7,r11 ++ ldr r10,[r12,#-8] ++ ldr r11,[r12,#-4] ++# ifdef __ARMEB__ ++ rev r4,r4 ++ rev r5,r5 ++ rev r6,r6 ++ rev r7,r7 ++# endif ++ eor r4,r4,r8 ++# ifdef __thumb2__ ++ it hi ++# endif ++ ldrhi r8,[sp,#4*(32+2)] @ re-load len ++ eor r5,r5,r9 ++ eor r6,r6,r10 ++ str r4,[r14],#16 @ store output ++ eor r7,r7,r11 ++ str r5,[r14,#-12] ++ sub r11,r8,#64*4 @ len-=64*4 ++ str r6,[r14,#-8] ++ str r7,[r14,#-4] ++ bhi Loop_neon_outer ++ ++ b Ldone_neon ++ ++.align 4 ++Lbreak_neon: ++ @ harmonize NEON and integer-only stack frames: load data ++ @ from NEON frame, but save to integer-only one; distance ++ @ between the two is 4*(32+4+16-32)=4*(20). 
++ ++ str r11, [sp,#4*(20+32+2)] @ save len ++ add r11,sp,#4*(32+4) ++ str r12, [sp,#4*(20+32+1)] @ save inp ++ str r14, [sp,#4*(20+32+0)] @ save out ++ ++ ldr r12,[sp,#4*(16+10)] ++ ldr r14,[sp,#4*(16+11)] ++ vldmia r11,{d8,d9,d10,d11,d12,d13,d14,d15} @ fulfill ABI requirement ++ str r12,[sp,#4*(20+16+10)] @ copy "rx" ++ str r14,[sp,#4*(20+16+11)] @ copy "rx" ++ ++ ldr r11, [sp,#4*(15)] ++ ldr r12,[sp,#4*(12)] @ modulo-scheduled load ++ ldr r10, [sp,#4*(13)] ++ ldr r14,[sp,#4*(14)] ++ str r11, [sp,#4*(20+16+15)] ++ add r11,sp,#4*(20) ++ vst1.32 {q0,q1},[r11]! @ copy key ++ add sp,sp,#4*(20) @ switch frame ++ vst1.32 {q2,q3},[r11] ++ mov r11,#10 ++ b Loop @ go integer-only ++ ++.align 4 ++Ltail_neon: ++ cmp r11,#64*3 ++ bhs L192_or_more_neon ++ cmp r11,#64*2 ++ bhs L128_or_more_neon ++ cmp r11,#64*1 ++ bhs L64_or_more_neon ++ ++ add r8,sp,#4*(8) ++ vst1.8 {q0,q1},[sp] ++ add r10,sp,#4*(0) ++ vst1.8 {q2,q3},[r8] ++ b Loop_tail_neon ++ ++.align 4 ++L64_or_more_neon: ++ vld1.8 {q12,q13},[r12]! ++ vld1.8 {q14,q15},[r12]! ++ veor q0,q0,q12 ++ veor q1,q1,q13 ++ veor q2,q2,q14 ++ veor q3,q3,q15 ++ vst1.8 {q0,q1},[r14]! ++ vst1.8 {q2,q3},[r14]! ++ ++ beq Ldone_neon ++ ++ add r8,sp,#4*(8) ++ vst1.8 {q4,q5},[sp] ++ add r10,sp,#4*(0) ++ vst1.8 {q6,q7},[r8] ++ sub r11,r11,#64*1 @ len-=64*1 ++ b Loop_tail_neon ++ ++.align 4 ++L128_or_more_neon: ++ vld1.8 {q12,q13},[r12]! ++ vld1.8 {q14,q15},[r12]! ++ veor q0,q0,q12 ++ veor q1,q1,q13 ++ vld1.8 {q12,q13},[r12]! ++ veor q2,q2,q14 ++ veor q3,q3,q15 ++ vld1.8 {q14,q15},[r12]! ++ ++ veor q4,q4,q12 ++ veor q5,q5,q13 ++ vst1.8 {q0,q1},[r14]! ++ veor q6,q6,q14 ++ vst1.8 {q2,q3},[r14]! ++ veor q7,q7,q15 ++ vst1.8 {q4,q5},[r14]! ++ vst1.8 {q6,q7},[r14]! ++ ++ beq Ldone_neon ++ ++ add r8,sp,#4*(8) ++ vst1.8 {q8,q9},[sp] ++ add r10,sp,#4*(0) ++ vst1.8 {q10,q11},[r8] ++ sub r11,r11,#64*2 @ len-=64*2 ++ b Loop_tail_neon ++ ++.align 4 ++L192_or_more_neon: ++ vld1.8 {q12,q13},[r12]! ++ vld1.8 {q14,q15},[r12]! ++ veor q0,q0,q12 ++ veor q1,q1,q13 ++ vld1.8 {q12,q13},[r12]! ++ veor q2,q2,q14 ++ veor q3,q3,q15 ++ vld1.8 {q14,q15},[r12]! ++ ++ veor q4,q4,q12 ++ veor q5,q5,q13 ++ vld1.8 {q12,q13},[r12]! ++ veor q6,q6,q14 ++ vst1.8 {q0,q1},[r14]! ++ veor q7,q7,q15 ++ vld1.8 {q14,q15},[r12]! ++ ++ veor q8,q8,q12 ++ vst1.8 {q2,q3},[r14]! ++ veor q9,q9,q13 ++ vst1.8 {q4,q5},[r14]! ++ veor q10,q10,q14 ++ vst1.8 {q6,q7},[r14]! ++ veor q11,q11,q15 ++ vst1.8 {q8,q9},[r14]! ++ vst1.8 {q10,q11},[r14]! 
++ ++ beq Ldone_neon ++ ++ ldmia sp,{r8,r9,r10,r11} @ load key material ++ add r0,r0,r8 @ accumulate key material ++ add r8,sp,#4*(4) ++ add r1,r1,r9 ++ add r2,r2,r10 ++ add r3,r3,r11 ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ ++ add r4,r4,r8 @ accumulate key material ++ add r8,sp,#4*(8) ++ add r5,r5,r9 ++ add r6,r6,r10 ++ add r7,r7,r11 ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++# ifdef __ARMEB__ ++ rev r0,r0 ++ rev r1,r1 ++ rev r2,r2 ++ rev r3,r3 ++ rev r4,r4 ++ rev r5,r5 ++ rev r6,r6 ++ rev r7,r7 ++# endif ++ stmia sp,{r0,r1,r2,r3,r4,r5,r6,r7} ++ add r0,sp,#4*(16+8) ++ ++ ldmia r0,{r0,r1,r2,r3,r4,r5,r6,r7} @ load second half ++ ++ add r0,r0,r8 @ accumulate key material ++ add r8,sp,#4*(12) ++ add r1,r1,r9 ++ add r2,r2,r10 ++ add r3,r3,r11 ++ ldmia r8,{r8,r9,r10,r11} @ load key material ++ ++ add r4,r4,r8 @ accumulate key material ++ add r8,sp,#4*(8) ++ add r5,r5,r9 ++ add r4,r4,#3 @ counter+3 ++ add r6,r6,r10 ++ add r7,r7,r11 ++ ldr r11,[sp,#4*(32+2)] @ re-load len ++# ifdef __ARMEB__ ++ rev r0,r0 ++ rev r1,r1 ++ rev r2,r2 ++ rev r3,r3 ++ rev r4,r4 ++ rev r5,r5 ++ rev r6,r6 ++ rev r7,r7 ++# endif ++ stmia r8,{r0,r1,r2,r3,r4,r5,r6,r7} ++ add r10,sp,#4*(0) ++ sub r11,r11,#64*3 @ len-=64*3 ++ ++Loop_tail_neon: ++ ldrb r8,[r10],#1 @ read buffer on stack ++ ldrb r9,[r12],#1 @ read input ++ subs r11,r11,#1 ++ eor r8,r8,r9 ++ strb r8,[r14],#1 @ store output ++ bne Loop_tail_neon ++ ++Ldone_neon: ++ add sp,sp,#4*(32+4) ++ vldmia sp,{d8,d9,d10,d11,d12,d13,d14,d15} ++ add sp,sp,#4*(16+3) ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} ++ ++.comm _OPENSSL_armcap_P,4 ++.non_lazy_symbol_pointer ++OPENSSL_armcap_P: ++.indirect_symbol _OPENSSL_armcap_P ++.long 0 ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/fipsmodule/aesv8-armx32.S b/apple-arm/crypto/fipsmodule/aesv8-armx32.S +new file mode 100644 +index 0000000..87b4b0a +--- /dev/null ++++ b/apple-arm/crypto/fipsmodule/aesv8-armx32.S +@@ -0,0 +1,809 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++#if __ARM_MAX_ARCH__>=7 ++.text ++ ++ ++.code 32 ++#undef __thumb2__ ++.align 5 ++Lrcon: ++.long 0x01,0x01,0x01,0x01 ++.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat ++.long 0x1b,0x1b,0x1b,0x1b ++ ++.text ++ ++.globl _aes_hw_set_encrypt_key ++.private_extern _aes_hw_set_encrypt_key ++#ifdef __thumb2__ ++.thumb_func _aes_hw_set_encrypt_key ++#endif ++.align 5 ++_aes_hw_set_encrypt_key: ++Lenc_key: ++ mov r3,#-1 ++ cmp r0,#0 ++ beq Lenc_key_abort ++ cmp r2,#0 ++ beq Lenc_key_abort ++ mov r3,#-2 ++ cmp r1,#128 ++ blt Lenc_key_abort ++ cmp r1,#256 ++ bgt Lenc_key_abort ++ tst r1,#0x3f ++ bne Lenc_key_abort ++ ++ adr r3,Lrcon ++ cmp r1,#192 ++ ++ veor q0,q0,q0 ++ vld1.8 {q3},[r0]! ++ mov r1,#8 @ reuse r1 ++ vld1.32 {q1,q2},[r3]! ++ ++ blt Loop128 ++ beq L192 ++ b L256 ++ ++.align 4 ++Loop128: ++ vtbl.8 d20,{q3},d4 ++ vtbl.8 d21,{q3},d5 ++ vext.8 q9,q0,q3,#12 ++ vst1.32 {q3},[r2]! 
++.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 ++ subs r1,r1,#1 ++ ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q10,q10,q1 ++ veor q3,q3,q9 ++ vshl.u8 q1,q1,#1 ++ veor q3,q3,q10 ++ bne Loop128 ++ ++ vld1.32 {q1},[r3] ++ ++ vtbl.8 d20,{q3},d4 ++ vtbl.8 d21,{q3},d5 ++ vext.8 q9,q0,q3,#12 ++ vst1.32 {q3},[r2]! ++.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 ++ ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q10,q10,q1 ++ veor q3,q3,q9 ++ vshl.u8 q1,q1,#1 ++ veor q3,q3,q10 ++ ++ vtbl.8 d20,{q3},d4 ++ vtbl.8 d21,{q3},d5 ++ vext.8 q9,q0,q3,#12 ++ vst1.32 {q3},[r2]! ++.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 ++ ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q10,q10,q1 ++ veor q3,q3,q9 ++ veor q3,q3,q10 ++ vst1.32 {q3},[r2] ++ add r2,r2,#0x50 ++ ++ mov r12,#10 ++ b Ldone ++ ++.align 4 ++L192: ++ vld1.8 {d16},[r0]! ++ vmov.i8 q10,#8 @ borrow q10 ++ vst1.32 {q3},[r2]! ++ vsub.i8 q2,q2,q10 @ adjust the mask ++ ++Loop192: ++ vtbl.8 d20,{q8},d4 ++ vtbl.8 d21,{q8},d5 ++ vext.8 q9,q0,q3,#12 ++ vst1.32 {d16},[r2]! ++.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 ++ subs r1,r1,#1 ++ ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q3,q3,q9 ++ ++ vdup.32 q9,d7[1] ++ veor q9,q9,q8 ++ veor q10,q10,q1 ++ vext.8 q8,q0,q8,#12 ++ vshl.u8 q1,q1,#1 ++ veor q8,q8,q9 ++ veor q3,q3,q10 ++ veor q8,q8,q10 ++ vst1.32 {q3},[r2]! ++ bne Loop192 ++ ++ mov r12,#12 ++ add r2,r2,#0x20 ++ b Ldone ++ ++.align 4 ++L256: ++ vld1.8 {q8},[r0] ++ mov r1,#7 ++ mov r12,#14 ++ vst1.32 {q3},[r2]! ++ ++Loop256: ++ vtbl.8 d20,{q8},d4 ++ vtbl.8 d21,{q8},d5 ++ vext.8 q9,q0,q3,#12 ++ vst1.32 {q8},[r2]! ++.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 ++ subs r1,r1,#1 ++ ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q3,q3,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q10,q10,q1 ++ veor q3,q3,q9 ++ vshl.u8 q1,q1,#1 ++ veor q3,q3,q10 ++ vst1.32 {q3},[r2]! ++ beq Ldone ++ ++ vdup.32 q10,d7[1] ++ vext.8 q9,q0,q8,#12 ++.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0 ++ ++ veor q8,q8,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q8,q8,q9 ++ vext.8 q9,q0,q9,#12 ++ veor q8,q8,q9 ++ ++ veor q8,q8,q10 ++ b Loop256 ++ ++Ldone: ++ str r12,[r2] ++ mov r3,#0 ++ ++Lenc_key_abort: ++ mov r0,r3 @ return value ++ ++ bx lr ++ ++ ++.globl _aes_hw_set_decrypt_key ++.private_extern _aes_hw_set_decrypt_key ++#ifdef __thumb2__ ++.thumb_func _aes_hw_set_decrypt_key ++#endif ++.align 5 ++_aes_hw_set_decrypt_key: ++ stmdb sp!,{r4,lr} ++ bl Lenc_key ++ ++ cmp r0,#0 ++ bne Ldec_key_abort ++ ++ sub r2,r2,#240 @ restore original r2 ++ mov r4,#-16 ++ add r0,r2,r12,lsl#4 @ end of key schedule ++ ++ vld1.32 {q0},[r2] ++ vld1.32 {q1},[r0] ++ vst1.32 {q0},[r0],r4 ++ vst1.32 {q1},[r2]! ++ ++Loop_imc: ++ vld1.32 {q0},[r2] ++ vld1.32 {q1},[r0] ++.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++ vst1.32 {q0},[r0],r4 ++ vst1.32 {q1},[r2]! ++ cmp r0,r2 ++ bhi Loop_imc ++ ++ vld1.32 {q0},[r2] ++.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 ++ vst1.32 {q0},[r0] ++ ++ eor r0,r0,r0 @ return value ++Ldec_key_abort: ++ ldmia sp!,{r4,pc} ++ ++.globl _aes_hw_encrypt ++.private_extern _aes_hw_encrypt ++#ifdef __thumb2__ ++.thumb_func _aes_hw_encrypt ++#endif ++.align 5 ++_aes_hw_encrypt: ++ AARCH64_VALID_CALL_TARGET ++ ldr r3,[r2,#240] ++ vld1.32 {q0},[r2]! ++ vld1.8 {q2},[r0] ++ sub r3,r3,#2 ++ vld1.32 {q1},[r2]! ++ ++Loop_enc: ++.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 ++.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 ++ vld1.32 {q0},[r2]! 
++ subs r3,r3,#2 ++.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 ++.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 ++ vld1.32 {q1},[r2]! ++ bgt Loop_enc ++ ++.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0 ++.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2 ++ vld1.32 {q0},[r2] ++.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1 ++ veor q2,q2,q0 ++ ++ vst1.8 {q2},[r1] ++ bx lr ++ ++.globl _aes_hw_decrypt ++.private_extern _aes_hw_decrypt ++#ifdef __thumb2__ ++.thumb_func _aes_hw_decrypt ++#endif ++.align 5 ++_aes_hw_decrypt: ++ AARCH64_VALID_CALL_TARGET ++ ldr r3,[r2,#240] ++ vld1.32 {q0},[r2]! ++ vld1.8 {q2},[r0] ++ sub r3,r3,#2 ++ vld1.32 {q1},[r2]! ++ ++Loop_dec: ++.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 ++.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 ++ vld1.32 {q0},[r2]! ++ subs r3,r3,#2 ++.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 ++.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 ++ vld1.32 {q1},[r2]! ++ bgt Loop_dec ++ ++.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0 ++.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2 ++ vld1.32 {q0},[r2] ++.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1 ++ veor q2,q2,q0 ++ ++ vst1.8 {q2},[r1] ++ bx lr ++ ++.globl _aes_hw_cbc_encrypt ++.private_extern _aes_hw_cbc_encrypt ++#ifdef __thumb2__ ++.thumb_func _aes_hw_cbc_encrypt ++#endif ++.align 5 ++_aes_hw_cbc_encrypt: ++ mov ip,sp ++ stmdb sp!,{r4,r5,r6,r7,r8,lr} ++ vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so ++ ldmia ip,{r4,r5} @ load remaining args ++ subs r2,r2,#16 ++ mov r8,#16 ++ blo Lcbc_abort ++ moveq r8,#0 ++ ++ cmp r5,#0 @ en- or decrypting? ++ ldr r5,[r3,#240] ++ and r2,r2,#-16 ++ vld1.8 {q6},[r4] ++ vld1.8 {q0},[r0],r8 ++ ++ vld1.32 {q8,q9},[r3] @ load key schedule... ++ sub r5,r5,#6 ++ add r7,r3,r5,lsl#4 @ pointer to last 7 round keys ++ sub r5,r5,#2 ++ vld1.32 {q10,q11},[r7]! ++ vld1.32 {q12,q13},[r7]! ++ vld1.32 {q14,q15},[r7]! ++ vld1.32 {q7},[r7] ++ ++ add r7,r3,#32 ++ mov r6,r5 ++ beq Lcbc_dec ++ ++ cmp r5,#2 ++ veor q0,q0,q6 ++ veor q5,q8,q7 ++ beq Lcbc_enc128 ++ ++ vld1.32 {q2,q3},[r7] ++ add r7,r3,#16 ++ add r6,r3,#16*4 ++ add r12,r3,#16*5 ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ add r14,r3,#16*6 ++ add r3,r3,#16*7 ++ b Lenter_cbc_enc ++ ++.align 4 ++Loop_cbc_enc: ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ vst1.8 {q6},[r1]! 
++Lenter_cbc_enc: ++.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ vld1.32 {q8},[r6] ++ cmp r5,#4 ++.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ vld1.32 {q9},[r12] ++ beq Lcbc_enc192 ++ ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ vld1.32 {q8},[r14] ++.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ vld1.32 {q9},[r3] ++ nop ++ ++Lcbc_enc192: ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ subs r2,r2,#16 ++.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ moveq r8,#0 ++.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ vld1.8 {q8},[r0],r8 ++.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ veor q8,q8,q5 ++.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ vld1.32 {q9},[r7] @ re-pre-load rndkey[1] ++.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 ++ veor q6,q0,q7 ++ bhs Loop_cbc_enc ++ ++ vst1.8 {q6},[r1]! ++ b Lcbc_done ++ ++.align 5 ++Lcbc_enc128: ++ vld1.32 {q2,q3},[r7] ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ b Lenter_cbc_enc128 ++Loop_cbc_enc128: ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ vst1.8 {q6},[r1]! ++Lenter_cbc_enc128: ++.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ subs r2,r2,#16 ++.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ moveq r8,#0 ++.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ vld1.8 {q8},[r0],r8 ++.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++ veor q8,q8,q5 ++.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 ++ veor q6,q0,q7 ++ bhs Loop_cbc_enc128 ++ ++ vst1.8 {q6},[r1]! ++ b Lcbc_done ++.align 5 ++Lcbc_dec: ++ vld1.8 {q10},[r0]! ++ subs r2,r2,#32 @ bias ++ add r6,r5,#2 ++ vorr q3,q0,q0 ++ vorr q1,q0,q0 ++ vorr q11,q10,q10 ++ blo Lcbc_dec_tail ++ ++ vorr q1,q10,q10 ++ vld1.8 {q10},[r0]! ++ vorr q2,q0,q0 ++ vorr q3,q1,q1 ++ vorr q11,q10,q10 ++ ++Loop3x_cbc_dec: ++.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 ++.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 ++.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ vld1.32 {q8},[r7]! ++ subs r6,r6,#2 ++.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 ++.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 ++.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ vld1.32 {q9},[r7]! 
++ bgt Loop3x_cbc_dec ++ ++.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8 ++.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 ++.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ veor q4,q6,q7 ++ subs r2,r2,#0x30 ++ veor q5,q2,q7 ++ movlo r6,r2 @ r6, r6, is zero at this point ++.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9 ++.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 ++.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ veor q9,q3,q7 ++ add r0,r0,r6 @ r0 is adjusted in such way that ++ @ at exit from the loop q1-q10 ++ @ are loaded with last "words" ++ vorr q6,q11,q11 ++ mov r7,r3 ++.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12 ++.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 ++.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ vld1.8 {q2},[r0]! ++.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13 ++.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 ++.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ vld1.8 {q3},[r0]! ++.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14 ++.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0 ++.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ vld1.8 {q11},[r0]! ++.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15 ++.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 ++.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 ++ vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] ++ add r6,r5,#2 ++ veor q4,q4,q0 ++ veor q5,q5,q1 ++ veor q10,q10,q9 ++ vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] ++ vst1.8 {q4},[r1]! ++ vorr q0,q2,q2 ++ vst1.8 {q5},[r1]! ++ vorr q1,q3,q3 ++ vst1.8 {q10},[r1]! ++ vorr q10,q11,q11 ++ bhs Loop3x_cbc_dec ++ ++ cmn r2,#0x30 ++ beq Lcbc_done ++ nop ++ ++Lcbc_dec_tail: ++.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ vld1.32 {q8},[r7]! ++ subs r6,r6,#2 ++.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ vld1.32 {q9},[r7]! 
++ bgt Lcbc_dec_tail ++ ++.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ cmn r2,#0x20 ++.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ veor q5,q6,q7 ++.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14 ++.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1 ++.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14 ++.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10 ++ veor q9,q3,q7 ++.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15 ++.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15 ++ beq Lcbc_dec_one ++ veor q5,q5,q1 ++ veor q9,q9,q10 ++ vorr q6,q11,q11 ++ vst1.8 {q5},[r1]! ++ vst1.8 {q9},[r1]! ++ b Lcbc_done ++ ++Lcbc_dec_one: ++ veor q5,q5,q10 ++ vorr q6,q11,q11 ++ vst1.8 {q5},[r1]! ++ ++Lcbc_done: ++ vst1.8 {q6},[r4] ++Lcbc_abort: ++ vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} ++ ldmia sp!,{r4,r5,r6,r7,r8,pc} ++ ++.globl _aes_hw_ctr32_encrypt_blocks ++.private_extern _aes_hw_ctr32_encrypt_blocks ++#ifdef __thumb2__ ++.thumb_func _aes_hw_ctr32_encrypt_blocks ++#endif ++.align 5 ++_aes_hw_ctr32_encrypt_blocks: ++ mov ip,sp ++ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr} ++ vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so ++ ldr r4, [ip] @ load remaining arg ++ ldr r5,[r3,#240] ++ ++ ldr r8, [r4, #12] ++ vld1.32 {q0},[r4] ++ ++ vld1.32 {q8,q9},[r3] @ load key schedule... ++ sub r5,r5,#4 ++ mov r12,#16 ++ cmp r2,#2 ++ add r7,r3,r5,lsl#4 @ pointer to last 5 round keys ++ sub r5,r5,#2 ++ vld1.32 {q12,q13},[r7]! ++ vld1.32 {q14,q15},[r7]! ++ vld1.32 {q7},[r7] ++ add r7,r3,#32 ++ mov r6,r5 ++ movlo r12,#0 ++ ++ @ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are ++ @ affected by silicon errata #1742098 [0] and #1655431 [1], ++ @ respectively, where the second instruction of an aese/aesmc ++ @ instruction pair may execute twice if an interrupt is taken right ++ @ after the first instruction consumes an input register of which a ++ @ single 32-bit lane has been updated the last time it was modified. ++ @ ++ @ This function uses a counter in one 32-bit lane. The ++ @ could write to q1 and q10 directly, but that trips this bugs. ++ @ We write to q6 and copy to the final register as a workaround. ++ @ ++ @ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice ++ @ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice ++#ifndef __ARMEB__ ++ rev r8, r8 ++#endif ++ add r10, r8, #1 ++ vorr q6,q0,q0 ++ rev r10, r10 ++ vmov.32 d13[1],r10 ++ add r8, r8, #2 ++ vorr q1,q6,q6 ++ bls Lctr32_tail ++ rev r12, r8 ++ vmov.32 d13[1],r12 ++ sub r2,r2,#3 @ bias ++ vorr q10,q6,q6 ++ b Loop3x_ctr32 ++ ++.align 4 ++Loop3x_ctr32: ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 ++.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 ++.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 ++.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 ++ vld1.32 {q8},[r7]! 
++ subs r6,r6,#2 ++.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 ++.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 ++.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 ++.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 ++ vld1.32 {q9},[r7]! ++ bgt Loop3x_ctr32 ++ ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0 ++.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 ++.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1 ++ vld1.8 {q2},[r0]! ++ add r9,r8,#1 ++.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8 ++.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10 ++ vld1.8 {q3},[r0]! ++ rev r9,r9 ++.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9 ++.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 ++.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9 ++.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 ++ vld1.8 {q11},[r0]! ++ mov r7,r3 ++.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9 ++.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10 ++.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12 ++.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 ++.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12 ++.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 ++ veor q2,q2,q7 ++ add r10,r8,#2 ++.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12 ++.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 ++ veor q3,q3,q7 ++ add r8,r8,#3 ++.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13 ++.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 ++.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13 ++.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 ++ @ Note the logic to update q0, q1, and q1 is written to work ++ @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in ++ @ 32-bit mode. See the comment above. ++ veor q11,q11,q7 ++ vmov.32 d13[1], r9 ++.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13 ++.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 ++ vorr q0,q6,q6 ++ rev r10,r10 ++.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14 ++.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4 ++ vmov.32 d13[1], r10 ++ rev r12,r8 ++.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14 ++.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5 ++ vorr q1,q6,q6 ++ vmov.32 d13[1], r12 ++.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14 ++.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9 ++ vorr q10,q6,q6 ++ subs r2,r2,#3 ++.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15 ++.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15 ++.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15 ++ ++ veor q2,q2,q4 ++ vld1.32 {q8},[r7]! @ re-pre-load rndkey[0] ++ vst1.8 {q2},[r1]! ++ veor q3,q3,q5 ++ mov r6,r5 ++ vst1.8 {q3},[r1]! ++ veor q11,q11,q9 ++ vld1.32 {q9},[r7]! @ re-pre-load rndkey[1] ++ vst1.8 {q11},[r1]! ++ bhs Loop3x_ctr32 ++ ++ adds r2,r2,#3 ++ beq Lctr32_done ++ cmp r2,#1 ++ mov r12,#16 ++ moveq r12,#0 ++ ++Lctr32_tail: ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 ++.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 ++ vld1.32 {q8},[r7]! ++ subs r6,r6,#2 ++.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 ++.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 ++ vld1.32 {q9},[r7]! 
++ bgt Lctr32_tail ++ ++.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8 ++.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 ++.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9 ++.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 ++ vld1.8 {q2},[r0],r12 ++.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12 ++.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 ++ vld1.8 {q3},[r0] ++.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13 ++.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 ++ veor q2,q2,q7 ++.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14 ++.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0 ++.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14 ++.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1 ++ veor q3,q3,q7 ++.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15 ++.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15 ++ ++ cmp r2,#1 ++ veor q2,q2,q0 ++ veor q3,q3,q1 ++ vst1.8 {q2},[r1]! ++ beq Lctr32_done ++ vst1.8 {q3},[r1] ++ ++Lctr32_done: ++ vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc} ++ ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/fipsmodule/armv4-mont.S b/apple-arm/crypto/fipsmodule/armv4-mont.S +new file mode 100644 +index 0000000..e549d1f +--- /dev/null ++++ b/apple-arm/crypto/fipsmodule/armv4-mont.S +@@ -0,0 +1,982 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both ++@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. ++ ++ ++.text ++#if defined(__thumb2__) ++.syntax unified ++.thumb ++#else ++.code 32 ++#endif ++ ++#if __ARM_MAX_ARCH__>=7 ++.align 5 ++LOPENSSL_armcap: ++.word OPENSSL_armcap_P-Lbn_mul_mont ++#endif ++ ++.globl _bn_mul_mont ++.private_extern _bn_mul_mont ++#ifdef __thumb2__ ++.thumb_func _bn_mul_mont ++#endif ++ ++.align 5 ++_bn_mul_mont: ++Lbn_mul_mont: ++ ldr ip,[sp,#4] @ load num ++ stmdb sp!,{r0,r2} @ sp points at argument block ++#if __ARM_MAX_ARCH__>=7 ++ tst ip,#7 ++ bne Lialu ++ adr r0,Lbn_mul_mont ++ ldr r2,LOPENSSL_armcap ++ ldr r0,[r0,r2] ++#ifdef __APPLE__ ++ ldr r0,[r0] ++#endif ++ tst r0,#ARMV7_NEON @ NEON available? 
++ ldmia sp, {r0,r2} ++ beq Lialu ++ add sp,sp,#8 ++ b bn_mul8x_mont_neon ++.align 4 ++Lialu: ++#endif ++ cmp ip,#2 ++ mov r0,ip @ load num ++#ifdef __thumb2__ ++ ittt lt ++#endif ++ movlt r0,#0 ++ addlt sp,sp,#2*4 ++ blt Labrt ++ ++ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers ++ ++ mov r0,r0,lsl#2 @ rescale r0 for byte count ++ sub sp,sp,r0 @ alloca(4*num) ++ sub sp,sp,#4 @ +extra dword ++ sub r0,r0,#4 @ "num=num-1" ++ add r4,r2,r0 @ &bp[num-1] ++ ++ add r0,sp,r0 @ r0 to point at &tp[num-1] ++ ldr r8,[r0,#14*4] @ &n0 ++ ldr r2,[r2] @ bp[0] ++ ldr r5,[r1],#4 @ ap[0],ap++ ++ ldr r6,[r3],#4 @ np[0],np++ ++ ldr r8,[r8] @ *n0 ++ str r4,[r0,#15*4] @ save &bp[num] ++ ++ umull r10,r11,r5,r2 @ ap[0]*bp[0] ++ str r8,[r0,#14*4] @ save n0 value ++ mul r8,r10,r8 @ "tp[0]"*n0 ++ mov r12,#0 ++ umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]" ++ mov r4,sp ++ ++L1st: ++ ldr r5,[r1],#4 @ ap[j],ap++ ++ mov r10,r11 ++ ldr r6,[r3],#4 @ np[j],np++ ++ mov r11,#0 ++ umlal r10,r11,r5,r2 @ ap[j]*bp[0] ++ mov r14,#0 ++ umlal r12,r14,r6,r8 @ np[j]*n0 ++ adds r12,r12,r10 ++ str r12,[r4],#4 @ tp[j-1]=,tp++ ++ adc r12,r14,#0 ++ cmp r4,r0 ++ bne L1st ++ ++ adds r12,r12,r11 ++ ldr r4,[r0,#13*4] @ restore bp ++ mov r14,#0 ++ ldr r8,[r0,#14*4] @ restore n0 ++ adc r14,r14,#0 ++ str r12,[r0] @ tp[num-1]= ++ mov r7,sp ++ str r14,[r0,#4] @ tp[num]= ++ ++Louter: ++ sub r7,r0,r7 @ "original" r0-1 value ++ sub r1,r1,r7 @ "rewind" ap to &ap[1] ++ ldr r2,[r4,#4]! @ *(++bp) ++ sub r3,r3,r7 @ "rewind" np to &np[1] ++ ldr r5,[r1,#-4] @ ap[0] ++ ldr r10,[sp] @ tp[0] ++ ldr r6,[r3,#-4] @ np[0] ++ ldr r7,[sp,#4] @ tp[1] ++ ++ mov r11,#0 ++ umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0] ++ str r4,[r0,#13*4] @ save bp ++ mul r8,r10,r8 ++ mov r12,#0 ++ umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]" ++ mov r4,sp ++ ++Linner: ++ ldr r5,[r1],#4 @ ap[j],ap++ ++ adds r10,r11,r7 @ +=tp[j] ++ ldr r6,[r3],#4 @ np[j],np++ ++ mov r11,#0 ++ umlal r10,r11,r5,r2 @ ap[j]*bp[i] ++ mov r14,#0 ++ umlal r12,r14,r6,r8 @ np[j]*n0 ++ adc r11,r11,#0 ++ ldr r7,[r4,#8] @ tp[j+1] ++ adds r12,r12,r10 ++ str r12,[r4],#4 @ tp[j-1]=,tp++ ++ adc r12,r14,#0 ++ cmp r4,r0 ++ bne Linner ++ ++ adds r12,r12,r11 ++ mov r14,#0 ++ ldr r4,[r0,#13*4] @ restore bp ++ adc r14,r14,#0 ++ ldr r8,[r0,#14*4] @ restore n0 ++ adds r12,r12,r7 ++ ldr r7,[r0,#15*4] @ restore &bp[num] ++ adc r14,r14,#0 ++ str r12,[r0] @ tp[num-1]= ++ str r14,[r0,#4] @ tp[num]= ++ ++ cmp r4,r7 ++#ifdef __thumb2__ ++ itt ne ++#endif ++ movne r7,sp ++ bne Louter ++ ++ ldr r2,[r0,#12*4] @ pull rp ++ mov r5,sp ++ add r0,r0,#4 @ r0 to point at &tp[num] ++ sub r5,r0,r5 @ "original" num value ++ mov r4,sp @ "rewind" r4 ++ mov r1,r4 @ "borrow" r1 ++ sub r3,r3,r5 @ "rewind" r3 to &np[0] ++ ++ subs r7,r7,r7 @ "clear" carry flag ++Lsub: ldr r7,[r4],#4 ++ ldr r6,[r3],#4 ++ sbcs r7,r7,r6 @ tp[j]-np[j] ++ str r7,[r2],#4 @ rp[j]= ++ teq r4,r0 @ preserve carry ++ bne Lsub ++ sbcs r14,r14,#0 @ upmost carry ++ mov r4,sp @ "rewind" r4 ++ sub r2,r2,r5 @ "rewind" r2 ++ ++Lcopy: ldr r7,[r4] @ conditional copy ++ ldr r5,[r2] ++ str sp,[r4],#4 @ zap tp ++#ifdef __thumb2__ ++ it cc ++#endif ++ movcc r5,r7 ++ str r5,[r2],#4 ++ teq r4,r0 @ preserve carry ++ bne Lcopy ++ ++ mov sp,r0 ++ add sp,sp,#4 @ skip over tp[num+1] ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers ++ add sp,sp,#2*4 @ skip over {r0,r2} ++ mov r0,#1 ++Labrt: ++#if __ARM_ARCH__>=5 ++ bx lr @ bx lr ++#else ++ tst lr,#1 ++ moveq pc,lr @ be binary compatible with V4, yet ++.word 0xe12fff1e @ interoperable with Thumb ISA:-) ++#endif ++ ++#if 
__ARM_MAX_ARCH__>=7 ++ ++ ++ ++#ifdef __thumb2__ ++.thumb_func bn_mul8x_mont_neon ++#endif ++.align 5 ++bn_mul8x_mont_neon: ++ mov ip,sp ++ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11} ++ vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so ++ ldmia ip,{r4,r5} @ load rest of parameter block ++ mov ip,sp ++ ++ cmp r5,#8 ++ bhi LNEON_8n ++ ++ @ special case for r5==8, everything is in register bank... ++ ++ vld1.32 {d28[0]}, [r2,:32]! ++ veor d8,d8,d8 ++ sub r7,sp,r5,lsl#4 ++ vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-( ++ and r7,r7,#-64 ++ vld1.32 {d30[0]}, [r4,:32] ++ mov sp,r7 @ alloca ++ vzip.16 d28,d8 ++ ++ vmull.u32 q6,d28,d0[0] ++ vmull.u32 q7,d28,d0[1] ++ vmull.u32 q8,d28,d1[0] ++ vshl.i64 d29,d13,#16 ++ vmull.u32 q9,d28,d1[1] ++ ++ vadd.u64 d29,d29,d12 ++ veor d8,d8,d8 ++ vmul.u32 d29,d29,d30 ++ ++ vmull.u32 q10,d28,d2[0] ++ vld1.32 {d4,d5,d6,d7}, [r3]! ++ vmull.u32 q11,d28,d2[1] ++ vmull.u32 q12,d28,d3[0] ++ vzip.16 d29,d8 ++ vmull.u32 q13,d28,d3[1] ++ ++ vmlal.u32 q6,d29,d4[0] ++ sub r9,r5,#1 ++ vmlal.u32 q7,d29,d4[1] ++ vmlal.u32 q8,d29,d5[0] ++ vmlal.u32 q9,d29,d5[1] ++ ++ vmlal.u32 q10,d29,d6[0] ++ vmov q5,q6 ++ vmlal.u32 q11,d29,d6[1] ++ vmov q6,q7 ++ vmlal.u32 q12,d29,d7[0] ++ vmov q7,q8 ++ vmlal.u32 q13,d29,d7[1] ++ vmov q8,q9 ++ vmov q9,q10 ++ vshr.u64 d10,d10,#16 ++ vmov q10,q11 ++ vmov q11,q12 ++ vadd.u64 d10,d10,d11 ++ vmov q12,q13 ++ veor q13,q13 ++ vshr.u64 d10,d10,#16 ++ ++ b LNEON_outer8 ++ ++.align 4 ++LNEON_outer8: ++ vld1.32 {d28[0]}, [r2,:32]! ++ veor d8,d8,d8 ++ vzip.16 d28,d8 ++ vadd.u64 d12,d12,d10 ++ ++ vmlal.u32 q6,d28,d0[0] ++ vmlal.u32 q7,d28,d0[1] ++ vmlal.u32 q8,d28,d1[0] ++ vshl.i64 d29,d13,#16 ++ vmlal.u32 q9,d28,d1[1] ++ ++ vadd.u64 d29,d29,d12 ++ veor d8,d8,d8 ++ subs r9,r9,#1 ++ vmul.u32 d29,d29,d30 ++ ++ vmlal.u32 q10,d28,d2[0] ++ vmlal.u32 q11,d28,d2[1] ++ vmlal.u32 q12,d28,d3[0] ++ vzip.16 d29,d8 ++ vmlal.u32 q13,d28,d3[1] ++ ++ vmlal.u32 q6,d29,d4[0] ++ vmlal.u32 q7,d29,d4[1] ++ vmlal.u32 q8,d29,d5[0] ++ vmlal.u32 q9,d29,d5[1] ++ ++ vmlal.u32 q10,d29,d6[0] ++ vmov q5,q6 ++ vmlal.u32 q11,d29,d6[1] ++ vmov q6,q7 ++ vmlal.u32 q12,d29,d7[0] ++ vmov q7,q8 ++ vmlal.u32 q13,d29,d7[1] ++ vmov q8,q9 ++ vmov q9,q10 ++ vshr.u64 d10,d10,#16 ++ vmov q10,q11 ++ vmov q11,q12 ++ vadd.u64 d10,d10,d11 ++ vmov q12,q13 ++ veor q13,q13 ++ vshr.u64 d10,d10,#16 ++ ++ bne LNEON_outer8 ++ ++ vadd.u64 d12,d12,d10 ++ mov r7,sp ++ vshr.u64 d10,d12,#16 ++ mov r8,r5 ++ vadd.u64 d13,d13,d10 ++ add r6,sp,#96 ++ vshr.u64 d10,d13,#16 ++ vzip.16 d12,d13 ++ ++ b LNEON_tail_entry ++ ++.align 4 ++LNEON_8n: ++ veor q6,q6,q6 ++ sub r7,sp,#128 ++ veor q7,q7,q7 ++ sub r7,r7,r5,lsl#4 ++ veor q8,q8,q8 ++ and r7,r7,#-64 ++ veor q9,q9,q9 ++ mov sp,r7 @ alloca ++ veor q10,q10,q10 ++ add r7,r7,#256 ++ veor q11,q11,q11 ++ sub r8,r5,#8 ++ veor q12,q12,q12 ++ veor q13,q13,q13 ++ ++LNEON_8n_init: ++ vst1.64 {q6,q7},[r7,:256]! ++ subs r8,r8,#8 ++ vst1.64 {q8,q9},[r7,:256]! ++ vst1.64 {q10,q11},[r7,:256]! ++ vst1.64 {q12,q13},[r7,:256]! ++ bne LNEON_8n_init ++ ++ add r6,sp,#256 ++ vld1.32 {d0,d1,d2,d3},[r1]! ++ add r10,sp,#8 ++ vld1.32 {d30[0]},[r4,:32] ++ mov r9,r5 ++ b LNEON_8n_outer ++ ++.align 4 ++LNEON_8n_outer: ++ vld1.32 {d28[0]},[r2,:32]! @ *b++ ++ veor d8,d8,d8 ++ vzip.16 d28,d8 ++ add r7,sp,#128 ++ vld1.32 {d4,d5,d6,d7},[r3]! 
++ ++ vmlal.u32 q6,d28,d0[0] ++ vmlal.u32 q7,d28,d0[1] ++ veor d8,d8,d8 ++ vmlal.u32 q8,d28,d1[0] ++ vshl.i64 d29,d13,#16 ++ vmlal.u32 q9,d28,d1[1] ++ vadd.u64 d29,d29,d12 ++ vmlal.u32 q10,d28,d2[0] ++ vmul.u32 d29,d29,d30 ++ vmlal.u32 q11,d28,d2[1] ++ vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0] ++ vmlal.u32 q12,d28,d3[0] ++ vzip.16 d29,d8 ++ vmlal.u32 q13,d28,d3[1] ++ vld1.32 {d28[0]},[r2,:32]! @ *b++ ++ vmlal.u32 q6,d29,d4[0] ++ veor d10,d10,d10 ++ vmlal.u32 q7,d29,d4[1] ++ vzip.16 d28,d10 ++ vmlal.u32 q8,d29,d5[0] ++ vshr.u64 d12,d12,#16 ++ vmlal.u32 q9,d29,d5[1] ++ vmlal.u32 q10,d29,d6[0] ++ vadd.u64 d12,d12,d13 ++ vmlal.u32 q11,d29,d6[1] ++ vshr.u64 d12,d12,#16 ++ vmlal.u32 q12,d29,d7[0] ++ vmlal.u32 q13,d29,d7[1] ++ vadd.u64 d14,d14,d12 ++ vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0] ++ vmlal.u32 q7,d28,d0[0] ++ vld1.64 {q6},[r6,:128]! ++ vmlal.u32 q8,d28,d0[1] ++ veor d8,d8,d8 ++ vmlal.u32 q9,d28,d1[0] ++ vshl.i64 d29,d15,#16 ++ vmlal.u32 q10,d28,d1[1] ++ vadd.u64 d29,d29,d14 ++ vmlal.u32 q11,d28,d2[0] ++ vmul.u32 d29,d29,d30 ++ vmlal.u32 q12,d28,d2[1] ++ vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1] ++ vmlal.u32 q13,d28,d3[0] ++ vzip.16 d29,d8 ++ vmlal.u32 q6,d28,d3[1] ++ vld1.32 {d28[0]},[r2,:32]! @ *b++ ++ vmlal.u32 q7,d29,d4[0] ++ veor d10,d10,d10 ++ vmlal.u32 q8,d29,d4[1] ++ vzip.16 d28,d10 ++ vmlal.u32 q9,d29,d5[0] ++ vshr.u64 d14,d14,#16 ++ vmlal.u32 q10,d29,d5[1] ++ vmlal.u32 q11,d29,d6[0] ++ vadd.u64 d14,d14,d15 ++ vmlal.u32 q12,d29,d6[1] ++ vshr.u64 d14,d14,#16 ++ vmlal.u32 q13,d29,d7[0] ++ vmlal.u32 q6,d29,d7[1] ++ vadd.u64 d16,d16,d14 ++ vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1] ++ vmlal.u32 q8,d28,d0[0] ++ vld1.64 {q7},[r6,:128]! ++ vmlal.u32 q9,d28,d0[1] ++ veor d8,d8,d8 ++ vmlal.u32 q10,d28,d1[0] ++ vshl.i64 d29,d17,#16 ++ vmlal.u32 q11,d28,d1[1] ++ vadd.u64 d29,d29,d16 ++ vmlal.u32 q12,d28,d2[0] ++ vmul.u32 d29,d29,d30 ++ vmlal.u32 q13,d28,d2[1] ++ vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2] ++ vmlal.u32 q6,d28,d3[0] ++ vzip.16 d29,d8 ++ vmlal.u32 q7,d28,d3[1] ++ vld1.32 {d28[0]},[r2,:32]! @ *b++ ++ vmlal.u32 q8,d29,d4[0] ++ veor d10,d10,d10 ++ vmlal.u32 q9,d29,d4[1] ++ vzip.16 d28,d10 ++ vmlal.u32 q10,d29,d5[0] ++ vshr.u64 d16,d16,#16 ++ vmlal.u32 q11,d29,d5[1] ++ vmlal.u32 q12,d29,d6[0] ++ vadd.u64 d16,d16,d17 ++ vmlal.u32 q13,d29,d6[1] ++ vshr.u64 d16,d16,#16 ++ vmlal.u32 q6,d29,d7[0] ++ vmlal.u32 q7,d29,d7[1] ++ vadd.u64 d18,d18,d16 ++ vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2] ++ vmlal.u32 q9,d28,d0[0] ++ vld1.64 {q8},[r6,:128]! ++ vmlal.u32 q10,d28,d0[1] ++ veor d8,d8,d8 ++ vmlal.u32 q11,d28,d1[0] ++ vshl.i64 d29,d19,#16 ++ vmlal.u32 q12,d28,d1[1] ++ vadd.u64 d29,d29,d18 ++ vmlal.u32 q13,d28,d2[0] ++ vmul.u32 d29,d29,d30 ++ vmlal.u32 q6,d28,d2[1] ++ vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3] ++ vmlal.u32 q7,d28,d3[0] ++ vzip.16 d29,d8 ++ vmlal.u32 q8,d28,d3[1] ++ vld1.32 {d28[0]},[r2,:32]! @ *b++ ++ vmlal.u32 q9,d29,d4[0] ++ veor d10,d10,d10 ++ vmlal.u32 q10,d29,d4[1] ++ vzip.16 d28,d10 ++ vmlal.u32 q11,d29,d5[0] ++ vshr.u64 d18,d18,#16 ++ vmlal.u32 q12,d29,d5[1] ++ vmlal.u32 q13,d29,d6[0] ++ vadd.u64 d18,d18,d19 ++ vmlal.u32 q6,d29,d6[1] ++ vshr.u64 d18,d18,#16 ++ vmlal.u32 q7,d29,d7[0] ++ vmlal.u32 q8,d29,d7[1] ++ vadd.u64 d20,d20,d18 ++ vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3] ++ vmlal.u32 q10,d28,d0[0] ++ vld1.64 {q9},[r6,:128]! 
++ vmlal.u32 q11,d28,d0[1] ++ veor d8,d8,d8 ++ vmlal.u32 q12,d28,d1[0] ++ vshl.i64 d29,d21,#16 ++ vmlal.u32 q13,d28,d1[1] ++ vadd.u64 d29,d29,d20 ++ vmlal.u32 q6,d28,d2[0] ++ vmul.u32 d29,d29,d30 ++ vmlal.u32 q7,d28,d2[1] ++ vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4] ++ vmlal.u32 q8,d28,d3[0] ++ vzip.16 d29,d8 ++ vmlal.u32 q9,d28,d3[1] ++ vld1.32 {d28[0]},[r2,:32]! @ *b++ ++ vmlal.u32 q10,d29,d4[0] ++ veor d10,d10,d10 ++ vmlal.u32 q11,d29,d4[1] ++ vzip.16 d28,d10 ++ vmlal.u32 q12,d29,d5[0] ++ vshr.u64 d20,d20,#16 ++ vmlal.u32 q13,d29,d5[1] ++ vmlal.u32 q6,d29,d6[0] ++ vadd.u64 d20,d20,d21 ++ vmlal.u32 q7,d29,d6[1] ++ vshr.u64 d20,d20,#16 ++ vmlal.u32 q8,d29,d7[0] ++ vmlal.u32 q9,d29,d7[1] ++ vadd.u64 d22,d22,d20 ++ vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4] ++ vmlal.u32 q11,d28,d0[0] ++ vld1.64 {q10},[r6,:128]! ++ vmlal.u32 q12,d28,d0[1] ++ veor d8,d8,d8 ++ vmlal.u32 q13,d28,d1[0] ++ vshl.i64 d29,d23,#16 ++ vmlal.u32 q6,d28,d1[1] ++ vadd.u64 d29,d29,d22 ++ vmlal.u32 q7,d28,d2[0] ++ vmul.u32 d29,d29,d30 ++ vmlal.u32 q8,d28,d2[1] ++ vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5] ++ vmlal.u32 q9,d28,d3[0] ++ vzip.16 d29,d8 ++ vmlal.u32 q10,d28,d3[1] ++ vld1.32 {d28[0]},[r2,:32]! @ *b++ ++ vmlal.u32 q11,d29,d4[0] ++ veor d10,d10,d10 ++ vmlal.u32 q12,d29,d4[1] ++ vzip.16 d28,d10 ++ vmlal.u32 q13,d29,d5[0] ++ vshr.u64 d22,d22,#16 ++ vmlal.u32 q6,d29,d5[1] ++ vmlal.u32 q7,d29,d6[0] ++ vadd.u64 d22,d22,d23 ++ vmlal.u32 q8,d29,d6[1] ++ vshr.u64 d22,d22,#16 ++ vmlal.u32 q9,d29,d7[0] ++ vmlal.u32 q10,d29,d7[1] ++ vadd.u64 d24,d24,d22 ++ vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5] ++ vmlal.u32 q12,d28,d0[0] ++ vld1.64 {q11},[r6,:128]! ++ vmlal.u32 q13,d28,d0[1] ++ veor d8,d8,d8 ++ vmlal.u32 q6,d28,d1[0] ++ vshl.i64 d29,d25,#16 ++ vmlal.u32 q7,d28,d1[1] ++ vadd.u64 d29,d29,d24 ++ vmlal.u32 q8,d28,d2[0] ++ vmul.u32 d29,d29,d30 ++ vmlal.u32 q9,d28,d2[1] ++ vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6] ++ vmlal.u32 q10,d28,d3[0] ++ vzip.16 d29,d8 ++ vmlal.u32 q11,d28,d3[1] ++ vld1.32 {d28[0]},[r2,:32]! @ *b++ ++ vmlal.u32 q12,d29,d4[0] ++ veor d10,d10,d10 ++ vmlal.u32 q13,d29,d4[1] ++ vzip.16 d28,d10 ++ vmlal.u32 q6,d29,d5[0] ++ vshr.u64 d24,d24,#16 ++ vmlal.u32 q7,d29,d5[1] ++ vmlal.u32 q8,d29,d6[0] ++ vadd.u64 d24,d24,d25 ++ vmlal.u32 q9,d29,d6[1] ++ vshr.u64 d24,d24,#16 ++ vmlal.u32 q10,d29,d7[0] ++ vmlal.u32 q11,d29,d7[1] ++ vadd.u64 d26,d26,d24 ++ vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6] ++ vmlal.u32 q13,d28,d0[0] ++ vld1.64 {q12},[r6,:128]! ++ vmlal.u32 q6,d28,d0[1] ++ veor d8,d8,d8 ++ vmlal.u32 q7,d28,d1[0] ++ vshl.i64 d29,d27,#16 ++ vmlal.u32 q8,d28,d1[1] ++ vadd.u64 d29,d29,d26 ++ vmlal.u32 q9,d28,d2[0] ++ vmul.u32 d29,d29,d30 ++ vmlal.u32 q10,d28,d2[1] ++ vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7] ++ vmlal.u32 q11,d28,d3[0] ++ vzip.16 d29,d8 ++ vmlal.u32 q12,d28,d3[1] ++ vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0] ++ vmlal.u32 q13,d29,d4[0] ++ vld1.32 {d0,d1,d2,d3},[r1]! ++ vmlal.u32 q6,d29,d4[1] ++ vmlal.u32 q7,d29,d5[0] ++ vshr.u64 d26,d26,#16 ++ vmlal.u32 q8,d29,d5[1] ++ vmlal.u32 q9,d29,d6[0] ++ vadd.u64 d26,d26,d27 ++ vmlal.u32 q10,d29,d6[1] ++ vshr.u64 d26,d26,#16 ++ vmlal.u32 q11,d29,d7[0] ++ vmlal.u32 q12,d29,d7[1] ++ vadd.u64 d12,d12,d26 ++ vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7] ++ add r10,sp,#8 @ rewind ++ sub r8,r5,#8 ++ b LNEON_8n_inner ++ ++.align 4 ++LNEON_8n_inner: ++ subs r8,r8,#8 ++ vmlal.u32 q6,d28,d0[0] ++ vld1.64 {q13},[r6,:128] ++ vmlal.u32 q7,d28,d0[1] ++ vld1.32 {d29},[r10,:64]! 
@ pull smashed m[8*i+0] ++ vmlal.u32 q8,d28,d1[0] ++ vld1.32 {d4,d5,d6,d7},[r3]! ++ vmlal.u32 q9,d28,d1[1] ++ it ne ++ addne r6,r6,#16 @ don't advance in last iteration ++ vmlal.u32 q10,d28,d2[0] ++ vmlal.u32 q11,d28,d2[1] ++ vmlal.u32 q12,d28,d3[0] ++ vmlal.u32 q13,d28,d3[1] ++ vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1] ++ vmlal.u32 q6,d29,d4[0] ++ vmlal.u32 q7,d29,d4[1] ++ vmlal.u32 q8,d29,d5[0] ++ vmlal.u32 q9,d29,d5[1] ++ vmlal.u32 q10,d29,d6[0] ++ vmlal.u32 q11,d29,d6[1] ++ vmlal.u32 q12,d29,d7[0] ++ vmlal.u32 q13,d29,d7[1] ++ vst1.64 {q6},[r7,:128]! ++ vmlal.u32 q7,d28,d0[0] ++ vld1.64 {q6},[r6,:128] ++ vmlal.u32 q8,d28,d0[1] ++ vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1] ++ vmlal.u32 q9,d28,d1[0] ++ it ne ++ addne r6,r6,#16 @ don't advance in last iteration ++ vmlal.u32 q10,d28,d1[1] ++ vmlal.u32 q11,d28,d2[0] ++ vmlal.u32 q12,d28,d2[1] ++ vmlal.u32 q13,d28,d3[0] ++ vmlal.u32 q6,d28,d3[1] ++ vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2] ++ vmlal.u32 q7,d29,d4[0] ++ vmlal.u32 q8,d29,d4[1] ++ vmlal.u32 q9,d29,d5[0] ++ vmlal.u32 q10,d29,d5[1] ++ vmlal.u32 q11,d29,d6[0] ++ vmlal.u32 q12,d29,d6[1] ++ vmlal.u32 q13,d29,d7[0] ++ vmlal.u32 q6,d29,d7[1] ++ vst1.64 {q7},[r7,:128]! ++ vmlal.u32 q8,d28,d0[0] ++ vld1.64 {q7},[r6,:128] ++ vmlal.u32 q9,d28,d0[1] ++ vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2] ++ vmlal.u32 q10,d28,d1[0] ++ it ne ++ addne r6,r6,#16 @ don't advance in last iteration ++ vmlal.u32 q11,d28,d1[1] ++ vmlal.u32 q12,d28,d2[0] ++ vmlal.u32 q13,d28,d2[1] ++ vmlal.u32 q6,d28,d3[0] ++ vmlal.u32 q7,d28,d3[1] ++ vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3] ++ vmlal.u32 q8,d29,d4[0] ++ vmlal.u32 q9,d29,d4[1] ++ vmlal.u32 q10,d29,d5[0] ++ vmlal.u32 q11,d29,d5[1] ++ vmlal.u32 q12,d29,d6[0] ++ vmlal.u32 q13,d29,d6[1] ++ vmlal.u32 q6,d29,d7[0] ++ vmlal.u32 q7,d29,d7[1] ++ vst1.64 {q8},[r7,:128]! ++ vmlal.u32 q9,d28,d0[0] ++ vld1.64 {q8},[r6,:128] ++ vmlal.u32 q10,d28,d0[1] ++ vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3] ++ vmlal.u32 q11,d28,d1[0] ++ it ne ++ addne r6,r6,#16 @ don't advance in last iteration ++ vmlal.u32 q12,d28,d1[1] ++ vmlal.u32 q13,d28,d2[0] ++ vmlal.u32 q6,d28,d2[1] ++ vmlal.u32 q7,d28,d3[0] ++ vmlal.u32 q8,d28,d3[1] ++ vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4] ++ vmlal.u32 q9,d29,d4[0] ++ vmlal.u32 q10,d29,d4[1] ++ vmlal.u32 q11,d29,d5[0] ++ vmlal.u32 q12,d29,d5[1] ++ vmlal.u32 q13,d29,d6[0] ++ vmlal.u32 q6,d29,d6[1] ++ vmlal.u32 q7,d29,d7[0] ++ vmlal.u32 q8,d29,d7[1] ++ vst1.64 {q9},[r7,:128]! ++ vmlal.u32 q10,d28,d0[0] ++ vld1.64 {q9},[r6,:128] ++ vmlal.u32 q11,d28,d0[1] ++ vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4] ++ vmlal.u32 q12,d28,d1[0] ++ it ne ++ addne r6,r6,#16 @ don't advance in last iteration ++ vmlal.u32 q13,d28,d1[1] ++ vmlal.u32 q6,d28,d2[0] ++ vmlal.u32 q7,d28,d2[1] ++ vmlal.u32 q8,d28,d3[0] ++ vmlal.u32 q9,d28,d3[1] ++ vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5] ++ vmlal.u32 q10,d29,d4[0] ++ vmlal.u32 q11,d29,d4[1] ++ vmlal.u32 q12,d29,d5[0] ++ vmlal.u32 q13,d29,d5[1] ++ vmlal.u32 q6,d29,d6[0] ++ vmlal.u32 q7,d29,d6[1] ++ vmlal.u32 q8,d29,d7[0] ++ vmlal.u32 q9,d29,d7[1] ++ vst1.64 {q10},[r7,:128]! ++ vmlal.u32 q11,d28,d0[0] ++ vld1.64 {q10},[r6,:128] ++ vmlal.u32 q12,d28,d0[1] ++ vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5] ++ vmlal.u32 q13,d28,d1[0] ++ it ne ++ addne r6,r6,#16 @ don't advance in last iteration ++ vmlal.u32 q6,d28,d1[1] ++ vmlal.u32 q7,d28,d2[0] ++ vmlal.u32 q8,d28,d2[1] ++ vmlal.u32 q9,d28,d3[0] ++ vmlal.u32 q10,d28,d3[1] ++ vld1.32 {d28},[r10,:64]! 
@ pull smashed b[8*i+6] ++ vmlal.u32 q11,d29,d4[0] ++ vmlal.u32 q12,d29,d4[1] ++ vmlal.u32 q13,d29,d5[0] ++ vmlal.u32 q6,d29,d5[1] ++ vmlal.u32 q7,d29,d6[0] ++ vmlal.u32 q8,d29,d6[1] ++ vmlal.u32 q9,d29,d7[0] ++ vmlal.u32 q10,d29,d7[1] ++ vst1.64 {q11},[r7,:128]! ++ vmlal.u32 q12,d28,d0[0] ++ vld1.64 {q11},[r6,:128] ++ vmlal.u32 q13,d28,d0[1] ++ vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6] ++ vmlal.u32 q6,d28,d1[0] ++ it ne ++ addne r6,r6,#16 @ don't advance in last iteration ++ vmlal.u32 q7,d28,d1[1] ++ vmlal.u32 q8,d28,d2[0] ++ vmlal.u32 q9,d28,d2[1] ++ vmlal.u32 q10,d28,d3[0] ++ vmlal.u32 q11,d28,d3[1] ++ vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7] ++ vmlal.u32 q12,d29,d4[0] ++ vmlal.u32 q13,d29,d4[1] ++ vmlal.u32 q6,d29,d5[0] ++ vmlal.u32 q7,d29,d5[1] ++ vmlal.u32 q8,d29,d6[0] ++ vmlal.u32 q9,d29,d6[1] ++ vmlal.u32 q10,d29,d7[0] ++ vmlal.u32 q11,d29,d7[1] ++ vst1.64 {q12},[r7,:128]! ++ vmlal.u32 q13,d28,d0[0] ++ vld1.64 {q12},[r6,:128] ++ vmlal.u32 q6,d28,d0[1] ++ vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7] ++ vmlal.u32 q7,d28,d1[0] ++ it ne ++ addne r6,r6,#16 @ don't advance in last iteration ++ vmlal.u32 q8,d28,d1[1] ++ vmlal.u32 q9,d28,d2[0] ++ vmlal.u32 q10,d28,d2[1] ++ vmlal.u32 q11,d28,d3[0] ++ vmlal.u32 q12,d28,d3[1] ++ it eq ++ subeq r1,r1,r5,lsl#2 @ rewind ++ vmlal.u32 q13,d29,d4[0] ++ vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0] ++ vmlal.u32 q6,d29,d4[1] ++ vld1.32 {d0,d1,d2,d3},[r1]! ++ vmlal.u32 q7,d29,d5[0] ++ add r10,sp,#8 @ rewind ++ vmlal.u32 q8,d29,d5[1] ++ vmlal.u32 q9,d29,d6[0] ++ vmlal.u32 q10,d29,d6[1] ++ vmlal.u32 q11,d29,d7[0] ++ vst1.64 {q13},[r7,:128]! ++ vmlal.u32 q12,d29,d7[1] ++ ++ bne LNEON_8n_inner ++ add r6,sp,#128 ++ vst1.64 {q6,q7},[r7,:256]! ++ veor q2,q2,q2 @ d4-d5 ++ vst1.64 {q8,q9},[r7,:256]! ++ veor q3,q3,q3 @ d6-d7 ++ vst1.64 {q10,q11},[r7,:256]! ++ vst1.64 {q12},[r7,:128] ++ ++ subs r9,r9,#8 ++ vld1.64 {q6,q7},[r6,:256]! ++ vld1.64 {q8,q9},[r6,:256]! ++ vld1.64 {q10,q11},[r6,:256]! ++ vld1.64 {q12,q13},[r6,:256]! ++ ++ itt ne ++ subne r3,r3,r5,lsl#2 @ rewind ++ bne LNEON_8n_outer ++ ++ add r7,sp,#128 ++ vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame ++ vshr.u64 d10,d12,#16 ++ vst1.64 {q2,q3},[sp,:256]! ++ vadd.u64 d13,d13,d10 ++ vst1.64 {q2,q3}, [sp,:256]! ++ vshr.u64 d10,d13,#16 ++ vst1.64 {q2,q3}, [sp,:256]! ++ vzip.16 d12,d13 ++ ++ mov r8,r5 ++ b LNEON_tail_entry ++ ++.align 4 ++LNEON_tail: ++ vadd.u64 d12,d12,d10 ++ vshr.u64 d10,d12,#16 ++ vld1.64 {q8,q9}, [r6, :256]! ++ vadd.u64 d13,d13,d10 ++ vld1.64 {q10,q11}, [r6, :256]! ++ vshr.u64 d10,d13,#16 ++ vld1.64 {q12,q13}, [r6, :256]! ++ vzip.16 d12,d13 ++ ++LNEON_tail_entry: ++ vadd.u64 d14,d14,d10 ++ vst1.32 {d12[0]}, [r7, :32]! ++ vshr.u64 d10,d14,#16 ++ vadd.u64 d15,d15,d10 ++ vshr.u64 d10,d15,#16 ++ vzip.16 d14,d15 ++ vadd.u64 d16,d16,d10 ++ vst1.32 {d14[0]}, [r7, :32]! ++ vshr.u64 d10,d16,#16 ++ vadd.u64 d17,d17,d10 ++ vshr.u64 d10,d17,#16 ++ vzip.16 d16,d17 ++ vadd.u64 d18,d18,d10 ++ vst1.32 {d16[0]}, [r7, :32]! ++ vshr.u64 d10,d18,#16 ++ vadd.u64 d19,d19,d10 ++ vshr.u64 d10,d19,#16 ++ vzip.16 d18,d19 ++ vadd.u64 d20,d20,d10 ++ vst1.32 {d18[0]}, [r7, :32]! ++ vshr.u64 d10,d20,#16 ++ vadd.u64 d21,d21,d10 ++ vshr.u64 d10,d21,#16 ++ vzip.16 d20,d21 ++ vadd.u64 d22,d22,d10 ++ vst1.32 {d20[0]}, [r7, :32]! ++ vshr.u64 d10,d22,#16 ++ vadd.u64 d23,d23,d10 ++ vshr.u64 d10,d23,#16 ++ vzip.16 d22,d23 ++ vadd.u64 d24,d24,d10 ++ vst1.32 {d22[0]}, [r7, :32]! 
++ vshr.u64 d10,d24,#16 ++ vadd.u64 d25,d25,d10 ++ vshr.u64 d10,d25,#16 ++ vzip.16 d24,d25 ++ vadd.u64 d26,d26,d10 ++ vst1.32 {d24[0]}, [r7, :32]! ++ vshr.u64 d10,d26,#16 ++ vadd.u64 d27,d27,d10 ++ vshr.u64 d10,d27,#16 ++ vzip.16 d26,d27 ++ vld1.64 {q6,q7}, [r6, :256]! ++ subs r8,r8,#8 ++ vst1.32 {d26[0]}, [r7, :32]! ++ bne LNEON_tail ++ ++ vst1.32 {d10[0]}, [r7, :32] @ top-most bit ++ sub r3,r3,r5,lsl#2 @ rewind r3 ++ subs r1,sp,#0 @ clear carry flag ++ add r2,sp,r5,lsl#2 ++ ++LNEON_sub: ++ ldmia r1!, {r4,r5,r6,r7} ++ ldmia r3!, {r8,r9,r10,r11} ++ sbcs r8, r4,r8 ++ sbcs r9, r5,r9 ++ sbcs r10,r6,r10 ++ sbcs r11,r7,r11 ++ teq r1,r2 @ preserves carry ++ stmia r0!, {r8,r9,r10,r11} ++ bne LNEON_sub ++ ++ ldr r10, [r1] @ load top-most bit ++ mov r11,sp ++ veor q0,q0,q0 ++ sub r11,r2,r11 @ this is num*4 ++ veor q1,q1,q1 ++ mov r1,sp ++ sub r0,r0,r11 @ rewind r0 ++ mov r3,r2 @ second 3/4th of frame ++ sbcs r10,r10,#0 @ result is carry flag ++ ++LNEON_copy_n_zap: ++ ldmia r1!, {r4,r5,r6,r7} ++ ldmia r0, {r8,r9,r10,r11} ++ it cc ++ movcc r8, r4 ++ vst1.64 {q0,q1}, [r3,:256]! @ wipe ++ itt cc ++ movcc r9, r5 ++ movcc r10,r6 ++ vst1.64 {q0,q1}, [r3,:256]! @ wipe ++ it cc ++ movcc r11,r7 ++ ldmia r1, {r4,r5,r6,r7} ++ stmia r0!, {r8,r9,r10,r11} ++ sub r1,r1,#16 ++ ldmia r0, {r8,r9,r10,r11} ++ it cc ++ movcc r8, r4 ++ vst1.64 {q0,q1}, [r1,:256]! @ wipe ++ itt cc ++ movcc r9, r5 ++ movcc r10,r6 ++ vst1.64 {q0,q1}, [r3,:256]! @ wipe ++ it cc ++ movcc r11,r7 ++ teq r1,r2 @ preserves carry ++ stmia r0!, {r8,r9,r10,r11} ++ bne LNEON_copy_n_zap ++ ++ mov sp,ip ++ vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11} ++ bx lr @ bx lr ++ ++#endif ++.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++#if __ARM_MAX_ARCH__>=7 ++.comm _OPENSSL_armcap_P,4 ++.non_lazy_symbol_pointer ++OPENSSL_armcap_P: ++.indirect_symbol _OPENSSL_armcap_P ++.long 0 ++.private_extern _OPENSSL_armcap_P ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/fipsmodule/bsaes-armv7.S b/apple-arm/crypto/fipsmodule/bsaes-armv7.S +new file mode 100644 +index 0000000..8329a8c +--- /dev/null ++++ b/apple-arm/crypto/fipsmodule/bsaes-armv7.S +@@ -0,0 +1,1536 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++@ Copyright 2012-2016 The OpenSSL Project Authors. All Rights Reserved. ++@ ++@ Licensed under the OpenSSL license (the "License"). You may not use ++@ this file except in compliance with the License. You can obtain a copy ++@ in the file LICENSE in the source distribution or at ++@ https://www.openssl.org/source/license.html ++ ++ ++@ ==================================================================== ++@ Written by Andy Polyakov for the OpenSSL ++@ project. The module is, however, dual licensed under OpenSSL and ++@ CRYPTOGAMS licenses depending on where you obtain it. For further ++@ details see http://www.openssl.org/~appro/cryptogams/. ++@ ++@ Specific modes and adaptation for Linux kernel by Ard Biesheuvel ++@ of Linaro. 
Permission to use under GPL terms is granted. ++@ ==================================================================== ++ ++@ Bit-sliced AES for ARM NEON ++@ ++@ February 2012. ++@ ++@ This implementation is direct adaptation of bsaes-x86_64 module for ++@ ARM NEON. Except that this module is endian-neutral [in sense that ++@ it can be compiled for either endianness] by courtesy of vld1.8's ++@ neutrality. Initial version doesn't implement interface to OpenSSL, ++@ only low-level primitives and unsupported entry points, just enough ++@ to collect performance results, which for Cortex-A8 core are: ++@ ++@ encrypt 19.5 cycles per byte processed with 128-bit key ++@ decrypt 22.1 cycles per byte processed with 128-bit key ++@ key conv. 440 cycles per 128-bit key/0.18 of 8x block ++@ ++@ Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 19.7, ++@ which is [much] worse than anticipated (for further details see ++@ http://www.openssl.org/~appro/Snapdragon-S4.html). ++@ ++@ Cortex-A15 manages in 14.2/16.1 cycles [when integer-only code ++@ manages in 20.0 cycles]. ++@ ++@ When comparing to x86_64 results keep in mind that NEON unit is ++@ [mostly] single-issue and thus can't [fully] benefit from ++@ instruction-level parallelism. And when comparing to aes-armv4 ++@ results keep in mind key schedule conversion overhead (see ++@ bsaes-x86_64.pl for further details)... ++@ ++@ ++ ++@ April-August 2013 ++@ Add CBC, CTR and XTS subroutines and adapt for kernel use; courtesy of Ard. ++ ++#ifndef __KERNEL__ ++# include ++ ++# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} ++# define VFP_ABI_POP vldmia sp!,{d8-d15} ++# define VFP_ABI_FRAME 0x40 ++#else ++# define VFP_ABI_PUSH ++# define VFP_ABI_POP ++# define VFP_ABI_FRAME 0 ++# define BSAES_ASM_EXTENDED_KEY ++# define XTS_CHAIN_TWEAK ++# define __ARM_ARCH__ __LINUX_ARM_ARCH__ ++# define __ARM_MAX_ARCH__ 7 ++#endif ++ ++#ifdef __thumb__ ++# define adrl adr ++#endif ++ ++#if __ARM_MAX_ARCH__>=7 ++ ++ ++ ++.text ++.syntax unified @ ARMv7-capable assembler is expected to handle this ++#if defined(__thumb2__) && !defined(__APPLE__) ++.thumb ++#else ++.code 32 ++# undef __thumb2__ ++#endif ++ ++#ifdef __thumb2__ ++.thumb_func _bsaes_decrypt8 ++#endif ++.align 4 ++_bsaes_decrypt8: ++ adr r6,. 
++ vldmia r4!, {q9} @ round 0 key ++#if defined(__thumb2__) || defined(__APPLE__) ++ adr r6,LM0ISR ++#else ++ add r6,r6,#LM0ISR-_bsaes_decrypt8 ++#endif ++ ++ vldmia r6!, {q8} @ LM0ISR ++ veor q10, q0, q9 @ xor with round0 key ++ veor q11, q1, q9 ++ vtbl.8 d0, {q10}, d16 ++ vtbl.8 d1, {q10}, d17 ++ veor q12, q2, q9 ++ vtbl.8 d2, {q11}, d16 ++ vtbl.8 d3, {q11}, d17 ++ veor q13, q3, q9 ++ vtbl.8 d4, {q12}, d16 ++ vtbl.8 d5, {q12}, d17 ++ veor q14, q4, q9 ++ vtbl.8 d6, {q13}, d16 ++ vtbl.8 d7, {q13}, d17 ++ veor q15, q5, q9 ++ vtbl.8 d8, {q14}, d16 ++ vtbl.8 d9, {q14}, d17 ++ veor q10, q6, q9 ++ vtbl.8 d10, {q15}, d16 ++ vtbl.8 d11, {q15}, d17 ++ veor q11, q7, q9 ++ vtbl.8 d12, {q10}, d16 ++ vtbl.8 d13, {q10}, d17 ++ vtbl.8 d14, {q11}, d16 ++ vtbl.8 d15, {q11}, d17 ++ vmov.i8 q8,#0x55 @ compose LBS0 ++ vmov.i8 q9,#0x33 @ compose LBS1 ++ vshr.u64 q10, q6, #1 ++ vshr.u64 q11, q4, #1 ++ veor q10, q10, q7 ++ veor q11, q11, q5 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q7, q7, q10 ++ vshl.u64 q10, q10, #1 ++ veor q5, q5, q11 ++ vshl.u64 q11, q11, #1 ++ veor q6, q6, q10 ++ veor q4, q4, q11 ++ vshr.u64 q10, q2, #1 ++ vshr.u64 q11, q0, #1 ++ veor q10, q10, q3 ++ veor q11, q11, q1 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q3, q3, q10 ++ vshl.u64 q10, q10, #1 ++ veor q1, q1, q11 ++ vshl.u64 q11, q11, #1 ++ veor q2, q2, q10 ++ veor q0, q0, q11 ++ vmov.i8 q8,#0x0f @ compose LBS2 ++ vshr.u64 q10, q5, #2 ++ vshr.u64 q11, q4, #2 ++ veor q10, q10, q7 ++ veor q11, q11, q6 ++ vand q10, q10, q9 ++ vand q11, q11, q9 ++ veor q7, q7, q10 ++ vshl.u64 q10, q10, #2 ++ veor q6, q6, q11 ++ vshl.u64 q11, q11, #2 ++ veor q5, q5, q10 ++ veor q4, q4, q11 ++ vshr.u64 q10, q1, #2 ++ vshr.u64 q11, q0, #2 ++ veor q10, q10, q3 ++ veor q11, q11, q2 ++ vand q10, q10, q9 ++ vand q11, q11, q9 ++ veor q3, q3, q10 ++ vshl.u64 q10, q10, #2 ++ veor q2, q2, q11 ++ vshl.u64 q11, q11, #2 ++ veor q1, q1, q10 ++ veor q0, q0, q11 ++ vshr.u64 q10, q3, #4 ++ vshr.u64 q11, q2, #4 ++ veor q10, q10, q7 ++ veor q11, q11, q6 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q7, q7, q10 ++ vshl.u64 q10, q10, #4 ++ veor q6, q6, q11 ++ vshl.u64 q11, q11, #4 ++ veor q3, q3, q10 ++ veor q2, q2, q11 ++ vshr.u64 q10, q1, #4 ++ vshr.u64 q11, q0, #4 ++ veor q10, q10, q5 ++ veor q11, q11, q4 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q5, q5, q10 ++ vshl.u64 q10, q10, #4 ++ veor q4, q4, q11 ++ vshl.u64 q11, q11, #4 ++ veor q1, q1, q10 ++ veor q0, q0, q11 ++ sub r5,r5,#1 ++ b Ldec_sbox ++.align 4 ++Ldec_loop: ++ vldmia r4!, {q8,q9,q10,q11} ++ veor q8, q8, q0 ++ veor q9, q9, q1 ++ vtbl.8 d0, {q8}, d24 ++ vtbl.8 d1, {q8}, d25 ++ vldmia r4!, {q8} ++ veor q10, q10, q2 ++ vtbl.8 d2, {q9}, d24 ++ vtbl.8 d3, {q9}, d25 ++ vldmia r4!, {q9} ++ veor q11, q11, q3 ++ vtbl.8 d4, {q10}, d24 ++ vtbl.8 d5, {q10}, d25 ++ vldmia r4!, {q10} ++ vtbl.8 d6, {q11}, d24 ++ vtbl.8 d7, {q11}, d25 ++ vldmia r4!, {q11} ++ veor q8, q8, q4 ++ veor q9, q9, q5 ++ vtbl.8 d8, {q8}, d24 ++ vtbl.8 d9, {q8}, d25 ++ veor q10, q10, q6 ++ vtbl.8 d10, {q9}, d24 ++ vtbl.8 d11, {q9}, d25 ++ veor q11, q11, q7 ++ vtbl.8 d12, {q10}, d24 ++ vtbl.8 d13, {q10}, d25 ++ vtbl.8 d14, {q11}, d24 ++ vtbl.8 d15, {q11}, d25 ++Ldec_sbox: ++ veor q1, q1, q4 ++ veor q3, q3, q4 ++ ++ veor q4, q4, q7 ++ veor q1, q1, q6 ++ veor q2, q2, q7 ++ veor q6, q6, q4 ++ ++ veor q0, q0, q1 ++ veor q2, q2, q5 ++ veor q7, q7, q6 ++ veor q3, q3, q0 ++ veor q5, q5, q0 ++ veor q1, q1, q3 ++ veor q11, q3, q0 ++ veor q10, q7, q4 ++ veor q9, q1, q6 ++ veor q13, q4, q0 ++ vmov q8, q10 ++ veor q12, q5, q2 ++ ++ vorr q10, 
q10, q9 ++ veor q15, q11, q8 ++ vand q14, q11, q12 ++ vorr q11, q11, q12 ++ veor q12, q12, q9 ++ vand q8, q8, q9 ++ veor q9, q6, q2 ++ vand q15, q15, q12 ++ vand q13, q13, q9 ++ veor q9, q3, q7 ++ veor q12, q1, q5 ++ veor q11, q11, q13 ++ veor q10, q10, q13 ++ vand q13, q9, q12 ++ vorr q9, q9, q12 ++ veor q11, q11, q15 ++ veor q8, q8, q13 ++ veor q10, q10, q14 ++ veor q9, q9, q15 ++ veor q8, q8, q14 ++ vand q12, q4, q6 ++ veor q9, q9, q14 ++ vand q13, q0, q2 ++ vand q14, q7, q1 ++ vorr q15, q3, q5 ++ veor q11, q11, q12 ++ veor q9, q9, q14 ++ veor q8, q8, q15 ++ veor q10, q10, q13 ++ ++ @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 ++ ++ @ new smaller inversion ++ ++ vand q14, q11, q9 ++ vmov q12, q8 ++ ++ veor q13, q10, q14 ++ veor q15, q8, q14 ++ veor q14, q8, q14 @ q14=q15 ++ ++ vbsl q13, q9, q8 ++ vbsl q15, q11, q10 ++ veor q11, q11, q10 ++ ++ vbsl q12, q13, q14 ++ vbsl q8, q14, q13 ++ ++ vand q14, q12, q15 ++ veor q9, q9, q8 ++ ++ veor q14, q14, q11 ++ veor q12, q5, q2 ++ veor q8, q1, q6 ++ veor q10, q15, q14 ++ vand q10, q10, q5 ++ veor q5, q5, q1 ++ vand q11, q1, q15 ++ vand q5, q5, q14 ++ veor q1, q11, q10 ++ veor q5, q5, q11 ++ veor q15, q15, q13 ++ veor q14, q14, q9 ++ veor q11, q15, q14 ++ veor q10, q13, q9 ++ vand q11, q11, q12 ++ vand q10, q10, q2 ++ veor q12, q12, q8 ++ veor q2, q2, q6 ++ vand q8, q8, q15 ++ vand q6, q6, q13 ++ vand q12, q12, q14 ++ vand q2, q2, q9 ++ veor q8, q8, q12 ++ veor q2, q2, q6 ++ veor q12, q12, q11 ++ veor q6, q6, q10 ++ veor q5, q5, q12 ++ veor q2, q2, q12 ++ veor q1, q1, q8 ++ veor q6, q6, q8 ++ ++ veor q12, q3, q0 ++ veor q8, q7, q4 ++ veor q11, q15, q14 ++ veor q10, q13, q9 ++ vand q11, q11, q12 ++ vand q10, q10, q0 ++ veor q12, q12, q8 ++ veor q0, q0, q4 ++ vand q8, q8, q15 ++ vand q4, q4, q13 ++ vand q12, q12, q14 ++ vand q0, q0, q9 ++ veor q8, q8, q12 ++ veor q0, q0, q4 ++ veor q12, q12, q11 ++ veor q4, q4, q10 ++ veor q15, q15, q13 ++ veor q14, q14, q9 ++ veor q10, q15, q14 ++ vand q10, q10, q3 ++ veor q3, q3, q7 ++ vand q11, q7, q15 ++ vand q3, q3, q14 ++ veor q7, q11, q10 ++ veor q3, q3, q11 ++ veor q3, q3, q12 ++ veor q0, q0, q12 ++ veor q7, q7, q8 ++ veor q4, q4, q8 ++ veor q1, q1, q7 ++ veor q6, q6, q5 ++ ++ veor q4, q4, q1 ++ veor q2, q2, q7 ++ veor q5, q5, q7 ++ veor q4, q4, q2 ++ veor q7, q7, q0 ++ veor q4, q4, q5 ++ veor q3, q3, q6 ++ veor q6, q6, q1 ++ veor q3, q3, q4 ++ ++ veor q4, q4, q0 ++ veor q7, q7, q3 ++ subs r5,r5,#1 ++ bcc Ldec_done ++ @ multiplication by 0x05-0x00-0x04-0x00 ++ vext.8 q8, q0, q0, #8 ++ vext.8 q14, q3, q3, #8 ++ vext.8 q15, q5, q5, #8 ++ veor q8, q8, q0 ++ vext.8 q9, q1, q1, #8 ++ veor q14, q14, q3 ++ vext.8 q10, q6, q6, #8 ++ veor q15, q15, q5 ++ vext.8 q11, q4, q4, #8 ++ veor q9, q9, q1 ++ vext.8 q12, q2, q2, #8 ++ veor q10, q10, q6 ++ vext.8 q13, q7, q7, #8 ++ veor q11, q11, q4 ++ veor q12, q12, q2 ++ veor q13, q13, q7 ++ ++ veor q0, q0, q14 ++ veor q1, q1, q14 ++ veor q6, q6, q8 ++ veor q2, q2, q10 ++ veor q4, q4, q9 ++ veor q1, q1, q15 ++ veor q6, q6, q15 ++ veor q2, q2, q14 ++ veor q7, q7, q11 ++ veor q4, q4, q14 ++ veor q3, q3, q12 ++ veor q2, q2, q15 ++ veor q7, q7, q15 ++ veor q5, q5, q13 ++ vext.8 q8, q0, q0, #12 @ x0 <<< 32 ++ vext.8 q9, q1, q1, #12 ++ veor q0, q0, q8 @ x0 ^ (x0 <<< 32) ++ vext.8 q10, q6, q6, #12 ++ veor q1, q1, q9 ++ vext.8 q11, q4, q4, #12 ++ veor q6, q6, q10 ++ vext.8 q12, q2, q2, #12 ++ veor q4, q4, q11 ++ vext.8 q13, q7, q7, #12 ++ veor q2, q2, q12 ++ vext.8 q14, q3, q3, #12 ++ veor q7, q7, q13 ++ vext.8 q15, q5, q5, #12 ++ veor q3, q3, q14 ++ ++ veor q9, q9, q0 ++ veor q5, q5, q15 
++ vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) ++ veor q10, q10, q1 ++ veor q8, q8, q5 ++ veor q9, q9, q5 ++ vext.8 q1, q1, q1, #8 ++ veor q13, q13, q2 ++ veor q0, q0, q8 ++ veor q14, q14, q7 ++ veor q1, q1, q9 ++ vext.8 q8, q2, q2, #8 ++ veor q12, q12, q4 ++ vext.8 q9, q7, q7, #8 ++ veor q15, q15, q3 ++ vext.8 q2, q4, q4, #8 ++ veor q11, q11, q6 ++ vext.8 q7, q5, q5, #8 ++ veor q12, q12, q5 ++ vext.8 q4, q3, q3, #8 ++ veor q11, q11, q5 ++ vext.8 q3, q6, q6, #8 ++ veor q5, q9, q13 ++ veor q11, q11, q2 ++ veor q7, q7, q15 ++ veor q6, q4, q14 ++ veor q4, q8, q12 ++ veor q2, q3, q10 ++ vmov q3, q11 ++ @ vmov q5, q9 ++ vldmia r6, {q12} @ LISR ++ ite eq @ Thumb2 thing, sanity check in ARM ++ addeq r6,r6,#0x10 ++ bne Ldec_loop ++ vldmia r6, {q12} @ LISRM0 ++ b Ldec_loop ++.align 4 ++Ldec_done: ++ vmov.i8 q8,#0x55 @ compose LBS0 ++ vmov.i8 q9,#0x33 @ compose LBS1 ++ vshr.u64 q10, q3, #1 ++ vshr.u64 q11, q2, #1 ++ veor q10, q10, q5 ++ veor q11, q11, q7 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q5, q5, q10 ++ vshl.u64 q10, q10, #1 ++ veor q7, q7, q11 ++ vshl.u64 q11, q11, #1 ++ veor q3, q3, q10 ++ veor q2, q2, q11 ++ vshr.u64 q10, q6, #1 ++ vshr.u64 q11, q0, #1 ++ veor q10, q10, q4 ++ veor q11, q11, q1 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q4, q4, q10 ++ vshl.u64 q10, q10, #1 ++ veor q1, q1, q11 ++ vshl.u64 q11, q11, #1 ++ veor q6, q6, q10 ++ veor q0, q0, q11 ++ vmov.i8 q8,#0x0f @ compose LBS2 ++ vshr.u64 q10, q7, #2 ++ vshr.u64 q11, q2, #2 ++ veor q10, q10, q5 ++ veor q11, q11, q3 ++ vand q10, q10, q9 ++ vand q11, q11, q9 ++ veor q5, q5, q10 ++ vshl.u64 q10, q10, #2 ++ veor q3, q3, q11 ++ vshl.u64 q11, q11, #2 ++ veor q7, q7, q10 ++ veor q2, q2, q11 ++ vshr.u64 q10, q1, #2 ++ vshr.u64 q11, q0, #2 ++ veor q10, q10, q4 ++ veor q11, q11, q6 ++ vand q10, q10, q9 ++ vand q11, q11, q9 ++ veor q4, q4, q10 ++ vshl.u64 q10, q10, #2 ++ veor q6, q6, q11 ++ vshl.u64 q11, q11, #2 ++ veor q1, q1, q10 ++ veor q0, q0, q11 ++ vshr.u64 q10, q4, #4 ++ vshr.u64 q11, q6, #4 ++ veor q10, q10, q5 ++ veor q11, q11, q3 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q5, q5, q10 ++ vshl.u64 q10, q10, #4 ++ veor q3, q3, q11 ++ vshl.u64 q11, q11, #4 ++ veor q4, q4, q10 ++ veor q6, q6, q11 ++ vshr.u64 q10, q1, #4 ++ vshr.u64 q11, q0, #4 ++ veor q10, q10, q7 ++ veor q11, q11, q2 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q7, q7, q10 ++ vshl.u64 q10, q10, #4 ++ veor q2, q2, q11 ++ vshl.u64 q11, q11, #4 ++ veor q1, q1, q10 ++ veor q0, q0, q11 ++ vldmia r4, {q8} @ last round key ++ veor q6, q6, q8 ++ veor q4, q4, q8 ++ veor q2, q2, q8 ++ veor q7, q7, q8 ++ veor q3, q3, q8 ++ veor q5, q5, q8 ++ veor q0, q0, q8 ++ veor q1, q1, q8 ++ bx lr ++ ++ ++ ++.align 6 ++_bsaes_const: ++LM0ISR:@ InvShiftRows constants ++.quad 0x0a0e0206070b0f03, 0x0004080c0d010509 ++LISR: ++.quad 0x0504070602010003, 0x0f0e0d0c080b0a09 ++LISRM0: ++.quad 0x01040b0e0205080f, 0x0306090c00070a0d ++LM0SR:@ ShiftRows constants ++.quad 0x0a0e02060f03070b, 0x0004080c05090d01 ++LSR: ++.quad 0x0504070600030201, 0x0f0e0d0c0a09080b ++LSRM0: ++.quad 0x0304090e00050a0f, 0x01060b0c0207080d ++LM0: ++.quad 0x02060a0e03070b0f, 0x0004080c0105090d ++LREVM0SR: ++.quad 0x090d01050c000408, 0x03070b0f060a0e02 ++.byte 66,105,116,45,115,108,105,99,101,100,32,65,69,83,32,102,111,114,32,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 6 ++ ++ ++#ifdef __thumb2__ ++.thumb_func _bsaes_encrypt8 ++#endif ++.align 4 ++_bsaes_encrypt8: ++ adr r6,. 
++ vldmia r4!, {q9} @ round 0 key ++#if defined(__thumb2__) || defined(__APPLE__) ++ adr r6,LM0SR ++#else ++ sub r6,r6,#_bsaes_encrypt8-LM0SR ++#endif ++ ++ vldmia r6!, {q8} @ LM0SR ++_bsaes_encrypt8_alt: ++ veor q10, q0, q9 @ xor with round0 key ++ veor q11, q1, q9 ++ vtbl.8 d0, {q10}, d16 ++ vtbl.8 d1, {q10}, d17 ++ veor q12, q2, q9 ++ vtbl.8 d2, {q11}, d16 ++ vtbl.8 d3, {q11}, d17 ++ veor q13, q3, q9 ++ vtbl.8 d4, {q12}, d16 ++ vtbl.8 d5, {q12}, d17 ++ veor q14, q4, q9 ++ vtbl.8 d6, {q13}, d16 ++ vtbl.8 d7, {q13}, d17 ++ veor q15, q5, q9 ++ vtbl.8 d8, {q14}, d16 ++ vtbl.8 d9, {q14}, d17 ++ veor q10, q6, q9 ++ vtbl.8 d10, {q15}, d16 ++ vtbl.8 d11, {q15}, d17 ++ veor q11, q7, q9 ++ vtbl.8 d12, {q10}, d16 ++ vtbl.8 d13, {q10}, d17 ++ vtbl.8 d14, {q11}, d16 ++ vtbl.8 d15, {q11}, d17 ++_bsaes_encrypt8_bitslice: ++ vmov.i8 q8,#0x55 @ compose LBS0 ++ vmov.i8 q9,#0x33 @ compose LBS1 ++ vshr.u64 q10, q6, #1 ++ vshr.u64 q11, q4, #1 ++ veor q10, q10, q7 ++ veor q11, q11, q5 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q7, q7, q10 ++ vshl.u64 q10, q10, #1 ++ veor q5, q5, q11 ++ vshl.u64 q11, q11, #1 ++ veor q6, q6, q10 ++ veor q4, q4, q11 ++ vshr.u64 q10, q2, #1 ++ vshr.u64 q11, q0, #1 ++ veor q10, q10, q3 ++ veor q11, q11, q1 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q3, q3, q10 ++ vshl.u64 q10, q10, #1 ++ veor q1, q1, q11 ++ vshl.u64 q11, q11, #1 ++ veor q2, q2, q10 ++ veor q0, q0, q11 ++ vmov.i8 q8,#0x0f @ compose LBS2 ++ vshr.u64 q10, q5, #2 ++ vshr.u64 q11, q4, #2 ++ veor q10, q10, q7 ++ veor q11, q11, q6 ++ vand q10, q10, q9 ++ vand q11, q11, q9 ++ veor q7, q7, q10 ++ vshl.u64 q10, q10, #2 ++ veor q6, q6, q11 ++ vshl.u64 q11, q11, #2 ++ veor q5, q5, q10 ++ veor q4, q4, q11 ++ vshr.u64 q10, q1, #2 ++ vshr.u64 q11, q0, #2 ++ veor q10, q10, q3 ++ veor q11, q11, q2 ++ vand q10, q10, q9 ++ vand q11, q11, q9 ++ veor q3, q3, q10 ++ vshl.u64 q10, q10, #2 ++ veor q2, q2, q11 ++ vshl.u64 q11, q11, #2 ++ veor q1, q1, q10 ++ veor q0, q0, q11 ++ vshr.u64 q10, q3, #4 ++ vshr.u64 q11, q2, #4 ++ veor q10, q10, q7 ++ veor q11, q11, q6 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q7, q7, q10 ++ vshl.u64 q10, q10, #4 ++ veor q6, q6, q11 ++ vshl.u64 q11, q11, #4 ++ veor q3, q3, q10 ++ veor q2, q2, q11 ++ vshr.u64 q10, q1, #4 ++ vshr.u64 q11, q0, #4 ++ veor q10, q10, q5 ++ veor q11, q11, q4 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q5, q5, q10 ++ vshl.u64 q10, q10, #4 ++ veor q4, q4, q11 ++ vshl.u64 q11, q11, #4 ++ veor q1, q1, q10 ++ veor q0, q0, q11 ++ sub r5,r5,#1 ++ b Lenc_sbox ++.align 4 ++Lenc_loop: ++ vldmia r4!, {q8,q9,q10,q11} ++ veor q8, q8, q0 ++ veor q9, q9, q1 ++ vtbl.8 d0, {q8}, d24 ++ vtbl.8 d1, {q8}, d25 ++ vldmia r4!, {q8} ++ veor q10, q10, q2 ++ vtbl.8 d2, {q9}, d24 ++ vtbl.8 d3, {q9}, d25 ++ vldmia r4!, {q9} ++ veor q11, q11, q3 ++ vtbl.8 d4, {q10}, d24 ++ vtbl.8 d5, {q10}, d25 ++ vldmia r4!, {q10} ++ vtbl.8 d6, {q11}, d24 ++ vtbl.8 d7, {q11}, d25 ++ vldmia r4!, {q11} ++ veor q8, q8, q4 ++ veor q9, q9, q5 ++ vtbl.8 d8, {q8}, d24 ++ vtbl.8 d9, {q8}, d25 ++ veor q10, q10, q6 ++ vtbl.8 d10, {q9}, d24 ++ vtbl.8 d11, {q9}, d25 ++ veor q11, q11, q7 ++ vtbl.8 d12, {q10}, d24 ++ vtbl.8 d13, {q10}, d25 ++ vtbl.8 d14, {q11}, d24 ++ vtbl.8 d15, {q11}, d25 ++Lenc_sbox: ++ veor q2, q2, q1 ++ veor q5, q5, q6 ++ veor q3, q3, q0 ++ veor q6, q6, q2 ++ veor q5, q5, q0 ++ ++ veor q6, q6, q3 ++ veor q3, q3, q7 ++ veor q7, q7, q5 ++ veor q3, q3, q4 ++ veor q4, q4, q5 ++ ++ veor q2, q2, q7 ++ veor q3, q3, q1 ++ veor q1, q1, q5 ++ veor q11, q7, q4 ++ veor q10, q1, q2 ++ veor q9, q5, q3 ++ 
veor q13, q2, q4 ++ vmov q8, q10 ++ veor q12, q6, q0 ++ ++ vorr q10, q10, q9 ++ veor q15, q11, q8 ++ vand q14, q11, q12 ++ vorr q11, q11, q12 ++ veor q12, q12, q9 ++ vand q8, q8, q9 ++ veor q9, q3, q0 ++ vand q15, q15, q12 ++ vand q13, q13, q9 ++ veor q9, q7, q1 ++ veor q12, q5, q6 ++ veor q11, q11, q13 ++ veor q10, q10, q13 ++ vand q13, q9, q12 ++ vorr q9, q9, q12 ++ veor q11, q11, q15 ++ veor q8, q8, q13 ++ veor q10, q10, q14 ++ veor q9, q9, q15 ++ veor q8, q8, q14 ++ vand q12, q2, q3 ++ veor q9, q9, q14 ++ vand q13, q4, q0 ++ vand q14, q1, q5 ++ vorr q15, q7, q6 ++ veor q11, q11, q12 ++ veor q9, q9, q14 ++ veor q8, q8, q15 ++ veor q10, q10, q13 ++ ++ @ Inv_GF16 0, 1, 2, 3, s0, s1, s2, s3 ++ ++ @ new smaller inversion ++ ++ vand q14, q11, q9 ++ vmov q12, q8 ++ ++ veor q13, q10, q14 ++ veor q15, q8, q14 ++ veor q14, q8, q14 @ q14=q15 ++ ++ vbsl q13, q9, q8 ++ vbsl q15, q11, q10 ++ veor q11, q11, q10 ++ ++ vbsl q12, q13, q14 ++ vbsl q8, q14, q13 ++ ++ vand q14, q12, q15 ++ veor q9, q9, q8 ++ ++ veor q14, q14, q11 ++ veor q12, q6, q0 ++ veor q8, q5, q3 ++ veor q10, q15, q14 ++ vand q10, q10, q6 ++ veor q6, q6, q5 ++ vand q11, q5, q15 ++ vand q6, q6, q14 ++ veor q5, q11, q10 ++ veor q6, q6, q11 ++ veor q15, q15, q13 ++ veor q14, q14, q9 ++ veor q11, q15, q14 ++ veor q10, q13, q9 ++ vand q11, q11, q12 ++ vand q10, q10, q0 ++ veor q12, q12, q8 ++ veor q0, q0, q3 ++ vand q8, q8, q15 ++ vand q3, q3, q13 ++ vand q12, q12, q14 ++ vand q0, q0, q9 ++ veor q8, q8, q12 ++ veor q0, q0, q3 ++ veor q12, q12, q11 ++ veor q3, q3, q10 ++ veor q6, q6, q12 ++ veor q0, q0, q12 ++ veor q5, q5, q8 ++ veor q3, q3, q8 ++ ++ veor q12, q7, q4 ++ veor q8, q1, q2 ++ veor q11, q15, q14 ++ veor q10, q13, q9 ++ vand q11, q11, q12 ++ vand q10, q10, q4 ++ veor q12, q12, q8 ++ veor q4, q4, q2 ++ vand q8, q8, q15 ++ vand q2, q2, q13 ++ vand q12, q12, q14 ++ vand q4, q4, q9 ++ veor q8, q8, q12 ++ veor q4, q4, q2 ++ veor q12, q12, q11 ++ veor q2, q2, q10 ++ veor q15, q15, q13 ++ veor q14, q14, q9 ++ veor q10, q15, q14 ++ vand q10, q10, q7 ++ veor q7, q7, q1 ++ vand q11, q1, q15 ++ vand q7, q7, q14 ++ veor q1, q11, q10 ++ veor q7, q7, q11 ++ veor q7, q7, q12 ++ veor q4, q4, q12 ++ veor q1, q1, q8 ++ veor q2, q2, q8 ++ veor q7, q7, q0 ++ veor q1, q1, q6 ++ veor q6, q6, q0 ++ veor q4, q4, q7 ++ veor q0, q0, q1 ++ ++ veor q1, q1, q5 ++ veor q5, q5, q2 ++ veor q2, q2, q3 ++ veor q3, q3, q5 ++ veor q4, q4, q5 ++ ++ veor q6, q6, q3 ++ subs r5,r5,#1 ++ bcc Lenc_done ++ vext.8 q8, q0, q0, #12 @ x0 <<< 32 ++ vext.8 q9, q1, q1, #12 ++ veor q0, q0, q8 @ x0 ^ (x0 <<< 32) ++ vext.8 q10, q4, q4, #12 ++ veor q1, q1, q9 ++ vext.8 q11, q6, q6, #12 ++ veor q4, q4, q10 ++ vext.8 q12, q3, q3, #12 ++ veor q6, q6, q11 ++ vext.8 q13, q7, q7, #12 ++ veor q3, q3, q12 ++ vext.8 q14, q2, q2, #12 ++ veor q7, q7, q13 ++ vext.8 q15, q5, q5, #12 ++ veor q2, q2, q14 ++ ++ veor q9, q9, q0 ++ veor q5, q5, q15 ++ vext.8 q0, q0, q0, #8 @ (x0 ^ (x0 <<< 32)) <<< 64) ++ veor q10, q10, q1 ++ veor q8, q8, q5 ++ veor q9, q9, q5 ++ vext.8 q1, q1, q1, #8 ++ veor q13, q13, q3 ++ veor q0, q0, q8 ++ veor q14, q14, q7 ++ veor q1, q1, q9 ++ vext.8 q8, q3, q3, #8 ++ veor q12, q12, q6 ++ vext.8 q9, q7, q7, #8 ++ veor q15, q15, q2 ++ vext.8 q3, q6, q6, #8 ++ veor q11, q11, q4 ++ vext.8 q7, q5, q5, #8 ++ veor q12, q12, q5 ++ vext.8 q6, q2, q2, #8 ++ veor q11, q11, q5 ++ vext.8 q2, q4, q4, #8 ++ veor q5, q9, q13 ++ veor q4, q8, q12 ++ veor q3, q3, q11 ++ veor q7, q7, q15 ++ veor q6, q6, q14 ++ @ vmov q4, q8 ++ veor q2, q2, q10 ++ @ vmov q5, q9 ++ vldmia r6, {q12} @ LSR ++ ite eq @ 
Thumb2 thing, sanity check in ARM ++ addeq r6,r6,#0x10 ++ bne Lenc_loop ++ vldmia r6, {q12} @ LSRM0 ++ b Lenc_loop ++.align 4 ++Lenc_done: ++ vmov.i8 q8,#0x55 @ compose LBS0 ++ vmov.i8 q9,#0x33 @ compose LBS1 ++ vshr.u64 q10, q2, #1 ++ vshr.u64 q11, q3, #1 ++ veor q10, q10, q5 ++ veor q11, q11, q7 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q5, q5, q10 ++ vshl.u64 q10, q10, #1 ++ veor q7, q7, q11 ++ vshl.u64 q11, q11, #1 ++ veor q2, q2, q10 ++ veor q3, q3, q11 ++ vshr.u64 q10, q4, #1 ++ vshr.u64 q11, q0, #1 ++ veor q10, q10, q6 ++ veor q11, q11, q1 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q6, q6, q10 ++ vshl.u64 q10, q10, #1 ++ veor q1, q1, q11 ++ vshl.u64 q11, q11, #1 ++ veor q4, q4, q10 ++ veor q0, q0, q11 ++ vmov.i8 q8,#0x0f @ compose LBS2 ++ vshr.u64 q10, q7, #2 ++ vshr.u64 q11, q3, #2 ++ veor q10, q10, q5 ++ veor q11, q11, q2 ++ vand q10, q10, q9 ++ vand q11, q11, q9 ++ veor q5, q5, q10 ++ vshl.u64 q10, q10, #2 ++ veor q2, q2, q11 ++ vshl.u64 q11, q11, #2 ++ veor q7, q7, q10 ++ veor q3, q3, q11 ++ vshr.u64 q10, q1, #2 ++ vshr.u64 q11, q0, #2 ++ veor q10, q10, q6 ++ veor q11, q11, q4 ++ vand q10, q10, q9 ++ vand q11, q11, q9 ++ veor q6, q6, q10 ++ vshl.u64 q10, q10, #2 ++ veor q4, q4, q11 ++ vshl.u64 q11, q11, #2 ++ veor q1, q1, q10 ++ veor q0, q0, q11 ++ vshr.u64 q10, q6, #4 ++ vshr.u64 q11, q4, #4 ++ veor q10, q10, q5 ++ veor q11, q11, q2 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q5, q5, q10 ++ vshl.u64 q10, q10, #4 ++ veor q2, q2, q11 ++ vshl.u64 q11, q11, #4 ++ veor q6, q6, q10 ++ veor q4, q4, q11 ++ vshr.u64 q10, q1, #4 ++ vshr.u64 q11, q0, #4 ++ veor q10, q10, q7 ++ veor q11, q11, q3 ++ vand q10, q10, q8 ++ vand q11, q11, q8 ++ veor q7, q7, q10 ++ vshl.u64 q10, q10, #4 ++ veor q3, q3, q11 ++ vshl.u64 q11, q11, #4 ++ veor q1, q1, q10 ++ veor q0, q0, q11 ++ vldmia r4, {q8} @ last round key ++ veor q4, q4, q8 ++ veor q6, q6, q8 ++ veor q3, q3, q8 ++ veor q7, q7, q8 ++ veor q2, q2, q8 ++ veor q5, q5, q8 ++ veor q0, q0, q8 ++ veor q1, q1, q8 ++ bx lr ++ ++#ifdef __thumb2__ ++.thumb_func _bsaes_key_convert ++#endif ++.align 4 ++_bsaes_key_convert: ++ adr r6,. ++ vld1.8 {q7}, [r4]! @ load round 0 key ++#if defined(__thumb2__) || defined(__APPLE__) ++ adr r6,LM0 ++#else ++ sub r6,r6,#_bsaes_key_convert-LM0 ++#endif ++ vld1.8 {q15}, [r4]! @ load round 1 key ++ ++ vmov.i8 q8, #0x01 @ bit masks ++ vmov.i8 q9, #0x02 ++ vmov.i8 q10, #0x04 ++ vmov.i8 q11, #0x08 ++ vmov.i8 q12, #0x10 ++ vmov.i8 q13, #0x20 ++ vldmia r6, {q14} @ LM0 ++ ++#ifdef __ARMEL__ ++ vrev32.8 q7, q7 ++ vrev32.8 q15, q15 ++#endif ++ sub r5,r5,#1 ++ vstmia r12!, {q7} @ save round 0 key ++ b Lkey_loop ++ ++.align 4 ++Lkey_loop: ++ vtbl.8 d14,{q15},d28 ++ vtbl.8 d15,{q15},d29 ++ vmov.i8 q6, #0x40 ++ vmov.i8 q15, #0x80 ++ ++ vtst.8 q0, q7, q8 ++ vtst.8 q1, q7, q9 ++ vtst.8 q2, q7, q10 ++ vtst.8 q3, q7, q11 ++ vtst.8 q4, q7, q12 ++ vtst.8 q5, q7, q13 ++ vtst.8 q6, q7, q6 ++ vtst.8 q7, q7, q15 ++ vld1.8 {q15}, [r4]! @ load next round key ++ vmvn q0, q0 @ "pnot" ++ vmvn q1, q1 ++ vmvn q5, q5 ++ vmvn q6, q6 ++#ifdef __ARMEL__ ++ vrev32.8 q15, q15 ++#endif ++ subs r5,r5,#1 ++ vstmia r12!,{q0,q1,q2,q3,q4,q5,q6,q7} @ write bit-sliced round key ++ bne Lkey_loop ++ ++ vmov.i8 q7,#0x63 @ compose L63 ++ @ don't save last round key ++ bx lr ++ ++.globl _bsaes_cbc_encrypt ++.private_extern _bsaes_cbc_encrypt ++#ifdef __thumb2__ ++.thumb_func _bsaes_cbc_encrypt ++#endif ++.align 5 ++_bsaes_cbc_encrypt: ++ @ In OpenSSL, this function had a fallback to aes_nohw_cbc_encrypt for ++ @ short inputs.
We patch this out, using bsaes for all input sizes. ++ ++ @ it is up to the caller to make sure we are called with enc == 0 ++ ++ mov ip, sp ++ stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} ++ VFP_ABI_PUSH ++ ldr r8, [ip] @ IV is 1st arg on the stack ++ mov r2, r2, lsr#4 @ len in 16 byte blocks ++ sub sp, #0x10 @ scratch space to carry over the IV ++ mov r9, sp @ save sp ++ ++ ldr r10, [r3, #240] @ get # of rounds ++#ifndef BSAES_ASM_EXTENDED_KEY ++ @ allocate the key schedule on the stack ++ sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key ++ add r12, #96 @ size of bit-sliced key schedule ++ ++ @ populate the key schedule ++ mov r4, r3 @ pass key ++ mov r5, r10 @ pass # of rounds ++ mov sp, r12 @ sp is sp ++ bl _bsaes_key_convert ++ vldmia sp, {q6} ++ vstmia r12, {q15} @ save last round key ++ veor q7, q7, q6 @ fix up round 0 key ++ vstmia sp, {q7} ++#else ++ ldr r12, [r3, #244] ++ eors r12, #1 ++ beq 0f ++ ++ @ populate the key schedule ++ str r12, [r3, #244] ++ mov r4, r3 @ pass key ++ mov r5, r10 @ pass # of rounds ++ add r12, r3, #248 @ pass key schedule ++ bl _bsaes_key_convert ++ add r4, r3, #248 ++ vldmia r4, {q6} ++ vstmia r12, {q15} @ save last round key ++ veor q7, q7, q6 @ fix up round 0 key ++ vstmia r4, {q7} ++ ++.align 2 ++ ++#endif ++ ++ vld1.8 {q15}, [r8] @ load IV ++ b Lcbc_dec_loop ++ ++.align 4 ++Lcbc_dec_loop: ++ subs r2, r2, #0x8 ++ bmi Lcbc_dec_loop_finish ++ ++ vld1.8 {q0,q1}, [r0]! @ load input ++ vld1.8 {q2,q3}, [r0]! ++#ifndef BSAES_ASM_EXTENDED_KEY ++ mov r4, sp @ pass the key ++#else ++ add r4, r3, #248 ++#endif ++ vld1.8 {q4,q5}, [r0]! ++ mov r5, r10 ++ vld1.8 {q6,q7}, [r0] ++ sub r0, r0, #0x60 ++ vstmia r9, {q15} @ put aside IV ++ ++ bl _bsaes_decrypt8 ++ ++ vldmia r9, {q14} @ reload IV ++ vld1.8 {q8,q9}, [r0]! @ reload input ++ veor q0, q0, q14 @ ^= IV ++ vld1.8 {q10,q11}, [r0]! ++ veor q1, q1, q8 ++ veor q6, q6, q9 ++ vld1.8 {q12,q13}, [r0]! ++ veor q4, q4, q10 ++ veor q2, q2, q11 ++ vld1.8 {q14,q15}, [r0]! ++ veor q7, q7, q12 ++ vst1.8 {q0,q1}, [r1]! @ write output ++ veor q3, q3, q13 ++ vst1.8 {q6}, [r1]! ++ veor q5, q5, q14 ++ vst1.8 {q4}, [r1]! ++ vst1.8 {q2}, [r1]! ++ vst1.8 {q7}, [r1]! ++ vst1.8 {q3}, [r1]! ++ vst1.8 {q5}, [r1]! ++ ++ b Lcbc_dec_loop ++ ++Lcbc_dec_loop_finish: ++ adds r2, r2, #8 ++ beq Lcbc_dec_done ++ ++ @ Set up most parameters for the _bsaes_decrypt8 call. ++#ifndef BSAES_ASM_EXTENDED_KEY ++ mov r4, sp @ pass the key ++#else ++ add r4, r3, #248 ++#endif ++ mov r5, r10 ++ vstmia r9, {q15} @ put aside IV ++ ++ vld1.8 {q0}, [r0]! @ load input ++ cmp r2, #2 ++ blo Lcbc_dec_one ++ vld1.8 {q1}, [r0]! ++ beq Lcbc_dec_two ++ vld1.8 {q2}, [r0]! ++ cmp r2, #4 ++ blo Lcbc_dec_three ++ vld1.8 {q3}, [r0]! ++ beq Lcbc_dec_four ++ vld1.8 {q4}, [r0]! ++ cmp r2, #6 ++ blo Lcbc_dec_five ++ vld1.8 {q5}, [r0]! ++ beq Lcbc_dec_six ++ vld1.8 {q6}, [r0]! ++ sub r0, r0, #0x70 ++ ++ bl _bsaes_decrypt8 ++ ++ vldmia r9, {q14} @ reload IV ++ vld1.8 {q8,q9}, [r0]! @ reload input ++ veor q0, q0, q14 @ ^= IV ++ vld1.8 {q10,q11}, [r0]! ++ veor q1, q1, q8 ++ veor q6, q6, q9 ++ vld1.8 {q12,q13}, [r0]! ++ veor q4, q4, q10 ++ veor q2, q2, q11 ++ vld1.8 {q15}, [r0]! ++ veor q7, q7, q12 ++ vst1.8 {q0,q1}, [r1]! @ write output ++ veor q3, q3, q13 ++ vst1.8 {q6}, [r1]! ++ vst1.8 {q4}, [r1]! ++ vst1.8 {q2}, [r1]! ++ vst1.8 {q7}, [r1]! ++ vst1.8 {q3}, [r1]! ++ b Lcbc_dec_done ++.align 4 ++Lcbc_dec_six: ++ sub r0, r0, #0x60 ++ bl _bsaes_decrypt8 ++ vldmia r9,{q14} @ reload IV ++ vld1.8 {q8,q9}, [r0]! @ reload input ++ veor q0, q0, q14 @ ^= IV ++ vld1.8 {q10,q11}, [r0]!
++ veor q1, q1, q8 ++ veor q6, q6, q9 ++ vld1.8 {q12}, [r0]! ++ veor q4, q4, q10 ++ veor q2, q2, q11 ++ vld1.8 {q15}, [r0]! ++ veor q7, q7, q12 ++ vst1.8 {q0,q1}, [r1]! @ write output ++ vst1.8 {q6}, [r1]! ++ vst1.8 {q4}, [r1]! ++ vst1.8 {q2}, [r1]! ++ vst1.8 {q7}, [r1]! ++ b Lcbc_dec_done ++.align 4 ++Lcbc_dec_five: ++ sub r0, r0, #0x50 ++ bl _bsaes_decrypt8 ++ vldmia r9, {q14} @ reload IV ++ vld1.8 {q8,q9}, [r0]! @ reload input ++ veor q0, q0, q14 @ ^= IV ++ vld1.8 {q10,q11}, [r0]! ++ veor q1, q1, q8 ++ veor q6, q6, q9 ++ vld1.8 {q15}, [r0]! ++ veor q4, q4, q10 ++ vst1.8 {q0,q1}, [r1]! @ write output ++ veor q2, q2, q11 ++ vst1.8 {q6}, [r1]! ++ vst1.8 {q4}, [r1]! ++ vst1.8 {q2}, [r1]! ++ b Lcbc_dec_done ++.align 4 ++Lcbc_dec_four: ++ sub r0, r0, #0x40 ++ bl _bsaes_decrypt8 ++ vldmia r9, {q14} @ reload IV ++ vld1.8 {q8,q9}, [r0]! @ reload input ++ veor q0, q0, q14 @ ^= IV ++ vld1.8 {q10}, [r0]! ++ veor q1, q1, q8 ++ veor q6, q6, q9 ++ vld1.8 {q15}, [r0]! ++ veor q4, q4, q10 ++ vst1.8 {q0,q1}, [r1]! @ write output ++ vst1.8 {q6}, [r1]! ++ vst1.8 {q4}, [r1]! ++ b Lcbc_dec_done ++.align 4 ++Lcbc_dec_three: ++ sub r0, r0, #0x30 ++ bl _bsaes_decrypt8 ++ vldmia r9, {q14} @ reload IV ++ vld1.8 {q8,q9}, [r0]! @ reload input ++ veor q0, q0, q14 @ ^= IV ++ vld1.8 {q15}, [r0]! ++ veor q1, q1, q8 ++ veor q6, q6, q9 ++ vst1.8 {q0,q1}, [r1]! @ write output ++ vst1.8 {q6}, [r1]! ++ b Lcbc_dec_done ++.align 4 ++Lcbc_dec_two: ++ sub r0, r0, #0x20 ++ bl _bsaes_decrypt8 ++ vldmia r9, {q14} @ reload IV ++ vld1.8 {q8}, [r0]! @ reload input ++ veor q0, q0, q14 @ ^= IV ++ vld1.8 {q15}, [r0]! @ reload input ++ veor q1, q1, q8 ++ vst1.8 {q0,q1}, [r1]! @ write output ++ b Lcbc_dec_done ++.align 4 ++Lcbc_dec_one: ++ sub r0, r0, #0x10 ++ bl _bsaes_decrypt8 ++ vldmia r9, {q14} @ reload IV ++ vld1.8 {q15}, [r0]! @ reload input ++ veor q0, q0, q14 @ ^= IV ++ vst1.8 {q0}, [r1]! @ write output ++ ++Lcbc_dec_done: ++#ifndef BSAES_ASM_EXTENDED_KEY ++ vmov.i32 q0, #0 ++ vmov.i32 q1, #0 ++Lcbc_dec_bzero:@ wipe key schedule [if any] ++ vstmia sp!, {q0,q1} ++ cmp sp, r9 ++ bne Lcbc_dec_bzero ++#endif ++ ++ mov sp, r9 ++ add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb ++ vst1.8 {q15}, [r8] @ return IV ++ VFP_ABI_POP ++ ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} ++ ++.globl _bsaes_ctr32_encrypt_blocks ++.private_extern _bsaes_ctr32_encrypt_blocks ++#ifdef __thumb2__ ++.thumb_func _bsaes_ctr32_encrypt_blocks ++#endif ++.align 5 ++_bsaes_ctr32_encrypt_blocks: ++ @ In OpenSSL, short inputs fall back to aes_nohw_* here. We patch this ++ @ out to retain a constant-time implementation. 
++ mov ip, sp ++ stmdb sp!, {r4,r5,r6,r7,r8,r9,r10, lr} ++ VFP_ABI_PUSH ++ ldr r8, [ip] @ ctr is 1st arg on the stack ++ sub sp, sp, #0x10 @ scratch space to carry over the ctr ++ mov r9, sp @ save sp ++ ++ ldr r10, [r3, #240] @ get # of rounds ++#ifndef BSAES_ASM_EXTENDED_KEY ++ @ allocate the key schedule on the stack ++ sub r12, sp, r10, lsl#7 @ 128 bytes per inner round key ++ add r12, #96 @ size of bit-sliced key schedule ++ ++ @ populate the key schedule ++ mov r4, r3 @ pass key ++ mov r5, r10 @ pass # of rounds ++ mov sp, r12 @ sp is sp ++ bl _bsaes_key_convert ++ veor q7,q7,q15 @ fix up last round key ++ vstmia r12, {q7} @ save last round key ++ ++ vld1.8 {q0}, [r8] @ load counter ++#ifdef __APPLE__ ++ mov r8, #:lower16:(LREVM0SR-LM0) ++ add r8, r6, r8 ++#else ++ add r8, r6, #LREVM0SR-LM0 @ borrow r8 ++#endif ++ vldmia sp, {q4} @ load round0 key ++#else ++ ldr r12, [r3, #244] ++ eors r12, #1 ++ beq 0f ++ ++ @ populate the key schedule ++ str r12, [r3, #244] ++ mov r4, r3 @ pass key ++ mov r5, r10 @ pass # of rounds ++ add r12, r3, #248 @ pass key schedule ++ bl _bsaes_key_convert ++ veor q7,q7,q15 @ fix up last round key ++ vstmia r12, {q7} @ save last round key ++ ++.align 2 ++ add r12, r3, #248 ++ vld1.8 {q0}, [r8] @ load counter ++ adrl r8, LREVM0SR @ borrow r8 ++ vldmia r12, {q4} @ load round0 key ++ sub sp, #0x10 @ place for adjusted round0 key ++#endif ++ ++ vmov.i32 q8,#1 @ compose 1<<96 ++ veor q9,q9,q9 ++ vrev32.8 q0,q0 ++ vext.8 q8,q9,q8,#4 ++ vrev32.8 q4,q4 ++ vadd.u32 q9,q8,q8 @ compose 2<<96 ++ vstmia sp, {q4} @ save adjusted round0 key ++ b Lctr_enc_loop ++ ++.align 4 ++Lctr_enc_loop: ++ vadd.u32 q10, q8, q9 @ compose 3<<96 ++ vadd.u32 q1, q0, q8 @ +1 ++ vadd.u32 q2, q0, q9 @ +2 ++ vadd.u32 q3, q0, q10 @ +3 ++ vadd.u32 q4, q1, q10 ++ vadd.u32 q5, q2, q10 ++ vadd.u32 q6, q3, q10 ++ vadd.u32 q7, q4, q10 ++ vadd.u32 q10, q5, q10 @ next counter ++ ++ @ Borrow prologue from _bsaes_encrypt8 to use the opportunity ++ @ to flip byte order in 32-bit counter ++ ++ vldmia sp, {q9} @ load round0 key ++#ifndef BSAES_ASM_EXTENDED_KEY ++ add r4, sp, #0x10 @ pass next round key ++#else ++ add r4, r3, #264 ++#endif ++ vldmia r8, {q8} @ LREVM0SR ++ mov r5, r10 @ pass rounds ++ vstmia r9, {q10} @ save next counter ++#ifdef __APPLE__ ++ mov r6, #:lower16:(LREVM0SR-LSR) ++ sub r6, r8, r6 ++#else ++ sub r6, r8, #LREVM0SR-LSR @ pass constants ++#endif ++ ++ bl _bsaes_encrypt8_alt ++ ++ subs r2, r2, #8 ++ blo Lctr_enc_loop_done ++ ++ vld1.8 {q8,q9}, [r0]! @ load input ++ vld1.8 {q10,q11}, [r0]! ++ veor q0, q8 ++ veor q1, q9 ++ vld1.8 {q12,q13}, [r0]! ++ veor q4, q10 ++ veor q6, q11 ++ vld1.8 {q14,q15}, [r0]! ++ veor q3, q12 ++ vst1.8 {q0,q1}, [r1]! @ write output ++ veor q7, q13 ++ veor q2, q14 ++ vst1.8 {q4}, [r1]! ++ veor q5, q15 ++ vst1.8 {q6}, [r1]! ++ vmov.i32 q8, #1 @ compose 1<<96 ++ vst1.8 {q3}, [r1]! ++ veor q9, q9, q9 ++ vst1.8 {q7}, [r1]! ++ vext.8 q8, q9, q8, #4 ++ vst1.8 {q2}, [r1]! ++ vadd.u32 q9,q8,q8 @ compose 2<<96 ++ vst1.8 {q5}, [r1]! ++ vldmia r9, {q0} @ load counter ++ ++ bne Lctr_enc_loop ++ b Lctr_enc_done ++ ++.align 4 ++Lctr_enc_loop_done: ++ add r2, r2, #8 ++ vld1.8 {q8}, [r0]! @ load input ++ veor q0, q8 ++ vst1.8 {q0}, [r1]! @ write output ++ cmp r2, #2 ++ blo Lctr_enc_done ++ vld1.8 {q9}, [r0]! ++ veor q1, q9 ++ vst1.8 {q1}, [r1]! ++ beq Lctr_enc_done ++ vld1.8 {q10}, [r0]! ++ veor q4, q10 ++ vst1.8 {q4}, [r1]! ++ cmp r2, #4 ++ blo Lctr_enc_done ++ vld1.8 {q11}, [r0]! ++ veor q6, q11 ++ vst1.8 {q6}, [r1]! ++ beq Lctr_enc_done ++ vld1.8 {q12}, [r0]! 
++ veor q3, q12 ++ vst1.8 {q3}, [r1]! ++ cmp r2, #6 ++ blo Lctr_enc_done ++ vld1.8 {q13}, [r0]! ++ veor q7, q13 ++ vst1.8 {q7}, [r1]! ++ beq Lctr_enc_done ++ vld1.8 {q14}, [r0] ++ veor q2, q14 ++ vst1.8 {q2}, [r1]! ++ ++Lctr_enc_done: ++ vmov.i32 q0, #0 ++ vmov.i32 q1, #0 ++#ifndef BSAES_ASM_EXTENDED_KEY ++Lctr_enc_bzero:@ wipe key schedule [if any] ++ vstmia sp!, {q0,q1} ++ cmp sp, r9 ++ bne Lctr_enc_bzero ++#else ++ vstmia sp, {q0,q1} ++#endif ++ ++ mov sp, r9 ++ add sp, #0x10 @ add sp,r9,#0x10 is no good for thumb ++ VFP_ABI_POP ++ ldmia sp!, {r4,r5,r6,r7,r8,r9,r10, pc} @ return ++ ++ @ OpenSSL contains aes_nohw_* fallback code here. We patch this ++ @ out to retain a constant-time implementation. ++ ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/fipsmodule/ghash-armv4.S b/apple-arm/crypto/fipsmodule/ghash-armv4.S +new file mode 100644 +index 0000000..36f4cce +--- /dev/null ++++ b/apple-arm/crypto/fipsmodule/ghash-armv4.S +@@ -0,0 +1,258 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both ++@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL ++@ instructions are in aesv8-armx.pl.) ++ ++ ++.text ++#if defined(__thumb2__) || defined(__clang__) ++.syntax unified ++#define ldrplb ldrbpl ++#define ldrneb ldrbne ++#endif ++#if defined(__thumb2__) ++.thumb ++#else ++.code 32 ++#endif ++#if __ARM_MAX_ARCH__>=7 ++ ++ ++ ++.globl _gcm_init_neon ++.private_extern _gcm_init_neon ++#ifdef __thumb2__ ++.thumb_func _gcm_init_neon ++#endif ++.align 4 ++_gcm_init_neon: ++ vld1.64 d7,[r1]! @ load H ++ vmov.i8 q8,#0xe1 ++ vld1.64 d6,[r1] ++ vshl.i64 d17,#57 ++ vshr.u64 d16,#63 @ t0=0xc2....01 ++ vdup.8 q9,d7[7] ++ vshr.u64 d26,d6,#63 ++ vshr.s8 q9,#7 @ broadcast carry bit ++ vshl.i64 q3,q3,#1 ++ vand q8,q8,q9 ++ vorr d7,d26 @ H<<<=1 ++ veor q3,q3,q8 @ twisted H ++ vstmia r0,{q3} ++ ++ bx lr @ bx lr ++ ++ ++.globl _gcm_gmult_neon ++.private_extern _gcm_gmult_neon ++#ifdef __thumb2__ ++.thumb_func _gcm_gmult_neon ++#endif ++.align 4 ++_gcm_gmult_neon: ++ vld1.64 d7,[r0]! @ load Xi ++ vld1.64 d6,[r0]! ++ vmov.i64 d29,#0x0000ffffffffffff ++ vldmia r1,{d26,d27} @ load twisted H ++ vmov.i64 d30,#0x00000000ffffffff ++#ifdef __ARMEL__ ++ vrev64.8 q3,q3 ++#endif ++ vmov.i64 d31,#0x000000000000ffff ++ veor d28,d26,d27 @ Karatsuba pre-processing ++ mov r3,#16 ++ b Lgmult_neon ++ ++ ++.globl _gcm_ghash_neon ++.private_extern _gcm_ghash_neon ++#ifdef __thumb2__ ++.thumb_func _gcm_ghash_neon ++#endif ++.align 4 ++_gcm_ghash_neon: ++ vld1.64 d1,[r0]! @ load Xi ++ vld1.64 d0,[r0]! ++ vmov.i64 d29,#0x0000ffffffffffff ++ vldmia r1,{d26,d27} @ load twisted H ++ vmov.i64 d30,#0x00000000ffffffff ++#ifdef __ARMEL__ ++ vrev64.8 q0,q0 ++#endif ++ vmov.i64 d31,#0x000000000000ffff ++ veor d28,d26,d27 @ Karatsuba pre-processing ++ ++Loop_neon: ++ vld1.64 d7,[r2]! @ load inp ++ vld1.64 d6,[r2]! 
++#ifdef __ARMEL__ ++ vrev64.8 q3,q3 ++#endif ++ veor q3,q0 @ inp^=Xi ++Lgmult_neon: ++ vext.8 d16, d26, d26, #1 @ A1 ++ vmull.p8 q8, d16, d6 @ F = A1*B ++ vext.8 d0, d6, d6, #1 @ B1 ++ vmull.p8 q0, d26, d0 @ E = A*B1 ++ vext.8 d18, d26, d26, #2 @ A2 ++ vmull.p8 q9, d18, d6 @ H = A2*B ++ vext.8 d22, d6, d6, #2 @ B2 ++ vmull.p8 q11, d26, d22 @ G = A*B2 ++ vext.8 d20, d26, d26, #3 @ A3 ++ veor q8, q8, q0 @ L = E + F ++ vmull.p8 q10, d20, d6 @ J = A3*B ++ vext.8 d0, d6, d6, #3 @ B3 ++ veor q9, q9, q11 @ M = G + H ++ vmull.p8 q0, d26, d0 @ I = A*B3 ++ veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 ++ vand d17, d17, d29 ++ vext.8 d22, d6, d6, #4 @ B4 ++ veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 ++ vand d19, d19, d30 ++ vmull.p8 q11, d26, d22 @ K = A*B4 ++ veor q10, q10, q0 @ N = I + J ++ veor d16, d16, d17 ++ veor d18, d18, d19 ++ veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 ++ vand d21, d21, d31 ++ vext.8 q8, q8, q8, #15 ++ veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 ++ vmov.i64 d23, #0 ++ vext.8 q9, q9, q9, #14 ++ veor d20, d20, d21 ++ vmull.p8 q0, d26, d6 @ D = A*B ++ vext.8 q11, q11, q11, #12 ++ vext.8 q10, q10, q10, #13 ++ veor q8, q8, q9 ++ veor q10, q10, q11 ++ veor q0, q0, q8 ++ veor q0, q0, q10 ++ veor d6,d6,d7 @ Karatsuba pre-processing ++ vext.8 d16, d28, d28, #1 @ A1 ++ vmull.p8 q8, d16, d6 @ F = A1*B ++ vext.8 d2, d6, d6, #1 @ B1 ++ vmull.p8 q1, d28, d2 @ E = A*B1 ++ vext.8 d18, d28, d28, #2 @ A2 ++ vmull.p8 q9, d18, d6 @ H = A2*B ++ vext.8 d22, d6, d6, #2 @ B2 ++ vmull.p8 q11, d28, d22 @ G = A*B2 ++ vext.8 d20, d28, d28, #3 @ A3 ++ veor q8, q8, q1 @ L = E + F ++ vmull.p8 q10, d20, d6 @ J = A3*B ++ vext.8 d2, d6, d6, #3 @ B3 ++ veor q9, q9, q11 @ M = G + H ++ vmull.p8 q1, d28, d2 @ I = A*B3 ++ veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 ++ vand d17, d17, d29 ++ vext.8 d22, d6, d6, #4 @ B4 ++ veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 ++ vand d19, d19, d30 ++ vmull.p8 q11, d28, d22 @ K = A*B4 ++ veor q10, q10, q1 @ N = I + J ++ veor d16, d16, d17 ++ veor d18, d18, d19 ++ veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 ++ vand d21, d21, d31 ++ vext.8 q8, q8, q8, #15 ++ veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 ++ vmov.i64 d23, #0 ++ vext.8 q9, q9, q9, #14 ++ veor d20, d20, d21 ++ vmull.p8 q1, d28, d6 @ D = A*B ++ vext.8 q11, q11, q11, #12 ++ vext.8 q10, q10, q10, #13 ++ veor q8, q8, q9 ++ veor q10, q10, q11 ++ veor q1, q1, q8 ++ veor q1, q1, q10 ++ vext.8 d16, d27, d27, #1 @ A1 ++ vmull.p8 q8, d16, d7 @ F = A1*B ++ vext.8 d4, d7, d7, #1 @ B1 ++ vmull.p8 q2, d27, d4 @ E = A*B1 ++ vext.8 d18, d27, d27, #2 @ A2 ++ vmull.p8 q9, d18, d7 @ H = A2*B ++ vext.8 d22, d7, d7, #2 @ B2 ++ vmull.p8 q11, d27, d22 @ G = A*B2 ++ vext.8 d20, d27, d27, #3 @ A3 ++ veor q8, q8, q2 @ L = E + F ++ vmull.p8 q10, d20, d7 @ J = A3*B ++ vext.8 d4, d7, d7, #3 @ B3 ++ veor q9, q9, q11 @ M = G + H ++ vmull.p8 q2, d27, d4 @ I = A*B3 ++ veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8 ++ vand d17, d17, d29 ++ vext.8 d22, d7, d7, #4 @ B4 ++ veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16 ++ vand d19, d19, d30 ++ vmull.p8 q11, d27, d22 @ K = A*B4 ++ veor q10, q10, q2 @ N = I + J ++ veor d16, d16, d17 ++ veor d18, d18, d19 ++ veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24 ++ vand d21, d21, d31 ++ vext.8 q8, q8, q8, #15 ++ veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32 ++ vmov.i64 d23, #0 ++ vext.8 q9, q9, q9, #14 ++ veor d20, d20, d21 ++ vmull.p8 q2, d27, d7 @ D = A*B ++ vext.8 q11, q11, q11, #12 ++ vext.8 q10, q10, q10, #13 ++ veor q8, q8, q9 ++ veor q10, q10, q11 ++ veor q2, q2, q8 ++ veor q2, q2, q10 ++ veor 
q1,q1,q0 @ Karatsuba post-processing ++ veor q1,q1,q2 ++ veor d1,d1,d2 ++ veor d4,d4,d3 @ Xh|Xl - 256-bit result ++ ++ @ equivalent of reduction_avx from ghash-x86_64.pl ++ vshl.i64 q9,q0,#57 @ 1st phase ++ vshl.i64 q10,q0,#62 ++ veor q10,q10,q9 @ ++ vshl.i64 q9,q0,#63 ++ veor q10, q10, q9 @ ++ veor d1,d1,d20 @ ++ veor d4,d4,d21 ++ ++ vshr.u64 q10,q0,#1 @ 2nd phase ++ veor q2,q2,q0 ++ veor q0,q0,q10 @ ++ vshr.u64 q10,q10,#6 ++ vshr.u64 q0,q0,#1 @ ++ veor q0,q0,q2 @ ++ veor q0,q0,q10 @ ++ ++ subs r3,#16 ++ bne Loop_neon ++ ++#ifdef __ARMEL__ ++ vrev64.8 q0,q0 ++#endif ++ sub r0,#16 ++ vst1.64 d1,[r0]! @ write out Xi ++ vst1.64 d0,[r0] ++ ++ bx lr @ bx lr ++ ++#endif ++.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/fipsmodule/ghashv8-armx32.S b/apple-arm/crypto/fipsmodule/ghashv8-armx32.S +new file mode 100644 +index 0000000..dcac580 +--- /dev/null ++++ b/apple-arm/crypto/fipsmodule/ghashv8-armx32.S +@@ -0,0 +1,260 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++#if __ARM_MAX_ARCH__>=7 ++.text ++ ++.code 32 ++#undef __thumb2__ ++.globl _gcm_init_v8 ++.private_extern _gcm_init_v8 ++#ifdef __thumb2__ ++.thumb_func _gcm_init_v8 ++#endif ++.align 4 ++_gcm_init_v8: ++ AARCH64_VALID_CALL_TARGET ++ vld1.64 {q9},[r1] @ load input H ++ vmov.i8 q11,#0xe1 ++ vshl.i64 q11,q11,#57 @ 0xc2.0 ++ vext.8 q3,q9,q9,#8 ++ vshr.u64 q10,q11,#63 ++ vdup.32 q9,d18[1] ++ vext.8 q8,q10,q11,#8 @ t0=0xc2....01 ++ vshr.u64 q10,q3,#63 ++ vshr.s32 q9,q9,#31 @ broadcast carry bit ++ vand q10,q10,q8 ++ vshl.i64 q3,q3,#1 ++ vext.8 q10,q10,q10,#8 ++ vand q8,q8,q9 ++ vorr q3,q3,q10 @ H<<<=1 ++ veor q12,q3,q8 @ twisted H ++ vst1.64 {q12},[r0]! @ store Htable[0] ++ ++ @ calculate H^2 ++ vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing ++.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12 ++ veor q8,q8,q12 ++.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12 ++.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8 ++ ++ vext.8 q9,q0,q2,#8 @ Karatsuba post-processing ++ veor q10,q0,q2 ++ veor q1,q1,q9 ++ veor q1,q1,q10 ++.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase ++ ++ vmov d4,d3 @ Xh|Xm - 256-bit result ++ vmov d3,d0 @ Xm is rotated Xl ++ veor q0,q1,q10 ++ ++ vext.8 q10,q0,q0,#8 @ 2nd phase ++.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 ++ veor q10,q10,q2 ++ veor q14,q0,q10 ++ ++ vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing ++ veor q9,q9,q14 ++ vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed ++ vst1.64 {q13,q14},[r0]! @ store Htable[1..2] ++ bx lr ++ ++.globl _gcm_gmult_v8 ++.private_extern _gcm_gmult_v8 ++#ifdef __thumb2__ ++.thumb_func _gcm_gmult_v8 ++#endif ++.align 4 ++_gcm_gmult_v8: ++ AARCH64_VALID_CALL_TARGET ++ vld1.64 {q9},[r0] @ load Xi ++ vmov.i8 q11,#0xe1 ++ vld1.64 {q12,q13},[r1] @ load twisted H, ... 
++ vshl.u64 q11,q11,#57 ++#ifndef __ARMEB__ ++ vrev64.8 q9,q9 ++#endif ++ vext.8 q3,q9,q9,#8 ++ ++.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo ++ veor q9,q9,q3 @ Karatsuba pre-processing ++.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi ++.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) ++ ++ vext.8 q9,q0,q2,#8 @ Karatsuba post-processing ++ veor q10,q0,q2 ++ veor q1,q1,q9 ++ veor q1,q1,q10 ++.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction ++ ++ vmov d4,d3 @ Xh|Xm - 256-bit result ++ vmov d3,d0 @ Xm is rotated Xl ++ veor q0,q1,q10 ++ ++ vext.8 q10,q0,q0,#8 @ 2nd phase of reduction ++.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 ++ veor q10,q10,q2 ++ veor q0,q0,q10 ++ ++#ifndef __ARMEB__ ++ vrev64.8 q0,q0 ++#endif ++ vext.8 q0,q0,q0,#8 ++ vst1.64 {q0},[r0] @ write out Xi ++ ++ bx lr ++ ++.globl _gcm_ghash_v8 ++.private_extern _gcm_ghash_v8 ++#ifdef __thumb2__ ++.thumb_func _gcm_ghash_v8 ++#endif ++.align 4 ++_gcm_ghash_v8: ++ AARCH64_VALID_CALL_TARGET ++ vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so ++ vld1.64 {q0},[r0] @ load [rotated] Xi ++ @ "[rotated]" means that ++ @ loaded value would have ++ @ to be rotated in order to ++ @ make it appear as in ++ @ algorithm specification ++ subs r3,r3,#32 @ see if r3 is 32 or larger ++ mov r12,#16 @ r12 is used as post- ++ @ increment for input pointer; ++ @ as loop is modulo-scheduled ++ @ r12 is zeroed just in time ++ @ to preclude overstepping ++ @ inp[len], which means that ++ @ last block[s] are actually ++ @ loaded twice, but last ++ @ copy is not processed ++ vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2 ++ vmov.i8 q11,#0xe1 ++ vld1.64 {q14},[r1] ++ moveq r12,#0 @ is it time to zero r12? ++ vext.8 q0,q0,q0,#8 @ rotate Xi ++ vld1.64 {q8},[r2]! @ load [rotated] I[0] ++ vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant ++#ifndef __ARMEB__ ++ vrev64.8 q8,q8 ++ vrev64.8 q0,q0 ++#endif ++ vext.8 q3,q8,q8,#8 @ rotate I[0] ++ blo Lodd_tail_v8 @ r3 was less than 32 ++ vld1.64 {q9},[r2],r12 @ load [rotated] I[1] ++#ifndef __ARMEB__ ++ vrev64.8 q9,q9 ++#endif ++ vext.8 q7,q9,q9,#8 ++ veor q3,q3,q0 @ I[i]^=Xi ++.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 ++ veor q9,q9,q7 @ Karatsuba pre-processing ++.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 ++ b Loop_mod2x_v8 ++ ++.align 4 ++Loop_mod2x_v8: ++ vext.8 q10,q3,q3,#8 ++ subs r3,r3,#32 @ is there more data? ++.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo ++ movlo r12,#0 @ is it time to zero r12? ++ ++.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9 ++ veor q10,q10,q3 @ Karatsuba pre-processing ++.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi ++ veor q0,q0,q4 @ accumulate ++.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi) ++ vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2] ++ ++ veor q2,q2,q6 ++ moveq r12,#0 @ is it time to zero r12? 
++ veor q1,q1,q5 ++ ++ vext.8 q9,q0,q2,#8 @ Karatsuba post-processing ++ veor q10,q0,q2 ++ veor q1,q1,q9 ++ vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3] ++#ifndef __ARMEB__ ++ vrev64.8 q8,q8 ++#endif ++ veor q1,q1,q10 ++.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction ++ ++#ifndef __ARMEB__ ++ vrev64.8 q9,q9 ++#endif ++ vmov d4,d3 @ Xh|Xm - 256-bit result ++ vmov d3,d0 @ Xm is rotated Xl ++ vext.8 q7,q9,q9,#8 ++ vext.8 q3,q8,q8,#8 ++ veor q0,q1,q10 ++.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1 ++ veor q3,q3,q2 @ accumulate q3 early ++ ++ vext.8 q10,q0,q0,#8 @ 2nd phase of reduction ++.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 ++ veor q3,q3,q10 ++ veor q9,q9,q7 @ Karatsuba pre-processing ++ veor q3,q3,q0 ++.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7 ++ bhs Loop_mod2x_v8 @ there was at least 32 more bytes ++ ++ veor q2,q2,q10 ++ vext.8 q3,q8,q8,#8 @ re-construct q3 ++ adds r3,r3,#32 @ re-construct r3 ++ veor q0,q0,q2 @ re-construct q0 ++ beq Ldone_v8 @ is r3 zero? ++Lodd_tail_v8: ++ vext.8 q10,q0,q0,#8 ++ veor q3,q3,q0 @ inp^=Xi ++ veor q9,q8,q10 @ q9 is rotated inp^Xi ++ ++.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo ++ veor q9,q9,q3 @ Karatsuba pre-processing ++.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi ++.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi) ++ ++ vext.8 q9,q0,q2,#8 @ Karatsuba post-processing ++ veor q10,q0,q2 ++ veor q1,q1,q9 ++ veor q1,q1,q10 ++.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction ++ ++ vmov d4,d3 @ Xh|Xm - 256-bit result ++ vmov d3,d0 @ Xm is rotated Xl ++ veor q0,q1,q10 ++ ++ vext.8 q10,q0,q0,#8 @ 2nd phase of reduction ++.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11 ++ veor q10,q10,q2 ++ veor q0,q0,q10 ++ ++Ldone_v8: ++#ifndef __ARMEB__ ++ vrev64.8 q0,q0 ++#endif ++ vext.8 q0,q0,q0,#8 ++ vst1.64 {q0},[r0] @ write out Xi ++ ++ vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so ++ bx lr ++ ++.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/fipsmodule/sha1-armv4-large.S b/apple-arm/crypto/fipsmodule/sha1-armv4-large.S +new file mode 100644 +index 0000000..82ac8df +--- /dev/null ++++ b/apple-arm/crypto/fipsmodule/sha1-armv4-large.S +@@ -0,0 +1,1518 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++#include ++ ++.text ++#if defined(__thumb2__) ++.syntax unified ++.thumb ++#else ++.code 32 ++#endif ++ ++.globl _sha1_block_data_order ++.private_extern _sha1_block_data_order ++#ifdef __thumb2__ ++.thumb_func _sha1_block_data_order ++#endif ++ ++.align 5 ++_sha1_block_data_order: ++#if __ARM_MAX_ARCH__>=7 ++Lsha1_block: ++ adr r3,Lsha1_block ++ ldr r12,LOPENSSL_armcap ++ ldr r12,[r3,r12] @ OPENSSL_armcap_P ++#ifdef __APPLE__ ++ ldr r12,[r12] ++#endif ++ tst r12,#ARMV8_SHA1 ++ bne LARMv8 ++ tst r12,#ARMV7_NEON ++ bne LNEON ++#endif ++ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} ++ add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 ++ ldmia r0,{r3,r4,r5,r6,r7} ++Lloop: ++ ldr r8,LK_00_19 ++ mov r14,sp ++ sub sp,sp,#15*4 ++ mov r5,r5,ror#30 ++ mov r6,r6,ror#30 ++ mov r7,r7,ror#30 @ [6] ++L_00_15: ++#if __ARM_ARCH__<7 ++ ldrb r10,[r1,#2] ++ ldrb r9,[r1,#3] ++ ldrb r11,[r1,#1] ++ add r7,r8,r7,ror#2 @ E+=K_00_19 ++ ldrb r12,[r1],#4 ++ orr r9,r9,r10,lsl#8 ++ eor r10,r5,r6 @ F_xx_xx ++ orr r9,r9,r11,lsl#16 ++ add r7,r7,r3,ror#27 @ E+=ROR(A,27) ++ orr r9,r9,r12,lsl#24 ++#else ++ ldr r9,[r1],#4 @ handles unaligned ++ add r7,r8,r7,ror#2 @ E+=K_00_19 ++ eor r10,r5,r6 @ F_xx_xx ++ add r7,r7,r3,ror#27 @ E+=ROR(A,27) ++#ifdef __ARMEL__ ++ rev r9,r9 @ byte swap ++#endif ++#endif ++ and r10,r4,r10,ror#2 ++ add r7,r7,r9 @ E+=X[i] ++ eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) ++ str r9,[r14,#-4]! ++ add r7,r7,r10 @ E+=F_00_19(B,C,D) ++#if __ARM_ARCH__<7 ++ ldrb r10,[r1,#2] ++ ldrb r9,[r1,#3] ++ ldrb r11,[r1,#1] ++ add r6,r8,r6,ror#2 @ E+=K_00_19 ++ ldrb r12,[r1],#4 ++ orr r9,r9,r10,lsl#8 ++ eor r10,r4,r5 @ F_xx_xx ++ orr r9,r9,r11,lsl#16 ++ add r6,r6,r7,ror#27 @ E+=ROR(A,27) ++ orr r9,r9,r12,lsl#24 ++#else ++ ldr r9,[r1],#4 @ handles unaligned ++ add r6,r8,r6,ror#2 @ E+=K_00_19 ++ eor r10,r4,r5 @ F_xx_xx ++ add r6,r6,r7,ror#27 @ E+=ROR(A,27) ++#ifdef __ARMEL__ ++ rev r9,r9 @ byte swap ++#endif ++#endif ++ and r10,r3,r10,ror#2 ++ add r6,r6,r9 @ E+=X[i] ++ eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) ++ str r9,[r14,#-4]! ++ add r6,r6,r10 @ E+=F_00_19(B,C,D) ++#if __ARM_ARCH__<7 ++ ldrb r10,[r1,#2] ++ ldrb r9,[r1,#3] ++ ldrb r11,[r1,#1] ++ add r5,r8,r5,ror#2 @ E+=K_00_19 ++ ldrb r12,[r1],#4 ++ orr r9,r9,r10,lsl#8 ++ eor r10,r3,r4 @ F_xx_xx ++ orr r9,r9,r11,lsl#16 ++ add r5,r5,r6,ror#27 @ E+=ROR(A,27) ++ orr r9,r9,r12,lsl#24 ++#else ++ ldr r9,[r1],#4 @ handles unaligned ++ add r5,r8,r5,ror#2 @ E+=K_00_19 ++ eor r10,r3,r4 @ F_xx_xx ++ add r5,r5,r6,ror#27 @ E+=ROR(A,27) ++#ifdef __ARMEL__ ++ rev r9,r9 @ byte swap ++#endif ++#endif ++ and r10,r7,r10,ror#2 ++ add r5,r5,r9 @ E+=X[i] ++ eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) ++ str r9,[r14,#-4]! ++ add r5,r5,r10 @ E+=F_00_19(B,C,D) ++#if __ARM_ARCH__<7 ++ ldrb r10,[r1,#2] ++ ldrb r9,[r1,#3] ++ ldrb r11,[r1,#1] ++ add r4,r8,r4,ror#2 @ E+=K_00_19 ++ ldrb r12,[r1],#4 ++ orr r9,r9,r10,lsl#8 ++ eor r10,r7,r3 @ F_xx_xx ++ orr r9,r9,r11,lsl#16 ++ add r4,r4,r5,ror#27 @ E+=ROR(A,27) ++ orr r9,r9,r12,lsl#24 ++#else ++ ldr r9,[r1],#4 @ handles unaligned ++ add r4,r8,r4,ror#2 @ E+=K_00_19 ++ eor r10,r7,r3 @ F_xx_xx ++ add r4,r4,r5,ror#27 @ E+=ROR(A,27) ++#ifdef __ARMEL__ ++ rev r9,r9 @ byte swap ++#endif ++#endif ++ and r10,r6,r10,ror#2 ++ add r4,r4,r9 @ E+=X[i] ++ eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) ++ str r9,[r14,#-4]! 
++ add r4,r4,r10 @ E+=F_00_19(B,C,D) ++#if __ARM_ARCH__<7 ++ ldrb r10,[r1,#2] ++ ldrb r9,[r1,#3] ++ ldrb r11,[r1,#1] ++ add r3,r8,r3,ror#2 @ E+=K_00_19 ++ ldrb r12,[r1],#4 ++ orr r9,r9,r10,lsl#8 ++ eor r10,r6,r7 @ F_xx_xx ++ orr r9,r9,r11,lsl#16 ++ add r3,r3,r4,ror#27 @ E+=ROR(A,27) ++ orr r9,r9,r12,lsl#24 ++#else ++ ldr r9,[r1],#4 @ handles unaligned ++ add r3,r8,r3,ror#2 @ E+=K_00_19 ++ eor r10,r6,r7 @ F_xx_xx ++ add r3,r3,r4,ror#27 @ E+=ROR(A,27) ++#ifdef __ARMEL__ ++ rev r9,r9 @ byte swap ++#endif ++#endif ++ and r10,r5,r10,ror#2 ++ add r3,r3,r9 @ E+=X[i] ++ eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) ++ str r9,[r14,#-4]! ++ add r3,r3,r10 @ E+=F_00_19(B,C,D) ++#if defined(__thumb2__) ++ mov r12,sp ++ teq r14,r12 ++#else ++ teq r14,sp ++#endif ++ bne L_00_15 @ [((11+4)*5+2)*3] ++ sub sp,sp,#25*4 ++#if __ARM_ARCH__<7 ++ ldrb r10,[r1,#2] ++ ldrb r9,[r1,#3] ++ ldrb r11,[r1,#1] ++ add r7,r8,r7,ror#2 @ E+=K_00_19 ++ ldrb r12,[r1],#4 ++ orr r9,r9,r10,lsl#8 ++ eor r10,r5,r6 @ F_xx_xx ++ orr r9,r9,r11,lsl#16 ++ add r7,r7,r3,ror#27 @ E+=ROR(A,27) ++ orr r9,r9,r12,lsl#24 ++#else ++ ldr r9,[r1],#4 @ handles unaligned ++ add r7,r8,r7,ror#2 @ E+=K_00_19 ++ eor r10,r5,r6 @ F_xx_xx ++ add r7,r7,r3,ror#27 @ E+=ROR(A,27) ++#ifdef __ARMEL__ ++ rev r9,r9 @ byte swap ++#endif ++#endif ++ and r10,r4,r10,ror#2 ++ add r7,r7,r9 @ E+=X[i] ++ eor r10,r10,r6,ror#2 @ F_00_19(B,C,D) ++ str r9,[r14,#-4]! ++ add r7,r7,r10 @ E+=F_00_19(B,C,D) ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r6,r8,r6,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r4,r5 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r6,r6,r7,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ and r10,r3,r10,ror#2 @ F_xx_xx ++ @ F_xx_xx ++ add r6,r6,r9 @ E+=X[i] ++ eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) ++ add r6,r6,r10 @ E+=F_00_19(B,C,D) ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r5,r8,r5,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r3,r4 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r5,r5,r6,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ and r10,r7,r10,ror#2 @ F_xx_xx ++ @ F_xx_xx ++ add r5,r5,r9 @ E+=X[i] ++ eor r10,r10,r4,ror#2 @ F_00_19(B,C,D) ++ add r5,r5,r10 @ E+=F_00_19(B,C,D) ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r4,r8,r4,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r7,r3 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r4,r4,r5,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ and r10,r6,r10,ror#2 @ F_xx_xx ++ @ F_xx_xx ++ add r4,r4,r9 @ E+=X[i] ++ eor r10,r10,r3,ror#2 @ F_00_19(B,C,D) ++ add r4,r4,r10 @ E+=F_00_19(B,C,D) ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r3,r8,r3,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r6,r7 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r3,r3,r4,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! 
++ and r10,r5,r10,ror#2 @ F_xx_xx ++ @ F_xx_xx ++ add r3,r3,r9 @ E+=X[i] ++ eor r10,r10,r7,ror#2 @ F_00_19(B,C,D) ++ add r3,r3,r10 @ E+=F_00_19(B,C,D) ++ ++ ldr r8,LK_20_39 @ [+15+16*4] ++ cmn sp,#0 @ [+3], clear carry to denote 20_39 ++L_20_39_or_60_79: ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r7,r8,r7,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r5,r6 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r7,r7,r3,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ eor r10,r4,r10,ror#2 @ F_xx_xx ++ @ F_xx_xx ++ add r7,r7,r9 @ E+=X[i] ++ add r7,r7,r10 @ E+=F_20_39(B,C,D) ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r6,r8,r6,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r4,r5 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r6,r6,r7,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ eor r10,r3,r10,ror#2 @ F_xx_xx ++ @ F_xx_xx ++ add r6,r6,r9 @ E+=X[i] ++ add r6,r6,r10 @ E+=F_20_39(B,C,D) ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r5,r8,r5,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r3,r4 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r5,r5,r6,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ eor r10,r7,r10,ror#2 @ F_xx_xx ++ @ F_xx_xx ++ add r5,r5,r9 @ E+=X[i] ++ add r5,r5,r10 @ E+=F_20_39(B,C,D) ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r4,r8,r4,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r7,r3 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r4,r4,r5,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ eor r10,r6,r10,ror#2 @ F_xx_xx ++ @ F_xx_xx ++ add r4,r4,r9 @ E+=X[i] ++ add r4,r4,r10 @ E+=F_20_39(B,C,D) ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r3,r8,r3,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r6,r7 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r3,r3,r4,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ eor r10,r5,r10,ror#2 @ F_xx_xx ++ @ F_xx_xx ++ add r3,r3,r9 @ E+=X[i] ++ add r3,r3,r10 @ E+=F_20_39(B,C,D) ++#if defined(__thumb2__) ++ mov r12,sp ++ teq r14,r12 ++#else ++ teq r14,sp @ preserve carry ++#endif ++ bne L_20_39_or_60_79 @ [+((12+3)*5+2)*4] ++ bcs L_done @ [+((12+3)*5+2)*4], spare 300 bytes ++ ++ ldr r8,LK_40_59 ++ sub sp,sp,#20*4 @ [+2] ++L_40_59: ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r7,r8,r7,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r5,r6 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r7,r7,r3,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ and r10,r4,r10,ror#2 @ F_xx_xx ++ and r11,r5,r6 @ F_xx_xx ++ add r7,r7,r9 @ E+=X[i] ++ add r7,r7,r10 @ E+=F_40_59(B,C,D) ++ add r7,r7,r11,ror#2 ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r6,r8,r6,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r4,r5 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r6,r6,r7,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! 
++ and r10,r3,r10,ror#2 @ F_xx_xx ++ and r11,r4,r5 @ F_xx_xx ++ add r6,r6,r9 @ E+=X[i] ++ add r6,r6,r10 @ E+=F_40_59(B,C,D) ++ add r6,r6,r11,ror#2 ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r5,r8,r5,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r3,r4 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r5,r5,r6,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ and r10,r7,r10,ror#2 @ F_xx_xx ++ and r11,r3,r4 @ F_xx_xx ++ add r5,r5,r9 @ E+=X[i] ++ add r5,r5,r10 @ E+=F_40_59(B,C,D) ++ add r5,r5,r11,ror#2 ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r4,r8,r4,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r7,r3 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r4,r4,r5,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ and r10,r6,r10,ror#2 @ F_xx_xx ++ and r11,r7,r3 @ F_xx_xx ++ add r4,r4,r9 @ E+=X[i] ++ add r4,r4,r10 @ E+=F_40_59(B,C,D) ++ add r4,r4,r11,ror#2 ++ ldr r9,[r14,#15*4] ++ ldr r10,[r14,#13*4] ++ ldr r11,[r14,#7*4] ++ add r3,r8,r3,ror#2 @ E+=K_xx_xx ++ ldr r12,[r14,#2*4] ++ eor r9,r9,r10 ++ eor r11,r11,r12 @ 1 cycle stall ++ eor r10,r6,r7 @ F_xx_xx ++ mov r9,r9,ror#31 ++ add r3,r3,r4,ror#27 @ E+=ROR(A,27) ++ eor r9,r9,r11,ror#31 ++ str r9,[r14,#-4]! ++ and r10,r5,r10,ror#2 @ F_xx_xx ++ and r11,r6,r7 @ F_xx_xx ++ add r3,r3,r9 @ E+=X[i] ++ add r3,r3,r10 @ E+=F_40_59(B,C,D) ++ add r3,r3,r11,ror#2 ++#if defined(__thumb2__) ++ mov r12,sp ++ teq r14,r12 ++#else ++ teq r14,sp ++#endif ++ bne L_40_59 @ [+((12+5)*5+2)*4] ++ ++ ldr r8,LK_60_79 ++ sub sp,sp,#20*4 ++ cmp sp,#0 @ set carry to denote 60_79 ++ b L_20_39_or_60_79 @ [+4], spare 300 bytes ++L_done: ++ add sp,sp,#80*4 @ "deallocate" stack frame ++ ldmia r0,{r8,r9,r10,r11,r12} ++ add r3,r8,r3 ++ add r4,r9,r4 ++ add r5,r10,r5,ror#2 ++ add r6,r11,r6,ror#2 ++ add r7,r12,r7,ror#2 ++ stmia r0,{r3,r4,r5,r6,r7} ++ teq r1,r2 ++ bne Lloop @ [+18], total 1307 ++ ++#if __ARM_ARCH__>=5 ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} ++#else ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} ++ tst lr,#1 ++ moveq pc,lr @ be binary compatible with V4, yet ++.word 0xe12fff1e @ interoperable with Thumb ISA:-) ++#endif ++ ++ ++.align 5 ++LK_00_19:.word 0x5a827999 ++LK_20_39:.word 0x6ed9eba1 ++LK_40_59:.word 0x8f1bbcdc ++LK_60_79:.word 0xca62c1d6 ++#if __ARM_MAX_ARCH__>=7 ++LOPENSSL_armcap: ++.word OPENSSL_armcap_P-Lsha1_block ++#endif ++.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 5 ++#if __ARM_MAX_ARCH__>=7 ++ ++ ++ ++#ifdef __thumb2__ ++.thumb_func sha1_block_data_order_neon ++#endif ++.align 4 ++sha1_block_data_order_neon: ++LNEON: ++ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} ++ add r2,r1,r2,lsl#6 @ r2 to point at the end of r1 ++ @ dmb @ errata #451034 on early Cortex A8 ++ @ vstmdb sp!,{d8-d15} @ ABI specification says so ++ mov r14,sp ++ sub r12,sp,#64 ++ adr r8,LK_00_19 ++ bic r12,r12,#15 @ align for 128-bit stores ++ ++ ldmia r0,{r3,r4,r5,r6,r7} @ load context ++ mov sp,r12 @ alloca ++ ++ vld1.8 {q0,q1},[r1]! @ handles unaligned ++ veor q15,q15,q15 ++ vld1.8 {q2,q3},[r1]! ++ vld1.32 {d28[],d29[]},[r8,:32]! @ load K_00_19 ++ vrev32.8 q0,q0 @ yes, even on ++ vrev32.8 q1,q1 @ big-endian... 
++ vrev32.8 q2,q2 ++ vadd.i32 q8,q0,q14 ++ vrev32.8 q3,q3 ++ vadd.i32 q9,q1,q14 ++ vst1.32 {q8},[r12,:128]! ++ vadd.i32 q10,q2,q14 ++ vst1.32 {q9},[r12,:128]! ++ vst1.32 {q10},[r12,:128]! ++ ldr r9,[sp] @ big RAW stall ++ ++Loop_neon: ++ vext.8 q8,q0,q1,#8 ++ bic r10,r6,r4 ++ add r7,r7,r9 ++ and r11,r5,r4 ++ vadd.i32 q13,q3,q14 ++ ldr r9,[sp,#4] ++ add r7,r7,r3,ror#27 ++ vext.8 q12,q3,q15,#4 ++ eor r11,r11,r10 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ veor q8,q8,q0 ++ bic r10,r5,r3 ++ add r6,r6,r9 ++ veor q12,q12,q2 ++ and r11,r4,r3 ++ ldr r9,[sp,#8] ++ veor q12,q12,q8 ++ add r6,r6,r7,ror#27 ++ eor r11,r11,r10 ++ vst1.32 {q13},[r12,:128]! ++ sub r12,r12,#64 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ vext.8 q13,q15,q12,#4 ++ bic r10,r4,r7 ++ add r5,r5,r9 ++ vadd.i32 q8,q12,q12 ++ and r11,r3,r7 ++ ldr r9,[sp,#12] ++ vsri.32 q8,q12,#31 ++ add r5,r5,r6,ror#27 ++ eor r11,r11,r10 ++ mov r7,r7,ror#2 ++ vshr.u32 q12,q13,#30 ++ add r5,r5,r11 ++ bic r10,r3,r6 ++ vshl.u32 q13,q13,#2 ++ add r4,r4,r9 ++ and r11,r7,r6 ++ veor q8,q8,q12 ++ ldr r9,[sp,#16] ++ add r4,r4,r5,ror#27 ++ veor q8,q8,q13 ++ eor r11,r11,r10 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ vext.8 q9,q1,q2,#8 ++ bic r10,r7,r5 ++ add r3,r3,r9 ++ and r11,r6,r5 ++ vadd.i32 q13,q8,q14 ++ ldr r9,[sp,#20] ++ vld1.32 {d28[],d29[]},[r8,:32]! ++ add r3,r3,r4,ror#27 ++ vext.8 q12,q8,q15,#4 ++ eor r11,r11,r10 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ veor q9,q9,q1 ++ bic r10,r6,r4 ++ add r7,r7,r9 ++ veor q12,q12,q3 ++ and r11,r5,r4 ++ ldr r9,[sp,#24] ++ veor q12,q12,q9 ++ add r7,r7,r3,ror#27 ++ eor r11,r11,r10 ++ vst1.32 {q13},[r12,:128]! ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ vext.8 q13,q15,q12,#4 ++ bic r10,r5,r3 ++ add r6,r6,r9 ++ vadd.i32 q9,q12,q12 ++ and r11,r4,r3 ++ ldr r9,[sp,#28] ++ vsri.32 q9,q12,#31 ++ add r6,r6,r7,ror#27 ++ eor r11,r11,r10 ++ mov r3,r3,ror#2 ++ vshr.u32 q12,q13,#30 ++ add r6,r6,r11 ++ bic r10,r4,r7 ++ vshl.u32 q13,q13,#2 ++ add r5,r5,r9 ++ and r11,r3,r7 ++ veor q9,q9,q12 ++ ldr r9,[sp,#32] ++ add r5,r5,r6,ror#27 ++ veor q9,q9,q13 ++ eor r11,r11,r10 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ vext.8 q10,q2,q3,#8 ++ bic r10,r3,r6 ++ add r4,r4,r9 ++ and r11,r7,r6 ++ vadd.i32 q13,q9,q14 ++ ldr r9,[sp,#36] ++ add r4,r4,r5,ror#27 ++ vext.8 q12,q9,q15,#4 ++ eor r11,r11,r10 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ veor q10,q10,q2 ++ bic r10,r7,r5 ++ add r3,r3,r9 ++ veor q12,q12,q8 ++ and r11,r6,r5 ++ ldr r9,[sp,#40] ++ veor q12,q12,q10 ++ add r3,r3,r4,ror#27 ++ eor r11,r11,r10 ++ vst1.32 {q13},[r12,:128]! ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ vext.8 q13,q15,q12,#4 ++ bic r10,r6,r4 ++ add r7,r7,r9 ++ vadd.i32 q10,q12,q12 ++ and r11,r5,r4 ++ ldr r9,[sp,#44] ++ vsri.32 q10,q12,#31 ++ add r7,r7,r3,ror#27 ++ eor r11,r11,r10 ++ mov r4,r4,ror#2 ++ vshr.u32 q12,q13,#30 ++ add r7,r7,r11 ++ bic r10,r5,r3 ++ vshl.u32 q13,q13,#2 ++ add r6,r6,r9 ++ and r11,r4,r3 ++ veor q10,q10,q12 ++ ldr r9,[sp,#48] ++ add r6,r6,r7,ror#27 ++ veor q10,q10,q13 ++ eor r11,r11,r10 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ vext.8 q11,q3,q8,#8 ++ bic r10,r4,r7 ++ add r5,r5,r9 ++ and r11,r3,r7 ++ vadd.i32 q13,q10,q14 ++ ldr r9,[sp,#52] ++ add r5,r5,r6,ror#27 ++ vext.8 q12,q10,q15,#4 ++ eor r11,r11,r10 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ veor q11,q11,q3 ++ bic r10,r3,r6 ++ add r4,r4,r9 ++ veor q12,q12,q9 ++ and r11,r7,r6 ++ ldr r9,[sp,#56] ++ veor q12,q12,q11 ++ add r4,r4,r5,ror#27 ++ eor r11,r11,r10 ++ vst1.32 {q13},[r12,:128]! 
++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ vext.8 q13,q15,q12,#4 ++ bic r10,r7,r5 ++ add r3,r3,r9 ++ vadd.i32 q11,q12,q12 ++ and r11,r6,r5 ++ ldr r9,[sp,#60] ++ vsri.32 q11,q12,#31 ++ add r3,r3,r4,ror#27 ++ eor r11,r11,r10 ++ mov r5,r5,ror#2 ++ vshr.u32 q12,q13,#30 ++ add r3,r3,r11 ++ bic r10,r6,r4 ++ vshl.u32 q13,q13,#2 ++ add r7,r7,r9 ++ and r11,r5,r4 ++ veor q11,q11,q12 ++ ldr r9,[sp,#0] ++ add r7,r7,r3,ror#27 ++ veor q11,q11,q13 ++ eor r11,r11,r10 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ vext.8 q12,q10,q11,#8 ++ bic r10,r5,r3 ++ add r6,r6,r9 ++ and r11,r4,r3 ++ veor q0,q0,q8 ++ ldr r9,[sp,#4] ++ add r6,r6,r7,ror#27 ++ veor q0,q0,q1 ++ eor r11,r11,r10 ++ mov r3,r3,ror#2 ++ vadd.i32 q13,q11,q14 ++ add r6,r6,r11 ++ bic r10,r4,r7 ++ veor q12,q12,q0 ++ add r5,r5,r9 ++ and r11,r3,r7 ++ vshr.u32 q0,q12,#30 ++ ldr r9,[sp,#8] ++ add r5,r5,r6,ror#27 ++ vst1.32 {q13},[r12,:128]! ++ sub r12,r12,#64 ++ eor r11,r11,r10 ++ mov r7,r7,ror#2 ++ vsli.32 q0,q12,#2 ++ add r5,r5,r11 ++ bic r10,r3,r6 ++ add r4,r4,r9 ++ and r11,r7,r6 ++ ldr r9,[sp,#12] ++ add r4,r4,r5,ror#27 ++ eor r11,r11,r10 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ bic r10,r7,r5 ++ add r3,r3,r9 ++ and r11,r6,r5 ++ ldr r9,[sp,#16] ++ add r3,r3,r4,ror#27 ++ eor r11,r11,r10 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ vext.8 q12,q11,q0,#8 ++ eor r10,r4,r6 ++ add r7,r7,r9 ++ ldr r9,[sp,#20] ++ veor q1,q1,q9 ++ eor r11,r10,r5 ++ add r7,r7,r3,ror#27 ++ veor q1,q1,q2 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ vadd.i32 q13,q0,q14 ++ eor r10,r3,r5 ++ add r6,r6,r9 ++ veor q12,q12,q1 ++ ldr r9,[sp,#24] ++ eor r11,r10,r4 ++ vshr.u32 q1,q12,#30 ++ add r6,r6,r7,ror#27 ++ mov r3,r3,ror#2 ++ vst1.32 {q13},[r12,:128]! ++ add r6,r6,r11 ++ eor r10,r7,r4 ++ vsli.32 q1,q12,#2 ++ add r5,r5,r9 ++ ldr r9,[sp,#28] ++ eor r11,r10,r3 ++ add r5,r5,r6,ror#27 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ eor r10,r6,r3 ++ add r4,r4,r9 ++ ldr r9,[sp,#32] ++ eor r11,r10,r7 ++ add r4,r4,r5,ror#27 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ vext.8 q12,q0,q1,#8 ++ eor r10,r5,r7 ++ add r3,r3,r9 ++ ldr r9,[sp,#36] ++ veor q2,q2,q10 ++ eor r11,r10,r6 ++ add r3,r3,r4,ror#27 ++ veor q2,q2,q3 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ vadd.i32 q13,q1,q14 ++ eor r10,r4,r6 ++ vld1.32 {d28[],d29[]},[r8,:32]! ++ add r7,r7,r9 ++ veor q12,q12,q2 ++ ldr r9,[sp,#40] ++ eor r11,r10,r5 ++ vshr.u32 q2,q12,#30 ++ add r7,r7,r3,ror#27 ++ mov r4,r4,ror#2 ++ vst1.32 {q13},[r12,:128]! ++ add r7,r7,r11 ++ eor r10,r3,r5 ++ vsli.32 q2,q12,#2 ++ add r6,r6,r9 ++ ldr r9,[sp,#44] ++ eor r11,r10,r4 ++ add r6,r6,r7,ror#27 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ eor r10,r7,r4 ++ add r5,r5,r9 ++ ldr r9,[sp,#48] ++ eor r11,r10,r3 ++ add r5,r5,r6,ror#27 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ vext.8 q12,q1,q2,#8 ++ eor r10,r6,r3 ++ add r4,r4,r9 ++ ldr r9,[sp,#52] ++ veor q3,q3,q11 ++ eor r11,r10,r7 ++ add r4,r4,r5,ror#27 ++ veor q3,q3,q8 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ vadd.i32 q13,q2,q14 ++ eor r10,r5,r7 ++ add r3,r3,r9 ++ veor q12,q12,q3 ++ ldr r9,[sp,#56] ++ eor r11,r10,r6 ++ vshr.u32 q3,q12,#30 ++ add r3,r3,r4,ror#27 ++ mov r5,r5,ror#2 ++ vst1.32 {q13},[r12,:128]! 
++ add r3,r3,r11 ++ eor r10,r4,r6 ++ vsli.32 q3,q12,#2 ++ add r7,r7,r9 ++ ldr r9,[sp,#60] ++ eor r11,r10,r5 ++ add r7,r7,r3,ror#27 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ eor r10,r3,r5 ++ add r6,r6,r9 ++ ldr r9,[sp,#0] ++ eor r11,r10,r4 ++ add r6,r6,r7,ror#27 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ vext.8 q12,q2,q3,#8 ++ eor r10,r7,r4 ++ add r5,r5,r9 ++ ldr r9,[sp,#4] ++ veor q8,q8,q0 ++ eor r11,r10,r3 ++ add r5,r5,r6,ror#27 ++ veor q8,q8,q9 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ vadd.i32 q13,q3,q14 ++ eor r10,r6,r3 ++ add r4,r4,r9 ++ veor q12,q12,q8 ++ ldr r9,[sp,#8] ++ eor r11,r10,r7 ++ vshr.u32 q8,q12,#30 ++ add r4,r4,r5,ror#27 ++ mov r6,r6,ror#2 ++ vst1.32 {q13},[r12,:128]! ++ sub r12,r12,#64 ++ add r4,r4,r11 ++ eor r10,r5,r7 ++ vsli.32 q8,q12,#2 ++ add r3,r3,r9 ++ ldr r9,[sp,#12] ++ eor r11,r10,r6 ++ add r3,r3,r4,ror#27 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ eor r10,r4,r6 ++ add r7,r7,r9 ++ ldr r9,[sp,#16] ++ eor r11,r10,r5 ++ add r7,r7,r3,ror#27 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ vext.8 q12,q3,q8,#8 ++ eor r10,r3,r5 ++ add r6,r6,r9 ++ ldr r9,[sp,#20] ++ veor q9,q9,q1 ++ eor r11,r10,r4 ++ add r6,r6,r7,ror#27 ++ veor q9,q9,q10 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ vadd.i32 q13,q8,q14 ++ eor r10,r7,r4 ++ add r5,r5,r9 ++ veor q12,q12,q9 ++ ldr r9,[sp,#24] ++ eor r11,r10,r3 ++ vshr.u32 q9,q12,#30 ++ add r5,r5,r6,ror#27 ++ mov r7,r7,ror#2 ++ vst1.32 {q13},[r12,:128]! ++ add r5,r5,r11 ++ eor r10,r6,r3 ++ vsli.32 q9,q12,#2 ++ add r4,r4,r9 ++ ldr r9,[sp,#28] ++ eor r11,r10,r7 ++ add r4,r4,r5,ror#27 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ eor r10,r5,r7 ++ add r3,r3,r9 ++ ldr r9,[sp,#32] ++ eor r11,r10,r6 ++ add r3,r3,r4,ror#27 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ vext.8 q12,q8,q9,#8 ++ add r7,r7,r9 ++ and r10,r5,r6 ++ ldr r9,[sp,#36] ++ veor q10,q10,q2 ++ add r7,r7,r3,ror#27 ++ eor r11,r5,r6 ++ veor q10,q10,q11 ++ add r7,r7,r10 ++ and r11,r11,r4 ++ vadd.i32 q13,q9,q14 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ veor q12,q12,q10 ++ add r6,r6,r9 ++ and r10,r4,r5 ++ vshr.u32 q10,q12,#30 ++ ldr r9,[sp,#40] ++ add r6,r6,r7,ror#27 ++ vst1.32 {q13},[r12,:128]! ++ eor r11,r4,r5 ++ add r6,r6,r10 ++ vsli.32 q10,q12,#2 ++ and r11,r11,r3 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ add r5,r5,r9 ++ and r10,r3,r4 ++ ldr r9,[sp,#44] ++ add r5,r5,r6,ror#27 ++ eor r11,r3,r4 ++ add r5,r5,r10 ++ and r11,r11,r7 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ add r4,r4,r9 ++ and r10,r7,r3 ++ ldr r9,[sp,#48] ++ add r4,r4,r5,ror#27 ++ eor r11,r7,r3 ++ add r4,r4,r10 ++ and r11,r11,r6 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ vext.8 q12,q9,q10,#8 ++ add r3,r3,r9 ++ and r10,r6,r7 ++ ldr r9,[sp,#52] ++ veor q11,q11,q3 ++ add r3,r3,r4,ror#27 ++ eor r11,r6,r7 ++ veor q11,q11,q0 ++ add r3,r3,r10 ++ and r11,r11,r5 ++ vadd.i32 q13,q10,q14 ++ mov r5,r5,ror#2 ++ vld1.32 {d28[],d29[]},[r8,:32]! ++ add r3,r3,r11 ++ veor q12,q12,q11 ++ add r7,r7,r9 ++ and r10,r5,r6 ++ vshr.u32 q11,q12,#30 ++ ldr r9,[sp,#56] ++ add r7,r7,r3,ror#27 ++ vst1.32 {q13},[r12,:128]! 
++ eor r11,r5,r6 ++ add r7,r7,r10 ++ vsli.32 q11,q12,#2 ++ and r11,r11,r4 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ add r6,r6,r9 ++ and r10,r4,r5 ++ ldr r9,[sp,#60] ++ add r6,r6,r7,ror#27 ++ eor r11,r4,r5 ++ add r6,r6,r10 ++ and r11,r11,r3 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ add r5,r5,r9 ++ and r10,r3,r4 ++ ldr r9,[sp,#0] ++ add r5,r5,r6,ror#27 ++ eor r11,r3,r4 ++ add r5,r5,r10 ++ and r11,r11,r7 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ vext.8 q12,q10,q11,#8 ++ add r4,r4,r9 ++ and r10,r7,r3 ++ ldr r9,[sp,#4] ++ veor q0,q0,q8 ++ add r4,r4,r5,ror#27 ++ eor r11,r7,r3 ++ veor q0,q0,q1 ++ add r4,r4,r10 ++ and r11,r11,r6 ++ vadd.i32 q13,q11,q14 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ veor q12,q12,q0 ++ add r3,r3,r9 ++ and r10,r6,r7 ++ vshr.u32 q0,q12,#30 ++ ldr r9,[sp,#8] ++ add r3,r3,r4,ror#27 ++ vst1.32 {q13},[r12,:128]! ++ sub r12,r12,#64 ++ eor r11,r6,r7 ++ add r3,r3,r10 ++ vsli.32 q0,q12,#2 ++ and r11,r11,r5 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ add r7,r7,r9 ++ and r10,r5,r6 ++ ldr r9,[sp,#12] ++ add r7,r7,r3,ror#27 ++ eor r11,r5,r6 ++ add r7,r7,r10 ++ and r11,r11,r4 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ add r6,r6,r9 ++ and r10,r4,r5 ++ ldr r9,[sp,#16] ++ add r6,r6,r7,ror#27 ++ eor r11,r4,r5 ++ add r6,r6,r10 ++ and r11,r11,r3 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ vext.8 q12,q11,q0,#8 ++ add r5,r5,r9 ++ and r10,r3,r4 ++ ldr r9,[sp,#20] ++ veor q1,q1,q9 ++ add r5,r5,r6,ror#27 ++ eor r11,r3,r4 ++ veor q1,q1,q2 ++ add r5,r5,r10 ++ and r11,r11,r7 ++ vadd.i32 q13,q0,q14 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ veor q12,q12,q1 ++ add r4,r4,r9 ++ and r10,r7,r3 ++ vshr.u32 q1,q12,#30 ++ ldr r9,[sp,#24] ++ add r4,r4,r5,ror#27 ++ vst1.32 {q13},[r12,:128]! ++ eor r11,r7,r3 ++ add r4,r4,r10 ++ vsli.32 q1,q12,#2 ++ and r11,r11,r6 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ add r3,r3,r9 ++ and r10,r6,r7 ++ ldr r9,[sp,#28] ++ add r3,r3,r4,ror#27 ++ eor r11,r6,r7 ++ add r3,r3,r10 ++ and r11,r11,r5 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ add r7,r7,r9 ++ and r10,r5,r6 ++ ldr r9,[sp,#32] ++ add r7,r7,r3,ror#27 ++ eor r11,r5,r6 ++ add r7,r7,r10 ++ and r11,r11,r4 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ vext.8 q12,q0,q1,#8 ++ add r6,r6,r9 ++ and r10,r4,r5 ++ ldr r9,[sp,#36] ++ veor q2,q2,q10 ++ add r6,r6,r7,ror#27 ++ eor r11,r4,r5 ++ veor q2,q2,q3 ++ add r6,r6,r10 ++ and r11,r11,r3 ++ vadd.i32 q13,q1,q14 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ veor q12,q12,q2 ++ add r5,r5,r9 ++ and r10,r3,r4 ++ vshr.u32 q2,q12,#30 ++ ldr r9,[sp,#40] ++ add r5,r5,r6,ror#27 ++ vst1.32 {q13},[r12,:128]! ++ eor r11,r3,r4 ++ add r5,r5,r10 ++ vsli.32 q2,q12,#2 ++ and r11,r11,r7 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ add r4,r4,r9 ++ and r10,r7,r3 ++ ldr r9,[sp,#44] ++ add r4,r4,r5,ror#27 ++ eor r11,r7,r3 ++ add r4,r4,r10 ++ and r11,r11,r6 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ add r3,r3,r9 ++ and r10,r6,r7 ++ ldr r9,[sp,#48] ++ add r3,r3,r4,ror#27 ++ eor r11,r6,r7 ++ add r3,r3,r10 ++ and r11,r11,r5 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ vext.8 q12,q1,q2,#8 ++ eor r10,r4,r6 ++ add r7,r7,r9 ++ ldr r9,[sp,#52] ++ veor q3,q3,q11 ++ eor r11,r10,r5 ++ add r7,r7,r3,ror#27 ++ veor q3,q3,q8 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ vadd.i32 q13,q2,q14 ++ eor r10,r3,r5 ++ add r6,r6,r9 ++ veor q12,q12,q3 ++ ldr r9,[sp,#56] ++ eor r11,r10,r4 ++ vshr.u32 q3,q12,#30 ++ add r6,r6,r7,ror#27 ++ mov r3,r3,ror#2 ++ vst1.32 {q13},[r12,:128]! 
++ add r6,r6,r11 ++ eor r10,r7,r4 ++ vsli.32 q3,q12,#2 ++ add r5,r5,r9 ++ ldr r9,[sp,#60] ++ eor r11,r10,r3 ++ add r5,r5,r6,ror#27 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ eor r10,r6,r3 ++ add r4,r4,r9 ++ ldr r9,[sp,#0] ++ eor r11,r10,r7 ++ add r4,r4,r5,ror#27 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ vadd.i32 q13,q3,q14 ++ eor r10,r5,r7 ++ add r3,r3,r9 ++ vst1.32 {q13},[r12,:128]! ++ sub r12,r12,#64 ++ teq r1,r2 ++ sub r8,r8,#16 ++ it eq ++ subeq r1,r1,#64 ++ vld1.8 {q0,q1},[r1]! ++ ldr r9,[sp,#4] ++ eor r11,r10,r6 ++ vld1.8 {q2,q3},[r1]! ++ add r3,r3,r4,ror#27 ++ mov r5,r5,ror#2 ++ vld1.32 {d28[],d29[]},[r8,:32]! ++ add r3,r3,r11 ++ eor r10,r4,r6 ++ vrev32.8 q0,q0 ++ add r7,r7,r9 ++ ldr r9,[sp,#8] ++ eor r11,r10,r5 ++ add r7,r7,r3,ror#27 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ eor r10,r3,r5 ++ add r6,r6,r9 ++ ldr r9,[sp,#12] ++ eor r11,r10,r4 ++ add r6,r6,r7,ror#27 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ eor r10,r7,r4 ++ add r5,r5,r9 ++ ldr r9,[sp,#16] ++ eor r11,r10,r3 ++ add r5,r5,r6,ror#27 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ vrev32.8 q1,q1 ++ eor r10,r6,r3 ++ add r4,r4,r9 ++ vadd.i32 q8,q0,q14 ++ ldr r9,[sp,#20] ++ eor r11,r10,r7 ++ vst1.32 {q8},[r12,:128]! ++ add r4,r4,r5,ror#27 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ eor r10,r5,r7 ++ add r3,r3,r9 ++ ldr r9,[sp,#24] ++ eor r11,r10,r6 ++ add r3,r3,r4,ror#27 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ eor r10,r4,r6 ++ add r7,r7,r9 ++ ldr r9,[sp,#28] ++ eor r11,r10,r5 ++ add r7,r7,r3,ror#27 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ eor r10,r3,r5 ++ add r6,r6,r9 ++ ldr r9,[sp,#32] ++ eor r11,r10,r4 ++ add r6,r6,r7,ror#27 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ vrev32.8 q2,q2 ++ eor r10,r7,r4 ++ add r5,r5,r9 ++ vadd.i32 q9,q1,q14 ++ ldr r9,[sp,#36] ++ eor r11,r10,r3 ++ vst1.32 {q9},[r12,:128]! ++ add r5,r5,r6,ror#27 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ eor r10,r6,r3 ++ add r4,r4,r9 ++ ldr r9,[sp,#40] ++ eor r11,r10,r7 ++ add r4,r4,r5,ror#27 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ eor r10,r5,r7 ++ add r3,r3,r9 ++ ldr r9,[sp,#44] ++ eor r11,r10,r6 ++ add r3,r3,r4,ror#27 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ eor r10,r4,r6 ++ add r7,r7,r9 ++ ldr r9,[sp,#48] ++ eor r11,r10,r5 ++ add r7,r7,r3,ror#27 ++ mov r4,r4,ror#2 ++ add r7,r7,r11 ++ vrev32.8 q3,q3 ++ eor r10,r3,r5 ++ add r6,r6,r9 ++ vadd.i32 q10,q2,q14 ++ ldr r9,[sp,#52] ++ eor r11,r10,r4 ++ vst1.32 {q10},[r12,:128]! 
++ add r6,r6,r7,ror#27 ++ mov r3,r3,ror#2 ++ add r6,r6,r11 ++ eor r10,r7,r4 ++ add r5,r5,r9 ++ ldr r9,[sp,#56] ++ eor r11,r10,r3 ++ add r5,r5,r6,ror#27 ++ mov r7,r7,ror#2 ++ add r5,r5,r11 ++ eor r10,r6,r3 ++ add r4,r4,r9 ++ ldr r9,[sp,#60] ++ eor r11,r10,r7 ++ add r4,r4,r5,ror#27 ++ mov r6,r6,ror#2 ++ add r4,r4,r11 ++ eor r10,r5,r7 ++ add r3,r3,r9 ++ eor r11,r10,r6 ++ add r3,r3,r4,ror#27 ++ mov r5,r5,ror#2 ++ add r3,r3,r11 ++ ldmia r0,{r9,r10,r11,r12} @ accumulate context ++ add r3,r3,r9 ++ ldr r9,[r0,#16] ++ add r4,r4,r10 ++ add r5,r5,r11 ++ add r6,r6,r12 ++ it eq ++ moveq sp,r14 ++ add r7,r7,r9 ++ it ne ++ ldrne r9,[sp] ++ stmia r0,{r3,r4,r5,r6,r7} ++ itt ne ++ addne r12,sp,#3*16 ++ bne Loop_neon ++ ++ @ vldmia sp!,{d8-d15} ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} ++ ++#endif ++#if __ARM_MAX_ARCH__>=7 ++ ++# if defined(__thumb2__) ++# define INST(a,b,c,d) .byte c,d|0xf,a,b ++# else ++# define INST(a,b,c,d) .byte a,b,c,d|0x10 ++# endif ++ ++#ifdef __thumb2__ ++.thumb_func sha1_block_data_order_armv8 ++#endif ++.align 5 ++sha1_block_data_order_armv8: ++LARMv8: ++ vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so ++ ++ veor q1,q1,q1 ++ adr r3,LK_00_19 ++ vld1.32 {q0},[r0]! ++ vld1.32 {d2[0]},[r0] ++ sub r0,r0,#16 ++ vld1.32 {d16[],d17[]},[r3,:32]! ++ vld1.32 {d18[],d19[]},[r3,:32]! ++ vld1.32 {d20[],d21[]},[r3,:32]! ++ vld1.32 {d22[],d23[]},[r3,:32] ++ ++Loop_v8: ++ vld1.8 {q4,q5},[r1]! ++ vld1.8 {q6,q7},[r1]! ++ vrev32.8 q4,q4 ++ vrev32.8 q5,q5 ++ ++ vadd.i32 q12,q8,q4 ++ vrev32.8 q6,q6 ++ vmov q14,q0 @ offload ++ subs r2,r2,#1 ++ ++ vadd.i32 q13,q8,q5 ++ vrev32.8 q7,q7 ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 0 ++ INST(0x68,0x0c,0x02,0xe2) @ sha1c q0,q1,q12 ++ vadd.i32 q12,q8,q6 ++ INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6 ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 1 ++ INST(0x6a,0x0c,0x06,0xe2) @ sha1c q0,q3,q13 ++ vadd.i32 q13,q8,q7 ++ INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7 ++ INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7 ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 2 ++ INST(0x68,0x0c,0x04,0xe2) @ sha1c q0,q2,q12 ++ vadd.i32 q12,q8,q4 ++ INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4 ++ INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4 ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 3 ++ INST(0x6a,0x0c,0x06,0xe2) @ sha1c q0,q3,q13 ++ vadd.i32 q13,q9,q5 ++ INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5 ++ INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5 ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 4 ++ INST(0x68,0x0c,0x04,0xe2) @ sha1c q0,q2,q12 ++ vadd.i32 q12,q9,q6 ++ INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6 ++ INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6 ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 5 ++ INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 ++ vadd.i32 q13,q9,q7 ++ INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7 ++ INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7 ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 6 ++ INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12 ++ vadd.i32 q12,q9,q4 ++ INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4 ++ INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4 ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 7 ++ INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 ++ vadd.i32 q13,q9,q5 ++ INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5 ++ INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5 ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 8 ++ INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12 ++ vadd.i32 q12,q10,q6 ++ INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6 ++ INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6 ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 9 ++ INST(0x6a,0x0c,0x16,0xe2) @ sha1p 
q0,q3,q13 ++ vadd.i32 q13,q10,q7 ++ INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7 ++ INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7 ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 10 ++ INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12 ++ vadd.i32 q12,q10,q4 ++ INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4 ++ INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4 ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 11 ++ INST(0x6a,0x0c,0x26,0xe2) @ sha1m q0,q3,q13 ++ vadd.i32 q13,q10,q5 ++ INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5 ++ INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5 ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 12 ++ INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12 ++ vadd.i32 q12,q10,q6 ++ INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6 ++ INST(0x4c,0x8c,0x3a,0xe2) @ sha1su0 q4,q5,q6 ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 13 ++ INST(0x6a,0x0c,0x26,0xe2) @ sha1m q0,q3,q13 ++ vadd.i32 q13,q11,q7 ++ INST(0x8e,0x83,0xba,0xf3) @ sha1su1 q4,q7 ++ INST(0x4e,0xac,0x3c,0xe2) @ sha1su0 q5,q6,q7 ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 14 ++ INST(0x68,0x0c,0x24,0xe2) @ sha1m q0,q2,q12 ++ vadd.i32 q12,q11,q4 ++ INST(0x88,0xa3,0xba,0xf3) @ sha1su1 q5,q4 ++ INST(0x48,0xcc,0x3e,0xe2) @ sha1su0 q6,q7,q4 ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 15 ++ INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 ++ vadd.i32 q13,q11,q5 ++ INST(0x8a,0xc3,0xba,0xf3) @ sha1su1 q6,q5 ++ INST(0x4a,0xec,0x38,0xe2) @ sha1su0 q7,q4,q5 ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 16 ++ INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12 ++ vadd.i32 q12,q11,q6 ++ INST(0x8c,0xe3,0xba,0xf3) @ sha1su1 q7,q6 ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 17 ++ INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 ++ vadd.i32 q13,q11,q7 ++ ++ INST(0xc0,0x62,0xb9,0xf3) @ sha1h q3,q0 @ 18 ++ INST(0x68,0x0c,0x14,0xe2) @ sha1p q0,q2,q12 ++ ++ INST(0xc0,0x42,0xb9,0xf3) @ sha1h q2,q0 @ 19 ++ INST(0x6a,0x0c,0x16,0xe2) @ sha1p q0,q3,q13 ++ ++ vadd.i32 q1,q1,q2 ++ vadd.i32 q0,q0,q14 ++ bne Loop_v8 ++ ++ vst1.32 {q0},[r0]! ++ vst1.32 {d2[0]},[r0] ++ ++ vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} ++ bx lr @ bx lr ++ ++#endif ++#if __ARM_MAX_ARCH__>=7 ++.comm _OPENSSL_armcap_P,4 ++.non_lazy_symbol_pointer ++OPENSSL_armcap_P: ++.indirect_symbol _OPENSSL_armcap_P ++.long 0 ++.private_extern _OPENSSL_armcap_P ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/fipsmodule/sha256-armv4.S b/apple-arm/crypto/fipsmodule/sha256-armv4.S +new file mode 100644 +index 0000000..0cf3648 +--- /dev/null ++++ b/apple-arm/crypto/fipsmodule/sha256-armv4.S +@@ -0,0 +1,2846 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. ++@ ++@ Licensed under the OpenSSL license (the "License"). You may not use ++@ this file except in compliance with the License. You can obtain a copy ++@ in the file LICENSE in the source distribution or at ++@ https://www.openssl.org/source/license.html ++ ++ ++@ ==================================================================== ++@ Written by Andy Polyakov for the OpenSSL ++@ project. The module is, however, dual licensed under OpenSSL and ++@ CRYPTOGAMS licenses depending on where you obtain it. For further ++@ details see http://www.openssl.org/~appro/cryptogams/. 
++@ ++@ Permission to use under GPL terms is granted. ++@ ==================================================================== ++ ++@ SHA256 block procedure for ARMv4. May 2007. ++ ++@ Performance is ~2x better than gcc 3.4 generated code and in "abso- ++@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per ++@ byte [on single-issue Xscale PXA250 core]. ++ ++@ July 2010. ++@ ++@ Rescheduling for dual-issue pipeline resulted in 22% improvement on ++@ Cortex A8 core and ~20 cycles per processed byte. ++ ++@ February 2011. ++@ ++@ Profiler-assisted and platform-specific optimization resulted in 16% ++@ improvement on Cortex A8 core and ~15.4 cycles per processed byte. ++ ++@ September 2013. ++@ ++@ Add NEON implementation. On Cortex A8 it was measured to process one ++@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon ++@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only ++@ code (meaning that latter performs sub-optimally, nothing was done ++@ about it). ++ ++@ May 2014. ++@ ++@ Add ARMv8 code path performing at 2.0 cpb on Apple A7. ++ ++#ifndef __KERNEL__ ++# include ++#else ++# define __ARM_ARCH__ __LINUX_ARM_ARCH__ ++# define __ARM_MAX_ARCH__ 7 ++#endif ++ ++@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both ++@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those ++@ instructions are manually-encoded. (See unsha256.) ++ ++ ++.text ++#if defined(__thumb2__) ++.syntax unified ++.thumb ++#else ++.code 32 ++#endif ++ ++ ++.align 5 ++K256: ++.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 ++.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 ++.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 ++.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 ++.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc ++.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da ++.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 ++.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 ++.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 ++.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 ++.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 ++.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 ++.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 ++.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 ++.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 ++.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 ++ ++.word 0 @ terminator ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ++LOPENSSL_armcap: ++.word OPENSSL_armcap_P-Lsha256_block_data_order ++#endif ++.align 5 ++ ++.globl _sha256_block_data_order ++.private_extern _sha256_block_data_order ++#ifdef __thumb2__ ++.thumb_func _sha256_block_data_order ++#endif ++_sha256_block_data_order: ++Lsha256_block_data_order: ++#if __ARM_ARCH__<7 && !defined(__thumb2__) ++ sub r3,pc,#8 @ _sha256_block_data_order ++#else ++ adr r3,Lsha256_block_data_order ++#endif ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ++ ldr r12,LOPENSSL_armcap ++ ldr r12,[r3,r12] @ OPENSSL_armcap_P ++#ifdef __APPLE__ ++ ldr r12,[r12] ++#endif ++ tst r12,#ARMV8_SHA256 ++ bne LARMv8 ++ tst r12,#ARMV7_NEON ++ bne LNEON ++#endif ++ add r2,r1,r2,lsl#6 @ len to point at the end of inp ++ stmdb sp!,{r0,r1,r2,r4-r11,lr} ++ ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} ++ sub r14,r3,#256+32 @ K256 ++ sub sp,sp,#16*4 @ alloca(X[16]) ++Loop: ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r5,r6 @ magic ++ eor r12,r12,r12 ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 0 ++# if 0==15 ++ str r1,[sp,#17*4] @ make room 
for r1 ++# endif ++ eor r0,r8,r8,ror#5 ++ add r4,r4,r12 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r8,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 0 ++ add r4,r4,r12 @ h+=Maj(a,b,c) from the past ++ ldrb r12,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r12,lsl#8 ++ ldrb r12,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 0==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r8,r8,ror#5 ++ orr r2,r2,r12,lsl#24 ++ eor r0,r0,r8,ror#19 @ Sigma1(e) ++#endif ++ ldr r12,[r14],#4 @ *K256++ ++ add r11,r11,r2 @ h+=X[i] ++ str r2,[sp,#0*4] ++ eor r2,r9,r10 ++ add r11,r11,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r8 ++ add r11,r11,r12 @ h+=K256[i] ++ eor r2,r2,r10 @ Ch(e,f,g) ++ eor r0,r4,r4,ror#11 ++ add r11,r11,r2 @ h+=Ch(e,f,g) ++#if 0==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 0<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r4,r5 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#2*4] @ from future BODY_16_xx ++ eor r12,r4,r5 @ a^b, b^c in next round ++ ldr r1,[sp,#15*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r4,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r7,r7,r11 @ d+=h ++ eor r3,r3,r5 @ Maj(a,b,c) ++ add r11,r11,r0,ror#2 @ h+=Sigma0(a) ++ @ add r11,r11,r3 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 1 ++# if 1==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r7,r7,ror#5 ++ add r11,r11,r3 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r7,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 1 ++ add r11,r11,r3 @ h+=Maj(a,b,c) from the past ++ ldrb r3,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r3,lsl#8 ++ ldrb r3,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 1==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r7,r7,ror#5 ++ orr r2,r2,r3,lsl#24 ++ eor r0,r0,r7,ror#19 @ Sigma1(e) ++#endif ++ ldr r3,[r14],#4 @ *K256++ ++ add r10,r10,r2 @ h+=X[i] ++ str r2,[sp,#1*4] ++ eor r2,r8,r9 ++ add r10,r10,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r7 ++ add r10,r10,r3 @ h+=K256[i] ++ eor r2,r2,r9 @ Ch(e,f,g) ++ eor r0,r11,r11,ror#11 ++ add r10,r10,r2 @ h+=Ch(e,f,g) ++#if 1==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 1<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r11,r4 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#3*4] @ from future BODY_16_xx ++ eor r3,r11,r4 @ a^b, b^c in next round ++ ldr r1,[sp,#0*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r11,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r6,r6,r10 @ d+=h ++ eor r12,r12,r4 @ Maj(a,b,c) ++ add r10,r10,r0,ror#2 @ h+=Sigma0(a) ++ @ add r10,r10,r12 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 2 ++# if 2==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r6,r6,ror#5 ++ add r10,r10,r12 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r6,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 2 ++ add r10,r10,r12 @ h+=Maj(a,b,c) from the past ++ ldrb r12,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r12,lsl#8 ++ ldrb r12,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 2==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r6,r6,ror#5 ++ orr r2,r2,r12,lsl#24 ++ eor r0,r0,r6,ror#19 @ Sigma1(e) ++#endif ++ ldr r12,[r14],#4 @ *K256++ ++ add r9,r9,r2 @ h+=X[i] ++ str r2,[sp,#2*4] ++ eor r2,r7,r8 ++ add r9,r9,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r6 ++ add r9,r9,r12 @ h+=K256[i] ++ eor r2,r2,r8 @ Ch(e,f,g) ++ eor r0,r10,r10,ror#11 ++ add r9,r9,r2 @ h+=Ch(e,f,g) ++#if 2==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 2<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r10,r11 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#4*4] @ from future BODY_16_xx ++ eor r12,r10,r11 @ a^b, b^c in next round ++ ldr r1,[sp,#1*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r10,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r5,r5,r9 @ d+=h ++ eor r3,r3,r11 @ Maj(a,b,c) ++ add r9,r9,r0,ror#2 @ h+=Sigma0(a) ++ @ add r9,r9,r3 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 3 ++# if 3==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r5,r5,ror#5 ++ add r9,r9,r3 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r5,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 3 ++ add r9,r9,r3 @ h+=Maj(a,b,c) from the past ++ ldrb r3,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r3,lsl#8 ++ ldrb r3,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 3==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r5,r5,ror#5 ++ orr r2,r2,r3,lsl#24 ++ eor r0,r0,r5,ror#19 @ Sigma1(e) ++#endif ++ ldr r3,[r14],#4 @ *K256++ ++ add r8,r8,r2 @ h+=X[i] ++ str r2,[sp,#3*4] ++ eor r2,r6,r7 ++ add r8,r8,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r5 ++ add r8,r8,r3 @ h+=K256[i] ++ eor r2,r2,r7 @ Ch(e,f,g) ++ eor r0,r9,r9,ror#11 ++ add r8,r8,r2 @ h+=Ch(e,f,g) ++#if 3==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 3<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r9,r10 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#5*4] @ from future BODY_16_xx ++ eor r3,r9,r10 @ a^b, b^c in next round ++ ldr r1,[sp,#2*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r9,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r4,r4,r8 @ d+=h ++ eor r12,r12,r10 @ Maj(a,b,c) ++ add r8,r8,r0,ror#2 @ h+=Sigma0(a) ++ @ add r8,r8,r12 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 4 ++# if 4==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r4,r4,ror#5 ++ add r8,r8,r12 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r4,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 4 ++ add r8,r8,r12 @ h+=Maj(a,b,c) from the past ++ ldrb r12,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r12,lsl#8 ++ ldrb r12,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 4==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r4,r4,ror#5 ++ orr r2,r2,r12,lsl#24 ++ eor r0,r0,r4,ror#19 @ Sigma1(e) ++#endif ++ ldr r12,[r14],#4 @ *K256++ ++ add r7,r7,r2 @ h+=X[i] ++ str r2,[sp,#4*4] ++ eor r2,r5,r6 ++ add r7,r7,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r4 ++ add r7,r7,r12 @ h+=K256[i] ++ eor r2,r2,r6 @ Ch(e,f,g) ++ eor r0,r8,r8,ror#11 ++ add r7,r7,r2 @ h+=Ch(e,f,g) ++#if 4==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 4<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r8,r9 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#6*4] @ from future BODY_16_xx ++ eor r12,r8,r9 @ a^b, b^c in next round ++ ldr r1,[sp,#3*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r8,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r11,r11,r7 @ d+=h ++ eor r3,r3,r9 @ Maj(a,b,c) ++ add r7,r7,r0,ror#2 @ h+=Sigma0(a) ++ @ add r7,r7,r3 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 5 ++# if 5==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r11,r11,ror#5 ++ add r7,r7,r3 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r11,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 5 ++ add r7,r7,r3 @ h+=Maj(a,b,c) from the past ++ ldrb r3,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r3,lsl#8 ++ ldrb r3,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 5==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r11,r11,ror#5 ++ orr r2,r2,r3,lsl#24 ++ eor r0,r0,r11,ror#19 @ Sigma1(e) ++#endif ++ ldr r3,[r14],#4 @ *K256++ ++ add r6,r6,r2 @ h+=X[i] ++ str r2,[sp,#5*4] ++ eor r2,r4,r5 ++ add r6,r6,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r11 ++ add r6,r6,r3 @ h+=K256[i] ++ eor r2,r2,r5 @ Ch(e,f,g) ++ eor r0,r7,r7,ror#11 ++ add r6,r6,r2 @ h+=Ch(e,f,g) ++#if 5==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 5<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r7,r8 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#7*4] @ from future BODY_16_xx ++ eor r3,r7,r8 @ a^b, b^c in next round ++ ldr r1,[sp,#4*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r7,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r10,r10,r6 @ d+=h ++ eor r12,r12,r8 @ Maj(a,b,c) ++ add r6,r6,r0,ror#2 @ h+=Sigma0(a) ++ @ add r6,r6,r12 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 6 ++# if 6==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r10,r10,ror#5 ++ add r6,r6,r12 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r10,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 6 ++ add r6,r6,r12 @ h+=Maj(a,b,c) from the past ++ ldrb r12,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r12,lsl#8 ++ ldrb r12,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 6==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r10,r10,ror#5 ++ orr r2,r2,r12,lsl#24 ++ eor r0,r0,r10,ror#19 @ Sigma1(e) ++#endif ++ ldr r12,[r14],#4 @ *K256++ ++ add r5,r5,r2 @ h+=X[i] ++ str r2,[sp,#6*4] ++ eor r2,r11,r4 ++ add r5,r5,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r10 ++ add r5,r5,r12 @ h+=K256[i] ++ eor r2,r2,r4 @ Ch(e,f,g) ++ eor r0,r6,r6,ror#11 ++ add r5,r5,r2 @ h+=Ch(e,f,g) ++#if 6==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 6<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r6,r7 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#8*4] @ from future BODY_16_xx ++ eor r12,r6,r7 @ a^b, b^c in next round ++ ldr r1,[sp,#5*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r6,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r9,r9,r5 @ d+=h ++ eor r3,r3,r7 @ Maj(a,b,c) ++ add r5,r5,r0,ror#2 @ h+=Sigma0(a) ++ @ add r5,r5,r3 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 7 ++# if 7==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r9,r9,ror#5 ++ add r5,r5,r3 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r9,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 7 ++ add r5,r5,r3 @ h+=Maj(a,b,c) from the past ++ ldrb r3,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r3,lsl#8 ++ ldrb r3,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 7==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r9,r9,ror#5 ++ orr r2,r2,r3,lsl#24 ++ eor r0,r0,r9,ror#19 @ Sigma1(e) ++#endif ++ ldr r3,[r14],#4 @ *K256++ ++ add r4,r4,r2 @ h+=X[i] ++ str r2,[sp,#7*4] ++ eor r2,r10,r11 ++ add r4,r4,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r9 ++ add r4,r4,r3 @ h+=K256[i] ++ eor r2,r2,r11 @ Ch(e,f,g) ++ eor r0,r5,r5,ror#11 ++ add r4,r4,r2 @ h+=Ch(e,f,g) ++#if 7==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 7<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r5,r6 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#9*4] @ from future BODY_16_xx ++ eor r3,r5,r6 @ a^b, b^c in next round ++ ldr r1,[sp,#6*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r5,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r8,r8,r4 @ d+=h ++ eor r12,r12,r6 @ Maj(a,b,c) ++ add r4,r4,r0,ror#2 @ h+=Sigma0(a) ++ @ add r4,r4,r12 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 8 ++# if 8==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r8,r8,ror#5 ++ add r4,r4,r12 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r8,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 8 ++ add r4,r4,r12 @ h+=Maj(a,b,c) from the past ++ ldrb r12,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r12,lsl#8 ++ ldrb r12,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 8==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r8,r8,ror#5 ++ orr r2,r2,r12,lsl#24 ++ eor r0,r0,r8,ror#19 @ Sigma1(e) ++#endif ++ ldr r12,[r14],#4 @ *K256++ ++ add r11,r11,r2 @ h+=X[i] ++ str r2,[sp,#8*4] ++ eor r2,r9,r10 ++ add r11,r11,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r8 ++ add r11,r11,r12 @ h+=K256[i] ++ eor r2,r2,r10 @ Ch(e,f,g) ++ eor r0,r4,r4,ror#11 ++ add r11,r11,r2 @ h+=Ch(e,f,g) ++#if 8==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 8<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r4,r5 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#10*4] @ from future BODY_16_xx ++ eor r12,r4,r5 @ a^b, b^c in next round ++ ldr r1,[sp,#7*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r4,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r7,r7,r11 @ d+=h ++ eor r3,r3,r5 @ Maj(a,b,c) ++ add r11,r11,r0,ror#2 @ h+=Sigma0(a) ++ @ add r11,r11,r3 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 9 ++# if 9==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r7,r7,ror#5 ++ add r11,r11,r3 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r7,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 9 ++ add r11,r11,r3 @ h+=Maj(a,b,c) from the past ++ ldrb r3,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r3,lsl#8 ++ ldrb r3,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 9==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r7,r7,ror#5 ++ orr r2,r2,r3,lsl#24 ++ eor r0,r0,r7,ror#19 @ Sigma1(e) ++#endif ++ ldr r3,[r14],#4 @ *K256++ ++ add r10,r10,r2 @ h+=X[i] ++ str r2,[sp,#9*4] ++ eor r2,r8,r9 ++ add r10,r10,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r7 ++ add r10,r10,r3 @ h+=K256[i] ++ eor r2,r2,r9 @ Ch(e,f,g) ++ eor r0,r11,r11,ror#11 ++ add r10,r10,r2 @ h+=Ch(e,f,g) ++#if 9==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 9<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r11,r4 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#11*4] @ from future BODY_16_xx ++ eor r3,r11,r4 @ a^b, b^c in next round ++ ldr r1,[sp,#8*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r11,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r6,r6,r10 @ d+=h ++ eor r12,r12,r4 @ Maj(a,b,c) ++ add r10,r10,r0,ror#2 @ h+=Sigma0(a) ++ @ add r10,r10,r12 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 10 ++# if 10==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r6,r6,ror#5 ++ add r10,r10,r12 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r6,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 10 ++ add r10,r10,r12 @ h+=Maj(a,b,c) from the past ++ ldrb r12,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r12,lsl#8 ++ ldrb r12,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 10==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r6,r6,ror#5 ++ orr r2,r2,r12,lsl#24 ++ eor r0,r0,r6,ror#19 @ Sigma1(e) ++#endif ++ ldr r12,[r14],#4 @ *K256++ ++ add r9,r9,r2 @ h+=X[i] ++ str r2,[sp,#10*4] ++ eor r2,r7,r8 ++ add r9,r9,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r6 ++ add r9,r9,r12 @ h+=K256[i] ++ eor r2,r2,r8 @ Ch(e,f,g) ++ eor r0,r10,r10,ror#11 ++ add r9,r9,r2 @ h+=Ch(e,f,g) ++#if 10==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 10<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r10,r11 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#12*4] @ from future BODY_16_xx ++ eor r12,r10,r11 @ a^b, b^c in next round ++ ldr r1,[sp,#9*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r10,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r5,r5,r9 @ d+=h ++ eor r3,r3,r11 @ Maj(a,b,c) ++ add r9,r9,r0,ror#2 @ h+=Sigma0(a) ++ @ add r9,r9,r3 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 11 ++# if 11==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r5,r5,ror#5 ++ add r9,r9,r3 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r5,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 11 ++ add r9,r9,r3 @ h+=Maj(a,b,c) from the past ++ ldrb r3,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r3,lsl#8 ++ ldrb r3,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 11==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r5,r5,ror#5 ++ orr r2,r2,r3,lsl#24 ++ eor r0,r0,r5,ror#19 @ Sigma1(e) ++#endif ++ ldr r3,[r14],#4 @ *K256++ ++ add r8,r8,r2 @ h+=X[i] ++ str r2,[sp,#11*4] ++ eor r2,r6,r7 ++ add r8,r8,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r5 ++ add r8,r8,r3 @ h+=K256[i] ++ eor r2,r2,r7 @ Ch(e,f,g) ++ eor r0,r9,r9,ror#11 ++ add r8,r8,r2 @ h+=Ch(e,f,g) ++#if 11==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 11<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r9,r10 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#13*4] @ from future BODY_16_xx ++ eor r3,r9,r10 @ a^b, b^c in next round ++ ldr r1,[sp,#10*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r9,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r4,r4,r8 @ d+=h ++ eor r12,r12,r10 @ Maj(a,b,c) ++ add r8,r8,r0,ror#2 @ h+=Sigma0(a) ++ @ add r8,r8,r12 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 12 ++# if 12==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r4,r4,ror#5 ++ add r8,r8,r12 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r4,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 12 ++ add r8,r8,r12 @ h+=Maj(a,b,c) from the past ++ ldrb r12,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r12,lsl#8 ++ ldrb r12,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 12==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r4,r4,ror#5 ++ orr r2,r2,r12,lsl#24 ++ eor r0,r0,r4,ror#19 @ Sigma1(e) ++#endif ++ ldr r12,[r14],#4 @ *K256++ ++ add r7,r7,r2 @ h+=X[i] ++ str r2,[sp,#12*4] ++ eor r2,r5,r6 ++ add r7,r7,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r4 ++ add r7,r7,r12 @ h+=K256[i] ++ eor r2,r2,r6 @ Ch(e,f,g) ++ eor r0,r8,r8,ror#11 ++ add r7,r7,r2 @ h+=Ch(e,f,g) ++#if 12==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 12<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r8,r9 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#14*4] @ from future BODY_16_xx ++ eor r12,r8,r9 @ a^b, b^c in next round ++ ldr r1,[sp,#11*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r8,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r11,r11,r7 @ d+=h ++ eor r3,r3,r9 @ Maj(a,b,c) ++ add r7,r7,r0,ror#2 @ h+=Sigma0(a) ++ @ add r7,r7,r3 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 13 ++# if 13==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r11,r11,ror#5 ++ add r7,r7,r3 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r11,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 13 ++ add r7,r7,r3 @ h+=Maj(a,b,c) from the past ++ ldrb r3,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r3,lsl#8 ++ ldrb r3,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 13==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r11,r11,ror#5 ++ orr r2,r2,r3,lsl#24 ++ eor r0,r0,r11,ror#19 @ Sigma1(e) ++#endif ++ ldr r3,[r14],#4 @ *K256++ ++ add r6,r6,r2 @ h+=X[i] ++ str r2,[sp,#13*4] ++ eor r2,r4,r5 ++ add r6,r6,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r11 ++ add r6,r6,r3 @ h+=K256[i] ++ eor r2,r2,r5 @ Ch(e,f,g) ++ eor r0,r7,r7,ror#11 ++ add r6,r6,r2 @ h+=Ch(e,f,g) ++#if 13==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 13<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r7,r8 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#15*4] @ from future BODY_16_xx ++ eor r3,r7,r8 @ a^b, b^c in next round ++ ldr r1,[sp,#12*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r7,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r10,r10,r6 @ d+=h ++ eor r12,r12,r8 @ Maj(a,b,c) ++ add r6,r6,r0,ror#2 @ h+=Sigma0(a) ++ @ add r6,r6,r12 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 14 ++# if 14==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r10,r10,ror#5 ++ add r6,r6,r12 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r10,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 14 ++ add r6,r6,r12 @ h+=Maj(a,b,c) from the past ++ ldrb r12,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r12,lsl#8 ++ ldrb r12,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 14==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r10,r10,ror#5 ++ orr r2,r2,r12,lsl#24 ++ eor r0,r0,r10,ror#19 @ Sigma1(e) ++#endif ++ ldr r12,[r14],#4 @ *K256++ ++ add r5,r5,r2 @ h+=X[i] ++ str r2,[sp,#14*4] ++ eor r2,r11,r4 ++ add r5,r5,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r10 ++ add r5,r5,r12 @ h+=K256[i] ++ eor r2,r2,r4 @ Ch(e,f,g) ++ eor r0,r6,r6,ror#11 ++ add r5,r5,r2 @ h+=Ch(e,f,g) ++#if 14==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 14<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r6,r7 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#0*4] @ from future BODY_16_xx ++ eor r12,r6,r7 @ a^b, b^c in next round ++ ldr r1,[sp,#13*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r6,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r9,r9,r5 @ d+=h ++ eor r3,r3,r7 @ Maj(a,b,c) ++ add r5,r5,r0,ror#2 @ h+=Sigma0(a) ++ @ add r5,r5,r3 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ @ ldr r2,[r1],#4 @ 15 ++# if 15==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r9,r9,ror#5 ++ add r5,r5,r3 @ h+=Maj(a,b,c) from the past ++ eor r0,r0,r9,ror#19 @ Sigma1(e) ++# ifndef __ARMEB__ ++ rev r2,r2 ++# endif ++#else ++ @ ldrb r2,[r1,#3] @ 15 ++ add r5,r5,r3 @ h+=Maj(a,b,c) from the past ++ ldrb r3,[r1,#2] ++ ldrb r0,[r1,#1] ++ orr r2,r2,r3,lsl#8 ++ ldrb r3,[r1],#4 ++ orr r2,r2,r0,lsl#16 ++# if 15==15 ++ str r1,[sp,#17*4] @ make room for r1 ++# endif ++ eor r0,r9,r9,ror#5 ++ orr r2,r2,r3,lsl#24 ++ eor r0,r0,r9,ror#19 @ Sigma1(e) ++#endif ++ ldr r3,[r14],#4 @ *K256++ ++ add r4,r4,r2 @ h+=X[i] ++ str r2,[sp,#15*4] ++ eor r2,r10,r11 ++ add r4,r4,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r9 ++ add r4,r4,r3 @ h+=K256[i] ++ eor r2,r2,r11 @ Ch(e,f,g) ++ eor r0,r5,r5,ror#11 ++ add r4,r4,r2 @ h+=Ch(e,f,g) ++#if 15==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 15<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r5,r6 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#1*4] @ from future BODY_16_xx ++ eor r3,r5,r6 @ a^b, b^c in next round ++ ldr r1,[sp,#14*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r5,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r8,r8,r4 @ d+=h ++ eor r12,r12,r6 @ Maj(a,b,c) ++ add r4,r4,r0,ror#2 @ h+=Sigma0(a) ++ @ add r4,r4,r12 @ h+=Maj(a,b,c) ++Lrounds_16_xx: ++ @ ldr r2,[sp,#1*4] @ 16 ++ @ ldr r1,[sp,#14*4] ++ mov r0,r2,ror#7 ++ add r4,r4,r12 @ h+=Maj(a,b,c) from the past ++ mov r12,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r12,r12,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#0*4] ++ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#9*4] ++ ++ add r12,r12,r0 ++ eor r0,r8,r8,ror#5 @ from BODY_00_15 ++ add r2,r2,r12 ++ eor r0,r0,r8,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r12,[r14],#4 @ *K256++ ++ add r11,r11,r2 @ h+=X[i] ++ str r2,[sp,#0*4] ++ eor r2,r9,r10 ++ add r11,r11,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r8 ++ add r11,r11,r12 @ h+=K256[i] ++ eor r2,r2,r10 @ Ch(e,f,g) ++ eor r0,r4,r4,ror#11 ++ add r11,r11,r2 @ h+=Ch(e,f,g) ++#if 16==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 16<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r4,r5 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#2*4] @ from future BODY_16_xx ++ eor r12,r4,r5 @ a^b, b^c in next round ++ ldr r1,[sp,#15*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r4,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r7,r7,r11 @ d+=h ++ eor r3,r3,r5 @ Maj(a,b,c) ++ add r11,r11,r0,ror#2 @ h+=Sigma0(a) ++ @ add r11,r11,r3 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#2*4] @ 17 ++ @ ldr r1,[sp,#15*4] ++ mov r0,r2,ror#7 ++ add r11,r11,r3 @ h+=Maj(a,b,c) from the past ++ mov r3,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r3,r3,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#1*4] ++ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#10*4] ++ ++ add r3,r3,r0 ++ eor r0,r7,r7,ror#5 @ from BODY_00_15 ++ add r2,r2,r3 ++ eor r0,r0,r7,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r3,[r14],#4 @ *K256++ ++ add r10,r10,r2 @ h+=X[i] ++ str r2,[sp,#1*4] ++ eor r2,r8,r9 ++ add r10,r10,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r7 ++ add r10,r10,r3 @ h+=K256[i] ++ eor r2,r2,r9 @ Ch(e,f,g) ++ eor r0,r11,r11,ror#11 ++ add r10,r10,r2 @ h+=Ch(e,f,g) ++#if 17==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 17<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r11,r4 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#3*4] @ from future BODY_16_xx ++ eor r3,r11,r4 @ a^b, b^c in next round ++ ldr r1,[sp,#0*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r11,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r6,r6,r10 @ d+=h ++ eor r12,r12,r4 @ Maj(a,b,c) ++ add r10,r10,r0,ror#2 @ h+=Sigma0(a) ++ @ add r10,r10,r12 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#3*4] @ 18 ++ @ ldr r1,[sp,#0*4] ++ mov r0,r2,ror#7 ++ add r10,r10,r12 @ h+=Maj(a,b,c) from the past ++ mov r12,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r12,r12,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#2*4] ++ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#11*4] ++ ++ add r12,r12,r0 ++ eor r0,r6,r6,ror#5 @ from BODY_00_15 ++ add r2,r2,r12 ++ eor r0,r0,r6,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r12,[r14],#4 @ *K256++ ++ add r9,r9,r2 @ h+=X[i] ++ str r2,[sp,#2*4] ++ eor r2,r7,r8 ++ add r9,r9,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r6 ++ add r9,r9,r12 @ h+=K256[i] ++ eor r2,r2,r8 @ Ch(e,f,g) ++ eor r0,r10,r10,ror#11 ++ add r9,r9,r2 @ h+=Ch(e,f,g) ++#if 18==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 18<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r10,r11 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#4*4] @ from future BODY_16_xx ++ eor r12,r10,r11 @ a^b, b^c in next round ++ ldr r1,[sp,#1*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r10,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r5,r5,r9 @ d+=h ++ eor r3,r3,r11 @ Maj(a,b,c) ++ add r9,r9,r0,ror#2 @ h+=Sigma0(a) ++ @ add r9,r9,r3 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#4*4] @ 19 ++ @ ldr r1,[sp,#1*4] ++ mov r0,r2,ror#7 ++ add r9,r9,r3 @ h+=Maj(a,b,c) from the past ++ mov r3,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r3,r3,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#3*4] ++ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#12*4] ++ ++ add r3,r3,r0 ++ eor r0,r5,r5,ror#5 @ from BODY_00_15 ++ add r2,r2,r3 ++ eor r0,r0,r5,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r3,[r14],#4 @ *K256++ ++ add r8,r8,r2 @ h+=X[i] ++ str r2,[sp,#3*4] ++ eor r2,r6,r7 ++ add r8,r8,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r5 ++ add r8,r8,r3 @ h+=K256[i] ++ eor r2,r2,r7 @ Ch(e,f,g) ++ eor r0,r9,r9,ror#11 ++ add r8,r8,r2 @ h+=Ch(e,f,g) ++#if 19==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 19<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r9,r10 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#5*4] @ from future BODY_16_xx ++ eor r3,r9,r10 @ a^b, b^c in next round ++ ldr r1,[sp,#2*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r9,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r4,r4,r8 @ d+=h ++ eor r12,r12,r10 @ Maj(a,b,c) ++ add r8,r8,r0,ror#2 @ h+=Sigma0(a) ++ @ add r8,r8,r12 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#5*4] @ 20 ++ @ ldr r1,[sp,#2*4] ++ mov r0,r2,ror#7 ++ add r8,r8,r12 @ h+=Maj(a,b,c) from the past ++ mov r12,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r12,r12,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#4*4] ++ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#13*4] ++ ++ add r12,r12,r0 ++ eor r0,r4,r4,ror#5 @ from BODY_00_15 ++ add r2,r2,r12 ++ eor r0,r0,r4,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r12,[r14],#4 @ *K256++ ++ add r7,r7,r2 @ h+=X[i] ++ str r2,[sp,#4*4] ++ eor r2,r5,r6 ++ add r7,r7,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r4 ++ add r7,r7,r12 @ h+=K256[i] ++ eor r2,r2,r6 @ Ch(e,f,g) ++ eor r0,r8,r8,ror#11 ++ add r7,r7,r2 @ h+=Ch(e,f,g) ++#if 20==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 20<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r8,r9 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#6*4] @ from future BODY_16_xx ++ eor r12,r8,r9 @ a^b, b^c in next round ++ ldr r1,[sp,#3*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r8,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r11,r11,r7 @ d+=h ++ eor r3,r3,r9 @ Maj(a,b,c) ++ add r7,r7,r0,ror#2 @ h+=Sigma0(a) ++ @ add r7,r7,r3 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#6*4] @ 21 ++ @ ldr r1,[sp,#3*4] ++ mov r0,r2,ror#7 ++ add r7,r7,r3 @ h+=Maj(a,b,c) from the past ++ mov r3,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r3,r3,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#5*4] ++ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#14*4] ++ ++ add r3,r3,r0 ++ eor r0,r11,r11,ror#5 @ from BODY_00_15 ++ add r2,r2,r3 ++ eor r0,r0,r11,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r3,[r14],#4 @ *K256++ ++ add r6,r6,r2 @ h+=X[i] ++ str r2,[sp,#5*4] ++ eor r2,r4,r5 ++ add r6,r6,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r11 ++ add r6,r6,r3 @ h+=K256[i] ++ eor r2,r2,r5 @ Ch(e,f,g) ++ eor r0,r7,r7,ror#11 ++ add r6,r6,r2 @ h+=Ch(e,f,g) ++#if 21==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 21<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r7,r8 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#7*4] @ from future BODY_16_xx ++ eor r3,r7,r8 @ a^b, b^c in next round ++ ldr r1,[sp,#4*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r7,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r10,r10,r6 @ d+=h ++ eor r12,r12,r8 @ Maj(a,b,c) ++ add r6,r6,r0,ror#2 @ h+=Sigma0(a) ++ @ add r6,r6,r12 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#7*4] @ 22 ++ @ ldr r1,[sp,#4*4] ++ mov r0,r2,ror#7 ++ add r6,r6,r12 @ h+=Maj(a,b,c) from the past ++ mov r12,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r12,r12,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#6*4] ++ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#15*4] ++ ++ add r12,r12,r0 ++ eor r0,r10,r10,ror#5 @ from BODY_00_15 ++ add r2,r2,r12 ++ eor r0,r0,r10,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r12,[r14],#4 @ *K256++ ++ add r5,r5,r2 @ h+=X[i] ++ str r2,[sp,#6*4] ++ eor r2,r11,r4 ++ add r5,r5,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r10 ++ add r5,r5,r12 @ h+=K256[i] ++ eor r2,r2,r4 @ Ch(e,f,g) ++ eor r0,r6,r6,ror#11 ++ add r5,r5,r2 @ h+=Ch(e,f,g) ++#if 22==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 22<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r6,r7 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#8*4] @ from future BODY_16_xx ++ eor r12,r6,r7 @ a^b, b^c in next round ++ ldr r1,[sp,#5*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r6,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r9,r9,r5 @ d+=h ++ eor r3,r3,r7 @ Maj(a,b,c) ++ add r5,r5,r0,ror#2 @ h+=Sigma0(a) ++ @ add r5,r5,r3 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#8*4] @ 23 ++ @ ldr r1,[sp,#5*4] ++ mov r0,r2,ror#7 ++ add r5,r5,r3 @ h+=Maj(a,b,c) from the past ++ mov r3,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r3,r3,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#7*4] ++ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#0*4] ++ ++ add r3,r3,r0 ++ eor r0,r9,r9,ror#5 @ from BODY_00_15 ++ add r2,r2,r3 ++ eor r0,r0,r9,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r3,[r14],#4 @ *K256++ ++ add r4,r4,r2 @ h+=X[i] ++ str r2,[sp,#7*4] ++ eor r2,r10,r11 ++ add r4,r4,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r9 ++ add r4,r4,r3 @ h+=K256[i] ++ eor r2,r2,r11 @ Ch(e,f,g) ++ eor r0,r5,r5,ror#11 ++ add r4,r4,r2 @ h+=Ch(e,f,g) ++#if 23==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 23<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r5,r6 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#9*4] @ from future BODY_16_xx ++ eor r3,r5,r6 @ a^b, b^c in next round ++ ldr r1,[sp,#6*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r5,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r8,r8,r4 @ d+=h ++ eor r12,r12,r6 @ Maj(a,b,c) ++ add r4,r4,r0,ror#2 @ h+=Sigma0(a) ++ @ add r4,r4,r12 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#9*4] @ 24 ++ @ ldr r1,[sp,#6*4] ++ mov r0,r2,ror#7 ++ add r4,r4,r12 @ h+=Maj(a,b,c) from the past ++ mov r12,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r12,r12,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#8*4] ++ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#1*4] ++ ++ add r12,r12,r0 ++ eor r0,r8,r8,ror#5 @ from BODY_00_15 ++ add r2,r2,r12 ++ eor r0,r0,r8,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r12,[r14],#4 @ *K256++ ++ add r11,r11,r2 @ h+=X[i] ++ str r2,[sp,#8*4] ++ eor r2,r9,r10 ++ add r11,r11,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r8 ++ add r11,r11,r12 @ h+=K256[i] ++ eor r2,r2,r10 @ Ch(e,f,g) ++ eor r0,r4,r4,ror#11 ++ add r11,r11,r2 @ h+=Ch(e,f,g) ++#if 24==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 24<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r4,r5 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#10*4] @ from future BODY_16_xx ++ eor r12,r4,r5 @ a^b, b^c in next round ++ ldr r1,[sp,#7*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r4,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r7,r7,r11 @ d+=h ++ eor r3,r3,r5 @ Maj(a,b,c) ++ add r11,r11,r0,ror#2 @ h+=Sigma0(a) ++ @ add r11,r11,r3 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#10*4] @ 25 ++ @ ldr r1,[sp,#7*4] ++ mov r0,r2,ror#7 ++ add r11,r11,r3 @ h+=Maj(a,b,c) from the past ++ mov r3,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r3,r3,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#9*4] ++ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#2*4] ++ ++ add r3,r3,r0 ++ eor r0,r7,r7,ror#5 @ from BODY_00_15 ++ add r2,r2,r3 ++ eor r0,r0,r7,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r3,[r14],#4 @ *K256++ ++ add r10,r10,r2 @ h+=X[i] ++ str r2,[sp,#9*4] ++ eor r2,r8,r9 ++ add r10,r10,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r7 ++ add r10,r10,r3 @ h+=K256[i] ++ eor r2,r2,r9 @ Ch(e,f,g) ++ eor r0,r11,r11,ror#11 ++ add r10,r10,r2 @ h+=Ch(e,f,g) ++#if 25==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 25<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r11,r4 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#11*4] @ from future BODY_16_xx ++ eor r3,r11,r4 @ a^b, b^c in next round ++ ldr r1,[sp,#8*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r11,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r6,r6,r10 @ d+=h ++ eor r12,r12,r4 @ Maj(a,b,c) ++ add r10,r10,r0,ror#2 @ h+=Sigma0(a) ++ @ add r10,r10,r12 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#11*4] @ 26 ++ @ ldr r1,[sp,#8*4] ++ mov r0,r2,ror#7 ++ add r10,r10,r12 @ h+=Maj(a,b,c) from the past ++ mov r12,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r12,r12,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#10*4] ++ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#3*4] ++ ++ add r12,r12,r0 ++ eor r0,r6,r6,ror#5 @ from BODY_00_15 ++ add r2,r2,r12 ++ eor r0,r0,r6,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r12,[r14],#4 @ *K256++ ++ add r9,r9,r2 @ h+=X[i] ++ str r2,[sp,#10*4] ++ eor r2,r7,r8 ++ add r9,r9,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r6 ++ add r9,r9,r12 @ h+=K256[i] ++ eor r2,r2,r8 @ Ch(e,f,g) ++ eor r0,r10,r10,ror#11 ++ add r9,r9,r2 @ h+=Ch(e,f,g) ++#if 26==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 26<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r10,r11 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#12*4] @ from future BODY_16_xx ++ eor r12,r10,r11 @ a^b, b^c in next round ++ ldr r1,[sp,#9*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r10,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r5,r5,r9 @ d+=h ++ eor r3,r3,r11 @ Maj(a,b,c) ++ add r9,r9,r0,ror#2 @ h+=Sigma0(a) ++ @ add r9,r9,r3 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#12*4] @ 27 ++ @ ldr r1,[sp,#9*4] ++ mov r0,r2,ror#7 ++ add r9,r9,r3 @ h+=Maj(a,b,c) from the past ++ mov r3,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r3,r3,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#11*4] ++ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#4*4] ++ ++ add r3,r3,r0 ++ eor r0,r5,r5,ror#5 @ from BODY_00_15 ++ add r2,r2,r3 ++ eor r0,r0,r5,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r3,[r14],#4 @ *K256++ ++ add r8,r8,r2 @ h+=X[i] ++ str r2,[sp,#11*4] ++ eor r2,r6,r7 ++ add r8,r8,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r5 ++ add r8,r8,r3 @ h+=K256[i] ++ eor r2,r2,r7 @ Ch(e,f,g) ++ eor r0,r9,r9,ror#11 ++ add r8,r8,r2 @ h+=Ch(e,f,g) ++#if 27==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 27<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r9,r10 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#13*4] @ from future BODY_16_xx ++ eor r3,r9,r10 @ a^b, b^c in next round ++ ldr r1,[sp,#10*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r9,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r4,r4,r8 @ d+=h ++ eor r12,r12,r10 @ Maj(a,b,c) ++ add r8,r8,r0,ror#2 @ h+=Sigma0(a) ++ @ add r8,r8,r12 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#13*4] @ 28 ++ @ ldr r1,[sp,#10*4] ++ mov r0,r2,ror#7 ++ add r8,r8,r12 @ h+=Maj(a,b,c) from the past ++ mov r12,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r12,r12,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#12*4] ++ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#5*4] ++ ++ add r12,r12,r0 ++ eor r0,r4,r4,ror#5 @ from BODY_00_15 ++ add r2,r2,r12 ++ eor r0,r0,r4,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r12,[r14],#4 @ *K256++ ++ add r7,r7,r2 @ h+=X[i] ++ str r2,[sp,#12*4] ++ eor r2,r5,r6 ++ add r7,r7,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r4 ++ add r7,r7,r12 @ h+=K256[i] ++ eor r2,r2,r6 @ Ch(e,f,g) ++ eor r0,r8,r8,ror#11 ++ add r7,r7,r2 @ h+=Ch(e,f,g) ++#if 28==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 28<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r8,r9 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#14*4] @ from future BODY_16_xx ++ eor r12,r8,r9 @ a^b, b^c in next round ++ ldr r1,[sp,#11*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r8,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r11,r11,r7 @ d+=h ++ eor r3,r3,r9 @ Maj(a,b,c) ++ add r7,r7,r0,ror#2 @ h+=Sigma0(a) ++ @ add r7,r7,r3 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#14*4] @ 29 ++ @ ldr r1,[sp,#11*4] ++ mov r0,r2,ror#7 ++ add r7,r7,r3 @ h+=Maj(a,b,c) from the past ++ mov r3,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r3,r3,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#13*4] ++ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#6*4] ++ ++ add r3,r3,r0 ++ eor r0,r11,r11,ror#5 @ from BODY_00_15 ++ add r2,r2,r3 ++ eor r0,r0,r11,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r3,[r14],#4 @ *K256++ ++ add r6,r6,r2 @ h+=X[i] ++ str r2,[sp,#13*4] ++ eor r2,r4,r5 ++ add r6,r6,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r11 ++ add r6,r6,r3 @ h+=K256[i] ++ eor r2,r2,r5 @ Ch(e,f,g) ++ eor r0,r7,r7,ror#11 ++ add r6,r6,r2 @ h+=Ch(e,f,g) ++#if 29==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 29<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r7,r8 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#15*4] @ from future BODY_16_xx ++ eor r3,r7,r8 @ a^b, b^c in next round ++ ldr r1,[sp,#12*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r7,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r10,r10,r6 @ d+=h ++ eor r12,r12,r8 @ Maj(a,b,c) ++ add r6,r6,r0,ror#2 @ h+=Sigma0(a) ++ @ add r6,r6,r12 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#15*4] @ 30 ++ @ ldr r1,[sp,#12*4] ++ mov r0,r2,ror#7 ++ add r6,r6,r12 @ h+=Maj(a,b,c) from the past ++ mov r12,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r12,r12,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#14*4] ++ eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#7*4] ++ ++ add r12,r12,r0 ++ eor r0,r10,r10,ror#5 @ from BODY_00_15 ++ add r2,r2,r12 ++ eor r0,r0,r10,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r12,[r14],#4 @ *K256++ ++ add r5,r5,r2 @ h+=X[i] ++ str r2,[sp,#14*4] ++ eor r2,r11,r4 ++ add r5,r5,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r10 ++ add r5,r5,r12 @ h+=K256[i] ++ eor r2,r2,r4 @ Ch(e,f,g) ++ eor r0,r6,r6,ror#11 ++ add r5,r5,r2 @ h+=Ch(e,f,g) ++#if 30==31 ++ and r12,r12,#0xff ++ cmp r12,#0xf2 @ done? ++#endif ++#if 30<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r12,r6,r7 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#0*4] @ from future BODY_16_xx ++ eor r12,r6,r7 @ a^b, b^c in next round ++ ldr r1,[sp,#13*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r6,ror#20 @ Sigma0(a) ++ and r3,r3,r12 @ (b^c)&=(a^b) ++ add r9,r9,r5 @ d+=h ++ eor r3,r3,r7 @ Maj(a,b,c) ++ add r5,r5,r0,ror#2 @ h+=Sigma0(a) ++ @ add r5,r5,r3 @ h+=Maj(a,b,c) ++ @ ldr r2,[sp,#0*4] @ 31 ++ @ ldr r1,[sp,#13*4] ++ mov r0,r2,ror#7 ++ add r5,r5,r3 @ h+=Maj(a,b,c) from the past ++ mov r3,r1,ror#17 ++ eor r0,r0,r2,ror#18 ++ eor r3,r3,r1,ror#19 ++ eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) ++ ldr r2,[sp,#15*4] ++ eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) ++ ldr r1,[sp,#8*4] ++ ++ add r3,r3,r0 ++ eor r0,r9,r9,ror#5 @ from BODY_00_15 ++ add r2,r2,r3 ++ eor r0,r0,r9,ror#19 @ Sigma1(e) ++ add r2,r2,r1 @ X[i] ++ ldr r3,[r14],#4 @ *K256++ ++ add r4,r4,r2 @ h+=X[i] ++ str r2,[sp,#15*4] ++ eor r2,r10,r11 ++ add r4,r4,r0,ror#6 @ h+=Sigma1(e) ++ and r2,r2,r9 ++ add r4,r4,r3 @ h+=K256[i] ++ eor r2,r2,r11 @ Ch(e,f,g) ++ eor r0,r5,r5,ror#11 ++ add r4,r4,r2 @ h+=Ch(e,f,g) ++#if 31==31 ++ and r3,r3,#0xff ++ cmp r3,#0xf2 @ done? 
++#endif ++#if 31<15 ++# if __ARM_ARCH__>=7 ++ ldr r2,[r1],#4 @ prefetch ++# else ++ ldrb r2,[r1,#3] ++# endif ++ eor r3,r5,r6 @ a^b, b^c in next round ++#else ++ ldr r2,[sp,#1*4] @ from future BODY_16_xx ++ eor r3,r5,r6 @ a^b, b^c in next round ++ ldr r1,[sp,#14*4] @ from future BODY_16_xx ++#endif ++ eor r0,r0,r5,ror#20 @ Sigma0(a) ++ and r12,r12,r3 @ (b^c)&=(a^b) ++ add r8,r8,r4 @ d+=h ++ eor r12,r12,r6 @ Maj(a,b,c) ++ add r4,r4,r0,ror#2 @ h+=Sigma0(a) ++ @ add r4,r4,r12 @ h+=Maj(a,b,c) ++#if __ARM_ARCH__>=7 ++ ite eq @ Thumb2 thing, sanity check in ARM ++#endif ++ ldreq r3,[sp,#16*4] @ pull ctx ++ bne Lrounds_16_xx ++ ++ add r4,r4,r12 @ h+=Maj(a,b,c) from the past ++ ldr r0,[r3,#0] ++ ldr r2,[r3,#4] ++ ldr r12,[r3,#8] ++ add r4,r4,r0 ++ ldr r0,[r3,#12] ++ add r5,r5,r2 ++ ldr r2,[r3,#16] ++ add r6,r6,r12 ++ ldr r12,[r3,#20] ++ add r7,r7,r0 ++ ldr r0,[r3,#24] ++ add r8,r8,r2 ++ ldr r2,[r3,#28] ++ add r9,r9,r12 ++ ldr r1,[sp,#17*4] @ pull inp ++ ldr r12,[sp,#18*4] @ pull inp+len ++ add r10,r10,r0 ++ add r11,r11,r2 ++ stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} ++ cmp r1,r12 ++ sub r14,r14,#256 @ rewind Ktbl ++ bne Loop ++ ++ add sp,sp,#19*4 @ destroy frame ++#if __ARM_ARCH__>=5 ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc} ++#else ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr} ++ tst lr,#1 ++ moveq pc,lr @ be binary compatible with V4, yet ++.word 0xe12fff1e @ interoperable with Thumb ISA:-) ++#endif ++ ++#if __ARM_MAX_ARCH__>=7 ++ ++ ++ ++.globl _sha256_block_data_order_neon ++.private_extern _sha256_block_data_order_neon ++#ifdef __thumb2__ ++.thumb_func _sha256_block_data_order_neon ++#endif ++.align 5 ++.skip 16 ++_sha256_block_data_order_neon: ++LNEON: ++ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} ++ ++ sub r11,sp,#16*4+16 ++ adr r14,K256 ++ bic r11,r11,#15 @ align for 128-bit stores ++ mov r12,sp ++ mov sp,r11 @ alloca ++ add r2,r1,r2,lsl#6 @ len to point at the end of inp ++ ++ vld1.8 {q0},[r1]! ++ vld1.8 {q1},[r1]! ++ vld1.8 {q2},[r1]! ++ vld1.8 {q3},[r1]! ++ vld1.32 {q8},[r14,:128]! ++ vld1.32 {q9},[r14,:128]! ++ vld1.32 {q10},[r14,:128]! ++ vld1.32 {q11},[r14,:128]! ++ vrev32.8 q0,q0 @ yes, even on ++ str r0,[sp,#64] ++ vrev32.8 q1,q1 @ big-endian ++ str r1,[sp,#68] ++ mov r1,sp ++ vrev32.8 q2,q2 ++ str r2,[sp,#72] ++ vrev32.8 q3,q3 ++ str r12,[sp,#76] @ save original sp ++ vadd.i32 q8,q8,q0 ++ vadd.i32 q9,q9,q1 ++ vst1.32 {q8},[r1,:128]! ++ vadd.i32 q10,q10,q2 ++ vst1.32 {q9},[r1,:128]! ++ vadd.i32 q11,q11,q3 ++ vst1.32 {q10},[r1,:128]! ++ vst1.32 {q11},[r1,:128]! 
++ ++ ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} ++ sub r1,r1,#64 ++ ldr r2,[sp,#0] ++ eor r12,r12,r12 ++ eor r3,r5,r6 ++ b L_00_48 ++ ++.align 4 ++L_00_48: ++ vext.8 q8,q0,q1,#4 ++ add r11,r11,r2 ++ eor r2,r9,r10 ++ eor r0,r8,r8,ror#5 ++ vext.8 q9,q2,q3,#4 ++ add r4,r4,r12 ++ and r2,r2,r8 ++ eor r12,r0,r8,ror#19 ++ vshr.u32 q10,q8,#7 ++ eor r0,r4,r4,ror#11 ++ eor r2,r2,r10 ++ vadd.i32 q0,q0,q9 ++ add r11,r11,r12,ror#6 ++ eor r12,r4,r5 ++ vshr.u32 q9,q8,#3 ++ eor r0,r0,r4,ror#20 ++ add r11,r11,r2 ++ vsli.32 q10,q8,#25 ++ ldr r2,[sp,#4] ++ and r3,r3,r12 ++ vshr.u32 q11,q8,#18 ++ add r7,r7,r11 ++ add r11,r11,r0,ror#2 ++ eor r3,r3,r5 ++ veor q9,q9,q10 ++ add r10,r10,r2 ++ vsli.32 q11,q8,#14 ++ eor r2,r8,r9 ++ eor r0,r7,r7,ror#5 ++ vshr.u32 d24,d7,#17 ++ add r11,r11,r3 ++ and r2,r2,r7 ++ veor q9,q9,q11 ++ eor r3,r0,r7,ror#19 ++ eor r0,r11,r11,ror#11 ++ vsli.32 d24,d7,#15 ++ eor r2,r2,r9 ++ add r10,r10,r3,ror#6 ++ vshr.u32 d25,d7,#10 ++ eor r3,r11,r4 ++ eor r0,r0,r11,ror#20 ++ vadd.i32 q0,q0,q9 ++ add r10,r10,r2 ++ ldr r2,[sp,#8] ++ veor d25,d25,d24 ++ and r12,r12,r3 ++ add r6,r6,r10 ++ vshr.u32 d24,d7,#19 ++ add r10,r10,r0,ror#2 ++ eor r12,r12,r4 ++ vsli.32 d24,d7,#13 ++ add r9,r9,r2 ++ eor r2,r7,r8 ++ veor d25,d25,d24 ++ eor r0,r6,r6,ror#5 ++ add r10,r10,r12 ++ vadd.i32 d0,d0,d25 ++ and r2,r2,r6 ++ eor r12,r0,r6,ror#19 ++ vshr.u32 d24,d0,#17 ++ eor r0,r10,r10,ror#11 ++ eor r2,r2,r8 ++ vsli.32 d24,d0,#15 ++ add r9,r9,r12,ror#6 ++ eor r12,r10,r11 ++ vshr.u32 d25,d0,#10 ++ eor r0,r0,r10,ror#20 ++ add r9,r9,r2 ++ veor d25,d25,d24 ++ ldr r2,[sp,#12] ++ and r3,r3,r12 ++ vshr.u32 d24,d0,#19 ++ add r5,r5,r9 ++ add r9,r9,r0,ror#2 ++ eor r3,r3,r11 ++ vld1.32 {q8},[r14,:128]! ++ add r8,r8,r2 ++ vsli.32 d24,d0,#13 ++ eor r2,r6,r7 ++ eor r0,r5,r5,ror#5 ++ veor d25,d25,d24 ++ add r9,r9,r3 ++ and r2,r2,r5 ++ vadd.i32 d1,d1,d25 ++ eor r3,r0,r5,ror#19 ++ eor r0,r9,r9,ror#11 ++ vadd.i32 q8,q8,q0 ++ eor r2,r2,r7 ++ add r8,r8,r3,ror#6 ++ eor r3,r9,r10 ++ eor r0,r0,r9,ror#20 ++ add r8,r8,r2 ++ ldr r2,[sp,#16] ++ and r12,r12,r3 ++ add r4,r4,r8 ++ vst1.32 {q8},[r1,:128]! 
++ add r8,r8,r0,ror#2 ++ eor r12,r12,r10 ++ vext.8 q8,q1,q2,#4 ++ add r7,r7,r2 ++ eor r2,r5,r6 ++ eor r0,r4,r4,ror#5 ++ vext.8 q9,q3,q0,#4 ++ add r8,r8,r12 ++ and r2,r2,r4 ++ eor r12,r0,r4,ror#19 ++ vshr.u32 q10,q8,#7 ++ eor r0,r8,r8,ror#11 ++ eor r2,r2,r6 ++ vadd.i32 q1,q1,q9 ++ add r7,r7,r12,ror#6 ++ eor r12,r8,r9 ++ vshr.u32 q9,q8,#3 ++ eor r0,r0,r8,ror#20 ++ add r7,r7,r2 ++ vsli.32 q10,q8,#25 ++ ldr r2,[sp,#20] ++ and r3,r3,r12 ++ vshr.u32 q11,q8,#18 ++ add r11,r11,r7 ++ add r7,r7,r0,ror#2 ++ eor r3,r3,r9 ++ veor q9,q9,q10 ++ add r6,r6,r2 ++ vsli.32 q11,q8,#14 ++ eor r2,r4,r5 ++ eor r0,r11,r11,ror#5 ++ vshr.u32 d24,d1,#17 ++ add r7,r7,r3 ++ and r2,r2,r11 ++ veor q9,q9,q11 ++ eor r3,r0,r11,ror#19 ++ eor r0,r7,r7,ror#11 ++ vsli.32 d24,d1,#15 ++ eor r2,r2,r5 ++ add r6,r6,r3,ror#6 ++ vshr.u32 d25,d1,#10 ++ eor r3,r7,r8 ++ eor r0,r0,r7,ror#20 ++ vadd.i32 q1,q1,q9 ++ add r6,r6,r2 ++ ldr r2,[sp,#24] ++ veor d25,d25,d24 ++ and r12,r12,r3 ++ add r10,r10,r6 ++ vshr.u32 d24,d1,#19 ++ add r6,r6,r0,ror#2 ++ eor r12,r12,r8 ++ vsli.32 d24,d1,#13 ++ add r5,r5,r2 ++ eor r2,r11,r4 ++ veor d25,d25,d24 ++ eor r0,r10,r10,ror#5 ++ add r6,r6,r12 ++ vadd.i32 d2,d2,d25 ++ and r2,r2,r10 ++ eor r12,r0,r10,ror#19 ++ vshr.u32 d24,d2,#17 ++ eor r0,r6,r6,ror#11 ++ eor r2,r2,r4 ++ vsli.32 d24,d2,#15 ++ add r5,r5,r12,ror#6 ++ eor r12,r6,r7 ++ vshr.u32 d25,d2,#10 ++ eor r0,r0,r6,ror#20 ++ add r5,r5,r2 ++ veor d25,d25,d24 ++ ldr r2,[sp,#28] ++ and r3,r3,r12 ++ vshr.u32 d24,d2,#19 ++ add r9,r9,r5 ++ add r5,r5,r0,ror#2 ++ eor r3,r3,r7 ++ vld1.32 {q8},[r14,:128]! ++ add r4,r4,r2 ++ vsli.32 d24,d2,#13 ++ eor r2,r10,r11 ++ eor r0,r9,r9,ror#5 ++ veor d25,d25,d24 ++ add r5,r5,r3 ++ and r2,r2,r9 ++ vadd.i32 d3,d3,d25 ++ eor r3,r0,r9,ror#19 ++ eor r0,r5,r5,ror#11 ++ vadd.i32 q8,q8,q1 ++ eor r2,r2,r11 ++ add r4,r4,r3,ror#6 ++ eor r3,r5,r6 ++ eor r0,r0,r5,ror#20 ++ add r4,r4,r2 ++ ldr r2,[sp,#32] ++ and r12,r12,r3 ++ add r8,r8,r4 ++ vst1.32 {q8},[r1,:128]! ++ add r4,r4,r0,ror#2 ++ eor r12,r12,r6 ++ vext.8 q8,q2,q3,#4 ++ add r11,r11,r2 ++ eor r2,r9,r10 ++ eor r0,r8,r8,ror#5 ++ vext.8 q9,q0,q1,#4 ++ add r4,r4,r12 ++ and r2,r2,r8 ++ eor r12,r0,r8,ror#19 ++ vshr.u32 q10,q8,#7 ++ eor r0,r4,r4,ror#11 ++ eor r2,r2,r10 ++ vadd.i32 q2,q2,q9 ++ add r11,r11,r12,ror#6 ++ eor r12,r4,r5 ++ vshr.u32 q9,q8,#3 ++ eor r0,r0,r4,ror#20 ++ add r11,r11,r2 ++ vsli.32 q10,q8,#25 ++ ldr r2,[sp,#36] ++ and r3,r3,r12 ++ vshr.u32 q11,q8,#18 ++ add r7,r7,r11 ++ add r11,r11,r0,ror#2 ++ eor r3,r3,r5 ++ veor q9,q9,q10 ++ add r10,r10,r2 ++ vsli.32 q11,q8,#14 ++ eor r2,r8,r9 ++ eor r0,r7,r7,ror#5 ++ vshr.u32 d24,d3,#17 ++ add r11,r11,r3 ++ and r2,r2,r7 ++ veor q9,q9,q11 ++ eor r3,r0,r7,ror#19 ++ eor r0,r11,r11,ror#11 ++ vsli.32 d24,d3,#15 ++ eor r2,r2,r9 ++ add r10,r10,r3,ror#6 ++ vshr.u32 d25,d3,#10 ++ eor r3,r11,r4 ++ eor r0,r0,r11,ror#20 ++ vadd.i32 q2,q2,q9 ++ add r10,r10,r2 ++ ldr r2,[sp,#40] ++ veor d25,d25,d24 ++ and r12,r12,r3 ++ add r6,r6,r10 ++ vshr.u32 d24,d3,#19 ++ add r10,r10,r0,ror#2 ++ eor r12,r12,r4 ++ vsli.32 d24,d3,#13 ++ add r9,r9,r2 ++ eor r2,r7,r8 ++ veor d25,d25,d24 ++ eor r0,r6,r6,ror#5 ++ add r10,r10,r12 ++ vadd.i32 d4,d4,d25 ++ and r2,r2,r6 ++ eor r12,r0,r6,ror#19 ++ vshr.u32 d24,d4,#17 ++ eor r0,r10,r10,ror#11 ++ eor r2,r2,r8 ++ vsli.32 d24,d4,#15 ++ add r9,r9,r12,ror#6 ++ eor r12,r10,r11 ++ vshr.u32 d25,d4,#10 ++ eor r0,r0,r10,ror#20 ++ add r9,r9,r2 ++ veor d25,d25,d24 ++ ldr r2,[sp,#44] ++ and r3,r3,r12 ++ vshr.u32 d24,d4,#19 ++ add r5,r5,r9 ++ add r9,r9,r0,ror#2 ++ eor r3,r3,r11 ++ vld1.32 {q8},[r14,:128]! 
++ add r8,r8,r2 ++ vsli.32 d24,d4,#13 ++ eor r2,r6,r7 ++ eor r0,r5,r5,ror#5 ++ veor d25,d25,d24 ++ add r9,r9,r3 ++ and r2,r2,r5 ++ vadd.i32 d5,d5,d25 ++ eor r3,r0,r5,ror#19 ++ eor r0,r9,r9,ror#11 ++ vadd.i32 q8,q8,q2 ++ eor r2,r2,r7 ++ add r8,r8,r3,ror#6 ++ eor r3,r9,r10 ++ eor r0,r0,r9,ror#20 ++ add r8,r8,r2 ++ ldr r2,[sp,#48] ++ and r12,r12,r3 ++ add r4,r4,r8 ++ vst1.32 {q8},[r1,:128]! ++ add r8,r8,r0,ror#2 ++ eor r12,r12,r10 ++ vext.8 q8,q3,q0,#4 ++ add r7,r7,r2 ++ eor r2,r5,r6 ++ eor r0,r4,r4,ror#5 ++ vext.8 q9,q1,q2,#4 ++ add r8,r8,r12 ++ and r2,r2,r4 ++ eor r12,r0,r4,ror#19 ++ vshr.u32 q10,q8,#7 ++ eor r0,r8,r8,ror#11 ++ eor r2,r2,r6 ++ vadd.i32 q3,q3,q9 ++ add r7,r7,r12,ror#6 ++ eor r12,r8,r9 ++ vshr.u32 q9,q8,#3 ++ eor r0,r0,r8,ror#20 ++ add r7,r7,r2 ++ vsli.32 q10,q8,#25 ++ ldr r2,[sp,#52] ++ and r3,r3,r12 ++ vshr.u32 q11,q8,#18 ++ add r11,r11,r7 ++ add r7,r7,r0,ror#2 ++ eor r3,r3,r9 ++ veor q9,q9,q10 ++ add r6,r6,r2 ++ vsli.32 q11,q8,#14 ++ eor r2,r4,r5 ++ eor r0,r11,r11,ror#5 ++ vshr.u32 d24,d5,#17 ++ add r7,r7,r3 ++ and r2,r2,r11 ++ veor q9,q9,q11 ++ eor r3,r0,r11,ror#19 ++ eor r0,r7,r7,ror#11 ++ vsli.32 d24,d5,#15 ++ eor r2,r2,r5 ++ add r6,r6,r3,ror#6 ++ vshr.u32 d25,d5,#10 ++ eor r3,r7,r8 ++ eor r0,r0,r7,ror#20 ++ vadd.i32 q3,q3,q9 ++ add r6,r6,r2 ++ ldr r2,[sp,#56] ++ veor d25,d25,d24 ++ and r12,r12,r3 ++ add r10,r10,r6 ++ vshr.u32 d24,d5,#19 ++ add r6,r6,r0,ror#2 ++ eor r12,r12,r8 ++ vsli.32 d24,d5,#13 ++ add r5,r5,r2 ++ eor r2,r11,r4 ++ veor d25,d25,d24 ++ eor r0,r10,r10,ror#5 ++ add r6,r6,r12 ++ vadd.i32 d6,d6,d25 ++ and r2,r2,r10 ++ eor r12,r0,r10,ror#19 ++ vshr.u32 d24,d6,#17 ++ eor r0,r6,r6,ror#11 ++ eor r2,r2,r4 ++ vsli.32 d24,d6,#15 ++ add r5,r5,r12,ror#6 ++ eor r12,r6,r7 ++ vshr.u32 d25,d6,#10 ++ eor r0,r0,r6,ror#20 ++ add r5,r5,r2 ++ veor d25,d25,d24 ++ ldr r2,[sp,#60] ++ and r3,r3,r12 ++ vshr.u32 d24,d6,#19 ++ add r9,r9,r5 ++ add r5,r5,r0,ror#2 ++ eor r3,r3,r7 ++ vld1.32 {q8},[r14,:128]! ++ add r4,r4,r2 ++ vsli.32 d24,d6,#13 ++ eor r2,r10,r11 ++ eor r0,r9,r9,ror#5 ++ veor d25,d25,d24 ++ add r5,r5,r3 ++ and r2,r2,r9 ++ vadd.i32 d7,d7,d25 ++ eor r3,r0,r9,ror#19 ++ eor r0,r5,r5,ror#11 ++ vadd.i32 q8,q8,q3 ++ eor r2,r2,r11 ++ add r4,r4,r3,ror#6 ++ eor r3,r5,r6 ++ eor r0,r0,r5,ror#20 ++ add r4,r4,r2 ++ ldr r2,[r14] ++ and r12,r12,r3 ++ add r8,r8,r4 ++ vst1.32 {q8},[r1,:128]! ++ add r4,r4,r0,ror#2 ++ eor r12,r12,r6 ++ teq r2,#0 @ check for K256 terminator ++ ldr r2,[sp,#0] ++ sub r1,r1,#64 ++ bne L_00_48 ++ ++ ldr r1,[sp,#68] ++ ldr r0,[sp,#72] ++ sub r14,r14,#256 @ rewind r14 ++ teq r1,r0 ++ it eq ++ subeq r1,r1,#64 @ avoid SEGV ++ vld1.8 {q0},[r1]! @ load next input block ++ vld1.8 {q1},[r1]! ++ vld1.8 {q2},[r1]! ++ vld1.8 {q3},[r1]! ++ it ne ++ strne r1,[sp,#68] ++ mov r1,sp ++ add r11,r11,r2 ++ eor r2,r9,r10 ++ eor r0,r8,r8,ror#5 ++ add r4,r4,r12 ++ vld1.32 {q8},[r14,:128]! 
++ and r2,r2,r8 ++ eor r12,r0,r8,ror#19 ++ eor r0,r4,r4,ror#11 ++ eor r2,r2,r10 ++ vrev32.8 q0,q0 ++ add r11,r11,r12,ror#6 ++ eor r12,r4,r5 ++ eor r0,r0,r4,ror#20 ++ add r11,r11,r2 ++ vadd.i32 q8,q8,q0 ++ ldr r2,[sp,#4] ++ and r3,r3,r12 ++ add r7,r7,r11 ++ add r11,r11,r0,ror#2 ++ eor r3,r3,r5 ++ add r10,r10,r2 ++ eor r2,r8,r9 ++ eor r0,r7,r7,ror#5 ++ add r11,r11,r3 ++ and r2,r2,r7 ++ eor r3,r0,r7,ror#19 ++ eor r0,r11,r11,ror#11 ++ eor r2,r2,r9 ++ add r10,r10,r3,ror#6 ++ eor r3,r11,r4 ++ eor r0,r0,r11,ror#20 ++ add r10,r10,r2 ++ ldr r2,[sp,#8] ++ and r12,r12,r3 ++ add r6,r6,r10 ++ add r10,r10,r0,ror#2 ++ eor r12,r12,r4 ++ add r9,r9,r2 ++ eor r2,r7,r8 ++ eor r0,r6,r6,ror#5 ++ add r10,r10,r12 ++ and r2,r2,r6 ++ eor r12,r0,r6,ror#19 ++ eor r0,r10,r10,ror#11 ++ eor r2,r2,r8 ++ add r9,r9,r12,ror#6 ++ eor r12,r10,r11 ++ eor r0,r0,r10,ror#20 ++ add r9,r9,r2 ++ ldr r2,[sp,#12] ++ and r3,r3,r12 ++ add r5,r5,r9 ++ add r9,r9,r0,ror#2 ++ eor r3,r3,r11 ++ add r8,r8,r2 ++ eor r2,r6,r7 ++ eor r0,r5,r5,ror#5 ++ add r9,r9,r3 ++ and r2,r2,r5 ++ eor r3,r0,r5,ror#19 ++ eor r0,r9,r9,ror#11 ++ eor r2,r2,r7 ++ add r8,r8,r3,ror#6 ++ eor r3,r9,r10 ++ eor r0,r0,r9,ror#20 ++ add r8,r8,r2 ++ ldr r2,[sp,#16] ++ and r12,r12,r3 ++ add r4,r4,r8 ++ add r8,r8,r0,ror#2 ++ eor r12,r12,r10 ++ vst1.32 {q8},[r1,:128]! ++ add r7,r7,r2 ++ eor r2,r5,r6 ++ eor r0,r4,r4,ror#5 ++ add r8,r8,r12 ++ vld1.32 {q8},[r14,:128]! ++ and r2,r2,r4 ++ eor r12,r0,r4,ror#19 ++ eor r0,r8,r8,ror#11 ++ eor r2,r2,r6 ++ vrev32.8 q1,q1 ++ add r7,r7,r12,ror#6 ++ eor r12,r8,r9 ++ eor r0,r0,r8,ror#20 ++ add r7,r7,r2 ++ vadd.i32 q8,q8,q1 ++ ldr r2,[sp,#20] ++ and r3,r3,r12 ++ add r11,r11,r7 ++ add r7,r7,r0,ror#2 ++ eor r3,r3,r9 ++ add r6,r6,r2 ++ eor r2,r4,r5 ++ eor r0,r11,r11,ror#5 ++ add r7,r7,r3 ++ and r2,r2,r11 ++ eor r3,r0,r11,ror#19 ++ eor r0,r7,r7,ror#11 ++ eor r2,r2,r5 ++ add r6,r6,r3,ror#6 ++ eor r3,r7,r8 ++ eor r0,r0,r7,ror#20 ++ add r6,r6,r2 ++ ldr r2,[sp,#24] ++ and r12,r12,r3 ++ add r10,r10,r6 ++ add r6,r6,r0,ror#2 ++ eor r12,r12,r8 ++ add r5,r5,r2 ++ eor r2,r11,r4 ++ eor r0,r10,r10,ror#5 ++ add r6,r6,r12 ++ and r2,r2,r10 ++ eor r12,r0,r10,ror#19 ++ eor r0,r6,r6,ror#11 ++ eor r2,r2,r4 ++ add r5,r5,r12,ror#6 ++ eor r12,r6,r7 ++ eor r0,r0,r6,ror#20 ++ add r5,r5,r2 ++ ldr r2,[sp,#28] ++ and r3,r3,r12 ++ add r9,r9,r5 ++ add r5,r5,r0,ror#2 ++ eor r3,r3,r7 ++ add r4,r4,r2 ++ eor r2,r10,r11 ++ eor r0,r9,r9,ror#5 ++ add r5,r5,r3 ++ and r2,r2,r9 ++ eor r3,r0,r9,ror#19 ++ eor r0,r5,r5,ror#11 ++ eor r2,r2,r11 ++ add r4,r4,r3,ror#6 ++ eor r3,r5,r6 ++ eor r0,r0,r5,ror#20 ++ add r4,r4,r2 ++ ldr r2,[sp,#32] ++ and r12,r12,r3 ++ add r8,r8,r4 ++ add r4,r4,r0,ror#2 ++ eor r12,r12,r6 ++ vst1.32 {q8},[r1,:128]! ++ add r11,r11,r2 ++ eor r2,r9,r10 ++ eor r0,r8,r8,ror#5 ++ add r4,r4,r12 ++ vld1.32 {q8},[r14,:128]! 
++ and r2,r2,r8 ++ eor r12,r0,r8,ror#19 ++ eor r0,r4,r4,ror#11 ++ eor r2,r2,r10 ++ vrev32.8 q2,q2 ++ add r11,r11,r12,ror#6 ++ eor r12,r4,r5 ++ eor r0,r0,r4,ror#20 ++ add r11,r11,r2 ++ vadd.i32 q8,q8,q2 ++ ldr r2,[sp,#36] ++ and r3,r3,r12 ++ add r7,r7,r11 ++ add r11,r11,r0,ror#2 ++ eor r3,r3,r5 ++ add r10,r10,r2 ++ eor r2,r8,r9 ++ eor r0,r7,r7,ror#5 ++ add r11,r11,r3 ++ and r2,r2,r7 ++ eor r3,r0,r7,ror#19 ++ eor r0,r11,r11,ror#11 ++ eor r2,r2,r9 ++ add r10,r10,r3,ror#6 ++ eor r3,r11,r4 ++ eor r0,r0,r11,ror#20 ++ add r10,r10,r2 ++ ldr r2,[sp,#40] ++ and r12,r12,r3 ++ add r6,r6,r10 ++ add r10,r10,r0,ror#2 ++ eor r12,r12,r4 ++ add r9,r9,r2 ++ eor r2,r7,r8 ++ eor r0,r6,r6,ror#5 ++ add r10,r10,r12 ++ and r2,r2,r6 ++ eor r12,r0,r6,ror#19 ++ eor r0,r10,r10,ror#11 ++ eor r2,r2,r8 ++ add r9,r9,r12,ror#6 ++ eor r12,r10,r11 ++ eor r0,r0,r10,ror#20 ++ add r9,r9,r2 ++ ldr r2,[sp,#44] ++ and r3,r3,r12 ++ add r5,r5,r9 ++ add r9,r9,r0,ror#2 ++ eor r3,r3,r11 ++ add r8,r8,r2 ++ eor r2,r6,r7 ++ eor r0,r5,r5,ror#5 ++ add r9,r9,r3 ++ and r2,r2,r5 ++ eor r3,r0,r5,ror#19 ++ eor r0,r9,r9,ror#11 ++ eor r2,r2,r7 ++ add r8,r8,r3,ror#6 ++ eor r3,r9,r10 ++ eor r0,r0,r9,ror#20 ++ add r8,r8,r2 ++ ldr r2,[sp,#48] ++ and r12,r12,r3 ++ add r4,r4,r8 ++ add r8,r8,r0,ror#2 ++ eor r12,r12,r10 ++ vst1.32 {q8},[r1,:128]! ++ add r7,r7,r2 ++ eor r2,r5,r6 ++ eor r0,r4,r4,ror#5 ++ add r8,r8,r12 ++ vld1.32 {q8},[r14,:128]! ++ and r2,r2,r4 ++ eor r12,r0,r4,ror#19 ++ eor r0,r8,r8,ror#11 ++ eor r2,r2,r6 ++ vrev32.8 q3,q3 ++ add r7,r7,r12,ror#6 ++ eor r12,r8,r9 ++ eor r0,r0,r8,ror#20 ++ add r7,r7,r2 ++ vadd.i32 q8,q8,q3 ++ ldr r2,[sp,#52] ++ and r3,r3,r12 ++ add r11,r11,r7 ++ add r7,r7,r0,ror#2 ++ eor r3,r3,r9 ++ add r6,r6,r2 ++ eor r2,r4,r5 ++ eor r0,r11,r11,ror#5 ++ add r7,r7,r3 ++ and r2,r2,r11 ++ eor r3,r0,r11,ror#19 ++ eor r0,r7,r7,ror#11 ++ eor r2,r2,r5 ++ add r6,r6,r3,ror#6 ++ eor r3,r7,r8 ++ eor r0,r0,r7,ror#20 ++ add r6,r6,r2 ++ ldr r2,[sp,#56] ++ and r12,r12,r3 ++ add r10,r10,r6 ++ add r6,r6,r0,ror#2 ++ eor r12,r12,r8 ++ add r5,r5,r2 ++ eor r2,r11,r4 ++ eor r0,r10,r10,ror#5 ++ add r6,r6,r12 ++ and r2,r2,r10 ++ eor r12,r0,r10,ror#19 ++ eor r0,r6,r6,ror#11 ++ eor r2,r2,r4 ++ add r5,r5,r12,ror#6 ++ eor r12,r6,r7 ++ eor r0,r0,r6,ror#20 ++ add r5,r5,r2 ++ ldr r2,[sp,#60] ++ and r3,r3,r12 ++ add r9,r9,r5 ++ add r5,r5,r0,ror#2 ++ eor r3,r3,r7 ++ add r4,r4,r2 ++ eor r2,r10,r11 ++ eor r0,r9,r9,ror#5 ++ add r5,r5,r3 ++ and r2,r2,r9 ++ eor r3,r0,r9,ror#19 ++ eor r0,r5,r5,ror#11 ++ eor r2,r2,r11 ++ add r4,r4,r3,ror#6 ++ eor r3,r5,r6 ++ eor r0,r0,r5,ror#20 ++ add r4,r4,r2 ++ ldr r2,[sp,#64] ++ and r12,r12,r3 ++ add r8,r8,r4 ++ add r4,r4,r0,ror#2 ++ eor r12,r12,r6 ++ vst1.32 {q8},[r1,:128]! 
++ ldr r0,[r2,#0] ++ add r4,r4,r12 @ h+=Maj(a,b,c) from the past ++ ldr r12,[r2,#4] ++ ldr r3,[r2,#8] ++ ldr r1,[r2,#12] ++ add r4,r4,r0 @ accumulate ++ ldr r0,[r2,#16] ++ add r5,r5,r12 ++ ldr r12,[r2,#20] ++ add r6,r6,r3 ++ ldr r3,[r2,#24] ++ add r7,r7,r1 ++ ldr r1,[r2,#28] ++ add r8,r8,r0 ++ str r4,[r2],#4 ++ add r9,r9,r12 ++ str r5,[r2],#4 ++ add r10,r10,r3 ++ str r6,[r2],#4 ++ add r11,r11,r1 ++ str r7,[r2],#4 ++ stmia r2,{r8,r9,r10,r11} ++ ++ ittte ne ++ movne r1,sp ++ ldrne r2,[sp,#0] ++ eorne r12,r12,r12 ++ ldreq sp,[sp,#76] @ restore original sp ++ itt ne ++ eorne r3,r5,r6 ++ bne L_00_48 ++ ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} ++ ++#endif ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ++ ++# if defined(__thumb2__) ++# define INST(a,b,c,d) .byte c,d|0xc,a,b ++# else ++# define INST(a,b,c,d) .byte a,b,c,d ++# endif ++ ++#ifdef __thumb2__ ++.thumb_func sha256_block_data_order_armv8 ++#endif ++.align 5 ++sha256_block_data_order_armv8: ++LARMv8: ++ vld1.32 {q0,q1},[r0] ++ sub r3,r3,#256+32 ++ add r2,r1,r2,lsl#6 @ len to point at the end of inp ++ b Loop_v8 ++ ++.align 4 ++Loop_v8: ++ vld1.8 {q8,q9},[r1]! ++ vld1.8 {q10,q11},[r1]! ++ vld1.32 {q12},[r3]! ++ vrev32.8 q8,q8 ++ vrev32.8 q9,q9 ++ vrev32.8 q10,q10 ++ vrev32.8 q11,q11 ++ vmov q14,q0 @ offload ++ vmov q15,q1 ++ teq r1,r2 ++ vld1.32 {q13},[r3]! ++ vadd.i32 q12,q12,q8 ++ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 ++ vmov q2,q0 ++ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 ++ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 ++ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 ++ vld1.32 {q12},[r3]! ++ vadd.i32 q13,q13,q9 ++ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 ++ vmov q2,q0 ++ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 ++ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 ++ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 ++ vld1.32 {q13},[r3]! ++ vadd.i32 q12,q12,q10 ++ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 ++ vmov q2,q0 ++ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 ++ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 ++ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 ++ vld1.32 {q12},[r3]! ++ vadd.i32 q13,q13,q11 ++ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 ++ vmov q2,q0 ++ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 ++ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 ++ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 ++ vld1.32 {q13},[r3]! ++ vadd.i32 q12,q12,q8 ++ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 ++ vmov q2,q0 ++ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 ++ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 ++ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 ++ vld1.32 {q12},[r3]! ++ vadd.i32 q13,q13,q9 ++ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 ++ vmov q2,q0 ++ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 ++ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 ++ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 ++ vld1.32 {q13},[r3]! ++ vadd.i32 q12,q12,q10 ++ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 ++ vmov q2,q0 ++ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 ++ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 ++ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 ++ vld1.32 {q12},[r3]! ++ vadd.i32 q13,q13,q11 ++ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 ++ vmov q2,q0 ++ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 ++ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 ++ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 ++ vld1.32 {q13},[r3]! 
++ vadd.i32 q12,q12,q8 ++ INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 ++ vmov q2,q0 ++ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 ++ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 ++ INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 ++ vld1.32 {q12},[r3]! ++ vadd.i32 q13,q13,q9 ++ INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 ++ vmov q2,q0 ++ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 ++ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 ++ INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 ++ vld1.32 {q13},[r3]! ++ vadd.i32 q12,q12,q10 ++ INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 ++ vmov q2,q0 ++ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 ++ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 ++ INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 ++ vld1.32 {q12},[r3]! ++ vadd.i32 q13,q13,q11 ++ INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 ++ vmov q2,q0 ++ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 ++ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 ++ INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 ++ vld1.32 {q13},[r3]! ++ vadd.i32 q12,q12,q8 ++ vmov q2,q0 ++ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 ++ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 ++ ++ vld1.32 {q12},[r3]! ++ vadd.i32 q13,q13,q9 ++ vmov q2,q0 ++ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 ++ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 ++ ++ vld1.32 {q13},[r3] ++ vadd.i32 q12,q12,q10 ++ sub r3,r3,#256-16 @ rewind ++ vmov q2,q0 ++ INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 ++ INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 ++ ++ vadd.i32 q13,q13,q11 ++ vmov q2,q0 ++ INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 ++ INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 ++ ++ vadd.i32 q0,q0,q14 ++ vadd.i32 q1,q1,q15 ++ it ne ++ bne Loop_v8 ++ ++ vst1.32 {q0,q1},[r0] ++ ++ bx lr @ bx lr ++ ++#endif ++.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ++.comm _OPENSSL_armcap_P,4 ++.non_lazy_symbol_pointer ++OPENSSL_armcap_P: ++.indirect_symbol _OPENSSL_armcap_P ++.long 0 ++.private_extern _OPENSSL_armcap_P ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/fipsmodule/sha512-armv4.S b/apple-arm/crypto/fipsmodule/sha512-armv4.S +new file mode 100644 +index 0000000..21913cb +--- /dev/null ++++ b/apple-arm/crypto/fipsmodule/sha512-armv4.S +@@ -0,0 +1,1899 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved. ++@ ++@ Licensed under the OpenSSL license (the "License"). You may not use ++@ this file except in compliance with the License. You can obtain a copy ++@ in the file LICENSE in the source distribution or at ++@ https://www.openssl.org/source/license.html ++ ++ ++@ ==================================================================== ++@ Written by Andy Polyakov for the OpenSSL ++@ project. The module is, however, dual licensed under OpenSSL and ++@ CRYPTOGAMS licenses depending on where you obtain it. 
For further
++@ details see http://www.openssl.org/~appro/cryptogams/.
++@
++@ Permission to use under GPL terms is granted.
++@ ====================================================================
++
++@ SHA512 block procedure for ARMv4. September 2007.
++
++@ This code is ~4.5 (four and a half) times faster than code generated
++@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
++@ Xscale PXA250 core].
++@
++@ July 2010.
++@
++@ Rescheduling for dual-issue pipeline resulted in 6% improvement on
++@ Cortex A8 core and ~40 cycles per processed byte.
++
++@ February 2011.
++@
++@ Profiler-assisted and platform-specific optimization resulted in 7%
++@ improvement on Cortex A8 core and ~38 cycles per byte.
++
++@ March 2011.
++@
++@ Add NEON implementation. On Cortex A8 it was measured to process
++@ one byte in 23.3 cycles or ~60% faster than integer-only code.
++
++@ August 2012.
++@
++@ Improve NEON performance by 12% on Snapdragon S4. In absolute
++@ terms it's 22.6 cycles per byte, which is a disappointing result.
++@ Technical writers asserted that the 3-way S4 pipeline can sustain
++@ multiple NEON instructions per cycle, but dual NEON issue could
++@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
++@ for further details. On a side note, Cortex-A15 processes one byte in
++@ 16 cycles.
++
++@ Byte order [in]dependence. =========================================
++@
++@ Originally caller was expected to maintain specific *dword* order in
++@ h[0-7], namely with most significant dword at *lower* address, which
++@ was reflected in below two parameters as 0 and 4. Now caller is
++@ expected to maintain native byte order for whole 64-bit values.
++#ifndef __KERNEL__
++# include
++# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
++# define VFP_ABI_POP vldmia sp!,{d8-d15}
++#else
++# define __ARM_ARCH__ __LINUX_ARM_ARCH__
++# define __ARM_MAX_ARCH__ 7
++# define VFP_ABI_PUSH
++# define VFP_ABI_POP
++#endif
++
++@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
++@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
++ ++ ++#ifdef __ARMEL__ ++# define LO 0 ++# define HI 4 ++# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 ++#else ++# define HI 0 ++# define LO 4 ++# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 ++#endif ++ ++.text ++#if defined(__thumb2__) ++.syntax unified ++.thumb ++# define adrl adr ++#else ++.code 32 ++#endif ++ ++ ++.align 5 ++K512: ++ WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) ++ WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) ++ WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) ++ WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) ++ WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) ++ WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) ++ WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) ++ WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) ++ WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) ++ WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) ++ WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) ++ WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) ++ WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) ++ WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) ++ WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725) ++ WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) ++ WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) ++ WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) ++ WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) ++ WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) ++ WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) ++ WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) ++ WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) ++ WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) ++ WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) ++ WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) ++ WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) ++ WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) ++ WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) ++ WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) ++ WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) ++ WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) ++ WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) ++ WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) ++ WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) ++ WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) ++ WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) ++ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) ++ WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) ++ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) ++ ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ++LOPENSSL_armcap: ++.word OPENSSL_armcap_P-Lsha512_block_data_order ++.skip 32-4 ++#else ++.skip 32 ++#endif ++ ++.globl _sha512_block_data_order ++.private_extern _sha512_block_data_order ++#ifdef __thumb2__ ++.thumb_func _sha512_block_data_order ++#endif ++_sha512_block_data_order: ++Lsha512_block_data_order: ++#if __ARM_ARCH__<7 && !defined(__thumb2__) ++ sub r3,pc,#8 @ _sha512_block_data_order ++#else ++ adr r3,Lsha512_block_data_order ++#endif ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ++ ldr r12,LOPENSSL_armcap ++ ldr r12,[r3,r12] @ OPENSSL_armcap_P ++#ifdef __APPLE__ ++ ldr r12,[r12] ++#endif ++ tst r12,#ARMV7_NEON ++ bne LNEON ++#endif ++ add r2,r1,r2,lsl#7 @ len to point at the end of inp ++ stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} ++ sub r14,r3,#672 @ K512 ++ sub sp,sp,#9*8 ++ ++ ldr r7,[r0,#32+LO] ++ ldr r8,[r0,#32+HI] ++ ldr r9, [r0,#48+LO] ++ ldr r10, [r0,#48+HI] 
++ ldr r11, [r0,#56+LO] ++ ldr r12, [r0,#56+HI] ++Loop: ++ str r9, [sp,#48+0] ++ str r10, [sp,#48+4] ++ str r11, [sp,#56+0] ++ str r12, [sp,#56+4] ++ ldr r5,[r0,#0+LO] ++ ldr r6,[r0,#0+HI] ++ ldr r3,[r0,#8+LO] ++ ldr r4,[r0,#8+HI] ++ ldr r9, [r0,#16+LO] ++ ldr r10, [r0,#16+HI] ++ ldr r11, [r0,#24+LO] ++ ldr r12, [r0,#24+HI] ++ str r3,[sp,#8+0] ++ str r4,[sp,#8+4] ++ str r9, [sp,#16+0] ++ str r10, [sp,#16+4] ++ str r11, [sp,#24+0] ++ str r12, [sp,#24+4] ++ ldr r3,[r0,#40+LO] ++ ldr r4,[r0,#40+HI] ++ str r3,[sp,#40+0] ++ str r4,[sp,#40+4] ++ ++L00_15: ++#if __ARM_ARCH__<7 ++ ldrb r3,[r1,#7] ++ ldrb r9, [r1,#6] ++ ldrb r10, [r1,#5] ++ ldrb r11, [r1,#4] ++ ldrb r4,[r1,#3] ++ ldrb r12, [r1,#2] ++ orr r3,r3,r9,lsl#8 ++ ldrb r9, [r1,#1] ++ orr r3,r3,r10,lsl#16 ++ ldrb r10, [r1],#8 ++ orr r3,r3,r11,lsl#24 ++ orr r4,r4,r12,lsl#8 ++ orr r4,r4,r9,lsl#16 ++ orr r4,r4,r10,lsl#24 ++#else ++ ldr r3,[r1,#4] ++ ldr r4,[r1],#8 ++#ifdef __ARMEL__ ++ rev r3,r3 ++ rev r4,r4 ++#endif ++#endif ++ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) ++ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 ++ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 ++ mov r9,r7,lsr#14 ++ str r3,[sp,#64+0] ++ mov r10,r8,lsr#14 ++ str r4,[sp,#64+4] ++ eor r9,r9,r8,lsl#18 ++ ldr r11,[sp,#56+0] @ h.lo ++ eor r10,r10,r7,lsl#18 ++ ldr r12,[sp,#56+4] @ h.hi ++ eor r9,r9,r7,lsr#18 ++ eor r10,r10,r8,lsr#18 ++ eor r9,r9,r8,lsl#14 ++ eor r10,r10,r7,lsl#14 ++ eor r9,r9,r8,lsr#9 ++ eor r10,r10,r7,lsr#9 ++ eor r9,r9,r7,lsl#23 ++ eor r10,r10,r8,lsl#23 @ Sigma1(e) ++ adds r3,r3,r9 ++ ldr r9,[sp,#40+0] @ f.lo ++ adc r4,r4,r10 @ T += Sigma1(e) ++ ldr r10,[sp,#40+4] @ f.hi ++ adds r3,r3,r11 ++ ldr r11,[sp,#48+0] @ g.lo ++ adc r4,r4,r12 @ T += h ++ ldr r12,[sp,#48+4] @ g.hi ++ ++ eor r9,r9,r11 ++ str r7,[sp,#32+0] ++ eor r10,r10,r12 ++ str r8,[sp,#32+4] ++ and r9,r9,r7 ++ str r5,[sp,#0+0] ++ and r10,r10,r8 ++ str r6,[sp,#0+4] ++ eor r9,r9,r11 ++ ldr r11,[r14,#LO] @ K[i].lo ++ eor r10,r10,r12 @ Ch(e,f,g) ++ ldr r12,[r14,#HI] @ K[i].hi ++ ++ adds r3,r3,r9 ++ ldr r7,[sp,#24+0] @ d.lo ++ adc r4,r4,r10 @ T += Ch(e,f,g) ++ ldr r8,[sp,#24+4] @ d.hi ++ adds r3,r3,r11 ++ and r9,r11,#0xff ++ adc r4,r4,r12 @ T += K[i] ++ adds r7,r7,r3 ++ ldr r11,[sp,#8+0] @ b.lo ++ adc r8,r8,r4 @ d += T ++ teq r9,#148 ++ ++ ldr r12,[sp,#16+0] @ c.lo ++#if __ARM_ARCH__>=7 ++ it eq @ Thumb2 thing, sanity check in ARM ++#endif ++ orreq r14,r14,#1 ++ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) ++ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 ++ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 ++ mov r9,r5,lsr#28 ++ mov r10,r6,lsr#28 ++ eor r9,r9,r6,lsl#4 ++ eor r10,r10,r5,lsl#4 ++ eor r9,r9,r6,lsr#2 ++ eor r10,r10,r5,lsr#2 ++ eor r9,r9,r5,lsl#30 ++ eor r10,r10,r6,lsl#30 ++ eor r9,r9,r6,lsr#7 ++ eor r10,r10,r5,lsr#7 ++ eor r9,r9,r5,lsl#25 ++ eor r10,r10,r6,lsl#25 @ Sigma0(a) ++ adds r3,r3,r9 ++ and r9,r5,r11 ++ adc r4,r4,r10 @ T += Sigma0(a) ++ ++ ldr r10,[sp,#8+4] @ b.hi ++ orr r5,r5,r11 ++ ldr r11,[sp,#16+4] @ c.hi ++ and r5,r5,r12 ++ and r12,r6,r10 ++ orr r6,r6,r10 ++ orr r5,r5,r9 @ Maj(a,b,c).lo ++ and r6,r6,r11 ++ adds r5,r5,r3 ++ orr r6,r6,r12 @ Maj(a,b,c).hi ++ sub sp,sp,#8 ++ adc r6,r6,r4 @ h += T ++ tst r14,#1 ++ add r14,r14,#8 ++ tst r14,#1 ++ beq L00_15 ++ ldr r9,[sp,#184+0] ++ ldr r10,[sp,#184+4] ++ bic r14,r14,#1 ++L16_79: ++ @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) ++ @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 ++ @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 ++ mov r3,r9,lsr#1 ++ ldr r11,[sp,#80+0] ++ mov r4,r10,lsr#1 ++ ldr 
r12,[sp,#80+4] ++ eor r3,r3,r10,lsl#31 ++ eor r4,r4,r9,lsl#31 ++ eor r3,r3,r9,lsr#8 ++ eor r4,r4,r10,lsr#8 ++ eor r3,r3,r10,lsl#24 ++ eor r4,r4,r9,lsl#24 ++ eor r3,r3,r9,lsr#7 ++ eor r4,r4,r10,lsr#7 ++ eor r3,r3,r10,lsl#25 ++ ++ @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) ++ @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 ++ @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 ++ mov r9,r11,lsr#19 ++ mov r10,r12,lsr#19 ++ eor r9,r9,r12,lsl#13 ++ eor r10,r10,r11,lsl#13 ++ eor r9,r9,r12,lsr#29 ++ eor r10,r10,r11,lsr#29 ++ eor r9,r9,r11,lsl#3 ++ eor r10,r10,r12,lsl#3 ++ eor r9,r9,r11,lsr#6 ++ eor r10,r10,r12,lsr#6 ++ ldr r11,[sp,#120+0] ++ eor r9,r9,r12,lsl#26 ++ ++ ldr r12,[sp,#120+4] ++ adds r3,r3,r9 ++ ldr r9,[sp,#192+0] ++ adc r4,r4,r10 ++ ++ ldr r10,[sp,#192+4] ++ adds r3,r3,r11 ++ adc r4,r4,r12 ++ adds r3,r3,r9 ++ adc r4,r4,r10 ++ @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) ++ @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 ++ @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 ++ mov r9,r7,lsr#14 ++ str r3,[sp,#64+0] ++ mov r10,r8,lsr#14 ++ str r4,[sp,#64+4] ++ eor r9,r9,r8,lsl#18 ++ ldr r11,[sp,#56+0] @ h.lo ++ eor r10,r10,r7,lsl#18 ++ ldr r12,[sp,#56+4] @ h.hi ++ eor r9,r9,r7,lsr#18 ++ eor r10,r10,r8,lsr#18 ++ eor r9,r9,r8,lsl#14 ++ eor r10,r10,r7,lsl#14 ++ eor r9,r9,r8,lsr#9 ++ eor r10,r10,r7,lsr#9 ++ eor r9,r9,r7,lsl#23 ++ eor r10,r10,r8,lsl#23 @ Sigma1(e) ++ adds r3,r3,r9 ++ ldr r9,[sp,#40+0] @ f.lo ++ adc r4,r4,r10 @ T += Sigma1(e) ++ ldr r10,[sp,#40+4] @ f.hi ++ adds r3,r3,r11 ++ ldr r11,[sp,#48+0] @ g.lo ++ adc r4,r4,r12 @ T += h ++ ldr r12,[sp,#48+4] @ g.hi ++ ++ eor r9,r9,r11 ++ str r7,[sp,#32+0] ++ eor r10,r10,r12 ++ str r8,[sp,#32+4] ++ and r9,r9,r7 ++ str r5,[sp,#0+0] ++ and r10,r10,r8 ++ str r6,[sp,#0+4] ++ eor r9,r9,r11 ++ ldr r11,[r14,#LO] @ K[i].lo ++ eor r10,r10,r12 @ Ch(e,f,g) ++ ldr r12,[r14,#HI] @ K[i].hi ++ ++ adds r3,r3,r9 ++ ldr r7,[sp,#24+0] @ d.lo ++ adc r4,r4,r10 @ T += Ch(e,f,g) ++ ldr r8,[sp,#24+4] @ d.hi ++ adds r3,r3,r11 ++ and r9,r11,#0xff ++ adc r4,r4,r12 @ T += K[i] ++ adds r7,r7,r3 ++ ldr r11,[sp,#8+0] @ b.lo ++ adc r8,r8,r4 @ d += T ++ teq r9,#23 ++ ++ ldr r12,[sp,#16+0] @ c.lo ++#if __ARM_ARCH__>=7 ++ it eq @ Thumb2 thing, sanity check in ARM ++#endif ++ orreq r14,r14,#1 ++ @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) ++ @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 ++ @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 ++ mov r9,r5,lsr#28 ++ mov r10,r6,lsr#28 ++ eor r9,r9,r6,lsl#4 ++ eor r10,r10,r5,lsl#4 ++ eor r9,r9,r6,lsr#2 ++ eor r10,r10,r5,lsr#2 ++ eor r9,r9,r5,lsl#30 ++ eor r10,r10,r6,lsl#30 ++ eor r9,r9,r6,lsr#7 ++ eor r10,r10,r5,lsr#7 ++ eor r9,r9,r5,lsl#25 ++ eor r10,r10,r6,lsl#25 @ Sigma0(a) ++ adds r3,r3,r9 ++ and r9,r5,r11 ++ adc r4,r4,r10 @ T += Sigma0(a) ++ ++ ldr r10,[sp,#8+4] @ b.hi ++ orr r5,r5,r11 ++ ldr r11,[sp,#16+4] @ c.hi ++ and r5,r5,r12 ++ and r12,r6,r10 ++ orr r6,r6,r10 ++ orr r5,r5,r9 @ Maj(a,b,c).lo ++ and r6,r6,r11 ++ adds r5,r5,r3 ++ orr r6,r6,r12 @ Maj(a,b,c).hi ++ sub sp,sp,#8 ++ adc r6,r6,r4 @ h += T ++ tst r14,#1 ++ add r14,r14,#8 ++#if __ARM_ARCH__>=7 ++ ittt eq @ Thumb2 thing, sanity check in ARM ++#endif ++ ldreq r9,[sp,#184+0] ++ ldreq r10,[sp,#184+4] ++ beq L16_79 ++ bic r14,r14,#1 ++ ++ ldr r3,[sp,#8+0] ++ ldr r4,[sp,#8+4] ++ ldr r9, [r0,#0+LO] ++ ldr r10, [r0,#0+HI] ++ ldr r11, [r0,#8+LO] ++ ldr r12, [r0,#8+HI] ++ adds r9,r5,r9 ++ str r9, [r0,#0+LO] ++ adc r10,r6,r10 ++ str r10, [r0,#0+HI] ++ adds r11,r3,r11 ++ str r11, [r0,#8+LO] ++ adc r12,r4,r12 ++ str r12, [r0,#8+HI] ++ ++ ldr 
r5,[sp,#16+0] ++ ldr r6,[sp,#16+4] ++ ldr r3,[sp,#24+0] ++ ldr r4,[sp,#24+4] ++ ldr r9, [r0,#16+LO] ++ ldr r10, [r0,#16+HI] ++ ldr r11, [r0,#24+LO] ++ ldr r12, [r0,#24+HI] ++ adds r9,r5,r9 ++ str r9, [r0,#16+LO] ++ adc r10,r6,r10 ++ str r10, [r0,#16+HI] ++ adds r11,r3,r11 ++ str r11, [r0,#24+LO] ++ adc r12,r4,r12 ++ str r12, [r0,#24+HI] ++ ++ ldr r3,[sp,#40+0] ++ ldr r4,[sp,#40+4] ++ ldr r9, [r0,#32+LO] ++ ldr r10, [r0,#32+HI] ++ ldr r11, [r0,#40+LO] ++ ldr r12, [r0,#40+HI] ++ adds r7,r7,r9 ++ str r7,[r0,#32+LO] ++ adc r8,r8,r10 ++ str r8,[r0,#32+HI] ++ adds r11,r3,r11 ++ str r11, [r0,#40+LO] ++ adc r12,r4,r12 ++ str r12, [r0,#40+HI] ++ ++ ldr r5,[sp,#48+0] ++ ldr r6,[sp,#48+4] ++ ldr r3,[sp,#56+0] ++ ldr r4,[sp,#56+4] ++ ldr r9, [r0,#48+LO] ++ ldr r10, [r0,#48+HI] ++ ldr r11, [r0,#56+LO] ++ ldr r12, [r0,#56+HI] ++ adds r9,r5,r9 ++ str r9, [r0,#48+LO] ++ adc r10,r6,r10 ++ str r10, [r0,#48+HI] ++ adds r11,r3,r11 ++ str r11, [r0,#56+LO] ++ adc r12,r4,r12 ++ str r12, [r0,#56+HI] ++ ++ add sp,sp,#640 ++ sub r14,r14,#640 ++ ++ teq r1,r2 ++ bne Loop ++ ++ add sp,sp,#8*9 @ destroy frame ++#if __ARM_ARCH__>=5 ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} ++#else ++ ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} ++ tst lr,#1 ++ moveq pc,lr @ be binary compatible with V4, yet ++.word 0xe12fff1e @ interoperable with Thumb ISA:-) ++#endif ++ ++#if __ARM_MAX_ARCH__>=7 ++ ++ ++ ++.globl _sha512_block_data_order_neon ++.private_extern _sha512_block_data_order_neon ++#ifdef __thumb2__ ++.thumb_func _sha512_block_data_order_neon ++#endif ++.align 4 ++_sha512_block_data_order_neon: ++LNEON: ++ dmb @ errata #451034 on early Cortex A8 ++ add r2,r1,r2,lsl#7 @ len to point at the end of inp ++ adr r3,K512 ++ VFP_ABI_PUSH ++ vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context ++Loop_neon: ++ vshr.u64 d24,d20,#14 @ 0 ++#if 0<16 ++ vld1.64 {d0},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d20,#18 ++#if 0>0 ++ vadd.i64 d16,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d20,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d20,#50 ++ vsli.64 d25,d20,#46 ++ vmov d29,d20 ++ vsli.64 d26,d20,#23 ++#if 0<16 && defined(__ARMEL__) ++ vrev64.8 d0,d0 ++#endif ++ veor d25,d24 ++ vbsl d29,d21,d22 @ Ch(e,f,g) ++ vshr.u64 d24,d16,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d23 ++ vshr.u64 d25,d16,#34 ++ vsli.64 d24,d16,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d16,#39 ++ vadd.i64 d28,d0 ++ vsli.64 d25,d16,#30 ++ veor d30,d16,d17 ++ vsli.64 d26,d16,#25 ++ veor d23,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d18,d17 @ Maj(a,b,c) ++ veor d23,d26 @ Sigma0(a) ++ vadd.i64 d19,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d23,d30 ++ vshr.u64 d24,d19,#14 @ 1 ++#if 1<16 ++ vld1.64 {d1},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d19,#18 ++#if 1>0 ++ vadd.i64 d23,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d19,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d19,#50 ++ vsli.64 d25,d19,#46 ++ vmov d29,d19 ++ vsli.64 d26,d19,#23 ++#if 1<16 && defined(__ARMEL__) ++ vrev64.8 d1,d1 ++#endif ++ veor d25,d24 ++ vbsl d29,d20,d21 @ Ch(e,f,g) ++ vshr.u64 d24,d23,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d22 ++ vshr.u64 d25,d23,#34 ++ vsli.64 d24,d23,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d23,#39 ++ vadd.i64 d28,d1 ++ vsli.64 d25,d23,#30 ++ veor d30,d23,d16 ++ vsli.64 d26,d23,#25 ++ veor d22,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d17,d16 @ Maj(a,b,c) ++ veor d22,d26 @ Sigma0(a) ++ vadd.i64 d18,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d22,d30 ++ vshr.u64 d24,d18,#14 @ 2 ++#if 2<16 ++ vld1.64 {d2},[r1]! 
@ handles unaligned ++#endif ++ vshr.u64 d25,d18,#18 ++#if 2>0 ++ vadd.i64 d22,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d18,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d18,#50 ++ vsli.64 d25,d18,#46 ++ vmov d29,d18 ++ vsli.64 d26,d18,#23 ++#if 2<16 && defined(__ARMEL__) ++ vrev64.8 d2,d2 ++#endif ++ veor d25,d24 ++ vbsl d29,d19,d20 @ Ch(e,f,g) ++ vshr.u64 d24,d22,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d21 ++ vshr.u64 d25,d22,#34 ++ vsli.64 d24,d22,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d22,#39 ++ vadd.i64 d28,d2 ++ vsli.64 d25,d22,#30 ++ veor d30,d22,d23 ++ vsli.64 d26,d22,#25 ++ veor d21,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d16,d23 @ Maj(a,b,c) ++ veor d21,d26 @ Sigma0(a) ++ vadd.i64 d17,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d21,d30 ++ vshr.u64 d24,d17,#14 @ 3 ++#if 3<16 ++ vld1.64 {d3},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d17,#18 ++#if 3>0 ++ vadd.i64 d21,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d17,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d17,#50 ++ vsli.64 d25,d17,#46 ++ vmov d29,d17 ++ vsli.64 d26,d17,#23 ++#if 3<16 && defined(__ARMEL__) ++ vrev64.8 d3,d3 ++#endif ++ veor d25,d24 ++ vbsl d29,d18,d19 @ Ch(e,f,g) ++ vshr.u64 d24,d21,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d20 ++ vshr.u64 d25,d21,#34 ++ vsli.64 d24,d21,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d21,#39 ++ vadd.i64 d28,d3 ++ vsli.64 d25,d21,#30 ++ veor d30,d21,d22 ++ vsli.64 d26,d21,#25 ++ veor d20,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d23,d22 @ Maj(a,b,c) ++ veor d20,d26 @ Sigma0(a) ++ vadd.i64 d16,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d20,d30 ++ vshr.u64 d24,d16,#14 @ 4 ++#if 4<16 ++ vld1.64 {d4},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d16,#18 ++#if 4>0 ++ vadd.i64 d20,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d16,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d16,#50 ++ vsli.64 d25,d16,#46 ++ vmov d29,d16 ++ vsli.64 d26,d16,#23 ++#if 4<16 && defined(__ARMEL__) ++ vrev64.8 d4,d4 ++#endif ++ veor d25,d24 ++ vbsl d29,d17,d18 @ Ch(e,f,g) ++ vshr.u64 d24,d20,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d19 ++ vshr.u64 d25,d20,#34 ++ vsli.64 d24,d20,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d20,#39 ++ vadd.i64 d28,d4 ++ vsli.64 d25,d20,#30 ++ veor d30,d20,d21 ++ vsli.64 d26,d20,#25 ++ veor d19,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d22,d21 @ Maj(a,b,c) ++ veor d19,d26 @ Sigma0(a) ++ vadd.i64 d23,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d19,d30 ++ vshr.u64 d24,d23,#14 @ 5 ++#if 5<16 ++ vld1.64 {d5},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d23,#18 ++#if 5>0 ++ vadd.i64 d19,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d23,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d23,#50 ++ vsli.64 d25,d23,#46 ++ vmov d29,d23 ++ vsli.64 d26,d23,#23 ++#if 5<16 && defined(__ARMEL__) ++ vrev64.8 d5,d5 ++#endif ++ veor d25,d24 ++ vbsl d29,d16,d17 @ Ch(e,f,g) ++ vshr.u64 d24,d19,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d18 ++ vshr.u64 d25,d19,#34 ++ vsli.64 d24,d19,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d19,#39 ++ vadd.i64 d28,d5 ++ vsli.64 d25,d19,#30 ++ veor d30,d19,d20 ++ vsli.64 d26,d19,#25 ++ veor d18,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d21,d20 @ Maj(a,b,c) ++ veor d18,d26 @ Sigma0(a) ++ vadd.i64 d22,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d18,d30 ++ vshr.u64 d24,d22,#14 @ 6 ++#if 6<16 ++ vld1.64 {d6},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d22,#18 ++#if 6>0 ++ vadd.i64 d18,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d22,#41 ++ vld1.64 {d28},[r3,:64]! 
@ K[i++] ++ vsli.64 d24,d22,#50 ++ vsli.64 d25,d22,#46 ++ vmov d29,d22 ++ vsli.64 d26,d22,#23 ++#if 6<16 && defined(__ARMEL__) ++ vrev64.8 d6,d6 ++#endif ++ veor d25,d24 ++ vbsl d29,d23,d16 @ Ch(e,f,g) ++ vshr.u64 d24,d18,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d17 ++ vshr.u64 d25,d18,#34 ++ vsli.64 d24,d18,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d18,#39 ++ vadd.i64 d28,d6 ++ vsli.64 d25,d18,#30 ++ veor d30,d18,d19 ++ vsli.64 d26,d18,#25 ++ veor d17,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d20,d19 @ Maj(a,b,c) ++ veor d17,d26 @ Sigma0(a) ++ vadd.i64 d21,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d17,d30 ++ vshr.u64 d24,d21,#14 @ 7 ++#if 7<16 ++ vld1.64 {d7},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d21,#18 ++#if 7>0 ++ vadd.i64 d17,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d21,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d21,#50 ++ vsli.64 d25,d21,#46 ++ vmov d29,d21 ++ vsli.64 d26,d21,#23 ++#if 7<16 && defined(__ARMEL__) ++ vrev64.8 d7,d7 ++#endif ++ veor d25,d24 ++ vbsl d29,d22,d23 @ Ch(e,f,g) ++ vshr.u64 d24,d17,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d16 ++ vshr.u64 d25,d17,#34 ++ vsli.64 d24,d17,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d17,#39 ++ vadd.i64 d28,d7 ++ vsli.64 d25,d17,#30 ++ veor d30,d17,d18 ++ vsli.64 d26,d17,#25 ++ veor d16,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d19,d18 @ Maj(a,b,c) ++ veor d16,d26 @ Sigma0(a) ++ vadd.i64 d20,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d16,d30 ++ vshr.u64 d24,d20,#14 @ 8 ++#if 8<16 ++ vld1.64 {d8},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d20,#18 ++#if 8>0 ++ vadd.i64 d16,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d20,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d20,#50 ++ vsli.64 d25,d20,#46 ++ vmov d29,d20 ++ vsli.64 d26,d20,#23 ++#if 8<16 && defined(__ARMEL__) ++ vrev64.8 d8,d8 ++#endif ++ veor d25,d24 ++ vbsl d29,d21,d22 @ Ch(e,f,g) ++ vshr.u64 d24,d16,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d23 ++ vshr.u64 d25,d16,#34 ++ vsli.64 d24,d16,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d16,#39 ++ vadd.i64 d28,d8 ++ vsli.64 d25,d16,#30 ++ veor d30,d16,d17 ++ vsli.64 d26,d16,#25 ++ veor d23,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d18,d17 @ Maj(a,b,c) ++ veor d23,d26 @ Sigma0(a) ++ vadd.i64 d19,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d23,d30 ++ vshr.u64 d24,d19,#14 @ 9 ++#if 9<16 ++ vld1.64 {d9},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d19,#18 ++#if 9>0 ++ vadd.i64 d23,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d19,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d19,#50 ++ vsli.64 d25,d19,#46 ++ vmov d29,d19 ++ vsli.64 d26,d19,#23 ++#if 9<16 && defined(__ARMEL__) ++ vrev64.8 d9,d9 ++#endif ++ veor d25,d24 ++ vbsl d29,d20,d21 @ Ch(e,f,g) ++ vshr.u64 d24,d23,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d22 ++ vshr.u64 d25,d23,#34 ++ vsli.64 d24,d23,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d23,#39 ++ vadd.i64 d28,d9 ++ vsli.64 d25,d23,#30 ++ veor d30,d23,d16 ++ vsli.64 d26,d23,#25 ++ veor d22,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d17,d16 @ Maj(a,b,c) ++ veor d22,d26 @ Sigma0(a) ++ vadd.i64 d18,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d22,d30 ++ vshr.u64 d24,d18,#14 @ 10 ++#if 10<16 ++ vld1.64 {d10},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d18,#18 ++#if 10>0 ++ vadd.i64 d22,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d18,#41 ++ vld1.64 {d28},[r3,:64]! 
@ K[i++] ++ vsli.64 d24,d18,#50 ++ vsli.64 d25,d18,#46 ++ vmov d29,d18 ++ vsli.64 d26,d18,#23 ++#if 10<16 && defined(__ARMEL__) ++ vrev64.8 d10,d10 ++#endif ++ veor d25,d24 ++ vbsl d29,d19,d20 @ Ch(e,f,g) ++ vshr.u64 d24,d22,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d21 ++ vshr.u64 d25,d22,#34 ++ vsli.64 d24,d22,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d22,#39 ++ vadd.i64 d28,d10 ++ vsli.64 d25,d22,#30 ++ veor d30,d22,d23 ++ vsli.64 d26,d22,#25 ++ veor d21,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d16,d23 @ Maj(a,b,c) ++ veor d21,d26 @ Sigma0(a) ++ vadd.i64 d17,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d21,d30 ++ vshr.u64 d24,d17,#14 @ 11 ++#if 11<16 ++ vld1.64 {d11},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d17,#18 ++#if 11>0 ++ vadd.i64 d21,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d17,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d17,#50 ++ vsli.64 d25,d17,#46 ++ vmov d29,d17 ++ vsli.64 d26,d17,#23 ++#if 11<16 && defined(__ARMEL__) ++ vrev64.8 d11,d11 ++#endif ++ veor d25,d24 ++ vbsl d29,d18,d19 @ Ch(e,f,g) ++ vshr.u64 d24,d21,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d20 ++ vshr.u64 d25,d21,#34 ++ vsli.64 d24,d21,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d21,#39 ++ vadd.i64 d28,d11 ++ vsli.64 d25,d21,#30 ++ veor d30,d21,d22 ++ vsli.64 d26,d21,#25 ++ veor d20,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d23,d22 @ Maj(a,b,c) ++ veor d20,d26 @ Sigma0(a) ++ vadd.i64 d16,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d20,d30 ++ vshr.u64 d24,d16,#14 @ 12 ++#if 12<16 ++ vld1.64 {d12},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d16,#18 ++#if 12>0 ++ vadd.i64 d20,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d16,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d16,#50 ++ vsli.64 d25,d16,#46 ++ vmov d29,d16 ++ vsli.64 d26,d16,#23 ++#if 12<16 && defined(__ARMEL__) ++ vrev64.8 d12,d12 ++#endif ++ veor d25,d24 ++ vbsl d29,d17,d18 @ Ch(e,f,g) ++ vshr.u64 d24,d20,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d19 ++ vshr.u64 d25,d20,#34 ++ vsli.64 d24,d20,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d20,#39 ++ vadd.i64 d28,d12 ++ vsli.64 d25,d20,#30 ++ veor d30,d20,d21 ++ vsli.64 d26,d20,#25 ++ veor d19,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d22,d21 @ Maj(a,b,c) ++ veor d19,d26 @ Sigma0(a) ++ vadd.i64 d23,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d19,d30 ++ vshr.u64 d24,d23,#14 @ 13 ++#if 13<16 ++ vld1.64 {d13},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d23,#18 ++#if 13>0 ++ vadd.i64 d19,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d23,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d23,#50 ++ vsli.64 d25,d23,#46 ++ vmov d29,d23 ++ vsli.64 d26,d23,#23 ++#if 13<16 && defined(__ARMEL__) ++ vrev64.8 d13,d13 ++#endif ++ veor d25,d24 ++ vbsl d29,d16,d17 @ Ch(e,f,g) ++ vshr.u64 d24,d19,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d18 ++ vshr.u64 d25,d19,#34 ++ vsli.64 d24,d19,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d19,#39 ++ vadd.i64 d28,d13 ++ vsli.64 d25,d19,#30 ++ veor d30,d19,d20 ++ vsli.64 d26,d19,#25 ++ veor d18,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d21,d20 @ Maj(a,b,c) ++ veor d18,d26 @ Sigma0(a) ++ vadd.i64 d22,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d18,d30 ++ vshr.u64 d24,d22,#14 @ 14 ++#if 14<16 ++ vld1.64 {d14},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d22,#18 ++#if 14>0 ++ vadd.i64 d18,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d22,#41 ++ vld1.64 {d28},[r3,:64]! 
@ K[i++] ++ vsli.64 d24,d22,#50 ++ vsli.64 d25,d22,#46 ++ vmov d29,d22 ++ vsli.64 d26,d22,#23 ++#if 14<16 && defined(__ARMEL__) ++ vrev64.8 d14,d14 ++#endif ++ veor d25,d24 ++ vbsl d29,d23,d16 @ Ch(e,f,g) ++ vshr.u64 d24,d18,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d17 ++ vshr.u64 d25,d18,#34 ++ vsli.64 d24,d18,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d18,#39 ++ vadd.i64 d28,d14 ++ vsli.64 d25,d18,#30 ++ veor d30,d18,d19 ++ vsli.64 d26,d18,#25 ++ veor d17,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d20,d19 @ Maj(a,b,c) ++ veor d17,d26 @ Sigma0(a) ++ vadd.i64 d21,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d17,d30 ++ vshr.u64 d24,d21,#14 @ 15 ++#if 15<16 ++ vld1.64 {d15},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d21,#18 ++#if 15>0 ++ vadd.i64 d17,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d21,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d21,#50 ++ vsli.64 d25,d21,#46 ++ vmov d29,d21 ++ vsli.64 d26,d21,#23 ++#if 15<16 && defined(__ARMEL__) ++ vrev64.8 d15,d15 ++#endif ++ veor d25,d24 ++ vbsl d29,d22,d23 @ Ch(e,f,g) ++ vshr.u64 d24,d17,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d16 ++ vshr.u64 d25,d17,#34 ++ vsli.64 d24,d17,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d17,#39 ++ vadd.i64 d28,d15 ++ vsli.64 d25,d17,#30 ++ veor d30,d17,d18 ++ vsli.64 d26,d17,#25 ++ veor d16,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d19,d18 @ Maj(a,b,c) ++ veor d16,d26 @ Sigma0(a) ++ vadd.i64 d20,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d16,d30 ++ mov r12,#4 ++L16_79_neon: ++ subs r12,#1 ++ vshr.u64 q12,q7,#19 ++ vshr.u64 q13,q7,#61 ++ vadd.i64 d16,d30 @ h+=Maj from the past ++ vshr.u64 q15,q7,#6 ++ vsli.64 q12,q7,#45 ++ vext.8 q14,q0,q1,#8 @ X[i+1] ++ vsli.64 q13,q7,#3 ++ veor q15,q12 ++ vshr.u64 q12,q14,#1 ++ veor q15,q13 @ sigma1(X[i+14]) ++ vshr.u64 q13,q14,#8 ++ vadd.i64 q0,q15 ++ vshr.u64 q15,q14,#7 ++ vsli.64 q12,q14,#63 ++ vsli.64 q13,q14,#56 ++ vext.8 q14,q4,q5,#8 @ X[i+9] ++ veor q15,q12 ++ vshr.u64 d24,d20,#14 @ from NEON_00_15 ++ vadd.i64 q0,q14 ++ vshr.u64 d25,d20,#18 @ from NEON_00_15 ++ veor q15,q13 @ sigma0(X[i+1]) ++ vshr.u64 d26,d20,#41 @ from NEON_00_15 ++ vadd.i64 q0,q15 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d20,#50 ++ vsli.64 d25,d20,#46 ++ vmov d29,d20 ++ vsli.64 d26,d20,#23 ++#if 16<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d21,d22 @ Ch(e,f,g) ++ vshr.u64 d24,d16,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d23 ++ vshr.u64 d25,d16,#34 ++ vsli.64 d24,d16,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d16,#39 ++ vadd.i64 d28,d0 ++ vsli.64 d25,d16,#30 ++ veor d30,d16,d17 ++ vsli.64 d26,d16,#25 ++ veor d23,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d18,d17 @ Maj(a,b,c) ++ veor d23,d26 @ Sigma0(a) ++ vadd.i64 d19,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d23,d30 ++ vshr.u64 d24,d19,#14 @ 17 ++#if 17<16 ++ vld1.64 {d1},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d19,#18 ++#if 17>0 ++ vadd.i64 d23,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d19,#41 ++ vld1.64 {d28},[r3,:64]! 
@ K[i++] ++ vsli.64 d24,d19,#50 ++ vsli.64 d25,d19,#46 ++ vmov d29,d19 ++ vsli.64 d26,d19,#23 ++#if 17<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d20,d21 @ Ch(e,f,g) ++ vshr.u64 d24,d23,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d22 ++ vshr.u64 d25,d23,#34 ++ vsli.64 d24,d23,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d23,#39 ++ vadd.i64 d28,d1 ++ vsli.64 d25,d23,#30 ++ veor d30,d23,d16 ++ vsli.64 d26,d23,#25 ++ veor d22,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d17,d16 @ Maj(a,b,c) ++ veor d22,d26 @ Sigma0(a) ++ vadd.i64 d18,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d22,d30 ++ vshr.u64 q12,q0,#19 ++ vshr.u64 q13,q0,#61 ++ vadd.i64 d22,d30 @ h+=Maj from the past ++ vshr.u64 q15,q0,#6 ++ vsli.64 q12,q0,#45 ++ vext.8 q14,q1,q2,#8 @ X[i+1] ++ vsli.64 q13,q0,#3 ++ veor q15,q12 ++ vshr.u64 q12,q14,#1 ++ veor q15,q13 @ sigma1(X[i+14]) ++ vshr.u64 q13,q14,#8 ++ vadd.i64 q1,q15 ++ vshr.u64 q15,q14,#7 ++ vsli.64 q12,q14,#63 ++ vsli.64 q13,q14,#56 ++ vext.8 q14,q5,q6,#8 @ X[i+9] ++ veor q15,q12 ++ vshr.u64 d24,d18,#14 @ from NEON_00_15 ++ vadd.i64 q1,q14 ++ vshr.u64 d25,d18,#18 @ from NEON_00_15 ++ veor q15,q13 @ sigma0(X[i+1]) ++ vshr.u64 d26,d18,#41 @ from NEON_00_15 ++ vadd.i64 q1,q15 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d18,#50 ++ vsli.64 d25,d18,#46 ++ vmov d29,d18 ++ vsli.64 d26,d18,#23 ++#if 18<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d19,d20 @ Ch(e,f,g) ++ vshr.u64 d24,d22,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d21 ++ vshr.u64 d25,d22,#34 ++ vsli.64 d24,d22,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d22,#39 ++ vadd.i64 d28,d2 ++ vsli.64 d25,d22,#30 ++ veor d30,d22,d23 ++ vsli.64 d26,d22,#25 ++ veor d21,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d16,d23 @ Maj(a,b,c) ++ veor d21,d26 @ Sigma0(a) ++ vadd.i64 d17,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d21,d30 ++ vshr.u64 d24,d17,#14 @ 19 ++#if 19<16 ++ vld1.64 {d3},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d17,#18 ++#if 19>0 ++ vadd.i64 d21,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d17,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d17,#50 ++ vsli.64 d25,d17,#46 ++ vmov d29,d17 ++ vsli.64 d26,d17,#23 ++#if 19<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d18,d19 @ Ch(e,f,g) ++ vshr.u64 d24,d21,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d20 ++ vshr.u64 d25,d21,#34 ++ vsli.64 d24,d21,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d21,#39 ++ vadd.i64 d28,d3 ++ vsli.64 d25,d21,#30 ++ veor d30,d21,d22 ++ vsli.64 d26,d21,#25 ++ veor d20,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d23,d22 @ Maj(a,b,c) ++ veor d20,d26 @ Sigma0(a) ++ vadd.i64 d16,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d20,d30 ++ vshr.u64 q12,q1,#19 ++ vshr.u64 q13,q1,#61 ++ vadd.i64 d20,d30 @ h+=Maj from the past ++ vshr.u64 q15,q1,#6 ++ vsli.64 q12,q1,#45 ++ vext.8 q14,q2,q3,#8 @ X[i+1] ++ vsli.64 q13,q1,#3 ++ veor q15,q12 ++ vshr.u64 q12,q14,#1 ++ veor q15,q13 @ sigma1(X[i+14]) ++ vshr.u64 q13,q14,#8 ++ vadd.i64 q2,q15 ++ vshr.u64 q15,q14,#7 ++ vsli.64 q12,q14,#63 ++ vsli.64 q13,q14,#56 ++ vext.8 q14,q6,q7,#8 @ X[i+9] ++ veor q15,q12 ++ vshr.u64 d24,d16,#14 @ from NEON_00_15 ++ vadd.i64 q2,q14 ++ vshr.u64 d25,d16,#18 @ from NEON_00_15 ++ veor q15,q13 @ sigma0(X[i+1]) ++ vshr.u64 d26,d16,#41 @ from NEON_00_15 ++ vadd.i64 q2,q15 ++ vld1.64 {d28},[r3,:64]! 
@ K[i++] ++ vsli.64 d24,d16,#50 ++ vsli.64 d25,d16,#46 ++ vmov d29,d16 ++ vsli.64 d26,d16,#23 ++#if 20<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d17,d18 @ Ch(e,f,g) ++ vshr.u64 d24,d20,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d19 ++ vshr.u64 d25,d20,#34 ++ vsli.64 d24,d20,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d20,#39 ++ vadd.i64 d28,d4 ++ vsli.64 d25,d20,#30 ++ veor d30,d20,d21 ++ vsli.64 d26,d20,#25 ++ veor d19,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d22,d21 @ Maj(a,b,c) ++ veor d19,d26 @ Sigma0(a) ++ vadd.i64 d23,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d19,d30 ++ vshr.u64 d24,d23,#14 @ 21 ++#if 21<16 ++ vld1.64 {d5},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d23,#18 ++#if 21>0 ++ vadd.i64 d19,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d23,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d23,#50 ++ vsli.64 d25,d23,#46 ++ vmov d29,d23 ++ vsli.64 d26,d23,#23 ++#if 21<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d16,d17 @ Ch(e,f,g) ++ vshr.u64 d24,d19,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d18 ++ vshr.u64 d25,d19,#34 ++ vsli.64 d24,d19,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d19,#39 ++ vadd.i64 d28,d5 ++ vsli.64 d25,d19,#30 ++ veor d30,d19,d20 ++ vsli.64 d26,d19,#25 ++ veor d18,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d21,d20 @ Maj(a,b,c) ++ veor d18,d26 @ Sigma0(a) ++ vadd.i64 d22,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d18,d30 ++ vshr.u64 q12,q2,#19 ++ vshr.u64 q13,q2,#61 ++ vadd.i64 d18,d30 @ h+=Maj from the past ++ vshr.u64 q15,q2,#6 ++ vsli.64 q12,q2,#45 ++ vext.8 q14,q3,q4,#8 @ X[i+1] ++ vsli.64 q13,q2,#3 ++ veor q15,q12 ++ vshr.u64 q12,q14,#1 ++ veor q15,q13 @ sigma1(X[i+14]) ++ vshr.u64 q13,q14,#8 ++ vadd.i64 q3,q15 ++ vshr.u64 q15,q14,#7 ++ vsli.64 q12,q14,#63 ++ vsli.64 q13,q14,#56 ++ vext.8 q14,q7,q0,#8 @ X[i+9] ++ veor q15,q12 ++ vshr.u64 d24,d22,#14 @ from NEON_00_15 ++ vadd.i64 q3,q14 ++ vshr.u64 d25,d22,#18 @ from NEON_00_15 ++ veor q15,q13 @ sigma0(X[i+1]) ++ vshr.u64 d26,d22,#41 @ from NEON_00_15 ++ vadd.i64 q3,q15 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d22,#50 ++ vsli.64 d25,d22,#46 ++ vmov d29,d22 ++ vsli.64 d26,d22,#23 ++#if 22<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d23,d16 @ Ch(e,f,g) ++ vshr.u64 d24,d18,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d17 ++ vshr.u64 d25,d18,#34 ++ vsli.64 d24,d18,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d18,#39 ++ vadd.i64 d28,d6 ++ vsli.64 d25,d18,#30 ++ veor d30,d18,d19 ++ vsli.64 d26,d18,#25 ++ veor d17,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d20,d19 @ Maj(a,b,c) ++ veor d17,d26 @ Sigma0(a) ++ vadd.i64 d21,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d17,d30 ++ vshr.u64 d24,d21,#14 @ 23 ++#if 23<16 ++ vld1.64 {d7},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d21,#18 ++#if 23>0 ++ vadd.i64 d17,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d21,#41 ++ vld1.64 {d28},[r3,:64]! 
@ K[i++] ++ vsli.64 d24,d21,#50 ++ vsli.64 d25,d21,#46 ++ vmov d29,d21 ++ vsli.64 d26,d21,#23 ++#if 23<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d22,d23 @ Ch(e,f,g) ++ vshr.u64 d24,d17,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d16 ++ vshr.u64 d25,d17,#34 ++ vsli.64 d24,d17,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d17,#39 ++ vadd.i64 d28,d7 ++ vsli.64 d25,d17,#30 ++ veor d30,d17,d18 ++ vsli.64 d26,d17,#25 ++ veor d16,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d19,d18 @ Maj(a,b,c) ++ veor d16,d26 @ Sigma0(a) ++ vadd.i64 d20,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d16,d30 ++ vshr.u64 q12,q3,#19 ++ vshr.u64 q13,q3,#61 ++ vadd.i64 d16,d30 @ h+=Maj from the past ++ vshr.u64 q15,q3,#6 ++ vsli.64 q12,q3,#45 ++ vext.8 q14,q4,q5,#8 @ X[i+1] ++ vsli.64 q13,q3,#3 ++ veor q15,q12 ++ vshr.u64 q12,q14,#1 ++ veor q15,q13 @ sigma1(X[i+14]) ++ vshr.u64 q13,q14,#8 ++ vadd.i64 q4,q15 ++ vshr.u64 q15,q14,#7 ++ vsli.64 q12,q14,#63 ++ vsli.64 q13,q14,#56 ++ vext.8 q14,q0,q1,#8 @ X[i+9] ++ veor q15,q12 ++ vshr.u64 d24,d20,#14 @ from NEON_00_15 ++ vadd.i64 q4,q14 ++ vshr.u64 d25,d20,#18 @ from NEON_00_15 ++ veor q15,q13 @ sigma0(X[i+1]) ++ vshr.u64 d26,d20,#41 @ from NEON_00_15 ++ vadd.i64 q4,q15 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d20,#50 ++ vsli.64 d25,d20,#46 ++ vmov d29,d20 ++ vsli.64 d26,d20,#23 ++#if 24<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d21,d22 @ Ch(e,f,g) ++ vshr.u64 d24,d16,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d23 ++ vshr.u64 d25,d16,#34 ++ vsli.64 d24,d16,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d16,#39 ++ vadd.i64 d28,d8 ++ vsli.64 d25,d16,#30 ++ veor d30,d16,d17 ++ vsli.64 d26,d16,#25 ++ veor d23,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d18,d17 @ Maj(a,b,c) ++ veor d23,d26 @ Sigma0(a) ++ vadd.i64 d19,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d23,d30 ++ vshr.u64 d24,d19,#14 @ 25 ++#if 25<16 ++ vld1.64 {d9},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d19,#18 ++#if 25>0 ++ vadd.i64 d23,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d19,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d19,#50 ++ vsli.64 d25,d19,#46 ++ vmov d29,d19 ++ vsli.64 d26,d19,#23 ++#if 25<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d20,d21 @ Ch(e,f,g) ++ vshr.u64 d24,d23,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d22 ++ vshr.u64 d25,d23,#34 ++ vsli.64 d24,d23,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d23,#39 ++ vadd.i64 d28,d9 ++ vsli.64 d25,d23,#30 ++ veor d30,d23,d16 ++ vsli.64 d26,d23,#25 ++ veor d22,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d17,d16 @ Maj(a,b,c) ++ veor d22,d26 @ Sigma0(a) ++ vadd.i64 d18,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d22,d30 ++ vshr.u64 q12,q4,#19 ++ vshr.u64 q13,q4,#61 ++ vadd.i64 d22,d30 @ h+=Maj from the past ++ vshr.u64 q15,q4,#6 ++ vsli.64 q12,q4,#45 ++ vext.8 q14,q5,q6,#8 @ X[i+1] ++ vsli.64 q13,q4,#3 ++ veor q15,q12 ++ vshr.u64 q12,q14,#1 ++ veor q15,q13 @ sigma1(X[i+14]) ++ vshr.u64 q13,q14,#8 ++ vadd.i64 q5,q15 ++ vshr.u64 q15,q14,#7 ++ vsli.64 q12,q14,#63 ++ vsli.64 q13,q14,#56 ++ vext.8 q14,q1,q2,#8 @ X[i+9] ++ veor q15,q12 ++ vshr.u64 d24,d18,#14 @ from NEON_00_15 ++ vadd.i64 q5,q14 ++ vshr.u64 d25,d18,#18 @ from NEON_00_15 ++ veor q15,q13 @ sigma0(X[i+1]) ++ vshr.u64 d26,d18,#41 @ from NEON_00_15 ++ vadd.i64 q5,q15 ++ vld1.64 {d28},[r3,:64]! 
@ K[i++] ++ vsli.64 d24,d18,#50 ++ vsli.64 d25,d18,#46 ++ vmov d29,d18 ++ vsli.64 d26,d18,#23 ++#if 26<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d19,d20 @ Ch(e,f,g) ++ vshr.u64 d24,d22,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d21 ++ vshr.u64 d25,d22,#34 ++ vsli.64 d24,d22,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d22,#39 ++ vadd.i64 d28,d10 ++ vsli.64 d25,d22,#30 ++ veor d30,d22,d23 ++ vsli.64 d26,d22,#25 ++ veor d21,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d16,d23 @ Maj(a,b,c) ++ veor d21,d26 @ Sigma0(a) ++ vadd.i64 d17,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d21,d30 ++ vshr.u64 d24,d17,#14 @ 27 ++#if 27<16 ++ vld1.64 {d11},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d17,#18 ++#if 27>0 ++ vadd.i64 d21,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d17,#41 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d17,#50 ++ vsli.64 d25,d17,#46 ++ vmov d29,d17 ++ vsli.64 d26,d17,#23 ++#if 27<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d18,d19 @ Ch(e,f,g) ++ vshr.u64 d24,d21,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d20 ++ vshr.u64 d25,d21,#34 ++ vsli.64 d24,d21,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d21,#39 ++ vadd.i64 d28,d11 ++ vsli.64 d25,d21,#30 ++ veor d30,d21,d22 ++ vsli.64 d26,d21,#25 ++ veor d20,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d23,d22 @ Maj(a,b,c) ++ veor d20,d26 @ Sigma0(a) ++ vadd.i64 d16,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d20,d30 ++ vshr.u64 q12,q5,#19 ++ vshr.u64 q13,q5,#61 ++ vadd.i64 d20,d30 @ h+=Maj from the past ++ vshr.u64 q15,q5,#6 ++ vsli.64 q12,q5,#45 ++ vext.8 q14,q6,q7,#8 @ X[i+1] ++ vsli.64 q13,q5,#3 ++ veor q15,q12 ++ vshr.u64 q12,q14,#1 ++ veor q15,q13 @ sigma1(X[i+14]) ++ vshr.u64 q13,q14,#8 ++ vadd.i64 q6,q15 ++ vshr.u64 q15,q14,#7 ++ vsli.64 q12,q14,#63 ++ vsli.64 q13,q14,#56 ++ vext.8 q14,q2,q3,#8 @ X[i+9] ++ veor q15,q12 ++ vshr.u64 d24,d16,#14 @ from NEON_00_15 ++ vadd.i64 q6,q14 ++ vshr.u64 d25,d16,#18 @ from NEON_00_15 ++ veor q15,q13 @ sigma0(X[i+1]) ++ vshr.u64 d26,d16,#41 @ from NEON_00_15 ++ vadd.i64 q6,q15 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d16,#50 ++ vsli.64 d25,d16,#46 ++ vmov d29,d16 ++ vsli.64 d26,d16,#23 ++#if 28<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d17,d18 @ Ch(e,f,g) ++ vshr.u64 d24,d20,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d19 ++ vshr.u64 d25,d20,#34 ++ vsli.64 d24,d20,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d20,#39 ++ vadd.i64 d28,d12 ++ vsli.64 d25,d20,#30 ++ veor d30,d20,d21 ++ vsli.64 d26,d20,#25 ++ veor d19,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d22,d21 @ Maj(a,b,c) ++ veor d19,d26 @ Sigma0(a) ++ vadd.i64 d23,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d19,d30 ++ vshr.u64 d24,d23,#14 @ 29 ++#if 29<16 ++ vld1.64 {d13},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d23,#18 ++#if 29>0 ++ vadd.i64 d19,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d23,#41 ++ vld1.64 {d28},[r3,:64]! 
@ K[i++] ++ vsli.64 d24,d23,#50 ++ vsli.64 d25,d23,#46 ++ vmov d29,d23 ++ vsli.64 d26,d23,#23 ++#if 29<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d16,d17 @ Ch(e,f,g) ++ vshr.u64 d24,d19,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d18 ++ vshr.u64 d25,d19,#34 ++ vsli.64 d24,d19,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d19,#39 ++ vadd.i64 d28,d13 ++ vsli.64 d25,d19,#30 ++ veor d30,d19,d20 ++ vsli.64 d26,d19,#25 ++ veor d18,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d21,d20 @ Maj(a,b,c) ++ veor d18,d26 @ Sigma0(a) ++ vadd.i64 d22,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d18,d30 ++ vshr.u64 q12,q6,#19 ++ vshr.u64 q13,q6,#61 ++ vadd.i64 d18,d30 @ h+=Maj from the past ++ vshr.u64 q15,q6,#6 ++ vsli.64 q12,q6,#45 ++ vext.8 q14,q7,q0,#8 @ X[i+1] ++ vsli.64 q13,q6,#3 ++ veor q15,q12 ++ vshr.u64 q12,q14,#1 ++ veor q15,q13 @ sigma1(X[i+14]) ++ vshr.u64 q13,q14,#8 ++ vadd.i64 q7,q15 ++ vshr.u64 q15,q14,#7 ++ vsli.64 q12,q14,#63 ++ vsli.64 q13,q14,#56 ++ vext.8 q14,q3,q4,#8 @ X[i+9] ++ veor q15,q12 ++ vshr.u64 d24,d22,#14 @ from NEON_00_15 ++ vadd.i64 q7,q14 ++ vshr.u64 d25,d22,#18 @ from NEON_00_15 ++ veor q15,q13 @ sigma0(X[i+1]) ++ vshr.u64 d26,d22,#41 @ from NEON_00_15 ++ vadd.i64 q7,q15 ++ vld1.64 {d28},[r3,:64]! @ K[i++] ++ vsli.64 d24,d22,#50 ++ vsli.64 d25,d22,#46 ++ vmov d29,d22 ++ vsli.64 d26,d22,#23 ++#if 30<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d23,d16 @ Ch(e,f,g) ++ vshr.u64 d24,d18,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d17 ++ vshr.u64 d25,d18,#34 ++ vsli.64 d24,d18,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d18,#39 ++ vadd.i64 d28,d14 ++ vsli.64 d25,d18,#30 ++ veor d30,d18,d19 ++ vsli.64 d26,d18,#25 ++ veor d17,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d20,d19 @ Maj(a,b,c) ++ veor d17,d26 @ Sigma0(a) ++ vadd.i64 d21,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d17,d30 ++ vshr.u64 d24,d21,#14 @ 31 ++#if 31<16 ++ vld1.64 {d15},[r1]! @ handles unaligned ++#endif ++ vshr.u64 d25,d21,#18 ++#if 31>0 ++ vadd.i64 d17,d30 @ h+=Maj from the past ++#endif ++ vshr.u64 d26,d21,#41 ++ vld1.64 {d28},[r3,:64]! 
@ K[i++] ++ vsli.64 d24,d21,#50 ++ vsli.64 d25,d21,#46 ++ vmov d29,d21 ++ vsli.64 d26,d21,#23 ++#if 31<16 && defined(__ARMEL__) ++ vrev64.8 , ++#endif ++ veor d25,d24 ++ vbsl d29,d22,d23 @ Ch(e,f,g) ++ vshr.u64 d24,d17,#28 ++ veor d26,d25 @ Sigma1(e) ++ vadd.i64 d27,d29,d16 ++ vshr.u64 d25,d17,#34 ++ vsli.64 d24,d17,#36 ++ vadd.i64 d27,d26 ++ vshr.u64 d26,d17,#39 ++ vadd.i64 d28,d15 ++ vsli.64 d25,d17,#30 ++ veor d30,d17,d18 ++ vsli.64 d26,d17,#25 ++ veor d16,d24,d25 ++ vadd.i64 d27,d28 ++ vbsl d30,d19,d18 @ Maj(a,b,c) ++ veor d16,d26 @ Sigma0(a) ++ vadd.i64 d20,d27 ++ vadd.i64 d30,d27 ++ @ vadd.i64 d16,d30 ++ bne L16_79_neon ++ ++ vadd.i64 d16,d30 @ h+=Maj from the past ++ vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp ++ vadd.i64 q8,q12 @ vectorized accumulate ++ vadd.i64 q9,q13 ++ vadd.i64 q10,q14 ++ vadd.i64 q11,q15 ++ vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context ++ teq r1,r2 ++ sub r3,#640 @ rewind K512 ++ bne Loop_neon ++ ++ VFP_ABI_POP ++ bx lr @ .word 0xe12fff1e ++ ++#endif ++.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.align 2 ++.align 2 ++#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__) ++.comm _OPENSSL_armcap_P,4 ++.non_lazy_symbol_pointer ++OPENSSL_armcap_P: ++.indirect_symbol _OPENSSL_armcap_P ++.long 0 ++.private_extern _OPENSSL_armcap_P ++#endif ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/fipsmodule/vpaes-armv7.S b/apple-arm/crypto/fipsmodule/vpaes-armv7.S +new file mode 100644 +index 0000000..6aead7c +--- /dev/null ++++ b/apple-arm/crypto/fipsmodule/vpaes-armv7.S +@@ -0,0 +1,1265 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
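Throughout the vpaes-armv7.S hunk that follows, S-box style lookups are done with the vector-permutation trick: each byte is split into its low and high nibble (the vand against the 0x0F mask in q9 and the vshr.u8 #4), each nibble indexes a 16-entry table with vtbl.8, and the two results are XORed. A minimal Python sketch of that lookup pattern, using made-up tables purely for illustration (the real tables are the Lk_* constants in the assembly, not these):

    # Nibble-split table lookup as used by the vpaes code (illustrative sketch only).
    def vperm_lookup(byte, lo_table, hi_table):
        lo = byte & 0x0F                      # vand q1, q0, q9   (q9 = 0x0F repeated)
        hi = byte >> 4                        # vshr.u8 q0, q0, #4
        return lo_table[lo] ^ hi_table[hi]    # two vtbl.8 lookups, then veor

    # Hypothetical tables: identity on the low nibble, zeros for the high nibble.
    identity, zeros = list(range(16)), [0] * 16
    assert vperm_lookup(0x5A, identity, zeros) == 0x0A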
++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.syntax unified ++ ++ ++ ++ ++#if defined(__thumb2__) ++.thumb ++#else ++.code 32 ++#endif ++ ++.text ++ ++ ++.align 7 @ totally strategic alignment ++_vpaes_consts: ++Lk_mc_forward:@ mc_forward ++.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 ++.quad 0x080B0A0904070605, 0x000302010C0F0E0D ++.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 ++.quad 0x000302010C0F0E0D, 0x080B0A0904070605 ++Lk_mc_backward:@ mc_backward ++.quad 0x0605040702010003, 0x0E0D0C0F0A09080B ++.quad 0x020100030E0D0C0F, 0x0A09080B06050407 ++.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 ++.quad 0x0A09080B06050407, 0x020100030E0D0C0F ++Lk_sr:@ sr ++.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 ++.quad 0x030E09040F0A0500, 0x0B06010C07020D08 ++.quad 0x0F060D040B020900, 0x070E050C030A0108 ++.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 ++ ++@ ++@ "Hot" constants ++@ ++Lk_inv:@ inv, inva ++.quad 0x0E05060F0D080180, 0x040703090A0B0C02 ++.quad 0x01040A060F0B0780, 0x030D0E0C02050809 ++Lk_ipt:@ input transform (lo, hi) ++.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 ++.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 ++Lk_sbo:@ sbou, sbot ++.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 ++.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA ++Lk_sb1:@ sb1u, sb1t ++.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF ++.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 ++Lk_sb2:@ sb2u, sb2t ++.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A ++.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD ++ ++.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,55,32,78,69,79,78,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 ++.align 2 ++ ++.align 6 ++@@ ++@@ _aes_preheat ++@@ ++@@ Fills q9-q15 as specified below. ++@@ ++#ifdef __thumb2__ ++.thumb_func _vpaes_preheat ++#endif ++.align 4 ++_vpaes_preheat: ++ adr r10, Lk_inv ++ vmov.i8 q9, #0x0f @ Lk_s0F ++ vld1.64 {q10,q11}, [r10]! @ Lk_inv ++ add r10, r10, #64 @ Skip Lk_ipt, Lk_sbo ++ vld1.64 {q12,q13}, [r10]! @ Lk_sb1 ++ vld1.64 {q14,q15}, [r10] @ Lk_sb2 ++ bx lr ++ ++@@ ++@@ _aes_encrypt_core ++@@ ++@@ AES-encrypt q0. ++@@ ++@@ Inputs: ++@@ q0 = input ++@@ q9-q15 as in _vpaes_preheat ++@@ [r2] = scheduled keys ++@@ ++@@ Output in q0 ++@@ Clobbers q1-q5, r8-r11 ++@@ Preserves q6-q8 so you get some local vectors ++@@ ++@@ ++#ifdef __thumb2__ ++.thumb_func _vpaes_encrypt_core ++#endif ++.align 4 ++_vpaes_encrypt_core: ++ mov r9, r2 ++ ldr r8, [r2,#240] @ pull rounds ++ adr r11, Lk_ipt ++ @ vmovdqa .Lk_ipt(%rip), %xmm2 # iptlo ++ @ vmovdqa .Lk_ipt+16(%rip), %xmm3 # ipthi ++ vld1.64 {q2, q3}, [r11] ++ adr r11, Lk_mc_forward+16 ++ vld1.64 {q5}, [r9]! @ vmovdqu (%r9), %xmm5 # round0 key ++ vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 ++ vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 ++ vtbl.8 d2, {q2}, d2 @ vpshufb %xmm1, %xmm2, %xmm1 ++ vtbl.8 d3, {q2}, d3 ++ vtbl.8 d4, {q3}, d0 @ vpshufb %xmm0, %xmm3, %xmm2 ++ vtbl.8 d5, {q3}, d1 ++ veor q0, q1, q5 @ vpxor %xmm5, %xmm1, %xmm0 ++ veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0 ++ ++ @ .Lenc_entry ends with a bnz instruction which is normally paired with ++ @ subs in .Lenc_loop. 
++ tst r8, r8 ++ b Lenc_entry ++ ++.align 4 ++Lenc_loop: ++ @ middle of middle round ++ add r10, r11, #0x40 ++ vtbl.8 d8, {q13}, d4 @ vpshufb %xmm2, %xmm13, %xmm4 # 4 = sb1u ++ vtbl.8 d9, {q13}, d5 ++ vld1.64 {q1}, [r11]! @ vmovdqa -0x40(%r11,%r10), %xmm1 # Lk_mc_forward[] ++ vtbl.8 d0, {q12}, d6 @ vpshufb %xmm3, %xmm12, %xmm0 # 0 = sb1t ++ vtbl.8 d1, {q12}, d7 ++ veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ vtbl.8 d10, {q15}, d4 @ vpshufb %xmm2, %xmm15, %xmm5 # 4 = sb2u ++ vtbl.8 d11, {q15}, d5 ++ veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ vtbl.8 d4, {q14}, d6 @ vpshufb %xmm3, %xmm14, %xmm2 # 2 = sb2t ++ vtbl.8 d5, {q14}, d7 ++ vld1.64 {q4}, [r10] @ vmovdqa (%r11,%r10), %xmm4 # Lk_mc_backward[] ++ vtbl.8 d6, {q0}, d2 @ vpshufb %xmm1, %xmm0, %xmm3 # 0 = B ++ vtbl.8 d7, {q0}, d3 ++ veor q2, q2, q5 @ vpxor %xmm5, %xmm2, %xmm2 # 2 = 2A ++ @ Write to q5 instead of q0, so the table and destination registers do ++ @ not overlap. ++ vtbl.8 d10, {q0}, d8 @ vpshufb %xmm4, %xmm0, %xmm0 # 3 = D ++ vtbl.8 d11, {q0}, d9 ++ veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 0 = 2A+B ++ vtbl.8 d8, {q3}, d2 @ vpshufb %xmm1, %xmm3, %xmm4 # 0 = 2B+C ++ vtbl.8 d9, {q3}, d3 ++ @ Here we restore the original q0/q5 usage. ++ veor q0, q5, q3 @ vpxor %xmm3, %xmm0, %xmm0 # 3 = 2A+B+D ++ and r11, r11, #~(1<<6) @ and $0x30, %r11 # ... mod 4 ++ veor q0, q0, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = 2A+3B+C+D ++ subs r8, r8, #1 @ nr-- ++ ++Lenc_entry: ++ @ top of round ++ vand q1, q0, q9 @ vpand %xmm0, %xmm9, %xmm1 # 0 = k ++ vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i ++ vtbl.8 d10, {q11}, d2 @ vpshufb %xmm1, %xmm11, %xmm5 # 2 = a/k ++ vtbl.8 d11, {q11}, d3 ++ veor q1, q1, q0 @ vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vtbl.8 d6, {q10}, d0 @ vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vtbl.8 d7, {q10}, d1 ++ vtbl.8 d8, {q10}, d2 @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vtbl.8 d9, {q10}, d3 ++ veor q3, q3, q5 @ vpxor %xmm5, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vtbl.8 d4, {q10}, d6 @ vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ vtbl.8 d5, {q10}, d7 ++ vtbl.8 d6, {q10}, d8 @ vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ vtbl.8 d7, {q10}, d9 ++ veor q2, q2, q1 @ vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ veor q3, q3, q0 @ vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ vld1.64 {q5}, [r9]! @ vmovdqu (%r9), %xmm5 ++ bne Lenc_loop ++ ++ @ middle of last round ++ add r10, r11, #0x80 ++ ++ adr r11, Lk_sbo ++ @ Read to q1 instead of q4, so the vtbl.8 instruction below does not ++ @ overlap table and destination registers. ++ vld1.64 {q1}, [r11]! @ vmovdqa -0x60(%r10), %xmm4 # 3 : sbou ++ vld1.64 {q0}, [r11] @ vmovdqa -0x50(%r10), %xmm0 # 0 : sbot Lk_sbo+16 ++ vtbl.8 d8, {q1}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ vtbl.8 d9, {q1}, d5 ++ vld1.64 {q1}, [r10] @ vmovdqa 0x40(%r11,%r10), %xmm1 # Lk_sr[] ++ @ Write to q2 instead of q0 below, to avoid overlapping table and ++ @ destination registers. ++ vtbl.8 d4, {q0}, d6 @ vpshufb %xmm3, %xmm0, %xmm0 # 0 = sb1t ++ vtbl.8 d5, {q0}, d7 ++ veor q4, q4, q5 @ vpxor %xmm5, %xmm4, %xmm4 # 4 = sb1u + k ++ veor q2, q2, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 0 = A ++ @ Here we restore the original q0/q2 usage. ++ vtbl.8 d0, {q2}, d2 @ vpshufb %xmm1, %xmm0, %xmm0 ++ vtbl.8 d1, {q2}, d3 ++ bx lr ++ ++ ++.globl _vpaes_encrypt ++.private_extern _vpaes_encrypt ++#ifdef __thumb2__ ++.thumb_func _vpaes_encrypt ++#endif ++.align 4 ++_vpaes_encrypt: ++ @ _vpaes_encrypt_core uses r8-r11. 
Round up to r7-r11 to maintain stack ++ @ alignment. ++ stmdb sp!, {r7,r8,r9,r10,r11,lr} ++ @ _vpaes_encrypt_core uses q4-q5 (d8-d11), which are callee-saved. ++ vstmdb sp!, {d8,d9,d10,d11} ++ ++ vld1.64 {q0}, [r0] ++ bl _vpaes_preheat ++ bl _vpaes_encrypt_core ++ vst1.64 {q0}, [r1] ++ ++ vldmia sp!, {d8,d9,d10,d11} ++ ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return ++ ++ ++@ ++@ Decryption stuff ++@ ++ ++.align 4 ++_vpaes_decrypt_consts: ++Lk_dipt:@ decryption input transform ++.quad 0x0F505B040B545F00, 0x154A411E114E451A ++.quad 0x86E383E660056500, 0x12771772F491F194 ++Lk_dsbo:@ decryption sbox final output ++.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D ++.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C ++Lk_dsb9:@ decryption sbox output *9*u, *9*t ++.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 ++.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 ++Lk_dsbd:@ decryption sbox output *D*u, *D*t ++.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 ++.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 ++Lk_dsbb:@ decryption sbox output *B*u, *B*t ++.quad 0xD022649296B44200, 0x602646F6B0F2D404 ++.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B ++Lk_dsbe:@ decryption sbox output *E*u, *E*t ++.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 ++.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 ++ ++ ++@@ ++@@ Decryption core ++@@ ++@@ Same API as encryption core, except it clobbers q12-q15 rather than using ++@@ the values from _vpaes_preheat. q9-q11 must still be set from ++@@ _vpaes_preheat. ++@@ ++#ifdef __thumb2__ ++.thumb_func _vpaes_decrypt_core ++#endif ++.align 4 ++_vpaes_decrypt_core: ++ mov r9, r2 ++ ldr r8, [r2,#240] @ pull rounds ++ ++ @ This function performs shuffles with various constants. The x86_64 ++ @ version loads them on-demand into %xmm0-%xmm5. This does not work well ++ @ for ARMv7 because those registers are shuffle destinations. The ARMv8 ++ @ version preloads those constants into registers, but ARMv7 has half ++ @ the registers to work with. Instead, we load them on-demand into ++ @ q12-q15, registers normally use for preloaded constants. This is fine ++ @ because decryption doesn't use those constants. The values are ++ @ constant, so this does not interfere with potential 2x optimizations. ++ adr r7, Lk_dipt ++ ++ vld1.64 {q12,q13}, [r7] @ vmovdqa Lk_dipt(%rip), %xmm2 # iptlo ++ lsl r11, r8, #4 @ mov %rax, %r11; shl $4, %r11 ++ eor r11, r11, #0x30 @ xor $0x30, %r11 ++ adr r10, Lk_sr ++ and r11, r11, #0x30 @ and $0x30, %r11 ++ add r11, r11, r10 ++ adr r10, Lk_mc_forward+48 ++ ++ vld1.64 {q4}, [r9]! @ vmovdqu (%r9), %xmm4 # round0 key ++ vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 ++ vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 ++ vtbl.8 d4, {q12}, d2 @ vpshufb %xmm1, %xmm2, %xmm2 ++ vtbl.8 d5, {q12}, d3 ++ vld1.64 {q5}, [r10] @ vmovdqa Lk_mc_forward+48(%rip), %xmm5 ++ @ vmovdqa .Lk_dipt+16(%rip), %xmm1 # ipthi ++ vtbl.8 d0, {q13}, d0 @ vpshufb %xmm0, %xmm1, %xmm0 ++ vtbl.8 d1, {q13}, d1 ++ veor q2, q2, q4 @ vpxor %xmm4, %xmm2, %xmm2 ++ veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0 ++ ++ @ .Ldec_entry ends with a bnz instruction which is normally paired with ++ @ subs in .Ldec_loop. ++ tst r8, r8 ++ b Ldec_entry ++ ++.align 4 ++Ldec_loop: ++@ ++@ Inverse mix columns ++@ ++ ++ @ We load .Lk_dsb* into q12-q15 on-demand. See the comment at the top of ++ @ the function. ++ adr r10, Lk_dsb9 ++ vld1.64 {q12,q13}, [r10]! @ vmovdqa -0x20(%r10),%xmm4 # 4 : sb9u ++ @ vmovdqa -0x10(%r10),%xmm1 # 0 : sb9t ++ @ Load sbd* ahead of time. ++ vld1.64 {q14,q15}, [r10]! 
@ vmovdqa 0x00(%r10),%xmm4 # 4 : sbdu ++ @ vmovdqa 0x10(%r10),%xmm1 # 0 : sbdt ++ vtbl.8 d8, {q12}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sb9u ++ vtbl.8 d9, {q12}, d5 ++ vtbl.8 d2, {q13}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb9t ++ vtbl.8 d3, {q13}, d7 ++ veor q0, q4, q0 @ vpxor %xmm4, %xmm0, %xmm0 ++ ++ veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ ++ @ Load sbb* ahead of time. ++ vld1.64 {q12,q13}, [r10]! @ vmovdqa 0x20(%r10),%xmm4 # 4 : sbbu ++ @ vmovdqa 0x30(%r10),%xmm1 # 0 : sbbt ++ ++ vtbl.8 d8, {q14}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbdu ++ vtbl.8 d9, {q14}, d5 ++ @ Write to q1 instead of q0, so the table and destination registers do ++ @ not overlap. ++ vtbl.8 d2, {q0}, d10 @ vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vtbl.8 d3, {q0}, d11 ++ @ Here we restore the original q0/q1 usage. This instruction is ++ @ reordered from the ARMv8 version so we do not clobber the vtbl.8 ++ @ below. ++ veor q0, q1, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ vtbl.8 d2, {q15}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbdt ++ vtbl.8 d3, {q15}, d7 ++ @ vmovdqa 0x20(%r10), %xmm4 # 4 : sbbu ++ veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ @ vmovdqa 0x30(%r10), %xmm1 # 0 : sbbt ++ ++ @ Load sbd* ahead of time. ++ vld1.64 {q14,q15}, [r10]! @ vmovdqa 0x40(%r10),%xmm4 # 4 : sbeu ++ @ vmovdqa 0x50(%r10),%xmm1 # 0 : sbet ++ ++ vtbl.8 d8, {q12}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbbu ++ vtbl.8 d9, {q12}, d5 ++ @ Write to q1 instead of q0, so the table and destination registers do ++ @ not overlap. ++ vtbl.8 d2, {q0}, d10 @ vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vtbl.8 d3, {q0}, d11 ++ @ Here we restore the original q0/q1 usage. This instruction is ++ @ reordered from the ARMv8 version so we do not clobber the vtbl.8 ++ @ below. ++ veor q0, q1, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ vtbl.8 d2, {q13}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbbt ++ vtbl.8 d3, {q13}, d7 ++ veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ ++ vtbl.8 d8, {q14}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbeu ++ vtbl.8 d9, {q14}, d5 ++ @ Write to q1 instead of q0, so the table and destination registers do ++ @ not overlap. ++ vtbl.8 d2, {q0}, d10 @ vpshufb %xmm5, %xmm0, %xmm0 # MC ch ++ vtbl.8 d3, {q0}, d11 ++ @ Here we restore the original q0/q1 usage. This instruction is ++ @ reordered from the ARMv8 version so we do not clobber the vtbl.8 ++ @ below. 
++ veor q0, q1, q4 @ vpxor %xmm4, %xmm0, %xmm0 # 4 = ch ++ vtbl.8 d2, {q15}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sbet ++ vtbl.8 d3, {q15}, d7 ++ vext.8 q5, q5, q5, #12 @ vpalignr $12, %xmm5, %xmm5, %xmm5 ++ veor q0, q0, q1 @ vpxor %xmm1, %xmm0, %xmm0 # 0 = ch ++ subs r8, r8, #1 @ sub $1,%rax # nr-- ++ ++Ldec_entry: ++ @ top of round ++ vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i ++ vtbl.8 d4, {q11}, d2 @ vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ vtbl.8 d5, {q11}, d3 ++ veor q1, q1, q0 @ vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vtbl.8 d6, {q10}, d0 @ vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vtbl.8 d7, {q10}, d1 ++ vtbl.8 d8, {q10}, d2 @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vtbl.8 d9, {q10}, d3 ++ veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ veor q4, q4, q2 @ vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vtbl.8 d4, {q10}, d6 @ vpshufb %xmm3, %xmm10, %xmm2 # 2 = 1/iak ++ vtbl.8 d5, {q10}, d7 ++ vtbl.8 d6, {q10}, d8 @ vpshufb %xmm4, %xmm10, %xmm3 # 3 = 1/jak ++ vtbl.8 d7, {q10}, d9 ++ veor q2, q2, q1 @ vpxor %xmm1, %xmm2, %xmm2 # 2 = io ++ veor q3, q3, q0 @ vpxor %xmm0, %xmm3, %xmm3 # 3 = jo ++ vld1.64 {q0}, [r9]! @ vmovdqu (%r9), %xmm0 ++ bne Ldec_loop ++ ++ @ middle of last round ++ ++ adr r10, Lk_dsbo ++ ++ @ Write to q1 rather than q4 to avoid overlapping table and destination. ++ vld1.64 {q1}, [r10]! @ vmovdqa 0x60(%r10), %xmm4 # 3 : sbou ++ vtbl.8 d8, {q1}, d4 @ vpshufb %xmm2, %xmm4, %xmm4 # 4 = sbou ++ vtbl.8 d9, {q1}, d5 ++ @ Write to q2 rather than q1 to avoid overlapping table and destination. ++ vld1.64 {q2}, [r10] @ vmovdqa 0x70(%r10), %xmm1 # 0 : sbot ++ vtbl.8 d2, {q2}, d6 @ vpshufb %xmm3, %xmm1, %xmm1 # 0 = sb1t ++ vtbl.8 d3, {q2}, d7 ++ vld1.64 {q2}, [r11] @ vmovdqa -0x160(%r11), %xmm2 # Lk_sr-Lk_dsbd=-0x160 ++ veor q4, q4, q0 @ vpxor %xmm0, %xmm4, %xmm4 # 4 = sb1u + k ++ @ Write to q1 rather than q0 so the table and destination registers ++ @ below do not overlap. ++ veor q1, q1, q4 @ vpxor %xmm4, %xmm1, %xmm0 # 0 = A ++ vtbl.8 d0, {q1}, d4 @ vpshufb %xmm2, %xmm0, %xmm0 ++ vtbl.8 d1, {q1}, d5 ++ bx lr ++ ++ ++.globl _vpaes_decrypt ++.private_extern _vpaes_decrypt ++#ifdef __thumb2__ ++.thumb_func _vpaes_decrypt ++#endif ++.align 4 ++_vpaes_decrypt: ++ @ _vpaes_decrypt_core uses r7-r11. ++ stmdb sp!, {r7,r8,r9,r10,r11,lr} ++ @ _vpaes_decrypt_core uses q4-q5 (d8-d11), which are callee-saved. ++ vstmdb sp!, {d8,d9,d10,d11} ++ ++ vld1.64 {q0}, [r0] ++ bl _vpaes_preheat ++ bl _vpaes_decrypt_core ++ vst1.64 {q0}, [r1] ++ ++ vldmia sp!, {d8,d9,d10,d11} ++ ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return ++ ++@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ++@@ @@ ++@@ AES key schedule @@ ++@@ @@ ++@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ ++ ++@ This function diverges from both x86_64 and armv7 in which constants are ++@ pinned. x86_64 has a common preheat function for all operations. aarch64 ++@ separates them because it has enough registers to pin nearly all constants. ++@ armv7 does not have enough registers, but needing explicit loads and stores ++@ also complicates using x86_64's register allocation directly. ++@ ++@ We pin some constants for convenience and leave q14 and q15 free to load ++@ others on demand. 
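The Lk_dsb9/Lk_dsbd/Lk_dsbb/Lk_dsbe pairs used in Ldec_loop above, and the Lk_dks* "invskew x*9 / x*B / x*D / x*E" constants defined just below, fold multiplication by the inverse MixColumns coefficients {09, 0B, 0D, 0E} into the table lookups. For background only, a plain GF(2^8) sketch of that matrix (standard AES reduction polynomial 0x11B assumed; this is not a reconstruction of the tables themselves):

    # InvMixColumns on one 4-byte column over GF(2^8) reduced by 0x11B.
    def xtime(b):
        b <<= 1
        return (b ^ 0x11B) & 0xFF if b & 0x100 else b

    def gf_mul(a, b):
        r = 0
        while b:
            if b & 1:
                r ^= a
            a, b = xtime(a), b >> 1
        return r

    def inv_mix_column(c):
        # Rows of the inverse circulant (0E 0B 0D 09), rotated one step per row.
        return [
            gf_mul(c[0], 0x0E) ^ gf_mul(c[1], 0x0B) ^ gf_mul(c[2], 0x0D) ^ gf_mul(c[3], 0x09),
            gf_mul(c[0], 0x09) ^ gf_mul(c[1], 0x0E) ^ gf_mul(c[2], 0x0B) ^ gf_mul(c[3], 0x0D),
            gf_mul(c[0], 0x0D) ^ gf_mul(c[1], 0x09) ^ gf_mul(c[2], 0x0E) ^ gf_mul(c[3], 0x0B),
            gf_mul(c[0], 0x0B) ^ gf_mul(c[1], 0x0D) ^ gf_mul(c[2], 0x09) ^ gf_mul(c[3], 0x0E),
        ]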
++ ++@ ++@ Key schedule constants ++@ ++ ++.align 4 ++_vpaes_key_consts: ++Lk_dksd:@ decryption key schedule: invskew x*D ++.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 ++.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E ++Lk_dksb:@ decryption key schedule: invskew x*B ++.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 ++.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 ++Lk_dkse:@ decryption key schedule: invskew x*E + 0x63 ++.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 ++.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 ++Lk_dks9:@ decryption key schedule: invskew x*9 ++.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC ++.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE ++ ++Lk_rcon:@ rcon ++.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 ++ ++Lk_opt:@ output transform ++.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 ++.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 ++Lk_deskew:@ deskew tables: inverts the sbox's "skew" ++.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A ++.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 ++ ++ ++#ifdef __thumb2__ ++.thumb_func _vpaes_key_preheat ++#endif ++.align 4 ++_vpaes_key_preheat: ++ adr r11, Lk_rcon ++ vmov.i8 q12, #0x5b @ Lk_s63 ++ adr r10, Lk_inv @ Must be aligned to 8 mod 16. ++ vmov.i8 q9, #0x0f @ Lk_s0F ++ vld1.64 {q10,q11}, [r10] @ Lk_inv ++ vld1.64 {q8}, [r11] @ Lk_rcon ++ bx lr ++ ++ ++#ifdef __thumb2__ ++.thumb_func _vpaes_schedule_core ++#endif ++.align 4 ++_vpaes_schedule_core: ++ @ We only need to save lr, but ARM requires an 8-byte stack alignment, ++ @ so save an extra register. ++ stmdb sp!, {r3,lr} ++ ++ bl _vpaes_key_preheat @ load the tables ++ ++ adr r11, Lk_ipt @ Must be aligned to 8 mod 16. ++ vld1.64 {q0}, [r0]! @ vmovdqu (%rdi), %xmm0 # load key (unaligned) ++ ++ @ input transform ++ @ Use q4 here rather than q3 so .Lschedule_am_decrypting does not ++ @ overlap table and destination. ++ vmov q4, q0 @ vmovdqa %xmm0, %xmm3 ++ bl _vpaes_schedule_transform ++ adr r10, Lk_sr @ Must be aligned to 8 mod 16. ++ vmov q7, q0 @ vmovdqa %xmm0, %xmm7 ++ ++ add r8, r8, r10 ++ tst r3, r3 ++ bne Lschedule_am_decrypting ++ ++ @ encrypting, output zeroth round key after transform ++ vst1.64 {q0}, [r2] @ vmovdqu %xmm0, (%rdx) ++ b Lschedule_go ++ ++Lschedule_am_decrypting: ++ @ decrypting, output zeroth round key after shiftrows ++ vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1 ++ vtbl.8 d6, {q4}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 ++ vtbl.8 d7, {q4}, d3 ++ vst1.64 {q3}, [r2] @ vmovdqu %xmm3, (%rdx) ++ eor r8, r8, #0x30 @ xor $0x30, %r8 ++ ++Lschedule_go: ++ cmp r1, #192 @ cmp $192, %esi ++ bhi Lschedule_256 ++ beq Lschedule_192 ++ @ 128: fall though ++ ++@@ ++@@ .schedule_128 ++@@ ++@@ 128-bit specific part of key schedule. ++@@ ++@@ This schedule is really simple, because all its parts ++@@ are accomplished by the subroutines. ++@@ ++Lschedule_128: ++ mov r0, #10 @ mov $10, %esi ++ ++Loop_schedule_128: ++ bl _vpaes_schedule_round ++ subs r0, r0, #1 @ dec %esi ++ beq Lschedule_mangle_last ++ bl _vpaes_schedule_mangle @ write output ++ b Loop_schedule_128 ++ ++@@ ++@@ .aes_schedule_192 ++@@ ++@@ 192-bit specific part of key schedule. ++@@ ++@@ The main body of this schedule is the same as the 128-bit ++@@ schedule, but with more smearing. The long, high side is ++@@ stored in q7 as before, and the short, low side is in ++@@ the high bits of q6. ++@@ ++@@ This schedule is somewhat nastier, however, because each ++@@ round produces 192 bits of key material, or 1.5 round keys. ++@@ Therefore, on each cycle we do 2 rounds and produce 3 round ++@@ keys. 
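As the comment above notes, each 192-bit cycle runs two schedule rounds and emits three round keys. Together with the zeroth key written earlier in _vpaes_schedule_core and the four iterations of Loop_schedule_192 below (mov r0, #4), that gives the 13 round keys AES-192 needs (Nr + 1 with Nr = 12). A one-line check of that bookkeeping, with the counts taken from the comments rather than from any new analysis:

    # AES-192 key schedule bookkeeping.
    cycles, keys_per_cycle, initial_keys = 4, 3, 1
    aes192_rounds = 12
    assert initial_keys + cycles * keys_per_cycle == aes192_rounds + 1   # 13 round keys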
++@@ ++.align 4 ++Lschedule_192: ++ sub r0, r0, #8 ++ vld1.64 {q0}, [r0] @ vmovdqu 8(%rdi),%xmm0 # load key part 2 (very unaligned) ++ bl _vpaes_schedule_transform @ input transform ++ vmov q6, q0 @ vmovdqa %xmm0, %xmm6 # save short part ++ vmov.i8 d12, #0 @ vpxor %xmm4, %xmm4, %xmm4 # clear 4 ++ @ vmovhlps %xmm4, %xmm6, %xmm6 # clobber low side with zeros ++ mov r0, #4 @ mov $4, %esi ++ ++Loop_schedule_192: ++ bl _vpaes_schedule_round ++ vext.8 q0, q6, q0, #8 @ vpalignr $8,%xmm6,%xmm0,%xmm0 ++ bl _vpaes_schedule_mangle @ save key n ++ bl _vpaes_schedule_192_smear ++ bl _vpaes_schedule_mangle @ save key n+1 ++ bl _vpaes_schedule_round ++ subs r0, r0, #1 @ dec %esi ++ beq Lschedule_mangle_last ++ bl _vpaes_schedule_mangle @ save key n+2 ++ bl _vpaes_schedule_192_smear ++ b Loop_schedule_192 ++ ++@@ ++@@ .aes_schedule_256 ++@@ ++@@ 256-bit specific part of key schedule. ++@@ ++@@ The structure here is very similar to the 128-bit ++@@ schedule, but with an additional "low side" in ++@@ q6. The low side's rounds are the same as the ++@@ high side's, except no rcon and no rotation. ++@@ ++.align 4 ++Lschedule_256: ++ vld1.64 {q0}, [r0] @ vmovdqu 16(%rdi),%xmm0 # load key part 2 (unaligned) ++ bl _vpaes_schedule_transform @ input transform ++ mov r0, #7 @ mov $7, %esi ++ ++Loop_schedule_256: ++ bl _vpaes_schedule_mangle @ output low result ++ vmov q6, q0 @ vmovdqa %xmm0, %xmm6 # save cur_lo in xmm6 ++ ++ @ high round ++ bl _vpaes_schedule_round ++ subs r0, r0, #1 @ dec %esi ++ beq Lschedule_mangle_last ++ bl _vpaes_schedule_mangle ++ ++ @ low round. swap xmm7 and xmm6 ++ vdup.32 q0, d1[1] @ vpshufd $0xFF, %xmm0, %xmm0 ++ vmov.i8 q4, #0 ++ vmov q5, q7 @ vmovdqa %xmm7, %xmm5 ++ vmov q7, q6 @ vmovdqa %xmm6, %xmm7 ++ bl _vpaes_schedule_low_round ++ vmov q7, q5 @ vmovdqa %xmm5, %xmm7 ++ ++ b Loop_schedule_256 ++ ++@@ ++@@ .aes_schedule_mangle_last ++@@ ++@@ Mangler for last round of key schedule ++@@ Mangles q0 ++@@ when encrypting, outputs out(q0) ^ 63 ++@@ when decrypting, outputs unskew(q0) ++@@ ++@@ Always called right before return... jumps to cleanup and exits ++@@ ++.align 4 ++Lschedule_mangle_last: ++ @ schedule last round key from xmm0 ++ adr r11, Lk_deskew @ lea Lk_deskew(%rip),%r11 # prepare to deskew ++ tst r3, r3 ++ bne Lschedule_mangle_last_dec ++ ++ @ encrypting ++ vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10),%xmm1 ++ adr r11, Lk_opt @ lea Lk_opt(%rip), %r11 # prepare to output transform ++ add r2, r2, #32 @ add $32, %rdx ++ vmov q2, q0 ++ vtbl.8 d0, {q2}, d2 @ vpshufb %xmm1, %xmm0, %xmm0 # output permute ++ vtbl.8 d1, {q2}, d3 ++ ++Lschedule_mangle_last_dec: ++ sub r2, r2, #16 @ add $-16, %rdx ++ veor q0, q0, q12 @ vpxor Lk_s63(%rip), %xmm0, %xmm0 ++ bl _vpaes_schedule_transform @ output transform ++ vst1.64 {q0}, [r2] @ vmovdqu %xmm0, (%rdx) # save last key ++ ++ @ cleanup ++ veor q0, q0, q0 @ vpxor %xmm0, %xmm0, %xmm0 ++ veor q1, q1, q1 @ vpxor %xmm1, %xmm1, %xmm1 ++ veor q2, q2, q2 @ vpxor %xmm2, %xmm2, %xmm2 ++ veor q3, q3, q3 @ vpxor %xmm3, %xmm3, %xmm3 ++ veor q4, q4, q4 @ vpxor %xmm4, %xmm4, %xmm4 ++ veor q5, q5, q5 @ vpxor %xmm5, %xmm5, %xmm5 ++ veor q6, q6, q6 @ vpxor %xmm6, %xmm6, %xmm6 ++ veor q7, q7, q7 @ vpxor %xmm7, %xmm7, %xmm7 ++ ldmia sp!, {r3,pc} @ return ++ ++ ++@@ ++@@ .aes_schedule_192_smear ++@@ ++@@ Smear the short, low side in the 192-bit key schedule. 
++@@ ++@@ Inputs: ++@@ q7: high side, b a x y ++@@ q6: low side, d c 0 0 ++@@ ++@@ Outputs: ++@@ q6: b+c+d b+c 0 0 ++@@ q0: b+c+d b+c b a ++@@ ++#ifdef __thumb2__ ++.thumb_func _vpaes_schedule_192_smear ++#endif ++.align 4 ++_vpaes_schedule_192_smear: ++ vmov.i8 q1, #0 ++ vdup.32 q0, d15[1] ++ vshl.i64 q1, q6, #32 @ vpshufd $0x80, %xmm6, %xmm1 # d c 0 0 -> c 0 0 0 ++ vmov d0, d15 @ vpshufd $0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a ++ veor q6, q6, q1 @ vpxor %xmm1, %xmm6, %xmm6 # -> c+d c 0 0 ++ veor q1, q1, q1 @ vpxor %xmm1, %xmm1, %xmm1 ++ veor q6, q6, q0 @ vpxor %xmm0, %xmm6, %xmm6 # -> b+c+d b+c b a ++ vmov q0, q6 @ vmovdqa %xmm6, %xmm0 ++ vmov d12, d2 @ vmovhlps %xmm1, %xmm6, %xmm6 # clobber low side with zeros ++ bx lr ++ ++ ++@@ ++@@ .aes_schedule_round ++@@ ++@@ Runs one main round of the key schedule on q0, q7 ++@@ ++@@ Specifically, runs subbytes on the high dword of q0 ++@@ then rotates it by one byte and xors into the low dword of ++@@ q7. ++@@ ++@@ Adds rcon from low byte of q8, then rotates q8 for ++@@ next rcon. ++@@ ++@@ Smears the dwords of q7 by xoring the low into the ++@@ second low, result into third, result into highest. ++@@ ++@@ Returns results in q7 = q0. ++@@ Clobbers q1-q4, r11. ++@@ ++#ifdef __thumb2__ ++.thumb_func _vpaes_schedule_round ++#endif ++.align 4 ++_vpaes_schedule_round: ++ @ extract rcon from xmm8 ++ vmov.i8 q4, #0 @ vpxor %xmm4, %xmm4, %xmm4 ++ vext.8 q1, q8, q4, #15 @ vpalignr $15, %xmm8, %xmm4, %xmm1 ++ vext.8 q8, q8, q8, #15 @ vpalignr $15, %xmm8, %xmm8, %xmm8 ++ veor q7, q7, q1 @ vpxor %xmm1, %xmm7, %xmm7 ++ ++ @ rotate ++ vdup.32 q0, d1[1] @ vpshufd $0xFF, %xmm0, %xmm0 ++ vext.8 q0, q0, q0, #1 @ vpalignr $1, %xmm0, %xmm0, %xmm0 ++ ++ @ fall through... ++ ++ @ low round: same as high round, but no rotation and no rcon. ++_vpaes_schedule_low_round: ++ @ The x86_64 version pins .Lk_sb1 in %xmm13 and .Lk_sb1+16 in %xmm12. ++ @ We pin other values in _vpaes_key_preheat, so load them now. 
++ adr r11, Lk_sb1 ++ vld1.64 {q14,q15}, [r11] ++ ++ @ smear xmm7 ++ vext.8 q1, q4, q7, #12 @ vpslldq $4, %xmm7, %xmm1 ++ veor q7, q7, q1 @ vpxor %xmm1, %xmm7, %xmm7 ++ vext.8 q4, q4, q7, #8 @ vpslldq $8, %xmm7, %xmm4 ++ ++ @ subbytes ++ vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 # 0 = k ++ vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 # 1 = i ++ veor q7, q7, q4 @ vpxor %xmm4, %xmm7, %xmm7 ++ vtbl.8 d4, {q11}, d2 @ vpshufb %xmm1, %xmm11, %xmm2 # 2 = a/k ++ vtbl.8 d5, {q11}, d3 ++ veor q1, q1, q0 @ vpxor %xmm0, %xmm1, %xmm1 # 0 = j ++ vtbl.8 d6, {q10}, d0 @ vpshufb %xmm0, %xmm10, %xmm3 # 3 = 1/i ++ vtbl.8 d7, {q10}, d1 ++ veor q3, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 # 3 = iak = 1/i + a/k ++ vtbl.8 d8, {q10}, d2 @ vpshufb %xmm1, %xmm10, %xmm4 # 4 = 1/j ++ vtbl.8 d9, {q10}, d3 ++ veor q7, q7, q12 @ vpxor Lk_s63(%rip), %xmm7, %xmm7 ++ vtbl.8 d6, {q10}, d6 @ vpshufb %xmm3, %xmm10, %xmm3 # 2 = 1/iak ++ vtbl.8 d7, {q10}, d7 ++ veor q4, q4, q2 @ vpxor %xmm2, %xmm4, %xmm4 # 4 = jak = 1/j + a/k ++ vtbl.8 d4, {q10}, d8 @ vpshufb %xmm4, %xmm10, %xmm2 # 3 = 1/jak ++ vtbl.8 d5, {q10}, d9 ++ veor q3, q3, q1 @ vpxor %xmm1, %xmm3, %xmm3 # 2 = io ++ veor q2, q2, q0 @ vpxor %xmm0, %xmm2, %xmm2 # 3 = jo ++ vtbl.8 d8, {q15}, d6 @ vpshufb %xmm3, %xmm13, %xmm4 # 4 = sbou ++ vtbl.8 d9, {q15}, d7 ++ vtbl.8 d2, {q14}, d4 @ vpshufb %xmm2, %xmm12, %xmm1 # 0 = sb1t ++ vtbl.8 d3, {q14}, d5 ++ veor q1, q1, q4 @ vpxor %xmm4, %xmm1, %xmm1 # 0 = sbox output ++ ++ @ add in smeared stuff ++ veor q0, q1, q7 @ vpxor %xmm7, %xmm1, %xmm0 ++ veor q7, q1, q7 @ vmovdqa %xmm0, %xmm7 ++ bx lr ++ ++ ++@@ ++@@ .aes_schedule_transform ++@@ ++@@ Linear-transform q0 according to tables at [r11] ++@@ ++@@ Requires that q9 = 0x0F0F... as in preheat ++@@ Output in q0 ++@@ Clobbers q1, q2, q14, q15 ++@@ ++#ifdef __thumb2__ ++.thumb_func _vpaes_schedule_transform ++#endif ++.align 4 ++_vpaes_schedule_transform: ++ vld1.64 {q14,q15}, [r11] @ vmovdqa (%r11), %xmm2 # lo ++ @ vmovdqa 16(%r11), %xmm1 # hi ++ vand q1, q0, q9 @ vpand %xmm9, %xmm0, %xmm1 ++ vshr.u8 q0, q0, #4 @ vpsrlb $4, %xmm0, %xmm0 ++ vtbl.8 d4, {q14}, d2 @ vpshufb %xmm1, %xmm2, %xmm2 ++ vtbl.8 d5, {q14}, d3 ++ vtbl.8 d0, {q15}, d0 @ vpshufb %xmm0, %xmm1, %xmm0 ++ vtbl.8 d1, {q15}, d1 ++ veor q0, q0, q2 @ vpxor %xmm2, %xmm0, %xmm0 ++ bx lr ++ ++ ++@@ ++@@ .aes_schedule_mangle ++@@ ++@@ Mangles q0 from (basis-transformed) standard version ++@@ to our version. ++@@ ++@@ On encrypt, ++@@ xor with 0x63 ++@@ multiply by circulant 0,1,1,1 ++@@ apply shiftrows transform ++@@ ++@@ On decrypt, ++@@ xor with 0x63 ++@@ multiply by "inverse mixcolumns" circulant E,B,D,9 ++@@ deskew ++@@ apply shiftrows transform ++@@ ++@@ ++@@ Writes out to [r2], and increments or decrements it ++@@ Keeps track of round number mod 4 in r8 ++@@ Preserves q0 ++@@ Clobbers q1-q5 ++@@ ++#ifdef __thumb2__ ++.thumb_func _vpaes_schedule_mangle ++#endif ++.align 4 ++_vpaes_schedule_mangle: ++ tst r3, r3 ++ vmov q4, q0 @ vmovdqa %xmm0, %xmm4 # save xmm0 for later ++ adr r11, Lk_mc_forward @ Must be aligned to 8 mod 16. ++ vld1.64 {q5}, [r11] @ vmovdqa Lk_mc_forward(%rip),%xmm5 ++ bne Lschedule_mangle_dec ++ ++ @ encrypting ++ @ Write to q2 so we do not overlap table and destination below. 
++ veor q2, q0, q12 @ vpxor Lk_s63(%rip), %xmm0, %xmm4 ++ add r2, r2, #16 @ add $16, %rdx ++ vtbl.8 d8, {q2}, d10 @ vpshufb %xmm5, %xmm4, %xmm4 ++ vtbl.8 d9, {q2}, d11 ++ vtbl.8 d2, {q4}, d10 @ vpshufb %xmm5, %xmm4, %xmm1 ++ vtbl.8 d3, {q4}, d11 ++ vtbl.8 d6, {q1}, d10 @ vpshufb %xmm5, %xmm1, %xmm3 ++ vtbl.8 d7, {q1}, d11 ++ veor q4, q4, q1 @ vpxor %xmm1, %xmm4, %xmm4 ++ vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1 ++ veor q3, q3, q4 @ vpxor %xmm4, %xmm3, %xmm3 ++ ++ b Lschedule_mangle_both ++.align 4 ++Lschedule_mangle_dec: ++ @ inverse mix columns ++ adr r11, Lk_dksd @ lea Lk_dksd(%rip),%r11 ++ vshr.u8 q1, q4, #4 @ vpsrlb $4, %xmm4, %xmm1 # 1 = hi ++ vand q4, q4, q9 @ vpand %xmm9, %xmm4, %xmm4 # 4 = lo ++ ++ vld1.64 {q14,q15}, [r11]! @ vmovdqa 0x00(%r11), %xmm2 ++ @ vmovdqa 0x10(%r11), %xmm3 ++ vtbl.8 d4, {q14}, d8 @ vpshufb %xmm4, %xmm2, %xmm2 ++ vtbl.8 d5, {q14}, d9 ++ vtbl.8 d6, {q15}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 ++ vtbl.8 d7, {q15}, d3 ++ @ Load .Lk_dksb ahead of time. ++ vld1.64 {q14,q15}, [r11]! @ vmovdqa 0x20(%r11), %xmm2 ++ @ vmovdqa 0x30(%r11), %xmm3 ++ @ Write to q13 so we do not overlap table and destination. ++ veor q13, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 ++ vtbl.8 d6, {q13}, d10 @ vpshufb %xmm5, %xmm3, %xmm3 ++ vtbl.8 d7, {q13}, d11 ++ ++ vtbl.8 d4, {q14}, d8 @ vpshufb %xmm4, %xmm2, %xmm2 ++ vtbl.8 d5, {q14}, d9 ++ veor q2, q2, q3 @ vpxor %xmm3, %xmm2, %xmm2 ++ vtbl.8 d6, {q15}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 ++ vtbl.8 d7, {q15}, d3 ++ @ Load .Lk_dkse ahead of time. ++ vld1.64 {q14,q15}, [r11]! @ vmovdqa 0x40(%r11), %xmm2 ++ @ vmovdqa 0x50(%r11), %xmm3 ++ @ Write to q13 so we do not overlap table and destination. ++ veor q13, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 ++ vtbl.8 d6, {q13}, d10 @ vpshufb %xmm5, %xmm3, %xmm3 ++ vtbl.8 d7, {q13}, d11 ++ ++ vtbl.8 d4, {q14}, d8 @ vpshufb %xmm4, %xmm2, %xmm2 ++ vtbl.8 d5, {q14}, d9 ++ veor q2, q2, q3 @ vpxor %xmm3, %xmm2, %xmm2 ++ vtbl.8 d6, {q15}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 ++ vtbl.8 d7, {q15}, d3 ++ @ Load .Lk_dkse ahead of time. ++ vld1.64 {q14,q15}, [r11]! @ vmovdqa 0x60(%r11), %xmm2 ++ @ vmovdqa 0x70(%r11), %xmm4 ++ @ Write to q13 so we do not overlap table and destination. ++ veor q13, q3, q2 @ vpxor %xmm2, %xmm3, %xmm3 ++ ++ vtbl.8 d4, {q14}, d8 @ vpshufb %xmm4, %xmm2, %xmm2 ++ vtbl.8 d5, {q14}, d9 ++ vtbl.8 d6, {q13}, d10 @ vpshufb %xmm5, %xmm3, %xmm3 ++ vtbl.8 d7, {q13}, d11 ++ vtbl.8 d8, {q15}, d2 @ vpshufb %xmm1, %xmm4, %xmm4 ++ vtbl.8 d9, {q15}, d3 ++ vld1.64 {q1}, [r8] @ vmovdqa (%r8,%r10), %xmm1 ++ veor q2, q2, q3 @ vpxor %xmm3, %xmm2, %xmm2 ++ veor q3, q4, q2 @ vpxor %xmm2, %xmm4, %xmm3 ++ ++ sub r2, r2, #16 @ add $-16, %rdx ++ ++Lschedule_mangle_both: ++ @ Write to q2 so table and destination do not overlap. 
++ vtbl.8 d4, {q3}, d2 @ vpshufb %xmm1, %xmm3, %xmm3 ++ vtbl.8 d5, {q3}, d3 ++ add r8, r8, #64-16 @ add $-16, %r8 ++ and r8, r8, #~(1<<6) @ and $0x30, %r8 ++ vst1.64 {q2}, [r2] @ vmovdqu %xmm3, (%rdx) ++ bx lr ++ ++ ++.globl _vpaes_set_encrypt_key ++.private_extern _vpaes_set_encrypt_key ++#ifdef __thumb2__ ++.thumb_func _vpaes_set_encrypt_key ++#endif ++.align 4 ++_vpaes_set_encrypt_key: ++ stmdb sp!, {r7,r8,r9,r10,r11, lr} ++ vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} ++ ++ lsr r9, r1, #5 @ shr $5,%eax ++ add r9, r9, #5 @ $5,%eax ++ str r9, [r2,#240] @ mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ ++ mov r3, #0 @ mov $0,%ecx ++ mov r8, #0x30 @ mov $0x30,%r8d ++ bl _vpaes_schedule_core ++ eor r0, r0, r0 ++ ++ vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} ++ ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return ++ ++ ++.globl _vpaes_set_decrypt_key ++.private_extern _vpaes_set_decrypt_key ++#ifdef __thumb2__ ++.thumb_func _vpaes_set_decrypt_key ++#endif ++.align 4 ++_vpaes_set_decrypt_key: ++ stmdb sp!, {r7,r8,r9,r10,r11, lr} ++ vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} ++ ++ lsr r9, r1, #5 @ shr $5,%eax ++ add r9, r9, #5 @ $5,%eax ++ str r9, [r2,#240] @ mov %eax,240(%rdx) # AES_KEY->rounds = nbits/32+5; ++ lsl r9, r9, #4 @ shl $4,%eax ++ add r2, r2, #16 @ lea 16(%rdx,%rax),%rdx ++ add r2, r2, r9 ++ ++ mov r3, #1 @ mov $1,%ecx ++ lsr r8, r1, #1 @ shr $1,%r8d ++ and r8, r8, #32 @ and $32,%r8d ++ eor r8, r8, #32 @ xor $32,%r8d # nbits==192?0:32 ++ bl _vpaes_schedule_core ++ ++ vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} ++ ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return ++ ++ ++@ Additional constants for converting to bsaes. ++ ++.align 4 ++_vpaes_convert_consts: ++@ .Lk_opt_then_skew applies skew(opt(x)) XOR 0x63, where skew is the linear ++@ transform in the AES S-box. 0x63 is incorporated into the low half of the ++@ table. This was computed with the following script: ++@ ++@ def u64s_to_u128(x, y): ++@ return x | (y << 64) ++@ def u128_to_u64s(w): ++@ return w & ((1<<64)-1), w >> 64 ++@ def get_byte(w, i): ++@ return (w >> (i*8)) & 0xff ++@ def apply_table(table, b): ++@ lo = b & 0xf ++@ hi = b >> 4 ++@ return get_byte(table[0], lo) ^ get_byte(table[1], hi) ++@ def opt(b): ++@ table = [ ++@ u64s_to_u128(0xFF9F4929D6B66000, 0xF7974121DEBE6808), ++@ u64s_to_u128(0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0), ++@ ] ++@ return apply_table(table, b) ++@ def rot_byte(b, n): ++@ return 0xff & ((b << n) | (b >> (8-n))) ++@ def skew(x): ++@ return (x ^ rot_byte(x, 1) ^ rot_byte(x, 2) ^ rot_byte(x, 3) ^ ++@ rot_byte(x, 4)) ++@ table = [0, 0] ++@ for i in range(16): ++@ table[0] |= (skew(opt(i)) ^ 0x63) << (i*8) ++@ table[1] |= skew(opt(i<<4)) << (i*8) ++@ print(" .quad 0x%016x, 0x%016x" % u128_to_u64s(table[0])) ++@ print(" .quad 0x%016x, 0x%016x" % u128_to_u64s(table[1])) ++Lk_opt_then_skew: ++.quad 0x9cb8436798bc4763, 0x6440bb9f6044bf9b ++.quad 0x1f30062936192f00, 0xb49bad829db284ab ++ ++@ .Lk_decrypt_transform is a permutation which performs an 8-bit left-rotation ++@ followed by a byte-swap on each 32-bit word of a vector. E.g., 0x11223344 ++@ becomes 0x22334411 and then 0x11443322. 
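In the same spirit as the derivation script above, the rotate-then-byte-swap example in the comment can be checked directly; the Lk_decrypt_transform table itself follows below. A small illustrative check, not part of the generated file:

    # Verify the .Lk_decrypt_transform example from the comment above:
    # an 8-bit left rotation of each 32-bit word, followed by a byte swap.
    def rotl32(x, n):
        return ((x << n) | (x >> (32 - n))) & 0xFFFFFFFF

    def bswap32(x):
        return int.from_bytes(x.to_bytes(4, "little"), "big")

    assert rotl32(0x11223344, 8) == 0x22334411
    assert bswap32(0x22334411) == 0x11443322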
++Lk_decrypt_transform: ++.quad 0x0704050603000102, 0x0f0c0d0e0b08090a ++ ++ ++@ void vpaes_encrypt_key_to_bsaes(AES_KEY *bsaes, const AES_KEY *vpaes); ++.globl _vpaes_encrypt_key_to_bsaes ++.private_extern _vpaes_encrypt_key_to_bsaes ++#ifdef __thumb2__ ++.thumb_func _vpaes_encrypt_key_to_bsaes ++#endif ++.align 4 ++_vpaes_encrypt_key_to_bsaes: ++ stmdb sp!, {r11, lr} ++ ++ @ See _vpaes_schedule_core for the key schedule logic. In particular, ++ @ _vpaes_schedule_transform(.Lk_ipt) (section 2.2 of the paper), ++ @ _vpaes_schedule_mangle (section 4.3), and .Lschedule_mangle_last ++ @ contain the transformations not in the bsaes representation. This ++ @ function inverts those transforms. ++ @ ++ @ Note also that bsaes-armv7.pl expects aes-armv4.pl's key ++ @ representation, which does not match the other aes_nohw_* ++ @ implementations. The ARM aes_nohw_* stores each 32-bit word ++ @ byteswapped, as a convenience for (unsupported) big-endian ARM, at the ++ @ cost of extra REV and VREV32 operations in little-endian ARM. ++ ++ vmov.i8 q9, #0x0f @ Required by _vpaes_schedule_transform ++ adr r2, Lk_mc_forward @ Must be aligned to 8 mod 16. ++ add r3, r2, 0x90 @ Lk_sr+0x10-Lk_mc_forward = 0x90 (Apple's toolchain doesn't support the expression) ++ ++ vld1.64 {q12}, [r2] ++ vmov.i8 q10, #0x5b @ Lk_s63 from vpaes-x86_64 ++ adr r11, Lk_opt @ Must be aligned to 8 mod 16. ++ vmov.i8 q11, #0x63 @ LK_s63 without Lk_ipt applied ++ ++ @ vpaes stores one fewer round count than bsaes, but the number of keys ++ @ is the same. ++ ldr r2, [r1,#240] ++ add r2, r2, #1 ++ str r2, [r0,#240] ++ ++ @ The first key is transformed with _vpaes_schedule_transform(.Lk_ipt). ++ @ Invert this with .Lk_opt. ++ vld1.64 {q0}, [r1]! ++ bl _vpaes_schedule_transform ++ vrev32.8 q0, q0 ++ vst1.64 {q0}, [r0]! ++ ++ @ The middle keys have _vpaes_schedule_transform(.Lk_ipt) applied, ++ @ followed by _vpaes_schedule_mangle. _vpaes_schedule_mangle XORs 0x63, ++ @ multiplies by the circulant 0,1,1,1, then applies ShiftRows. ++Loop_enc_key_to_bsaes: ++ vld1.64 {q0}, [r1]! ++ ++ @ Invert the ShiftRows step (see .Lschedule_mangle_both). Note we cycle ++ @ r3 in the opposite direction and start at .Lk_sr+0x10 instead of 0x30. ++ @ We use r3 rather than r8 to avoid a callee-saved register. ++ vld1.64 {q1}, [r3] ++ vtbl.8 d4, {q0}, d2 ++ vtbl.8 d5, {q0}, d3 ++ add r3, r3, #16 ++ and r3, r3, #~(1<<6) ++ vmov q0, q2 ++ ++ @ Handle the last key differently. ++ subs r2, r2, #1 ++ beq Loop_enc_key_to_bsaes_last ++ ++ @ Multiply by the circulant. This is its own inverse. ++ vtbl.8 d2, {q0}, d24 ++ vtbl.8 d3, {q0}, d25 ++ vmov q0, q1 ++ vtbl.8 d4, {q1}, d24 ++ vtbl.8 d5, {q1}, d25 ++ veor q0, q0, q2 ++ vtbl.8 d2, {q2}, d24 ++ vtbl.8 d3, {q2}, d25 ++ veor q0, q0, q1 ++ ++ @ XOR and finish. ++ veor q0, q0, q10 ++ bl _vpaes_schedule_transform ++ vrev32.8 q0, q0 ++ vst1.64 {q0}, [r0]! ++ b Loop_enc_key_to_bsaes ++ ++Loop_enc_key_to_bsaes_last: ++ @ The final key does not have a basis transform (note ++ @ .Lschedule_mangle_last inverts the original transform). It only XORs ++ @ 0x63 and applies ShiftRows. The latter was already inverted in the ++ @ loop. Note that, because we act on the original representation, we use ++ @ q11, not q10. ++ veor q0, q0, q11 ++ vrev32.8 q0, q0 ++ vst1.64 {q0}, [r0] ++ ++ @ Wipe registers which contained key material. 
++ veor q0, q0, q0 ++ veor q1, q1, q1 ++ veor q2, q2, q2 ++ ++ ldmia sp!, {r11, pc} @ return ++ ++ ++@ void vpaes_decrypt_key_to_bsaes(AES_KEY *vpaes, const AES_KEY *bsaes); ++.globl _vpaes_decrypt_key_to_bsaes ++.private_extern _vpaes_decrypt_key_to_bsaes ++#ifdef __thumb2__ ++.thumb_func _vpaes_decrypt_key_to_bsaes ++#endif ++.align 4 ++_vpaes_decrypt_key_to_bsaes: ++ stmdb sp!, {r11, lr} ++ ++ @ See _vpaes_schedule_core for the key schedule logic. Note vpaes ++ @ computes the decryption key schedule in reverse. Additionally, ++ @ aes-x86_64.pl shares some transformations, so we must only partially ++ @ invert vpaes's transformations. In general, vpaes computes in a ++ @ different basis (.Lk_ipt and .Lk_opt) and applies the inverses of ++ @ MixColumns, ShiftRows, and the affine part of the AES S-box (which is ++ @ split into a linear skew and XOR of 0x63). We undo all but MixColumns. ++ @ ++ @ Note also that bsaes-armv7.pl expects aes-armv4.pl's key ++ @ representation, which does not match the other aes_nohw_* ++ @ implementations. The ARM aes_nohw_* stores each 32-bit word ++ @ byteswapped, as a convenience for (unsupported) big-endian ARM, at the ++ @ cost of extra REV and VREV32 operations in little-endian ARM. ++ ++ adr r2, Lk_decrypt_transform ++ adr r3, Lk_sr+0x30 ++ adr r11, Lk_opt_then_skew @ Input to _vpaes_schedule_transform. ++ vld1.64 {q12}, [r2] @ Reuse q12 from encryption. ++ vmov.i8 q9, #0x0f @ Required by _vpaes_schedule_transform ++ ++ @ vpaes stores one fewer round count than bsaes, but the number of keys ++ @ is the same. ++ ldr r2, [r1,#240] ++ add r2, r2, #1 ++ str r2, [r0,#240] ++ ++ @ Undo the basis change and reapply the S-box affine transform. See ++ @ .Lschedule_mangle_last. ++ vld1.64 {q0}, [r1]! ++ bl _vpaes_schedule_transform ++ vrev32.8 q0, q0 ++ vst1.64 {q0}, [r0]! ++ ++ @ See _vpaes_schedule_mangle for the transform on the middle keys. Note ++ @ it simultaneously inverts MixColumns and the S-box affine transform. ++ @ See .Lk_dksd through .Lk_dks9. ++Loop_dec_key_to_bsaes: ++ vld1.64 {q0}, [r1]! ++ ++ @ Invert the ShiftRows step (see .Lschedule_mangle_both). Note going ++ @ forwards cancels inverting for which direction we cycle r3. We use r3 ++ @ rather than r8 to avoid a callee-saved register. ++ vld1.64 {q1}, [r3] ++ vtbl.8 d4, {q0}, d2 ++ vtbl.8 d5, {q0}, d3 ++ add r3, r3, #64-16 ++ and r3, r3, #~(1<<6) ++ vmov q0, q2 ++ ++ @ Handle the last key differently. ++ subs r2, r2, #1 ++ beq Loop_dec_key_to_bsaes_last ++ ++ @ Undo the basis change and reapply the S-box affine transform. ++ bl _vpaes_schedule_transform ++ ++ @ Rotate each word by 8 bytes (cycle the rows) and then byte-swap. We ++ @ combine the two operations in .Lk_decrypt_transform. ++ @ ++ @ TODO(davidben): Where does the rotation come from? ++ vtbl.8 d2, {q0}, d24 ++ vtbl.8 d3, {q0}, d25 ++ ++ vst1.64 {q1}, [r0]! ++ b Loop_dec_key_to_bsaes ++ ++Loop_dec_key_to_bsaes_last: ++ @ The final key only inverts ShiftRows (already done in the loop). See ++ @ .Lschedule_am_decrypting. Its basis is not transformed. ++ vrev32.8 q0, q0 ++ vst1.64 {q0}, [r0]! ++ ++ @ Wipe registers which contained key material. 
++ veor q0, q0, q0 ++ veor q1, q1, q1 ++ veor q2, q2, q2 ++ ++ ldmia sp!, {r11, pc} @ return ++ ++.globl _vpaes_ctr32_encrypt_blocks ++.private_extern _vpaes_ctr32_encrypt_blocks ++#ifdef __thumb2__ ++.thumb_func _vpaes_ctr32_encrypt_blocks ++#endif ++.align 4 ++_vpaes_ctr32_encrypt_blocks: ++ mov ip, sp ++ stmdb sp!, {r7,r8,r9,r10,r11, lr} ++ @ This function uses q4-q7 (d8-d15), which are callee-saved. ++ vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} ++ ++ cmp r2, #0 ++ @ r8 is passed on the stack. ++ ldr r8, [ip] ++ beq Lctr32_done ++ ++ @ _vpaes_encrypt_core expects the key in r2, so swap r2 and r3. ++ mov r9, r3 ++ mov r3, r2 ++ mov r2, r9 ++ ++ @ Load the IV and counter portion. ++ ldr r7, [r8, #12] ++ vld1.8 {q7}, [r8] ++ ++ bl _vpaes_preheat ++ rev r7, r7 @ The counter is big-endian. ++ ++Lctr32_loop: ++ vmov q0, q7 ++ vld1.8 {q6}, [r0]! @ Load input ahead of time ++ bl _vpaes_encrypt_core ++ veor q0, q0, q6 @ XOR input and result ++ vst1.8 {q0}, [r1]! ++ subs r3, r3, #1 ++ @ Update the counter. ++ add r7, r7, #1 ++ rev r9, r7 ++ vmov.32 d15[1], r9 ++ bne Lctr32_loop ++ ++Lctr32_done: ++ vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} ++ ldmia sp!, {r7,r8,r9,r10,r11, pc} @ return ++ ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-arm/crypto/test/trampoline-armv4.S b/apple-arm/crypto/test/trampoline-armv4.S +new file mode 100644 +index 0000000..9d74f55 +--- /dev/null ++++ b/apple-arm/crypto/test/trampoline-armv4.S +@@ -0,0 +1,376 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if !defined(__has_feature) ++#define __has_feature(x) 0 ++#endif ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++ ++#if !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.syntax unified ++ ++ ++ ++ ++.text ++ ++@ abi_test_trampoline loads callee-saved registers from |state|, calls |func| ++@ with |argv|, then saves the callee-saved registers into |state|. It returns ++@ the result of |func|. The |unwind| argument is unused. ++@ uint32_t abi_test_trampoline(void (*func)(...), CallerState *state, ++@ const uint32_t *argv, size_t argc, ++@ int unwind); ++ ++.globl _abi_test_trampoline ++.private_extern _abi_test_trampoline ++.align 4 ++_abi_test_trampoline: ++ @ Save parameters and all callee-saved registers. For convenience, we ++ @ save r9 on iOS even though it's volatile. ++ vstmdb sp!, {d8,d9,d10,d11,d12,d13,d14,d15} ++ stmdb sp!, {r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,lr} ++ ++ @ Reserve stack space for six (10-4) stack parameters, plus an extra 4 ++ @ bytes to keep it 8-byte-aligned (see AAPCS, section 5.3). ++ sub sp, sp, #28 ++ ++ @ Every register in AAPCS is either non-volatile or a parameter (except ++ @ r9 on iOS), so this code, by the actual call, loses all its scratch ++ @ registers. First fill in stack parameters while there are registers ++ @ to spare. ++ cmp r3, #4 ++ bls Lstack_args_done ++ mov r4, sp @ r4 is the output pointer. ++ add r5, r2, r3, lsl #2 @ Set r5 to the end of argv. ++ add r2, r2, #16 @ Skip four arguments. ++Lstack_args_loop: ++ ldr r6, [r2], #4 ++ cmp r2, r5 ++ str r6, [r4], #4 ++ bne Lstack_args_loop ++ ++Lstack_args_done: ++ @ Load registers from |r1|. ++ vldmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15} ++#if defined(__APPLE__) ++ @ r9 is not volatile on iOS. ++ ldmia r1!, {r4,r5,r6,r7,r8,r10-r11} ++#else ++ ldmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11} ++#endif ++ ++ @ Load register parameters. 
This uses up our remaining registers, so we ++ @ repurpose lr as scratch space. ++ ldr r3, [sp, #40] @ Reload argc. ++ ldr lr, [sp, #36] @ Load argv into lr. ++ cmp r3, #3 ++ bhi Larg_r3 ++ beq Larg_r2 ++ cmp r3, #1 ++ bhi Larg_r1 ++ beq Larg_r0 ++ b Largs_done ++ ++Larg_r3: ++ ldr r3, [lr, #12] @ argv[3] ++Larg_r2: ++ ldr r2, [lr, #8] @ argv[2] ++Larg_r1: ++ ldr r1, [lr, #4] @ argv[1] ++Larg_r0: ++ ldr r0, [lr] @ argv[0] ++Largs_done: ++ ++ @ With every other register in use, load the function pointer into lr ++ @ and call the function. ++ ldr lr, [sp, #28] ++ blx lr ++ ++ @ r1-r3 are free for use again. The trampoline only supports ++ @ single-return functions. Pass r4-r11 to the caller. ++ ldr r1, [sp, #32] ++ vstmia r1!, {d8,d9,d10,d11,d12,d13,d14,d15} ++#if defined(__APPLE__) ++ @ r9 is not volatile on iOS. ++ stmia r1!, {r4,r5,r6,r7,r8,r10-r11} ++#else ++ stmia r1!, {r4,r5,r6,r7,r8,r9,r10,r11} ++#endif ++ ++ @ Unwind the stack and restore registers. ++ add sp, sp, #44 @ 44 = 28+16 ++ ldmia sp!, {r4,r5,r6,r7,r8,r9,r10,r11,lr} @ Skip r0-r3 (see +16 above). ++ vldmia sp!, {d8,d9,d10,d11,d12,d13,d14,d15} ++ ++ bx lr ++ ++ ++.globl _abi_test_clobber_r0 ++.private_extern _abi_test_clobber_r0 ++.align 4 ++_abi_test_clobber_r0: ++ mov r0, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r1 ++.private_extern _abi_test_clobber_r1 ++.align 4 ++_abi_test_clobber_r1: ++ mov r1, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r2 ++.private_extern _abi_test_clobber_r2 ++.align 4 ++_abi_test_clobber_r2: ++ mov r2, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r3 ++.private_extern _abi_test_clobber_r3 ++.align 4 ++_abi_test_clobber_r3: ++ mov r3, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r4 ++.private_extern _abi_test_clobber_r4 ++.align 4 ++_abi_test_clobber_r4: ++ mov r4, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r5 ++.private_extern _abi_test_clobber_r5 ++.align 4 ++_abi_test_clobber_r5: ++ mov r5, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r6 ++.private_extern _abi_test_clobber_r6 ++.align 4 ++_abi_test_clobber_r6: ++ mov r6, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r7 ++.private_extern _abi_test_clobber_r7 ++.align 4 ++_abi_test_clobber_r7: ++ mov r7, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r8 ++.private_extern _abi_test_clobber_r8 ++.align 4 ++_abi_test_clobber_r8: ++ mov r8, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r9 ++.private_extern _abi_test_clobber_r9 ++.align 4 ++_abi_test_clobber_r9: ++ mov r9, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r10 ++.private_extern _abi_test_clobber_r10 ++.align 4 ++_abi_test_clobber_r10: ++ mov r10, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r11 ++.private_extern _abi_test_clobber_r11 ++.align 4 ++_abi_test_clobber_r11: ++ mov r11, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_r12 ++.private_extern _abi_test_clobber_r12 ++.align 4 ++_abi_test_clobber_r12: ++ mov r12, #0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d0 ++.private_extern _abi_test_clobber_d0 ++.align 4 ++_abi_test_clobber_d0: ++ mov r0, #0 ++ vmov s0, r0 ++ vmov s1, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d1 ++.private_extern _abi_test_clobber_d1 ++.align 4 ++_abi_test_clobber_d1: ++ mov r0, #0 ++ vmov s2, r0 ++ vmov s3, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d2 ++.private_extern _abi_test_clobber_d2 ++.align 4 ++_abi_test_clobber_d2: ++ mov r0, #0 ++ vmov s4, r0 ++ vmov s5, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d3 ++.private_extern _abi_test_clobber_d3 ++.align 4 ++_abi_test_clobber_d3: ++ mov r0, #0 ++ vmov s6, r0 ++ vmov s7, r0 ++ bx lr ++ ++ ++.globl 
_abi_test_clobber_d4 ++.private_extern _abi_test_clobber_d4 ++.align 4 ++_abi_test_clobber_d4: ++ mov r0, #0 ++ vmov s8, r0 ++ vmov s9, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d5 ++.private_extern _abi_test_clobber_d5 ++.align 4 ++_abi_test_clobber_d5: ++ mov r0, #0 ++ vmov s10, r0 ++ vmov s11, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d6 ++.private_extern _abi_test_clobber_d6 ++.align 4 ++_abi_test_clobber_d6: ++ mov r0, #0 ++ vmov s12, r0 ++ vmov s13, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d7 ++.private_extern _abi_test_clobber_d7 ++.align 4 ++_abi_test_clobber_d7: ++ mov r0, #0 ++ vmov s14, r0 ++ vmov s15, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d8 ++.private_extern _abi_test_clobber_d8 ++.align 4 ++_abi_test_clobber_d8: ++ mov r0, #0 ++ vmov s16, r0 ++ vmov s17, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d9 ++.private_extern _abi_test_clobber_d9 ++.align 4 ++_abi_test_clobber_d9: ++ mov r0, #0 ++ vmov s18, r0 ++ vmov s19, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d10 ++.private_extern _abi_test_clobber_d10 ++.align 4 ++_abi_test_clobber_d10: ++ mov r0, #0 ++ vmov s20, r0 ++ vmov s21, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d11 ++.private_extern _abi_test_clobber_d11 ++.align 4 ++_abi_test_clobber_d11: ++ mov r0, #0 ++ vmov s22, r0 ++ vmov s23, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d12 ++.private_extern _abi_test_clobber_d12 ++.align 4 ++_abi_test_clobber_d12: ++ mov r0, #0 ++ vmov s24, r0 ++ vmov s25, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d13 ++.private_extern _abi_test_clobber_d13 ++.align 4 ++_abi_test_clobber_d13: ++ mov r0, #0 ++ vmov s26, r0 ++ vmov s27, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d14 ++.private_extern _abi_test_clobber_d14 ++.align 4 ++_abi_test_clobber_d14: ++ mov r0, #0 ++ vmov s28, r0 ++ vmov s29, r0 ++ bx lr ++ ++ ++.globl _abi_test_clobber_d15 ++.private_extern _abi_test_clobber_d15 ++.align 4 ++_abi_test_clobber_d15: ++ mov r0, #0 ++ vmov s30, r0 ++ vmov s31, r0 ++ bx lr ++ ++#endif // !OPENSSL_NO_ASM +diff --git a/apple-x86/crypto/chacha/chacha-x86.S b/apple-x86/crypto/chacha/chacha-x86.S +new file mode 100644 +index 0000000..ef535b2 +--- /dev/null ++++ b/apple-x86/crypto/chacha/chacha-x86.S +@@ -0,0 +1,974 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _ChaCha20_ctr32 ++.private_extern _ChaCha20_ctr32 ++.align 4 ++_ChaCha20_ctr32: ++L_ChaCha20_ctr32_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ xorl %eax,%eax ++ cmpl 28(%esp),%eax ++ je L000no_data ++ call Lpic_point ++Lpic_point: ++ popl %eax ++ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp ++ testl $16777216,(%ebp) ++ jz L001x86 ++ testl $512,4(%ebp) ++ jz L001x86 ++ jmp Lssse3_shortcut ++L001x86: ++ movl 32(%esp),%esi ++ movl 36(%esp),%edi ++ subl $132,%esp ++ movl (%esi),%eax ++ movl 4(%esi),%ebx ++ movl 8(%esi),%ecx ++ movl 12(%esi),%edx ++ movl %eax,80(%esp) ++ movl %ebx,84(%esp) ++ movl %ecx,88(%esp) ++ movl %edx,92(%esp) ++ movl 16(%esi),%eax ++ movl 20(%esi),%ebx ++ movl 24(%esi),%ecx ++ movl 28(%esi),%edx ++ movl %eax,96(%esp) ++ movl %ebx,100(%esp) ++ movl %ecx,104(%esp) ++ movl %edx,108(%esp) ++ movl (%edi),%eax ++ movl 4(%edi),%ebx ++ movl 8(%edi),%ecx ++ movl 12(%edi),%edx ++ subl $1,%eax ++ movl %eax,112(%esp) ++ movl %ebx,116(%esp) ++ movl %ecx,120(%esp) ++ movl %edx,124(%esp) ++ jmp L002entry ++.align 4,0x90 ++L003outer_loop: ++ movl %ebx,156(%esp) ++ movl %eax,152(%esp) ++ movl %ecx,160(%esp) ++L002entry: ++ movl $1634760805,%eax ++ movl $857760878,4(%esp) ++ movl $2036477234,8(%esp) ++ movl $1797285236,12(%esp) ++ movl 84(%esp),%ebx ++ movl 88(%esp),%ebp ++ movl 104(%esp),%ecx ++ movl 108(%esp),%esi ++ movl 116(%esp),%edx ++ movl 120(%esp),%edi ++ movl %ebx,20(%esp) ++ movl %ebp,24(%esp) ++ movl %ecx,40(%esp) ++ movl %esi,44(%esp) ++ movl %edx,52(%esp) ++ movl %edi,56(%esp) ++ movl 92(%esp),%ebx ++ movl 124(%esp),%edi ++ movl 112(%esp),%edx ++ movl 80(%esp),%ebp ++ movl 96(%esp),%ecx ++ movl 100(%esp),%esi ++ addl $1,%edx ++ movl %ebx,28(%esp) ++ movl %edi,60(%esp) ++ movl %edx,112(%esp) ++ movl $10,%ebx ++ jmp L004loop ++.align 4,0x90 ++L004loop: ++ addl %ebp,%eax ++ movl %ebx,128(%esp) ++ movl %ebp,%ebx ++ xorl %eax,%edx ++ roll $16,%edx ++ addl %edx,%ecx ++ xorl %ecx,%ebx ++ movl 52(%esp),%edi ++ roll $12,%ebx ++ movl 20(%esp),%ebp ++ addl %ebx,%eax ++ xorl %eax,%edx ++ movl %eax,(%esp) ++ roll $8,%edx ++ movl 4(%esp),%eax ++ addl %edx,%ecx ++ movl %edx,48(%esp) ++ xorl %ecx,%ebx ++ addl %ebp,%eax ++ roll $7,%ebx ++ xorl %eax,%edi ++ movl %ecx,32(%esp) ++ roll $16,%edi ++ movl %ebx,16(%esp) ++ addl %edi,%esi ++ movl 40(%esp),%ecx ++ xorl %esi,%ebp ++ movl 56(%esp),%edx ++ roll $12,%ebp ++ movl 24(%esp),%ebx ++ addl %ebp,%eax ++ xorl %eax,%edi ++ movl %eax,4(%esp) ++ roll $8,%edi ++ movl 8(%esp),%eax ++ addl %edi,%esi ++ movl %edi,52(%esp) ++ xorl %esi,%ebp ++ addl %ebx,%eax ++ roll $7,%ebp ++ xorl %eax,%edx ++ movl %esi,36(%esp) ++ roll $16,%edx ++ movl %ebp,20(%esp) ++ addl %edx,%ecx ++ movl 44(%esp),%esi ++ xorl %ecx,%ebx ++ movl 60(%esp),%edi ++ roll $12,%ebx ++ movl 28(%esp),%ebp ++ addl %ebx,%eax ++ xorl %eax,%edx ++ movl %eax,8(%esp) ++ roll $8,%edx ++ movl 12(%esp),%eax ++ addl %edx,%ecx ++ movl %edx,56(%esp) ++ xorl %ecx,%ebx ++ addl %ebp,%eax ++ roll $7,%ebx ++ xorl %eax,%edi ++ roll $16,%edi ++ movl %ebx,24(%esp) ++ addl %edi,%esi ++ xorl %esi,%ebp ++ roll $12,%ebp ++ movl 20(%esp),%ebx ++ addl %ebp,%eax ++ xorl %eax,%edi ++ movl %eax,12(%esp) ++ roll $8,%edi ++ movl (%esp),%eax ++ addl %edi,%esi ++ movl %edi,%edx ++ xorl %esi,%ebp ++ addl %ebx,%eax ++ roll $7,%ebp ++ xorl %eax,%edx ++ roll $16,%edx ++ movl %ebp,28(%esp) ++ addl %edx,%ecx ++ xorl %ecx,%ebx ++ movl 48(%esp),%edi ++ roll $12,%ebx ++ movl 24(%esp),%ebp ++ addl 
%ebx,%eax ++ xorl %eax,%edx ++ movl %eax,(%esp) ++ roll $8,%edx ++ movl 4(%esp),%eax ++ addl %edx,%ecx ++ movl %edx,60(%esp) ++ xorl %ecx,%ebx ++ addl %ebp,%eax ++ roll $7,%ebx ++ xorl %eax,%edi ++ movl %ecx,40(%esp) ++ roll $16,%edi ++ movl %ebx,20(%esp) ++ addl %edi,%esi ++ movl 32(%esp),%ecx ++ xorl %esi,%ebp ++ movl 52(%esp),%edx ++ roll $12,%ebp ++ movl 28(%esp),%ebx ++ addl %ebp,%eax ++ xorl %eax,%edi ++ movl %eax,4(%esp) ++ roll $8,%edi ++ movl 8(%esp),%eax ++ addl %edi,%esi ++ movl %edi,48(%esp) ++ xorl %esi,%ebp ++ addl %ebx,%eax ++ roll $7,%ebp ++ xorl %eax,%edx ++ movl %esi,44(%esp) ++ roll $16,%edx ++ movl %ebp,24(%esp) ++ addl %edx,%ecx ++ movl 36(%esp),%esi ++ xorl %ecx,%ebx ++ movl 56(%esp),%edi ++ roll $12,%ebx ++ movl 16(%esp),%ebp ++ addl %ebx,%eax ++ xorl %eax,%edx ++ movl %eax,8(%esp) ++ roll $8,%edx ++ movl 12(%esp),%eax ++ addl %edx,%ecx ++ movl %edx,52(%esp) ++ xorl %ecx,%ebx ++ addl %ebp,%eax ++ roll $7,%ebx ++ xorl %eax,%edi ++ roll $16,%edi ++ movl %ebx,28(%esp) ++ addl %edi,%esi ++ xorl %esi,%ebp ++ movl 48(%esp),%edx ++ roll $12,%ebp ++ movl 128(%esp),%ebx ++ addl %ebp,%eax ++ xorl %eax,%edi ++ movl %eax,12(%esp) ++ roll $8,%edi ++ movl (%esp),%eax ++ addl %edi,%esi ++ movl %edi,56(%esp) ++ xorl %esi,%ebp ++ roll $7,%ebp ++ decl %ebx ++ jnz L004loop ++ movl 160(%esp),%ebx ++ addl $1634760805,%eax ++ addl 80(%esp),%ebp ++ addl 96(%esp),%ecx ++ addl 100(%esp),%esi ++ cmpl $64,%ebx ++ jb L005tail ++ movl 156(%esp),%ebx ++ addl 112(%esp),%edx ++ addl 120(%esp),%edi ++ xorl (%ebx),%eax ++ xorl 16(%ebx),%ebp ++ movl %eax,(%esp) ++ movl 152(%esp),%eax ++ xorl 32(%ebx),%ecx ++ xorl 36(%ebx),%esi ++ xorl 48(%ebx),%edx ++ xorl 56(%ebx),%edi ++ movl %ebp,16(%eax) ++ movl %ecx,32(%eax) ++ movl %esi,36(%eax) ++ movl %edx,48(%eax) ++ movl %edi,56(%eax) ++ movl 4(%esp),%ebp ++ movl 8(%esp),%ecx ++ movl 12(%esp),%esi ++ movl 20(%esp),%edx ++ movl 24(%esp),%edi ++ addl $857760878,%ebp ++ addl $2036477234,%ecx ++ addl $1797285236,%esi ++ addl 84(%esp),%edx ++ addl 88(%esp),%edi ++ xorl 4(%ebx),%ebp ++ xorl 8(%ebx),%ecx ++ xorl 12(%ebx),%esi ++ xorl 20(%ebx),%edx ++ xorl 24(%ebx),%edi ++ movl %ebp,4(%eax) ++ movl %ecx,8(%eax) ++ movl %esi,12(%eax) ++ movl %edx,20(%eax) ++ movl %edi,24(%eax) ++ movl 28(%esp),%ebp ++ movl 40(%esp),%ecx ++ movl 44(%esp),%esi ++ movl 52(%esp),%edx ++ movl 60(%esp),%edi ++ addl 92(%esp),%ebp ++ addl 104(%esp),%ecx ++ addl 108(%esp),%esi ++ addl 116(%esp),%edx ++ addl 124(%esp),%edi ++ xorl 28(%ebx),%ebp ++ xorl 40(%ebx),%ecx ++ xorl 44(%ebx),%esi ++ xorl 52(%ebx),%edx ++ xorl 60(%ebx),%edi ++ leal 64(%ebx),%ebx ++ movl %ebp,28(%eax) ++ movl (%esp),%ebp ++ movl %ecx,40(%eax) ++ movl 160(%esp),%ecx ++ movl %esi,44(%eax) ++ movl %edx,52(%eax) ++ movl %edi,60(%eax) ++ movl %ebp,(%eax) ++ leal 64(%eax),%eax ++ subl $64,%ecx ++ jnz L003outer_loop ++ jmp L006done ++L005tail: ++ addl 112(%esp),%edx ++ addl 120(%esp),%edi ++ movl %eax,(%esp) ++ movl %ebp,16(%esp) ++ movl %ecx,32(%esp) ++ movl %esi,36(%esp) ++ movl %edx,48(%esp) ++ movl %edi,56(%esp) ++ movl 4(%esp),%ebp ++ movl 8(%esp),%ecx ++ movl 12(%esp),%esi ++ movl 20(%esp),%edx ++ movl 24(%esp),%edi ++ addl $857760878,%ebp ++ addl $2036477234,%ecx ++ addl $1797285236,%esi ++ addl 84(%esp),%edx ++ addl 88(%esp),%edi ++ movl %ebp,4(%esp) ++ movl %ecx,8(%esp) ++ movl %esi,12(%esp) ++ movl %edx,20(%esp) ++ movl %edi,24(%esp) ++ movl 28(%esp),%ebp ++ movl 40(%esp),%ecx ++ movl 44(%esp),%esi ++ movl 52(%esp),%edx ++ movl 60(%esp),%edi ++ addl 92(%esp),%ebp ++ addl 104(%esp),%ecx ++ addl 108(%esp),%esi ++ addl 
116(%esp),%edx ++ addl 124(%esp),%edi ++ movl %ebp,28(%esp) ++ movl 156(%esp),%ebp ++ movl %ecx,40(%esp) ++ movl 152(%esp),%ecx ++ movl %esi,44(%esp) ++ xorl %esi,%esi ++ movl %edx,52(%esp) ++ movl %edi,60(%esp) ++ xorl %eax,%eax ++ xorl %edx,%edx ++L007tail_loop: ++ movb (%esi,%ebp,1),%al ++ movb (%esp,%esi,1),%dl ++ leal 1(%esi),%esi ++ xorb %dl,%al ++ movb %al,-1(%ecx,%esi,1) ++ decl %ebx ++ jnz L007tail_loop ++L006done: ++ addl $132,%esp ++L000no_data: ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _ChaCha20_ssse3 ++.private_extern _ChaCha20_ssse3 ++.align 4 ++_ChaCha20_ssse3: ++L_ChaCha20_ssse3_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++Lssse3_shortcut: ++ movl 20(%esp),%edi ++ movl 24(%esp),%esi ++ movl 28(%esp),%ecx ++ movl 32(%esp),%edx ++ movl 36(%esp),%ebx ++ movl %esp,%ebp ++ subl $524,%esp ++ andl $-64,%esp ++ movl %ebp,512(%esp) ++ leal Lssse3_data-Lpic_point(%eax),%eax ++ movdqu (%ebx),%xmm3 ++ cmpl $256,%ecx ++ jb L0081x ++ movl %edx,516(%esp) ++ movl %ebx,520(%esp) ++ subl $256,%ecx ++ leal 384(%esp),%ebp ++ movdqu (%edx),%xmm7 ++ pshufd $0,%xmm3,%xmm0 ++ pshufd $85,%xmm3,%xmm1 ++ pshufd $170,%xmm3,%xmm2 ++ pshufd $255,%xmm3,%xmm3 ++ paddd 48(%eax),%xmm0 ++ pshufd $0,%xmm7,%xmm4 ++ pshufd $85,%xmm7,%xmm5 ++ psubd 64(%eax),%xmm0 ++ pshufd $170,%xmm7,%xmm6 ++ pshufd $255,%xmm7,%xmm7 ++ movdqa %xmm0,64(%ebp) ++ movdqa %xmm1,80(%ebp) ++ movdqa %xmm2,96(%ebp) ++ movdqa %xmm3,112(%ebp) ++ movdqu 16(%edx),%xmm3 ++ movdqa %xmm4,-64(%ebp) ++ movdqa %xmm5,-48(%ebp) ++ movdqa %xmm6,-32(%ebp) ++ movdqa %xmm7,-16(%ebp) ++ movdqa 32(%eax),%xmm7 ++ leal 128(%esp),%ebx ++ pshufd $0,%xmm3,%xmm0 ++ pshufd $85,%xmm3,%xmm1 ++ pshufd $170,%xmm3,%xmm2 ++ pshufd $255,%xmm3,%xmm3 ++ pshufd $0,%xmm7,%xmm4 ++ pshufd $85,%xmm7,%xmm5 ++ pshufd $170,%xmm7,%xmm6 ++ pshufd $255,%xmm7,%xmm7 ++ movdqa %xmm0,(%ebp) ++ movdqa %xmm1,16(%ebp) ++ movdqa %xmm2,32(%ebp) ++ movdqa %xmm3,48(%ebp) ++ movdqa %xmm4,-128(%ebp) ++ movdqa %xmm5,-112(%ebp) ++ movdqa %xmm6,-96(%ebp) ++ movdqa %xmm7,-80(%ebp) ++ leal 128(%esi),%esi ++ leal 128(%edi),%edi ++ jmp L009outer_loop ++.align 4,0x90 ++L009outer_loop: ++ movdqa -112(%ebp),%xmm1 ++ movdqa -96(%ebp),%xmm2 ++ movdqa -80(%ebp),%xmm3 ++ movdqa -48(%ebp),%xmm5 ++ movdqa -32(%ebp),%xmm6 ++ movdqa -16(%ebp),%xmm7 ++ movdqa %xmm1,-112(%ebx) ++ movdqa %xmm2,-96(%ebx) ++ movdqa %xmm3,-80(%ebx) ++ movdqa %xmm5,-48(%ebx) ++ movdqa %xmm6,-32(%ebx) ++ movdqa %xmm7,-16(%ebx) ++ movdqa 32(%ebp),%xmm2 ++ movdqa 48(%ebp),%xmm3 ++ movdqa 64(%ebp),%xmm4 ++ movdqa 80(%ebp),%xmm5 ++ movdqa 96(%ebp),%xmm6 ++ movdqa 112(%ebp),%xmm7 ++ paddd 64(%eax),%xmm4 ++ movdqa %xmm2,32(%ebx) ++ movdqa %xmm3,48(%ebx) ++ movdqa %xmm4,64(%ebx) ++ movdqa %xmm5,80(%ebx) ++ movdqa %xmm6,96(%ebx) ++ movdqa %xmm7,112(%ebx) ++ movdqa %xmm4,64(%ebp) ++ movdqa -128(%ebp),%xmm0 ++ movdqa %xmm4,%xmm6 ++ movdqa -64(%ebp),%xmm3 ++ movdqa (%ebp),%xmm4 ++ movdqa 16(%ebp),%xmm5 ++ movl $10,%edx ++ nop ++.align 4,0x90 ++L010loop: ++ paddd %xmm3,%xmm0 ++ movdqa %xmm3,%xmm2 ++ pxor %xmm0,%xmm6 ++ pshufb (%eax),%xmm6 ++ paddd %xmm6,%xmm4 ++ pxor %xmm4,%xmm2 ++ movdqa -48(%ebx),%xmm3 ++ movdqa %xmm2,%xmm1 ++ pslld $12,%xmm2 ++ psrld $20,%xmm1 ++ por %xmm1,%xmm2 ++ movdqa -112(%ebx),%xmm1 ++ paddd %xmm2,%xmm0 ++ movdqa 80(%ebx),%xmm7 ++ pxor %xmm0,%xmm6 ++ movdqa %xmm0,-128(%ebx) ++ pshufb 16(%eax),%xmm6 ++ paddd %xmm6,%xmm4 ++ movdqa %xmm6,64(%ebx) ++ pxor %xmm4,%xmm2 ++ paddd %xmm3,%xmm1 ++ movdqa %xmm2,%xmm0 ++ pslld $7,%xmm2 ++ psrld $25,%xmm0 ++ pxor %xmm1,%xmm7 ++ por 
%xmm0,%xmm2 ++ movdqa %xmm4,(%ebx) ++ pshufb (%eax),%xmm7 ++ movdqa %xmm2,-64(%ebx) ++ paddd %xmm7,%xmm5 ++ movdqa 32(%ebx),%xmm4 ++ pxor %xmm5,%xmm3 ++ movdqa -32(%ebx),%xmm2 ++ movdqa %xmm3,%xmm0 ++ pslld $12,%xmm3 ++ psrld $20,%xmm0 ++ por %xmm0,%xmm3 ++ movdqa -96(%ebx),%xmm0 ++ paddd %xmm3,%xmm1 ++ movdqa 96(%ebx),%xmm6 ++ pxor %xmm1,%xmm7 ++ movdqa %xmm1,-112(%ebx) ++ pshufb 16(%eax),%xmm7 ++ paddd %xmm7,%xmm5 ++ movdqa %xmm7,80(%ebx) ++ pxor %xmm5,%xmm3 ++ paddd %xmm2,%xmm0 ++ movdqa %xmm3,%xmm1 ++ pslld $7,%xmm3 ++ psrld $25,%xmm1 ++ pxor %xmm0,%xmm6 ++ por %xmm1,%xmm3 ++ movdqa %xmm5,16(%ebx) ++ pshufb (%eax),%xmm6 ++ movdqa %xmm3,-48(%ebx) ++ paddd %xmm6,%xmm4 ++ movdqa 48(%ebx),%xmm5 ++ pxor %xmm4,%xmm2 ++ movdqa -16(%ebx),%xmm3 ++ movdqa %xmm2,%xmm1 ++ pslld $12,%xmm2 ++ psrld $20,%xmm1 ++ por %xmm1,%xmm2 ++ movdqa -80(%ebx),%xmm1 ++ paddd %xmm2,%xmm0 ++ movdqa 112(%ebx),%xmm7 ++ pxor %xmm0,%xmm6 ++ movdqa %xmm0,-96(%ebx) ++ pshufb 16(%eax),%xmm6 ++ paddd %xmm6,%xmm4 ++ movdqa %xmm6,96(%ebx) ++ pxor %xmm4,%xmm2 ++ paddd %xmm3,%xmm1 ++ movdqa %xmm2,%xmm0 ++ pslld $7,%xmm2 ++ psrld $25,%xmm0 ++ pxor %xmm1,%xmm7 ++ por %xmm0,%xmm2 ++ pshufb (%eax),%xmm7 ++ movdqa %xmm2,-32(%ebx) ++ paddd %xmm7,%xmm5 ++ pxor %xmm5,%xmm3 ++ movdqa -48(%ebx),%xmm2 ++ movdqa %xmm3,%xmm0 ++ pslld $12,%xmm3 ++ psrld $20,%xmm0 ++ por %xmm0,%xmm3 ++ movdqa -128(%ebx),%xmm0 ++ paddd %xmm3,%xmm1 ++ pxor %xmm1,%xmm7 ++ movdqa %xmm1,-80(%ebx) ++ pshufb 16(%eax),%xmm7 ++ paddd %xmm7,%xmm5 ++ movdqa %xmm7,%xmm6 ++ pxor %xmm5,%xmm3 ++ paddd %xmm2,%xmm0 ++ movdqa %xmm3,%xmm1 ++ pslld $7,%xmm3 ++ psrld $25,%xmm1 ++ pxor %xmm0,%xmm6 ++ por %xmm1,%xmm3 ++ pshufb (%eax),%xmm6 ++ movdqa %xmm3,-16(%ebx) ++ paddd %xmm6,%xmm4 ++ pxor %xmm4,%xmm2 ++ movdqa -32(%ebx),%xmm3 ++ movdqa %xmm2,%xmm1 ++ pslld $12,%xmm2 ++ psrld $20,%xmm1 ++ por %xmm1,%xmm2 ++ movdqa -112(%ebx),%xmm1 ++ paddd %xmm2,%xmm0 ++ movdqa 64(%ebx),%xmm7 ++ pxor %xmm0,%xmm6 ++ movdqa %xmm0,-128(%ebx) ++ pshufb 16(%eax),%xmm6 ++ paddd %xmm6,%xmm4 ++ movdqa %xmm6,112(%ebx) ++ pxor %xmm4,%xmm2 ++ paddd %xmm3,%xmm1 ++ movdqa %xmm2,%xmm0 ++ pslld $7,%xmm2 ++ psrld $25,%xmm0 ++ pxor %xmm1,%xmm7 ++ por %xmm0,%xmm2 ++ movdqa %xmm4,32(%ebx) ++ pshufb (%eax),%xmm7 ++ movdqa %xmm2,-48(%ebx) ++ paddd %xmm7,%xmm5 ++ movdqa (%ebx),%xmm4 ++ pxor %xmm5,%xmm3 ++ movdqa -16(%ebx),%xmm2 ++ movdqa %xmm3,%xmm0 ++ pslld $12,%xmm3 ++ psrld $20,%xmm0 ++ por %xmm0,%xmm3 ++ movdqa -96(%ebx),%xmm0 ++ paddd %xmm3,%xmm1 ++ movdqa 80(%ebx),%xmm6 ++ pxor %xmm1,%xmm7 ++ movdqa %xmm1,-112(%ebx) ++ pshufb 16(%eax),%xmm7 ++ paddd %xmm7,%xmm5 ++ movdqa %xmm7,64(%ebx) ++ pxor %xmm5,%xmm3 ++ paddd %xmm2,%xmm0 ++ movdqa %xmm3,%xmm1 ++ pslld $7,%xmm3 ++ psrld $25,%xmm1 ++ pxor %xmm0,%xmm6 ++ por %xmm1,%xmm3 ++ movdqa %xmm5,48(%ebx) ++ pshufb (%eax),%xmm6 ++ movdqa %xmm3,-32(%ebx) ++ paddd %xmm6,%xmm4 ++ movdqa 16(%ebx),%xmm5 ++ pxor %xmm4,%xmm2 ++ movdqa -64(%ebx),%xmm3 ++ movdqa %xmm2,%xmm1 ++ pslld $12,%xmm2 ++ psrld $20,%xmm1 ++ por %xmm1,%xmm2 ++ movdqa -80(%ebx),%xmm1 ++ paddd %xmm2,%xmm0 ++ movdqa 96(%ebx),%xmm7 ++ pxor %xmm0,%xmm6 ++ movdqa %xmm0,-96(%ebx) ++ pshufb 16(%eax),%xmm6 ++ paddd %xmm6,%xmm4 ++ movdqa %xmm6,80(%ebx) ++ pxor %xmm4,%xmm2 ++ paddd %xmm3,%xmm1 ++ movdqa %xmm2,%xmm0 ++ pslld $7,%xmm2 ++ psrld $25,%xmm0 ++ pxor %xmm1,%xmm7 ++ por %xmm0,%xmm2 ++ pshufb (%eax),%xmm7 ++ movdqa %xmm2,-16(%ebx) ++ paddd %xmm7,%xmm5 ++ pxor %xmm5,%xmm3 ++ movdqa %xmm3,%xmm0 ++ pslld $12,%xmm3 ++ psrld $20,%xmm0 ++ por %xmm0,%xmm3 ++ movdqa -128(%ebx),%xmm0 ++ paddd %xmm3,%xmm1 ++ movdqa 
64(%ebx),%xmm6 ++ pxor %xmm1,%xmm7 ++ movdqa %xmm1,-80(%ebx) ++ pshufb 16(%eax),%xmm7 ++ paddd %xmm7,%xmm5 ++ movdqa %xmm7,96(%ebx) ++ pxor %xmm5,%xmm3 ++ movdqa %xmm3,%xmm1 ++ pslld $7,%xmm3 ++ psrld $25,%xmm1 ++ por %xmm1,%xmm3 ++ decl %edx ++ jnz L010loop ++ movdqa %xmm3,-64(%ebx) ++ movdqa %xmm4,(%ebx) ++ movdqa %xmm5,16(%ebx) ++ movdqa %xmm6,64(%ebx) ++ movdqa %xmm7,96(%ebx) ++ movdqa -112(%ebx),%xmm1 ++ movdqa -96(%ebx),%xmm2 ++ movdqa -80(%ebx),%xmm3 ++ paddd -128(%ebp),%xmm0 ++ paddd -112(%ebp),%xmm1 ++ paddd -96(%ebp),%xmm2 ++ paddd -80(%ebp),%xmm3 ++ movdqa %xmm0,%xmm6 ++ punpckldq %xmm1,%xmm0 ++ movdqa %xmm2,%xmm7 ++ punpckldq %xmm3,%xmm2 ++ punpckhdq %xmm1,%xmm6 ++ punpckhdq %xmm3,%xmm7 ++ movdqa %xmm0,%xmm1 ++ punpcklqdq %xmm2,%xmm0 ++ movdqa %xmm6,%xmm3 ++ punpcklqdq %xmm7,%xmm6 ++ punpckhqdq %xmm2,%xmm1 ++ punpckhqdq %xmm7,%xmm3 ++ movdqu -128(%esi),%xmm4 ++ movdqu -64(%esi),%xmm5 ++ movdqu (%esi),%xmm2 ++ movdqu 64(%esi),%xmm7 ++ leal 16(%esi),%esi ++ pxor %xmm0,%xmm4 ++ movdqa -64(%ebx),%xmm0 ++ pxor %xmm1,%xmm5 ++ movdqa -48(%ebx),%xmm1 ++ pxor %xmm2,%xmm6 ++ movdqa -32(%ebx),%xmm2 ++ pxor %xmm3,%xmm7 ++ movdqa -16(%ebx),%xmm3 ++ movdqu %xmm4,-128(%edi) ++ movdqu %xmm5,-64(%edi) ++ movdqu %xmm6,(%edi) ++ movdqu %xmm7,64(%edi) ++ leal 16(%edi),%edi ++ paddd -64(%ebp),%xmm0 ++ paddd -48(%ebp),%xmm1 ++ paddd -32(%ebp),%xmm2 ++ paddd -16(%ebp),%xmm3 ++ movdqa %xmm0,%xmm6 ++ punpckldq %xmm1,%xmm0 ++ movdqa %xmm2,%xmm7 ++ punpckldq %xmm3,%xmm2 ++ punpckhdq %xmm1,%xmm6 ++ punpckhdq %xmm3,%xmm7 ++ movdqa %xmm0,%xmm1 ++ punpcklqdq %xmm2,%xmm0 ++ movdqa %xmm6,%xmm3 ++ punpcklqdq %xmm7,%xmm6 ++ punpckhqdq %xmm2,%xmm1 ++ punpckhqdq %xmm7,%xmm3 ++ movdqu -128(%esi),%xmm4 ++ movdqu -64(%esi),%xmm5 ++ movdqu (%esi),%xmm2 ++ movdqu 64(%esi),%xmm7 ++ leal 16(%esi),%esi ++ pxor %xmm0,%xmm4 ++ movdqa (%ebx),%xmm0 ++ pxor %xmm1,%xmm5 ++ movdqa 16(%ebx),%xmm1 ++ pxor %xmm2,%xmm6 ++ movdqa 32(%ebx),%xmm2 ++ pxor %xmm3,%xmm7 ++ movdqa 48(%ebx),%xmm3 ++ movdqu %xmm4,-128(%edi) ++ movdqu %xmm5,-64(%edi) ++ movdqu %xmm6,(%edi) ++ movdqu %xmm7,64(%edi) ++ leal 16(%edi),%edi ++ paddd (%ebp),%xmm0 ++ paddd 16(%ebp),%xmm1 ++ paddd 32(%ebp),%xmm2 ++ paddd 48(%ebp),%xmm3 ++ movdqa %xmm0,%xmm6 ++ punpckldq %xmm1,%xmm0 ++ movdqa %xmm2,%xmm7 ++ punpckldq %xmm3,%xmm2 ++ punpckhdq %xmm1,%xmm6 ++ punpckhdq %xmm3,%xmm7 ++ movdqa %xmm0,%xmm1 ++ punpcklqdq %xmm2,%xmm0 ++ movdqa %xmm6,%xmm3 ++ punpcklqdq %xmm7,%xmm6 ++ punpckhqdq %xmm2,%xmm1 ++ punpckhqdq %xmm7,%xmm3 ++ movdqu -128(%esi),%xmm4 ++ movdqu -64(%esi),%xmm5 ++ movdqu (%esi),%xmm2 ++ movdqu 64(%esi),%xmm7 ++ leal 16(%esi),%esi ++ pxor %xmm0,%xmm4 ++ movdqa 64(%ebx),%xmm0 ++ pxor %xmm1,%xmm5 ++ movdqa 80(%ebx),%xmm1 ++ pxor %xmm2,%xmm6 ++ movdqa 96(%ebx),%xmm2 ++ pxor %xmm3,%xmm7 ++ movdqa 112(%ebx),%xmm3 ++ movdqu %xmm4,-128(%edi) ++ movdqu %xmm5,-64(%edi) ++ movdqu %xmm6,(%edi) ++ movdqu %xmm7,64(%edi) ++ leal 16(%edi),%edi ++ paddd 64(%ebp),%xmm0 ++ paddd 80(%ebp),%xmm1 ++ paddd 96(%ebp),%xmm2 ++ paddd 112(%ebp),%xmm3 ++ movdqa %xmm0,%xmm6 ++ punpckldq %xmm1,%xmm0 ++ movdqa %xmm2,%xmm7 ++ punpckldq %xmm3,%xmm2 ++ punpckhdq %xmm1,%xmm6 ++ punpckhdq %xmm3,%xmm7 ++ movdqa %xmm0,%xmm1 ++ punpcklqdq %xmm2,%xmm0 ++ movdqa %xmm6,%xmm3 ++ punpcklqdq %xmm7,%xmm6 ++ punpckhqdq %xmm2,%xmm1 ++ punpckhqdq %xmm7,%xmm3 ++ movdqu -128(%esi),%xmm4 ++ movdqu -64(%esi),%xmm5 ++ movdqu (%esi),%xmm2 ++ movdqu 64(%esi),%xmm7 ++ leal 208(%esi),%esi ++ pxor %xmm0,%xmm4 ++ pxor %xmm1,%xmm5 ++ pxor %xmm2,%xmm6 ++ pxor %xmm3,%xmm7 ++ movdqu %xmm4,-128(%edi) ++ movdqu 
%xmm5,-64(%edi) ++ movdqu %xmm6,(%edi) ++ movdqu %xmm7,64(%edi) ++ leal 208(%edi),%edi ++ subl $256,%ecx ++ jnc L009outer_loop ++ addl $256,%ecx ++ jz L011done ++ movl 520(%esp),%ebx ++ leal -128(%esi),%esi ++ movl 516(%esp),%edx ++ leal -128(%edi),%edi ++ movd 64(%ebp),%xmm2 ++ movdqu (%ebx),%xmm3 ++ paddd 96(%eax),%xmm2 ++ pand 112(%eax),%xmm3 ++ por %xmm2,%xmm3 ++L0081x: ++ movdqa 32(%eax),%xmm0 ++ movdqu (%edx),%xmm1 ++ movdqu 16(%edx),%xmm2 ++ movdqa (%eax),%xmm6 ++ movdqa 16(%eax),%xmm7 ++ movl %ebp,48(%esp) ++ movdqa %xmm0,(%esp) ++ movdqa %xmm1,16(%esp) ++ movdqa %xmm2,32(%esp) ++ movdqa %xmm3,48(%esp) ++ movl $10,%edx ++ jmp L012loop1x ++.align 4,0x90 ++L013outer1x: ++ movdqa 80(%eax),%xmm3 ++ movdqa (%esp),%xmm0 ++ movdqa 16(%esp),%xmm1 ++ movdqa 32(%esp),%xmm2 ++ paddd 48(%esp),%xmm3 ++ movl $10,%edx ++ movdqa %xmm3,48(%esp) ++ jmp L012loop1x ++.align 4,0x90 ++L012loop1x: ++ paddd %xmm1,%xmm0 ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,222 ++ paddd %xmm3,%xmm2 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm1,%xmm4 ++ psrld $20,%xmm1 ++ pslld $12,%xmm4 ++ por %xmm4,%xmm1 ++ paddd %xmm1,%xmm0 ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,223 ++ paddd %xmm3,%xmm2 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm1,%xmm4 ++ psrld $25,%xmm1 ++ pslld $7,%xmm4 ++ por %xmm4,%xmm1 ++ pshufd $78,%xmm2,%xmm2 ++ pshufd $57,%xmm1,%xmm1 ++ pshufd $147,%xmm3,%xmm3 ++ nop ++ paddd %xmm1,%xmm0 ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,222 ++ paddd %xmm3,%xmm2 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm1,%xmm4 ++ psrld $20,%xmm1 ++ pslld $12,%xmm4 ++ por %xmm4,%xmm1 ++ paddd %xmm1,%xmm0 ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,223 ++ paddd %xmm3,%xmm2 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm1,%xmm4 ++ psrld $25,%xmm1 ++ pslld $7,%xmm4 ++ por %xmm4,%xmm1 ++ pshufd $78,%xmm2,%xmm2 ++ pshufd $147,%xmm1,%xmm1 ++ pshufd $57,%xmm3,%xmm3 ++ decl %edx ++ jnz L012loop1x ++ paddd (%esp),%xmm0 ++ paddd 16(%esp),%xmm1 ++ paddd 32(%esp),%xmm2 ++ paddd 48(%esp),%xmm3 ++ cmpl $64,%ecx ++ jb L014tail ++ movdqu (%esi),%xmm4 ++ movdqu 16(%esi),%xmm5 ++ pxor %xmm4,%xmm0 ++ movdqu 32(%esi),%xmm4 ++ pxor %xmm5,%xmm1 ++ movdqu 48(%esi),%xmm5 ++ pxor %xmm4,%xmm2 ++ pxor %xmm5,%xmm3 ++ leal 64(%esi),%esi ++ movdqu %xmm0,(%edi) ++ movdqu %xmm1,16(%edi) ++ movdqu %xmm2,32(%edi) ++ movdqu %xmm3,48(%edi) ++ leal 64(%edi),%edi ++ subl $64,%ecx ++ jnz L013outer1x ++ jmp L011done ++L014tail: ++ movdqa %xmm0,(%esp) ++ movdqa %xmm1,16(%esp) ++ movdqa %xmm2,32(%esp) ++ movdqa %xmm3,48(%esp) ++ xorl %eax,%eax ++ xorl %edx,%edx ++ xorl %ebp,%ebp ++L015tail_loop: ++ movb (%esp,%ebp,1),%al ++ movb (%esi,%ebp,1),%dl ++ leal 1(%ebp),%ebp ++ xorb %dl,%al ++ movb %al,-1(%edi,%ebp,1) ++ decl %ecx ++ jnz L015tail_loop ++L011done: ++ movl 512(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 6,0x90 ++Lssse3_data: ++.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 ++.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 ++.long 1634760805,857760878,2036477234,1797285236 ++.long 0,1,2,3 ++.long 4,4,4,4 ++.long 1,0,0,0 ++.long 4,0,0,0 ++.long 0,-1,-1,-1 ++.align 6,0x90 ++.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 ++.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 ++.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 ++.byte 114,103,62,0 ++.section __IMPORT,__pointers,non_lazy_symbol_pointers ++L_OPENSSL_ia32cap_P$non_lazy_ptr: ++.indirect_symbol _OPENSSL_ia32cap_P ++.long 0 ++#endif +diff --git a/apple-x86/crypto/fipsmodule/aesni-x86.S b/apple-x86/crypto/fipsmodule/aesni-x86.S +new file mode 100644 +index 0000000..00f6003 +--- /dev/null ++++ 
b/apple-x86/crypto/fipsmodule/aesni-x86.S +@@ -0,0 +1,2476 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++#ifdef BORINGSSL_DISPATCH_TEST ++#endif ++.globl _aes_hw_encrypt ++.private_extern _aes_hw_encrypt ++.align 4 ++_aes_hw_encrypt: ++L_aes_hw_encrypt_begin: ++#ifdef BORINGSSL_DISPATCH_TEST ++ pushl %ebx ++ pushl %edx ++ call L000pic ++L000pic: ++ popl %ebx ++ leal _BORINGSSL_function_hit+1-L000pic(%ebx),%ebx ++ movl $1,%edx ++ movb %dl,(%ebx) ++ popl %edx ++ popl %ebx ++#endif ++ movl 4(%esp),%eax ++ movl 12(%esp),%edx ++ movups (%eax),%xmm2 ++ movl 240(%edx),%ecx ++ movl 8(%esp),%eax ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L001enc1_loop_1: ++.byte 102,15,56,220,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L001enc1_loop_1 ++.byte 102,15,56,221,209 ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ movups %xmm2,(%eax) ++ pxor %xmm2,%xmm2 ++ ret ++.globl _aes_hw_decrypt ++.private_extern _aes_hw_decrypt ++.align 4 ++_aes_hw_decrypt: ++L_aes_hw_decrypt_begin: ++ movl 4(%esp),%eax ++ movl 12(%esp),%edx ++ movups (%eax),%xmm2 ++ movl 240(%edx),%ecx ++ movl 8(%esp),%eax ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L002dec1_loop_2: ++.byte 102,15,56,222,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L002dec1_loop_2 ++.byte 102,15,56,223,209 ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ movups %xmm2,(%eax) ++ pxor %xmm2,%xmm2 ++ ret ++.private_extern __aesni_encrypt2 ++.align 4 ++__aesni_encrypt2: ++ movups (%edx),%xmm0 ++ shll $4,%ecx ++ movups 16(%edx),%xmm1 ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ movups 32(%edx),%xmm0 ++ leal 32(%edx,%ecx,1),%edx ++ negl %ecx ++ addl $16,%ecx ++L003enc2_loop: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L003enc2_loop ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++ ret ++.private_extern __aesni_decrypt2 ++.align 4 ++__aesni_decrypt2: ++ movups (%edx),%xmm0 ++ shll $4,%ecx ++ movups 16(%edx),%xmm1 ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ movups 32(%edx),%xmm0 ++ leal 32(%edx,%ecx,1),%edx ++ negl %ecx ++ addl $16,%ecx ++L004dec2_loop: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L004dec2_loop ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,223,208 ++.byte 102,15,56,223,216 ++ ret ++.private_extern __aesni_encrypt3 ++.align 4 ++__aesni_encrypt3: ++ movups (%edx),%xmm0 ++ shll $4,%ecx ++ movups 16(%edx),%xmm1 ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++ movups 32(%edx),%xmm0 ++ leal 32(%edx,%ecx,1),%edx ++ negl %ecx ++ addl $16,%ecx ++L005enc3_loop: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++.byte 102,15,56,220,224 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L005enc3_loop ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++.byte 102,15,56,221,224 ++ ret 
++.private_extern __aesni_decrypt3 ++.align 4 ++__aesni_decrypt3: ++ movups (%edx),%xmm0 ++ shll $4,%ecx ++ movups 16(%edx),%xmm1 ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++ movups 32(%edx),%xmm0 ++ leal 32(%edx,%ecx,1),%edx ++ negl %ecx ++ addl $16,%ecx ++L006dec3_loop: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L006dec3_loop ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,223,208 ++.byte 102,15,56,223,216 ++.byte 102,15,56,223,224 ++ ret ++.private_extern __aesni_encrypt4 ++.align 4 ++__aesni_encrypt4: ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ shll $4,%ecx ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++ pxor %xmm0,%xmm5 ++ movups 32(%edx),%xmm0 ++ leal 32(%edx,%ecx,1),%edx ++ negl %ecx ++.byte 15,31,64,0 ++ addl $16,%ecx ++L007enc4_loop: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L007enc4_loop ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++.byte 102,15,56,221,224 ++.byte 102,15,56,221,232 ++ ret ++.private_extern __aesni_decrypt4 ++.align 4 ++__aesni_decrypt4: ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ shll $4,%ecx ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++ pxor %xmm0,%xmm5 ++ movups 32(%edx),%xmm0 ++ leal 32(%edx,%ecx,1),%edx ++ negl %ecx ++.byte 15,31,64,0 ++ addl $16,%ecx ++L008dec4_loop: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L008dec4_loop ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,223,208 ++.byte 102,15,56,223,216 ++.byte 102,15,56,223,224 ++.byte 102,15,56,223,232 ++ ret ++.private_extern __aesni_encrypt6 ++.align 4 ++__aesni_encrypt6: ++ movups (%edx),%xmm0 ++ shll $4,%ecx ++ movups 16(%edx),%xmm1 ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++.byte 102,15,56,220,209 ++ pxor %xmm0,%xmm5 ++ pxor %xmm0,%xmm6 ++.byte 102,15,56,220,217 ++ leal 32(%edx,%ecx,1),%edx ++ negl %ecx ++.byte 102,15,56,220,225 ++ pxor %xmm0,%xmm7 ++ movups (%edx,%ecx,1),%xmm0 ++ addl $16,%ecx ++ jmp L009_aesni_encrypt6_inner ++.align 4,0x90 ++L010enc6_loop: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++L009_aesni_encrypt6_inner: ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++L_aesni_encrypt6_enter: ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L010enc6_loop ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 
102,15,56,221,208 ++.byte 102,15,56,221,216 ++.byte 102,15,56,221,224 ++.byte 102,15,56,221,232 ++.byte 102,15,56,221,240 ++.byte 102,15,56,221,248 ++ ret ++.private_extern __aesni_decrypt6 ++.align 4 ++__aesni_decrypt6: ++ movups (%edx),%xmm0 ++ shll $4,%ecx ++ movups 16(%edx),%xmm1 ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++.byte 102,15,56,222,209 ++ pxor %xmm0,%xmm5 ++ pxor %xmm0,%xmm6 ++.byte 102,15,56,222,217 ++ leal 32(%edx,%ecx,1),%edx ++ negl %ecx ++.byte 102,15,56,222,225 ++ pxor %xmm0,%xmm7 ++ movups (%edx,%ecx,1),%xmm0 ++ addl $16,%ecx ++ jmp L011_aesni_decrypt6_inner ++.align 4,0x90 ++L012dec6_loop: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++L011_aesni_decrypt6_inner: ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++L_aesni_decrypt6_enter: ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++.byte 102,15,56,222,240 ++.byte 102,15,56,222,248 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L012dec6_loop ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,15,56,223,208 ++.byte 102,15,56,223,216 ++.byte 102,15,56,223,224 ++.byte 102,15,56,223,232 ++.byte 102,15,56,223,240 ++.byte 102,15,56,223,248 ++ ret ++.globl _aes_hw_ecb_encrypt ++.private_extern _aes_hw_ecb_encrypt ++.align 4 ++_aes_hw_ecb_encrypt: ++L_aes_hw_ecb_encrypt_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%eax ++ movl 32(%esp),%edx ++ movl 36(%esp),%ebx ++ andl $-16,%eax ++ jz L013ecb_ret ++ movl 240(%edx),%ecx ++ testl %ebx,%ebx ++ jz L014ecb_decrypt ++ movl %edx,%ebp ++ movl %ecx,%ebx ++ cmpl $96,%eax ++ jb L015ecb_enc_tail ++ movdqu (%esi),%xmm2 ++ movdqu 16(%esi),%xmm3 ++ movdqu 32(%esi),%xmm4 ++ movdqu 48(%esi),%xmm5 ++ movdqu 64(%esi),%xmm6 ++ movdqu 80(%esi),%xmm7 ++ leal 96(%esi),%esi ++ subl $96,%eax ++ jmp L016ecb_enc_loop6_enter ++.align 4,0x90 ++L017ecb_enc_loop6: ++ movups %xmm2,(%edi) ++ movdqu (%esi),%xmm2 ++ movups %xmm3,16(%edi) ++ movdqu 16(%esi),%xmm3 ++ movups %xmm4,32(%edi) ++ movdqu 32(%esi),%xmm4 ++ movups %xmm5,48(%edi) ++ movdqu 48(%esi),%xmm5 ++ movups %xmm6,64(%edi) ++ movdqu 64(%esi),%xmm6 ++ movups %xmm7,80(%edi) ++ leal 96(%edi),%edi ++ movdqu 80(%esi),%xmm7 ++ leal 96(%esi),%esi ++L016ecb_enc_loop6_enter: ++ call __aesni_encrypt6 ++ movl %ebp,%edx ++ movl %ebx,%ecx ++ subl $96,%eax ++ jnc L017ecb_enc_loop6 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ movups %xmm6,64(%edi) ++ movups %xmm7,80(%edi) ++ leal 96(%edi),%edi ++ addl $96,%eax ++ jz L013ecb_ret ++L015ecb_enc_tail: ++ movups (%esi),%xmm2 ++ cmpl $32,%eax ++ jb L018ecb_enc_one ++ movups 16(%esi),%xmm3 ++ je L019ecb_enc_two ++ movups 32(%esi),%xmm4 ++ cmpl $64,%eax ++ jb L020ecb_enc_three ++ movups 48(%esi),%xmm5 ++ je L021ecb_enc_four ++ movups 64(%esi),%xmm6 ++ xorps %xmm7,%xmm7 ++ call __aesni_encrypt6 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ movups %xmm6,64(%edi) ++ jmp L013ecb_ret ++.align 4,0x90 ++L018ecb_enc_one: ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L022enc1_loop_3: ++.byte 102,15,56,220,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L022enc1_loop_3 ++.byte 
102,15,56,221,209 ++ movups %xmm2,(%edi) ++ jmp L013ecb_ret ++.align 4,0x90 ++L019ecb_enc_two: ++ call __aesni_encrypt2 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ jmp L013ecb_ret ++.align 4,0x90 ++L020ecb_enc_three: ++ call __aesni_encrypt3 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ jmp L013ecb_ret ++.align 4,0x90 ++L021ecb_enc_four: ++ call __aesni_encrypt4 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ jmp L013ecb_ret ++.align 4,0x90 ++L014ecb_decrypt: ++ movl %edx,%ebp ++ movl %ecx,%ebx ++ cmpl $96,%eax ++ jb L023ecb_dec_tail ++ movdqu (%esi),%xmm2 ++ movdqu 16(%esi),%xmm3 ++ movdqu 32(%esi),%xmm4 ++ movdqu 48(%esi),%xmm5 ++ movdqu 64(%esi),%xmm6 ++ movdqu 80(%esi),%xmm7 ++ leal 96(%esi),%esi ++ subl $96,%eax ++ jmp L024ecb_dec_loop6_enter ++.align 4,0x90 ++L025ecb_dec_loop6: ++ movups %xmm2,(%edi) ++ movdqu (%esi),%xmm2 ++ movups %xmm3,16(%edi) ++ movdqu 16(%esi),%xmm3 ++ movups %xmm4,32(%edi) ++ movdqu 32(%esi),%xmm4 ++ movups %xmm5,48(%edi) ++ movdqu 48(%esi),%xmm5 ++ movups %xmm6,64(%edi) ++ movdqu 64(%esi),%xmm6 ++ movups %xmm7,80(%edi) ++ leal 96(%edi),%edi ++ movdqu 80(%esi),%xmm7 ++ leal 96(%esi),%esi ++L024ecb_dec_loop6_enter: ++ call __aesni_decrypt6 ++ movl %ebp,%edx ++ movl %ebx,%ecx ++ subl $96,%eax ++ jnc L025ecb_dec_loop6 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ movups %xmm6,64(%edi) ++ movups %xmm7,80(%edi) ++ leal 96(%edi),%edi ++ addl $96,%eax ++ jz L013ecb_ret ++L023ecb_dec_tail: ++ movups (%esi),%xmm2 ++ cmpl $32,%eax ++ jb L026ecb_dec_one ++ movups 16(%esi),%xmm3 ++ je L027ecb_dec_two ++ movups 32(%esi),%xmm4 ++ cmpl $64,%eax ++ jb L028ecb_dec_three ++ movups 48(%esi),%xmm5 ++ je L029ecb_dec_four ++ movups 64(%esi),%xmm6 ++ xorps %xmm7,%xmm7 ++ call __aesni_decrypt6 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ movups %xmm6,64(%edi) ++ jmp L013ecb_ret ++.align 4,0x90 ++L026ecb_dec_one: ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L030dec1_loop_4: ++.byte 102,15,56,222,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L030dec1_loop_4 ++.byte 102,15,56,223,209 ++ movups %xmm2,(%edi) ++ jmp L013ecb_ret ++.align 4,0x90 ++L027ecb_dec_two: ++ call __aesni_decrypt2 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ jmp L013ecb_ret ++.align 4,0x90 ++L028ecb_dec_three: ++ call __aesni_decrypt3 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ jmp L013ecb_ret ++.align 4,0x90 ++L029ecb_dec_four: ++ call __aesni_decrypt4 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++L013ecb_ret: ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ pxor %xmm7,%xmm7 ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _aes_hw_ccm64_encrypt_blocks ++.private_extern _aes_hw_ccm64_encrypt_blocks ++.align 4 ++_aes_hw_ccm64_encrypt_blocks: ++L_aes_hw_ccm64_encrypt_blocks_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%eax ++ movl 32(%esp),%edx ++ movl 36(%esp),%ebx ++ movl 40(%esp),%ecx ++ movl %esp,%ebp ++ subl $60,%esp ++ andl $-16,%esp ++ movl %ebp,48(%esp) ++ movdqu (%ebx),%xmm7 ++ movdqu (%ecx),%xmm3 ++ movl 240(%edx),%ecx ++ movl $202182159,(%esp) ++ movl 
$134810123,4(%esp) ++ movl $67438087,8(%esp) ++ movl $66051,12(%esp) ++ movl $1,%ebx ++ xorl %ebp,%ebp ++ movl %ebx,16(%esp) ++ movl %ebp,20(%esp) ++ movl %ebp,24(%esp) ++ movl %ebp,28(%esp) ++ shll $4,%ecx ++ movl $16,%ebx ++ leal (%edx),%ebp ++ movdqa (%esp),%xmm5 ++ movdqa %xmm7,%xmm2 ++ leal 32(%edx,%ecx,1),%edx ++ subl %ecx,%ebx ++.byte 102,15,56,0,253 ++L031ccm64_enc_outer: ++ movups (%ebp),%xmm0 ++ movl %ebx,%ecx ++ movups (%esi),%xmm6 ++ xorps %xmm0,%xmm2 ++ movups 16(%ebp),%xmm1 ++ xorps %xmm6,%xmm0 ++ xorps %xmm0,%xmm3 ++ movups 32(%ebp),%xmm0 ++L032ccm64_enc2_loop: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L032ccm64_enc2_loop ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++ paddq 16(%esp),%xmm7 ++ decl %eax ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++ leal 16(%esi),%esi ++ xorps %xmm2,%xmm6 ++ movdqa %xmm7,%xmm2 ++ movups %xmm6,(%edi) ++.byte 102,15,56,0,213 ++ leal 16(%edi),%edi ++ jnz L031ccm64_enc_outer ++ movl 48(%esp),%esp ++ movl 40(%esp),%edi ++ movups %xmm3,(%edi) ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ pxor %xmm7,%xmm7 ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _aes_hw_ccm64_decrypt_blocks ++.private_extern _aes_hw_ccm64_decrypt_blocks ++.align 4 ++_aes_hw_ccm64_decrypt_blocks: ++L_aes_hw_ccm64_decrypt_blocks_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%eax ++ movl 32(%esp),%edx ++ movl 36(%esp),%ebx ++ movl 40(%esp),%ecx ++ movl %esp,%ebp ++ subl $60,%esp ++ andl $-16,%esp ++ movl %ebp,48(%esp) ++ movdqu (%ebx),%xmm7 ++ movdqu (%ecx),%xmm3 ++ movl 240(%edx),%ecx ++ movl $202182159,(%esp) ++ movl $134810123,4(%esp) ++ movl $67438087,8(%esp) ++ movl $66051,12(%esp) ++ movl $1,%ebx ++ xorl %ebp,%ebp ++ movl %ebx,16(%esp) ++ movl %ebp,20(%esp) ++ movl %ebp,24(%esp) ++ movl %ebp,28(%esp) ++ movdqa (%esp),%xmm5 ++ movdqa %xmm7,%xmm2 ++ movl %edx,%ebp ++ movl %ecx,%ebx ++.byte 102,15,56,0,253 ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L033enc1_loop_5: ++.byte 102,15,56,220,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L033enc1_loop_5 ++.byte 102,15,56,221,209 ++ shll $4,%ebx ++ movl $16,%ecx ++ movups (%esi),%xmm6 ++ paddq 16(%esp),%xmm7 ++ leal 16(%esi),%esi ++ subl %ebx,%ecx ++ leal 32(%ebp,%ebx,1),%edx ++ movl %ecx,%ebx ++ jmp L034ccm64_dec_outer ++.align 4,0x90 ++L034ccm64_dec_outer: ++ xorps %xmm2,%xmm6 ++ movdqa %xmm7,%xmm2 ++ movups %xmm6,(%edi) ++ leal 16(%edi),%edi ++.byte 102,15,56,0,213 ++ subl $1,%eax ++ jz L035ccm64_dec_break ++ movups (%ebp),%xmm0 ++ movl %ebx,%ecx ++ movups 16(%ebp),%xmm1 ++ xorps %xmm0,%xmm6 ++ xorps %xmm0,%xmm2 ++ xorps %xmm6,%xmm3 ++ movups 32(%ebp),%xmm0 ++L036ccm64_dec2_loop: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++ movups (%edx,%ecx,1),%xmm1 ++ addl $32,%ecx ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++ movups -16(%edx,%ecx,1),%xmm0 ++ jnz L036ccm64_dec2_loop ++ movups (%esi),%xmm6 ++ paddq 16(%esp),%xmm7 ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++ leal 16(%esi),%esi ++ jmp L034ccm64_dec_outer ++.align 4,0x90 ++L035ccm64_dec_break: ++ movl 240(%ebp),%ecx ++ movl %ebp,%edx ++ movups (%edx),%xmm0 ++ movups 
16(%edx),%xmm1 ++ xorps %xmm0,%xmm6 ++ leal 32(%edx),%edx ++ xorps %xmm6,%xmm3 ++L037enc1_loop_6: ++.byte 102,15,56,220,217 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L037enc1_loop_6 ++.byte 102,15,56,221,217 ++ movl 48(%esp),%esp ++ movl 40(%esp),%edi ++ movups %xmm3,(%edi) ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ pxor %xmm7,%xmm7 ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _aes_hw_ctr32_encrypt_blocks ++.private_extern _aes_hw_ctr32_encrypt_blocks ++.align 4 ++_aes_hw_ctr32_encrypt_blocks: ++L_aes_hw_ctr32_encrypt_blocks_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++#ifdef BORINGSSL_DISPATCH_TEST ++ pushl %ebx ++ pushl %edx ++ call L038pic ++L038pic: ++ popl %ebx ++ leal _BORINGSSL_function_hit+0-L038pic(%ebx),%ebx ++ movl $1,%edx ++ movb %dl,(%ebx) ++ popl %edx ++ popl %ebx ++#endif ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%eax ++ movl 32(%esp),%edx ++ movl 36(%esp),%ebx ++ movl %esp,%ebp ++ subl $88,%esp ++ andl $-16,%esp ++ movl %ebp,80(%esp) ++ cmpl $1,%eax ++ je L039ctr32_one_shortcut ++ movdqu (%ebx),%xmm7 ++ movl $202182159,(%esp) ++ movl $134810123,4(%esp) ++ movl $67438087,8(%esp) ++ movl $66051,12(%esp) ++ movl $6,%ecx ++ xorl %ebp,%ebp ++ movl %ecx,16(%esp) ++ movl %ecx,20(%esp) ++ movl %ecx,24(%esp) ++ movl %ebp,28(%esp) ++.byte 102,15,58,22,251,3 ++.byte 102,15,58,34,253,3 ++ movl 240(%edx),%ecx ++ bswap %ebx ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ movdqa (%esp),%xmm2 ++.byte 102,15,58,34,195,0 ++ leal 3(%ebx),%ebp ++.byte 102,15,58,34,205,0 ++ incl %ebx ++.byte 102,15,58,34,195,1 ++ incl %ebp ++.byte 102,15,58,34,205,1 ++ incl %ebx ++.byte 102,15,58,34,195,2 ++ incl %ebp ++.byte 102,15,58,34,205,2 ++ movdqa %xmm0,48(%esp) ++.byte 102,15,56,0,194 ++ movdqu (%edx),%xmm6 ++ movdqa %xmm1,64(%esp) ++.byte 102,15,56,0,202 ++ pshufd $192,%xmm0,%xmm2 ++ pshufd $128,%xmm0,%xmm3 ++ cmpl $6,%eax ++ jb L040ctr32_tail ++ pxor %xmm6,%xmm7 ++ shll $4,%ecx ++ movl $16,%ebx ++ movdqa %xmm7,32(%esp) ++ movl %edx,%ebp ++ subl %ecx,%ebx ++ leal 32(%edx,%ecx,1),%edx ++ subl $6,%eax ++ jmp L041ctr32_loop6 ++.align 4,0x90 ++L041ctr32_loop6: ++ pshufd $64,%xmm0,%xmm4 ++ movdqa 32(%esp),%xmm0 ++ pshufd $192,%xmm1,%xmm5 ++ pxor %xmm0,%xmm2 ++ pshufd $128,%xmm1,%xmm6 ++ pxor %xmm0,%xmm3 ++ pshufd $64,%xmm1,%xmm7 ++ movups 16(%ebp),%xmm1 ++ pxor %xmm0,%xmm4 ++ pxor %xmm0,%xmm5 ++.byte 102,15,56,220,209 ++ pxor %xmm0,%xmm6 ++ pxor %xmm0,%xmm7 ++.byte 102,15,56,220,217 ++ movups 32(%ebp),%xmm0 ++ movl %ebx,%ecx ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++ call L_aesni_encrypt6_enter ++ movups (%esi),%xmm1 ++ movups 16(%esi),%xmm0 ++ xorps %xmm1,%xmm2 ++ movups 32(%esi),%xmm1 ++ xorps %xmm0,%xmm3 ++ movups %xmm2,(%edi) ++ movdqa 16(%esp),%xmm0 ++ xorps %xmm1,%xmm4 ++ movdqa 64(%esp),%xmm1 ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ paddd %xmm0,%xmm1 ++ paddd 48(%esp),%xmm0 ++ movdqa (%esp),%xmm2 ++ movups 48(%esi),%xmm3 ++ movups 64(%esi),%xmm4 ++ xorps %xmm3,%xmm5 ++ movups 80(%esi),%xmm3 ++ leal 96(%esi),%esi ++ movdqa %xmm0,48(%esp) ++.byte 102,15,56,0,194 ++ xorps %xmm4,%xmm6 ++ movups %xmm5,48(%edi) ++ xorps %xmm3,%xmm7 ++ movdqa %xmm1,64(%esp) ++.byte 102,15,56,0,202 ++ movups %xmm6,64(%edi) ++ pshufd $192,%xmm0,%xmm2 ++ movups %xmm7,80(%edi) ++ leal 96(%edi),%edi ++ pshufd $128,%xmm0,%xmm3 ++ subl $6,%eax ++ jnc L041ctr32_loop6 ++ addl 
$6,%eax ++ jz L042ctr32_ret ++ movdqu (%ebp),%xmm7 ++ movl %ebp,%edx ++ pxor 32(%esp),%xmm7 ++ movl 240(%ebp),%ecx ++L040ctr32_tail: ++ por %xmm7,%xmm2 ++ cmpl $2,%eax ++ jb L043ctr32_one ++ pshufd $64,%xmm0,%xmm4 ++ por %xmm7,%xmm3 ++ je L044ctr32_two ++ pshufd $192,%xmm1,%xmm5 ++ por %xmm7,%xmm4 ++ cmpl $4,%eax ++ jb L045ctr32_three ++ pshufd $128,%xmm1,%xmm6 ++ por %xmm7,%xmm5 ++ je L046ctr32_four ++ por %xmm7,%xmm6 ++ call __aesni_encrypt6 ++ movups (%esi),%xmm1 ++ movups 16(%esi),%xmm0 ++ xorps %xmm1,%xmm2 ++ movups 32(%esi),%xmm1 ++ xorps %xmm0,%xmm3 ++ movups 48(%esi),%xmm0 ++ xorps %xmm1,%xmm4 ++ movups 64(%esi),%xmm1 ++ xorps %xmm0,%xmm5 ++ movups %xmm2,(%edi) ++ xorps %xmm1,%xmm6 ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ movups %xmm6,64(%edi) ++ jmp L042ctr32_ret ++.align 4,0x90 ++L039ctr32_one_shortcut: ++ movups (%ebx),%xmm2 ++ movl 240(%edx),%ecx ++L043ctr32_one: ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L047enc1_loop_7: ++.byte 102,15,56,220,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L047enc1_loop_7 ++.byte 102,15,56,221,209 ++ movups (%esi),%xmm6 ++ xorps %xmm2,%xmm6 ++ movups %xmm6,(%edi) ++ jmp L042ctr32_ret ++.align 4,0x90 ++L044ctr32_two: ++ call __aesni_encrypt2 ++ movups (%esi),%xmm5 ++ movups 16(%esi),%xmm6 ++ xorps %xmm5,%xmm2 ++ xorps %xmm6,%xmm3 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ jmp L042ctr32_ret ++.align 4,0x90 ++L045ctr32_three: ++ call __aesni_encrypt3 ++ movups (%esi),%xmm5 ++ movups 16(%esi),%xmm6 ++ xorps %xmm5,%xmm2 ++ movups 32(%esi),%xmm7 ++ xorps %xmm6,%xmm3 ++ movups %xmm2,(%edi) ++ xorps %xmm7,%xmm4 ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ jmp L042ctr32_ret ++.align 4,0x90 ++L046ctr32_four: ++ call __aesni_encrypt4 ++ movups (%esi),%xmm6 ++ movups 16(%esi),%xmm7 ++ movups 32(%esi),%xmm1 ++ xorps %xmm6,%xmm2 ++ movups 48(%esi),%xmm0 ++ xorps %xmm7,%xmm3 ++ movups %xmm2,(%edi) ++ xorps %xmm1,%xmm4 ++ movups %xmm3,16(%edi) ++ xorps %xmm0,%xmm5 ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++L042ctr32_ret: ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ movdqa %xmm0,32(%esp) ++ pxor %xmm5,%xmm5 ++ movdqa %xmm0,48(%esp) ++ pxor %xmm6,%xmm6 ++ movdqa %xmm0,64(%esp) ++ pxor %xmm7,%xmm7 ++ movl 80(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _aes_hw_xts_encrypt ++.private_extern _aes_hw_xts_encrypt ++.align 4 ++_aes_hw_xts_encrypt: ++L_aes_hw_xts_encrypt_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 36(%esp),%edx ++ movl 40(%esp),%esi ++ movl 240(%edx),%ecx ++ movups (%esi),%xmm2 ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L048enc1_loop_8: ++.byte 102,15,56,220,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L048enc1_loop_8 ++.byte 102,15,56,221,209 ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%eax ++ movl 32(%esp),%edx ++ movl %esp,%ebp ++ subl $120,%esp ++ movl 240(%edx),%ecx ++ andl $-16,%esp ++ movl $135,96(%esp) ++ movl $0,100(%esp) ++ movl $1,104(%esp) ++ movl $0,108(%esp) ++ movl %eax,112(%esp) ++ movl %ebp,116(%esp) ++ movdqa %xmm2,%xmm1 ++ pxor %xmm0,%xmm0 ++ movdqa 96(%esp),%xmm3 ++ pcmpgtd %xmm1,%xmm0 ++ andl $-16,%eax ++ movl %edx,%ebp ++ movl %ecx,%ebx ++ subl $96,%eax ++ jc L049xts_enc_short ++ shll $4,%ecx ++ movl $16,%ebx ++ subl %ecx,%ebx ++ leal 32(%edx,%ecx,1),%edx ++ jmp L050xts_enc_loop6 ++.align 
4,0x90 ++L050xts_enc_loop6: ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,16(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,32(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,48(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ pshufd $19,%xmm0,%xmm7 ++ movdqa %xmm1,64(%esp) ++ paddq %xmm1,%xmm1 ++ movups (%ebp),%xmm0 ++ pand %xmm3,%xmm7 ++ movups (%esi),%xmm2 ++ pxor %xmm1,%xmm7 ++ movl %ebx,%ecx ++ movdqu 16(%esi),%xmm3 ++ xorps %xmm0,%xmm2 ++ movdqu 32(%esi),%xmm4 ++ pxor %xmm0,%xmm3 ++ movdqu 48(%esi),%xmm5 ++ pxor %xmm0,%xmm4 ++ movdqu 64(%esi),%xmm6 ++ pxor %xmm0,%xmm5 ++ movdqu 80(%esi),%xmm1 ++ pxor %xmm0,%xmm6 ++ leal 96(%esi),%esi ++ pxor (%esp),%xmm2 ++ movdqa %xmm7,80(%esp) ++ pxor %xmm1,%xmm7 ++ movups 16(%ebp),%xmm1 ++ pxor 16(%esp),%xmm3 ++ pxor 32(%esp),%xmm4 ++.byte 102,15,56,220,209 ++ pxor 48(%esp),%xmm5 ++ pxor 64(%esp),%xmm6 ++.byte 102,15,56,220,217 ++ pxor %xmm0,%xmm7 ++ movups 32(%ebp),%xmm0 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++ call L_aesni_encrypt6_enter ++ movdqa 80(%esp),%xmm1 ++ pxor %xmm0,%xmm0 ++ xorps (%esp),%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ xorps 16(%esp),%xmm3 ++ movups %xmm2,(%edi) ++ xorps 32(%esp),%xmm4 ++ movups %xmm3,16(%edi) ++ xorps 48(%esp),%xmm5 ++ movups %xmm4,32(%edi) ++ xorps 64(%esp),%xmm6 ++ movups %xmm5,48(%edi) ++ xorps %xmm1,%xmm7 ++ movups %xmm6,64(%edi) ++ pshufd $19,%xmm0,%xmm2 ++ movups %xmm7,80(%edi) ++ leal 96(%edi),%edi ++ movdqa 96(%esp),%xmm3 ++ pxor %xmm0,%xmm0 ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ subl $96,%eax ++ jnc L050xts_enc_loop6 ++ movl 240(%ebp),%ecx ++ movl %ebp,%edx ++ movl %ecx,%ebx ++L049xts_enc_short: ++ addl $96,%eax ++ jz L051xts_enc_done6x ++ movdqa %xmm1,%xmm5 ++ cmpl $32,%eax ++ jb L052xts_enc_one ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ je L053xts_enc_two ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,%xmm6 ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ cmpl $64,%eax ++ jb L054xts_enc_three ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,%xmm7 ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm5,(%esp) ++ movdqa %xmm6,16(%esp) ++ je L055xts_enc_four ++ movdqa %xmm7,32(%esp) ++ pshufd $19,%xmm0,%xmm7 ++ movdqa %xmm1,48(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm7 ++ pxor %xmm1,%xmm7 ++ movdqu (%esi),%xmm2 ++ movdqu 16(%esi),%xmm3 ++ movdqu 32(%esi),%xmm4 ++ pxor (%esp),%xmm2 ++ movdqu 48(%esi),%xmm5 ++ pxor 16(%esp),%xmm3 ++ movdqu 64(%esi),%xmm6 ++ pxor 32(%esp),%xmm4 ++ leal 80(%esi),%esi ++ pxor 48(%esp),%xmm5 ++ movdqa %xmm7,64(%esp) ++ pxor %xmm7,%xmm6 ++ call __aesni_encrypt6 ++ movaps 64(%esp),%xmm1 ++ xorps (%esp),%xmm2 ++ xorps 16(%esp),%xmm3 ++ xorps 32(%esp),%xmm4 ++ movups %xmm2,(%edi) ++ xorps 48(%esp),%xmm5 ++ movups %xmm3,16(%edi) ++ xorps %xmm1,%xmm6 ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ movups %xmm6,64(%edi) ++ leal 80(%edi),%edi ++ jmp 
L056xts_enc_done ++.align 4,0x90 ++L052xts_enc_one: ++ movups (%esi),%xmm2 ++ leal 16(%esi),%esi ++ xorps %xmm5,%xmm2 ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L057enc1_loop_9: ++.byte 102,15,56,220,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L057enc1_loop_9 ++.byte 102,15,56,221,209 ++ xorps %xmm5,%xmm2 ++ movups %xmm2,(%edi) ++ leal 16(%edi),%edi ++ movdqa %xmm5,%xmm1 ++ jmp L056xts_enc_done ++.align 4,0x90 ++L053xts_enc_two: ++ movaps %xmm1,%xmm6 ++ movups (%esi),%xmm2 ++ movups 16(%esi),%xmm3 ++ leal 32(%esi),%esi ++ xorps %xmm5,%xmm2 ++ xorps %xmm6,%xmm3 ++ call __aesni_encrypt2 ++ xorps %xmm5,%xmm2 ++ xorps %xmm6,%xmm3 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ leal 32(%edi),%edi ++ movdqa %xmm6,%xmm1 ++ jmp L056xts_enc_done ++.align 4,0x90 ++L054xts_enc_three: ++ movaps %xmm1,%xmm7 ++ movups (%esi),%xmm2 ++ movups 16(%esi),%xmm3 ++ movups 32(%esi),%xmm4 ++ leal 48(%esi),%esi ++ xorps %xmm5,%xmm2 ++ xorps %xmm6,%xmm3 ++ xorps %xmm7,%xmm4 ++ call __aesni_encrypt3 ++ xorps %xmm5,%xmm2 ++ xorps %xmm6,%xmm3 ++ xorps %xmm7,%xmm4 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ leal 48(%edi),%edi ++ movdqa %xmm7,%xmm1 ++ jmp L056xts_enc_done ++.align 4,0x90 ++L055xts_enc_four: ++ movaps %xmm1,%xmm6 ++ movups (%esi),%xmm2 ++ movups 16(%esi),%xmm3 ++ movups 32(%esi),%xmm4 ++ xorps (%esp),%xmm2 ++ movups 48(%esi),%xmm5 ++ leal 64(%esi),%esi ++ xorps 16(%esp),%xmm3 ++ xorps %xmm7,%xmm4 ++ xorps %xmm6,%xmm5 ++ call __aesni_encrypt4 ++ xorps (%esp),%xmm2 ++ xorps 16(%esp),%xmm3 ++ xorps %xmm7,%xmm4 ++ movups %xmm2,(%edi) ++ xorps %xmm6,%xmm5 ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ leal 64(%edi),%edi ++ movdqa %xmm6,%xmm1 ++ jmp L056xts_enc_done ++.align 4,0x90 ++L051xts_enc_done6x: ++ movl 112(%esp),%eax ++ andl $15,%eax ++ jz L058xts_enc_ret ++ movdqa %xmm1,%xmm5 ++ movl %eax,112(%esp) ++ jmp L059xts_enc_steal ++.align 4,0x90 ++L056xts_enc_done: ++ movl 112(%esp),%eax ++ pxor %xmm0,%xmm0 ++ andl $15,%eax ++ jz L058xts_enc_ret ++ pcmpgtd %xmm1,%xmm0 ++ movl %eax,112(%esp) ++ pshufd $19,%xmm0,%xmm5 ++ paddq %xmm1,%xmm1 ++ pand 96(%esp),%xmm5 ++ pxor %xmm1,%xmm5 ++L059xts_enc_steal: ++ movzbl (%esi),%ecx ++ movzbl -16(%edi),%edx ++ leal 1(%esi),%esi ++ movb %cl,-16(%edi) ++ movb %dl,(%edi) ++ leal 1(%edi),%edi ++ subl $1,%eax ++ jnz L059xts_enc_steal ++ subl 112(%esp),%edi ++ movl %ebp,%edx ++ movl %ebx,%ecx ++ movups -16(%edi),%xmm2 ++ xorps %xmm5,%xmm2 ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L060enc1_loop_10: ++.byte 102,15,56,220,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L060enc1_loop_10 ++.byte 102,15,56,221,209 ++ xorps %xmm5,%xmm2 ++ movups %xmm2,-16(%edi) ++L058xts_enc_ret: ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ movdqa %xmm0,(%esp) ++ pxor %xmm3,%xmm3 ++ movdqa %xmm0,16(%esp) ++ pxor %xmm4,%xmm4 ++ movdqa %xmm0,32(%esp) ++ pxor %xmm5,%xmm5 ++ movdqa %xmm0,48(%esp) ++ pxor %xmm6,%xmm6 ++ movdqa %xmm0,64(%esp) ++ pxor %xmm7,%xmm7 ++ movdqa %xmm0,80(%esp) ++ movl 116(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _aes_hw_xts_decrypt ++.private_extern _aes_hw_xts_decrypt ++.align 4 ++_aes_hw_xts_decrypt: ++L_aes_hw_xts_decrypt_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 36(%esp),%edx ++ movl 40(%esp),%esi ++ movl 240(%edx),%ecx ++ movups (%esi),%xmm2 ++ movups (%edx),%xmm0 ++ movups 
16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L061enc1_loop_11: ++.byte 102,15,56,220,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L061enc1_loop_11 ++.byte 102,15,56,221,209 ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%eax ++ movl 32(%esp),%edx ++ movl %esp,%ebp ++ subl $120,%esp ++ andl $-16,%esp ++ xorl %ebx,%ebx ++ testl $15,%eax ++ setnz %bl ++ shll $4,%ebx ++ subl %ebx,%eax ++ movl $135,96(%esp) ++ movl $0,100(%esp) ++ movl $1,104(%esp) ++ movl $0,108(%esp) ++ movl %eax,112(%esp) ++ movl %ebp,116(%esp) ++ movl 240(%edx),%ecx ++ movl %edx,%ebp ++ movl %ecx,%ebx ++ movdqa %xmm2,%xmm1 ++ pxor %xmm0,%xmm0 ++ movdqa 96(%esp),%xmm3 ++ pcmpgtd %xmm1,%xmm0 ++ andl $-16,%eax ++ subl $96,%eax ++ jc L062xts_dec_short ++ shll $4,%ecx ++ movl $16,%ebx ++ subl %ecx,%ebx ++ leal 32(%edx,%ecx,1),%edx ++ jmp L063xts_dec_loop6 ++.align 4,0x90 ++L063xts_dec_loop6: ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,16(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,32(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,48(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ pshufd $19,%xmm0,%xmm7 ++ movdqa %xmm1,64(%esp) ++ paddq %xmm1,%xmm1 ++ movups (%ebp),%xmm0 ++ pand %xmm3,%xmm7 ++ movups (%esi),%xmm2 ++ pxor %xmm1,%xmm7 ++ movl %ebx,%ecx ++ movdqu 16(%esi),%xmm3 ++ xorps %xmm0,%xmm2 ++ movdqu 32(%esi),%xmm4 ++ pxor %xmm0,%xmm3 ++ movdqu 48(%esi),%xmm5 ++ pxor %xmm0,%xmm4 ++ movdqu 64(%esi),%xmm6 ++ pxor %xmm0,%xmm5 ++ movdqu 80(%esi),%xmm1 ++ pxor %xmm0,%xmm6 ++ leal 96(%esi),%esi ++ pxor (%esp),%xmm2 ++ movdqa %xmm7,80(%esp) ++ pxor %xmm1,%xmm7 ++ movups 16(%ebp),%xmm1 ++ pxor 16(%esp),%xmm3 ++ pxor 32(%esp),%xmm4 ++.byte 102,15,56,222,209 ++ pxor 48(%esp),%xmm5 ++ pxor 64(%esp),%xmm6 ++.byte 102,15,56,222,217 ++ pxor %xmm0,%xmm7 ++ movups 32(%ebp),%xmm0 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++ call L_aesni_decrypt6_enter ++ movdqa 80(%esp),%xmm1 ++ pxor %xmm0,%xmm0 ++ xorps (%esp),%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ xorps 16(%esp),%xmm3 ++ movups %xmm2,(%edi) ++ xorps 32(%esp),%xmm4 ++ movups %xmm3,16(%edi) ++ xorps 48(%esp),%xmm5 ++ movups %xmm4,32(%edi) ++ xorps 64(%esp),%xmm6 ++ movups %xmm5,48(%edi) ++ xorps %xmm1,%xmm7 ++ movups %xmm6,64(%edi) ++ pshufd $19,%xmm0,%xmm2 ++ movups %xmm7,80(%edi) ++ leal 96(%edi),%edi ++ movdqa 96(%esp),%xmm3 ++ pxor %xmm0,%xmm0 ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ subl $96,%eax ++ jnc L063xts_dec_loop6 ++ movl 240(%ebp),%ecx ++ movl %ebp,%edx ++ movl %ecx,%ebx ++L062xts_dec_short: ++ addl $96,%eax ++ jz L064xts_dec_done6x ++ movdqa %xmm1,%xmm5 ++ cmpl $32,%eax ++ jb L065xts_dec_one ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ je L066xts_dec_two ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,%xmm6 ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ cmpl $64,%eax ++ jb L067xts_dec_three ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa %xmm1,%xmm7 ++ paddq 
%xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm5,(%esp) ++ movdqa %xmm6,16(%esp) ++ je L068xts_dec_four ++ movdqa %xmm7,32(%esp) ++ pshufd $19,%xmm0,%xmm7 ++ movdqa %xmm1,48(%esp) ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm7 ++ pxor %xmm1,%xmm7 ++ movdqu (%esi),%xmm2 ++ movdqu 16(%esi),%xmm3 ++ movdqu 32(%esi),%xmm4 ++ pxor (%esp),%xmm2 ++ movdqu 48(%esi),%xmm5 ++ pxor 16(%esp),%xmm3 ++ movdqu 64(%esi),%xmm6 ++ pxor 32(%esp),%xmm4 ++ leal 80(%esi),%esi ++ pxor 48(%esp),%xmm5 ++ movdqa %xmm7,64(%esp) ++ pxor %xmm7,%xmm6 ++ call __aesni_decrypt6 ++ movaps 64(%esp),%xmm1 ++ xorps (%esp),%xmm2 ++ xorps 16(%esp),%xmm3 ++ xorps 32(%esp),%xmm4 ++ movups %xmm2,(%edi) ++ xorps 48(%esp),%xmm5 ++ movups %xmm3,16(%edi) ++ xorps %xmm1,%xmm6 ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ movups %xmm6,64(%edi) ++ leal 80(%edi),%edi ++ jmp L069xts_dec_done ++.align 4,0x90 ++L065xts_dec_one: ++ movups (%esi),%xmm2 ++ leal 16(%esi),%esi ++ xorps %xmm5,%xmm2 ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L070dec1_loop_12: ++.byte 102,15,56,222,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L070dec1_loop_12 ++.byte 102,15,56,223,209 ++ xorps %xmm5,%xmm2 ++ movups %xmm2,(%edi) ++ leal 16(%edi),%edi ++ movdqa %xmm5,%xmm1 ++ jmp L069xts_dec_done ++.align 4,0x90 ++L066xts_dec_two: ++ movaps %xmm1,%xmm6 ++ movups (%esi),%xmm2 ++ movups 16(%esi),%xmm3 ++ leal 32(%esi),%esi ++ xorps %xmm5,%xmm2 ++ xorps %xmm6,%xmm3 ++ call __aesni_decrypt2 ++ xorps %xmm5,%xmm2 ++ xorps %xmm6,%xmm3 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ leal 32(%edi),%edi ++ movdqa %xmm6,%xmm1 ++ jmp L069xts_dec_done ++.align 4,0x90 ++L067xts_dec_three: ++ movaps %xmm1,%xmm7 ++ movups (%esi),%xmm2 ++ movups 16(%esi),%xmm3 ++ movups 32(%esi),%xmm4 ++ leal 48(%esi),%esi ++ xorps %xmm5,%xmm2 ++ xorps %xmm6,%xmm3 ++ xorps %xmm7,%xmm4 ++ call __aesni_decrypt3 ++ xorps %xmm5,%xmm2 ++ xorps %xmm6,%xmm3 ++ xorps %xmm7,%xmm4 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ leal 48(%edi),%edi ++ movdqa %xmm7,%xmm1 ++ jmp L069xts_dec_done ++.align 4,0x90 ++L068xts_dec_four: ++ movaps %xmm1,%xmm6 ++ movups (%esi),%xmm2 ++ movups 16(%esi),%xmm3 ++ movups 32(%esi),%xmm4 ++ xorps (%esp),%xmm2 ++ movups 48(%esi),%xmm5 ++ leal 64(%esi),%esi ++ xorps 16(%esp),%xmm3 ++ xorps %xmm7,%xmm4 ++ xorps %xmm6,%xmm5 ++ call __aesni_decrypt4 ++ xorps (%esp),%xmm2 ++ xorps 16(%esp),%xmm3 ++ xorps %xmm7,%xmm4 ++ movups %xmm2,(%edi) ++ xorps %xmm6,%xmm5 ++ movups %xmm3,16(%edi) ++ movups %xmm4,32(%edi) ++ movups %xmm5,48(%edi) ++ leal 64(%edi),%edi ++ movdqa %xmm6,%xmm1 ++ jmp L069xts_dec_done ++.align 4,0x90 ++L064xts_dec_done6x: ++ movl 112(%esp),%eax ++ andl $15,%eax ++ jz L071xts_dec_ret ++ movl %eax,112(%esp) ++ jmp L072xts_dec_only_one_more ++.align 4,0x90 ++L069xts_dec_done: ++ movl 112(%esp),%eax ++ pxor %xmm0,%xmm0 ++ andl $15,%eax ++ jz L071xts_dec_ret ++ pcmpgtd %xmm1,%xmm0 ++ movl %eax,112(%esp) ++ pshufd $19,%xmm0,%xmm2 ++ pxor %xmm0,%xmm0 ++ movdqa 96(%esp),%xmm3 ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm2 ++ pcmpgtd %xmm1,%xmm0 ++ pxor %xmm2,%xmm1 ++L072xts_dec_only_one_more: ++ pshufd $19,%xmm0,%xmm5 ++ movdqa %xmm1,%xmm6 ++ paddq %xmm1,%xmm1 ++ pand %xmm3,%xmm5 ++ pxor %xmm1,%xmm5 ++ movl %ebp,%edx ++ movl %ebx,%ecx ++ movups (%esi),%xmm2 ++ xorps %xmm5,%xmm2 ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L073dec1_loop_13: ++.byte 102,15,56,222,209 ++ decl %ecx ++ movups 
(%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L073dec1_loop_13 ++.byte 102,15,56,223,209 ++ xorps %xmm5,%xmm2 ++ movups %xmm2,(%edi) ++L074xts_dec_steal: ++ movzbl 16(%esi),%ecx ++ movzbl (%edi),%edx ++ leal 1(%esi),%esi ++ movb %cl,(%edi) ++ movb %dl,16(%edi) ++ leal 1(%edi),%edi ++ subl $1,%eax ++ jnz L074xts_dec_steal ++ subl 112(%esp),%edi ++ movl %ebp,%edx ++ movl %ebx,%ecx ++ movups (%edi),%xmm2 ++ xorps %xmm6,%xmm2 ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L075dec1_loop_14: ++.byte 102,15,56,222,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L075dec1_loop_14 ++.byte 102,15,56,223,209 ++ xorps %xmm6,%xmm2 ++ movups %xmm2,(%edi) ++L071xts_dec_ret: ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ movdqa %xmm0,(%esp) ++ pxor %xmm3,%xmm3 ++ movdqa %xmm0,16(%esp) ++ pxor %xmm4,%xmm4 ++ movdqa %xmm0,32(%esp) ++ pxor %xmm5,%xmm5 ++ movdqa %xmm0,48(%esp) ++ pxor %xmm6,%xmm6 ++ movdqa %xmm0,64(%esp) ++ pxor %xmm7,%xmm7 ++ movdqa %xmm0,80(%esp) ++ movl 116(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _aes_hw_cbc_encrypt ++.private_extern _aes_hw_cbc_encrypt ++.align 4 ++_aes_hw_cbc_encrypt: ++L_aes_hw_cbc_encrypt_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%esi ++ movl %esp,%ebx ++ movl 24(%esp),%edi ++ subl $24,%ebx ++ movl 28(%esp),%eax ++ andl $-16,%ebx ++ movl 32(%esp),%edx ++ movl 36(%esp),%ebp ++ testl %eax,%eax ++ jz L076cbc_abort ++ cmpl $0,40(%esp) ++ xchgl %esp,%ebx ++ movups (%ebp),%xmm7 ++ movl 240(%edx),%ecx ++ movl %edx,%ebp ++ movl %ebx,16(%esp) ++ movl %ecx,%ebx ++ je L077cbc_decrypt ++ movaps %xmm7,%xmm2 ++ cmpl $16,%eax ++ jb L078cbc_enc_tail ++ subl $16,%eax ++ jmp L079cbc_enc_loop ++.align 4,0x90 ++L079cbc_enc_loop: ++ movups (%esi),%xmm7 ++ leal 16(%esi),%esi ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ xorps %xmm0,%xmm7 ++ leal 32(%edx),%edx ++ xorps %xmm7,%xmm2 ++L080enc1_loop_15: ++.byte 102,15,56,220,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L080enc1_loop_15 ++.byte 102,15,56,221,209 ++ movl %ebx,%ecx ++ movl %ebp,%edx ++ movups %xmm2,(%edi) ++ leal 16(%edi),%edi ++ subl $16,%eax ++ jnc L079cbc_enc_loop ++ addl $16,%eax ++ jnz L078cbc_enc_tail ++ movaps %xmm2,%xmm7 ++ pxor %xmm2,%xmm2 ++ jmp L081cbc_ret ++L078cbc_enc_tail: ++ movl %eax,%ecx ++.long 2767451785 ++ movl $16,%ecx ++ subl %eax,%ecx ++ xorl %eax,%eax ++.long 2868115081 ++ leal -16(%edi),%edi ++ movl %ebx,%ecx ++ movl %edi,%esi ++ movl %ebp,%edx ++ jmp L079cbc_enc_loop ++.align 4,0x90 ++L077cbc_decrypt: ++ cmpl $80,%eax ++ jbe L082cbc_dec_tail ++ movaps %xmm7,(%esp) ++ subl $80,%eax ++ jmp L083cbc_dec_loop6_enter ++.align 4,0x90 ++L084cbc_dec_loop6: ++ movaps %xmm0,(%esp) ++ movups %xmm7,(%edi) ++ leal 16(%edi),%edi ++L083cbc_dec_loop6_enter: ++ movdqu (%esi),%xmm2 ++ movdqu 16(%esi),%xmm3 ++ movdqu 32(%esi),%xmm4 ++ movdqu 48(%esi),%xmm5 ++ movdqu 64(%esi),%xmm6 ++ movdqu 80(%esi),%xmm7 ++ call __aesni_decrypt6 ++ movups (%esi),%xmm1 ++ movups 16(%esi),%xmm0 ++ xorps (%esp),%xmm2 ++ xorps %xmm1,%xmm3 ++ movups 32(%esi),%xmm1 ++ xorps %xmm0,%xmm4 ++ movups 48(%esi),%xmm0 ++ xorps %xmm1,%xmm5 ++ movups 64(%esi),%xmm1 ++ xorps %xmm0,%xmm6 ++ movups 80(%esi),%xmm0 ++ xorps %xmm1,%xmm7 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ leal 96(%esi),%esi ++ movups %xmm4,32(%edi) ++ movl %ebx,%ecx ++ movups %xmm5,48(%edi) ++ movl %ebp,%edx ++ movups %xmm6,64(%edi) ++ leal 80(%edi),%edi ++ subl $96,%eax ++ ja L084cbc_dec_loop6 ++ 
movaps %xmm7,%xmm2 ++ movaps %xmm0,%xmm7 ++ addl $80,%eax ++ jle L085cbc_dec_clear_tail_collected ++ movups %xmm2,(%edi) ++ leal 16(%edi),%edi ++L082cbc_dec_tail: ++ movups (%esi),%xmm2 ++ movaps %xmm2,%xmm6 ++ cmpl $16,%eax ++ jbe L086cbc_dec_one ++ movups 16(%esi),%xmm3 ++ movaps %xmm3,%xmm5 ++ cmpl $32,%eax ++ jbe L087cbc_dec_two ++ movups 32(%esi),%xmm4 ++ cmpl $48,%eax ++ jbe L088cbc_dec_three ++ movups 48(%esi),%xmm5 ++ cmpl $64,%eax ++ jbe L089cbc_dec_four ++ movups 64(%esi),%xmm6 ++ movaps %xmm7,(%esp) ++ movups (%esi),%xmm2 ++ xorps %xmm7,%xmm7 ++ call __aesni_decrypt6 ++ movups (%esi),%xmm1 ++ movups 16(%esi),%xmm0 ++ xorps (%esp),%xmm2 ++ xorps %xmm1,%xmm3 ++ movups 32(%esi),%xmm1 ++ xorps %xmm0,%xmm4 ++ movups 48(%esi),%xmm0 ++ xorps %xmm1,%xmm5 ++ movups 64(%esi),%xmm7 ++ xorps %xmm0,%xmm6 ++ movups %xmm2,(%edi) ++ movups %xmm3,16(%edi) ++ pxor %xmm3,%xmm3 ++ movups %xmm4,32(%edi) ++ pxor %xmm4,%xmm4 ++ movups %xmm5,48(%edi) ++ pxor %xmm5,%xmm5 ++ leal 64(%edi),%edi ++ movaps %xmm6,%xmm2 ++ pxor %xmm6,%xmm6 ++ subl $80,%eax ++ jmp L090cbc_dec_tail_collected ++.align 4,0x90 ++L086cbc_dec_one: ++ movups (%edx),%xmm0 ++ movups 16(%edx),%xmm1 ++ leal 32(%edx),%edx ++ xorps %xmm0,%xmm2 ++L091dec1_loop_16: ++.byte 102,15,56,222,209 ++ decl %ecx ++ movups (%edx),%xmm1 ++ leal 16(%edx),%edx ++ jnz L091dec1_loop_16 ++.byte 102,15,56,223,209 ++ xorps %xmm7,%xmm2 ++ movaps %xmm6,%xmm7 ++ subl $16,%eax ++ jmp L090cbc_dec_tail_collected ++.align 4,0x90 ++L087cbc_dec_two: ++ call __aesni_decrypt2 ++ xorps %xmm7,%xmm2 ++ xorps %xmm6,%xmm3 ++ movups %xmm2,(%edi) ++ movaps %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ leal 16(%edi),%edi ++ movaps %xmm5,%xmm7 ++ subl $32,%eax ++ jmp L090cbc_dec_tail_collected ++.align 4,0x90 ++L088cbc_dec_three: ++ call __aesni_decrypt3 ++ xorps %xmm7,%xmm2 ++ xorps %xmm6,%xmm3 ++ xorps %xmm5,%xmm4 ++ movups %xmm2,(%edi) ++ movaps %xmm4,%xmm2 ++ pxor %xmm4,%xmm4 ++ movups %xmm3,16(%edi) ++ pxor %xmm3,%xmm3 ++ leal 32(%edi),%edi ++ movups 32(%esi),%xmm7 ++ subl $48,%eax ++ jmp L090cbc_dec_tail_collected ++.align 4,0x90 ++L089cbc_dec_four: ++ call __aesni_decrypt4 ++ movups 16(%esi),%xmm1 ++ movups 32(%esi),%xmm0 ++ xorps %xmm7,%xmm2 ++ movups 48(%esi),%xmm7 ++ xorps %xmm6,%xmm3 ++ movups %xmm2,(%edi) ++ xorps %xmm1,%xmm4 ++ movups %xmm3,16(%edi) ++ pxor %xmm3,%xmm3 ++ xorps %xmm0,%xmm5 ++ movups %xmm4,32(%edi) ++ pxor %xmm4,%xmm4 ++ leal 48(%edi),%edi ++ movaps %xmm5,%xmm2 ++ pxor %xmm5,%xmm5 ++ subl $64,%eax ++ jmp L090cbc_dec_tail_collected ++.align 4,0x90 ++L085cbc_dec_clear_tail_collected: ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++L090cbc_dec_tail_collected: ++ andl $15,%eax ++ jnz L092cbc_dec_tail_partial ++ movups %xmm2,(%edi) ++ pxor %xmm0,%xmm0 ++ jmp L081cbc_ret ++.align 4,0x90 ++L092cbc_dec_tail_partial: ++ movaps %xmm2,(%esp) ++ pxor %xmm0,%xmm0 ++ movl $16,%ecx ++ movl %esp,%esi ++ subl %eax,%ecx ++.long 2767451785 ++ movdqa %xmm2,(%esp) ++L081cbc_ret: ++ movl 16(%esp),%esp ++ movl 36(%esp),%ebp ++ pxor %xmm2,%xmm2 ++ pxor %xmm1,%xmm1 ++ movups %xmm7,(%ebp) ++ pxor %xmm7,%xmm7 ++L076cbc_abort: ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.private_extern __aesni_set_encrypt_key ++.align 4 ++__aesni_set_encrypt_key: ++ pushl %ebp ++ pushl %ebx ++ testl %eax,%eax ++ jz L093bad_pointer ++ testl %edx,%edx ++ jz L093bad_pointer ++ call L094pic ++L094pic: ++ popl %ebx ++ leal Lkey_const-L094pic(%ebx),%ebx ++ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp ++ movups (%eax),%xmm0 ++ xorps %xmm4,%xmm4 ++ 
movl 4(%ebp),%ebp ++ leal 16(%edx),%edx ++ andl $268437504,%ebp ++ cmpl $256,%ecx ++ je L09514rounds ++ cmpl $192,%ecx ++ je L09612rounds ++ cmpl $128,%ecx ++ jne L097bad_keybits ++.align 4,0x90 ++L09810rounds: ++ cmpl $268435456,%ebp ++ je L09910rounds_alt ++ movl $9,%ecx ++ movups %xmm0,-16(%edx) ++.byte 102,15,58,223,200,1 ++ call L100key_128_cold ++.byte 102,15,58,223,200,2 ++ call L101key_128 ++.byte 102,15,58,223,200,4 ++ call L101key_128 ++.byte 102,15,58,223,200,8 ++ call L101key_128 ++.byte 102,15,58,223,200,16 ++ call L101key_128 ++.byte 102,15,58,223,200,32 ++ call L101key_128 ++.byte 102,15,58,223,200,64 ++ call L101key_128 ++.byte 102,15,58,223,200,128 ++ call L101key_128 ++.byte 102,15,58,223,200,27 ++ call L101key_128 ++.byte 102,15,58,223,200,54 ++ call L101key_128 ++ movups %xmm0,(%edx) ++ movl %ecx,80(%edx) ++ jmp L102good_key ++.align 4,0x90 ++L101key_128: ++ movups %xmm0,(%edx) ++ leal 16(%edx),%edx ++L100key_128_cold: ++ shufps $16,%xmm0,%xmm4 ++ xorps %xmm4,%xmm0 ++ shufps $140,%xmm0,%xmm4 ++ xorps %xmm4,%xmm0 ++ shufps $255,%xmm1,%xmm1 ++ xorps %xmm1,%xmm0 ++ ret ++.align 4,0x90 ++L09910rounds_alt: ++ movdqa (%ebx),%xmm5 ++ movl $8,%ecx ++ movdqa 32(%ebx),%xmm4 ++ movdqa %xmm0,%xmm2 ++ movdqu %xmm0,-16(%edx) ++L103loop_key128: ++.byte 102,15,56,0,197 ++.byte 102,15,56,221,196 ++ pslld $1,%xmm4 ++ leal 16(%edx),%edx ++ movdqa %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm3,%xmm2 ++ pxor %xmm2,%xmm0 ++ movdqu %xmm0,-16(%edx) ++ movdqa %xmm0,%xmm2 ++ decl %ecx ++ jnz L103loop_key128 ++ movdqa 48(%ebx),%xmm4 ++.byte 102,15,56,0,197 ++.byte 102,15,56,221,196 ++ pslld $1,%xmm4 ++ movdqa %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm3,%xmm2 ++ pxor %xmm2,%xmm0 ++ movdqu %xmm0,(%edx) ++ movdqa %xmm0,%xmm2 ++.byte 102,15,56,0,197 ++.byte 102,15,56,221,196 ++ movdqa %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm3,%xmm2 ++ pxor %xmm2,%xmm0 ++ movdqu %xmm0,16(%edx) ++ movl $9,%ecx ++ movl %ecx,96(%edx) ++ jmp L102good_key ++.align 4,0x90 ++L09612rounds: ++ movq 16(%eax),%xmm2 ++ cmpl $268435456,%ebp ++ je L10412rounds_alt ++ movl $11,%ecx ++ movups %xmm0,-16(%edx) ++.byte 102,15,58,223,202,1 ++ call L105key_192a_cold ++.byte 102,15,58,223,202,2 ++ call L106key_192b ++.byte 102,15,58,223,202,4 ++ call L107key_192a ++.byte 102,15,58,223,202,8 ++ call L106key_192b ++.byte 102,15,58,223,202,16 ++ call L107key_192a ++.byte 102,15,58,223,202,32 ++ call L106key_192b ++.byte 102,15,58,223,202,64 ++ call L107key_192a ++.byte 102,15,58,223,202,128 ++ call L106key_192b ++ movups %xmm0,(%edx) ++ movl %ecx,48(%edx) ++ jmp L102good_key ++.align 4,0x90 ++L107key_192a: ++ movups %xmm0,(%edx) ++ leal 16(%edx),%edx ++.align 4,0x90 ++L105key_192a_cold: ++ movaps %xmm2,%xmm5 ++L108key_192b_warm: ++ shufps $16,%xmm0,%xmm4 ++ movdqa %xmm2,%xmm3 ++ xorps %xmm4,%xmm0 ++ shufps $140,%xmm0,%xmm4 ++ pslldq $4,%xmm3 ++ xorps %xmm4,%xmm0 ++ pshufd $85,%xmm1,%xmm1 ++ pxor %xmm3,%xmm2 ++ pxor %xmm1,%xmm0 ++ pshufd $255,%xmm0,%xmm3 ++ pxor %xmm3,%xmm2 ++ ret ++.align 4,0x90 ++L106key_192b: ++ movaps %xmm0,%xmm3 ++ shufps $68,%xmm0,%xmm5 ++ movups %xmm5,(%edx) ++ shufps $78,%xmm2,%xmm3 ++ movups %xmm3,16(%edx) ++ leal 32(%edx),%edx ++ jmp L108key_192b_warm ++.align 4,0x90 ++L10412rounds_alt: ++ movdqa 16(%ebx),%xmm5 ++ movdqa 32(%ebx),%xmm4 ++ movl $8,%ecx ++ movdqu %xmm0,-16(%edx) 
++L109loop_key192: ++ movq %xmm2,(%edx) ++ movdqa %xmm2,%xmm1 ++.byte 102,15,56,0,213 ++.byte 102,15,56,221,212 ++ pslld $1,%xmm4 ++ leal 24(%edx),%edx ++ movdqa %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm3,%xmm0 ++ pshufd $255,%xmm0,%xmm3 ++ pxor %xmm1,%xmm3 ++ pslldq $4,%xmm1 ++ pxor %xmm1,%xmm3 ++ pxor %xmm2,%xmm0 ++ pxor %xmm3,%xmm2 ++ movdqu %xmm0,-16(%edx) ++ decl %ecx ++ jnz L109loop_key192 ++ movl $11,%ecx ++ movl %ecx,32(%edx) ++ jmp L102good_key ++.align 4,0x90 ++L09514rounds: ++ movups 16(%eax),%xmm2 ++ leal 16(%edx),%edx ++ cmpl $268435456,%ebp ++ je L11014rounds_alt ++ movl $13,%ecx ++ movups %xmm0,-32(%edx) ++ movups %xmm2,-16(%edx) ++.byte 102,15,58,223,202,1 ++ call L111key_256a_cold ++.byte 102,15,58,223,200,1 ++ call L112key_256b ++.byte 102,15,58,223,202,2 ++ call L113key_256a ++.byte 102,15,58,223,200,2 ++ call L112key_256b ++.byte 102,15,58,223,202,4 ++ call L113key_256a ++.byte 102,15,58,223,200,4 ++ call L112key_256b ++.byte 102,15,58,223,202,8 ++ call L113key_256a ++.byte 102,15,58,223,200,8 ++ call L112key_256b ++.byte 102,15,58,223,202,16 ++ call L113key_256a ++.byte 102,15,58,223,200,16 ++ call L112key_256b ++.byte 102,15,58,223,202,32 ++ call L113key_256a ++.byte 102,15,58,223,200,32 ++ call L112key_256b ++.byte 102,15,58,223,202,64 ++ call L113key_256a ++ movups %xmm0,(%edx) ++ movl %ecx,16(%edx) ++ xorl %eax,%eax ++ jmp L102good_key ++.align 4,0x90 ++L113key_256a: ++ movups %xmm2,(%edx) ++ leal 16(%edx),%edx ++L111key_256a_cold: ++ shufps $16,%xmm0,%xmm4 ++ xorps %xmm4,%xmm0 ++ shufps $140,%xmm0,%xmm4 ++ xorps %xmm4,%xmm0 ++ shufps $255,%xmm1,%xmm1 ++ xorps %xmm1,%xmm0 ++ ret ++.align 4,0x90 ++L112key_256b: ++ movups %xmm0,(%edx) ++ leal 16(%edx),%edx ++ shufps $16,%xmm2,%xmm4 ++ xorps %xmm4,%xmm2 ++ shufps $140,%xmm2,%xmm4 ++ xorps %xmm4,%xmm2 ++ shufps $170,%xmm1,%xmm1 ++ xorps %xmm1,%xmm2 ++ ret ++.align 4,0x90 ++L11014rounds_alt: ++ movdqa (%ebx),%xmm5 ++ movdqa 32(%ebx),%xmm4 ++ movl $7,%ecx ++ movdqu %xmm0,-32(%edx) ++ movdqa %xmm2,%xmm1 ++ movdqu %xmm2,-16(%edx) ++L114loop_key256: ++.byte 102,15,56,0,213 ++.byte 102,15,56,221,212 ++ movdqa %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm3,%xmm0 ++ pslld $1,%xmm4 ++ pxor %xmm2,%xmm0 ++ movdqu %xmm0,(%edx) ++ decl %ecx ++ jz L115done_key256 ++ pshufd $255,%xmm0,%xmm2 ++ pxor %xmm3,%xmm3 ++.byte 102,15,56,221,211 ++ movdqa %xmm1,%xmm3 ++ pslldq $4,%xmm1 ++ pxor %xmm1,%xmm3 ++ pslldq $4,%xmm1 ++ pxor %xmm1,%xmm3 ++ pslldq $4,%xmm1 ++ pxor %xmm3,%xmm1 ++ pxor %xmm1,%xmm2 ++ movdqu %xmm2,16(%edx) ++ leal 32(%edx),%edx ++ movdqa %xmm2,%xmm1 ++ jmp L114loop_key256 ++L115done_key256: ++ movl $13,%ecx ++ movl %ecx,16(%edx) ++L102good_key: ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ xorl %eax,%eax ++ popl %ebx ++ popl %ebp ++ ret ++.align 2,0x90 ++L093bad_pointer: ++ movl $-1,%eax ++ popl %ebx ++ popl %ebp ++ ret ++.align 2,0x90 ++L097bad_keybits: ++ pxor %xmm0,%xmm0 ++ movl $-2,%eax ++ popl %ebx ++ popl %ebp ++ ret ++.globl _aes_hw_set_encrypt_key ++.private_extern _aes_hw_set_encrypt_key ++.align 4 ++_aes_hw_set_encrypt_key: ++L_aes_hw_set_encrypt_key_begin: ++#ifdef BORINGSSL_DISPATCH_TEST ++ pushl %ebx ++ pushl %edx ++ call L116pic ++L116pic: ++ popl %ebx ++ leal _BORINGSSL_function_hit+3-L116pic(%ebx),%ebx ++ movl $1,%edx ++ movb %dl,(%ebx) ++ popl %edx ++ popl %ebx ++#endif ++ movl 
4(%esp),%eax ++ movl 8(%esp),%ecx ++ movl 12(%esp),%edx ++ call __aesni_set_encrypt_key ++ ret ++.globl _aes_hw_set_decrypt_key ++.private_extern _aes_hw_set_decrypt_key ++.align 4 ++_aes_hw_set_decrypt_key: ++L_aes_hw_set_decrypt_key_begin: ++ movl 4(%esp),%eax ++ movl 8(%esp),%ecx ++ movl 12(%esp),%edx ++ call __aesni_set_encrypt_key ++ movl 12(%esp),%edx ++ shll $4,%ecx ++ testl %eax,%eax ++ jnz L117dec_key_ret ++ leal 16(%edx,%ecx,1),%eax ++ movups (%edx),%xmm0 ++ movups (%eax),%xmm1 ++ movups %xmm0,(%eax) ++ movups %xmm1,(%edx) ++ leal 16(%edx),%edx ++ leal -16(%eax),%eax ++L118dec_key_inverse: ++ movups (%edx),%xmm0 ++ movups (%eax),%xmm1 ++.byte 102,15,56,219,192 ++.byte 102,15,56,219,201 ++ leal 16(%edx),%edx ++ leal -16(%eax),%eax ++ movups %xmm0,16(%eax) ++ movups %xmm1,-16(%edx) ++ cmpl %edx,%eax ++ ja L118dec_key_inverse ++ movups (%edx),%xmm0 ++.byte 102,15,56,219,192 ++ movups %xmm0,(%edx) ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ xorl %eax,%eax ++L117dec_key_ret: ++ ret ++.align 6,0x90 ++Lkey_const: ++.long 202313229,202313229,202313229,202313229 ++.long 67569157,67569157,67569157,67569157 ++.long 1,1,1,1 ++.long 27,27,27,27 ++.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 ++.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 ++.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 ++.byte 115,108,46,111,114,103,62,0 ++.section __IMPORT,__pointers,non_lazy_symbol_pointers ++L_OPENSSL_ia32cap_P$non_lazy_ptr: ++.indirect_symbol _OPENSSL_ia32cap_P ++.long 0 ++#endif +diff --git a/apple-x86/crypto/fipsmodule/bn-586.S b/apple-x86/crypto/fipsmodule/bn-586.S +new file mode 100644 +index 0000000..ede2e76 +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/bn-586.S +@@ -0,0 +1,988 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _bn_mul_add_words ++.private_extern _bn_mul_add_words ++.align 4 ++_bn_mul_add_words: ++L_bn_mul_add_words_begin: ++ call L000PIC_me_up ++L000PIC_me_up: ++ popl %eax ++ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%eax),%eax ++ btl $26,(%eax) ++ jnc L001maw_non_sse2 ++ movl 4(%esp),%eax ++ movl 8(%esp),%edx ++ movl 12(%esp),%ecx ++ movd 16(%esp),%mm0 ++ pxor %mm1,%mm1 ++ jmp L002maw_sse2_entry ++.align 4,0x90 ++L003maw_sse2_unrolled: ++ movd (%eax),%mm3 ++ paddq %mm3,%mm1 ++ movd (%edx),%mm2 ++ pmuludq %mm0,%mm2 ++ movd 4(%edx),%mm4 ++ pmuludq %mm0,%mm4 ++ movd 8(%edx),%mm6 ++ pmuludq %mm0,%mm6 ++ movd 12(%edx),%mm7 ++ pmuludq %mm0,%mm7 ++ paddq %mm2,%mm1 ++ movd 4(%eax),%mm3 ++ paddq %mm4,%mm3 ++ movd 8(%eax),%mm5 ++ paddq %mm6,%mm5 ++ movd 12(%eax),%mm4 ++ paddq %mm4,%mm7 ++ movd %mm1,(%eax) ++ movd 16(%edx),%mm2 ++ pmuludq %mm0,%mm2 ++ psrlq $32,%mm1 ++ movd 20(%edx),%mm4 ++ pmuludq %mm0,%mm4 ++ paddq %mm3,%mm1 ++ movd 24(%edx),%mm6 ++ pmuludq %mm0,%mm6 ++ movd %mm1,4(%eax) ++ psrlq $32,%mm1 ++ movd 28(%edx),%mm3 ++ addl $32,%edx ++ pmuludq %mm0,%mm3 ++ paddq %mm5,%mm1 ++ movd 16(%eax),%mm5 ++ paddq %mm5,%mm2 ++ movd %mm1,8(%eax) ++ psrlq $32,%mm1 ++ paddq %mm7,%mm1 ++ movd 20(%eax),%mm5 ++ paddq %mm5,%mm4 ++ movd %mm1,12(%eax) ++ psrlq $32,%mm1 ++ paddq %mm2,%mm1 ++ movd 24(%eax),%mm5 ++ paddq %mm5,%mm6 ++ movd %mm1,16(%eax) ++ psrlq $32,%mm1 ++ paddq %mm4,%mm1 ++ movd 28(%eax),%mm5 ++ paddq %mm5,%mm3 ++ movd %mm1,20(%eax) ++ psrlq $32,%mm1 ++ paddq %mm6,%mm1 ++ movd %mm1,24(%eax) ++ psrlq $32,%mm1 ++ paddq %mm3,%mm1 ++ movd %mm1,28(%eax) ++ leal 32(%eax),%eax ++ psrlq $32,%mm1 ++ subl $8,%ecx ++ jz L004maw_sse2_exit ++L002maw_sse2_entry: ++ testl $4294967288,%ecx ++ jnz L003maw_sse2_unrolled ++.align 2,0x90 ++L005maw_sse2_loop: ++ movd (%edx),%mm2 ++ movd (%eax),%mm3 ++ pmuludq %mm0,%mm2 ++ leal 4(%edx),%edx ++ paddq %mm3,%mm1 ++ paddq %mm2,%mm1 ++ movd %mm1,(%eax) ++ subl $1,%ecx ++ psrlq $32,%mm1 ++ leal 4(%eax),%eax ++ jnz L005maw_sse2_loop ++L004maw_sse2_exit: ++ movd %mm1,%eax ++ emms ++ ret ++.align 4,0x90 ++L001maw_non_sse2: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ ++ xorl %esi,%esi ++ movl 20(%esp),%edi ++ movl 28(%esp),%ecx ++ movl 24(%esp),%ebx ++ andl $4294967288,%ecx ++ movl 32(%esp),%ebp ++ pushl %ecx ++ jz L006maw_finish ++.align 4,0x90 ++L007maw_loop: ++ # Round 0 ++ movl (%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl (%edi),%eax ++ adcl $0,%edx ++ movl %eax,(%edi) ++ movl %edx,%esi ++ # Round 4 ++ movl 4(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 4(%edi),%eax ++ adcl $0,%edx ++ movl %eax,4(%edi) ++ movl %edx,%esi ++ # Round 8 ++ movl 8(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 8(%edi),%eax ++ adcl $0,%edx ++ movl %eax,8(%edi) ++ movl %edx,%esi ++ # Round 12 ++ movl 12(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 12(%edi),%eax ++ adcl $0,%edx ++ movl %eax,12(%edi) ++ movl %edx,%esi ++ # Round 16 ++ movl 16(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 16(%edi),%eax ++ adcl $0,%edx ++ movl %eax,16(%edi) ++ movl %edx,%esi ++ # Round 20 ++ movl 20(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 20(%edi),%eax ++ adcl $0,%edx ++ movl %eax,20(%edi) ++ movl %edx,%esi ++ # Round 24 ++ movl 24(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 24(%edi),%eax ++ adcl $0,%edx ++ movl %eax,24(%edi) ++ movl %edx,%esi ++ # 
Round 28 ++ movl 28(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 28(%edi),%eax ++ adcl $0,%edx ++ movl %eax,28(%edi) ++ movl %edx,%esi ++ ++ subl $8,%ecx ++ leal 32(%ebx),%ebx ++ leal 32(%edi),%edi ++ jnz L007maw_loop ++L006maw_finish: ++ movl 32(%esp),%ecx ++ andl $7,%ecx ++ jnz L008maw_finish2 ++ jmp L009maw_end ++L008maw_finish2: ++ # Tail Round 0 ++ movl (%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl (%edi),%eax ++ adcl $0,%edx ++ decl %ecx ++ movl %eax,(%edi) ++ movl %edx,%esi ++ jz L009maw_end ++ # Tail Round 1 ++ movl 4(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 4(%edi),%eax ++ adcl $0,%edx ++ decl %ecx ++ movl %eax,4(%edi) ++ movl %edx,%esi ++ jz L009maw_end ++ # Tail Round 2 ++ movl 8(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 8(%edi),%eax ++ adcl $0,%edx ++ decl %ecx ++ movl %eax,8(%edi) ++ movl %edx,%esi ++ jz L009maw_end ++ # Tail Round 3 ++ movl 12(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 12(%edi),%eax ++ adcl $0,%edx ++ decl %ecx ++ movl %eax,12(%edi) ++ movl %edx,%esi ++ jz L009maw_end ++ # Tail Round 4 ++ movl 16(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 16(%edi),%eax ++ adcl $0,%edx ++ decl %ecx ++ movl %eax,16(%edi) ++ movl %edx,%esi ++ jz L009maw_end ++ # Tail Round 5 ++ movl 20(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 20(%edi),%eax ++ adcl $0,%edx ++ decl %ecx ++ movl %eax,20(%edi) ++ movl %edx,%esi ++ jz L009maw_end ++ # Tail Round 6 ++ movl 24(%ebx),%eax ++ mull %ebp ++ addl %esi,%eax ++ adcl $0,%edx ++ addl 24(%edi),%eax ++ adcl $0,%edx ++ movl %eax,24(%edi) ++ movl %edx,%esi ++L009maw_end: ++ movl %esi,%eax ++ popl %ecx ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _bn_mul_words ++.private_extern _bn_mul_words ++.align 4 ++_bn_mul_words: ++L_bn_mul_words_begin: ++ call L010PIC_me_up ++L010PIC_me_up: ++ popl %eax ++ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%eax),%eax ++ btl $26,(%eax) ++ jnc L011mw_non_sse2 ++ movl 4(%esp),%eax ++ movl 8(%esp),%edx ++ movl 12(%esp),%ecx ++ movd 16(%esp),%mm0 ++ pxor %mm1,%mm1 ++.align 4,0x90 ++L012mw_sse2_loop: ++ movd (%edx),%mm2 ++ pmuludq %mm0,%mm2 ++ leal 4(%edx),%edx ++ paddq %mm2,%mm1 ++ movd %mm1,(%eax) ++ subl $1,%ecx ++ psrlq $32,%mm1 ++ leal 4(%eax),%eax ++ jnz L012mw_sse2_loop ++ movd %mm1,%eax ++ emms ++ ret ++.align 4,0x90 ++L011mw_non_sse2: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ ++ xorl %esi,%esi ++ movl 20(%esp),%edi ++ movl 24(%esp),%ebx ++ movl 28(%esp),%ebp ++ movl 32(%esp),%ecx ++ andl $4294967288,%ebp ++ jz L013mw_finish ++L014mw_loop: ++ # Round 0 ++ movl (%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,(%edi) ++ movl %edx,%esi ++ # Round 4 ++ movl 4(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,4(%edi) ++ movl %edx,%esi ++ # Round 8 ++ movl 8(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,8(%edi) ++ movl %edx,%esi ++ # Round 12 ++ movl 12(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,12(%edi) ++ movl %edx,%esi ++ # Round 16 ++ movl 16(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,16(%edi) ++ movl %edx,%esi ++ # Round 20 ++ movl 20(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,20(%edi) ++ movl %edx,%esi ++ # Round 24 ++ movl 24(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,24(%edi) ++ movl %edx,%esi ++ # Round 28 ++ movl 28(%ebx),%eax 
++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,28(%edi) ++ movl %edx,%esi ++ ++ addl $32,%ebx ++ addl $32,%edi ++ subl $8,%ebp ++ jz L013mw_finish ++ jmp L014mw_loop ++L013mw_finish: ++ movl 28(%esp),%ebp ++ andl $7,%ebp ++ jnz L015mw_finish2 ++ jmp L016mw_end ++L015mw_finish2: ++ # Tail Round 0 ++ movl (%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,(%edi) ++ movl %edx,%esi ++ decl %ebp ++ jz L016mw_end ++ # Tail Round 1 ++ movl 4(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,4(%edi) ++ movl %edx,%esi ++ decl %ebp ++ jz L016mw_end ++ # Tail Round 2 ++ movl 8(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,8(%edi) ++ movl %edx,%esi ++ decl %ebp ++ jz L016mw_end ++ # Tail Round 3 ++ movl 12(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,12(%edi) ++ movl %edx,%esi ++ decl %ebp ++ jz L016mw_end ++ # Tail Round 4 ++ movl 16(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,16(%edi) ++ movl %edx,%esi ++ decl %ebp ++ jz L016mw_end ++ # Tail Round 5 ++ movl 20(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,20(%edi) ++ movl %edx,%esi ++ decl %ebp ++ jz L016mw_end ++ # Tail Round 6 ++ movl 24(%ebx),%eax ++ mull %ecx ++ addl %esi,%eax ++ adcl $0,%edx ++ movl %eax,24(%edi) ++ movl %edx,%esi ++L016mw_end: ++ movl %esi,%eax ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _bn_sqr_words ++.private_extern _bn_sqr_words ++.align 4 ++_bn_sqr_words: ++L_bn_sqr_words_begin: ++ call L017PIC_me_up ++L017PIC_me_up: ++ popl %eax ++ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L017PIC_me_up(%eax),%eax ++ btl $26,(%eax) ++ jnc L018sqr_non_sse2 ++ movl 4(%esp),%eax ++ movl 8(%esp),%edx ++ movl 12(%esp),%ecx ++.align 4,0x90 ++L019sqr_sse2_loop: ++ movd (%edx),%mm0 ++ pmuludq %mm0,%mm0 ++ leal 4(%edx),%edx ++ movq %mm0,(%eax) ++ subl $1,%ecx ++ leal 8(%eax),%eax ++ jnz L019sqr_sse2_loop ++ emms ++ ret ++.align 4,0x90 ++L018sqr_non_sse2: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%ebx ++ andl $4294967288,%ebx ++ jz L020sw_finish ++L021sw_loop: ++ # Round 0 ++ movl (%edi),%eax ++ mull %eax ++ movl %eax,(%esi) ++ movl %edx,4(%esi) ++ # Round 4 ++ movl 4(%edi),%eax ++ mull %eax ++ movl %eax,8(%esi) ++ movl %edx,12(%esi) ++ # Round 8 ++ movl 8(%edi),%eax ++ mull %eax ++ movl %eax,16(%esi) ++ movl %edx,20(%esi) ++ # Round 12 ++ movl 12(%edi),%eax ++ mull %eax ++ movl %eax,24(%esi) ++ movl %edx,28(%esi) ++ # Round 16 ++ movl 16(%edi),%eax ++ mull %eax ++ movl %eax,32(%esi) ++ movl %edx,36(%esi) ++ # Round 20 ++ movl 20(%edi),%eax ++ mull %eax ++ movl %eax,40(%esi) ++ movl %edx,44(%esi) ++ # Round 24 ++ movl 24(%edi),%eax ++ mull %eax ++ movl %eax,48(%esi) ++ movl %edx,52(%esi) ++ # Round 28 ++ movl 28(%edi),%eax ++ mull %eax ++ movl %eax,56(%esi) ++ movl %edx,60(%esi) ++ ++ addl $32,%edi ++ addl $64,%esi ++ subl $8,%ebx ++ jnz L021sw_loop ++L020sw_finish: ++ movl 28(%esp),%ebx ++ andl $7,%ebx ++ jz L022sw_end ++ # Tail Round 0 ++ movl (%edi),%eax ++ mull %eax ++ movl %eax,(%esi) ++ decl %ebx ++ movl %edx,4(%esi) ++ jz L022sw_end ++ # Tail Round 1 ++ movl 4(%edi),%eax ++ mull %eax ++ movl %eax,8(%esi) ++ decl %ebx ++ movl %edx,12(%esi) ++ jz L022sw_end ++ # Tail Round 2 ++ movl 8(%edi),%eax ++ mull %eax ++ movl %eax,16(%esi) ++ decl %ebx ++ movl %edx,20(%esi) ++ jz L022sw_end ++ # Tail Round 3 ++ movl 12(%edi),%eax ++ mull %eax ++ movl %eax,24(%esi) ++ decl %ebx ++ movl %edx,28(%esi) ++ 
jz L022sw_end ++ # Tail Round 4 ++ movl 16(%edi),%eax ++ mull %eax ++ movl %eax,32(%esi) ++ decl %ebx ++ movl %edx,36(%esi) ++ jz L022sw_end ++ # Tail Round 5 ++ movl 20(%edi),%eax ++ mull %eax ++ movl %eax,40(%esi) ++ decl %ebx ++ movl %edx,44(%esi) ++ jz L022sw_end ++ # Tail Round 6 ++ movl 24(%edi),%eax ++ mull %eax ++ movl %eax,48(%esi) ++ movl %edx,52(%esi) ++L022sw_end: ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _bn_div_words ++.private_extern _bn_div_words ++.align 4 ++_bn_div_words: ++L_bn_div_words_begin: ++ movl 4(%esp),%edx ++ movl 8(%esp),%eax ++ movl 12(%esp),%ecx ++ divl %ecx ++ ret ++.globl _bn_add_words ++.private_extern _bn_add_words ++.align 4 ++_bn_add_words: ++L_bn_add_words_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ ++ movl 20(%esp),%ebx ++ movl 24(%esp),%esi ++ movl 28(%esp),%edi ++ movl 32(%esp),%ebp ++ xorl %eax,%eax ++ andl $4294967288,%ebp ++ jz L023aw_finish ++L024aw_loop: ++ # Round 0 ++ movl (%esi),%ecx ++ movl (%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,(%ebx) ++ # Round 1 ++ movl 4(%esi),%ecx ++ movl 4(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,4(%ebx) ++ # Round 2 ++ movl 8(%esi),%ecx ++ movl 8(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,8(%ebx) ++ # Round 3 ++ movl 12(%esi),%ecx ++ movl 12(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,12(%ebx) ++ # Round 4 ++ movl 16(%esi),%ecx ++ movl 16(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,16(%ebx) ++ # Round 5 ++ movl 20(%esi),%ecx ++ movl 20(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,20(%ebx) ++ # Round 6 ++ movl 24(%esi),%ecx ++ movl 24(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,24(%ebx) ++ # Round 7 ++ movl 28(%esi),%ecx ++ movl 28(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,28(%ebx) ++ ++ addl $32,%esi ++ addl $32,%edi ++ addl $32,%ebx ++ subl $8,%ebp ++ jnz L024aw_loop ++L023aw_finish: ++ movl 32(%esp),%ebp ++ andl $7,%ebp ++ jz L025aw_end ++ # Tail Round 0 ++ movl (%esi),%ecx ++ movl (%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,(%ebx) ++ jz L025aw_end ++ # Tail Round 1 ++ movl 4(%esi),%ecx ++ movl 4(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,4(%ebx) ++ jz L025aw_end ++ # Tail Round 2 ++ movl 8(%esi),%ecx ++ movl 8(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,8(%ebx) ++ jz L025aw_end ++ # Tail Round 3 ++ movl 12(%esi),%ecx ++ movl 12(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,12(%ebx) ++ jz L025aw_end ++ # Tail Round 4 ++ movl 16(%esi),%ecx ++ movl 16(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,16(%ebx) ++ jz L025aw_end ++ # Tail Round 5 ++ movl 20(%esi),%ecx ++ movl 20(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ 
adcl $0,%eax ++ decl %ebp ++ movl %ecx,20(%ebx) ++ jz L025aw_end ++ # Tail Round 6 ++ movl 24(%esi),%ecx ++ movl 24(%edi),%edx ++ addl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ addl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,24(%ebx) ++L025aw_end: ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _bn_sub_words ++.private_extern _bn_sub_words ++.align 4 ++_bn_sub_words: ++L_bn_sub_words_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ ++ movl 20(%esp),%ebx ++ movl 24(%esp),%esi ++ movl 28(%esp),%edi ++ movl 32(%esp),%ebp ++ xorl %eax,%eax ++ andl $4294967288,%ebp ++ jz L026aw_finish ++L027aw_loop: ++ # Round 0 ++ movl (%esi),%ecx ++ movl (%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,(%ebx) ++ # Round 1 ++ movl 4(%esi),%ecx ++ movl 4(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,4(%ebx) ++ # Round 2 ++ movl 8(%esi),%ecx ++ movl 8(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,8(%ebx) ++ # Round 3 ++ movl 12(%esi),%ecx ++ movl 12(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,12(%ebx) ++ # Round 4 ++ movl 16(%esi),%ecx ++ movl 16(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,16(%ebx) ++ # Round 5 ++ movl 20(%esi),%ecx ++ movl 20(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,20(%ebx) ++ # Round 6 ++ movl 24(%esi),%ecx ++ movl 24(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,24(%ebx) ++ # Round 7 ++ movl 28(%esi),%ecx ++ movl 28(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,28(%ebx) ++ ++ addl $32,%esi ++ addl $32,%edi ++ addl $32,%ebx ++ subl $8,%ebp ++ jnz L027aw_loop ++L026aw_finish: ++ movl 32(%esp),%ebp ++ andl $7,%ebp ++ jz L028aw_end ++ # Tail Round 0 ++ movl (%esi),%ecx ++ movl (%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,(%ebx) ++ jz L028aw_end ++ # Tail Round 1 ++ movl 4(%esi),%ecx ++ movl 4(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,4(%ebx) ++ jz L028aw_end ++ # Tail Round 2 ++ movl 8(%esi),%ecx ++ movl 8(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,8(%ebx) ++ jz L028aw_end ++ # Tail Round 3 ++ movl 12(%esi),%ecx ++ movl 12(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,12(%ebx) ++ jz L028aw_end ++ # Tail Round 4 ++ movl 16(%esi),%ecx ++ movl 16(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,16(%ebx) ++ jz L028aw_end ++ # Tail Round 5 ++ movl 20(%esi),%ecx ++ movl 20(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ decl %ebp ++ movl %ecx,20(%ebx) ++ jz L028aw_end ++ # Tail Round 6 ++ movl 24(%esi),%ecx ++ movl 24(%edi),%edx ++ subl %eax,%ecx ++ movl $0,%eax ++ adcl %eax,%eax ++ subl %edx,%ecx ++ adcl $0,%eax ++ movl %ecx,24(%ebx) ++L028aw_end: ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.section 
__IMPORT,__pointers,non_lazy_symbol_pointers ++L_OPENSSL_ia32cap_P$non_lazy_ptr: ++.indirect_symbol _OPENSSL_ia32cap_P ++.long 0 ++#endif +diff --git a/apple-x86/crypto/fipsmodule/co-586.S b/apple-x86/crypto/fipsmodule/co-586.S +new file mode 100644 +index 0000000..015dffa +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/co-586.S +@@ -0,0 +1,1257 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _bn_mul_comba8 ++.private_extern _bn_mul_comba8 ++.align 4 ++_bn_mul_comba8: ++L_bn_mul_comba8_begin: ++ pushl %esi ++ movl 12(%esp),%esi ++ pushl %edi ++ movl 20(%esp),%edi ++ pushl %ebp ++ pushl %ebx ++ xorl %ebx,%ebx ++ movl (%esi),%eax ++ xorl %ecx,%ecx ++ movl (%edi),%edx ++ # ################## Calculate word 0 ++ xorl %ebp,%ebp ++ # mul a[0]*b[0] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esp),%eax ++ adcl %edx,%ecx ++ movl (%edi),%edx ++ adcl $0,%ebp ++ movl %ebx,(%eax) ++ movl 4(%esi),%eax ++ # saved r[0] ++ # ################## Calculate word 1 ++ xorl %ebx,%ebx ++ # mul a[1]*b[0] ++ mull %edx ++ addl %eax,%ecx ++ movl (%esi),%eax ++ adcl %edx,%ebp ++ movl 4(%edi),%edx ++ adcl $0,%ebx ++ # mul a[0]*b[1] ++ mull %edx ++ addl %eax,%ecx ++ movl 20(%esp),%eax ++ adcl %edx,%ebp ++ movl (%edi),%edx ++ adcl $0,%ebx ++ movl %ecx,4(%eax) ++ movl 8(%esi),%eax ++ # saved r[1] ++ # ################## Calculate word 2 ++ xorl %ecx,%ecx ++ # mul a[2]*b[0] ++ mull %edx ++ addl %eax,%ebp ++ movl 4(%esi),%eax ++ adcl %edx,%ebx ++ movl 4(%edi),%edx ++ adcl $0,%ecx ++ # mul a[1]*b[1] ++ mull %edx ++ addl %eax,%ebp ++ movl (%esi),%eax ++ adcl %edx,%ebx ++ movl 8(%edi),%edx ++ adcl $0,%ecx ++ # mul a[0]*b[2] ++ mull %edx ++ addl %eax,%ebp ++ movl 20(%esp),%eax ++ adcl %edx,%ebx ++ movl (%edi),%edx ++ adcl $0,%ecx ++ movl %ebp,8(%eax) ++ movl 12(%esi),%eax ++ # saved r[2] ++ # ################## Calculate word 3 ++ xorl %ebp,%ebp ++ # mul a[3]*b[0] ++ mull %edx ++ addl %eax,%ebx ++ movl 8(%esi),%eax ++ adcl %edx,%ecx ++ movl 4(%edi),%edx ++ adcl $0,%ebp ++ # mul a[2]*b[1] ++ mull %edx ++ addl %eax,%ebx ++ movl 4(%esi),%eax ++ adcl %edx,%ecx ++ movl 8(%edi),%edx ++ adcl $0,%ebp ++ # mul a[1]*b[2] ++ mull %edx ++ addl %eax,%ebx ++ movl (%esi),%eax ++ adcl %edx,%ecx ++ movl 12(%edi),%edx ++ adcl $0,%ebp ++ # mul a[0]*b[3] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esp),%eax ++ adcl %edx,%ecx ++ movl (%edi),%edx ++ adcl $0,%ebp ++ movl %ebx,12(%eax) ++ movl 16(%esi),%eax ++ # saved r[3] ++ # ################## Calculate word 4 ++ xorl %ebx,%ebx ++ # mul a[4]*b[0] ++ mull %edx ++ addl %eax,%ecx ++ movl 12(%esi),%eax ++ adcl %edx,%ebp ++ movl 4(%edi),%edx ++ adcl $0,%ebx ++ # mul a[3]*b[1] ++ mull %edx ++ addl %eax,%ecx ++ movl 8(%esi),%eax ++ adcl %edx,%ebp ++ movl 8(%edi),%edx ++ adcl $0,%ebx ++ # mul a[2]*b[2] ++ mull %edx ++ addl %eax,%ecx ++ movl 4(%esi),%eax ++ adcl %edx,%ebp ++ movl 12(%edi),%edx ++ adcl $0,%ebx ++ # mul a[1]*b[3] ++ mull %edx ++ addl %eax,%ecx ++ movl (%esi),%eax ++ adcl %edx,%ebp ++ movl 16(%edi),%edx ++ adcl $0,%ebx ++ # mul a[0]*b[4] ++ mull %edx ++ addl %eax,%ecx ++ movl 20(%esp),%eax ++ adcl %edx,%ebp ++ movl (%edi),%edx ++ adcl $0,%ebx ++ movl %ecx,16(%eax) ++ movl 20(%esi),%eax ++ # saved r[4] ++ # ################## Calculate word 5 ++ xorl %ecx,%ecx ++ # mul a[5]*b[0] ++ mull %edx ++ addl %eax,%ebp ++ movl 16(%esi),%eax ++ adcl %edx,%ebx ++ movl 4(%edi),%edx ++ adcl $0,%ecx ++ # mul a[4]*b[1] ++ mull %edx ++ addl 
%eax,%ebp ++ movl 12(%esi),%eax ++ adcl %edx,%ebx ++ movl 8(%edi),%edx ++ adcl $0,%ecx ++ # mul a[3]*b[2] ++ mull %edx ++ addl %eax,%ebp ++ movl 8(%esi),%eax ++ adcl %edx,%ebx ++ movl 12(%edi),%edx ++ adcl $0,%ecx ++ # mul a[2]*b[3] ++ mull %edx ++ addl %eax,%ebp ++ movl 4(%esi),%eax ++ adcl %edx,%ebx ++ movl 16(%edi),%edx ++ adcl $0,%ecx ++ # mul a[1]*b[4] ++ mull %edx ++ addl %eax,%ebp ++ movl (%esi),%eax ++ adcl %edx,%ebx ++ movl 20(%edi),%edx ++ adcl $0,%ecx ++ # mul a[0]*b[5] ++ mull %edx ++ addl %eax,%ebp ++ movl 20(%esp),%eax ++ adcl %edx,%ebx ++ movl (%edi),%edx ++ adcl $0,%ecx ++ movl %ebp,20(%eax) ++ movl 24(%esi),%eax ++ # saved r[5] ++ # ################## Calculate word 6 ++ xorl %ebp,%ebp ++ # mul a[6]*b[0] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esi),%eax ++ adcl %edx,%ecx ++ movl 4(%edi),%edx ++ adcl $0,%ebp ++ # mul a[5]*b[1] ++ mull %edx ++ addl %eax,%ebx ++ movl 16(%esi),%eax ++ adcl %edx,%ecx ++ movl 8(%edi),%edx ++ adcl $0,%ebp ++ # mul a[4]*b[2] ++ mull %edx ++ addl %eax,%ebx ++ movl 12(%esi),%eax ++ adcl %edx,%ecx ++ movl 12(%edi),%edx ++ adcl $0,%ebp ++ # mul a[3]*b[3] ++ mull %edx ++ addl %eax,%ebx ++ movl 8(%esi),%eax ++ adcl %edx,%ecx ++ movl 16(%edi),%edx ++ adcl $0,%ebp ++ # mul a[2]*b[4] ++ mull %edx ++ addl %eax,%ebx ++ movl 4(%esi),%eax ++ adcl %edx,%ecx ++ movl 20(%edi),%edx ++ adcl $0,%ebp ++ # mul a[1]*b[5] ++ mull %edx ++ addl %eax,%ebx ++ movl (%esi),%eax ++ adcl %edx,%ecx ++ movl 24(%edi),%edx ++ adcl $0,%ebp ++ # mul a[0]*b[6] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esp),%eax ++ adcl %edx,%ecx ++ movl (%edi),%edx ++ adcl $0,%ebp ++ movl %ebx,24(%eax) ++ movl 28(%esi),%eax ++ # saved r[6] ++ # ################## Calculate word 7 ++ xorl %ebx,%ebx ++ # mul a[7]*b[0] ++ mull %edx ++ addl %eax,%ecx ++ movl 24(%esi),%eax ++ adcl %edx,%ebp ++ movl 4(%edi),%edx ++ adcl $0,%ebx ++ # mul a[6]*b[1] ++ mull %edx ++ addl %eax,%ecx ++ movl 20(%esi),%eax ++ adcl %edx,%ebp ++ movl 8(%edi),%edx ++ adcl $0,%ebx ++ # mul a[5]*b[2] ++ mull %edx ++ addl %eax,%ecx ++ movl 16(%esi),%eax ++ adcl %edx,%ebp ++ movl 12(%edi),%edx ++ adcl $0,%ebx ++ # mul a[4]*b[3] ++ mull %edx ++ addl %eax,%ecx ++ movl 12(%esi),%eax ++ adcl %edx,%ebp ++ movl 16(%edi),%edx ++ adcl $0,%ebx ++ # mul a[3]*b[4] ++ mull %edx ++ addl %eax,%ecx ++ movl 8(%esi),%eax ++ adcl %edx,%ebp ++ movl 20(%edi),%edx ++ adcl $0,%ebx ++ # mul a[2]*b[5] ++ mull %edx ++ addl %eax,%ecx ++ movl 4(%esi),%eax ++ adcl %edx,%ebp ++ movl 24(%edi),%edx ++ adcl $0,%ebx ++ # mul a[1]*b[6] ++ mull %edx ++ addl %eax,%ecx ++ movl (%esi),%eax ++ adcl %edx,%ebp ++ movl 28(%edi),%edx ++ adcl $0,%ebx ++ # mul a[0]*b[7] ++ mull %edx ++ addl %eax,%ecx ++ movl 20(%esp),%eax ++ adcl %edx,%ebp ++ movl 4(%edi),%edx ++ adcl $0,%ebx ++ movl %ecx,28(%eax) ++ movl 28(%esi),%eax ++ # saved r[7] ++ # ################## Calculate word 8 ++ xorl %ecx,%ecx ++ # mul a[7]*b[1] ++ mull %edx ++ addl %eax,%ebp ++ movl 24(%esi),%eax ++ adcl %edx,%ebx ++ movl 8(%edi),%edx ++ adcl $0,%ecx ++ # mul a[6]*b[2] ++ mull %edx ++ addl %eax,%ebp ++ movl 20(%esi),%eax ++ adcl %edx,%ebx ++ movl 12(%edi),%edx ++ adcl $0,%ecx ++ # mul a[5]*b[3] ++ mull %edx ++ addl %eax,%ebp ++ movl 16(%esi),%eax ++ adcl %edx,%ebx ++ movl 16(%edi),%edx ++ adcl $0,%ecx ++ # mul a[4]*b[4] ++ mull %edx ++ addl %eax,%ebp ++ movl 12(%esi),%eax ++ adcl %edx,%ebx ++ movl 20(%edi),%edx ++ adcl $0,%ecx ++ # mul a[3]*b[5] ++ mull %edx ++ addl %eax,%ebp ++ movl 8(%esi),%eax ++ adcl %edx,%ebx ++ movl 24(%edi),%edx ++ adcl $0,%ecx ++ # mul a[2]*b[6] ++ mull %edx ++ addl %eax,%ebp ++ movl 
4(%esi),%eax ++ adcl %edx,%ebx ++ movl 28(%edi),%edx ++ adcl $0,%ecx ++ # mul a[1]*b[7] ++ mull %edx ++ addl %eax,%ebp ++ movl 20(%esp),%eax ++ adcl %edx,%ebx ++ movl 8(%edi),%edx ++ adcl $0,%ecx ++ movl %ebp,32(%eax) ++ movl 28(%esi),%eax ++ # saved r[8] ++ # ################## Calculate word 9 ++ xorl %ebp,%ebp ++ # mul a[7]*b[2] ++ mull %edx ++ addl %eax,%ebx ++ movl 24(%esi),%eax ++ adcl %edx,%ecx ++ movl 12(%edi),%edx ++ adcl $0,%ebp ++ # mul a[6]*b[3] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esi),%eax ++ adcl %edx,%ecx ++ movl 16(%edi),%edx ++ adcl $0,%ebp ++ # mul a[5]*b[4] ++ mull %edx ++ addl %eax,%ebx ++ movl 16(%esi),%eax ++ adcl %edx,%ecx ++ movl 20(%edi),%edx ++ adcl $0,%ebp ++ # mul a[4]*b[5] ++ mull %edx ++ addl %eax,%ebx ++ movl 12(%esi),%eax ++ adcl %edx,%ecx ++ movl 24(%edi),%edx ++ adcl $0,%ebp ++ # mul a[3]*b[6] ++ mull %edx ++ addl %eax,%ebx ++ movl 8(%esi),%eax ++ adcl %edx,%ecx ++ movl 28(%edi),%edx ++ adcl $0,%ebp ++ # mul a[2]*b[7] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esp),%eax ++ adcl %edx,%ecx ++ movl 12(%edi),%edx ++ adcl $0,%ebp ++ movl %ebx,36(%eax) ++ movl 28(%esi),%eax ++ # saved r[9] ++ # ################## Calculate word 10 ++ xorl %ebx,%ebx ++ # mul a[7]*b[3] ++ mull %edx ++ addl %eax,%ecx ++ movl 24(%esi),%eax ++ adcl %edx,%ebp ++ movl 16(%edi),%edx ++ adcl $0,%ebx ++ # mul a[6]*b[4] ++ mull %edx ++ addl %eax,%ecx ++ movl 20(%esi),%eax ++ adcl %edx,%ebp ++ movl 20(%edi),%edx ++ adcl $0,%ebx ++ # mul a[5]*b[5] ++ mull %edx ++ addl %eax,%ecx ++ movl 16(%esi),%eax ++ adcl %edx,%ebp ++ movl 24(%edi),%edx ++ adcl $0,%ebx ++ # mul a[4]*b[6] ++ mull %edx ++ addl %eax,%ecx ++ movl 12(%esi),%eax ++ adcl %edx,%ebp ++ movl 28(%edi),%edx ++ adcl $0,%ebx ++ # mul a[3]*b[7] ++ mull %edx ++ addl %eax,%ecx ++ movl 20(%esp),%eax ++ adcl %edx,%ebp ++ movl 16(%edi),%edx ++ adcl $0,%ebx ++ movl %ecx,40(%eax) ++ movl 28(%esi),%eax ++ # saved r[10] ++ # ################## Calculate word 11 ++ xorl %ecx,%ecx ++ # mul a[7]*b[4] ++ mull %edx ++ addl %eax,%ebp ++ movl 24(%esi),%eax ++ adcl %edx,%ebx ++ movl 20(%edi),%edx ++ adcl $0,%ecx ++ # mul a[6]*b[5] ++ mull %edx ++ addl %eax,%ebp ++ movl 20(%esi),%eax ++ adcl %edx,%ebx ++ movl 24(%edi),%edx ++ adcl $0,%ecx ++ # mul a[5]*b[6] ++ mull %edx ++ addl %eax,%ebp ++ movl 16(%esi),%eax ++ adcl %edx,%ebx ++ movl 28(%edi),%edx ++ adcl $0,%ecx ++ # mul a[4]*b[7] ++ mull %edx ++ addl %eax,%ebp ++ movl 20(%esp),%eax ++ adcl %edx,%ebx ++ movl 20(%edi),%edx ++ adcl $0,%ecx ++ movl %ebp,44(%eax) ++ movl 28(%esi),%eax ++ # saved r[11] ++ # ################## Calculate word 12 ++ xorl %ebp,%ebp ++ # mul a[7]*b[5] ++ mull %edx ++ addl %eax,%ebx ++ movl 24(%esi),%eax ++ adcl %edx,%ecx ++ movl 24(%edi),%edx ++ adcl $0,%ebp ++ # mul a[6]*b[6] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esi),%eax ++ adcl %edx,%ecx ++ movl 28(%edi),%edx ++ adcl $0,%ebp ++ # mul a[5]*b[7] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esp),%eax ++ adcl %edx,%ecx ++ movl 24(%edi),%edx ++ adcl $0,%ebp ++ movl %ebx,48(%eax) ++ movl 28(%esi),%eax ++ # saved r[12] ++ # ################## Calculate word 13 ++ xorl %ebx,%ebx ++ # mul a[7]*b[6] ++ mull %edx ++ addl %eax,%ecx ++ movl 24(%esi),%eax ++ adcl %edx,%ebp ++ movl 28(%edi),%edx ++ adcl $0,%ebx ++ # mul a[6]*b[7] ++ mull %edx ++ addl %eax,%ecx ++ movl 20(%esp),%eax ++ adcl %edx,%ebp ++ movl 28(%edi),%edx ++ adcl $0,%ebx ++ movl %ecx,52(%eax) ++ movl 28(%esi),%eax ++ # saved r[13] ++ # ################## Calculate word 14 ++ xorl %ecx,%ecx ++ # mul a[7]*b[7] ++ mull %edx ++ addl %eax,%ebp ++ movl 20(%esp),%eax ++ 
adcl %edx,%ebx ++ adcl $0,%ecx ++ movl %ebp,56(%eax) ++ # saved r[14] ++ # save r[15] ++ movl %ebx,60(%eax) ++ popl %ebx ++ popl %ebp ++ popl %edi ++ popl %esi ++ ret ++.globl _bn_mul_comba4 ++.private_extern _bn_mul_comba4 ++.align 4 ++_bn_mul_comba4: ++L_bn_mul_comba4_begin: ++ pushl %esi ++ movl 12(%esp),%esi ++ pushl %edi ++ movl 20(%esp),%edi ++ pushl %ebp ++ pushl %ebx ++ xorl %ebx,%ebx ++ movl (%esi),%eax ++ xorl %ecx,%ecx ++ movl (%edi),%edx ++ # ################## Calculate word 0 ++ xorl %ebp,%ebp ++ # mul a[0]*b[0] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esp),%eax ++ adcl %edx,%ecx ++ movl (%edi),%edx ++ adcl $0,%ebp ++ movl %ebx,(%eax) ++ movl 4(%esi),%eax ++ # saved r[0] ++ # ################## Calculate word 1 ++ xorl %ebx,%ebx ++ # mul a[1]*b[0] ++ mull %edx ++ addl %eax,%ecx ++ movl (%esi),%eax ++ adcl %edx,%ebp ++ movl 4(%edi),%edx ++ adcl $0,%ebx ++ # mul a[0]*b[1] ++ mull %edx ++ addl %eax,%ecx ++ movl 20(%esp),%eax ++ adcl %edx,%ebp ++ movl (%edi),%edx ++ adcl $0,%ebx ++ movl %ecx,4(%eax) ++ movl 8(%esi),%eax ++ # saved r[1] ++ # ################## Calculate word 2 ++ xorl %ecx,%ecx ++ # mul a[2]*b[0] ++ mull %edx ++ addl %eax,%ebp ++ movl 4(%esi),%eax ++ adcl %edx,%ebx ++ movl 4(%edi),%edx ++ adcl $0,%ecx ++ # mul a[1]*b[1] ++ mull %edx ++ addl %eax,%ebp ++ movl (%esi),%eax ++ adcl %edx,%ebx ++ movl 8(%edi),%edx ++ adcl $0,%ecx ++ # mul a[0]*b[2] ++ mull %edx ++ addl %eax,%ebp ++ movl 20(%esp),%eax ++ adcl %edx,%ebx ++ movl (%edi),%edx ++ adcl $0,%ecx ++ movl %ebp,8(%eax) ++ movl 12(%esi),%eax ++ # saved r[2] ++ # ################## Calculate word 3 ++ xorl %ebp,%ebp ++ # mul a[3]*b[0] ++ mull %edx ++ addl %eax,%ebx ++ movl 8(%esi),%eax ++ adcl %edx,%ecx ++ movl 4(%edi),%edx ++ adcl $0,%ebp ++ # mul a[2]*b[1] ++ mull %edx ++ addl %eax,%ebx ++ movl 4(%esi),%eax ++ adcl %edx,%ecx ++ movl 8(%edi),%edx ++ adcl $0,%ebp ++ # mul a[1]*b[2] ++ mull %edx ++ addl %eax,%ebx ++ movl (%esi),%eax ++ adcl %edx,%ecx ++ movl 12(%edi),%edx ++ adcl $0,%ebp ++ # mul a[0]*b[3] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esp),%eax ++ adcl %edx,%ecx ++ movl 4(%edi),%edx ++ adcl $0,%ebp ++ movl %ebx,12(%eax) ++ movl 12(%esi),%eax ++ # saved r[3] ++ # ################## Calculate word 4 ++ xorl %ebx,%ebx ++ # mul a[3]*b[1] ++ mull %edx ++ addl %eax,%ecx ++ movl 8(%esi),%eax ++ adcl %edx,%ebp ++ movl 8(%edi),%edx ++ adcl $0,%ebx ++ # mul a[2]*b[2] ++ mull %edx ++ addl %eax,%ecx ++ movl 4(%esi),%eax ++ adcl %edx,%ebp ++ movl 12(%edi),%edx ++ adcl $0,%ebx ++ # mul a[1]*b[3] ++ mull %edx ++ addl %eax,%ecx ++ movl 20(%esp),%eax ++ adcl %edx,%ebp ++ movl 8(%edi),%edx ++ adcl $0,%ebx ++ movl %ecx,16(%eax) ++ movl 12(%esi),%eax ++ # saved r[4] ++ # ################## Calculate word 5 ++ xorl %ecx,%ecx ++ # mul a[3]*b[2] ++ mull %edx ++ addl %eax,%ebp ++ movl 8(%esi),%eax ++ adcl %edx,%ebx ++ movl 12(%edi),%edx ++ adcl $0,%ecx ++ # mul a[2]*b[3] ++ mull %edx ++ addl %eax,%ebp ++ movl 20(%esp),%eax ++ adcl %edx,%ebx ++ movl 12(%edi),%edx ++ adcl $0,%ecx ++ movl %ebp,20(%eax) ++ movl 12(%esi),%eax ++ # saved r[5] ++ # ################## Calculate word 6 ++ xorl %ebp,%ebp ++ # mul a[3]*b[3] ++ mull %edx ++ addl %eax,%ebx ++ movl 20(%esp),%eax ++ adcl %edx,%ecx ++ adcl $0,%ebp ++ movl %ebx,24(%eax) ++ # saved r[6] ++ # save r[7] ++ movl %ecx,28(%eax) ++ popl %ebx ++ popl %ebp ++ popl %edi ++ popl %esi ++ ret ++.globl _bn_sqr_comba8 ++.private_extern _bn_sqr_comba8 ++.align 4 ++_bn_sqr_comba8: ++L_bn_sqr_comba8_begin: ++ pushl %esi ++ pushl %edi ++ pushl %ebp ++ pushl %ebx ++ movl 20(%esp),%edi ++ movl 
24(%esp),%esi ++ xorl %ebx,%ebx ++ xorl %ecx,%ecx ++ movl (%esi),%eax ++ # ############### Calculate word 0 ++ xorl %ebp,%ebp ++ # sqr a[0]*a[0] ++ mull %eax ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl (%esi),%edx ++ adcl $0,%ebp ++ movl %ebx,(%edi) ++ movl 4(%esi),%eax ++ # saved r[0] ++ # ############### Calculate word 1 ++ xorl %ebx,%ebx ++ # sqr a[1]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 8(%esi),%eax ++ adcl $0,%ebx ++ movl %ecx,4(%edi) ++ movl (%esi),%edx ++ # saved r[1] ++ # ############### Calculate word 2 ++ xorl %ecx,%ecx ++ # sqr a[2]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 4(%esi),%eax ++ adcl $0,%ecx ++ # sqr a[1]*a[1] ++ mull %eax ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl (%esi),%edx ++ adcl $0,%ecx ++ movl %ebp,8(%edi) ++ movl 12(%esi),%eax ++ # saved r[2] ++ # ############### Calculate word 3 ++ xorl %ebp,%ebp ++ # sqr a[3]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 8(%esi),%eax ++ adcl $0,%ebp ++ movl 4(%esi),%edx ++ # sqr a[2]*a[1] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 16(%esi),%eax ++ adcl $0,%ebp ++ movl %ebx,12(%edi) ++ movl (%esi),%edx ++ # saved r[3] ++ # ############### Calculate word 4 ++ xorl %ebx,%ebx ++ # sqr a[4]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 12(%esi),%eax ++ adcl $0,%ebx ++ movl 4(%esi),%edx ++ # sqr a[3]*a[1] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 8(%esi),%eax ++ adcl $0,%ebx ++ # sqr a[2]*a[2] ++ mull %eax ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl (%esi),%edx ++ adcl $0,%ebx ++ movl %ecx,16(%edi) ++ movl 20(%esi),%eax ++ # saved r[4] ++ # ############### Calculate word 5 ++ xorl %ecx,%ecx ++ # sqr a[5]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 16(%esi),%eax ++ adcl $0,%ecx ++ movl 4(%esi),%edx ++ # sqr a[4]*a[1] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 12(%esi),%eax ++ adcl $0,%ecx ++ movl 8(%esi),%edx ++ # sqr a[3]*a[2] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 24(%esi),%eax ++ adcl $0,%ecx ++ movl %ebp,20(%edi) ++ movl (%esi),%edx ++ # saved r[5] ++ # ############### Calculate word 6 ++ xorl %ebp,%ebp ++ # sqr a[6]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 20(%esi),%eax ++ adcl $0,%ebp ++ movl 4(%esi),%edx ++ # sqr a[5]*a[1] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 16(%esi),%eax ++ adcl $0,%ebp ++ movl 8(%esi),%edx ++ # sqr a[4]*a[2] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 12(%esi),%eax ++ adcl $0,%ebp ++ # sqr a[3]*a[3] ++ mull %eax ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl (%esi),%edx ++ adcl $0,%ebp ++ movl %ebx,24(%edi) ++ movl 28(%esi),%eax ++ # saved r[6] ++ # ############### Calculate word 7 ++ xorl %ebx,%ebx ++ # sqr a[7]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 24(%esi),%eax ++ adcl $0,%ebx ++ movl 4(%esi),%edx ++ # sqr 
a[6]*a[1] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 20(%esi),%eax ++ adcl $0,%ebx ++ movl 8(%esi),%edx ++ # sqr a[5]*a[2] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 16(%esi),%eax ++ adcl $0,%ebx ++ movl 12(%esi),%edx ++ # sqr a[4]*a[3] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 28(%esi),%eax ++ adcl $0,%ebx ++ movl %ecx,28(%edi) ++ movl 4(%esi),%edx ++ # saved r[7] ++ # ############### Calculate word 8 ++ xorl %ecx,%ecx ++ # sqr a[7]*a[1] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 24(%esi),%eax ++ adcl $0,%ecx ++ movl 8(%esi),%edx ++ # sqr a[6]*a[2] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 20(%esi),%eax ++ adcl $0,%ecx ++ movl 12(%esi),%edx ++ # sqr a[5]*a[3] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 16(%esi),%eax ++ adcl $0,%ecx ++ # sqr a[4]*a[4] ++ mull %eax ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 8(%esi),%edx ++ adcl $0,%ecx ++ movl %ebp,32(%edi) ++ movl 28(%esi),%eax ++ # saved r[8] ++ # ############### Calculate word 9 ++ xorl %ebp,%ebp ++ # sqr a[7]*a[2] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 24(%esi),%eax ++ adcl $0,%ebp ++ movl 12(%esi),%edx ++ # sqr a[6]*a[3] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 20(%esi),%eax ++ adcl $0,%ebp ++ movl 16(%esi),%edx ++ # sqr a[5]*a[4] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 28(%esi),%eax ++ adcl $0,%ebp ++ movl %ebx,36(%edi) ++ movl 12(%esi),%edx ++ # saved r[9] ++ # ############### Calculate word 10 ++ xorl %ebx,%ebx ++ # sqr a[7]*a[3] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 24(%esi),%eax ++ adcl $0,%ebx ++ movl 16(%esi),%edx ++ # sqr a[6]*a[4] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 20(%esi),%eax ++ adcl $0,%ebx ++ # sqr a[5]*a[5] ++ mull %eax ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 16(%esi),%edx ++ adcl $0,%ebx ++ movl %ecx,40(%edi) ++ movl 28(%esi),%eax ++ # saved r[10] ++ # ############### Calculate word 11 ++ xorl %ecx,%ecx ++ # sqr a[7]*a[4] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 24(%esi),%eax ++ adcl $0,%ecx ++ movl 20(%esi),%edx ++ # sqr a[6]*a[5] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 28(%esi),%eax ++ adcl $0,%ecx ++ movl %ebp,44(%edi) ++ movl 20(%esi),%edx ++ # saved r[11] ++ # ############### Calculate word 12 ++ xorl %ebp,%ebp ++ # sqr a[7]*a[5] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 24(%esi),%eax ++ adcl $0,%ebp ++ # sqr a[6]*a[6] ++ mull %eax ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 24(%esi),%edx ++ adcl $0,%ebp ++ movl %ebx,48(%edi) ++ movl 28(%esi),%eax ++ # saved r[12] ++ # ############### Calculate word 13 ++ xorl %ebx,%ebx ++ # sqr a[7]*a[6] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 28(%esi),%eax ++ adcl $0,%ebx 
++ movl %ecx,52(%edi) ++ # saved r[13] ++ # ############### Calculate word 14 ++ xorl %ecx,%ecx ++ # sqr a[7]*a[7] ++ mull %eax ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ adcl $0,%ecx ++ movl %ebp,56(%edi) ++ # saved r[14] ++ movl %ebx,60(%edi) ++ popl %ebx ++ popl %ebp ++ popl %edi ++ popl %esi ++ ret ++.globl _bn_sqr_comba4 ++.private_extern _bn_sqr_comba4 ++.align 4 ++_bn_sqr_comba4: ++L_bn_sqr_comba4_begin: ++ pushl %esi ++ pushl %edi ++ pushl %ebp ++ pushl %ebx ++ movl 20(%esp),%edi ++ movl 24(%esp),%esi ++ xorl %ebx,%ebx ++ xorl %ecx,%ecx ++ movl (%esi),%eax ++ # ############### Calculate word 0 ++ xorl %ebp,%ebp ++ # sqr a[0]*a[0] ++ mull %eax ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl (%esi),%edx ++ adcl $0,%ebp ++ movl %ebx,(%edi) ++ movl 4(%esi),%eax ++ # saved r[0] ++ # ############### Calculate word 1 ++ xorl %ebx,%ebx ++ # sqr a[1]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 8(%esi),%eax ++ adcl $0,%ebx ++ movl %ecx,4(%edi) ++ movl (%esi),%edx ++ # saved r[1] ++ # ############### Calculate word 2 ++ xorl %ecx,%ecx ++ # sqr a[2]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 4(%esi),%eax ++ adcl $0,%ecx ++ # sqr a[1]*a[1] ++ mull %eax ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl (%esi),%edx ++ adcl $0,%ecx ++ movl %ebp,8(%edi) ++ movl 12(%esi),%eax ++ # saved r[2] ++ # ############### Calculate word 3 ++ xorl %ebp,%ebp ++ # sqr a[3]*a[0] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 8(%esi),%eax ++ adcl $0,%ebp ++ movl 4(%esi),%edx ++ # sqr a[2]*a[1] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebp ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ movl 12(%esi),%eax ++ adcl $0,%ebp ++ movl %ebx,12(%edi) ++ movl 4(%esi),%edx ++ # saved r[3] ++ # ############### Calculate word 4 ++ xorl %ebx,%ebx ++ # sqr a[3]*a[1] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ebx ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 8(%esi),%eax ++ adcl $0,%ebx ++ # sqr a[2]*a[2] ++ mull %eax ++ addl %eax,%ecx ++ adcl %edx,%ebp ++ movl 8(%esi),%edx ++ adcl $0,%ebx ++ movl %ecx,16(%edi) ++ movl 12(%esi),%eax ++ # saved r[4] ++ # ############### Calculate word 5 ++ xorl %ecx,%ecx ++ # sqr a[3]*a[2] ++ mull %edx ++ addl %eax,%eax ++ adcl %edx,%edx ++ adcl $0,%ecx ++ addl %eax,%ebp ++ adcl %edx,%ebx ++ movl 12(%esi),%eax ++ adcl $0,%ecx ++ movl %ebp,20(%edi) ++ # saved r[5] ++ # ############### Calculate word 6 ++ xorl %ebp,%ebp ++ # sqr a[3]*a[3] ++ mull %eax ++ addl %eax,%ebx ++ adcl %edx,%ecx ++ adcl $0,%ebp ++ movl %ebx,24(%edi) ++ # saved r[6] ++ movl %ecx,28(%edi) ++ popl %ebx ++ popl %ebp ++ popl %edi ++ popl %esi ++ ret ++#endif +diff --git a/apple-x86/crypto/fipsmodule/ghash-ssse3-x86.S b/apple-x86/crypto/fipsmodule/ghash-ssse3-x86.S +new file mode 100644 +index 0000000..8656679 +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/ghash-ssse3-x86.S +@@ -0,0 +1,289 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _gcm_gmult_ssse3 ++.private_extern _gcm_gmult_ssse3 ++.align 4 ++_gcm_gmult_ssse3: ++L_gcm_gmult_ssse3_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%edi ++ movl 24(%esp),%esi ++ movdqu (%edi),%xmm0 ++ call L000pic_point ++L000pic_point: ++ popl %eax ++ movdqa Lreverse_bytes-L000pic_point(%eax),%xmm7 ++ movdqa Llow4_mask-L000pic_point(%eax),%xmm2 ++.byte 102,15,56,0,199 ++ movdqa %xmm2,%xmm1 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm2,%xmm0 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ movl $5,%eax ++L001loop_row_1: ++ movdqa (%esi),%xmm4 ++ leal 16(%esi),%esi ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ pxor %xmm5,%xmm2 ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ subl $1,%eax ++ jnz L001loop_row_1 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movl $5,%eax ++L002loop_row_2: ++ movdqa (%esi),%xmm4 ++ leal 16(%esi),%esi ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ pxor %xmm5,%xmm2 ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ subl $1,%eax ++ jnz L002loop_row_2 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movl $6,%eax ++L003loop_row_3: ++ movdqa (%esi),%xmm4 ++ leal 16(%esi),%esi ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ pxor %xmm5,%xmm2 ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ subl $1,%eax ++ jnz L003loop_row_3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++.byte 102,15,56,0,215 ++ movdqu %xmm2,(%edi) ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _gcm_ghash_ssse3 ++.private_extern _gcm_ghash_ssse3 ++.align 4 ++_gcm_ghash_ssse3: ++L_gcm_ghash_ssse3_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%edi ++ movl 24(%esp),%esi ++ movl 28(%esp),%edx ++ movl 32(%esp),%ecx ++ movdqu (%edi),%xmm0 ++ call L004pic_point ++L004pic_point: ++ popl %ebx ++ movdqa Lreverse_bytes-L004pic_point(%ebx),%xmm7 ++ andl $-16,%ecx ++.byte 102,15,56,0,199 ++ pxor %xmm3,%xmm3 ++L005loop_ghash: ++ movdqa Llow4_mask-L004pic_point(%ebx),%xmm2 ++ movdqu (%edx),%xmm1 ++.byte 102,15,56,0,207 ++ pxor %xmm1,%xmm0 ++ movdqa %xmm2,%xmm1 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm2,%xmm0 ++ pxor %xmm2,%xmm2 ++ movl $5,%eax ++L006loop_row_4: ++ movdqa (%esi),%xmm4 ++ leal 16(%esi),%esi ++ movdqa %xmm2,%xmm6 ++.byte 
102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ pxor %xmm5,%xmm2 ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ subl $1,%eax ++ jnz L006loop_row_4 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movl $5,%eax ++L007loop_row_5: ++ movdqa (%esi),%xmm4 ++ leal 16(%esi),%esi ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ pxor %xmm5,%xmm2 ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ subl $1,%eax ++ jnz L007loop_row_5 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movl $6,%eax ++L008loop_row_6: ++ movdqa (%esi),%xmm4 ++ leal 16(%esi),%esi ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ pxor %xmm5,%xmm2 ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ subl $1,%eax ++ jnz L008loop_row_6 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movdqa %xmm2,%xmm0 ++ leal -256(%esi),%esi ++ leal 16(%edx),%edx ++ subl $16,%ecx ++ jnz L005loop_ghash ++.byte 102,15,56,0,199 ++ movdqu %xmm0,(%edi) ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 4,0x90 ++Lreverse_bytes: ++.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ++.align 4,0x90 ++Llow4_mask: ++.long 252645135,252645135,252645135,252645135 ++#endif +diff --git a/apple-x86/crypto/fipsmodule/ghash-x86.S b/apple-x86/crypto/fipsmodule/ghash-x86.S +new file mode 100644 +index 0000000..c1e0d53 +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/ghash-x86.S +@@ -0,0 +1,323 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _gcm_init_clmul ++.private_extern _gcm_init_clmul ++.align 4 ++_gcm_init_clmul: ++L_gcm_init_clmul_begin: ++ movl 4(%esp),%edx ++ movl 8(%esp),%eax ++ call L000pic ++L000pic: ++ popl %ecx ++ leal Lbswap-L000pic(%ecx),%ecx ++ movdqu (%eax),%xmm2 ++ pshufd $78,%xmm2,%xmm2 ++ pshufd $255,%xmm2,%xmm4 ++ movdqa %xmm2,%xmm3 ++ psllq $1,%xmm2 ++ pxor %xmm5,%xmm5 ++ psrlq $63,%xmm3 ++ pcmpgtd %xmm4,%xmm5 ++ pslldq $8,%xmm3 ++ por %xmm3,%xmm2 ++ pand 16(%ecx),%xmm5 ++ pxor %xmm5,%xmm2 ++ movdqa %xmm2,%xmm0 ++ movdqa %xmm0,%xmm1 ++ pshufd $78,%xmm0,%xmm3 ++ pshufd $78,%xmm2,%xmm4 ++ pxor %xmm0,%xmm3 ++ pxor %xmm2,%xmm4 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,220,0 ++ xorps %xmm0,%xmm3 ++ xorps %xmm1,%xmm3 ++ movdqa %xmm3,%xmm4 ++ psrldq $8,%xmm3 ++ pslldq $8,%xmm4 ++ pxor %xmm3,%xmm1 ++ pxor %xmm4,%xmm0 ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++ pshufd $78,%xmm2,%xmm3 ++ pshufd $78,%xmm0,%xmm4 ++ pxor %xmm2,%xmm3 ++ movdqu %xmm2,(%edx) ++ pxor %xmm0,%xmm4 ++ movdqu %xmm0,16(%edx) ++.byte 102,15,58,15,227,8 ++ movdqu %xmm4,32(%edx) ++ ret ++.globl _gcm_gmult_clmul ++.private_extern _gcm_gmult_clmul ++.align 4 ++_gcm_gmult_clmul: ++L_gcm_gmult_clmul_begin: ++ movl 4(%esp),%eax ++ movl 8(%esp),%edx ++ call L001pic ++L001pic: ++ popl %ecx ++ leal Lbswap-L001pic(%ecx),%ecx ++ movdqu (%eax),%xmm0 ++ movdqa (%ecx),%xmm5 ++ movups (%edx),%xmm2 ++.byte 102,15,56,0,197 ++ movups 32(%edx),%xmm4 ++ movdqa %xmm0,%xmm1 ++ pshufd $78,%xmm0,%xmm3 ++ pxor %xmm0,%xmm3 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,220,0 ++ xorps %xmm0,%xmm3 ++ xorps %xmm1,%xmm3 ++ movdqa %xmm3,%xmm4 ++ psrldq $8,%xmm3 ++ pslldq $8,%xmm4 ++ pxor %xmm3,%xmm1 ++ pxor %xmm4,%xmm0 ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++.byte 102,15,56,0,197 ++ movdqu %xmm0,(%eax) ++ ret ++.globl _gcm_ghash_clmul ++.private_extern _gcm_ghash_clmul ++.align 4 ++_gcm_ghash_clmul: ++L_gcm_ghash_clmul_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%eax ++ movl 24(%esp),%edx ++ movl 28(%esp),%esi ++ movl 32(%esp),%ebx ++ call L002pic ++L002pic: ++ popl %ecx ++ leal Lbswap-L002pic(%ecx),%ecx ++ movdqu (%eax),%xmm0 ++ movdqa (%ecx),%xmm5 ++ movdqu (%edx),%xmm2 ++.byte 102,15,56,0,197 ++ subl $16,%ebx ++ jz L003odd_tail ++ movdqu (%esi),%xmm3 ++ movdqu 16(%esi),%xmm6 ++.byte 102,15,56,0,221 ++.byte 102,15,56,0,245 ++ movdqu 32(%edx),%xmm5 ++ pxor %xmm3,%xmm0 ++ pshufd $78,%xmm6,%xmm3 ++ movdqa %xmm6,%xmm7 ++ pxor %xmm6,%xmm3 ++ leal 32(%esi),%esi ++.byte 102,15,58,68,242,0 ++.byte 102,15,58,68,250,17 ++.byte 102,15,58,68,221,0 ++ movups 16(%edx),%xmm2 ++ nop ++ subl $32,%ebx ++ jbe L004even_tail ++ jmp L005mod_loop ++.align 5,0x90 ++L005mod_loop: ++ pshufd $78,%xmm0,%xmm4 ++ 
movdqa %xmm0,%xmm1 ++ pxor %xmm0,%xmm4 ++ nop ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,229,16 ++ movups (%edx),%xmm2 ++ xorps %xmm6,%xmm0 ++ movdqa (%ecx),%xmm5 ++ xorps %xmm7,%xmm1 ++ movdqu (%esi),%xmm7 ++ pxor %xmm0,%xmm3 ++ movdqu 16(%esi),%xmm6 ++ pxor %xmm1,%xmm3 ++.byte 102,15,56,0,253 ++ pxor %xmm3,%xmm4 ++ movdqa %xmm4,%xmm3 ++ psrldq $8,%xmm4 ++ pslldq $8,%xmm3 ++ pxor %xmm4,%xmm1 ++ pxor %xmm3,%xmm0 ++.byte 102,15,56,0,245 ++ pxor %xmm7,%xmm1 ++ movdqa %xmm6,%xmm7 ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++.byte 102,15,58,68,242,0 ++ movups 32(%edx),%xmm5 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ pshufd $78,%xmm7,%xmm3 ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm7,%xmm3 ++ pxor %xmm4,%xmm1 ++.byte 102,15,58,68,250,17 ++ movups 16(%edx),%xmm2 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++.byte 102,15,58,68,221,0 ++ leal 32(%esi),%esi ++ subl $32,%ebx ++ ja L005mod_loop ++L004even_tail: ++ pshufd $78,%xmm0,%xmm4 ++ movdqa %xmm0,%xmm1 ++ pxor %xmm0,%xmm4 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,229,16 ++ movdqa (%ecx),%xmm5 ++ xorps %xmm6,%xmm0 ++ xorps %xmm7,%xmm1 ++ pxor %xmm0,%xmm3 ++ pxor %xmm1,%xmm3 ++ pxor %xmm3,%xmm4 ++ movdqa %xmm4,%xmm3 ++ psrldq $8,%xmm4 ++ pslldq $8,%xmm3 ++ pxor %xmm4,%xmm1 ++ pxor %xmm3,%xmm0 ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++ testl %ebx,%ebx ++ jnz L006done ++ movups (%edx),%xmm2 ++L003odd_tail: ++ movdqu (%esi),%xmm3 ++.byte 102,15,56,0,221 ++ pxor %xmm3,%xmm0 ++ movdqa %xmm0,%xmm1 ++ pshufd $78,%xmm0,%xmm3 ++ pshufd $78,%xmm2,%xmm4 ++ pxor %xmm0,%xmm3 ++ pxor %xmm2,%xmm4 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,220,0 ++ xorps %xmm0,%xmm3 ++ xorps %xmm1,%xmm3 ++ movdqa %xmm3,%xmm4 ++ psrldq $8,%xmm3 ++ pslldq $8,%xmm4 ++ pxor %xmm3,%xmm1 ++ pxor %xmm4,%xmm0 ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++L006done: ++.byte 102,15,56,0,197 ++ movdqu %xmm0,(%eax) ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 6,0x90 ++Lbswap: ++.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ++.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 ++.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 ++.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 ++.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 ++.byte 0 ++#endif +diff --git a/apple-x86/crypto/fipsmodule/md5-586.S b/apple-x86/crypto/fipsmodule/md5-586.S +new file mode 100644 +index 0000000..f4c4b50 +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/md5-586.S +@@ -0,0 +1,685 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source 
tree. Do not edit by hand. ++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _md5_block_asm_data_order ++.private_extern _md5_block_asm_data_order ++.align 4 ++_md5_block_asm_data_order: ++L_md5_block_asm_data_order_begin: ++ pushl %esi ++ pushl %edi ++ movl 12(%esp),%edi ++ movl 16(%esp),%esi ++ movl 20(%esp),%ecx ++ pushl %ebp ++ shll $6,%ecx ++ pushl %ebx ++ addl %esi,%ecx ++ subl $64,%ecx ++ movl (%edi),%eax ++ pushl %ecx ++ movl 4(%edi),%ebx ++ movl 8(%edi),%ecx ++ movl 12(%edi),%edx ++L000start: ++ ++ # R0 section ++ movl %ecx,%edi ++ movl (%esi),%ebp ++ # R0 0 ++ xorl %edx,%edi ++ andl %ebx,%edi ++ leal 3614090360(%eax,%ebp,1),%eax ++ xorl %edx,%edi ++ addl %edi,%eax ++ movl %ebx,%edi ++ roll $7,%eax ++ movl 4(%esi),%ebp ++ addl %ebx,%eax ++ # R0 1 ++ xorl %ecx,%edi ++ andl %eax,%edi ++ leal 3905402710(%edx,%ebp,1),%edx ++ xorl %ecx,%edi ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $12,%edx ++ movl 8(%esi),%ebp ++ addl %eax,%edx ++ # R0 2 ++ xorl %ebx,%edi ++ andl %edx,%edi ++ leal 606105819(%ecx,%ebp,1),%ecx ++ xorl %ebx,%edi ++ addl %edi,%ecx ++ movl %edx,%edi ++ roll $17,%ecx ++ movl 12(%esi),%ebp ++ addl %edx,%ecx ++ # R0 3 ++ xorl %eax,%edi ++ andl %ecx,%edi ++ leal 3250441966(%ebx,%ebp,1),%ebx ++ xorl %eax,%edi ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $22,%ebx ++ movl 16(%esi),%ebp ++ addl %ecx,%ebx ++ # R0 4 ++ xorl %edx,%edi ++ andl %ebx,%edi ++ leal 4118548399(%eax,%ebp,1),%eax ++ xorl %edx,%edi ++ addl %edi,%eax ++ movl %ebx,%edi ++ roll $7,%eax ++ movl 20(%esi),%ebp ++ addl %ebx,%eax ++ # R0 5 ++ xorl %ecx,%edi ++ andl %eax,%edi ++ leal 1200080426(%edx,%ebp,1),%edx ++ xorl %ecx,%edi ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $12,%edx ++ movl 24(%esi),%ebp ++ addl %eax,%edx ++ # R0 6 ++ xorl %ebx,%edi ++ andl %edx,%edi ++ leal 2821735955(%ecx,%ebp,1),%ecx ++ xorl %ebx,%edi ++ addl %edi,%ecx ++ movl %edx,%edi ++ roll $17,%ecx ++ movl 28(%esi),%ebp ++ addl %edx,%ecx ++ # R0 7 ++ xorl %eax,%edi ++ andl %ecx,%edi ++ leal 4249261313(%ebx,%ebp,1),%ebx ++ xorl %eax,%edi ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $22,%ebx ++ movl 32(%esi),%ebp ++ addl %ecx,%ebx ++ # R0 8 ++ xorl %edx,%edi ++ andl %ebx,%edi ++ leal 1770035416(%eax,%ebp,1),%eax ++ xorl %edx,%edi ++ addl %edi,%eax ++ movl %ebx,%edi ++ roll $7,%eax ++ movl 36(%esi),%ebp ++ addl %ebx,%eax ++ # R0 9 ++ xorl %ecx,%edi ++ andl %eax,%edi ++ leal 2336552879(%edx,%ebp,1),%edx ++ xorl %ecx,%edi ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $12,%edx ++ movl 40(%esi),%ebp ++ addl %eax,%edx ++ # R0 10 ++ xorl %ebx,%edi ++ andl %edx,%edi ++ leal 4294925233(%ecx,%ebp,1),%ecx ++ xorl %ebx,%edi ++ addl %edi,%ecx ++ movl %edx,%edi ++ roll $17,%ecx ++ movl 44(%esi),%ebp ++ addl %edx,%ecx ++ # R0 11 ++ xorl %eax,%edi ++ andl %ecx,%edi ++ leal 2304563134(%ebx,%ebp,1),%ebx ++ xorl %eax,%edi ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $22,%ebx ++ movl 48(%esi),%ebp ++ addl %ecx,%ebx ++ # R0 12 ++ xorl %edx,%edi ++ andl %ebx,%edi ++ leal 1804603682(%eax,%ebp,1),%eax ++ xorl %edx,%edi ++ addl %edi,%eax ++ movl %ebx,%edi ++ roll $7,%eax ++ movl 52(%esi),%ebp ++ addl %ebx,%eax ++ # R0 13 ++ xorl %ecx,%edi ++ andl %eax,%edi ++ leal 4254626195(%edx,%ebp,1),%edx ++ xorl %ecx,%edi ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $12,%edx ++ movl 56(%esi),%ebp ++ addl %eax,%edx ++ # R0 14 ++ xorl %ebx,%edi ++ andl %edx,%edi ++ leal 2792965006(%ecx,%ebp,1),%ecx ++ xorl %ebx,%edi ++ addl %edi,%ecx ++ movl %edx,%edi ++ roll $17,%ecx ++ movl 60(%esi),%ebp ++ addl %edx,%ecx ++ # R0 15 ++ xorl %eax,%edi ++ andl 
%ecx,%edi ++ leal 1236535329(%ebx,%ebp,1),%ebx ++ xorl %eax,%edi ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $22,%ebx ++ movl 4(%esi),%ebp ++ addl %ecx,%ebx ++ ++ # R1 section ++ # R1 16 ++ leal 4129170786(%eax,%ebp,1),%eax ++ xorl %ebx,%edi ++ andl %edx,%edi ++ movl 24(%esi),%ebp ++ xorl %ecx,%edi ++ addl %edi,%eax ++ movl %ebx,%edi ++ roll $5,%eax ++ addl %ebx,%eax ++ # R1 17 ++ leal 3225465664(%edx,%ebp,1),%edx ++ xorl %eax,%edi ++ andl %ecx,%edi ++ movl 44(%esi),%ebp ++ xorl %ebx,%edi ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $9,%edx ++ addl %eax,%edx ++ # R1 18 ++ leal 643717713(%ecx,%ebp,1),%ecx ++ xorl %edx,%edi ++ andl %ebx,%edi ++ movl (%esi),%ebp ++ xorl %eax,%edi ++ addl %edi,%ecx ++ movl %edx,%edi ++ roll $14,%ecx ++ addl %edx,%ecx ++ # R1 19 ++ leal 3921069994(%ebx,%ebp,1),%ebx ++ xorl %ecx,%edi ++ andl %eax,%edi ++ movl 20(%esi),%ebp ++ xorl %edx,%edi ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $20,%ebx ++ addl %ecx,%ebx ++ # R1 20 ++ leal 3593408605(%eax,%ebp,1),%eax ++ xorl %ebx,%edi ++ andl %edx,%edi ++ movl 40(%esi),%ebp ++ xorl %ecx,%edi ++ addl %edi,%eax ++ movl %ebx,%edi ++ roll $5,%eax ++ addl %ebx,%eax ++ # R1 21 ++ leal 38016083(%edx,%ebp,1),%edx ++ xorl %eax,%edi ++ andl %ecx,%edi ++ movl 60(%esi),%ebp ++ xorl %ebx,%edi ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $9,%edx ++ addl %eax,%edx ++ # R1 22 ++ leal 3634488961(%ecx,%ebp,1),%ecx ++ xorl %edx,%edi ++ andl %ebx,%edi ++ movl 16(%esi),%ebp ++ xorl %eax,%edi ++ addl %edi,%ecx ++ movl %edx,%edi ++ roll $14,%ecx ++ addl %edx,%ecx ++ # R1 23 ++ leal 3889429448(%ebx,%ebp,1),%ebx ++ xorl %ecx,%edi ++ andl %eax,%edi ++ movl 36(%esi),%ebp ++ xorl %edx,%edi ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $20,%ebx ++ addl %ecx,%ebx ++ # R1 24 ++ leal 568446438(%eax,%ebp,1),%eax ++ xorl %ebx,%edi ++ andl %edx,%edi ++ movl 56(%esi),%ebp ++ xorl %ecx,%edi ++ addl %edi,%eax ++ movl %ebx,%edi ++ roll $5,%eax ++ addl %ebx,%eax ++ # R1 25 ++ leal 3275163606(%edx,%ebp,1),%edx ++ xorl %eax,%edi ++ andl %ecx,%edi ++ movl 12(%esi),%ebp ++ xorl %ebx,%edi ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $9,%edx ++ addl %eax,%edx ++ # R1 26 ++ leal 4107603335(%ecx,%ebp,1),%ecx ++ xorl %edx,%edi ++ andl %ebx,%edi ++ movl 32(%esi),%ebp ++ xorl %eax,%edi ++ addl %edi,%ecx ++ movl %edx,%edi ++ roll $14,%ecx ++ addl %edx,%ecx ++ # R1 27 ++ leal 1163531501(%ebx,%ebp,1),%ebx ++ xorl %ecx,%edi ++ andl %eax,%edi ++ movl 52(%esi),%ebp ++ xorl %edx,%edi ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $20,%ebx ++ addl %ecx,%ebx ++ # R1 28 ++ leal 2850285829(%eax,%ebp,1),%eax ++ xorl %ebx,%edi ++ andl %edx,%edi ++ movl 8(%esi),%ebp ++ xorl %ecx,%edi ++ addl %edi,%eax ++ movl %ebx,%edi ++ roll $5,%eax ++ addl %ebx,%eax ++ # R1 29 ++ leal 4243563512(%edx,%ebp,1),%edx ++ xorl %eax,%edi ++ andl %ecx,%edi ++ movl 28(%esi),%ebp ++ xorl %ebx,%edi ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $9,%edx ++ addl %eax,%edx ++ # R1 30 ++ leal 1735328473(%ecx,%ebp,1),%ecx ++ xorl %edx,%edi ++ andl %ebx,%edi ++ movl 48(%esi),%ebp ++ xorl %eax,%edi ++ addl %edi,%ecx ++ movl %edx,%edi ++ roll $14,%ecx ++ addl %edx,%ecx ++ # R1 31 ++ leal 2368359562(%ebx,%ebp,1),%ebx ++ xorl %ecx,%edi ++ andl %eax,%edi ++ movl 20(%esi),%ebp ++ xorl %edx,%edi ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $20,%ebx ++ addl %ecx,%ebx ++ ++ # R2 section ++ # R2 32 ++ xorl %edx,%edi ++ xorl %ebx,%edi ++ leal 4294588738(%eax,%ebp,1),%eax ++ addl %edi,%eax ++ roll $4,%eax ++ movl 32(%esi),%ebp ++ movl %ebx,%edi ++ # R2 33 ++ leal 2272392833(%edx,%ebp,1),%edx ++ addl %ebx,%eax ++ xorl %ecx,%edi ++ xorl 
%eax,%edi ++ movl 44(%esi),%ebp ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $11,%edx ++ addl %eax,%edx ++ # R2 34 ++ xorl %ebx,%edi ++ xorl %edx,%edi ++ leal 1839030562(%ecx,%ebp,1),%ecx ++ addl %edi,%ecx ++ roll $16,%ecx ++ movl 56(%esi),%ebp ++ movl %edx,%edi ++ # R2 35 ++ leal 4259657740(%ebx,%ebp,1),%ebx ++ addl %edx,%ecx ++ xorl %eax,%edi ++ xorl %ecx,%edi ++ movl 4(%esi),%ebp ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $23,%ebx ++ addl %ecx,%ebx ++ # R2 36 ++ xorl %edx,%edi ++ xorl %ebx,%edi ++ leal 2763975236(%eax,%ebp,1),%eax ++ addl %edi,%eax ++ roll $4,%eax ++ movl 16(%esi),%ebp ++ movl %ebx,%edi ++ # R2 37 ++ leal 1272893353(%edx,%ebp,1),%edx ++ addl %ebx,%eax ++ xorl %ecx,%edi ++ xorl %eax,%edi ++ movl 28(%esi),%ebp ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $11,%edx ++ addl %eax,%edx ++ # R2 38 ++ xorl %ebx,%edi ++ xorl %edx,%edi ++ leal 4139469664(%ecx,%ebp,1),%ecx ++ addl %edi,%ecx ++ roll $16,%ecx ++ movl 40(%esi),%ebp ++ movl %edx,%edi ++ # R2 39 ++ leal 3200236656(%ebx,%ebp,1),%ebx ++ addl %edx,%ecx ++ xorl %eax,%edi ++ xorl %ecx,%edi ++ movl 52(%esi),%ebp ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $23,%ebx ++ addl %ecx,%ebx ++ # R2 40 ++ xorl %edx,%edi ++ xorl %ebx,%edi ++ leal 681279174(%eax,%ebp,1),%eax ++ addl %edi,%eax ++ roll $4,%eax ++ movl (%esi),%ebp ++ movl %ebx,%edi ++ # R2 41 ++ leal 3936430074(%edx,%ebp,1),%edx ++ addl %ebx,%eax ++ xorl %ecx,%edi ++ xorl %eax,%edi ++ movl 12(%esi),%ebp ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $11,%edx ++ addl %eax,%edx ++ # R2 42 ++ xorl %ebx,%edi ++ xorl %edx,%edi ++ leal 3572445317(%ecx,%ebp,1),%ecx ++ addl %edi,%ecx ++ roll $16,%ecx ++ movl 24(%esi),%ebp ++ movl %edx,%edi ++ # R2 43 ++ leal 76029189(%ebx,%ebp,1),%ebx ++ addl %edx,%ecx ++ xorl %eax,%edi ++ xorl %ecx,%edi ++ movl 36(%esi),%ebp ++ addl %edi,%ebx ++ movl %ecx,%edi ++ roll $23,%ebx ++ addl %ecx,%ebx ++ # R2 44 ++ xorl %edx,%edi ++ xorl %ebx,%edi ++ leal 3654602809(%eax,%ebp,1),%eax ++ addl %edi,%eax ++ roll $4,%eax ++ movl 48(%esi),%ebp ++ movl %ebx,%edi ++ # R2 45 ++ leal 3873151461(%edx,%ebp,1),%edx ++ addl %ebx,%eax ++ xorl %ecx,%edi ++ xorl %eax,%edi ++ movl 60(%esi),%ebp ++ addl %edi,%edx ++ movl %eax,%edi ++ roll $11,%edx ++ addl %eax,%edx ++ # R2 46 ++ xorl %ebx,%edi ++ xorl %edx,%edi ++ leal 530742520(%ecx,%ebp,1),%ecx ++ addl %edi,%ecx ++ roll $16,%ecx ++ movl 8(%esi),%ebp ++ movl %edx,%edi ++ # R2 47 ++ leal 3299628645(%ebx,%ebp,1),%ebx ++ addl %edx,%ecx ++ xorl %eax,%edi ++ xorl %ecx,%edi ++ movl (%esi),%ebp ++ addl %edi,%ebx ++ movl $-1,%edi ++ roll $23,%ebx ++ addl %ecx,%ebx ++ ++ # R3 section ++ # R3 48 ++ xorl %edx,%edi ++ orl %ebx,%edi ++ leal 4096336452(%eax,%ebp,1),%eax ++ xorl %ecx,%edi ++ movl 28(%esi),%ebp ++ addl %edi,%eax ++ movl $-1,%edi ++ roll $6,%eax ++ xorl %ecx,%edi ++ addl %ebx,%eax ++ # R3 49 ++ orl %eax,%edi ++ leal 1126891415(%edx,%ebp,1),%edx ++ xorl %ebx,%edi ++ movl 56(%esi),%ebp ++ addl %edi,%edx ++ movl $-1,%edi ++ roll $10,%edx ++ xorl %ebx,%edi ++ addl %eax,%edx ++ # R3 50 ++ orl %edx,%edi ++ leal 2878612391(%ecx,%ebp,1),%ecx ++ xorl %eax,%edi ++ movl 20(%esi),%ebp ++ addl %edi,%ecx ++ movl $-1,%edi ++ roll $15,%ecx ++ xorl %eax,%edi ++ addl %edx,%ecx ++ # R3 51 ++ orl %ecx,%edi ++ leal 4237533241(%ebx,%ebp,1),%ebx ++ xorl %edx,%edi ++ movl 48(%esi),%ebp ++ addl %edi,%ebx ++ movl $-1,%edi ++ roll $21,%ebx ++ xorl %edx,%edi ++ addl %ecx,%ebx ++ # R3 52 ++ orl %ebx,%edi ++ leal 1700485571(%eax,%ebp,1),%eax ++ xorl %ecx,%edi ++ movl 12(%esi),%ebp ++ addl %edi,%eax ++ movl $-1,%edi ++ roll $6,%eax ++ xorl %ecx,%edi ++ 
addl %ebx,%eax ++ # R3 53 ++ orl %eax,%edi ++ leal 2399980690(%edx,%ebp,1),%edx ++ xorl %ebx,%edi ++ movl 40(%esi),%ebp ++ addl %edi,%edx ++ movl $-1,%edi ++ roll $10,%edx ++ xorl %ebx,%edi ++ addl %eax,%edx ++ # R3 54 ++ orl %edx,%edi ++ leal 4293915773(%ecx,%ebp,1),%ecx ++ xorl %eax,%edi ++ movl 4(%esi),%ebp ++ addl %edi,%ecx ++ movl $-1,%edi ++ roll $15,%ecx ++ xorl %eax,%edi ++ addl %edx,%ecx ++ # R3 55 ++ orl %ecx,%edi ++ leal 2240044497(%ebx,%ebp,1),%ebx ++ xorl %edx,%edi ++ movl 32(%esi),%ebp ++ addl %edi,%ebx ++ movl $-1,%edi ++ roll $21,%ebx ++ xorl %edx,%edi ++ addl %ecx,%ebx ++ # R3 56 ++ orl %ebx,%edi ++ leal 1873313359(%eax,%ebp,1),%eax ++ xorl %ecx,%edi ++ movl 60(%esi),%ebp ++ addl %edi,%eax ++ movl $-1,%edi ++ roll $6,%eax ++ xorl %ecx,%edi ++ addl %ebx,%eax ++ # R3 57 ++ orl %eax,%edi ++ leal 4264355552(%edx,%ebp,1),%edx ++ xorl %ebx,%edi ++ movl 24(%esi),%ebp ++ addl %edi,%edx ++ movl $-1,%edi ++ roll $10,%edx ++ xorl %ebx,%edi ++ addl %eax,%edx ++ # R3 58 ++ orl %edx,%edi ++ leal 2734768916(%ecx,%ebp,1),%ecx ++ xorl %eax,%edi ++ movl 52(%esi),%ebp ++ addl %edi,%ecx ++ movl $-1,%edi ++ roll $15,%ecx ++ xorl %eax,%edi ++ addl %edx,%ecx ++ # R3 59 ++ orl %ecx,%edi ++ leal 1309151649(%ebx,%ebp,1),%ebx ++ xorl %edx,%edi ++ movl 16(%esi),%ebp ++ addl %edi,%ebx ++ movl $-1,%edi ++ roll $21,%ebx ++ xorl %edx,%edi ++ addl %ecx,%ebx ++ # R3 60 ++ orl %ebx,%edi ++ leal 4149444226(%eax,%ebp,1),%eax ++ xorl %ecx,%edi ++ movl 44(%esi),%ebp ++ addl %edi,%eax ++ movl $-1,%edi ++ roll $6,%eax ++ xorl %ecx,%edi ++ addl %ebx,%eax ++ # R3 61 ++ orl %eax,%edi ++ leal 3174756917(%edx,%ebp,1),%edx ++ xorl %ebx,%edi ++ movl 8(%esi),%ebp ++ addl %edi,%edx ++ movl $-1,%edi ++ roll $10,%edx ++ xorl %ebx,%edi ++ addl %eax,%edx ++ # R3 62 ++ orl %edx,%edi ++ leal 718787259(%ecx,%ebp,1),%ecx ++ xorl %eax,%edi ++ movl 36(%esi),%ebp ++ addl %edi,%ecx ++ movl $-1,%edi ++ roll $15,%ecx ++ xorl %eax,%edi ++ addl %edx,%ecx ++ # R3 63 ++ orl %ecx,%edi ++ leal 3951481745(%ebx,%ebp,1),%ebx ++ xorl %edx,%edi ++ movl 24(%esp),%ebp ++ addl %edi,%ebx ++ addl $64,%esi ++ roll $21,%ebx ++ movl (%ebp),%edi ++ addl %ecx,%ebx ++ addl %edi,%eax ++ movl 4(%ebp),%edi ++ addl %edi,%ebx ++ movl 8(%ebp),%edi ++ addl %edi,%ecx ++ movl 12(%ebp),%edi ++ addl %edi,%edx ++ movl %eax,(%ebp) ++ movl %ebx,4(%ebp) ++ movl (%esp),%edi ++ movl %ecx,8(%ebp) ++ movl %edx,12(%ebp) ++ cmpl %esi,%edi ++ jae L000start ++ popl %eax ++ popl %ebx ++ popl %ebp ++ popl %edi ++ popl %esi ++ ret ++#endif +diff --git a/apple-x86/crypto/fipsmodule/sha1-586.S b/apple-x86/crypto/fipsmodule/sha1-586.S +new file mode 100644 +index 0000000..3213a62 +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/sha1-586.S +@@ -0,0 +1,3805 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _sha1_block_data_order ++.private_extern _sha1_block_data_order ++.align 4 ++_sha1_block_data_order: ++L_sha1_block_data_order_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ call L000pic_point ++L000pic_point: ++ popl %ebp ++ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000pic_point(%ebp),%esi ++ leal LK_XX_XX-L000pic_point(%ebp),%ebp ++ movl (%esi),%eax ++ movl 4(%esi),%edx ++ testl $512,%edx ++ jz L001x86 ++ movl 8(%esi),%ecx ++ testl $16777216,%eax ++ jz L001x86 ++ andl $268435456,%edx ++ andl $1073741824,%eax ++ orl %edx,%eax ++ cmpl $1342177280,%eax ++ je Lavx_shortcut ++ jmp Lssse3_shortcut ++.align 4,0x90 ++L001x86: ++ movl 20(%esp),%ebp ++ movl 24(%esp),%esi ++ movl 28(%esp),%eax ++ subl $76,%esp ++ shll $6,%eax ++ addl %esi,%eax ++ movl %eax,104(%esp) ++ movl 16(%ebp),%edi ++ jmp L002loop ++.align 4,0x90 ++L002loop: ++ movl (%esi),%eax ++ movl 4(%esi),%ebx ++ movl 8(%esi),%ecx ++ movl 12(%esi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ movl %eax,(%esp) ++ movl %ebx,4(%esp) ++ movl %ecx,8(%esp) ++ movl %edx,12(%esp) ++ movl 16(%esi),%eax ++ movl 20(%esi),%ebx ++ movl 24(%esi),%ecx ++ movl 28(%esi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ movl %eax,16(%esp) ++ movl %ebx,20(%esp) ++ movl %ecx,24(%esp) ++ movl %edx,28(%esp) ++ movl 32(%esi),%eax ++ movl 36(%esi),%ebx ++ movl 40(%esi),%ecx ++ movl 44(%esi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ movl %eax,32(%esp) ++ movl %ebx,36(%esp) ++ movl %ecx,40(%esp) ++ movl %edx,44(%esp) ++ movl 48(%esi),%eax ++ movl 52(%esi),%ebx ++ movl 56(%esi),%ecx ++ movl 60(%esi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ movl %eax,48(%esp) ++ movl %ebx,52(%esp) ++ movl %ecx,56(%esp) ++ movl %edx,60(%esp) ++ movl %esi,100(%esp) ++ movl (%ebp),%eax ++ movl 4(%ebp),%ebx ++ movl 8(%ebp),%ecx ++ movl 12(%ebp),%edx ++ # 00_15 0 ++ movl %ecx,%esi ++ movl %eax,%ebp ++ roll $5,%ebp ++ xorl %edx,%esi ++ addl %edi,%ebp ++ movl (%esp),%edi ++ andl %ebx,%esi ++ rorl $2,%ebx ++ xorl %edx,%esi ++ leal 1518500249(%ebp,%edi,1),%ebp ++ addl %esi,%ebp ++ # 00_15 1 ++ movl %ebx,%edi ++ movl %ebp,%esi ++ roll $5,%ebp ++ xorl %ecx,%edi ++ addl %edx,%ebp ++ movl 4(%esp),%edx ++ andl %eax,%edi ++ rorl $2,%eax ++ xorl %ecx,%edi ++ leal 1518500249(%ebp,%edx,1),%ebp ++ addl %edi,%ebp ++ # 00_15 2 ++ movl %eax,%edx ++ movl %ebp,%edi ++ roll $5,%ebp ++ xorl %ebx,%edx ++ addl %ecx,%ebp ++ movl 8(%esp),%ecx ++ andl %esi,%edx ++ rorl $2,%esi ++ xorl %ebx,%edx ++ leal 1518500249(%ebp,%ecx,1),%ebp ++ addl %edx,%ebp ++ # 00_15 3 ++ movl %esi,%ecx ++ movl %ebp,%edx ++ roll $5,%ebp ++ xorl %eax,%ecx ++ addl %ebx,%ebp ++ movl 12(%esp),%ebx ++ andl %edi,%ecx ++ rorl $2,%edi ++ xorl %eax,%ecx ++ leal 1518500249(%ebp,%ebx,1),%ebp ++ addl %ecx,%ebp ++ # 00_15 4 ++ movl %edi,%ebx ++ movl %ebp,%ecx ++ roll $5,%ebp ++ xorl %esi,%ebx ++ addl %eax,%ebp ++ movl 16(%esp),%eax ++ andl %edx,%ebx ++ rorl $2,%edx ++ xorl %esi,%ebx ++ leal 1518500249(%ebp,%eax,1),%ebp ++ addl %ebx,%ebp ++ # 00_15 5 ++ movl %edx,%eax ++ movl %ebp,%ebx ++ roll $5,%ebp ++ xorl %edi,%eax ++ addl %esi,%ebp ++ movl 20(%esp),%esi ++ andl %ecx,%eax ++ rorl $2,%ecx ++ xorl %edi,%eax ++ leal 1518500249(%ebp,%esi,1),%ebp ++ addl %eax,%ebp ++ # 00_15 6 ++ movl %ecx,%esi ++ movl %ebp,%eax ++ roll $5,%ebp ++ xorl %edx,%esi ++ addl %edi,%ebp ++ movl 24(%esp),%edi ++ andl %ebx,%esi ++ rorl $2,%ebx ++ xorl %edx,%esi ++ leal 
1518500249(%ebp,%edi,1),%ebp ++ addl %esi,%ebp ++ # 00_15 7 ++ movl %ebx,%edi ++ movl %ebp,%esi ++ roll $5,%ebp ++ xorl %ecx,%edi ++ addl %edx,%ebp ++ movl 28(%esp),%edx ++ andl %eax,%edi ++ rorl $2,%eax ++ xorl %ecx,%edi ++ leal 1518500249(%ebp,%edx,1),%ebp ++ addl %edi,%ebp ++ # 00_15 8 ++ movl %eax,%edx ++ movl %ebp,%edi ++ roll $5,%ebp ++ xorl %ebx,%edx ++ addl %ecx,%ebp ++ movl 32(%esp),%ecx ++ andl %esi,%edx ++ rorl $2,%esi ++ xorl %ebx,%edx ++ leal 1518500249(%ebp,%ecx,1),%ebp ++ addl %edx,%ebp ++ # 00_15 9 ++ movl %esi,%ecx ++ movl %ebp,%edx ++ roll $5,%ebp ++ xorl %eax,%ecx ++ addl %ebx,%ebp ++ movl 36(%esp),%ebx ++ andl %edi,%ecx ++ rorl $2,%edi ++ xorl %eax,%ecx ++ leal 1518500249(%ebp,%ebx,1),%ebp ++ addl %ecx,%ebp ++ # 00_15 10 ++ movl %edi,%ebx ++ movl %ebp,%ecx ++ roll $5,%ebp ++ xorl %esi,%ebx ++ addl %eax,%ebp ++ movl 40(%esp),%eax ++ andl %edx,%ebx ++ rorl $2,%edx ++ xorl %esi,%ebx ++ leal 1518500249(%ebp,%eax,1),%ebp ++ addl %ebx,%ebp ++ # 00_15 11 ++ movl %edx,%eax ++ movl %ebp,%ebx ++ roll $5,%ebp ++ xorl %edi,%eax ++ addl %esi,%ebp ++ movl 44(%esp),%esi ++ andl %ecx,%eax ++ rorl $2,%ecx ++ xorl %edi,%eax ++ leal 1518500249(%ebp,%esi,1),%ebp ++ addl %eax,%ebp ++ # 00_15 12 ++ movl %ecx,%esi ++ movl %ebp,%eax ++ roll $5,%ebp ++ xorl %edx,%esi ++ addl %edi,%ebp ++ movl 48(%esp),%edi ++ andl %ebx,%esi ++ rorl $2,%ebx ++ xorl %edx,%esi ++ leal 1518500249(%ebp,%edi,1),%ebp ++ addl %esi,%ebp ++ # 00_15 13 ++ movl %ebx,%edi ++ movl %ebp,%esi ++ roll $5,%ebp ++ xorl %ecx,%edi ++ addl %edx,%ebp ++ movl 52(%esp),%edx ++ andl %eax,%edi ++ rorl $2,%eax ++ xorl %ecx,%edi ++ leal 1518500249(%ebp,%edx,1),%ebp ++ addl %edi,%ebp ++ # 00_15 14 ++ movl %eax,%edx ++ movl %ebp,%edi ++ roll $5,%ebp ++ xorl %ebx,%edx ++ addl %ecx,%ebp ++ movl 56(%esp),%ecx ++ andl %esi,%edx ++ rorl $2,%esi ++ xorl %ebx,%edx ++ leal 1518500249(%ebp,%ecx,1),%ebp ++ addl %edx,%ebp ++ # 00_15 15 ++ movl %esi,%ecx ++ movl %ebp,%edx ++ roll $5,%ebp ++ xorl %eax,%ecx ++ addl %ebx,%ebp ++ movl 60(%esp),%ebx ++ andl %edi,%ecx ++ rorl $2,%edi ++ xorl %eax,%ecx ++ leal 1518500249(%ebp,%ebx,1),%ebp ++ movl (%esp),%ebx ++ addl %ebp,%ecx ++ # 16_19 16 ++ movl %edi,%ebp ++ xorl 8(%esp),%ebx ++ xorl %esi,%ebp ++ xorl 32(%esp),%ebx ++ andl %edx,%ebp ++ xorl 52(%esp),%ebx ++ roll $1,%ebx ++ xorl %esi,%ebp ++ addl %ebp,%eax ++ movl %ecx,%ebp ++ rorl $2,%edx ++ movl %ebx,(%esp) ++ roll $5,%ebp ++ leal 1518500249(%ebx,%eax,1),%ebx ++ movl 4(%esp),%eax ++ addl %ebp,%ebx ++ # 16_19 17 ++ movl %edx,%ebp ++ xorl 12(%esp),%eax ++ xorl %edi,%ebp ++ xorl 36(%esp),%eax ++ andl %ecx,%ebp ++ xorl 56(%esp),%eax ++ roll $1,%eax ++ xorl %edi,%ebp ++ addl %ebp,%esi ++ movl %ebx,%ebp ++ rorl $2,%ecx ++ movl %eax,4(%esp) ++ roll $5,%ebp ++ leal 1518500249(%eax,%esi,1),%eax ++ movl 8(%esp),%esi ++ addl %ebp,%eax ++ # 16_19 18 ++ movl %ecx,%ebp ++ xorl 16(%esp),%esi ++ xorl %edx,%ebp ++ xorl 40(%esp),%esi ++ andl %ebx,%ebp ++ xorl 60(%esp),%esi ++ roll $1,%esi ++ xorl %edx,%ebp ++ addl %ebp,%edi ++ movl %eax,%ebp ++ rorl $2,%ebx ++ movl %esi,8(%esp) ++ roll $5,%ebp ++ leal 1518500249(%esi,%edi,1),%esi ++ movl 12(%esp),%edi ++ addl %ebp,%esi ++ # 16_19 19 ++ movl %ebx,%ebp ++ xorl 20(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 44(%esp),%edi ++ andl %eax,%ebp ++ xorl (%esp),%edi ++ roll $1,%edi ++ xorl %ecx,%ebp ++ addl %ebp,%edx ++ movl %esi,%ebp ++ rorl $2,%eax ++ movl %edi,12(%esp) ++ roll $5,%ebp ++ leal 1518500249(%edi,%edx,1),%edi ++ movl 16(%esp),%edx ++ addl %ebp,%edi ++ # 20_39 20 ++ movl %esi,%ebp ++ xorl 24(%esp),%edx ++ xorl %eax,%ebp ++ xorl 
48(%esp),%edx ++ xorl %ebx,%ebp ++ xorl 4(%esp),%edx ++ roll $1,%edx ++ addl %ebp,%ecx ++ rorl $2,%esi ++ movl %edi,%ebp ++ roll $5,%ebp ++ movl %edx,16(%esp) ++ leal 1859775393(%edx,%ecx,1),%edx ++ movl 20(%esp),%ecx ++ addl %ebp,%edx ++ # 20_39 21 ++ movl %edi,%ebp ++ xorl 28(%esp),%ecx ++ xorl %esi,%ebp ++ xorl 52(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 8(%esp),%ecx ++ roll $1,%ecx ++ addl %ebp,%ebx ++ rorl $2,%edi ++ movl %edx,%ebp ++ roll $5,%ebp ++ movl %ecx,20(%esp) ++ leal 1859775393(%ecx,%ebx,1),%ecx ++ movl 24(%esp),%ebx ++ addl %ebp,%ecx ++ # 20_39 22 ++ movl %edx,%ebp ++ xorl 32(%esp),%ebx ++ xorl %edi,%ebp ++ xorl 56(%esp),%ebx ++ xorl %esi,%ebp ++ xorl 12(%esp),%ebx ++ roll $1,%ebx ++ addl %ebp,%eax ++ rorl $2,%edx ++ movl %ecx,%ebp ++ roll $5,%ebp ++ movl %ebx,24(%esp) ++ leal 1859775393(%ebx,%eax,1),%ebx ++ movl 28(%esp),%eax ++ addl %ebp,%ebx ++ # 20_39 23 ++ movl %ecx,%ebp ++ xorl 36(%esp),%eax ++ xorl %edx,%ebp ++ xorl 60(%esp),%eax ++ xorl %edi,%ebp ++ xorl 16(%esp),%eax ++ roll $1,%eax ++ addl %ebp,%esi ++ rorl $2,%ecx ++ movl %ebx,%ebp ++ roll $5,%ebp ++ movl %eax,28(%esp) ++ leal 1859775393(%eax,%esi,1),%eax ++ movl 32(%esp),%esi ++ addl %ebp,%eax ++ # 20_39 24 ++ movl %ebx,%ebp ++ xorl 40(%esp),%esi ++ xorl %ecx,%ebp ++ xorl (%esp),%esi ++ xorl %edx,%ebp ++ xorl 20(%esp),%esi ++ roll $1,%esi ++ addl %ebp,%edi ++ rorl $2,%ebx ++ movl %eax,%ebp ++ roll $5,%ebp ++ movl %esi,32(%esp) ++ leal 1859775393(%esi,%edi,1),%esi ++ movl 36(%esp),%edi ++ addl %ebp,%esi ++ # 20_39 25 ++ movl %eax,%ebp ++ xorl 44(%esp),%edi ++ xorl %ebx,%ebp ++ xorl 4(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 24(%esp),%edi ++ roll $1,%edi ++ addl %ebp,%edx ++ rorl $2,%eax ++ movl %esi,%ebp ++ roll $5,%ebp ++ movl %edi,36(%esp) ++ leal 1859775393(%edi,%edx,1),%edi ++ movl 40(%esp),%edx ++ addl %ebp,%edi ++ # 20_39 26 ++ movl %esi,%ebp ++ xorl 48(%esp),%edx ++ xorl %eax,%ebp ++ xorl 8(%esp),%edx ++ xorl %ebx,%ebp ++ xorl 28(%esp),%edx ++ roll $1,%edx ++ addl %ebp,%ecx ++ rorl $2,%esi ++ movl %edi,%ebp ++ roll $5,%ebp ++ movl %edx,40(%esp) ++ leal 1859775393(%edx,%ecx,1),%edx ++ movl 44(%esp),%ecx ++ addl %ebp,%edx ++ # 20_39 27 ++ movl %edi,%ebp ++ xorl 52(%esp),%ecx ++ xorl %esi,%ebp ++ xorl 12(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 32(%esp),%ecx ++ roll $1,%ecx ++ addl %ebp,%ebx ++ rorl $2,%edi ++ movl %edx,%ebp ++ roll $5,%ebp ++ movl %ecx,44(%esp) ++ leal 1859775393(%ecx,%ebx,1),%ecx ++ movl 48(%esp),%ebx ++ addl %ebp,%ecx ++ # 20_39 28 ++ movl %edx,%ebp ++ xorl 56(%esp),%ebx ++ xorl %edi,%ebp ++ xorl 16(%esp),%ebx ++ xorl %esi,%ebp ++ xorl 36(%esp),%ebx ++ roll $1,%ebx ++ addl %ebp,%eax ++ rorl $2,%edx ++ movl %ecx,%ebp ++ roll $5,%ebp ++ movl %ebx,48(%esp) ++ leal 1859775393(%ebx,%eax,1),%ebx ++ movl 52(%esp),%eax ++ addl %ebp,%ebx ++ # 20_39 29 ++ movl %ecx,%ebp ++ xorl 60(%esp),%eax ++ xorl %edx,%ebp ++ xorl 20(%esp),%eax ++ xorl %edi,%ebp ++ xorl 40(%esp),%eax ++ roll $1,%eax ++ addl %ebp,%esi ++ rorl $2,%ecx ++ movl %ebx,%ebp ++ roll $5,%ebp ++ movl %eax,52(%esp) ++ leal 1859775393(%eax,%esi,1),%eax ++ movl 56(%esp),%esi ++ addl %ebp,%eax ++ # 20_39 30 ++ movl %ebx,%ebp ++ xorl (%esp),%esi ++ xorl %ecx,%ebp ++ xorl 24(%esp),%esi ++ xorl %edx,%ebp ++ xorl 44(%esp),%esi ++ roll $1,%esi ++ addl %ebp,%edi ++ rorl $2,%ebx ++ movl %eax,%ebp ++ roll $5,%ebp ++ movl %esi,56(%esp) ++ leal 1859775393(%esi,%edi,1),%esi ++ movl 60(%esp),%edi ++ addl %ebp,%esi ++ # 20_39 31 ++ movl %eax,%ebp ++ xorl 4(%esp),%edi ++ xorl %ebx,%ebp ++ xorl 28(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 48(%esp),%edi ++ roll $1,%edi ++ 
addl %ebp,%edx ++ rorl $2,%eax ++ movl %esi,%ebp ++ roll $5,%ebp ++ movl %edi,60(%esp) ++ leal 1859775393(%edi,%edx,1),%edi ++ movl (%esp),%edx ++ addl %ebp,%edi ++ # 20_39 32 ++ movl %esi,%ebp ++ xorl 8(%esp),%edx ++ xorl %eax,%ebp ++ xorl 32(%esp),%edx ++ xorl %ebx,%ebp ++ xorl 52(%esp),%edx ++ roll $1,%edx ++ addl %ebp,%ecx ++ rorl $2,%esi ++ movl %edi,%ebp ++ roll $5,%ebp ++ movl %edx,(%esp) ++ leal 1859775393(%edx,%ecx,1),%edx ++ movl 4(%esp),%ecx ++ addl %ebp,%edx ++ # 20_39 33 ++ movl %edi,%ebp ++ xorl 12(%esp),%ecx ++ xorl %esi,%ebp ++ xorl 36(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 56(%esp),%ecx ++ roll $1,%ecx ++ addl %ebp,%ebx ++ rorl $2,%edi ++ movl %edx,%ebp ++ roll $5,%ebp ++ movl %ecx,4(%esp) ++ leal 1859775393(%ecx,%ebx,1),%ecx ++ movl 8(%esp),%ebx ++ addl %ebp,%ecx ++ # 20_39 34 ++ movl %edx,%ebp ++ xorl 16(%esp),%ebx ++ xorl %edi,%ebp ++ xorl 40(%esp),%ebx ++ xorl %esi,%ebp ++ xorl 60(%esp),%ebx ++ roll $1,%ebx ++ addl %ebp,%eax ++ rorl $2,%edx ++ movl %ecx,%ebp ++ roll $5,%ebp ++ movl %ebx,8(%esp) ++ leal 1859775393(%ebx,%eax,1),%ebx ++ movl 12(%esp),%eax ++ addl %ebp,%ebx ++ # 20_39 35 ++ movl %ecx,%ebp ++ xorl 20(%esp),%eax ++ xorl %edx,%ebp ++ xorl 44(%esp),%eax ++ xorl %edi,%ebp ++ xorl (%esp),%eax ++ roll $1,%eax ++ addl %ebp,%esi ++ rorl $2,%ecx ++ movl %ebx,%ebp ++ roll $5,%ebp ++ movl %eax,12(%esp) ++ leal 1859775393(%eax,%esi,1),%eax ++ movl 16(%esp),%esi ++ addl %ebp,%eax ++ # 20_39 36 ++ movl %ebx,%ebp ++ xorl 24(%esp),%esi ++ xorl %ecx,%ebp ++ xorl 48(%esp),%esi ++ xorl %edx,%ebp ++ xorl 4(%esp),%esi ++ roll $1,%esi ++ addl %ebp,%edi ++ rorl $2,%ebx ++ movl %eax,%ebp ++ roll $5,%ebp ++ movl %esi,16(%esp) ++ leal 1859775393(%esi,%edi,1),%esi ++ movl 20(%esp),%edi ++ addl %ebp,%esi ++ # 20_39 37 ++ movl %eax,%ebp ++ xorl 28(%esp),%edi ++ xorl %ebx,%ebp ++ xorl 52(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 8(%esp),%edi ++ roll $1,%edi ++ addl %ebp,%edx ++ rorl $2,%eax ++ movl %esi,%ebp ++ roll $5,%ebp ++ movl %edi,20(%esp) ++ leal 1859775393(%edi,%edx,1),%edi ++ movl 24(%esp),%edx ++ addl %ebp,%edi ++ # 20_39 38 ++ movl %esi,%ebp ++ xorl 32(%esp),%edx ++ xorl %eax,%ebp ++ xorl 56(%esp),%edx ++ xorl %ebx,%ebp ++ xorl 12(%esp),%edx ++ roll $1,%edx ++ addl %ebp,%ecx ++ rorl $2,%esi ++ movl %edi,%ebp ++ roll $5,%ebp ++ movl %edx,24(%esp) ++ leal 1859775393(%edx,%ecx,1),%edx ++ movl 28(%esp),%ecx ++ addl %ebp,%edx ++ # 20_39 39 ++ movl %edi,%ebp ++ xorl 36(%esp),%ecx ++ xorl %esi,%ebp ++ xorl 60(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 16(%esp),%ecx ++ roll $1,%ecx ++ addl %ebp,%ebx ++ rorl $2,%edi ++ movl %edx,%ebp ++ roll $5,%ebp ++ movl %ecx,28(%esp) ++ leal 1859775393(%ecx,%ebx,1),%ecx ++ movl 32(%esp),%ebx ++ addl %ebp,%ecx ++ # 40_59 40 ++ movl %edi,%ebp ++ xorl 40(%esp),%ebx ++ xorl %esi,%ebp ++ xorl (%esp),%ebx ++ andl %edx,%ebp ++ xorl 20(%esp),%ebx ++ roll $1,%ebx ++ addl %eax,%ebp ++ rorl $2,%edx ++ movl %ecx,%eax ++ roll $5,%eax ++ movl %ebx,32(%esp) ++ leal 2400959708(%ebx,%ebp,1),%ebx ++ movl %edi,%ebp ++ addl %eax,%ebx ++ andl %esi,%ebp ++ movl 36(%esp),%eax ++ addl %ebp,%ebx ++ # 40_59 41 ++ movl %edx,%ebp ++ xorl 44(%esp),%eax ++ xorl %edi,%ebp ++ xorl 4(%esp),%eax ++ andl %ecx,%ebp ++ xorl 24(%esp),%eax ++ roll $1,%eax ++ addl %esi,%ebp ++ rorl $2,%ecx ++ movl %ebx,%esi ++ roll $5,%esi ++ movl %eax,36(%esp) ++ leal 2400959708(%eax,%ebp,1),%eax ++ movl %edx,%ebp ++ addl %esi,%eax ++ andl %edi,%ebp ++ movl 40(%esp),%esi ++ addl %ebp,%eax ++ # 40_59 42 ++ movl %ecx,%ebp ++ xorl 48(%esp),%esi ++ xorl %edx,%ebp ++ xorl 8(%esp),%esi ++ andl %ebx,%ebp ++ xorl 
28(%esp),%esi ++ roll $1,%esi ++ addl %edi,%ebp ++ rorl $2,%ebx ++ movl %eax,%edi ++ roll $5,%edi ++ movl %esi,40(%esp) ++ leal 2400959708(%esi,%ebp,1),%esi ++ movl %ecx,%ebp ++ addl %edi,%esi ++ andl %edx,%ebp ++ movl 44(%esp),%edi ++ addl %ebp,%esi ++ # 40_59 43 ++ movl %ebx,%ebp ++ xorl 52(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 12(%esp),%edi ++ andl %eax,%ebp ++ xorl 32(%esp),%edi ++ roll $1,%edi ++ addl %edx,%ebp ++ rorl $2,%eax ++ movl %esi,%edx ++ roll $5,%edx ++ movl %edi,44(%esp) ++ leal 2400959708(%edi,%ebp,1),%edi ++ movl %ebx,%ebp ++ addl %edx,%edi ++ andl %ecx,%ebp ++ movl 48(%esp),%edx ++ addl %ebp,%edi ++ # 40_59 44 ++ movl %eax,%ebp ++ xorl 56(%esp),%edx ++ xorl %ebx,%ebp ++ xorl 16(%esp),%edx ++ andl %esi,%ebp ++ xorl 36(%esp),%edx ++ roll $1,%edx ++ addl %ecx,%ebp ++ rorl $2,%esi ++ movl %edi,%ecx ++ roll $5,%ecx ++ movl %edx,48(%esp) ++ leal 2400959708(%edx,%ebp,1),%edx ++ movl %eax,%ebp ++ addl %ecx,%edx ++ andl %ebx,%ebp ++ movl 52(%esp),%ecx ++ addl %ebp,%edx ++ # 40_59 45 ++ movl %esi,%ebp ++ xorl 60(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 20(%esp),%ecx ++ andl %edi,%ebp ++ xorl 40(%esp),%ecx ++ roll $1,%ecx ++ addl %ebx,%ebp ++ rorl $2,%edi ++ movl %edx,%ebx ++ roll $5,%ebx ++ movl %ecx,52(%esp) ++ leal 2400959708(%ecx,%ebp,1),%ecx ++ movl %esi,%ebp ++ addl %ebx,%ecx ++ andl %eax,%ebp ++ movl 56(%esp),%ebx ++ addl %ebp,%ecx ++ # 40_59 46 ++ movl %edi,%ebp ++ xorl (%esp),%ebx ++ xorl %esi,%ebp ++ xorl 24(%esp),%ebx ++ andl %edx,%ebp ++ xorl 44(%esp),%ebx ++ roll $1,%ebx ++ addl %eax,%ebp ++ rorl $2,%edx ++ movl %ecx,%eax ++ roll $5,%eax ++ movl %ebx,56(%esp) ++ leal 2400959708(%ebx,%ebp,1),%ebx ++ movl %edi,%ebp ++ addl %eax,%ebx ++ andl %esi,%ebp ++ movl 60(%esp),%eax ++ addl %ebp,%ebx ++ # 40_59 47 ++ movl %edx,%ebp ++ xorl 4(%esp),%eax ++ xorl %edi,%ebp ++ xorl 28(%esp),%eax ++ andl %ecx,%ebp ++ xorl 48(%esp),%eax ++ roll $1,%eax ++ addl %esi,%ebp ++ rorl $2,%ecx ++ movl %ebx,%esi ++ roll $5,%esi ++ movl %eax,60(%esp) ++ leal 2400959708(%eax,%ebp,1),%eax ++ movl %edx,%ebp ++ addl %esi,%eax ++ andl %edi,%ebp ++ movl (%esp),%esi ++ addl %ebp,%eax ++ # 40_59 48 ++ movl %ecx,%ebp ++ xorl 8(%esp),%esi ++ xorl %edx,%ebp ++ xorl 32(%esp),%esi ++ andl %ebx,%ebp ++ xorl 52(%esp),%esi ++ roll $1,%esi ++ addl %edi,%ebp ++ rorl $2,%ebx ++ movl %eax,%edi ++ roll $5,%edi ++ movl %esi,(%esp) ++ leal 2400959708(%esi,%ebp,1),%esi ++ movl %ecx,%ebp ++ addl %edi,%esi ++ andl %edx,%ebp ++ movl 4(%esp),%edi ++ addl %ebp,%esi ++ # 40_59 49 ++ movl %ebx,%ebp ++ xorl 12(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 36(%esp),%edi ++ andl %eax,%ebp ++ xorl 56(%esp),%edi ++ roll $1,%edi ++ addl %edx,%ebp ++ rorl $2,%eax ++ movl %esi,%edx ++ roll $5,%edx ++ movl %edi,4(%esp) ++ leal 2400959708(%edi,%ebp,1),%edi ++ movl %ebx,%ebp ++ addl %edx,%edi ++ andl %ecx,%ebp ++ movl 8(%esp),%edx ++ addl %ebp,%edi ++ # 40_59 50 ++ movl %eax,%ebp ++ xorl 16(%esp),%edx ++ xorl %ebx,%ebp ++ xorl 40(%esp),%edx ++ andl %esi,%ebp ++ xorl 60(%esp),%edx ++ roll $1,%edx ++ addl %ecx,%ebp ++ rorl $2,%esi ++ movl %edi,%ecx ++ roll $5,%ecx ++ movl %edx,8(%esp) ++ leal 2400959708(%edx,%ebp,1),%edx ++ movl %eax,%ebp ++ addl %ecx,%edx ++ andl %ebx,%ebp ++ movl 12(%esp),%ecx ++ addl %ebp,%edx ++ # 40_59 51 ++ movl %esi,%ebp ++ xorl 20(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 44(%esp),%ecx ++ andl %edi,%ebp ++ xorl (%esp),%ecx ++ roll $1,%ecx ++ addl %ebx,%ebp ++ rorl $2,%edi ++ movl %edx,%ebx ++ roll $5,%ebx ++ movl %ecx,12(%esp) ++ leal 2400959708(%ecx,%ebp,1),%ecx ++ movl %esi,%ebp ++ addl %ebx,%ecx ++ andl %eax,%ebp ++ movl 
16(%esp),%ebx ++ addl %ebp,%ecx ++ # 40_59 52 ++ movl %edi,%ebp ++ xorl 24(%esp),%ebx ++ xorl %esi,%ebp ++ xorl 48(%esp),%ebx ++ andl %edx,%ebp ++ xorl 4(%esp),%ebx ++ roll $1,%ebx ++ addl %eax,%ebp ++ rorl $2,%edx ++ movl %ecx,%eax ++ roll $5,%eax ++ movl %ebx,16(%esp) ++ leal 2400959708(%ebx,%ebp,1),%ebx ++ movl %edi,%ebp ++ addl %eax,%ebx ++ andl %esi,%ebp ++ movl 20(%esp),%eax ++ addl %ebp,%ebx ++ # 40_59 53 ++ movl %edx,%ebp ++ xorl 28(%esp),%eax ++ xorl %edi,%ebp ++ xorl 52(%esp),%eax ++ andl %ecx,%ebp ++ xorl 8(%esp),%eax ++ roll $1,%eax ++ addl %esi,%ebp ++ rorl $2,%ecx ++ movl %ebx,%esi ++ roll $5,%esi ++ movl %eax,20(%esp) ++ leal 2400959708(%eax,%ebp,1),%eax ++ movl %edx,%ebp ++ addl %esi,%eax ++ andl %edi,%ebp ++ movl 24(%esp),%esi ++ addl %ebp,%eax ++ # 40_59 54 ++ movl %ecx,%ebp ++ xorl 32(%esp),%esi ++ xorl %edx,%ebp ++ xorl 56(%esp),%esi ++ andl %ebx,%ebp ++ xorl 12(%esp),%esi ++ roll $1,%esi ++ addl %edi,%ebp ++ rorl $2,%ebx ++ movl %eax,%edi ++ roll $5,%edi ++ movl %esi,24(%esp) ++ leal 2400959708(%esi,%ebp,1),%esi ++ movl %ecx,%ebp ++ addl %edi,%esi ++ andl %edx,%ebp ++ movl 28(%esp),%edi ++ addl %ebp,%esi ++ # 40_59 55 ++ movl %ebx,%ebp ++ xorl 36(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 60(%esp),%edi ++ andl %eax,%ebp ++ xorl 16(%esp),%edi ++ roll $1,%edi ++ addl %edx,%ebp ++ rorl $2,%eax ++ movl %esi,%edx ++ roll $5,%edx ++ movl %edi,28(%esp) ++ leal 2400959708(%edi,%ebp,1),%edi ++ movl %ebx,%ebp ++ addl %edx,%edi ++ andl %ecx,%ebp ++ movl 32(%esp),%edx ++ addl %ebp,%edi ++ # 40_59 56 ++ movl %eax,%ebp ++ xorl 40(%esp),%edx ++ xorl %ebx,%ebp ++ xorl (%esp),%edx ++ andl %esi,%ebp ++ xorl 20(%esp),%edx ++ roll $1,%edx ++ addl %ecx,%ebp ++ rorl $2,%esi ++ movl %edi,%ecx ++ roll $5,%ecx ++ movl %edx,32(%esp) ++ leal 2400959708(%edx,%ebp,1),%edx ++ movl %eax,%ebp ++ addl %ecx,%edx ++ andl %ebx,%ebp ++ movl 36(%esp),%ecx ++ addl %ebp,%edx ++ # 40_59 57 ++ movl %esi,%ebp ++ xorl 44(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 4(%esp),%ecx ++ andl %edi,%ebp ++ xorl 24(%esp),%ecx ++ roll $1,%ecx ++ addl %ebx,%ebp ++ rorl $2,%edi ++ movl %edx,%ebx ++ roll $5,%ebx ++ movl %ecx,36(%esp) ++ leal 2400959708(%ecx,%ebp,1),%ecx ++ movl %esi,%ebp ++ addl %ebx,%ecx ++ andl %eax,%ebp ++ movl 40(%esp),%ebx ++ addl %ebp,%ecx ++ # 40_59 58 ++ movl %edi,%ebp ++ xorl 48(%esp),%ebx ++ xorl %esi,%ebp ++ xorl 8(%esp),%ebx ++ andl %edx,%ebp ++ xorl 28(%esp),%ebx ++ roll $1,%ebx ++ addl %eax,%ebp ++ rorl $2,%edx ++ movl %ecx,%eax ++ roll $5,%eax ++ movl %ebx,40(%esp) ++ leal 2400959708(%ebx,%ebp,1),%ebx ++ movl %edi,%ebp ++ addl %eax,%ebx ++ andl %esi,%ebp ++ movl 44(%esp),%eax ++ addl %ebp,%ebx ++ # 40_59 59 ++ movl %edx,%ebp ++ xorl 52(%esp),%eax ++ xorl %edi,%ebp ++ xorl 12(%esp),%eax ++ andl %ecx,%ebp ++ xorl 32(%esp),%eax ++ roll $1,%eax ++ addl %esi,%ebp ++ rorl $2,%ecx ++ movl %ebx,%esi ++ roll $5,%esi ++ movl %eax,44(%esp) ++ leal 2400959708(%eax,%ebp,1),%eax ++ movl %edx,%ebp ++ addl %esi,%eax ++ andl %edi,%ebp ++ movl 48(%esp),%esi ++ addl %ebp,%eax ++ # 20_39 60 ++ movl %ebx,%ebp ++ xorl 56(%esp),%esi ++ xorl %ecx,%ebp ++ xorl 16(%esp),%esi ++ xorl %edx,%ebp ++ xorl 36(%esp),%esi ++ roll $1,%esi ++ addl %ebp,%edi ++ rorl $2,%ebx ++ movl %eax,%ebp ++ roll $5,%ebp ++ movl %esi,48(%esp) ++ leal 3395469782(%esi,%edi,1),%esi ++ movl 52(%esp),%edi ++ addl %ebp,%esi ++ # 20_39 61 ++ movl %eax,%ebp ++ xorl 60(%esp),%edi ++ xorl %ebx,%ebp ++ xorl 20(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 40(%esp),%edi ++ roll $1,%edi ++ addl %ebp,%edx ++ rorl $2,%eax ++ movl %esi,%ebp ++ roll $5,%ebp ++ movl %edi,52(%esp) ++ 
leal 3395469782(%edi,%edx,1),%edi ++ movl 56(%esp),%edx ++ addl %ebp,%edi ++ # 20_39 62 ++ movl %esi,%ebp ++ xorl (%esp),%edx ++ xorl %eax,%ebp ++ xorl 24(%esp),%edx ++ xorl %ebx,%ebp ++ xorl 44(%esp),%edx ++ roll $1,%edx ++ addl %ebp,%ecx ++ rorl $2,%esi ++ movl %edi,%ebp ++ roll $5,%ebp ++ movl %edx,56(%esp) ++ leal 3395469782(%edx,%ecx,1),%edx ++ movl 60(%esp),%ecx ++ addl %ebp,%edx ++ # 20_39 63 ++ movl %edi,%ebp ++ xorl 4(%esp),%ecx ++ xorl %esi,%ebp ++ xorl 28(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 48(%esp),%ecx ++ roll $1,%ecx ++ addl %ebp,%ebx ++ rorl $2,%edi ++ movl %edx,%ebp ++ roll $5,%ebp ++ movl %ecx,60(%esp) ++ leal 3395469782(%ecx,%ebx,1),%ecx ++ movl (%esp),%ebx ++ addl %ebp,%ecx ++ # 20_39 64 ++ movl %edx,%ebp ++ xorl 8(%esp),%ebx ++ xorl %edi,%ebp ++ xorl 32(%esp),%ebx ++ xorl %esi,%ebp ++ xorl 52(%esp),%ebx ++ roll $1,%ebx ++ addl %ebp,%eax ++ rorl $2,%edx ++ movl %ecx,%ebp ++ roll $5,%ebp ++ movl %ebx,(%esp) ++ leal 3395469782(%ebx,%eax,1),%ebx ++ movl 4(%esp),%eax ++ addl %ebp,%ebx ++ # 20_39 65 ++ movl %ecx,%ebp ++ xorl 12(%esp),%eax ++ xorl %edx,%ebp ++ xorl 36(%esp),%eax ++ xorl %edi,%ebp ++ xorl 56(%esp),%eax ++ roll $1,%eax ++ addl %ebp,%esi ++ rorl $2,%ecx ++ movl %ebx,%ebp ++ roll $5,%ebp ++ movl %eax,4(%esp) ++ leal 3395469782(%eax,%esi,1),%eax ++ movl 8(%esp),%esi ++ addl %ebp,%eax ++ # 20_39 66 ++ movl %ebx,%ebp ++ xorl 16(%esp),%esi ++ xorl %ecx,%ebp ++ xorl 40(%esp),%esi ++ xorl %edx,%ebp ++ xorl 60(%esp),%esi ++ roll $1,%esi ++ addl %ebp,%edi ++ rorl $2,%ebx ++ movl %eax,%ebp ++ roll $5,%ebp ++ movl %esi,8(%esp) ++ leal 3395469782(%esi,%edi,1),%esi ++ movl 12(%esp),%edi ++ addl %ebp,%esi ++ # 20_39 67 ++ movl %eax,%ebp ++ xorl 20(%esp),%edi ++ xorl %ebx,%ebp ++ xorl 44(%esp),%edi ++ xorl %ecx,%ebp ++ xorl (%esp),%edi ++ roll $1,%edi ++ addl %ebp,%edx ++ rorl $2,%eax ++ movl %esi,%ebp ++ roll $5,%ebp ++ movl %edi,12(%esp) ++ leal 3395469782(%edi,%edx,1),%edi ++ movl 16(%esp),%edx ++ addl %ebp,%edi ++ # 20_39 68 ++ movl %esi,%ebp ++ xorl 24(%esp),%edx ++ xorl %eax,%ebp ++ xorl 48(%esp),%edx ++ xorl %ebx,%ebp ++ xorl 4(%esp),%edx ++ roll $1,%edx ++ addl %ebp,%ecx ++ rorl $2,%esi ++ movl %edi,%ebp ++ roll $5,%ebp ++ movl %edx,16(%esp) ++ leal 3395469782(%edx,%ecx,1),%edx ++ movl 20(%esp),%ecx ++ addl %ebp,%edx ++ # 20_39 69 ++ movl %edi,%ebp ++ xorl 28(%esp),%ecx ++ xorl %esi,%ebp ++ xorl 52(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 8(%esp),%ecx ++ roll $1,%ecx ++ addl %ebp,%ebx ++ rorl $2,%edi ++ movl %edx,%ebp ++ roll $5,%ebp ++ movl %ecx,20(%esp) ++ leal 3395469782(%ecx,%ebx,1),%ecx ++ movl 24(%esp),%ebx ++ addl %ebp,%ecx ++ # 20_39 70 ++ movl %edx,%ebp ++ xorl 32(%esp),%ebx ++ xorl %edi,%ebp ++ xorl 56(%esp),%ebx ++ xorl %esi,%ebp ++ xorl 12(%esp),%ebx ++ roll $1,%ebx ++ addl %ebp,%eax ++ rorl $2,%edx ++ movl %ecx,%ebp ++ roll $5,%ebp ++ movl %ebx,24(%esp) ++ leal 3395469782(%ebx,%eax,1),%ebx ++ movl 28(%esp),%eax ++ addl %ebp,%ebx ++ # 20_39 71 ++ movl %ecx,%ebp ++ xorl 36(%esp),%eax ++ xorl %edx,%ebp ++ xorl 60(%esp),%eax ++ xorl %edi,%ebp ++ xorl 16(%esp),%eax ++ roll $1,%eax ++ addl %ebp,%esi ++ rorl $2,%ecx ++ movl %ebx,%ebp ++ roll $5,%ebp ++ movl %eax,28(%esp) ++ leal 3395469782(%eax,%esi,1),%eax ++ movl 32(%esp),%esi ++ addl %ebp,%eax ++ # 20_39 72 ++ movl %ebx,%ebp ++ xorl 40(%esp),%esi ++ xorl %ecx,%ebp ++ xorl (%esp),%esi ++ xorl %edx,%ebp ++ xorl 20(%esp),%esi ++ roll $1,%esi ++ addl %ebp,%edi ++ rorl $2,%ebx ++ movl %eax,%ebp ++ roll $5,%ebp ++ movl %esi,32(%esp) ++ leal 3395469782(%esi,%edi,1),%esi ++ movl 36(%esp),%edi ++ addl %ebp,%esi ++ # 20_39 
73 ++ movl %eax,%ebp ++ xorl 44(%esp),%edi ++ xorl %ebx,%ebp ++ xorl 4(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 24(%esp),%edi ++ roll $1,%edi ++ addl %ebp,%edx ++ rorl $2,%eax ++ movl %esi,%ebp ++ roll $5,%ebp ++ movl %edi,36(%esp) ++ leal 3395469782(%edi,%edx,1),%edi ++ movl 40(%esp),%edx ++ addl %ebp,%edi ++ # 20_39 74 ++ movl %esi,%ebp ++ xorl 48(%esp),%edx ++ xorl %eax,%ebp ++ xorl 8(%esp),%edx ++ xorl %ebx,%ebp ++ xorl 28(%esp),%edx ++ roll $1,%edx ++ addl %ebp,%ecx ++ rorl $2,%esi ++ movl %edi,%ebp ++ roll $5,%ebp ++ movl %edx,40(%esp) ++ leal 3395469782(%edx,%ecx,1),%edx ++ movl 44(%esp),%ecx ++ addl %ebp,%edx ++ # 20_39 75 ++ movl %edi,%ebp ++ xorl 52(%esp),%ecx ++ xorl %esi,%ebp ++ xorl 12(%esp),%ecx ++ xorl %eax,%ebp ++ xorl 32(%esp),%ecx ++ roll $1,%ecx ++ addl %ebp,%ebx ++ rorl $2,%edi ++ movl %edx,%ebp ++ roll $5,%ebp ++ movl %ecx,44(%esp) ++ leal 3395469782(%ecx,%ebx,1),%ecx ++ movl 48(%esp),%ebx ++ addl %ebp,%ecx ++ # 20_39 76 ++ movl %edx,%ebp ++ xorl 56(%esp),%ebx ++ xorl %edi,%ebp ++ xorl 16(%esp),%ebx ++ xorl %esi,%ebp ++ xorl 36(%esp),%ebx ++ roll $1,%ebx ++ addl %ebp,%eax ++ rorl $2,%edx ++ movl %ecx,%ebp ++ roll $5,%ebp ++ movl %ebx,48(%esp) ++ leal 3395469782(%ebx,%eax,1),%ebx ++ movl 52(%esp),%eax ++ addl %ebp,%ebx ++ # 20_39 77 ++ movl %ecx,%ebp ++ xorl 60(%esp),%eax ++ xorl %edx,%ebp ++ xorl 20(%esp),%eax ++ xorl %edi,%ebp ++ xorl 40(%esp),%eax ++ roll $1,%eax ++ addl %ebp,%esi ++ rorl $2,%ecx ++ movl %ebx,%ebp ++ roll $5,%ebp ++ leal 3395469782(%eax,%esi,1),%eax ++ movl 56(%esp),%esi ++ addl %ebp,%eax ++ # 20_39 78 ++ movl %ebx,%ebp ++ xorl (%esp),%esi ++ xorl %ecx,%ebp ++ xorl 24(%esp),%esi ++ xorl %edx,%ebp ++ xorl 44(%esp),%esi ++ roll $1,%esi ++ addl %ebp,%edi ++ rorl $2,%ebx ++ movl %eax,%ebp ++ roll $5,%ebp ++ leal 3395469782(%esi,%edi,1),%esi ++ movl 60(%esp),%edi ++ addl %ebp,%esi ++ # 20_39 79 ++ movl %eax,%ebp ++ xorl 4(%esp),%edi ++ xorl %ebx,%ebp ++ xorl 28(%esp),%edi ++ xorl %ecx,%ebp ++ xorl 48(%esp),%edi ++ roll $1,%edi ++ addl %ebp,%edx ++ rorl $2,%eax ++ movl %esi,%ebp ++ roll $5,%ebp ++ leal 3395469782(%edi,%edx,1),%edi ++ addl %ebp,%edi ++ movl 96(%esp),%ebp ++ movl 100(%esp),%edx ++ addl (%ebp),%edi ++ addl 4(%ebp),%esi ++ addl 8(%ebp),%eax ++ addl 12(%ebp),%ebx ++ addl 16(%ebp),%ecx ++ movl %edi,(%ebp) ++ addl $64,%edx ++ movl %esi,4(%ebp) ++ cmpl 104(%esp),%edx ++ movl %eax,8(%ebp) ++ movl %ecx,%edi ++ movl %ebx,12(%ebp) ++ movl %edx,%esi ++ movl %ecx,16(%ebp) ++ jb L002loop ++ addl $76,%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.private_extern __sha1_block_data_order_ssse3 ++.align 4 ++__sha1_block_data_order_ssse3: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ call L003pic_point ++L003pic_point: ++ popl %ebp ++ leal LK_XX_XX-L003pic_point(%ebp),%ebp ++Lssse3_shortcut: ++ movdqa (%ebp),%xmm7 ++ movdqa 16(%ebp),%xmm0 ++ movdqa 32(%ebp),%xmm1 ++ movdqa 48(%ebp),%xmm2 ++ movdqa 64(%ebp),%xmm6 ++ movl 20(%esp),%edi ++ movl 24(%esp),%ebp ++ movl 28(%esp),%edx ++ movl %esp,%esi ++ subl $208,%esp ++ andl $-64,%esp ++ movdqa %xmm0,112(%esp) ++ movdqa %xmm1,128(%esp) ++ movdqa %xmm2,144(%esp) ++ shll $6,%edx ++ movdqa %xmm7,160(%esp) ++ addl %ebp,%edx ++ movdqa %xmm6,176(%esp) ++ addl $64,%ebp ++ movl %edi,192(%esp) ++ movl %ebp,196(%esp) ++ movl %edx,200(%esp) ++ movl %esi,204(%esp) ++ movl (%edi),%eax ++ movl 4(%edi),%ebx ++ movl 8(%edi),%ecx ++ movl 12(%edi),%edx ++ movl 16(%edi),%edi ++ movl %ebx,%esi ++ movdqu -64(%ebp),%xmm0 ++ movdqu -48(%ebp),%xmm1 ++ movdqu -32(%ebp),%xmm2 ++ movdqu -16(%ebp),%xmm3 ++.byte 
102,15,56,0,198 ++.byte 102,15,56,0,206 ++.byte 102,15,56,0,214 ++ movdqa %xmm7,96(%esp) ++.byte 102,15,56,0,222 ++ paddd %xmm7,%xmm0 ++ paddd %xmm7,%xmm1 ++ paddd %xmm7,%xmm2 ++ movdqa %xmm0,(%esp) ++ psubd %xmm7,%xmm0 ++ movdqa %xmm1,16(%esp) ++ psubd %xmm7,%xmm1 ++ movdqa %xmm2,32(%esp) ++ movl %ecx,%ebp ++ psubd %xmm7,%xmm2 ++ xorl %edx,%ebp ++ pshufd $238,%xmm0,%xmm4 ++ andl %ebp,%esi ++ jmp L004loop ++.align 4,0x90 ++L004loop: ++ rorl $2,%ebx ++ xorl %edx,%esi ++ movl %eax,%ebp ++ punpcklqdq %xmm1,%xmm4 ++ movdqa %xmm3,%xmm6 ++ addl (%esp),%edi ++ xorl %ecx,%ebx ++ paddd %xmm3,%xmm7 ++ movdqa %xmm0,64(%esp) ++ roll $5,%eax ++ addl %esi,%edi ++ psrldq $4,%xmm6 ++ andl %ebx,%ebp ++ xorl %ecx,%ebx ++ pxor %xmm0,%xmm4 ++ addl %eax,%edi ++ rorl $7,%eax ++ pxor %xmm2,%xmm6 ++ xorl %ecx,%ebp ++ movl %edi,%esi ++ addl 4(%esp),%edx ++ pxor %xmm6,%xmm4 ++ xorl %ebx,%eax ++ roll $5,%edi ++ movdqa %xmm7,48(%esp) ++ addl %ebp,%edx ++ andl %eax,%esi ++ movdqa %xmm4,%xmm0 ++ xorl %ebx,%eax ++ addl %edi,%edx ++ rorl $7,%edi ++ movdqa %xmm4,%xmm6 ++ xorl %ebx,%esi ++ pslldq $12,%xmm0 ++ paddd %xmm4,%xmm4 ++ movl %edx,%ebp ++ addl 8(%esp),%ecx ++ psrld $31,%xmm6 ++ xorl %eax,%edi ++ roll $5,%edx ++ movdqa %xmm0,%xmm7 ++ addl %esi,%ecx ++ andl %edi,%ebp ++ xorl %eax,%edi ++ psrld $30,%xmm0 ++ addl %edx,%ecx ++ rorl $7,%edx ++ por %xmm6,%xmm4 ++ xorl %eax,%ebp ++ movl %ecx,%esi ++ addl 12(%esp),%ebx ++ pslld $2,%xmm7 ++ xorl %edi,%edx ++ roll $5,%ecx ++ pxor %xmm0,%xmm4 ++ movdqa 96(%esp),%xmm0 ++ addl %ebp,%ebx ++ andl %edx,%esi ++ pxor %xmm7,%xmm4 ++ pshufd $238,%xmm1,%xmm5 ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ movl %ebx,%ebp ++ punpcklqdq %xmm2,%xmm5 ++ movdqa %xmm4,%xmm7 ++ addl 16(%esp),%eax ++ xorl %edx,%ecx ++ paddd %xmm4,%xmm0 ++ movdqa %xmm1,80(%esp) ++ roll $5,%ebx ++ addl %esi,%eax ++ psrldq $4,%xmm7 ++ andl %ecx,%ebp ++ xorl %edx,%ecx ++ pxor %xmm1,%xmm5 ++ addl %ebx,%eax ++ rorl $7,%ebx ++ pxor %xmm3,%xmm7 ++ xorl %edx,%ebp ++ movl %eax,%esi ++ addl 20(%esp),%edi ++ pxor %xmm7,%xmm5 ++ xorl %ecx,%ebx ++ roll $5,%eax ++ movdqa %xmm0,(%esp) ++ addl %ebp,%edi ++ andl %ebx,%esi ++ movdqa %xmm5,%xmm1 ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ rorl $7,%eax ++ movdqa %xmm5,%xmm7 ++ xorl %ecx,%esi ++ pslldq $12,%xmm1 ++ paddd %xmm5,%xmm5 ++ movl %edi,%ebp ++ addl 24(%esp),%edx ++ psrld $31,%xmm7 ++ xorl %ebx,%eax ++ roll $5,%edi ++ movdqa %xmm1,%xmm0 ++ addl %esi,%edx ++ andl %eax,%ebp ++ xorl %ebx,%eax ++ psrld $30,%xmm1 ++ addl %edi,%edx ++ rorl $7,%edi ++ por %xmm7,%xmm5 ++ xorl %ebx,%ebp ++ movl %edx,%esi ++ addl 28(%esp),%ecx ++ pslld $2,%xmm0 ++ xorl %eax,%edi ++ roll $5,%edx ++ pxor %xmm1,%xmm5 ++ movdqa 112(%esp),%xmm1 ++ addl %ebp,%ecx ++ andl %edi,%esi ++ pxor %xmm0,%xmm5 ++ pshufd $238,%xmm2,%xmm6 ++ xorl %eax,%edi ++ addl %edx,%ecx ++ rorl $7,%edx ++ xorl %eax,%esi ++ movl %ecx,%ebp ++ punpcklqdq %xmm3,%xmm6 ++ movdqa %xmm5,%xmm0 ++ addl 32(%esp),%ebx ++ xorl %edi,%edx ++ paddd %xmm5,%xmm1 ++ movdqa %xmm2,96(%esp) ++ roll $5,%ecx ++ addl %esi,%ebx ++ psrldq $4,%xmm0 ++ andl %edx,%ebp ++ xorl %edi,%edx ++ pxor %xmm2,%xmm6 ++ addl %ecx,%ebx ++ rorl $7,%ecx ++ pxor %xmm4,%xmm0 ++ xorl %edi,%ebp ++ movl %ebx,%esi ++ addl 36(%esp),%eax ++ pxor %xmm0,%xmm6 ++ xorl %edx,%ecx ++ roll $5,%ebx ++ movdqa %xmm1,16(%esp) ++ addl %ebp,%eax ++ andl %ecx,%esi ++ movdqa %xmm6,%xmm2 ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ rorl $7,%ebx ++ movdqa %xmm6,%xmm0 ++ xorl %edx,%esi ++ pslldq $12,%xmm2 ++ paddd %xmm6,%xmm6 ++ movl %eax,%ebp ++ addl 40(%esp),%edi ++ psrld $31,%xmm0 ++ 
xorl %ecx,%ebx ++ roll $5,%eax ++ movdqa %xmm2,%xmm1 ++ addl %esi,%edi ++ andl %ebx,%ebp ++ xorl %ecx,%ebx ++ psrld $30,%xmm2 ++ addl %eax,%edi ++ rorl $7,%eax ++ por %xmm0,%xmm6 ++ xorl %ecx,%ebp ++ movdqa 64(%esp),%xmm0 ++ movl %edi,%esi ++ addl 44(%esp),%edx ++ pslld $2,%xmm1 ++ xorl %ebx,%eax ++ roll $5,%edi ++ pxor %xmm2,%xmm6 ++ movdqa 112(%esp),%xmm2 ++ addl %ebp,%edx ++ andl %eax,%esi ++ pxor %xmm1,%xmm6 ++ pshufd $238,%xmm3,%xmm7 ++ xorl %ebx,%eax ++ addl %edi,%edx ++ rorl $7,%edi ++ xorl %ebx,%esi ++ movl %edx,%ebp ++ punpcklqdq %xmm4,%xmm7 ++ movdqa %xmm6,%xmm1 ++ addl 48(%esp),%ecx ++ xorl %eax,%edi ++ paddd %xmm6,%xmm2 ++ movdqa %xmm3,64(%esp) ++ roll $5,%edx ++ addl %esi,%ecx ++ psrldq $4,%xmm1 ++ andl %edi,%ebp ++ xorl %eax,%edi ++ pxor %xmm3,%xmm7 ++ addl %edx,%ecx ++ rorl $7,%edx ++ pxor %xmm5,%xmm1 ++ xorl %eax,%ebp ++ movl %ecx,%esi ++ addl 52(%esp),%ebx ++ pxor %xmm1,%xmm7 ++ xorl %edi,%edx ++ roll $5,%ecx ++ movdqa %xmm2,32(%esp) ++ addl %ebp,%ebx ++ andl %edx,%esi ++ movdqa %xmm7,%xmm3 ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ rorl $7,%ecx ++ movdqa %xmm7,%xmm1 ++ xorl %edi,%esi ++ pslldq $12,%xmm3 ++ paddd %xmm7,%xmm7 ++ movl %ebx,%ebp ++ addl 56(%esp),%eax ++ psrld $31,%xmm1 ++ xorl %edx,%ecx ++ roll $5,%ebx ++ movdqa %xmm3,%xmm2 ++ addl %esi,%eax ++ andl %ecx,%ebp ++ xorl %edx,%ecx ++ psrld $30,%xmm3 ++ addl %ebx,%eax ++ rorl $7,%ebx ++ por %xmm1,%xmm7 ++ xorl %edx,%ebp ++ movdqa 80(%esp),%xmm1 ++ movl %eax,%esi ++ addl 60(%esp),%edi ++ pslld $2,%xmm2 ++ xorl %ecx,%ebx ++ roll $5,%eax ++ pxor %xmm3,%xmm7 ++ movdqa 112(%esp),%xmm3 ++ addl %ebp,%edi ++ andl %ebx,%esi ++ pxor %xmm2,%xmm7 ++ pshufd $238,%xmm6,%xmm2 ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ rorl $7,%eax ++ pxor %xmm4,%xmm0 ++ punpcklqdq %xmm7,%xmm2 ++ xorl %ecx,%esi ++ movl %edi,%ebp ++ addl (%esp),%edx ++ pxor %xmm1,%xmm0 ++ movdqa %xmm4,80(%esp) ++ xorl %ebx,%eax ++ roll $5,%edi ++ movdqa %xmm3,%xmm4 ++ addl %esi,%edx ++ paddd %xmm7,%xmm3 ++ andl %eax,%ebp ++ pxor %xmm2,%xmm0 ++ xorl %ebx,%eax ++ addl %edi,%edx ++ rorl $7,%edi ++ xorl %ebx,%ebp ++ movdqa %xmm0,%xmm2 ++ movdqa %xmm3,48(%esp) ++ movl %edx,%esi ++ addl 4(%esp),%ecx ++ xorl %eax,%edi ++ roll $5,%edx ++ pslld $2,%xmm0 ++ addl %ebp,%ecx ++ andl %edi,%esi ++ psrld $30,%xmm2 ++ xorl %eax,%edi ++ addl %edx,%ecx ++ rorl $7,%edx ++ xorl %eax,%esi ++ movl %ecx,%ebp ++ addl 8(%esp),%ebx ++ xorl %edi,%edx ++ roll $5,%ecx ++ por %xmm2,%xmm0 ++ addl %esi,%ebx ++ andl %edx,%ebp ++ movdqa 96(%esp),%xmm2 ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ addl 12(%esp),%eax ++ xorl %edi,%ebp ++ movl %ebx,%esi ++ pshufd $238,%xmm7,%xmm3 ++ roll $5,%ebx ++ addl %ebp,%eax ++ xorl %edx,%esi ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 16(%esp),%edi ++ pxor %xmm5,%xmm1 ++ punpcklqdq %xmm0,%xmm3 ++ xorl %ecx,%esi ++ movl %eax,%ebp ++ roll $5,%eax ++ pxor %xmm2,%xmm1 ++ movdqa %xmm5,96(%esp) ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ movdqa %xmm4,%xmm5 ++ rorl $7,%ebx ++ paddd %xmm0,%xmm4 ++ addl %eax,%edi ++ pxor %xmm3,%xmm1 ++ addl 20(%esp),%edx ++ xorl %ebx,%ebp ++ movl %edi,%esi ++ roll $5,%edi ++ movdqa %xmm1,%xmm3 ++ movdqa %xmm4,(%esp) ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ rorl $7,%eax ++ addl %edi,%edx ++ pslld $2,%xmm1 ++ addl 24(%esp),%ecx ++ xorl %eax,%esi ++ psrld $30,%xmm3 ++ movl %edx,%ebp ++ roll $5,%edx ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ rorl $7,%edi ++ addl %edx,%ecx ++ por %xmm3,%xmm1 ++ addl 28(%esp),%ebx ++ xorl %edi,%ebp ++ movdqa 64(%esp),%xmm3 ++ movl %ecx,%esi ++ roll $5,%ecx ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ rorl $7,%edx ++ pshufd $238,%xmm0,%xmm4 ++ 
addl %ecx,%ebx ++ addl 32(%esp),%eax ++ pxor %xmm6,%xmm2 ++ punpcklqdq %xmm1,%xmm4 ++ xorl %edx,%esi ++ movl %ebx,%ebp ++ roll $5,%ebx ++ pxor %xmm3,%xmm2 ++ movdqa %xmm6,64(%esp) ++ addl %esi,%eax ++ xorl %edx,%ebp ++ movdqa 128(%esp),%xmm6 ++ rorl $7,%ecx ++ paddd %xmm1,%xmm5 ++ addl %ebx,%eax ++ pxor %xmm4,%xmm2 ++ addl 36(%esp),%edi ++ xorl %ecx,%ebp ++ movl %eax,%esi ++ roll $5,%eax ++ movdqa %xmm2,%xmm4 ++ movdqa %xmm5,16(%esp) ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ rorl $7,%ebx ++ addl %eax,%edi ++ pslld $2,%xmm2 ++ addl 40(%esp),%edx ++ xorl %ebx,%esi ++ psrld $30,%xmm4 ++ movl %edi,%ebp ++ roll $5,%edi ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ rorl $7,%eax ++ addl %edi,%edx ++ por %xmm4,%xmm2 ++ addl 44(%esp),%ecx ++ xorl %eax,%ebp ++ movdqa 80(%esp),%xmm4 ++ movl %edx,%esi ++ roll $5,%edx ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ rorl $7,%edi ++ pshufd $238,%xmm1,%xmm5 ++ addl %edx,%ecx ++ addl 48(%esp),%ebx ++ pxor %xmm7,%xmm3 ++ punpcklqdq %xmm2,%xmm5 ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ roll $5,%ecx ++ pxor %xmm4,%xmm3 ++ movdqa %xmm7,80(%esp) ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ movdqa %xmm6,%xmm7 ++ rorl $7,%edx ++ paddd %xmm2,%xmm6 ++ addl %ecx,%ebx ++ pxor %xmm5,%xmm3 ++ addl 52(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ roll $5,%ebx ++ movdqa %xmm3,%xmm5 ++ movdqa %xmm6,32(%esp) ++ addl %ebp,%eax ++ xorl %edx,%esi ++ rorl $7,%ecx ++ addl %ebx,%eax ++ pslld $2,%xmm3 ++ addl 56(%esp),%edi ++ xorl %ecx,%esi ++ psrld $30,%xmm5 ++ movl %eax,%ebp ++ roll $5,%eax ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ rorl $7,%ebx ++ addl %eax,%edi ++ por %xmm5,%xmm3 ++ addl 60(%esp),%edx ++ xorl %ebx,%ebp ++ movdqa 96(%esp),%xmm5 ++ movl %edi,%esi ++ roll $5,%edi ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ rorl $7,%eax ++ pshufd $238,%xmm2,%xmm6 ++ addl %edi,%edx ++ addl (%esp),%ecx ++ pxor %xmm0,%xmm4 ++ punpcklqdq %xmm3,%xmm6 ++ xorl %eax,%esi ++ movl %edx,%ebp ++ roll $5,%edx ++ pxor %xmm5,%xmm4 ++ movdqa %xmm0,96(%esp) ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ movdqa %xmm7,%xmm0 ++ rorl $7,%edi ++ paddd %xmm3,%xmm7 ++ addl %edx,%ecx ++ pxor %xmm6,%xmm4 ++ addl 4(%esp),%ebx ++ xorl %edi,%ebp ++ movl %ecx,%esi ++ roll $5,%ecx ++ movdqa %xmm4,%xmm6 ++ movdqa %xmm7,48(%esp) ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ rorl $7,%edx ++ addl %ecx,%ebx ++ pslld $2,%xmm4 ++ addl 8(%esp),%eax ++ xorl %edx,%esi ++ psrld $30,%xmm6 ++ movl %ebx,%ebp ++ roll $5,%ebx ++ addl %esi,%eax ++ xorl %edx,%ebp ++ rorl $7,%ecx ++ addl %ebx,%eax ++ por %xmm6,%xmm4 ++ addl 12(%esp),%edi ++ xorl %ecx,%ebp ++ movdqa 64(%esp),%xmm6 ++ movl %eax,%esi ++ roll $5,%eax ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ rorl $7,%ebx ++ pshufd $238,%xmm3,%xmm7 ++ addl %eax,%edi ++ addl 16(%esp),%edx ++ pxor %xmm1,%xmm5 ++ punpcklqdq %xmm4,%xmm7 ++ xorl %ebx,%esi ++ movl %edi,%ebp ++ roll $5,%edi ++ pxor %xmm6,%xmm5 ++ movdqa %xmm1,64(%esp) ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ movdqa %xmm0,%xmm1 ++ rorl $7,%eax ++ paddd %xmm4,%xmm0 ++ addl %edi,%edx ++ pxor %xmm7,%xmm5 ++ addl 20(%esp),%ecx ++ xorl %eax,%ebp ++ movl %edx,%esi ++ roll $5,%edx ++ movdqa %xmm5,%xmm7 ++ movdqa %xmm0,(%esp) ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ rorl $7,%edi ++ addl %edx,%ecx ++ pslld $2,%xmm5 ++ addl 24(%esp),%ebx ++ xorl %edi,%esi ++ psrld $30,%xmm7 ++ movl %ecx,%ebp ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ rorl $7,%edx ++ addl %ecx,%ebx ++ por %xmm7,%xmm5 ++ addl 28(%esp),%eax ++ movdqa 80(%esp),%xmm7 ++ rorl $7,%ecx ++ movl %ebx,%esi ++ xorl %edx,%ebp ++ roll $5,%ebx ++ pshufd $238,%xmm4,%xmm0 ++ addl %ebp,%eax ++ xorl %ecx,%esi ++ xorl %edx,%ecx 
++ addl %ebx,%eax ++ addl 32(%esp),%edi ++ pxor %xmm2,%xmm6 ++ punpcklqdq %xmm5,%xmm0 ++ andl %ecx,%esi ++ xorl %edx,%ecx ++ rorl $7,%ebx ++ pxor %xmm7,%xmm6 ++ movdqa %xmm2,80(%esp) ++ movl %eax,%ebp ++ xorl %ecx,%esi ++ roll $5,%eax ++ movdqa %xmm1,%xmm2 ++ addl %esi,%edi ++ paddd %xmm5,%xmm1 ++ xorl %ebx,%ebp ++ pxor %xmm0,%xmm6 ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ addl 36(%esp),%edx ++ andl %ebx,%ebp ++ movdqa %xmm6,%xmm0 ++ movdqa %xmm1,16(%esp) ++ xorl %ecx,%ebx ++ rorl $7,%eax ++ movl %edi,%esi ++ xorl %ebx,%ebp ++ roll $5,%edi ++ pslld $2,%xmm6 ++ addl %ebp,%edx ++ xorl %eax,%esi ++ psrld $30,%xmm0 ++ xorl %ebx,%eax ++ addl %edi,%edx ++ addl 40(%esp),%ecx ++ andl %eax,%esi ++ xorl %ebx,%eax ++ rorl $7,%edi ++ por %xmm0,%xmm6 ++ movl %edx,%ebp ++ xorl %eax,%esi ++ movdqa 96(%esp),%xmm0 ++ roll $5,%edx ++ addl %esi,%ecx ++ xorl %edi,%ebp ++ xorl %eax,%edi ++ addl %edx,%ecx ++ pshufd $238,%xmm5,%xmm1 ++ addl 44(%esp),%ebx ++ andl %edi,%ebp ++ xorl %eax,%edi ++ rorl $7,%edx ++ movl %ecx,%esi ++ xorl %edi,%ebp ++ roll $5,%ecx ++ addl %ebp,%ebx ++ xorl %edx,%esi ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ addl 48(%esp),%eax ++ pxor %xmm3,%xmm7 ++ punpcklqdq %xmm6,%xmm1 ++ andl %edx,%esi ++ xorl %edi,%edx ++ rorl $7,%ecx ++ pxor %xmm0,%xmm7 ++ movdqa %xmm3,96(%esp) ++ movl %ebx,%ebp ++ xorl %edx,%esi ++ roll $5,%ebx ++ movdqa 144(%esp),%xmm3 ++ addl %esi,%eax ++ paddd %xmm6,%xmm2 ++ xorl %ecx,%ebp ++ pxor %xmm1,%xmm7 ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ addl 52(%esp),%edi ++ andl %ecx,%ebp ++ movdqa %xmm7,%xmm1 ++ movdqa %xmm2,32(%esp) ++ xorl %edx,%ecx ++ rorl $7,%ebx ++ movl %eax,%esi ++ xorl %ecx,%ebp ++ roll $5,%eax ++ pslld $2,%xmm7 ++ addl %ebp,%edi ++ xorl %ebx,%esi ++ psrld $30,%xmm1 ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ addl 56(%esp),%edx ++ andl %ebx,%esi ++ xorl %ecx,%ebx ++ rorl $7,%eax ++ por %xmm1,%xmm7 ++ movl %edi,%ebp ++ xorl %ebx,%esi ++ movdqa 64(%esp),%xmm1 ++ roll $5,%edi ++ addl %esi,%edx ++ xorl %eax,%ebp ++ xorl %ebx,%eax ++ addl %edi,%edx ++ pshufd $238,%xmm6,%xmm2 ++ addl 60(%esp),%ecx ++ andl %eax,%ebp ++ xorl %ebx,%eax ++ rorl $7,%edi ++ movl %edx,%esi ++ xorl %eax,%ebp ++ roll $5,%edx ++ addl %ebp,%ecx ++ xorl %edi,%esi ++ xorl %eax,%edi ++ addl %edx,%ecx ++ addl (%esp),%ebx ++ pxor %xmm4,%xmm0 ++ punpcklqdq %xmm7,%xmm2 ++ andl %edi,%esi ++ xorl %eax,%edi ++ rorl $7,%edx ++ pxor %xmm1,%xmm0 ++ movdqa %xmm4,64(%esp) ++ movl %ecx,%ebp ++ xorl %edi,%esi ++ roll $5,%ecx ++ movdqa %xmm3,%xmm4 ++ addl %esi,%ebx ++ paddd %xmm7,%xmm3 ++ xorl %edx,%ebp ++ pxor %xmm2,%xmm0 ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ addl 4(%esp),%eax ++ andl %edx,%ebp ++ movdqa %xmm0,%xmm2 ++ movdqa %xmm3,48(%esp) ++ xorl %edi,%edx ++ rorl $7,%ecx ++ movl %ebx,%esi ++ xorl %edx,%ebp ++ roll $5,%ebx ++ pslld $2,%xmm0 ++ addl %ebp,%eax ++ xorl %ecx,%esi ++ psrld $30,%xmm2 ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ addl 8(%esp),%edi ++ andl %ecx,%esi ++ xorl %edx,%ecx ++ rorl $7,%ebx ++ por %xmm2,%xmm0 ++ movl %eax,%ebp ++ xorl %ecx,%esi ++ movdqa 80(%esp),%xmm2 ++ roll $5,%eax ++ addl %esi,%edi ++ xorl %ebx,%ebp ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ pshufd $238,%xmm7,%xmm3 ++ addl 12(%esp),%edx ++ andl %ebx,%ebp ++ xorl %ecx,%ebx ++ rorl $7,%eax ++ movl %edi,%esi ++ xorl %ebx,%ebp ++ roll $5,%edi ++ addl %ebp,%edx ++ xorl %eax,%esi ++ xorl %ebx,%eax ++ addl %edi,%edx ++ addl 16(%esp),%ecx ++ pxor %xmm5,%xmm1 ++ punpcklqdq %xmm0,%xmm3 ++ andl %eax,%esi ++ xorl %ebx,%eax ++ rorl $7,%edi ++ pxor %xmm2,%xmm1 ++ movdqa %xmm5,80(%esp) ++ movl %edx,%ebp ++ xorl %eax,%esi ++ roll $5,%edx ++ movdqa 
%xmm4,%xmm5 ++ addl %esi,%ecx ++ paddd %xmm0,%xmm4 ++ xorl %edi,%ebp ++ pxor %xmm3,%xmm1 ++ xorl %eax,%edi ++ addl %edx,%ecx ++ addl 20(%esp),%ebx ++ andl %edi,%ebp ++ movdqa %xmm1,%xmm3 ++ movdqa %xmm4,(%esp) ++ xorl %eax,%edi ++ rorl $7,%edx ++ movl %ecx,%esi ++ xorl %edi,%ebp ++ roll $5,%ecx ++ pslld $2,%xmm1 ++ addl %ebp,%ebx ++ xorl %edx,%esi ++ psrld $30,%xmm3 ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ addl 24(%esp),%eax ++ andl %edx,%esi ++ xorl %edi,%edx ++ rorl $7,%ecx ++ por %xmm3,%xmm1 ++ movl %ebx,%ebp ++ xorl %edx,%esi ++ movdqa 96(%esp),%xmm3 ++ roll $5,%ebx ++ addl %esi,%eax ++ xorl %ecx,%ebp ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ pshufd $238,%xmm0,%xmm4 ++ addl 28(%esp),%edi ++ andl %ecx,%ebp ++ xorl %edx,%ecx ++ rorl $7,%ebx ++ movl %eax,%esi ++ xorl %ecx,%ebp ++ roll $5,%eax ++ addl %ebp,%edi ++ xorl %ebx,%esi ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ addl 32(%esp),%edx ++ pxor %xmm6,%xmm2 ++ punpcklqdq %xmm1,%xmm4 ++ andl %ebx,%esi ++ xorl %ecx,%ebx ++ rorl $7,%eax ++ pxor %xmm3,%xmm2 ++ movdqa %xmm6,96(%esp) ++ movl %edi,%ebp ++ xorl %ebx,%esi ++ roll $5,%edi ++ movdqa %xmm5,%xmm6 ++ addl %esi,%edx ++ paddd %xmm1,%xmm5 ++ xorl %eax,%ebp ++ pxor %xmm4,%xmm2 ++ xorl %ebx,%eax ++ addl %edi,%edx ++ addl 36(%esp),%ecx ++ andl %eax,%ebp ++ movdqa %xmm2,%xmm4 ++ movdqa %xmm5,16(%esp) ++ xorl %ebx,%eax ++ rorl $7,%edi ++ movl %edx,%esi ++ xorl %eax,%ebp ++ roll $5,%edx ++ pslld $2,%xmm2 ++ addl %ebp,%ecx ++ xorl %edi,%esi ++ psrld $30,%xmm4 ++ xorl %eax,%edi ++ addl %edx,%ecx ++ addl 40(%esp),%ebx ++ andl %edi,%esi ++ xorl %eax,%edi ++ rorl $7,%edx ++ por %xmm4,%xmm2 ++ movl %ecx,%ebp ++ xorl %edi,%esi ++ movdqa 64(%esp),%xmm4 ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %edx,%ebp ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ pshufd $238,%xmm1,%xmm5 ++ addl 44(%esp),%eax ++ andl %edx,%ebp ++ xorl %edi,%edx ++ rorl $7,%ecx ++ movl %ebx,%esi ++ xorl %edx,%ebp ++ roll $5,%ebx ++ addl %ebp,%eax ++ xorl %edx,%esi ++ addl %ebx,%eax ++ addl 48(%esp),%edi ++ pxor %xmm7,%xmm3 ++ punpcklqdq %xmm2,%xmm5 ++ xorl %ecx,%esi ++ movl %eax,%ebp ++ roll $5,%eax ++ pxor %xmm4,%xmm3 ++ movdqa %xmm7,64(%esp) ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ movdqa %xmm6,%xmm7 ++ rorl $7,%ebx ++ paddd %xmm2,%xmm6 ++ addl %eax,%edi ++ pxor %xmm5,%xmm3 ++ addl 52(%esp),%edx ++ xorl %ebx,%ebp ++ movl %edi,%esi ++ roll $5,%edi ++ movdqa %xmm3,%xmm5 ++ movdqa %xmm6,32(%esp) ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ rorl $7,%eax ++ addl %edi,%edx ++ pslld $2,%xmm3 ++ addl 56(%esp),%ecx ++ xorl %eax,%esi ++ psrld $30,%xmm5 ++ movl %edx,%ebp ++ roll $5,%edx ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ rorl $7,%edi ++ addl %edx,%ecx ++ por %xmm5,%xmm3 ++ addl 60(%esp),%ebx ++ xorl %edi,%ebp ++ movl %ecx,%esi ++ roll $5,%ecx ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl (%esp),%eax ++ xorl %edx,%esi ++ movl %ebx,%ebp ++ roll $5,%ebx ++ addl %esi,%eax ++ xorl %edx,%ebp ++ rorl $7,%ecx ++ paddd %xmm3,%xmm7 ++ addl %ebx,%eax ++ addl 4(%esp),%edi ++ xorl %ecx,%ebp ++ movl %eax,%esi ++ movdqa %xmm7,48(%esp) ++ roll $5,%eax ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ rorl $7,%ebx ++ addl %eax,%edi ++ addl 8(%esp),%edx ++ xorl %ebx,%esi ++ movl %edi,%ebp ++ roll $5,%edi ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ rorl $7,%eax ++ addl %edi,%edx ++ addl 12(%esp),%ecx ++ xorl %eax,%ebp ++ movl %edx,%esi ++ roll $5,%edx ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ rorl $7,%edi ++ addl %edx,%ecx ++ movl 196(%esp),%ebp ++ cmpl 200(%esp),%ebp ++ je L005done ++ movdqa 160(%esp),%xmm7 ++ movdqa 176(%esp),%xmm6 ++ movdqu (%ebp),%xmm0 ++ movdqu 
16(%ebp),%xmm1 ++ movdqu 32(%ebp),%xmm2 ++ movdqu 48(%ebp),%xmm3 ++ addl $64,%ebp ++.byte 102,15,56,0,198 ++ movl %ebp,196(%esp) ++ movdqa %xmm7,96(%esp) ++ addl 16(%esp),%ebx ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ rorl $7,%edx ++.byte 102,15,56,0,206 ++ addl %ecx,%ebx ++ addl 20(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ paddd %xmm7,%xmm0 ++ roll $5,%ebx ++ addl %ebp,%eax ++ xorl %edx,%esi ++ rorl $7,%ecx ++ movdqa %xmm0,(%esp) ++ addl %ebx,%eax ++ addl 24(%esp),%edi ++ xorl %ecx,%esi ++ movl %eax,%ebp ++ psubd %xmm7,%xmm0 ++ roll $5,%eax ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ rorl $7,%ebx ++ addl %eax,%edi ++ addl 28(%esp),%edx ++ xorl %ebx,%ebp ++ movl %edi,%esi ++ roll $5,%edi ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ rorl $7,%eax ++ addl %edi,%edx ++ addl 32(%esp),%ecx ++ xorl %eax,%esi ++ movl %edx,%ebp ++ roll $5,%edx ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ rorl $7,%edi ++.byte 102,15,56,0,214 ++ addl %edx,%ecx ++ addl 36(%esp),%ebx ++ xorl %edi,%ebp ++ movl %ecx,%esi ++ paddd %xmm7,%xmm1 ++ roll $5,%ecx ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ rorl $7,%edx ++ movdqa %xmm1,16(%esp) ++ addl %ecx,%ebx ++ addl 40(%esp),%eax ++ xorl %edx,%esi ++ movl %ebx,%ebp ++ psubd %xmm7,%xmm1 ++ roll $5,%ebx ++ addl %esi,%eax ++ xorl %edx,%ebp ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 44(%esp),%edi ++ xorl %ecx,%ebp ++ movl %eax,%esi ++ roll $5,%eax ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ rorl $7,%ebx ++ addl %eax,%edi ++ addl 48(%esp),%edx ++ xorl %ebx,%esi ++ movl %edi,%ebp ++ roll $5,%edi ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ rorl $7,%eax ++.byte 102,15,56,0,222 ++ addl %edi,%edx ++ addl 52(%esp),%ecx ++ xorl %eax,%ebp ++ movl %edx,%esi ++ paddd %xmm7,%xmm2 ++ roll $5,%edx ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ rorl $7,%edi ++ movdqa %xmm2,32(%esp) ++ addl %edx,%ecx ++ addl 56(%esp),%ebx ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ psubd %xmm7,%xmm2 ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 60(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ roll $5,%ebx ++ addl %ebp,%eax ++ rorl $7,%ecx ++ addl %ebx,%eax ++ movl 192(%esp),%ebp ++ addl (%ebp),%eax ++ addl 4(%ebp),%esi ++ addl 8(%ebp),%ecx ++ movl %eax,(%ebp) ++ addl 12(%ebp),%edx ++ movl %esi,4(%ebp) ++ addl 16(%ebp),%edi ++ movl %ecx,8(%ebp) ++ movl %ecx,%ebx ++ movl %edx,12(%ebp) ++ xorl %edx,%ebx ++ movl %edi,16(%ebp) ++ movl %esi,%ebp ++ pshufd $238,%xmm0,%xmm4 ++ andl %ebx,%esi ++ movl %ebp,%ebx ++ jmp L004loop ++.align 4,0x90 ++L005done: ++ addl 16(%esp),%ebx ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 20(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ roll $5,%ebx ++ addl %ebp,%eax ++ xorl %edx,%esi ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 24(%esp),%edi ++ xorl %ecx,%esi ++ movl %eax,%ebp ++ roll $5,%eax ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ rorl $7,%ebx ++ addl %eax,%edi ++ addl 28(%esp),%edx ++ xorl %ebx,%ebp ++ movl %edi,%esi ++ roll $5,%edi ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ rorl $7,%eax ++ addl %edi,%edx ++ addl 32(%esp),%ecx ++ xorl %eax,%esi ++ movl %edx,%ebp ++ roll $5,%edx ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ rorl $7,%edi ++ addl %edx,%ecx ++ addl 36(%esp),%ebx ++ xorl %edi,%ebp ++ movl %ecx,%esi ++ roll $5,%ecx ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 40(%esp),%eax ++ xorl %edx,%esi ++ movl %ebx,%ebp ++ roll $5,%ebx ++ addl %esi,%eax ++ xorl %edx,%ebp ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 
44(%esp),%edi ++ xorl %ecx,%ebp ++ movl %eax,%esi ++ roll $5,%eax ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ rorl $7,%ebx ++ addl %eax,%edi ++ addl 48(%esp),%edx ++ xorl %ebx,%esi ++ movl %edi,%ebp ++ roll $5,%edi ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ rorl $7,%eax ++ addl %edi,%edx ++ addl 52(%esp),%ecx ++ xorl %eax,%ebp ++ movl %edx,%esi ++ roll $5,%edx ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ rorl $7,%edi ++ addl %edx,%ecx ++ addl 56(%esp),%ebx ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 60(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ roll $5,%ebx ++ addl %ebp,%eax ++ rorl $7,%ecx ++ addl %ebx,%eax ++ movl 192(%esp),%ebp ++ addl (%ebp),%eax ++ movl 204(%esp),%esp ++ addl 4(%ebp),%esi ++ addl 8(%ebp),%ecx ++ movl %eax,(%ebp) ++ addl 12(%ebp),%edx ++ movl %esi,4(%ebp) ++ addl 16(%ebp),%edi ++ movl %ecx,8(%ebp) ++ movl %edx,12(%ebp) ++ movl %edi,16(%ebp) ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.private_extern __sha1_block_data_order_avx ++.align 4 ++__sha1_block_data_order_avx: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ call L006pic_point ++L006pic_point: ++ popl %ebp ++ leal LK_XX_XX-L006pic_point(%ebp),%ebp ++Lavx_shortcut: ++ vzeroall ++ vmovdqa (%ebp),%xmm7 ++ vmovdqa 16(%ebp),%xmm0 ++ vmovdqa 32(%ebp),%xmm1 ++ vmovdqa 48(%ebp),%xmm2 ++ vmovdqa 64(%ebp),%xmm6 ++ movl 20(%esp),%edi ++ movl 24(%esp),%ebp ++ movl 28(%esp),%edx ++ movl %esp,%esi ++ subl $208,%esp ++ andl $-64,%esp ++ vmovdqa %xmm0,112(%esp) ++ vmovdqa %xmm1,128(%esp) ++ vmovdqa %xmm2,144(%esp) ++ shll $6,%edx ++ vmovdqa %xmm7,160(%esp) ++ addl %ebp,%edx ++ vmovdqa %xmm6,176(%esp) ++ addl $64,%ebp ++ movl %edi,192(%esp) ++ movl %ebp,196(%esp) ++ movl %edx,200(%esp) ++ movl %esi,204(%esp) ++ movl (%edi),%eax ++ movl 4(%edi),%ebx ++ movl 8(%edi),%ecx ++ movl 12(%edi),%edx ++ movl 16(%edi),%edi ++ movl %ebx,%esi ++ vmovdqu -64(%ebp),%xmm0 ++ vmovdqu -48(%ebp),%xmm1 ++ vmovdqu -32(%ebp),%xmm2 ++ vmovdqu -16(%ebp),%xmm3 ++ vpshufb %xmm6,%xmm0,%xmm0 ++ vpshufb %xmm6,%xmm1,%xmm1 ++ vpshufb %xmm6,%xmm2,%xmm2 ++ vmovdqa %xmm7,96(%esp) ++ vpshufb %xmm6,%xmm3,%xmm3 ++ vpaddd %xmm7,%xmm0,%xmm4 ++ vpaddd %xmm7,%xmm1,%xmm5 ++ vpaddd %xmm7,%xmm2,%xmm6 ++ vmovdqa %xmm4,(%esp) ++ movl %ecx,%ebp ++ vmovdqa %xmm5,16(%esp) ++ xorl %edx,%ebp ++ vmovdqa %xmm6,32(%esp) ++ andl %ebp,%esi ++ jmp L007loop ++.align 4,0x90 ++L007loop: ++ shrdl $2,%ebx,%ebx ++ xorl %edx,%esi ++ vpalignr $8,%xmm0,%xmm1,%xmm4 ++ movl %eax,%ebp ++ addl (%esp),%edi ++ vpaddd %xmm3,%xmm7,%xmm7 ++ vmovdqa %xmm0,64(%esp) ++ xorl %ecx,%ebx ++ shldl $5,%eax,%eax ++ vpsrldq $4,%xmm3,%xmm6 ++ addl %esi,%edi ++ andl %ebx,%ebp ++ vpxor %xmm0,%xmm4,%xmm4 ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ vpxor %xmm2,%xmm6,%xmm6 ++ shrdl $7,%eax,%eax ++ xorl %ecx,%ebp ++ vmovdqa %xmm7,48(%esp) ++ movl %edi,%esi ++ addl 4(%esp),%edx ++ vpxor %xmm6,%xmm4,%xmm4 ++ xorl %ebx,%eax ++ shldl $5,%edi,%edi ++ addl %ebp,%edx ++ andl %eax,%esi ++ vpsrld $31,%xmm4,%xmm6 ++ xorl %ebx,%eax ++ addl %edi,%edx ++ shrdl $7,%edi,%edi ++ xorl %ebx,%esi ++ vpslldq $12,%xmm4,%xmm0 ++ vpaddd %xmm4,%xmm4,%xmm4 ++ movl %edx,%ebp ++ addl 8(%esp),%ecx ++ xorl %eax,%edi ++ shldl $5,%edx,%edx ++ vpsrld $30,%xmm0,%xmm7 ++ vpor %xmm6,%xmm4,%xmm4 ++ addl %esi,%ecx ++ andl %edi,%ebp ++ xorl %eax,%edi ++ addl %edx,%ecx ++ vpslld $2,%xmm0,%xmm0 ++ shrdl $7,%edx,%edx ++ xorl %eax,%ebp ++ vpxor %xmm7,%xmm4,%xmm4 ++ movl %ecx,%esi ++ addl 12(%esp),%ebx ++ xorl %edi,%edx ++ shldl $5,%ecx,%ecx ++ vpxor 
%xmm0,%xmm4,%xmm4 ++ addl %ebp,%ebx ++ andl %edx,%esi ++ vmovdqa 96(%esp),%xmm0 ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ shrdl $7,%ecx,%ecx ++ xorl %edi,%esi ++ vpalignr $8,%xmm1,%xmm2,%xmm5 ++ movl %ebx,%ebp ++ addl 16(%esp),%eax ++ vpaddd %xmm4,%xmm0,%xmm0 ++ vmovdqa %xmm1,80(%esp) ++ xorl %edx,%ecx ++ shldl $5,%ebx,%ebx ++ vpsrldq $4,%xmm4,%xmm7 ++ addl %esi,%eax ++ andl %ecx,%ebp ++ vpxor %xmm1,%xmm5,%xmm5 ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ vpxor %xmm3,%xmm7,%xmm7 ++ shrdl $7,%ebx,%ebx ++ xorl %edx,%ebp ++ vmovdqa %xmm0,(%esp) ++ movl %eax,%esi ++ addl 20(%esp),%edi ++ vpxor %xmm7,%xmm5,%xmm5 ++ xorl %ecx,%ebx ++ shldl $5,%eax,%eax ++ addl %ebp,%edi ++ andl %ebx,%esi ++ vpsrld $31,%xmm5,%xmm7 ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ shrdl $7,%eax,%eax ++ xorl %ecx,%esi ++ vpslldq $12,%xmm5,%xmm1 ++ vpaddd %xmm5,%xmm5,%xmm5 ++ movl %edi,%ebp ++ addl 24(%esp),%edx ++ xorl %ebx,%eax ++ shldl $5,%edi,%edi ++ vpsrld $30,%xmm1,%xmm0 ++ vpor %xmm7,%xmm5,%xmm5 ++ addl %esi,%edx ++ andl %eax,%ebp ++ xorl %ebx,%eax ++ addl %edi,%edx ++ vpslld $2,%xmm1,%xmm1 ++ shrdl $7,%edi,%edi ++ xorl %ebx,%ebp ++ vpxor %xmm0,%xmm5,%xmm5 ++ movl %edx,%esi ++ addl 28(%esp),%ecx ++ xorl %eax,%edi ++ shldl $5,%edx,%edx ++ vpxor %xmm1,%xmm5,%xmm5 ++ addl %ebp,%ecx ++ andl %edi,%esi ++ vmovdqa 112(%esp),%xmm1 ++ xorl %eax,%edi ++ addl %edx,%ecx ++ shrdl $7,%edx,%edx ++ xorl %eax,%esi ++ vpalignr $8,%xmm2,%xmm3,%xmm6 ++ movl %ecx,%ebp ++ addl 32(%esp),%ebx ++ vpaddd %xmm5,%xmm1,%xmm1 ++ vmovdqa %xmm2,96(%esp) ++ xorl %edi,%edx ++ shldl $5,%ecx,%ecx ++ vpsrldq $4,%xmm5,%xmm0 ++ addl %esi,%ebx ++ andl %edx,%ebp ++ vpxor %xmm2,%xmm6,%xmm6 ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ vpxor %xmm4,%xmm0,%xmm0 ++ shrdl $7,%ecx,%ecx ++ xorl %edi,%ebp ++ vmovdqa %xmm1,16(%esp) ++ movl %ebx,%esi ++ addl 36(%esp),%eax ++ vpxor %xmm0,%xmm6,%xmm6 ++ xorl %edx,%ecx ++ shldl $5,%ebx,%ebx ++ addl %ebp,%eax ++ andl %ecx,%esi ++ vpsrld $31,%xmm6,%xmm0 ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ shrdl $7,%ebx,%ebx ++ xorl %edx,%esi ++ vpslldq $12,%xmm6,%xmm2 ++ vpaddd %xmm6,%xmm6,%xmm6 ++ movl %eax,%ebp ++ addl 40(%esp),%edi ++ xorl %ecx,%ebx ++ shldl $5,%eax,%eax ++ vpsrld $30,%xmm2,%xmm1 ++ vpor %xmm0,%xmm6,%xmm6 ++ addl %esi,%edi ++ andl %ebx,%ebp ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ vpslld $2,%xmm2,%xmm2 ++ vmovdqa 64(%esp),%xmm0 ++ shrdl $7,%eax,%eax ++ xorl %ecx,%ebp ++ vpxor %xmm1,%xmm6,%xmm6 ++ movl %edi,%esi ++ addl 44(%esp),%edx ++ xorl %ebx,%eax ++ shldl $5,%edi,%edi ++ vpxor %xmm2,%xmm6,%xmm6 ++ addl %ebp,%edx ++ andl %eax,%esi ++ vmovdqa 112(%esp),%xmm2 ++ xorl %ebx,%eax ++ addl %edi,%edx ++ shrdl $7,%edi,%edi ++ xorl %ebx,%esi ++ vpalignr $8,%xmm3,%xmm4,%xmm7 ++ movl %edx,%ebp ++ addl 48(%esp),%ecx ++ vpaddd %xmm6,%xmm2,%xmm2 ++ vmovdqa %xmm3,64(%esp) ++ xorl %eax,%edi ++ shldl $5,%edx,%edx ++ vpsrldq $4,%xmm6,%xmm1 ++ addl %esi,%ecx ++ andl %edi,%ebp ++ vpxor %xmm3,%xmm7,%xmm7 ++ xorl %eax,%edi ++ addl %edx,%ecx ++ vpxor %xmm5,%xmm1,%xmm1 ++ shrdl $7,%edx,%edx ++ xorl %eax,%ebp ++ vmovdqa %xmm2,32(%esp) ++ movl %ecx,%esi ++ addl 52(%esp),%ebx ++ vpxor %xmm1,%xmm7,%xmm7 ++ xorl %edi,%edx ++ shldl $5,%ecx,%ecx ++ addl %ebp,%ebx ++ andl %edx,%esi ++ vpsrld $31,%xmm7,%xmm1 ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ shrdl $7,%ecx,%ecx ++ xorl %edi,%esi ++ vpslldq $12,%xmm7,%xmm3 ++ vpaddd %xmm7,%xmm7,%xmm7 ++ movl %ebx,%ebp ++ addl 56(%esp),%eax ++ xorl %edx,%ecx ++ shldl $5,%ebx,%ebx ++ vpsrld $30,%xmm3,%xmm2 ++ vpor %xmm1,%xmm7,%xmm7 ++ addl %esi,%eax ++ andl %ecx,%ebp ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ vpslld $2,%xmm3,%xmm3 ++ 
vmovdqa 80(%esp),%xmm1 ++ shrdl $7,%ebx,%ebx ++ xorl %edx,%ebp ++ vpxor %xmm2,%xmm7,%xmm7 ++ movl %eax,%esi ++ addl 60(%esp),%edi ++ xorl %ecx,%ebx ++ shldl $5,%eax,%eax ++ vpxor %xmm3,%xmm7,%xmm7 ++ addl %ebp,%edi ++ andl %ebx,%esi ++ vmovdqa 112(%esp),%xmm3 ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ vpalignr $8,%xmm6,%xmm7,%xmm2 ++ vpxor %xmm4,%xmm0,%xmm0 ++ shrdl $7,%eax,%eax ++ xorl %ecx,%esi ++ movl %edi,%ebp ++ addl (%esp),%edx ++ vpxor %xmm1,%xmm0,%xmm0 ++ vmovdqa %xmm4,80(%esp) ++ xorl %ebx,%eax ++ shldl $5,%edi,%edi ++ vmovdqa %xmm3,%xmm4 ++ vpaddd %xmm7,%xmm3,%xmm3 ++ addl %esi,%edx ++ andl %eax,%ebp ++ vpxor %xmm2,%xmm0,%xmm0 ++ xorl %ebx,%eax ++ addl %edi,%edx ++ shrdl $7,%edi,%edi ++ xorl %ebx,%ebp ++ vpsrld $30,%xmm0,%xmm2 ++ vmovdqa %xmm3,48(%esp) ++ movl %edx,%esi ++ addl 4(%esp),%ecx ++ xorl %eax,%edi ++ shldl $5,%edx,%edx ++ vpslld $2,%xmm0,%xmm0 ++ addl %ebp,%ecx ++ andl %edi,%esi ++ xorl %eax,%edi ++ addl %edx,%ecx ++ shrdl $7,%edx,%edx ++ xorl %eax,%esi ++ movl %ecx,%ebp ++ addl 8(%esp),%ebx ++ vpor %xmm2,%xmm0,%xmm0 ++ xorl %edi,%edx ++ shldl $5,%ecx,%ecx ++ vmovdqa 96(%esp),%xmm2 ++ addl %esi,%ebx ++ andl %edx,%ebp ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ addl 12(%esp),%eax ++ xorl %edi,%ebp ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %ebp,%eax ++ xorl %edx,%esi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vpalignr $8,%xmm7,%xmm0,%xmm3 ++ vpxor %xmm5,%xmm1,%xmm1 ++ addl 16(%esp),%edi ++ xorl %ecx,%esi ++ movl %eax,%ebp ++ shldl $5,%eax,%eax ++ vpxor %xmm2,%xmm1,%xmm1 ++ vmovdqa %xmm5,96(%esp) ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ vmovdqa %xmm4,%xmm5 ++ vpaddd %xmm0,%xmm4,%xmm4 ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ vpxor %xmm3,%xmm1,%xmm1 ++ addl 20(%esp),%edx ++ xorl %ebx,%ebp ++ movl %edi,%esi ++ shldl $5,%edi,%edi ++ vpsrld $30,%xmm1,%xmm3 ++ vmovdqa %xmm4,(%esp) ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ vpslld $2,%xmm1,%xmm1 ++ addl 24(%esp),%ecx ++ xorl %eax,%esi ++ movl %edx,%ebp ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ shrdl $7,%edi,%edi ++ addl %edx,%ecx ++ vpor %xmm3,%xmm1,%xmm1 ++ addl 28(%esp),%ebx ++ xorl %edi,%ebp ++ vmovdqa 64(%esp),%xmm3 ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ vpalignr $8,%xmm0,%xmm1,%xmm4 ++ vpxor %xmm6,%xmm2,%xmm2 ++ addl 32(%esp),%eax ++ xorl %edx,%esi ++ movl %ebx,%ebp ++ shldl $5,%ebx,%ebx ++ vpxor %xmm3,%xmm2,%xmm2 ++ vmovdqa %xmm6,64(%esp) ++ addl %esi,%eax ++ xorl %edx,%ebp ++ vmovdqa 128(%esp),%xmm6 ++ vpaddd %xmm1,%xmm5,%xmm5 ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vpxor %xmm4,%xmm2,%xmm2 ++ addl 36(%esp),%edi ++ xorl %ecx,%ebp ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ vpsrld $30,%xmm2,%xmm4 ++ vmovdqa %xmm5,16(%esp) ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ vpslld $2,%xmm2,%xmm2 ++ addl 40(%esp),%edx ++ xorl %ebx,%esi ++ movl %edi,%ebp ++ shldl $5,%edi,%edi ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ vpor %xmm4,%xmm2,%xmm2 ++ addl 44(%esp),%ecx ++ xorl %eax,%ebp ++ vmovdqa 80(%esp),%xmm4 ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ shrdl $7,%edi,%edi ++ addl %edx,%ecx ++ vpalignr $8,%xmm1,%xmm2,%xmm5 ++ vpxor %xmm7,%xmm3,%xmm3 ++ addl 48(%esp),%ebx ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ shldl $5,%ecx,%ecx ++ vpxor %xmm4,%xmm3,%xmm3 ++ vmovdqa %xmm7,80(%esp) ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ vmovdqa %xmm6,%xmm7 ++ vpaddd %xmm2,%xmm6,%xmm6 ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx 
++ vpxor %xmm5,%xmm3,%xmm3 ++ addl 52(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ vpsrld $30,%xmm3,%xmm5 ++ vmovdqa %xmm6,32(%esp) ++ addl %ebp,%eax ++ xorl %edx,%esi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vpslld $2,%xmm3,%xmm3 ++ addl 56(%esp),%edi ++ xorl %ecx,%esi ++ movl %eax,%ebp ++ shldl $5,%eax,%eax ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ vpor %xmm5,%xmm3,%xmm3 ++ addl 60(%esp),%edx ++ xorl %ebx,%ebp ++ vmovdqa 96(%esp),%xmm5 ++ movl %edi,%esi ++ shldl $5,%edi,%edi ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ vpalignr $8,%xmm2,%xmm3,%xmm6 ++ vpxor %xmm0,%xmm4,%xmm4 ++ addl (%esp),%ecx ++ xorl %eax,%esi ++ movl %edx,%ebp ++ shldl $5,%edx,%edx ++ vpxor %xmm5,%xmm4,%xmm4 ++ vmovdqa %xmm0,96(%esp) ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ vmovdqa %xmm7,%xmm0 ++ vpaddd %xmm3,%xmm7,%xmm7 ++ shrdl $7,%edi,%edi ++ addl %edx,%ecx ++ vpxor %xmm6,%xmm4,%xmm4 ++ addl 4(%esp),%ebx ++ xorl %edi,%ebp ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ vpsrld $30,%xmm4,%xmm6 ++ vmovdqa %xmm7,48(%esp) ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ vpslld $2,%xmm4,%xmm4 ++ addl 8(%esp),%eax ++ xorl %edx,%esi ++ movl %ebx,%ebp ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ xorl %edx,%ebp ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vpor %xmm6,%xmm4,%xmm4 ++ addl 12(%esp),%edi ++ xorl %ecx,%ebp ++ vmovdqa 64(%esp),%xmm6 ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ vpalignr $8,%xmm3,%xmm4,%xmm7 ++ vpxor %xmm1,%xmm5,%xmm5 ++ addl 16(%esp),%edx ++ xorl %ebx,%esi ++ movl %edi,%ebp ++ shldl $5,%edi,%edi ++ vpxor %xmm6,%xmm5,%xmm5 ++ vmovdqa %xmm1,64(%esp) ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ vmovdqa %xmm0,%xmm1 ++ vpaddd %xmm4,%xmm0,%xmm0 ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ vpxor %xmm7,%xmm5,%xmm5 ++ addl 20(%esp),%ecx ++ xorl %eax,%ebp ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ vpsrld $30,%xmm5,%xmm7 ++ vmovdqa %xmm0,(%esp) ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ shrdl $7,%edi,%edi ++ addl %edx,%ecx ++ vpslld $2,%xmm5,%xmm5 ++ addl 24(%esp),%ebx ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ vpor %xmm7,%xmm5,%xmm5 ++ addl 28(%esp),%eax ++ vmovdqa 80(%esp),%xmm7 ++ shrdl $7,%ecx,%ecx ++ movl %ebx,%esi ++ xorl %edx,%ebp ++ shldl $5,%ebx,%ebx ++ addl %ebp,%eax ++ xorl %ecx,%esi ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ vpalignr $8,%xmm4,%xmm5,%xmm0 ++ vpxor %xmm2,%xmm6,%xmm6 ++ addl 32(%esp),%edi ++ andl %ecx,%esi ++ xorl %edx,%ecx ++ shrdl $7,%ebx,%ebx ++ vpxor %xmm7,%xmm6,%xmm6 ++ vmovdqa %xmm2,80(%esp) ++ movl %eax,%ebp ++ xorl %ecx,%esi ++ vmovdqa %xmm1,%xmm2 ++ vpaddd %xmm5,%xmm1,%xmm1 ++ shldl $5,%eax,%eax ++ addl %esi,%edi ++ vpxor %xmm0,%xmm6,%xmm6 ++ xorl %ebx,%ebp ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ addl 36(%esp),%edx ++ vpsrld $30,%xmm6,%xmm0 ++ vmovdqa %xmm1,16(%esp) ++ andl %ebx,%ebp ++ xorl %ecx,%ebx ++ shrdl $7,%eax,%eax ++ movl %edi,%esi ++ vpslld $2,%xmm6,%xmm6 ++ xorl %ebx,%ebp ++ shldl $5,%edi,%edi ++ addl %ebp,%edx ++ xorl %eax,%esi ++ xorl %ebx,%eax ++ addl %edi,%edx ++ addl 40(%esp),%ecx ++ andl %eax,%esi ++ vpor %xmm0,%xmm6,%xmm6 ++ xorl %ebx,%eax ++ shrdl $7,%edi,%edi ++ vmovdqa 96(%esp),%xmm0 ++ movl %edx,%ebp ++ xorl %eax,%esi ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ xorl %edi,%ebp ++ xorl %eax,%edi ++ addl %edx,%ecx ++ addl 44(%esp),%ebx ++ andl %edi,%ebp ++ xorl %eax,%edi ++ shrdl $7,%edx,%edx 
++ movl %ecx,%esi ++ xorl %edi,%ebp ++ shldl $5,%ecx,%ecx ++ addl %ebp,%ebx ++ xorl %edx,%esi ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ vpalignr $8,%xmm5,%xmm6,%xmm1 ++ vpxor %xmm3,%xmm7,%xmm7 ++ addl 48(%esp),%eax ++ andl %edx,%esi ++ xorl %edi,%edx ++ shrdl $7,%ecx,%ecx ++ vpxor %xmm0,%xmm7,%xmm7 ++ vmovdqa %xmm3,96(%esp) ++ movl %ebx,%ebp ++ xorl %edx,%esi ++ vmovdqa 144(%esp),%xmm3 ++ vpaddd %xmm6,%xmm2,%xmm2 ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ vpxor %xmm1,%xmm7,%xmm7 ++ xorl %ecx,%ebp ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ addl 52(%esp),%edi ++ vpsrld $30,%xmm7,%xmm1 ++ vmovdqa %xmm2,32(%esp) ++ andl %ecx,%ebp ++ xorl %edx,%ecx ++ shrdl $7,%ebx,%ebx ++ movl %eax,%esi ++ vpslld $2,%xmm7,%xmm7 ++ xorl %ecx,%ebp ++ shldl $5,%eax,%eax ++ addl %ebp,%edi ++ xorl %ebx,%esi ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ addl 56(%esp),%edx ++ andl %ebx,%esi ++ vpor %xmm1,%xmm7,%xmm7 ++ xorl %ecx,%ebx ++ shrdl $7,%eax,%eax ++ vmovdqa 64(%esp),%xmm1 ++ movl %edi,%ebp ++ xorl %ebx,%esi ++ shldl $5,%edi,%edi ++ addl %esi,%edx ++ xorl %eax,%ebp ++ xorl %ebx,%eax ++ addl %edi,%edx ++ addl 60(%esp),%ecx ++ andl %eax,%ebp ++ xorl %ebx,%eax ++ shrdl $7,%edi,%edi ++ movl %edx,%esi ++ xorl %eax,%ebp ++ shldl $5,%edx,%edx ++ addl %ebp,%ecx ++ xorl %edi,%esi ++ xorl %eax,%edi ++ addl %edx,%ecx ++ vpalignr $8,%xmm6,%xmm7,%xmm2 ++ vpxor %xmm4,%xmm0,%xmm0 ++ addl (%esp),%ebx ++ andl %edi,%esi ++ xorl %eax,%edi ++ shrdl $7,%edx,%edx ++ vpxor %xmm1,%xmm0,%xmm0 ++ vmovdqa %xmm4,64(%esp) ++ movl %ecx,%ebp ++ xorl %edi,%esi ++ vmovdqa %xmm3,%xmm4 ++ vpaddd %xmm7,%xmm3,%xmm3 ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ vpxor %xmm2,%xmm0,%xmm0 ++ xorl %edx,%ebp ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ addl 4(%esp),%eax ++ vpsrld $30,%xmm0,%xmm2 ++ vmovdqa %xmm3,48(%esp) ++ andl %edx,%ebp ++ xorl %edi,%edx ++ shrdl $7,%ecx,%ecx ++ movl %ebx,%esi ++ vpslld $2,%xmm0,%xmm0 ++ xorl %edx,%ebp ++ shldl $5,%ebx,%ebx ++ addl %ebp,%eax ++ xorl %ecx,%esi ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ addl 8(%esp),%edi ++ andl %ecx,%esi ++ vpor %xmm2,%xmm0,%xmm0 ++ xorl %edx,%ecx ++ shrdl $7,%ebx,%ebx ++ vmovdqa 80(%esp),%xmm2 ++ movl %eax,%ebp ++ xorl %ecx,%esi ++ shldl $5,%eax,%eax ++ addl %esi,%edi ++ xorl %ebx,%ebp ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ addl 12(%esp),%edx ++ andl %ebx,%ebp ++ xorl %ecx,%ebx ++ shrdl $7,%eax,%eax ++ movl %edi,%esi ++ xorl %ebx,%ebp ++ shldl $5,%edi,%edi ++ addl %ebp,%edx ++ xorl %eax,%esi ++ xorl %ebx,%eax ++ addl %edi,%edx ++ vpalignr $8,%xmm7,%xmm0,%xmm3 ++ vpxor %xmm5,%xmm1,%xmm1 ++ addl 16(%esp),%ecx ++ andl %eax,%esi ++ xorl %ebx,%eax ++ shrdl $7,%edi,%edi ++ vpxor %xmm2,%xmm1,%xmm1 ++ vmovdqa %xmm5,80(%esp) ++ movl %edx,%ebp ++ xorl %eax,%esi ++ vmovdqa %xmm4,%xmm5 ++ vpaddd %xmm0,%xmm4,%xmm4 ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ vpxor %xmm3,%xmm1,%xmm1 ++ xorl %edi,%ebp ++ xorl %eax,%edi ++ addl %edx,%ecx ++ addl 20(%esp),%ebx ++ vpsrld $30,%xmm1,%xmm3 ++ vmovdqa %xmm4,(%esp) ++ andl %edi,%ebp ++ xorl %eax,%edi ++ shrdl $7,%edx,%edx ++ movl %ecx,%esi ++ vpslld $2,%xmm1,%xmm1 ++ xorl %edi,%ebp ++ shldl $5,%ecx,%ecx ++ addl %ebp,%ebx ++ xorl %edx,%esi ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ addl 24(%esp),%eax ++ andl %edx,%esi ++ vpor %xmm3,%xmm1,%xmm1 ++ xorl %edi,%edx ++ shrdl $7,%ecx,%ecx ++ vmovdqa 96(%esp),%xmm3 ++ movl %ebx,%ebp ++ xorl %edx,%esi ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ xorl %ecx,%ebp ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ addl 28(%esp),%edi ++ andl %ecx,%ebp ++ xorl %edx,%ecx ++ shrdl $7,%ebx,%ebx ++ movl %eax,%esi ++ xorl %ecx,%ebp ++ shldl $5,%eax,%eax ++ addl %ebp,%edi 
++ xorl %ebx,%esi ++ xorl %ecx,%ebx ++ addl %eax,%edi ++ vpalignr $8,%xmm0,%xmm1,%xmm4 ++ vpxor %xmm6,%xmm2,%xmm2 ++ addl 32(%esp),%edx ++ andl %ebx,%esi ++ xorl %ecx,%ebx ++ shrdl $7,%eax,%eax ++ vpxor %xmm3,%xmm2,%xmm2 ++ vmovdqa %xmm6,96(%esp) ++ movl %edi,%ebp ++ xorl %ebx,%esi ++ vmovdqa %xmm5,%xmm6 ++ vpaddd %xmm1,%xmm5,%xmm5 ++ shldl $5,%edi,%edi ++ addl %esi,%edx ++ vpxor %xmm4,%xmm2,%xmm2 ++ xorl %eax,%ebp ++ xorl %ebx,%eax ++ addl %edi,%edx ++ addl 36(%esp),%ecx ++ vpsrld $30,%xmm2,%xmm4 ++ vmovdqa %xmm5,16(%esp) ++ andl %eax,%ebp ++ xorl %ebx,%eax ++ shrdl $7,%edi,%edi ++ movl %edx,%esi ++ vpslld $2,%xmm2,%xmm2 ++ xorl %eax,%ebp ++ shldl $5,%edx,%edx ++ addl %ebp,%ecx ++ xorl %edi,%esi ++ xorl %eax,%edi ++ addl %edx,%ecx ++ addl 40(%esp),%ebx ++ andl %edi,%esi ++ vpor %xmm4,%xmm2,%xmm2 ++ xorl %eax,%edi ++ shrdl $7,%edx,%edx ++ vmovdqa 64(%esp),%xmm4 ++ movl %ecx,%ebp ++ xorl %edi,%esi ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %edx,%ebp ++ xorl %edi,%edx ++ addl %ecx,%ebx ++ addl 44(%esp),%eax ++ andl %edx,%ebp ++ xorl %edi,%edx ++ shrdl $7,%ecx,%ecx ++ movl %ebx,%esi ++ xorl %edx,%ebp ++ shldl $5,%ebx,%ebx ++ addl %ebp,%eax ++ xorl %edx,%esi ++ addl %ebx,%eax ++ vpalignr $8,%xmm1,%xmm2,%xmm5 ++ vpxor %xmm7,%xmm3,%xmm3 ++ addl 48(%esp),%edi ++ xorl %ecx,%esi ++ movl %eax,%ebp ++ shldl $5,%eax,%eax ++ vpxor %xmm4,%xmm3,%xmm3 ++ vmovdqa %xmm7,64(%esp) ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ vmovdqa %xmm6,%xmm7 ++ vpaddd %xmm2,%xmm6,%xmm6 ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ vpxor %xmm5,%xmm3,%xmm3 ++ addl 52(%esp),%edx ++ xorl %ebx,%ebp ++ movl %edi,%esi ++ shldl $5,%edi,%edi ++ vpsrld $30,%xmm3,%xmm5 ++ vmovdqa %xmm6,32(%esp) ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ vpslld $2,%xmm3,%xmm3 ++ addl 56(%esp),%ecx ++ xorl %eax,%esi ++ movl %edx,%ebp ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ shrdl $7,%edi,%edi ++ addl %edx,%ecx ++ vpor %xmm5,%xmm3,%xmm3 ++ addl 60(%esp),%ebx ++ xorl %edi,%ebp ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl (%esp),%eax ++ vpaddd %xmm3,%xmm7,%xmm7 ++ xorl %edx,%esi ++ movl %ebx,%ebp ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ vmovdqa %xmm7,48(%esp) ++ xorl %edx,%ebp ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 4(%esp),%edi ++ xorl %ecx,%ebp ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ addl 8(%esp),%edx ++ xorl %ebx,%esi ++ movl %edi,%ebp ++ shldl $5,%edi,%edi ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ addl 12(%esp),%ecx ++ xorl %eax,%ebp ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ shrdl $7,%edi,%edi ++ addl %edx,%ecx ++ movl 196(%esp),%ebp ++ cmpl 200(%esp),%ebp ++ je L008done ++ vmovdqa 160(%esp),%xmm7 ++ vmovdqa 176(%esp),%xmm6 ++ vmovdqu (%ebp),%xmm0 ++ vmovdqu 16(%ebp),%xmm1 ++ vmovdqu 32(%ebp),%xmm2 ++ vmovdqu 48(%ebp),%xmm3 ++ addl $64,%ebp ++ vpshufb %xmm6,%xmm0,%xmm0 ++ movl %ebp,196(%esp) ++ vmovdqa %xmm7,96(%esp) ++ addl 16(%esp),%ebx ++ xorl %edi,%esi ++ vpshufb %xmm6,%xmm1,%xmm1 ++ movl %ecx,%ebp ++ shldl $5,%ecx,%ecx ++ vpaddd %xmm7,%xmm0,%xmm4 ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ vmovdqa %xmm4,(%esp) ++ addl 20(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %ebp,%eax ++ xorl %edx,%esi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 24(%esp),%edi ++ xorl %ecx,%esi ++ movl %eax,%ebp ++ 
shldl $5,%eax,%eax ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ addl 28(%esp),%edx ++ xorl %ebx,%ebp ++ movl %edi,%esi ++ shldl $5,%edi,%edi ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ addl 32(%esp),%ecx ++ xorl %eax,%esi ++ vpshufb %xmm6,%xmm2,%xmm2 ++ movl %edx,%ebp ++ shldl $5,%edx,%edx ++ vpaddd %xmm7,%xmm1,%xmm5 ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ shrdl $7,%edi,%edi ++ addl %edx,%ecx ++ vmovdqa %xmm5,16(%esp) ++ addl 36(%esp),%ebx ++ xorl %edi,%ebp ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 40(%esp),%eax ++ xorl %edx,%esi ++ movl %ebx,%ebp ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ xorl %edx,%ebp ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 44(%esp),%edi ++ xorl %ecx,%ebp ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ addl 48(%esp),%edx ++ xorl %ebx,%esi ++ vpshufb %xmm6,%xmm3,%xmm3 ++ movl %edi,%ebp ++ shldl $5,%edi,%edi ++ vpaddd %xmm7,%xmm2,%xmm6 ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ vmovdqa %xmm6,32(%esp) ++ addl 52(%esp),%ecx ++ xorl %eax,%ebp ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ shrdl $7,%edi,%edi ++ addl %edx,%ecx ++ addl 56(%esp),%ebx ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 60(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %ebp,%eax ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ movl 192(%esp),%ebp ++ addl (%ebp),%eax ++ addl 4(%ebp),%esi ++ addl 8(%ebp),%ecx ++ movl %eax,(%ebp) ++ addl 12(%ebp),%edx ++ movl %esi,4(%ebp) ++ addl 16(%ebp),%edi ++ movl %ecx,%ebx ++ movl %ecx,8(%ebp) ++ xorl %edx,%ebx ++ movl %edx,12(%ebp) ++ movl %edi,16(%ebp) ++ movl %esi,%ebp ++ andl %ebx,%esi ++ movl %ebp,%ebx ++ jmp L007loop ++.align 4,0x90 ++L008done: ++ addl 16(%esp),%ebx ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 20(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %ebp,%eax ++ xorl %edx,%esi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 24(%esp),%edi ++ xorl %ecx,%esi ++ movl %eax,%ebp ++ shldl $5,%eax,%eax ++ addl %esi,%edi ++ xorl %ecx,%ebp ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ addl 28(%esp),%edx ++ xorl %ebx,%ebp ++ movl %edi,%esi ++ shldl $5,%edi,%edi ++ addl %ebp,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ addl 32(%esp),%ecx ++ xorl %eax,%esi ++ movl %edx,%ebp ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ xorl %eax,%ebp ++ shrdl $7,%edi,%edi ++ addl %edx,%ecx ++ addl 36(%esp),%ebx ++ xorl %edi,%ebp ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ addl %ebp,%ebx ++ xorl %edi,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 40(%esp),%eax ++ xorl %edx,%esi ++ movl %ebx,%ebp ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ xorl %edx,%ebp ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 44(%esp),%edi ++ xorl %ecx,%ebp ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ addl %ebp,%edi ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%edi ++ addl 48(%esp),%edx ++ xorl %ebx,%esi ++ movl %edi,%ebp ++ shldl $5,%edi,%edi ++ addl %esi,%edx ++ xorl %ebx,%ebp ++ shrdl $7,%eax,%eax ++ addl %edi,%edx ++ addl 52(%esp),%ecx ++ xorl %eax,%ebp ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ addl %ebp,%ecx ++ xorl %eax,%esi ++ shrdl 
$7,%edi,%edi ++ addl %edx,%ecx ++ addl 56(%esp),%ebx ++ xorl %edi,%esi ++ movl %ecx,%ebp ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %edi,%ebp ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 60(%esp),%eax ++ xorl %edx,%ebp ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %ebp,%eax ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vzeroall ++ movl 192(%esp),%ebp ++ addl (%ebp),%eax ++ movl 204(%esp),%esp ++ addl 4(%ebp),%esi ++ addl 8(%ebp),%ecx ++ movl %eax,(%ebp) ++ addl 12(%ebp),%edx ++ movl %esi,4(%ebp) ++ addl 16(%ebp),%edi ++ movl %ecx,8(%ebp) ++ movl %edx,12(%ebp) ++ movl %edi,16(%ebp) ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 6,0x90 ++LK_XX_XX: ++.long 1518500249,1518500249,1518500249,1518500249 ++.long 1859775393,1859775393,1859775393,1859775393 ++.long 2400959708,2400959708,2400959708,2400959708 ++.long 3395469782,3395469782,3395469782,3395469782 ++.long 66051,67438087,134810123,202182159 ++.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ++.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 ++.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 ++.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 ++.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.section __IMPORT,__pointers,non_lazy_symbol_pointers ++L_OPENSSL_ia32cap_P$non_lazy_ptr: ++.indirect_symbol _OPENSSL_ia32cap_P ++.long 0 ++#endif +diff --git a/apple-x86/crypto/fipsmodule/sha256-586.S b/apple-x86/crypto/fipsmodule/sha256-586.S +new file mode 100644 +index 0000000..c81cb9a +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/sha256-586.S +@@ -0,0 +1,5568 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _sha256_block_data_order ++.private_extern _sha256_block_data_order ++.align 4 ++_sha256_block_data_order: ++L_sha256_block_data_order_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%eax ++ movl %esp,%ebx ++ call L000pic_point ++L000pic_point: ++ popl %ebp ++ leal L001K256-L000pic_point(%ebp),%ebp ++ subl $16,%esp ++ andl $-64,%esp ++ shll $6,%eax ++ addl %edi,%eax ++ movl %esi,(%esp) ++ movl %edi,4(%esp) ++ movl %eax,8(%esp) ++ movl %ebx,12(%esp) ++ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K256(%ebp),%edx ++ movl (%edx),%ecx ++ movl 4(%edx),%ebx ++ testl $1048576,%ecx ++ jnz L002loop ++ movl 8(%edx),%edx ++ testl $16777216,%ecx ++ jz L003no_xmm ++ andl $1073741824,%ecx ++ andl $268435968,%ebx ++ orl %ebx,%ecx ++ andl $1342177280,%ecx ++ cmpl $1342177280,%ecx ++ je L004AVX ++ testl $512,%ebx ++ jnz L005SSSE3 ++L003no_xmm: ++ subl %edi,%eax ++ cmpl $256,%eax ++ jae L006unrolled ++ jmp L002loop ++.align 4,0x90 ++L002loop: ++ movl (%edi),%eax ++ movl 4(%edi),%ebx ++ movl 8(%edi),%ecx ++ bswap %eax ++ movl 12(%edi),%edx ++ bswap %ebx ++ pushl %eax ++ bswap %ecx ++ pushl %ebx ++ bswap %edx ++ pushl %ecx ++ pushl %edx ++ movl 16(%edi),%eax ++ movl 20(%edi),%ebx ++ movl 24(%edi),%ecx ++ bswap %eax ++ movl 28(%edi),%edx ++ bswap %ebx ++ pushl %eax ++ bswap %ecx ++ pushl %ebx ++ bswap %edx ++ pushl %ecx ++ pushl %edx ++ movl 32(%edi),%eax ++ movl 36(%edi),%ebx ++ movl 40(%edi),%ecx ++ bswap %eax ++ movl 44(%edi),%edx ++ bswap %ebx ++ pushl %eax ++ bswap %ecx ++ pushl %ebx ++ bswap %edx ++ pushl %ecx ++ pushl %edx ++ movl 48(%edi),%eax ++ movl 52(%edi),%ebx ++ movl 56(%edi),%ecx ++ bswap %eax ++ movl 60(%edi),%edx ++ bswap %ebx ++ 
pushl %eax ++ bswap %ecx ++ pushl %ebx ++ bswap %edx ++ pushl %ecx ++ pushl %edx ++ addl $64,%edi ++ leal -36(%esp),%esp ++ movl %edi,104(%esp) ++ movl (%esi),%eax ++ movl 4(%esi),%ebx ++ movl 8(%esi),%ecx ++ movl 12(%esi),%edi ++ movl %ebx,8(%esp) ++ xorl %ecx,%ebx ++ movl %ecx,12(%esp) ++ movl %edi,16(%esp) ++ movl %ebx,(%esp) ++ movl 16(%esi),%edx ++ movl 20(%esi),%ebx ++ movl 24(%esi),%ecx ++ movl 28(%esi),%edi ++ movl %ebx,24(%esp) ++ movl %ecx,28(%esp) ++ movl %edi,32(%esp) ++.align 4,0x90 ++L00700_15: ++ movl %edx,%ecx ++ movl 24(%esp),%esi ++ rorl $14,%ecx ++ movl 28(%esp),%edi ++ xorl %edx,%ecx ++ xorl %edi,%esi ++ movl 96(%esp),%ebx ++ rorl $5,%ecx ++ andl %edx,%esi ++ movl %edx,20(%esp) ++ xorl %ecx,%edx ++ addl 32(%esp),%ebx ++ xorl %edi,%esi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %esi,%ebx ++ rorl $9,%ecx ++ addl %edx,%ebx ++ movl 8(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,4(%esp) ++ leal -4(%esp),%esp ++ rorl $11,%ecx ++ movl (%ebp),%esi ++ xorl %eax,%ecx ++ movl 20(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %esi,%ebx ++ movl %eax,(%esp) ++ addl %ebx,%edx ++ andl 4(%esp),%eax ++ addl %ecx,%ebx ++ xorl %edi,%eax ++ addl $4,%ebp ++ addl %ebx,%eax ++ cmpl $3248222580,%esi ++ jne L00700_15 ++ movl 156(%esp),%ecx ++ jmp L00816_63 ++.align 4,0x90 ++L00816_63: ++ movl %ecx,%ebx ++ movl 104(%esp),%esi ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 160(%esp),%ebx ++ shrl $10,%edi ++ addl 124(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 24(%esp),%esi ++ rorl $14,%ecx ++ addl %edi,%ebx ++ movl 28(%esp),%edi ++ xorl %edx,%ecx ++ xorl %edi,%esi ++ movl %ebx,96(%esp) ++ rorl $5,%ecx ++ andl %edx,%esi ++ movl %edx,20(%esp) ++ xorl %ecx,%edx ++ addl 32(%esp),%ebx ++ xorl %edi,%esi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %esi,%ebx ++ rorl $9,%ecx ++ addl %edx,%ebx ++ movl 8(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,4(%esp) ++ leal -4(%esp),%esp ++ rorl $11,%ecx ++ movl (%ebp),%esi ++ xorl %eax,%ecx ++ movl 20(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %esi,%ebx ++ movl %eax,(%esp) ++ addl %ebx,%edx ++ andl 4(%esp),%eax ++ addl %ecx,%ebx ++ xorl %edi,%eax ++ movl 156(%esp),%ecx ++ addl $4,%ebp ++ addl %ebx,%eax ++ cmpl $3329325298,%esi ++ jne L00816_63 ++ movl 356(%esp),%esi ++ movl 8(%esp),%ebx ++ movl 16(%esp),%ecx ++ addl (%esi),%eax ++ addl 4(%esi),%ebx ++ addl 8(%esi),%edi ++ addl 12(%esi),%ecx ++ movl %eax,(%esi) ++ movl %ebx,4(%esi) ++ movl %edi,8(%esi) ++ movl %ecx,12(%esi) ++ movl 24(%esp),%eax ++ movl 28(%esp),%ebx ++ movl 32(%esp),%ecx ++ movl 360(%esp),%edi ++ addl 16(%esi),%edx ++ addl 20(%esi),%eax ++ addl 24(%esi),%ebx ++ addl 28(%esi),%ecx ++ movl %edx,16(%esi) ++ movl %eax,20(%esi) ++ movl %ebx,24(%esi) ++ movl %ecx,28(%esi) ++ leal 356(%esp),%esp ++ subl $256,%ebp ++ cmpl 8(%esp),%edi ++ jb L002loop ++ movl 12(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 6,0x90 ++L001K256: ++.long 
1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 ++.long 66051,67438087,134810123,202182159 ++.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 ++.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 ++.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 ++.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 ++.byte 62,0 ++.align 4,0x90 ++L006unrolled: ++ leal -96(%esp),%esp ++ movl (%esi),%eax ++ movl 4(%esi),%ebp ++ movl 8(%esi),%ecx ++ movl 12(%esi),%ebx ++ movl %ebp,4(%esp) ++ xorl %ecx,%ebp ++ movl %ecx,8(%esp) ++ movl %ebx,12(%esp) ++ movl 16(%esi),%edx ++ movl 20(%esi),%ebx ++ movl 24(%esi),%ecx ++ movl 28(%esi),%esi ++ movl %ebx,20(%esp) ++ movl %ecx,24(%esp) ++ movl %esi,28(%esp) ++ jmp L009grand_loop ++.align 4,0x90 ++L009grand_loop: ++ movl (%edi),%ebx ++ movl 4(%edi),%ecx ++ bswap %ebx ++ movl 8(%edi),%esi ++ bswap %ecx ++ movl %ebx,32(%esp) ++ bswap %esi ++ movl %ecx,36(%esp) ++ movl %esi,40(%esp) ++ movl 12(%edi),%ebx ++ movl 16(%edi),%ecx ++ bswap %ebx ++ movl 20(%edi),%esi ++ bswap %ecx ++ movl %ebx,44(%esp) ++ bswap %esi ++ movl %ecx,48(%esp) ++ movl %esi,52(%esp) ++ movl 24(%edi),%ebx ++ movl 28(%edi),%ecx ++ bswap %ebx ++ movl 32(%edi),%esi ++ bswap %ecx ++ movl %ebx,56(%esp) ++ bswap %esi ++ movl %ecx,60(%esp) ++ movl %esi,64(%esp) ++ movl 36(%edi),%ebx ++ movl 40(%edi),%ecx ++ bswap %ebx ++ movl 44(%edi),%esi ++ bswap %ecx ++ movl %ebx,68(%esp) ++ bswap %esi ++ movl %ecx,72(%esp) ++ movl %esi,76(%esp) ++ movl 48(%edi),%ebx ++ movl 52(%edi),%ecx ++ bswap %ebx ++ movl 56(%edi),%esi ++ bswap %ecx ++ movl %ebx,80(%esp) ++ bswap %esi ++ movl %ecx,84(%esp) ++ movl %esi,88(%esp) ++ movl 60(%edi),%ebx ++ addl $64,%edi ++ bswap %ebx ++ movl %edi,100(%esp) ++ movl %ebx,92(%esp) ++ movl %edx,%ecx ++ movl 20(%esp),%esi ++ rorl $14,%edx ++ movl 24(%esp),%edi ++ xorl %ecx,%edx ++ movl 32(%esp),%ebx ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ addl 28(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 4(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 1116352408(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 12(%esp),%edx ++ addl %ecx,%ebp ++ movl %edx,%esi ++ movl 16(%esp),%ecx ++ rorl $14,%edx ++ movl 20(%esp),%edi ++ xorl %esi,%edx ++ movl 36(%esp),%ebx ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,12(%esp) ++ xorl %esi,%edx ++ addl 24(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl (%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,28(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 1899447441(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 
8(%esp),%edx ++ addl %esi,%eax ++ movl %edx,%ecx ++ movl 12(%esp),%esi ++ rorl $14,%edx ++ movl 16(%esp),%edi ++ xorl %ecx,%edx ++ movl 40(%esp),%ebx ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ addl 20(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 28(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,24(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 3049323471(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 4(%esp),%edx ++ addl %ecx,%ebp ++ movl %edx,%esi ++ movl 8(%esp),%ecx ++ rorl $14,%edx ++ movl 12(%esp),%edi ++ xorl %esi,%edx ++ movl 44(%esp),%ebx ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,4(%esp) ++ xorl %esi,%edx ++ addl 16(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 24(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,20(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 3921009573(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ rorl $2,%esi ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %esi,%eax ++ movl %edx,%ecx ++ movl 4(%esp),%esi ++ rorl $14,%edx ++ movl 8(%esp),%edi ++ xorl %ecx,%edx ++ movl 48(%esp),%ebx ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ addl 12(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 20(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,16(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 961987163(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 28(%esp),%edx ++ addl %ecx,%ebp ++ movl %edx,%esi ++ movl (%esp),%ecx ++ rorl $14,%edx ++ movl 4(%esp),%edi ++ xorl %esi,%edx ++ movl 52(%esp),%ebx ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,28(%esp) ++ xorl %esi,%edx ++ addl 8(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 16(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,12(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 1508970993(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %esi,%eax ++ movl %edx,%ecx ++ movl 28(%esp),%esi ++ rorl $14,%edx ++ movl (%esp),%edi ++ xorl %ecx,%edx ++ movl 56(%esp),%ebx ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ addl 4(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 12(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,8(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 2453635748(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 20(%esp),%edx ++ addl %ecx,%ebp ++ movl %edx,%esi ++ movl 24(%esp),%ecx ++ rorl $14,%edx ++ movl 28(%esp),%edi ++ xorl %esi,%edx ++ movl 60(%esp),%ebx ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,20(%esp) ++ xorl %esi,%edx ++ addl (%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 8(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,4(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 2870763221(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ rorl $2,%esi ++ addl 
%edx,%eax ++ addl 16(%esp),%edx ++ addl %esi,%eax ++ movl %edx,%ecx ++ movl 20(%esp),%esi ++ rorl $14,%edx ++ movl 24(%esp),%edi ++ xorl %ecx,%edx ++ movl 64(%esp),%ebx ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ addl 28(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 4(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 3624381080(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 12(%esp),%edx ++ addl %ecx,%ebp ++ movl %edx,%esi ++ movl 16(%esp),%ecx ++ rorl $14,%edx ++ movl 20(%esp),%edi ++ xorl %esi,%edx ++ movl 68(%esp),%ebx ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,12(%esp) ++ xorl %esi,%edx ++ addl 24(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl (%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,28(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 310598401(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %esi,%eax ++ movl %edx,%ecx ++ movl 12(%esp),%esi ++ rorl $14,%edx ++ movl 16(%esp),%edi ++ xorl %ecx,%edx ++ movl 72(%esp),%ebx ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ addl 20(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 28(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,24(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 607225278(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 4(%esp),%edx ++ addl %ecx,%ebp ++ movl %edx,%esi ++ movl 8(%esp),%ecx ++ rorl $14,%edx ++ movl 12(%esp),%edi ++ xorl %esi,%edx ++ movl 76(%esp),%ebx ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,4(%esp) ++ xorl %esi,%edx ++ addl 16(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 24(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,20(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 1426881987(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ rorl $2,%esi ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %esi,%eax ++ movl %edx,%ecx ++ movl 4(%esp),%esi ++ rorl $14,%edx ++ movl 8(%esp),%edi ++ xorl %ecx,%edx ++ movl 80(%esp),%ebx ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ addl 12(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 20(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,16(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 1925078388(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 28(%esp),%edx ++ addl %ecx,%ebp ++ movl %edx,%esi ++ movl (%esp),%ecx ++ rorl $14,%edx ++ movl 4(%esp),%edi ++ xorl %esi,%edx ++ movl 84(%esp),%ebx ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,28(%esp) ++ xorl %esi,%edx ++ addl 8(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 16(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,12(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 2162078206(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ 
rorl $2,%esi ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %esi,%eax ++ movl %edx,%ecx ++ movl 28(%esp),%esi ++ rorl $14,%edx ++ movl (%esp),%edi ++ xorl %ecx,%edx ++ movl 88(%esp),%ebx ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ addl 4(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 12(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,8(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 2614888103(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 20(%esp),%edx ++ addl %ecx,%ebp ++ movl %edx,%esi ++ movl 24(%esp),%ecx ++ rorl $14,%edx ++ movl 28(%esp),%edi ++ xorl %esi,%edx ++ movl 92(%esp),%ebx ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,20(%esp) ++ xorl %esi,%edx ++ addl (%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 8(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,4(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 3248222580(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 36(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %esi,%eax ++ movl 88(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 32(%esp),%ebx ++ shrl $10,%edi ++ addl 68(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 20(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 24(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,32(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ addl 28(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 4(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 3835390401(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 40(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 12(%esp),%edx ++ addl %ecx,%ebp ++ movl 92(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 36(%esp),%ebx ++ shrl $10,%edi ++ addl 72(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 16(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 20(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,36(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,12(%esp) ++ xorl %esi,%edx ++ addl 24(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl (%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,28(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 4022224774(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 44(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %esi,%eax ++ movl 32(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 40(%esp),%ebx ++ shrl $10,%edi ++ addl 76(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 12(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 16(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,40(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl 
%ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ addl 20(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 28(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,24(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 264347078(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 48(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 4(%esp),%edx ++ addl %ecx,%ebp ++ movl 36(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 44(%esp),%ebx ++ shrl $10,%edi ++ addl 80(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 8(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 12(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,44(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,4(%esp) ++ xorl %esi,%edx ++ addl 16(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 24(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,20(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 604807628(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 52(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %esi,%eax ++ movl 40(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 48(%esp),%ebx ++ shrl $10,%edi ++ addl 84(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 4(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 8(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,48(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ addl 12(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 20(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,16(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 770255983(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 56(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 28(%esp),%edx ++ addl %ecx,%ebp ++ movl 44(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 52(%esp),%ebx ++ shrl $10,%edi ++ addl 88(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl (%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 4(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,52(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,28(%esp) ++ xorl %esi,%edx ++ addl 8(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 16(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,12(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 1249150122(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 60(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %esi,%eax ++ movl 48(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 56(%esp),%ebx ++ shrl $10,%edi ++ addl 92(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 28(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl (%esp),%edi ++ xorl 
%ecx,%edx ++ movl %ebx,56(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ addl 4(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 12(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,8(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 1555081692(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 64(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 20(%esp),%edx ++ addl %ecx,%ebp ++ movl 52(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 60(%esp),%ebx ++ shrl $10,%edi ++ addl 32(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 24(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 28(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,60(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,20(%esp) ++ xorl %esi,%edx ++ addl (%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 8(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,4(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 1996064986(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 68(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %esi,%eax ++ movl 56(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 64(%esp),%ebx ++ shrl $10,%edi ++ addl 36(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 20(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 24(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,64(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ addl 28(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 4(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 2554220882(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 72(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 12(%esp),%edx ++ addl %ecx,%ebp ++ movl 60(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 68(%esp),%ebx ++ shrl $10,%edi ++ addl 40(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 16(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 20(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,68(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,12(%esp) ++ xorl %esi,%edx ++ addl 24(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl (%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,28(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 2821834349(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 76(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %esi,%eax ++ movl 64(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 72(%esp),%ebx ++ shrl $10,%edi ++ addl 44(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ 
movl 12(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 16(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,72(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ addl 20(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 28(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,24(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 2952996808(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 80(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 4(%esp),%edx ++ addl %ecx,%ebp ++ movl 68(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 76(%esp),%ebx ++ shrl $10,%edi ++ addl 48(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 8(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 12(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,76(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,4(%esp) ++ xorl %esi,%edx ++ addl 16(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 24(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,20(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 3210313671(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 84(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %esi,%eax ++ movl 72(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 80(%esp),%ebx ++ shrl $10,%edi ++ addl 52(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 4(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 8(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,80(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ addl 12(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 20(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,16(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 3336571891(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 88(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 28(%esp),%edx ++ addl %ecx,%ebp ++ movl 76(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 84(%esp),%ebx ++ shrl $10,%edi ++ addl 56(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl (%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 4(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,84(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,28(%esp) ++ xorl %esi,%edx ++ addl 8(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 16(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,12(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 3584528711(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 92(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %esi,%eax ++ movl 80(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 
88(%esp),%ebx ++ shrl $10,%edi ++ addl 60(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 28(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl (%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,88(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ addl 4(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 12(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,8(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 113926993(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 32(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 20(%esp),%edx ++ addl %ecx,%ebp ++ movl 84(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 92(%esp),%ebx ++ shrl $10,%edi ++ addl 64(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 24(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 28(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,92(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,20(%esp) ++ xorl %esi,%edx ++ addl (%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 8(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,4(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 338241895(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 36(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %esi,%eax ++ movl 88(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 32(%esp),%ebx ++ shrl $10,%edi ++ addl 68(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 20(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 24(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,32(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ addl 28(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 4(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 666307205(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 40(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 12(%esp),%edx ++ addl %ecx,%ebp ++ movl 92(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 36(%esp),%ebx ++ shrl $10,%edi ++ addl 72(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 16(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 20(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,36(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,12(%esp) ++ xorl %esi,%edx ++ addl 24(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl (%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,28(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 773529912(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 44(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %esi,%eax ++ movl 32(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx 
++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 40(%esp),%ebx ++ shrl $10,%edi ++ addl 76(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 12(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 16(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,40(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ addl 20(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 28(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,24(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 1294757372(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 48(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 4(%esp),%edx ++ addl %ecx,%ebp ++ movl 36(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 44(%esp),%ebx ++ shrl $10,%edi ++ addl 80(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 8(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 12(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,44(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,4(%esp) ++ xorl %esi,%edx ++ addl 16(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 24(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,20(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 1396182291(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 52(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %esi,%eax ++ movl 40(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 48(%esp),%ebx ++ shrl $10,%edi ++ addl 84(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 4(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 8(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,48(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ addl 12(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 20(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,16(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 1695183700(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 56(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 28(%esp),%edx ++ addl %ecx,%ebp ++ movl 44(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 52(%esp),%ebx ++ shrl $10,%edi ++ addl 88(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl (%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 4(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,52(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,28(%esp) ++ xorl %esi,%edx ++ addl 8(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 16(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,12(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 1986661051(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 60(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %esi,%eax ++ movl 48(%esp),%esi ++ movl %ecx,%ebx ++ 
rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 56(%esp),%ebx ++ shrl $10,%edi ++ addl 92(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 28(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl (%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,56(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ addl 4(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 12(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,8(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 2177026350(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 64(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 20(%esp),%edx ++ addl %ecx,%ebp ++ movl 52(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 60(%esp),%ebx ++ shrl $10,%edi ++ addl 32(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 24(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 28(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,60(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,20(%esp) ++ xorl %esi,%edx ++ addl (%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 8(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,4(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 2456956037(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 68(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %esi,%eax ++ movl 56(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 64(%esp),%ebx ++ shrl $10,%edi ++ addl 36(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 20(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 24(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,64(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ addl 28(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 4(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 2730485921(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 72(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 12(%esp),%edx ++ addl %ecx,%ebp ++ movl 60(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 68(%esp),%ebx ++ shrl $10,%edi ++ addl 40(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 16(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 20(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,68(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,12(%esp) ++ xorl %esi,%edx ++ addl 24(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl (%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,28(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 2820302411(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 76(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ 
addl 8(%esp),%edx ++ addl %esi,%eax ++ movl 64(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 72(%esp),%ebx ++ shrl $10,%edi ++ addl 44(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 12(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 16(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,72(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ addl 20(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 28(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,24(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 3259730800(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 80(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 4(%esp),%edx ++ addl %ecx,%ebp ++ movl 68(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 76(%esp),%ebx ++ shrl $10,%edi ++ addl 48(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 8(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 12(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,76(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,4(%esp) ++ xorl %esi,%edx ++ addl 16(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 24(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,20(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 3345764771(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 84(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %esi,%eax ++ movl 72(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 80(%esp),%ebx ++ shrl $10,%edi ++ addl 52(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 4(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 8(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,80(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ addl 12(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 20(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,16(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 3516065817(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 88(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 28(%esp),%edx ++ addl %ecx,%ebp ++ movl 76(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 84(%esp),%ebx ++ shrl $10,%edi ++ addl 56(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl (%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 4(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,84(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,28(%esp) ++ xorl %esi,%edx ++ addl 8(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 16(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,12(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 3600352804(%ebx,%edx,1),%edx ++ xorl 
%ecx,%esi ++ xorl %edi,%eax ++ movl 92(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %esi,%eax ++ movl 80(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 88(%esp),%ebx ++ shrl $10,%edi ++ addl 60(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 28(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl (%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,88(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ addl 4(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 12(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,8(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 4094571909(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 32(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 20(%esp),%edx ++ addl %ecx,%ebp ++ movl 84(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 92(%esp),%ebx ++ shrl $10,%edi ++ addl 64(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 24(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 28(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,92(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,20(%esp) ++ xorl %esi,%edx ++ addl (%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 8(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,4(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 275423344(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 36(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %esi,%eax ++ movl 88(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 32(%esp),%ebx ++ shrl $10,%edi ++ addl 68(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 20(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 24(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,32(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ addl 28(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 4(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 430227734(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 40(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 12(%esp),%edx ++ addl %ecx,%ebp ++ movl 92(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 36(%esp),%ebx ++ shrl $10,%edi ++ addl 72(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 16(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 20(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,36(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,12(%esp) ++ xorl %esi,%edx ++ addl 24(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl (%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,28(%esp) ++ xorl %edi,%ebp 
++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 506948616(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 44(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %esi,%eax ++ movl 32(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 40(%esp),%ebx ++ shrl $10,%edi ++ addl 76(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 12(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 16(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,40(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ addl 20(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 28(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,24(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 659060556(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 48(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 4(%esp),%edx ++ addl %ecx,%ebp ++ movl 36(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 44(%esp),%ebx ++ shrl $10,%edi ++ addl 80(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 8(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 12(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,44(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,4(%esp) ++ xorl %esi,%edx ++ addl 16(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 24(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,20(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 883997877(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 52(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %esi,%eax ++ movl 40(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 48(%esp),%ebx ++ shrl $10,%edi ++ addl 84(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 4(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 8(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,48(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ addl 12(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 20(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,16(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 958139571(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 56(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 28(%esp),%edx ++ addl %ecx,%ebp ++ movl 44(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 52(%esp),%ebx ++ shrl $10,%edi ++ addl 88(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl (%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 4(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,52(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,28(%esp) ++ xorl %esi,%edx ++ addl 8(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ 
movl 16(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,12(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 1322822218(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 60(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %esi,%eax ++ movl 48(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 56(%esp),%ebx ++ shrl $10,%edi ++ addl 92(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 28(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl (%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,56(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ addl 4(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 12(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,8(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 1537002063(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 64(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 20(%esp),%edx ++ addl %ecx,%ebp ++ movl 52(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 60(%esp),%ebx ++ shrl $10,%edi ++ addl 32(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 24(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 28(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,60(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,20(%esp) ++ xorl %esi,%edx ++ addl (%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 8(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,4(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 1747873779(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 68(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %esi,%eax ++ movl 56(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 64(%esp),%ebx ++ shrl $10,%edi ++ addl 36(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 20(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 24(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,64(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ addl 28(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 4(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 1955562222(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 72(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 12(%esp),%edx ++ addl %ecx,%ebp ++ movl 60(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 68(%esp),%ebx ++ shrl $10,%edi ++ addl 40(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 16(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 20(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,68(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,12(%esp) ++ xorl %esi,%edx ++ addl 24(%esp),%ebx ++ xorl %ecx,%edi ++ 
rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl (%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,28(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 2024104815(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 76(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %esi,%eax ++ movl 64(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 72(%esp),%ebx ++ shrl $10,%edi ++ addl 44(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 12(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 16(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,72(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ addl 20(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 28(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,24(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 2227730452(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 80(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 4(%esp),%edx ++ addl %ecx,%ebp ++ movl 68(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 76(%esp),%ebx ++ shrl $10,%edi ++ addl 48(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 8(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 12(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,76(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,4(%esp) ++ xorl %esi,%edx ++ addl 16(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 24(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,20(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 2361852424(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 84(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %esi,%eax ++ movl 72(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 80(%esp),%ebx ++ shrl $10,%edi ++ addl 52(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 4(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 8(%esp),%edi ++ xorl %ecx,%edx ++ movl %ebx,80(%esp) ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ addl 12(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 20(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,16(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 2428436474(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 88(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 28(%esp),%edx ++ addl %ecx,%ebp ++ movl 76(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 84(%esp),%ebx ++ shrl $10,%edi ++ addl 56(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl (%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 4(%esp),%edi ++ xorl %esi,%edx ++ movl %ebx,84(%esp) ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ 
movl %esi,28(%esp) ++ xorl %esi,%edx ++ addl 8(%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 16(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,12(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 2756734187(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ movl 92(%esp),%ecx ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %esi,%eax ++ movl 80(%esp),%esi ++ movl %ecx,%ebx ++ rorl $11,%ecx ++ movl %esi,%edi ++ rorl $2,%esi ++ xorl %ebx,%ecx ++ shrl $3,%ebx ++ rorl $7,%ecx ++ xorl %edi,%esi ++ xorl %ecx,%ebx ++ rorl $17,%esi ++ addl 88(%esp),%ebx ++ shrl $10,%edi ++ addl 60(%esp),%ebx ++ movl %edx,%ecx ++ xorl %esi,%edi ++ movl 28(%esp),%esi ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl (%esp),%edi ++ xorl %ecx,%edx ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ addl 4(%esp),%ebx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%ebx ++ rorl $9,%ecx ++ movl %eax,%esi ++ movl 12(%esp),%edi ++ xorl %eax,%ecx ++ movl %eax,8(%esp) ++ xorl %edi,%eax ++ rorl $11,%ecx ++ andl %eax,%ebp ++ leal 3204031479(%ebx,%edx,1),%edx ++ xorl %esi,%ecx ++ xorl %edi,%ebp ++ movl 32(%esp),%esi ++ rorl $2,%ecx ++ addl %edx,%ebp ++ addl 20(%esp),%edx ++ addl %ecx,%ebp ++ movl 84(%esp),%ecx ++ movl %esi,%ebx ++ rorl $11,%esi ++ movl %ecx,%edi ++ rorl $2,%ecx ++ xorl %ebx,%esi ++ shrl $3,%ebx ++ rorl $7,%esi ++ xorl %edi,%ecx ++ xorl %esi,%ebx ++ rorl $17,%ecx ++ addl 92(%esp),%ebx ++ shrl $10,%edi ++ addl 64(%esp),%ebx ++ movl %edx,%esi ++ xorl %ecx,%edi ++ movl 24(%esp),%ecx ++ rorl $14,%edx ++ addl %edi,%ebx ++ movl 28(%esp),%edi ++ xorl %esi,%edx ++ xorl %edi,%ecx ++ rorl $5,%edx ++ andl %esi,%ecx ++ movl %esi,20(%esp) ++ xorl %esi,%edx ++ addl (%esp),%ebx ++ xorl %ecx,%edi ++ rorl $6,%edx ++ movl %ebp,%esi ++ addl %edi,%ebx ++ rorl $9,%esi ++ movl %ebp,%ecx ++ movl 8(%esp),%edi ++ xorl %ebp,%esi ++ movl %ebp,4(%esp) ++ xorl %edi,%ebp ++ rorl $11,%esi ++ andl %ebp,%eax ++ leal 3329325298(%ebx,%edx,1),%edx ++ xorl %ecx,%esi ++ xorl %edi,%eax ++ rorl $2,%esi ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %esi,%eax ++ movl 96(%esp),%esi ++ xorl %edi,%ebp ++ movl 12(%esp),%ecx ++ addl (%esi),%eax ++ addl 4(%esi),%ebp ++ addl 8(%esi),%edi ++ addl 12(%esi),%ecx ++ movl %eax,(%esi) ++ movl %ebp,4(%esi) ++ movl %edi,8(%esi) ++ movl %ecx,12(%esi) ++ movl %ebp,4(%esp) ++ xorl %edi,%ebp ++ movl %edi,8(%esp) ++ movl %ecx,12(%esp) ++ movl 20(%esp),%edi ++ movl 24(%esp),%ebx ++ movl 28(%esp),%ecx ++ addl 16(%esi),%edx ++ addl 20(%esi),%edi ++ addl 24(%esi),%ebx ++ addl 28(%esi),%ecx ++ movl %edx,16(%esi) ++ movl %edi,20(%esi) ++ movl %ebx,24(%esi) ++ movl %ecx,28(%esi) ++ movl %edi,20(%esp) ++ movl 100(%esp),%edi ++ movl %ebx,24(%esp) ++ movl %ecx,28(%esp) ++ cmpl 104(%esp),%edi ++ jb L009grand_loop ++ movl 108(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 5,0x90 ++L005SSSE3: ++ leal -96(%esp),%esp ++ movl (%esi),%eax ++ movl 4(%esi),%ebx ++ movl 8(%esi),%ecx ++ movl 12(%esi),%edi ++ movl %ebx,4(%esp) ++ xorl %ecx,%ebx ++ movl %ecx,8(%esp) ++ movl %edi,12(%esp) ++ movl 16(%esi),%edx ++ movl 20(%esi),%edi ++ movl 24(%esi),%ecx ++ movl 28(%esi),%esi ++ movl %edi,20(%esp) ++ movl 100(%esp),%edi ++ movl %ecx,24(%esp) ++ movl %esi,28(%esp) ++ movdqa 256(%ebp),%xmm7 ++ jmp L010grand_ssse3 ++.align 4,0x90 ++L010grand_ssse3: ++ movdqu (%edi),%xmm0 ++ movdqu 16(%edi),%xmm1 ++ movdqu 32(%edi),%xmm2 ++ movdqu 48(%edi),%xmm3 ++ addl 
$64,%edi ++.byte 102,15,56,0,199 ++ movl %edi,100(%esp) ++.byte 102,15,56,0,207 ++ movdqa (%ebp),%xmm4 ++.byte 102,15,56,0,215 ++ movdqa 16(%ebp),%xmm5 ++ paddd %xmm0,%xmm4 ++.byte 102,15,56,0,223 ++ movdqa 32(%ebp),%xmm6 ++ paddd %xmm1,%xmm5 ++ movdqa 48(%ebp),%xmm7 ++ movdqa %xmm4,32(%esp) ++ paddd %xmm2,%xmm6 ++ movdqa %xmm5,48(%esp) ++ paddd %xmm3,%xmm7 ++ movdqa %xmm6,64(%esp) ++ movdqa %xmm7,80(%esp) ++ jmp L011ssse3_00_47 ++.align 4,0x90 ++L011ssse3_00_47: ++ addl $64,%ebp ++ movl %edx,%ecx ++ movdqa %xmm1,%xmm4 ++ rorl $14,%edx ++ movl 20(%esp),%esi ++ movdqa %xmm3,%xmm7 ++ xorl %ecx,%edx ++ movl 24(%esp),%edi ++.byte 102,15,58,15,224,4 ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++.byte 102,15,58,15,250,4 ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ movdqa %xmm4,%xmm5 ++ rorl $6,%edx ++ movl %eax,%ecx ++ movdqa %xmm4,%xmm6 ++ addl %edi,%edx ++ movl 4(%esp),%edi ++ psrld $3,%xmm4 ++ movl %eax,%esi ++ rorl $9,%ecx ++ paddd %xmm7,%xmm0 ++ movl %eax,(%esp) ++ xorl %eax,%ecx ++ psrld $7,%xmm6 ++ xorl %edi,%eax ++ addl 28(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ pshufd $250,%xmm3,%xmm7 ++ xorl %esi,%ecx ++ addl 32(%esp),%edx ++ pslld $14,%xmm5 ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ pxor %xmm6,%xmm4 ++ addl %edx,%ebx ++ addl 12(%esp),%edx ++ psrld $11,%xmm6 ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ pxor %xmm5,%xmm4 ++ movl 16(%esp),%esi ++ xorl %ecx,%edx ++ pslld $11,%xmm5 ++ movl 20(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ pxor %xmm6,%xmm4 ++ andl %ecx,%esi ++ movl %ecx,12(%esp) ++ movdqa %xmm7,%xmm6 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ pxor %xmm5,%xmm4 ++ movl %ebx,%ecx ++ addl %edi,%edx ++ psrld $10,%xmm7 ++ movl (%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ paddd %xmm4,%xmm0 ++ movl %ebx,28(%esp) ++ xorl %ebx,%ecx ++ psrlq $17,%xmm6 ++ xorl %edi,%ebx ++ addl 24(%esp),%edx ++ rorl $11,%ecx ++ pxor %xmm6,%xmm7 ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ psrlq $2,%xmm6 ++ addl 36(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ pxor %xmm6,%xmm7 ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ pshufd $128,%xmm7,%xmm7 ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 12(%esp),%esi ++ xorl %ecx,%edx ++ movl 16(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ psrldq $8,%xmm7 ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ paddd %xmm7,%xmm0 ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 28(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,24(%esp) ++ pshufd $80,%xmm0,%xmm7 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 20(%esp),%edx ++ movdqa %xmm7,%xmm6 ++ rorl $11,%ecx ++ psrld $10,%xmm7 ++ andl %eax,%ebx ++ psrlq $17,%xmm6 ++ xorl %esi,%ecx ++ addl 40(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ pxor %xmm6,%xmm7 ++ addl %edx,%ebx ++ addl 4(%esp),%edx ++ psrlq $2,%xmm6 ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ pxor %xmm6,%xmm7 ++ movl 8(%esp),%esi ++ xorl %ecx,%edx ++ movl 12(%esp),%edi ++ pshufd $8,%xmm7,%xmm7 ++ xorl %edi,%esi ++ rorl $5,%edx ++ movdqa (%ebp),%xmm6 ++ andl %ecx,%esi ++ movl %ecx,4(%esp) ++ pslldq $8,%xmm7 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 24(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ paddd %xmm7,%xmm0 ++ movl %ebx,20(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 16(%esp),%edx ++ paddd %xmm0,%xmm6 ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 44(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %ecx,%eax ++ movdqa 
%xmm6,32(%esp) ++ movl %edx,%ecx ++ movdqa %xmm2,%xmm4 ++ rorl $14,%edx ++ movl 4(%esp),%esi ++ movdqa %xmm0,%xmm7 ++ xorl %ecx,%edx ++ movl 8(%esp),%edi ++.byte 102,15,58,15,225,4 ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++.byte 102,15,58,15,251,4 ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ movdqa %xmm4,%xmm5 ++ rorl $6,%edx ++ movl %eax,%ecx ++ movdqa %xmm4,%xmm6 ++ addl %edi,%edx ++ movl 20(%esp),%edi ++ psrld $3,%xmm4 ++ movl %eax,%esi ++ rorl $9,%ecx ++ paddd %xmm7,%xmm1 ++ movl %eax,16(%esp) ++ xorl %eax,%ecx ++ psrld $7,%xmm6 ++ xorl %edi,%eax ++ addl 12(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ pshufd $250,%xmm0,%xmm7 ++ xorl %esi,%ecx ++ addl 48(%esp),%edx ++ pslld $14,%xmm5 ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ pxor %xmm6,%xmm4 ++ addl %edx,%ebx ++ addl 28(%esp),%edx ++ psrld $11,%xmm6 ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ pxor %xmm5,%xmm4 ++ movl (%esp),%esi ++ xorl %ecx,%edx ++ pslld $11,%xmm5 ++ movl 4(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ pxor %xmm6,%xmm4 ++ andl %ecx,%esi ++ movl %ecx,28(%esp) ++ movdqa %xmm7,%xmm6 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ pxor %xmm5,%xmm4 ++ movl %ebx,%ecx ++ addl %edi,%edx ++ psrld $10,%xmm7 ++ movl 16(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ paddd %xmm4,%xmm1 ++ movl %ebx,12(%esp) ++ xorl %ebx,%ecx ++ psrlq $17,%xmm6 ++ xorl %edi,%ebx ++ addl 8(%esp),%edx ++ rorl $11,%ecx ++ pxor %xmm6,%xmm7 ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ psrlq $2,%xmm6 ++ addl 52(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ pxor %xmm6,%xmm7 ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ pshufd $128,%xmm7,%xmm7 ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 28(%esp),%esi ++ xorl %ecx,%edx ++ movl (%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ psrldq $8,%xmm7 ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ paddd %xmm7,%xmm1 ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 12(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,8(%esp) ++ pshufd $80,%xmm1,%xmm7 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 4(%esp),%edx ++ movdqa %xmm7,%xmm6 ++ rorl $11,%ecx ++ psrld $10,%xmm7 ++ andl %eax,%ebx ++ psrlq $17,%xmm6 ++ xorl %esi,%ecx ++ addl 56(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ pxor %xmm6,%xmm7 ++ addl %edx,%ebx ++ addl 20(%esp),%edx ++ psrlq $2,%xmm6 ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ pxor %xmm6,%xmm7 ++ movl 24(%esp),%esi ++ xorl %ecx,%edx ++ movl 28(%esp),%edi ++ pshufd $8,%xmm7,%xmm7 ++ xorl %edi,%esi ++ rorl $5,%edx ++ movdqa 16(%ebp),%xmm6 ++ andl %ecx,%esi ++ movl %ecx,20(%esp) ++ pslldq $8,%xmm7 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 8(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ paddd %xmm7,%xmm1 ++ movl %ebx,4(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl (%esp),%edx ++ paddd %xmm1,%xmm6 ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 60(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %ecx,%eax ++ movdqa %xmm6,48(%esp) ++ movl %edx,%ecx ++ movdqa %xmm3,%xmm4 ++ rorl $14,%edx ++ movl 20(%esp),%esi ++ movdqa %xmm1,%xmm7 ++ xorl %ecx,%edx ++ movl 24(%esp),%edi ++.byte 102,15,58,15,226,4 ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++.byte 102,15,58,15,248,4 ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ movdqa %xmm4,%xmm5 ++ rorl $6,%edx ++ movl %eax,%ecx ++ movdqa %xmm4,%xmm6 ++ addl %edi,%edx ++ movl 4(%esp),%edi ++ psrld $3,%xmm4 ++ movl %eax,%esi 
++ rorl $9,%ecx ++ paddd %xmm7,%xmm2 ++ movl %eax,(%esp) ++ xorl %eax,%ecx ++ psrld $7,%xmm6 ++ xorl %edi,%eax ++ addl 28(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ pshufd $250,%xmm1,%xmm7 ++ xorl %esi,%ecx ++ addl 64(%esp),%edx ++ pslld $14,%xmm5 ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ pxor %xmm6,%xmm4 ++ addl %edx,%ebx ++ addl 12(%esp),%edx ++ psrld $11,%xmm6 ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ pxor %xmm5,%xmm4 ++ movl 16(%esp),%esi ++ xorl %ecx,%edx ++ pslld $11,%xmm5 ++ movl 20(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ pxor %xmm6,%xmm4 ++ andl %ecx,%esi ++ movl %ecx,12(%esp) ++ movdqa %xmm7,%xmm6 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ pxor %xmm5,%xmm4 ++ movl %ebx,%ecx ++ addl %edi,%edx ++ psrld $10,%xmm7 ++ movl (%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ paddd %xmm4,%xmm2 ++ movl %ebx,28(%esp) ++ xorl %ebx,%ecx ++ psrlq $17,%xmm6 ++ xorl %edi,%ebx ++ addl 24(%esp),%edx ++ rorl $11,%ecx ++ pxor %xmm6,%xmm7 ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ psrlq $2,%xmm6 ++ addl 68(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ pxor %xmm6,%xmm7 ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ pshufd $128,%xmm7,%xmm7 ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 12(%esp),%esi ++ xorl %ecx,%edx ++ movl 16(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ psrldq $8,%xmm7 ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ paddd %xmm7,%xmm2 ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 28(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,24(%esp) ++ pshufd $80,%xmm2,%xmm7 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 20(%esp),%edx ++ movdqa %xmm7,%xmm6 ++ rorl $11,%ecx ++ psrld $10,%xmm7 ++ andl %eax,%ebx ++ psrlq $17,%xmm6 ++ xorl %esi,%ecx ++ addl 72(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ pxor %xmm6,%xmm7 ++ addl %edx,%ebx ++ addl 4(%esp),%edx ++ psrlq $2,%xmm6 ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ pxor %xmm6,%xmm7 ++ movl 8(%esp),%esi ++ xorl %ecx,%edx ++ movl 12(%esp),%edi ++ pshufd $8,%xmm7,%xmm7 ++ xorl %edi,%esi ++ rorl $5,%edx ++ movdqa 32(%ebp),%xmm6 ++ andl %ecx,%esi ++ movl %ecx,4(%esp) ++ pslldq $8,%xmm7 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 24(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ paddd %xmm7,%xmm2 ++ movl %ebx,20(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 16(%esp),%edx ++ paddd %xmm2,%xmm6 ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 76(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %ecx,%eax ++ movdqa %xmm6,64(%esp) ++ movl %edx,%ecx ++ movdqa %xmm0,%xmm4 ++ rorl $14,%edx ++ movl 4(%esp),%esi ++ movdqa %xmm2,%xmm7 ++ xorl %ecx,%edx ++ movl 8(%esp),%edi ++.byte 102,15,58,15,227,4 ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++.byte 102,15,58,15,249,4 ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ movdqa %xmm4,%xmm5 ++ rorl $6,%edx ++ movl %eax,%ecx ++ movdqa %xmm4,%xmm6 ++ addl %edi,%edx ++ movl 20(%esp),%edi ++ psrld $3,%xmm4 ++ movl %eax,%esi ++ rorl $9,%ecx ++ paddd %xmm7,%xmm3 ++ movl %eax,16(%esp) ++ xorl %eax,%ecx ++ psrld $7,%xmm6 ++ xorl %edi,%eax ++ addl 12(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ pshufd $250,%xmm2,%xmm7 ++ xorl %esi,%ecx ++ addl 80(%esp),%edx ++ pslld $14,%xmm5 ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ pxor %xmm6,%xmm4 ++ addl %edx,%ebx ++ addl 28(%esp),%edx ++ psrld $11,%xmm6 ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ pxor %xmm5,%xmm4 ++ movl (%esp),%esi ++ xorl 
%ecx,%edx ++ pslld $11,%xmm5 ++ movl 4(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ pxor %xmm6,%xmm4 ++ andl %ecx,%esi ++ movl %ecx,28(%esp) ++ movdqa %xmm7,%xmm6 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ pxor %xmm5,%xmm4 ++ movl %ebx,%ecx ++ addl %edi,%edx ++ psrld $10,%xmm7 ++ movl 16(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ paddd %xmm4,%xmm3 ++ movl %ebx,12(%esp) ++ xorl %ebx,%ecx ++ psrlq $17,%xmm6 ++ xorl %edi,%ebx ++ addl 8(%esp),%edx ++ rorl $11,%ecx ++ pxor %xmm6,%xmm7 ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ psrlq $2,%xmm6 ++ addl 84(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ pxor %xmm6,%xmm7 ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ pshufd $128,%xmm7,%xmm7 ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 28(%esp),%esi ++ xorl %ecx,%edx ++ movl (%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ psrldq $8,%xmm7 ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ paddd %xmm7,%xmm3 ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 12(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,8(%esp) ++ pshufd $80,%xmm3,%xmm7 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 4(%esp),%edx ++ movdqa %xmm7,%xmm6 ++ rorl $11,%ecx ++ psrld $10,%xmm7 ++ andl %eax,%ebx ++ psrlq $17,%xmm6 ++ xorl %esi,%ecx ++ addl 88(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ pxor %xmm6,%xmm7 ++ addl %edx,%ebx ++ addl 20(%esp),%edx ++ psrlq $2,%xmm6 ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ pxor %xmm6,%xmm7 ++ movl 24(%esp),%esi ++ xorl %ecx,%edx ++ movl 28(%esp),%edi ++ pshufd $8,%xmm7,%xmm7 ++ xorl %edi,%esi ++ rorl $5,%edx ++ movdqa 48(%ebp),%xmm6 ++ andl %ecx,%esi ++ movl %ecx,20(%esp) ++ pslldq $8,%xmm7 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 8(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ paddd %xmm7,%xmm3 ++ movl %ebx,4(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl (%esp),%edx ++ paddd %xmm3,%xmm6 ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 92(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %ecx,%eax ++ movdqa %xmm6,80(%esp) ++ cmpl $66051,64(%ebp) ++ jne L011ssse3_00_47 ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 20(%esp),%esi ++ xorl %ecx,%edx ++ movl 24(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 4(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 28(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 32(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ addl %edx,%ebx ++ addl 12(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 16(%esp),%esi ++ xorl %ecx,%edx ++ movl 20(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,12(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl (%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ movl %ebx,28(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 24(%esp),%edx ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 36(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 12(%esp),%esi ++ xorl %ecx,%edx ++ movl 16(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl 
%eax,%ecx ++ addl %edi,%edx ++ movl 28(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,24(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 20(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 40(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ addl %edx,%ebx ++ addl 4(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 8(%esp),%esi ++ xorl %ecx,%edx ++ movl 12(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,4(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 24(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ movl %ebx,20(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 16(%esp),%edx ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 44(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 4(%esp),%esi ++ xorl %ecx,%edx ++ movl 8(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 20(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,16(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 12(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 48(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ addl %edx,%ebx ++ addl 28(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl (%esp),%esi ++ xorl %ecx,%edx ++ movl 4(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,28(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 16(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ movl %ebx,12(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 8(%esp),%edx ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 52(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 28(%esp),%esi ++ xorl %ecx,%edx ++ movl (%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 12(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,8(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 4(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 56(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ addl %edx,%ebx ++ addl 20(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 24(%esp),%esi ++ xorl %ecx,%edx ++ movl 28(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,20(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 8(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ movl %ebx,4(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl (%esp),%edx ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 60(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 20(%esp),%esi ++ xorl %ecx,%edx ++ movl 24(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 4(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 28(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ xorl 
%esi,%ecx ++ addl 64(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ addl %edx,%ebx ++ addl 12(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 16(%esp),%esi ++ xorl %ecx,%edx ++ movl 20(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,12(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl (%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ movl %ebx,28(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 24(%esp),%edx ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 68(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 12(%esp),%esi ++ xorl %ecx,%edx ++ movl 16(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 28(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,24(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 20(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 72(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ addl %edx,%ebx ++ addl 4(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 8(%esp),%esi ++ xorl %ecx,%edx ++ movl 12(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,4(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 24(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ movl %ebx,20(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 16(%esp),%edx ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 76(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 4(%esp),%esi ++ xorl %ecx,%edx ++ movl 8(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 20(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,16(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 12(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 80(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ addl %edx,%ebx ++ addl 28(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl (%esp),%esi ++ xorl %ecx,%edx ++ movl 4(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,28(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 16(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ movl %ebx,12(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 8(%esp),%edx ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 84(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 28(%esp),%esi ++ xorl %ecx,%edx ++ movl (%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 12(%esp),%edi ++ movl %eax,%esi ++ rorl $9,%ecx ++ movl %eax,8(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 4(%esp),%edx ++ rorl $11,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 88(%esp),%edx ++ xorl %edi,%ebx ++ rorl $2,%ecx ++ addl %edx,%ebx ++ addl 20(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ rorl $14,%edx ++ movl 24(%esp),%esi ++ xorl %ecx,%edx ++ 
movl 28(%esp),%edi ++ xorl %edi,%esi ++ rorl $5,%edx ++ andl %ecx,%esi ++ movl %ecx,20(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ rorl $6,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 8(%esp),%edi ++ movl %ebx,%esi ++ rorl $9,%ecx ++ movl %ebx,4(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl (%esp),%edx ++ rorl $11,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 92(%esp),%edx ++ xorl %edi,%eax ++ rorl $2,%ecx ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %ecx,%eax ++ movl 96(%esp),%esi ++ xorl %edi,%ebx ++ movl 12(%esp),%ecx ++ addl (%esi),%eax ++ addl 4(%esi),%ebx ++ addl 8(%esi),%edi ++ addl 12(%esi),%ecx ++ movl %eax,(%esi) ++ movl %ebx,4(%esi) ++ movl %edi,8(%esi) ++ movl %ecx,12(%esi) ++ movl %ebx,4(%esp) ++ xorl %edi,%ebx ++ movl %edi,8(%esp) ++ movl %ecx,12(%esp) ++ movl 20(%esp),%edi ++ movl 24(%esp),%ecx ++ addl 16(%esi),%edx ++ addl 20(%esi),%edi ++ addl 24(%esi),%ecx ++ movl %edx,16(%esi) ++ movl %edi,20(%esi) ++ movl %edi,20(%esp) ++ movl 28(%esp),%edi ++ movl %ecx,24(%esi) ++ addl 28(%esi),%edi ++ movl %ecx,24(%esp) ++ movl %edi,28(%esi) ++ movl %edi,28(%esp) ++ movl 100(%esp),%edi ++ movdqa 64(%ebp),%xmm7 ++ subl $192,%ebp ++ cmpl 104(%esp),%edi ++ jb L010grand_ssse3 ++ movl 108(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 5,0x90 ++L004AVX: ++ leal -96(%esp),%esp ++ vzeroall ++ movl (%esi),%eax ++ movl 4(%esi),%ebx ++ movl 8(%esi),%ecx ++ movl 12(%esi),%edi ++ movl %ebx,4(%esp) ++ xorl %ecx,%ebx ++ movl %ecx,8(%esp) ++ movl %edi,12(%esp) ++ movl 16(%esi),%edx ++ movl 20(%esi),%edi ++ movl 24(%esi),%ecx ++ movl 28(%esi),%esi ++ movl %edi,20(%esp) ++ movl 100(%esp),%edi ++ movl %ecx,24(%esp) ++ movl %esi,28(%esp) ++ vmovdqa 256(%ebp),%xmm7 ++ jmp L012grand_avx ++.align 5,0x90 ++L012grand_avx: ++ vmovdqu (%edi),%xmm0 ++ vmovdqu 16(%edi),%xmm1 ++ vmovdqu 32(%edi),%xmm2 ++ vmovdqu 48(%edi),%xmm3 ++ addl $64,%edi ++ vpshufb %xmm7,%xmm0,%xmm0 ++ movl %edi,100(%esp) ++ vpshufb %xmm7,%xmm1,%xmm1 ++ vpshufb %xmm7,%xmm2,%xmm2 ++ vpaddd (%ebp),%xmm0,%xmm4 ++ vpshufb %xmm7,%xmm3,%xmm3 ++ vpaddd 16(%ebp),%xmm1,%xmm5 ++ vpaddd 32(%ebp),%xmm2,%xmm6 ++ vpaddd 48(%ebp),%xmm3,%xmm7 ++ vmovdqa %xmm4,32(%esp) ++ vmovdqa %xmm5,48(%esp) ++ vmovdqa %xmm6,64(%esp) ++ vmovdqa %xmm7,80(%esp) ++ jmp L013avx_00_47 ++.align 4,0x90 ++L013avx_00_47: ++ addl $64,%ebp ++ vpalignr $4,%xmm0,%xmm1,%xmm4 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 20(%esp),%esi ++ vpalignr $4,%xmm2,%xmm3,%xmm7 ++ xorl %ecx,%edx ++ movl 24(%esp),%edi ++ xorl %edi,%esi ++ vpsrld $7,%xmm4,%xmm6 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ vpaddd %xmm7,%xmm0,%xmm0 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrld $3,%xmm4,%xmm7 ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 4(%esp),%edi ++ vpslld $14,%xmm4,%xmm5 ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,(%esp) ++ vpxor %xmm6,%xmm7,%xmm4 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 28(%esp),%edx ++ vpshufd $250,%xmm3,%xmm7 ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ vpsrld $11,%xmm6,%xmm6 ++ addl 32(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ vpxor %xmm5,%xmm4,%xmm4 ++ addl %edx,%ebx ++ addl 12(%esp),%edx ++ addl %ecx,%ebx ++ vpslld $11,%xmm5,%xmm5 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 16(%esp),%esi ++ vpxor %xmm6,%xmm4,%xmm4 ++ xorl %ecx,%edx ++ movl 20(%esp),%edi ++ xorl %edi,%esi ++ vpsrld $10,%xmm7,%xmm6 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,12(%esp) ++ vpxor %xmm5,%xmm4,%xmm4 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx 
++ vpsrlq $17,%xmm7,%xmm5 ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl (%esp),%edi ++ vpaddd %xmm4,%xmm0,%xmm0 ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,28(%esp) ++ vpxor %xmm5,%xmm6,%xmm6 ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 24(%esp),%edx ++ vpsrlq $19,%xmm7,%xmm7 ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ vpxor %xmm7,%xmm6,%xmm6 ++ addl 36(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ vpshufd $132,%xmm6,%xmm7 ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %ecx,%eax ++ vpsrldq $8,%xmm7,%xmm7 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 12(%esp),%esi ++ vpaddd %xmm7,%xmm0,%xmm0 ++ xorl %ecx,%edx ++ movl 16(%esp),%edi ++ xorl %edi,%esi ++ vpshufd $80,%xmm0,%xmm7 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ vpsrld $10,%xmm7,%xmm6 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrlq $17,%xmm7,%xmm5 ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 28(%esp),%edi ++ vpxor %xmm5,%xmm6,%xmm6 ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,24(%esp) ++ vpsrlq $19,%xmm7,%xmm7 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 20(%esp),%edx ++ vpxor %xmm7,%xmm6,%xmm6 ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ vpshufd $232,%xmm6,%xmm7 ++ addl 40(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ vpslldq $8,%xmm7,%xmm7 ++ addl %edx,%ebx ++ addl 4(%esp),%edx ++ addl %ecx,%ebx ++ vpaddd %xmm7,%xmm0,%xmm0 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 8(%esp),%esi ++ vpaddd (%ebp),%xmm0,%xmm6 ++ xorl %ecx,%edx ++ movl 12(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,4(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 24(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,20(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 16(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 44(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %ecx,%eax ++ vmovdqa %xmm6,32(%esp) ++ vpalignr $4,%xmm1,%xmm2,%xmm4 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 4(%esp),%esi ++ vpalignr $4,%xmm3,%xmm0,%xmm7 ++ xorl %ecx,%edx ++ movl 8(%esp),%edi ++ xorl %edi,%esi ++ vpsrld $7,%xmm4,%xmm6 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ vpaddd %xmm7,%xmm1,%xmm1 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrld $3,%xmm4,%xmm7 ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 20(%esp),%edi ++ vpslld $14,%xmm4,%xmm5 ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,16(%esp) ++ vpxor %xmm6,%xmm7,%xmm4 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 12(%esp),%edx ++ vpshufd $250,%xmm0,%xmm7 ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ vpsrld $11,%xmm6,%xmm6 ++ addl 48(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ vpxor %xmm5,%xmm4,%xmm4 ++ addl %edx,%ebx ++ addl 28(%esp),%edx ++ addl %ecx,%ebx ++ vpslld $11,%xmm5,%xmm5 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl (%esp),%esi ++ vpxor %xmm6,%xmm4,%xmm4 ++ xorl %ecx,%edx ++ movl 4(%esp),%edi ++ xorl %edi,%esi ++ vpsrld $10,%xmm7,%xmm6 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,28(%esp) ++ vpxor %xmm5,%xmm4,%xmm4 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrlq $17,%xmm7,%xmm5 ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 16(%esp),%edi ++ vpaddd %xmm4,%xmm1,%xmm1 ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,12(%esp) ++ vpxor %xmm5,%xmm6,%xmm6 ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 8(%esp),%edx ++ vpsrlq 
$19,%xmm7,%xmm7 ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ vpxor %xmm7,%xmm6,%xmm6 ++ addl 52(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ vpshufd $132,%xmm6,%xmm7 ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %ecx,%eax ++ vpsrldq $8,%xmm7,%xmm7 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 28(%esp),%esi ++ vpaddd %xmm7,%xmm1,%xmm1 ++ xorl %ecx,%edx ++ movl (%esp),%edi ++ xorl %edi,%esi ++ vpshufd $80,%xmm1,%xmm7 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ vpsrld $10,%xmm7,%xmm6 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrlq $17,%xmm7,%xmm5 ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 12(%esp),%edi ++ vpxor %xmm5,%xmm6,%xmm6 ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,8(%esp) ++ vpsrlq $19,%xmm7,%xmm7 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 4(%esp),%edx ++ vpxor %xmm7,%xmm6,%xmm6 ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ vpshufd $232,%xmm6,%xmm7 ++ addl 56(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ vpslldq $8,%xmm7,%xmm7 ++ addl %edx,%ebx ++ addl 20(%esp),%edx ++ addl %ecx,%ebx ++ vpaddd %xmm7,%xmm1,%xmm1 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 24(%esp),%esi ++ vpaddd 16(%ebp),%xmm1,%xmm6 ++ xorl %ecx,%edx ++ movl 28(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,20(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 8(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,4(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl (%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 60(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %ecx,%eax ++ vmovdqa %xmm6,48(%esp) ++ vpalignr $4,%xmm2,%xmm3,%xmm4 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 20(%esp),%esi ++ vpalignr $4,%xmm0,%xmm1,%xmm7 ++ xorl %ecx,%edx ++ movl 24(%esp),%edi ++ xorl %edi,%esi ++ vpsrld $7,%xmm4,%xmm6 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ vpaddd %xmm7,%xmm2,%xmm2 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrld $3,%xmm4,%xmm7 ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 4(%esp),%edi ++ vpslld $14,%xmm4,%xmm5 ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,(%esp) ++ vpxor %xmm6,%xmm7,%xmm4 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 28(%esp),%edx ++ vpshufd $250,%xmm1,%xmm7 ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ vpsrld $11,%xmm6,%xmm6 ++ addl 64(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ vpxor %xmm5,%xmm4,%xmm4 ++ addl %edx,%ebx ++ addl 12(%esp),%edx ++ addl %ecx,%ebx ++ vpslld $11,%xmm5,%xmm5 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 16(%esp),%esi ++ vpxor %xmm6,%xmm4,%xmm4 ++ xorl %ecx,%edx ++ movl 20(%esp),%edi ++ xorl %edi,%esi ++ vpsrld $10,%xmm7,%xmm6 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,12(%esp) ++ vpxor %xmm5,%xmm4,%xmm4 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrlq $17,%xmm7,%xmm5 ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl (%esp),%edi ++ vpaddd %xmm4,%xmm2,%xmm2 ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,28(%esp) ++ vpxor %xmm5,%xmm6,%xmm6 ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 24(%esp),%edx ++ vpsrlq $19,%xmm7,%xmm7 ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ vpxor %xmm7,%xmm6,%xmm6 ++ addl 68(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ vpshufd $132,%xmm6,%xmm7 ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %ecx,%eax ++ vpsrldq $8,%xmm7,%xmm7 ++ 
movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 12(%esp),%esi ++ vpaddd %xmm7,%xmm2,%xmm2 ++ xorl %ecx,%edx ++ movl 16(%esp),%edi ++ xorl %edi,%esi ++ vpshufd $80,%xmm2,%xmm7 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ vpsrld $10,%xmm7,%xmm6 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrlq $17,%xmm7,%xmm5 ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 28(%esp),%edi ++ vpxor %xmm5,%xmm6,%xmm6 ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,24(%esp) ++ vpsrlq $19,%xmm7,%xmm7 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 20(%esp),%edx ++ vpxor %xmm7,%xmm6,%xmm6 ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ vpshufd $232,%xmm6,%xmm7 ++ addl 72(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ vpslldq $8,%xmm7,%xmm7 ++ addl %edx,%ebx ++ addl 4(%esp),%edx ++ addl %ecx,%ebx ++ vpaddd %xmm7,%xmm2,%xmm2 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 8(%esp),%esi ++ vpaddd 32(%ebp),%xmm2,%xmm6 ++ xorl %ecx,%edx ++ movl 12(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,4(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 24(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,20(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 16(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 76(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %ecx,%eax ++ vmovdqa %xmm6,64(%esp) ++ vpalignr $4,%xmm3,%xmm0,%xmm4 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 4(%esp),%esi ++ vpalignr $4,%xmm1,%xmm2,%xmm7 ++ xorl %ecx,%edx ++ movl 8(%esp),%edi ++ xorl %edi,%esi ++ vpsrld $7,%xmm4,%xmm6 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ vpaddd %xmm7,%xmm3,%xmm3 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrld $3,%xmm4,%xmm7 ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 20(%esp),%edi ++ vpslld $14,%xmm4,%xmm5 ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,16(%esp) ++ vpxor %xmm6,%xmm7,%xmm4 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 12(%esp),%edx ++ vpshufd $250,%xmm2,%xmm7 ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ vpsrld $11,%xmm6,%xmm6 ++ addl 80(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ vpxor %xmm5,%xmm4,%xmm4 ++ addl %edx,%ebx ++ addl 28(%esp),%edx ++ addl %ecx,%ebx ++ vpslld $11,%xmm5,%xmm5 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl (%esp),%esi ++ vpxor %xmm6,%xmm4,%xmm4 ++ xorl %ecx,%edx ++ movl 4(%esp),%edi ++ xorl %edi,%esi ++ vpsrld $10,%xmm7,%xmm6 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,28(%esp) ++ vpxor %xmm5,%xmm4,%xmm4 ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrlq $17,%xmm7,%xmm5 ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 16(%esp),%edi ++ vpaddd %xmm4,%xmm3,%xmm3 ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,12(%esp) ++ vpxor %xmm5,%xmm6,%xmm6 ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 8(%esp),%edx ++ vpsrlq $19,%xmm7,%xmm7 ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ vpxor %xmm7,%xmm6,%xmm6 ++ addl 84(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ vpshufd $132,%xmm6,%xmm7 ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %ecx,%eax ++ vpsrldq $8,%xmm7,%xmm7 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 28(%esp),%esi ++ vpaddd %xmm7,%xmm3,%xmm3 ++ xorl %ecx,%edx ++ movl (%esp),%edi ++ xorl %edi,%esi ++ vpshufd $80,%xmm3,%xmm7 ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ vpsrld $10,%xmm7,%xmm6 ++ xorl %ecx,%edx 
++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ vpsrlq $17,%xmm7,%xmm5 ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 12(%esp),%edi ++ vpxor %xmm5,%xmm6,%xmm6 ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,8(%esp) ++ vpsrlq $19,%xmm7,%xmm7 ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 4(%esp),%edx ++ vpxor %xmm7,%xmm6,%xmm6 ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ vpshufd $232,%xmm6,%xmm7 ++ addl 88(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ vpslldq $8,%xmm7,%xmm7 ++ addl %edx,%ebx ++ addl 20(%esp),%edx ++ addl %ecx,%ebx ++ vpaddd %xmm7,%xmm3,%xmm3 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 24(%esp),%esi ++ vpaddd 48(%ebp),%xmm3,%xmm6 ++ xorl %ecx,%edx ++ movl 28(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,20(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 8(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,4(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl (%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 92(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %ecx,%eax ++ vmovdqa %xmm6,80(%esp) ++ cmpl $66051,64(%ebp) ++ jne L013avx_00_47 ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 20(%esp),%esi ++ xorl %ecx,%edx ++ movl 24(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 4(%esp),%edi ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 28(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 32(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ addl %edx,%ebx ++ addl 12(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 16(%esp),%esi ++ xorl %ecx,%edx ++ movl 20(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,12(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl (%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,28(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 24(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 36(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 12(%esp),%esi ++ xorl %ecx,%edx ++ movl 16(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 28(%esp),%edi ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,24(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 20(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 40(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ addl %edx,%ebx ++ addl 4(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 8(%esp),%esi ++ xorl %ecx,%edx ++ movl 12(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,4(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 24(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,20(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 16(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 
44(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 4(%esp),%esi ++ xorl %ecx,%edx ++ movl 8(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 20(%esp),%edi ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,16(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 12(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 48(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ addl %edx,%ebx ++ addl 28(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl (%esp),%esi ++ xorl %ecx,%edx ++ movl 4(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,28(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 16(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,12(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 8(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 52(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 28(%esp),%esi ++ xorl %ecx,%edx ++ movl (%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 12(%esp),%edi ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,8(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 4(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 56(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ addl %edx,%ebx ++ addl 20(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 24(%esp),%esi ++ xorl %ecx,%edx ++ movl 28(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,20(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 8(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,4(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl (%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 60(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 20(%esp),%esi ++ xorl %ecx,%edx ++ movl 24(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,16(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 4(%esp),%edi ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 28(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 64(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ addl %edx,%ebx ++ addl 12(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 16(%esp),%esi ++ xorl %ecx,%edx ++ movl 20(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,12(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl (%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,28(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 24(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl 
%esi,%ecx ++ addl 68(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl 8(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 12(%esp),%esi ++ xorl %ecx,%edx ++ movl 16(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,8(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 28(%esp),%edi ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,24(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 20(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 72(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ addl %edx,%ebx ++ addl 4(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 8(%esp),%esi ++ xorl %ecx,%edx ++ movl 12(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,4(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 24(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,20(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 16(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 76(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl (%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 4(%esp),%esi ++ xorl %ecx,%edx ++ movl 8(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 20(%esp),%edi ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,16(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 12(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 80(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ addl %edx,%ebx ++ addl 28(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl (%esp),%esi ++ xorl %ecx,%edx ++ movl 4(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,28(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 16(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,12(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl 8(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %ebx,%eax ++ xorl %esi,%ecx ++ addl 84(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl 24(%esp),%edx ++ addl %ecx,%eax ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 28(%esp),%esi ++ xorl %ecx,%edx ++ movl (%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,24(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %eax,%ecx ++ addl %edi,%edx ++ movl 12(%esp),%edi ++ movl %eax,%esi ++ shrdl $9,%ecx,%ecx ++ movl %eax,8(%esp) ++ xorl %eax,%ecx ++ xorl %edi,%eax ++ addl 4(%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl %eax,%ebx ++ xorl %esi,%ecx ++ addl 88(%esp),%edx ++ xorl %edi,%ebx ++ shrdl $2,%ecx,%ecx ++ addl %edx,%ebx ++ addl 20(%esp),%edx ++ addl %ecx,%ebx ++ movl %edx,%ecx ++ shrdl $14,%edx,%edx ++ movl 24(%esp),%esi ++ xorl %ecx,%edx ++ movl 28(%esp),%edi ++ xorl %edi,%esi ++ shrdl $5,%edx,%edx ++ andl %ecx,%esi ++ movl %ecx,20(%esp) ++ xorl %ecx,%edx ++ xorl %esi,%edi ++ shrdl $6,%edx,%edx ++ movl %ebx,%ecx ++ addl %edi,%edx ++ movl 8(%esp),%edi ++ movl %ebx,%esi ++ shrdl $9,%ecx,%ecx ++ movl %ebx,4(%esp) ++ xorl %ebx,%ecx ++ xorl %edi,%ebx ++ addl (%esp),%edx ++ shrdl $11,%ecx,%ecx ++ andl 
%ebx,%eax ++ xorl %esi,%ecx ++ addl 92(%esp),%edx ++ xorl %edi,%eax ++ shrdl $2,%ecx,%ecx ++ addl %edx,%eax ++ addl 16(%esp),%edx ++ addl %ecx,%eax ++ movl 96(%esp),%esi ++ xorl %edi,%ebx ++ movl 12(%esp),%ecx ++ addl (%esi),%eax ++ addl 4(%esi),%ebx ++ addl 8(%esi),%edi ++ addl 12(%esi),%ecx ++ movl %eax,(%esi) ++ movl %ebx,4(%esi) ++ movl %edi,8(%esi) ++ movl %ecx,12(%esi) ++ movl %ebx,4(%esp) ++ xorl %edi,%ebx ++ movl %edi,8(%esp) ++ movl %ecx,12(%esp) ++ movl 20(%esp),%edi ++ movl 24(%esp),%ecx ++ addl 16(%esi),%edx ++ addl 20(%esi),%edi ++ addl 24(%esi),%ecx ++ movl %edx,16(%esi) ++ movl %edi,20(%esi) ++ movl %edi,20(%esp) ++ movl 28(%esp),%edi ++ movl %ecx,24(%esi) ++ addl 28(%esi),%edi ++ movl %ecx,24(%esp) ++ movl %edi,28(%esi) ++ movl %edi,28(%esp) ++ movl 100(%esp),%edi ++ vmovdqa 64(%ebp),%xmm7 ++ subl $192,%ebp ++ cmpl 104(%esp),%edi ++ jb L012grand_avx ++ movl 108(%esp),%esp ++ vzeroall ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.section __IMPORT,__pointers,non_lazy_symbol_pointers ++L_OPENSSL_ia32cap_P$non_lazy_ptr: ++.indirect_symbol _OPENSSL_ia32cap_P ++.long 0 ++#endif +diff --git a/apple-x86/crypto/fipsmodule/sha512-586.S b/apple-x86/crypto/fipsmodule/sha512-586.S +new file mode 100644 +index 0000000..8c33cf5 +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/sha512-586.S +@@ -0,0 +1,2838 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _sha512_block_data_order ++.private_extern _sha512_block_data_order ++.align 4 ++_sha512_block_data_order: ++L_sha512_block_data_order_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%eax ++ movl %esp,%ebx ++ call L000pic_point ++L000pic_point: ++ popl %ebp ++ leal L001K512-L000pic_point(%ebp),%ebp ++ subl $16,%esp ++ andl $-64,%esp ++ shll $7,%eax ++ addl %edi,%eax ++ movl %esi,(%esp) ++ movl %edi,4(%esp) ++ movl %eax,8(%esp) ++ movl %ebx,12(%esp) ++ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K512(%ebp),%edx ++ movl (%edx),%ecx ++ testl $67108864,%ecx ++ jz L002loop_x86 ++ movl 4(%edx),%edx ++ movq (%esi),%mm0 ++ andl $16777216,%ecx ++ movq 8(%esi),%mm1 ++ andl $512,%edx ++ movq 16(%esi),%mm2 ++ orl %edx,%ecx ++ movq 24(%esi),%mm3 ++ movq 32(%esi),%mm4 ++ movq 40(%esi),%mm5 ++ movq 48(%esi),%mm6 ++ movq 56(%esi),%mm7 ++ cmpl $16777728,%ecx ++ je L003SSSE3 ++ subl $80,%esp ++ jmp L004loop_sse2 ++.align 4,0x90 ++L004loop_sse2: ++ movq %mm1,8(%esp) ++ movq %mm2,16(%esp) ++ movq %mm3,24(%esp) ++ movq %mm5,40(%esp) ++ movq %mm6,48(%esp) ++ pxor %mm1,%mm2 ++ movq %mm7,56(%esp) ++ movq %mm0,%mm3 ++ movl (%edi),%eax ++ movl 4(%edi),%ebx ++ addl $8,%edi ++ movl $15,%edx ++ bswap %eax ++ bswap %ebx ++ jmp L00500_14_sse2 ++.align 4,0x90 ++L00500_14_sse2: ++ movd %eax,%mm1 ++ movl (%edi),%eax ++ movd %ebx,%mm7 ++ movl 4(%edi),%ebx ++ addl $8,%edi ++ bswap %eax ++ bswap %ebx ++ punpckldq %mm1,%mm7 ++ movq %mm4,%mm1 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,32(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ movq %mm3,%mm0 ++ movq %mm7,72(%esp) ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 56(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ paddq (%ebp),%mm7 ++ pxor %mm4,%mm3 ++ movq 24(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 
++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 8(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ subl $8,%esp ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 40(%esp),%mm5 ++ paddq %mm2,%mm3 ++ movq %mm0,%mm2 ++ addl $8,%ebp ++ paddq %mm6,%mm3 ++ movq 48(%esp),%mm6 ++ decl %edx ++ jnz L00500_14_sse2 ++ movd %eax,%mm1 ++ movd %ebx,%mm7 ++ punpckldq %mm1,%mm7 ++ movq %mm4,%mm1 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,32(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ movq %mm3,%mm0 ++ movq %mm7,72(%esp) ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 56(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ paddq (%ebp),%mm7 ++ pxor %mm4,%mm3 ++ movq 24(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 8(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ subl $8,%esp ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 192(%esp),%mm7 ++ paddq %mm2,%mm3 ++ movq %mm0,%mm2 ++ addl $8,%ebp ++ paddq %mm6,%mm3 ++ pxor %mm0,%mm0 ++ movl $32,%edx ++ jmp L00616_79_sse2 ++.align 4,0x90 ++L00616_79_sse2: ++ movq 88(%esp),%mm5 ++ movq %mm7,%mm1 ++ psrlq $1,%mm7 ++ movq %mm5,%mm6 ++ psrlq $6,%mm5 ++ psllq $56,%mm1 ++ paddq %mm3,%mm0 ++ movq %mm7,%mm3 ++ psrlq $6,%mm7 ++ pxor %mm1,%mm3 ++ psllq $7,%mm1 ++ pxor %mm7,%mm3 ++ psrlq $1,%mm7 ++ pxor %mm1,%mm3 ++ movq %mm5,%mm1 ++ psrlq $13,%mm5 ++ pxor %mm3,%mm7 ++ psllq $3,%mm6 ++ pxor %mm5,%mm1 ++ paddq 200(%esp),%mm7 ++ pxor %mm6,%mm1 ++ psrlq $42,%mm5 ++ paddq 128(%esp),%mm7 ++ pxor %mm5,%mm1 ++ psllq $42,%mm6 ++ movq 40(%esp),%mm5 ++ pxor %mm6,%mm1 ++ movq 48(%esp),%mm6 ++ paddq %mm1,%mm7 ++ movq %mm4,%mm1 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,32(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ movq %mm7,72(%esp) ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 56(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ paddq (%ebp),%mm7 ++ pxor %mm4,%mm3 ++ movq 24(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 8(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ subl $8,%esp ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 192(%esp),%mm7 ++ paddq %mm6,%mm2 ++ addl $8,%ebp ++ movq 88(%esp),%mm5 ++ movq %mm7,%mm1 ++ psrlq $1,%mm7 ++ movq %mm5,%mm6 ++ psrlq $6,%mm5 ++ psllq $56,%mm1 ++ paddq %mm3,%mm2 ++ movq %mm7,%mm3 ++ psrlq $6,%mm7 ++ pxor %mm1,%mm3 ++ psllq $7,%mm1 ++ pxor %mm7,%mm3 ++ psrlq $1,%mm7 ++ pxor %mm1,%mm3 ++ movq %mm5,%mm1 ++ psrlq $13,%mm5 ++ pxor %mm3,%mm7 ++ psllq $3,%mm6 ++ pxor %mm5,%mm1 ++ paddq 200(%esp),%mm7 ++ pxor %mm6,%mm1 ++ psrlq $42,%mm5 ++ paddq 128(%esp),%mm7 ++ pxor %mm5,%mm1 ++ psllq $42,%mm6 ++ movq 40(%esp),%mm5 ++ pxor %mm6,%mm1 ++ movq 48(%esp),%mm6 ++ paddq %mm1,%mm7 ++ movq %mm4,%mm1 ++ pxor %mm6,%mm5 ++ psrlq 
$14,%mm1 ++ movq %mm4,32(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ movq %mm7,72(%esp) ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 56(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ paddq (%ebp),%mm7 ++ pxor %mm4,%mm3 ++ movq 24(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 8(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ subl $8,%esp ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 192(%esp),%mm7 ++ paddq %mm6,%mm0 ++ addl $8,%ebp ++ decl %edx ++ jnz L00616_79_sse2 ++ paddq %mm3,%mm0 ++ movq 8(%esp),%mm1 ++ movq 24(%esp),%mm3 ++ movq 40(%esp),%mm5 ++ movq 48(%esp),%mm6 ++ movq 56(%esp),%mm7 ++ pxor %mm1,%mm2 ++ paddq (%esi),%mm0 ++ paddq 8(%esi),%mm1 ++ paddq 16(%esi),%mm2 ++ paddq 24(%esi),%mm3 ++ paddq 32(%esi),%mm4 ++ paddq 40(%esi),%mm5 ++ paddq 48(%esi),%mm6 ++ paddq 56(%esi),%mm7 ++ movl $640,%eax ++ movq %mm0,(%esi) ++ movq %mm1,8(%esi) ++ movq %mm2,16(%esi) ++ movq %mm3,24(%esi) ++ movq %mm4,32(%esi) ++ movq %mm5,40(%esi) ++ movq %mm6,48(%esi) ++ movq %mm7,56(%esi) ++ leal (%esp,%eax,1),%esp ++ subl %eax,%ebp ++ cmpl 88(%esp),%edi ++ jb L004loop_sse2 ++ movl 92(%esp),%esp ++ emms ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 5,0x90 ++L003SSSE3: ++ leal -64(%esp),%edx ++ subl $256,%esp ++ movdqa 640(%ebp),%xmm1 ++ movdqu (%edi),%xmm0 ++.byte 102,15,56,0,193 ++ movdqa (%ebp),%xmm3 ++ movdqa %xmm1,%xmm2 ++ movdqu 16(%edi),%xmm1 ++ paddq %xmm0,%xmm3 ++.byte 102,15,56,0,202 ++ movdqa %xmm3,-128(%edx) ++ movdqa 16(%ebp),%xmm4 ++ movdqa %xmm2,%xmm3 ++ movdqu 32(%edi),%xmm2 ++ paddq %xmm1,%xmm4 ++.byte 102,15,56,0,211 ++ movdqa %xmm4,-112(%edx) ++ movdqa 32(%ebp),%xmm5 ++ movdqa %xmm3,%xmm4 ++ movdqu 48(%edi),%xmm3 ++ paddq %xmm2,%xmm5 ++.byte 102,15,56,0,220 ++ movdqa %xmm5,-96(%edx) ++ movdqa 48(%ebp),%xmm6 ++ movdqa %xmm4,%xmm5 ++ movdqu 64(%edi),%xmm4 ++ paddq %xmm3,%xmm6 ++.byte 102,15,56,0,229 ++ movdqa %xmm6,-80(%edx) ++ movdqa 64(%ebp),%xmm7 ++ movdqa %xmm5,%xmm6 ++ movdqu 80(%edi),%xmm5 ++ paddq %xmm4,%xmm7 ++.byte 102,15,56,0,238 ++ movdqa %xmm7,-64(%edx) ++ movdqa %xmm0,(%edx) ++ movdqa 80(%ebp),%xmm0 ++ movdqa %xmm6,%xmm7 ++ movdqu 96(%edi),%xmm6 ++ paddq %xmm5,%xmm0 ++.byte 102,15,56,0,247 ++ movdqa %xmm0,-48(%edx) ++ movdqa %xmm1,16(%edx) ++ movdqa 96(%ebp),%xmm1 ++ movdqa %xmm7,%xmm0 ++ movdqu 112(%edi),%xmm7 ++ paddq %xmm6,%xmm1 ++.byte 102,15,56,0,248 ++ movdqa %xmm1,-32(%edx) ++ movdqa %xmm2,32(%edx) ++ movdqa 112(%ebp),%xmm2 ++ movdqa (%edx),%xmm0 ++ paddq %xmm7,%xmm2 ++ movdqa %xmm2,-16(%edx) ++ nop ++.align 5,0x90 ++L007loop_ssse3: ++ movdqa 16(%edx),%xmm2 ++ movdqa %xmm3,48(%edx) ++ leal 128(%ebp),%ebp ++ movq %mm1,8(%esp) ++ movl %edi,%ebx ++ movq %mm2,16(%esp) ++ leal 128(%edi),%edi ++ movq %mm3,24(%esp) ++ cmpl %eax,%edi ++ movq %mm5,40(%esp) ++ cmovbl %edi,%ebx ++ movq %mm6,48(%esp) ++ movl $4,%ecx ++ pxor %mm1,%mm2 ++ movq %mm7,56(%esp) ++ pxor %mm3,%mm3 ++ jmp L00800_47_ssse3 ++.align 5,0x90 ++L00800_47_ssse3: ++ movdqa %xmm5,%xmm3 ++ movdqa %xmm2,%xmm1 ++.byte 102,15,58,15,208,8 ++ movdqa %xmm4,(%edx) ++.byte 102,15,58,15,220,8 ++ movdqa %xmm2,%xmm4 ++ psrlq $7,%xmm2 ++ paddq %xmm3,%xmm0 ++ movdqa %xmm4,%xmm3 ++ psrlq $1,%xmm4 ++ psllq $56,%xmm3 ++ pxor 
%xmm4,%xmm2 ++ psrlq $7,%xmm4 ++ pxor %xmm3,%xmm2 ++ psllq $7,%xmm3 ++ pxor %xmm4,%xmm2 ++ movdqa %xmm7,%xmm4 ++ pxor %xmm3,%xmm2 ++ movdqa %xmm7,%xmm3 ++ psrlq $6,%xmm4 ++ paddq %xmm2,%xmm0 ++ movdqa %xmm7,%xmm2 ++ psrlq $19,%xmm3 ++ psllq $3,%xmm2 ++ pxor %xmm3,%xmm4 ++ psrlq $42,%xmm3 ++ pxor %xmm2,%xmm4 ++ psllq $42,%xmm2 ++ pxor %xmm3,%xmm4 ++ movdqa 32(%edx),%xmm3 ++ pxor %xmm2,%xmm4 ++ movdqa (%ebp),%xmm2 ++ movq %mm4,%mm1 ++ paddq %xmm4,%xmm0 ++ movq -128(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,32(%esp) ++ paddq %xmm0,%xmm2 ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 56(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 24(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 8(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 32(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 40(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -120(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,24(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,56(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 48(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 16(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq (%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 24(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 32(%esp),%mm6 ++ movdqa %xmm2,-128(%edx) ++ movdqa %xmm6,%xmm4 ++ movdqa %xmm3,%xmm2 ++.byte 102,15,58,15,217,8 ++ movdqa %xmm5,16(%edx) ++.byte 102,15,58,15,229,8 ++ movdqa %xmm3,%xmm5 ++ psrlq $7,%xmm3 ++ paddq %xmm4,%xmm1 ++ movdqa %xmm5,%xmm4 ++ psrlq $1,%xmm5 ++ psllq $56,%xmm4 ++ pxor %xmm5,%xmm3 ++ psrlq $7,%xmm5 ++ pxor %xmm4,%xmm3 ++ psllq $7,%xmm4 ++ pxor %xmm5,%xmm3 ++ movdqa %xmm0,%xmm5 ++ pxor %xmm4,%xmm3 ++ movdqa %xmm0,%xmm4 ++ psrlq $6,%xmm5 ++ paddq %xmm3,%xmm1 ++ movdqa %xmm0,%xmm3 ++ psrlq $19,%xmm4 ++ psllq $3,%xmm3 ++ pxor %xmm4,%xmm5 ++ psrlq $42,%xmm4 ++ pxor %xmm3,%xmm5 ++ psllq $42,%xmm3 ++ pxor %xmm4,%xmm5 ++ movdqa 48(%edx),%xmm4 ++ pxor %xmm3,%xmm5 ++ movdqa 16(%ebp),%xmm3 ++ movq %mm4,%mm1 ++ paddq %xmm5,%xmm1 ++ movq -112(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,16(%esp) ++ paddq %xmm1,%xmm3 ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,48(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 40(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 8(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 56(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 
++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 16(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 24(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -104(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,8(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,40(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 32(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq (%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 48(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 8(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 16(%esp),%mm6 ++ movdqa %xmm3,-112(%edx) ++ movdqa %xmm7,%xmm5 ++ movdqa %xmm4,%xmm3 ++.byte 102,15,58,15,226,8 ++ movdqa %xmm6,32(%edx) ++.byte 102,15,58,15,238,8 ++ movdqa %xmm4,%xmm6 ++ psrlq $7,%xmm4 ++ paddq %xmm5,%xmm2 ++ movdqa %xmm6,%xmm5 ++ psrlq $1,%xmm6 ++ psllq $56,%xmm5 ++ pxor %xmm6,%xmm4 ++ psrlq $7,%xmm6 ++ pxor %xmm5,%xmm4 ++ psllq $7,%xmm5 ++ pxor %xmm6,%xmm4 ++ movdqa %xmm1,%xmm6 ++ pxor %xmm5,%xmm4 ++ movdqa %xmm1,%xmm5 ++ psrlq $6,%xmm6 ++ paddq %xmm4,%xmm2 ++ movdqa %xmm1,%xmm4 ++ psrlq $19,%xmm5 ++ psllq $3,%xmm4 ++ pxor %xmm5,%xmm6 ++ psrlq $42,%xmm5 ++ pxor %xmm4,%xmm6 ++ psllq $42,%xmm4 ++ pxor %xmm5,%xmm6 ++ movdqa (%edx),%xmm5 ++ pxor %xmm4,%xmm6 ++ movdqa 32(%ebp),%xmm4 ++ movq %mm4,%mm1 ++ paddq %xmm6,%xmm2 ++ movq -96(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,(%esp) ++ paddq %xmm2,%xmm4 ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,32(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 24(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 56(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 40(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq (%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 8(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -88(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,56(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,24(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 16(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 48(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 32(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 56(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq (%esp),%mm6 ++ movdqa 
%xmm4,-96(%edx) ++ movdqa %xmm0,%xmm6 ++ movdqa %xmm5,%xmm4 ++.byte 102,15,58,15,235,8 ++ movdqa %xmm7,48(%edx) ++.byte 102,15,58,15,247,8 ++ movdqa %xmm5,%xmm7 ++ psrlq $7,%xmm5 ++ paddq %xmm6,%xmm3 ++ movdqa %xmm7,%xmm6 ++ psrlq $1,%xmm7 ++ psllq $56,%xmm6 ++ pxor %xmm7,%xmm5 ++ psrlq $7,%xmm7 ++ pxor %xmm6,%xmm5 ++ psllq $7,%xmm6 ++ pxor %xmm7,%xmm5 ++ movdqa %xmm2,%xmm7 ++ pxor %xmm6,%xmm5 ++ movdqa %xmm2,%xmm6 ++ psrlq $6,%xmm7 ++ paddq %xmm5,%xmm3 ++ movdqa %xmm2,%xmm5 ++ psrlq $19,%xmm6 ++ psllq $3,%xmm5 ++ pxor %xmm6,%xmm7 ++ psrlq $42,%xmm6 ++ pxor %xmm5,%xmm7 ++ psllq $42,%xmm5 ++ pxor %xmm6,%xmm7 ++ movdqa 16(%edx),%xmm6 ++ pxor %xmm5,%xmm7 ++ movdqa 48(%ebp),%xmm5 ++ movq %mm4,%mm1 ++ paddq %xmm7,%xmm3 ++ movq -80(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,48(%esp) ++ paddq %xmm3,%xmm5 ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,16(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 8(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 40(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 24(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 48(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 56(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -72(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,40(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,8(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq (%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 32(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 16(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 40(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 48(%esp),%mm6 ++ movdqa %xmm5,-80(%edx) ++ movdqa %xmm1,%xmm7 ++ movdqa %xmm6,%xmm5 ++.byte 102,15,58,15,244,8 ++ movdqa %xmm0,(%edx) ++.byte 102,15,58,15,248,8 ++ movdqa %xmm6,%xmm0 ++ psrlq $7,%xmm6 ++ paddq %xmm7,%xmm4 ++ movdqa %xmm0,%xmm7 ++ psrlq $1,%xmm0 ++ psllq $56,%xmm7 ++ pxor %xmm0,%xmm6 ++ psrlq $7,%xmm0 ++ pxor %xmm7,%xmm6 ++ psllq $7,%xmm7 ++ pxor %xmm0,%xmm6 ++ movdqa %xmm3,%xmm0 ++ pxor %xmm7,%xmm6 ++ movdqa %xmm3,%xmm7 ++ psrlq $6,%xmm0 ++ paddq %xmm6,%xmm4 ++ movdqa %xmm3,%xmm6 ++ psrlq $19,%xmm7 ++ psllq $3,%xmm6 ++ pxor %xmm7,%xmm0 ++ psrlq $42,%xmm7 ++ pxor %xmm6,%xmm0 ++ psllq $42,%xmm6 ++ pxor %xmm7,%xmm0 ++ movdqa 32(%edx),%xmm7 ++ pxor %xmm6,%xmm0 ++ movdqa 64(%ebp),%xmm6 ++ movq %mm4,%mm1 ++ paddq %xmm0,%xmm4 ++ movq -64(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,32(%esp) ++ paddq %xmm4,%xmm6 ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 56(%esp),%mm7 ++ pxor 
%mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 24(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 8(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 32(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 40(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -56(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,24(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,56(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 48(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 16(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq (%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 24(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 32(%esp),%mm6 ++ movdqa %xmm6,-64(%edx) ++ movdqa %xmm2,%xmm0 ++ movdqa %xmm7,%xmm6 ++.byte 102,15,58,15,253,8 ++ movdqa %xmm1,16(%edx) ++.byte 102,15,58,15,193,8 ++ movdqa %xmm7,%xmm1 ++ psrlq $7,%xmm7 ++ paddq %xmm0,%xmm5 ++ movdqa %xmm1,%xmm0 ++ psrlq $1,%xmm1 ++ psllq $56,%xmm0 ++ pxor %xmm1,%xmm7 ++ psrlq $7,%xmm1 ++ pxor %xmm0,%xmm7 ++ psllq $7,%xmm0 ++ pxor %xmm1,%xmm7 ++ movdqa %xmm4,%xmm1 ++ pxor %xmm0,%xmm7 ++ movdqa %xmm4,%xmm0 ++ psrlq $6,%xmm1 ++ paddq %xmm7,%xmm5 ++ movdqa %xmm4,%xmm7 ++ psrlq $19,%xmm0 ++ psllq $3,%xmm7 ++ pxor %xmm0,%xmm1 ++ psrlq $42,%xmm0 ++ pxor %xmm7,%xmm1 ++ psllq $42,%xmm7 ++ pxor %xmm0,%xmm1 ++ movdqa 48(%edx),%xmm0 ++ pxor %xmm7,%xmm1 ++ movdqa 80(%ebp),%xmm7 ++ movq %mm4,%mm1 ++ paddq %xmm1,%xmm5 ++ movq -48(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,16(%esp) ++ paddq %xmm5,%xmm7 ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,48(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 40(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 8(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 56(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 16(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 24(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -40(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,8(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,40(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 32(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq (%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 48(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor 
%mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 8(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 16(%esp),%mm6 ++ movdqa %xmm7,-48(%edx) ++ movdqa %xmm3,%xmm1 ++ movdqa %xmm0,%xmm7 ++.byte 102,15,58,15,198,8 ++ movdqa %xmm2,32(%edx) ++.byte 102,15,58,15,202,8 ++ movdqa %xmm0,%xmm2 ++ psrlq $7,%xmm0 ++ paddq %xmm1,%xmm6 ++ movdqa %xmm2,%xmm1 ++ psrlq $1,%xmm2 ++ psllq $56,%xmm1 ++ pxor %xmm2,%xmm0 ++ psrlq $7,%xmm2 ++ pxor %xmm1,%xmm0 ++ psllq $7,%xmm1 ++ pxor %xmm2,%xmm0 ++ movdqa %xmm5,%xmm2 ++ pxor %xmm1,%xmm0 ++ movdqa %xmm5,%xmm1 ++ psrlq $6,%xmm2 ++ paddq %xmm0,%xmm6 ++ movdqa %xmm5,%xmm0 ++ psrlq $19,%xmm1 ++ psllq $3,%xmm0 ++ pxor %xmm1,%xmm2 ++ psrlq $42,%xmm1 ++ pxor %xmm0,%xmm2 ++ psllq $42,%xmm0 ++ pxor %xmm1,%xmm2 ++ movdqa (%edx),%xmm1 ++ pxor %xmm0,%xmm2 ++ movdqa 96(%ebp),%xmm0 ++ movq %mm4,%mm1 ++ paddq %xmm2,%xmm6 ++ movq -32(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,(%esp) ++ paddq %xmm6,%xmm0 ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,32(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 24(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 56(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 40(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq (%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 8(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -24(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,56(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,24(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 16(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 48(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 32(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 56(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq (%esp),%mm6 ++ movdqa %xmm0,-32(%edx) ++ movdqa %xmm4,%xmm2 ++ movdqa %xmm1,%xmm0 ++.byte 102,15,58,15,207,8 ++ movdqa %xmm3,48(%edx) ++.byte 102,15,58,15,211,8 ++ movdqa %xmm1,%xmm3 ++ psrlq $7,%xmm1 ++ paddq %xmm2,%xmm7 ++ movdqa %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ psllq $56,%xmm2 ++ pxor %xmm3,%xmm1 ++ psrlq $7,%xmm3 ++ pxor %xmm2,%xmm1 ++ psllq $7,%xmm2 ++ pxor %xmm3,%xmm1 ++ movdqa %xmm6,%xmm3 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm6,%xmm2 ++ psrlq $6,%xmm3 ++ paddq %xmm1,%xmm7 ++ movdqa %xmm6,%xmm1 ++ psrlq $19,%xmm2 ++ psllq $3,%xmm1 ++ pxor %xmm2,%xmm3 ++ psrlq $42,%xmm2 ++ pxor %xmm1,%xmm3 ++ psllq $42,%xmm1 ++ pxor %xmm2,%xmm3 ++ movdqa 16(%edx),%xmm2 ++ pxor %xmm1,%xmm3 ++ movdqa 112(%ebp),%xmm1 ++ movq %mm4,%mm1 ++ paddq %xmm3,%xmm7 ++ movq -16(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,48(%esp) ++ paddq %xmm7,%xmm1 ++ pand %mm4,%mm5 
++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,16(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 8(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 40(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 24(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 48(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 56(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -8(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,40(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,8(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq (%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 32(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 16(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 40(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 48(%esp),%mm6 ++ movdqa %xmm1,-16(%edx) ++ leal 128(%ebp),%ebp ++ decl %ecx ++ jnz L00800_47_ssse3 ++ movdqa (%ebp),%xmm1 ++ leal -640(%ebp),%ebp ++ movdqu (%ebx),%xmm0 ++.byte 102,15,56,0,193 ++ movdqa (%ebp),%xmm3 ++ movdqa %xmm1,%xmm2 ++ movdqu 16(%ebx),%xmm1 ++ paddq %xmm0,%xmm3 ++.byte 102,15,56,0,202 ++ movq %mm4,%mm1 ++ movq -128(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,32(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 56(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 24(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 8(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 32(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 40(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -120(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,24(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,56(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 48(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 16(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq (%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ 
movq 24(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 32(%esp),%mm6 ++ movdqa %xmm3,-128(%edx) ++ movdqa 16(%ebp),%xmm4 ++ movdqa %xmm2,%xmm3 ++ movdqu 32(%ebx),%xmm2 ++ paddq %xmm1,%xmm4 ++.byte 102,15,56,0,211 ++ movq %mm4,%mm1 ++ movq -112(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,16(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,48(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 40(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 8(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 56(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 16(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 24(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -104(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,8(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,40(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 32(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq (%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 48(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 8(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 16(%esp),%mm6 ++ movdqa %xmm4,-112(%edx) ++ movdqa 32(%ebp),%xmm5 ++ movdqa %xmm3,%xmm4 ++ movdqu 48(%ebx),%xmm3 ++ paddq %xmm2,%xmm5 ++.byte 102,15,56,0,220 ++ movq %mm4,%mm1 ++ movq -96(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,32(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 24(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 56(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 40(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq (%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 8(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -88(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,56(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,24(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 16(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 48(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 32(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq 
$5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 56(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq (%esp),%mm6 ++ movdqa %xmm5,-96(%edx) ++ movdqa 48(%ebp),%xmm6 ++ movdqa %xmm4,%xmm5 ++ movdqu 64(%ebx),%xmm4 ++ paddq %xmm3,%xmm6 ++.byte 102,15,56,0,229 ++ movq %mm4,%mm1 ++ movq -80(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,48(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,16(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 8(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 40(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 24(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 48(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 56(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -72(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,40(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,8(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq (%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 32(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 16(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 40(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 48(%esp),%mm6 ++ movdqa %xmm6,-80(%edx) ++ movdqa 64(%ebp),%xmm7 ++ movdqa %xmm5,%xmm6 ++ movdqu 80(%ebx),%xmm5 ++ paddq %xmm4,%xmm7 ++.byte 102,15,56,0,238 ++ movq %mm4,%mm1 ++ movq -64(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,32(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 56(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 24(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 8(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 32(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 40(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -56(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,24(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,56(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 48(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 16(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq 
%mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq (%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 24(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 32(%esp),%mm6 ++ movdqa %xmm7,-64(%edx) ++ movdqa %xmm0,(%edx) ++ movdqa 80(%ebp),%xmm0 ++ movdqa %xmm6,%xmm7 ++ movdqu 96(%ebx),%xmm6 ++ paddq %xmm5,%xmm0 ++.byte 102,15,56,0,247 ++ movq %mm4,%mm1 ++ movq -48(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,16(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,48(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 40(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 8(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 56(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 16(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 24(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -40(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,8(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,40(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 32(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq (%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 48(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 8(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 16(%esp),%mm6 ++ movdqa %xmm0,-48(%edx) ++ movdqa %xmm1,16(%edx) ++ movdqa 96(%ebp),%xmm1 ++ movdqa %xmm7,%xmm0 ++ movdqu 112(%ebx),%xmm7 ++ paddq %xmm6,%xmm1 ++.byte 102,15,56,0,248 ++ movq %mm4,%mm1 ++ movq -32(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,32(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 24(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 56(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 40(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq (%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 8(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -24(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,56(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor 
%mm1,%mm3 ++ movq %mm2,24(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 16(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 48(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 32(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 56(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq (%esp),%mm6 ++ movdqa %xmm1,-32(%edx) ++ movdqa %xmm2,32(%edx) ++ movdqa 112(%ebp),%xmm2 ++ movdqa (%edx),%xmm0 ++ paddq %xmm7,%xmm2 ++ movq %mm4,%mm1 ++ movq -16(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,48(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm0 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm0,16(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq 8(%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 40(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm0,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm0,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 24(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm0,%mm2 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ pxor %mm7,%mm6 ++ movq 48(%esp),%mm5 ++ paddq %mm6,%mm2 ++ movq 56(%esp),%mm6 ++ movq %mm4,%mm1 ++ movq -8(%edx),%mm7 ++ pxor %mm6,%mm5 ++ psrlq $14,%mm1 ++ movq %mm4,40(%esp) ++ pand %mm4,%mm5 ++ psllq $23,%mm4 ++ paddq %mm3,%mm2 ++ movq %mm1,%mm3 ++ psrlq $4,%mm1 ++ pxor %mm6,%mm5 ++ pxor %mm4,%mm3 ++ psllq $23,%mm4 ++ pxor %mm1,%mm3 ++ movq %mm2,8(%esp) ++ paddq %mm5,%mm7 ++ pxor %mm4,%mm3 ++ psrlq $23,%mm1 ++ paddq (%esp),%mm7 ++ pxor %mm1,%mm3 ++ psllq $4,%mm4 ++ pxor %mm4,%mm3 ++ movq 32(%esp),%mm4 ++ paddq %mm7,%mm3 ++ movq %mm2,%mm5 ++ psrlq $28,%mm5 ++ paddq %mm3,%mm4 ++ movq %mm2,%mm6 ++ movq %mm5,%mm7 ++ psllq $25,%mm6 ++ movq 16(%esp),%mm1 ++ psrlq $6,%mm5 ++ pxor %mm6,%mm7 ++ psllq $5,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm2 ++ psrlq $5,%mm5 ++ pxor %mm6,%mm7 ++ pand %mm2,%mm0 ++ psllq $6,%mm6 ++ pxor %mm5,%mm7 ++ pxor %mm1,%mm0 ++ pxor %mm7,%mm6 ++ movq 40(%esp),%mm5 ++ paddq %mm6,%mm0 ++ movq 48(%esp),%mm6 ++ movdqa %xmm2,-16(%edx) ++ movq 8(%esp),%mm1 ++ paddq %mm3,%mm0 ++ movq 24(%esp),%mm3 ++ movq 56(%esp),%mm7 ++ pxor %mm1,%mm2 ++ paddq (%esi),%mm0 ++ paddq 8(%esi),%mm1 ++ paddq 16(%esi),%mm2 ++ paddq 24(%esi),%mm3 ++ paddq 32(%esi),%mm4 ++ paddq 40(%esi),%mm5 ++ paddq 48(%esi),%mm6 ++ paddq 56(%esi),%mm7 ++ movq %mm0,(%esi) ++ movq %mm1,8(%esi) ++ movq %mm2,16(%esi) ++ movq %mm3,24(%esi) ++ movq %mm4,32(%esi) ++ movq %mm5,40(%esi) ++ movq %mm6,48(%esi) ++ movq %mm7,56(%esi) ++ cmpl %eax,%edi ++ jb L007loop_ssse3 ++ movl 76(%edx),%esp ++ emms ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 4,0x90 ++L002loop_x86: ++ movl (%edi),%eax ++ movl 4(%edi),%ebx ++ movl 8(%edi),%ecx ++ movl 12(%edi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ pushl %eax ++ pushl %ebx ++ pushl %ecx ++ pushl %edx ++ movl 16(%edi),%eax ++ movl 20(%edi),%ebx ++ movl 24(%edi),%ecx ++ movl 28(%edi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ pushl %eax ++ pushl %ebx ++ pushl %ecx ++ pushl %edx ++ movl 32(%edi),%eax ++ movl 36(%edi),%ebx ++ movl 40(%edi),%ecx ++ 
movl 44(%edi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ pushl %eax ++ pushl %ebx ++ pushl %ecx ++ pushl %edx ++ movl 48(%edi),%eax ++ movl 52(%edi),%ebx ++ movl 56(%edi),%ecx ++ movl 60(%edi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ pushl %eax ++ pushl %ebx ++ pushl %ecx ++ pushl %edx ++ movl 64(%edi),%eax ++ movl 68(%edi),%ebx ++ movl 72(%edi),%ecx ++ movl 76(%edi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ pushl %eax ++ pushl %ebx ++ pushl %ecx ++ pushl %edx ++ movl 80(%edi),%eax ++ movl 84(%edi),%ebx ++ movl 88(%edi),%ecx ++ movl 92(%edi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ pushl %eax ++ pushl %ebx ++ pushl %ecx ++ pushl %edx ++ movl 96(%edi),%eax ++ movl 100(%edi),%ebx ++ movl 104(%edi),%ecx ++ movl 108(%edi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ pushl %eax ++ pushl %ebx ++ pushl %ecx ++ pushl %edx ++ movl 112(%edi),%eax ++ movl 116(%edi),%ebx ++ movl 120(%edi),%ecx ++ movl 124(%edi),%edx ++ bswap %eax ++ bswap %ebx ++ bswap %ecx ++ bswap %edx ++ pushl %eax ++ pushl %ebx ++ pushl %ecx ++ pushl %edx ++ addl $128,%edi ++ subl $72,%esp ++ movl %edi,204(%esp) ++ leal 8(%esp),%edi ++ movl $16,%ecx ++.long 2784229001 ++.align 4,0x90 ++L00900_15_x86: ++ movl 40(%esp),%ecx ++ movl 44(%esp),%edx ++ movl %ecx,%esi ++ shrl $9,%ecx ++ movl %edx,%edi ++ shrl $9,%edx ++ movl %ecx,%ebx ++ shll $14,%esi ++ movl %edx,%eax ++ shll $14,%edi ++ xorl %esi,%ebx ++ shrl $5,%ecx ++ xorl %edi,%eax ++ shrl $5,%edx ++ xorl %ecx,%eax ++ shll $4,%esi ++ xorl %edx,%ebx ++ shll $4,%edi ++ xorl %esi,%ebx ++ shrl $4,%ecx ++ xorl %edi,%eax ++ shrl $4,%edx ++ xorl %ecx,%eax ++ shll $5,%esi ++ xorl %edx,%ebx ++ shll $5,%edi ++ xorl %esi,%eax ++ xorl %edi,%ebx ++ movl 48(%esp),%ecx ++ movl 52(%esp),%edx ++ movl 56(%esp),%esi ++ movl 60(%esp),%edi ++ addl 64(%esp),%eax ++ adcl 68(%esp),%ebx ++ xorl %esi,%ecx ++ xorl %edi,%edx ++ andl 40(%esp),%ecx ++ andl 44(%esp),%edx ++ addl 192(%esp),%eax ++ adcl 196(%esp),%ebx ++ xorl %esi,%ecx ++ xorl %edi,%edx ++ movl (%ebp),%esi ++ movl 4(%ebp),%edi ++ addl %ecx,%eax ++ adcl %edx,%ebx ++ movl 32(%esp),%ecx ++ movl 36(%esp),%edx ++ addl %esi,%eax ++ adcl %edi,%ebx ++ movl %eax,(%esp) ++ movl %ebx,4(%esp) ++ addl %ecx,%eax ++ adcl %edx,%ebx ++ movl 8(%esp),%ecx ++ movl 12(%esp),%edx ++ movl %eax,32(%esp) ++ movl %ebx,36(%esp) ++ movl %ecx,%esi ++ shrl $2,%ecx ++ movl %edx,%edi ++ shrl $2,%edx ++ movl %ecx,%ebx ++ shll $4,%esi ++ movl %edx,%eax ++ shll $4,%edi ++ xorl %esi,%ebx ++ shrl $5,%ecx ++ xorl %edi,%eax ++ shrl $5,%edx ++ xorl %ecx,%ebx ++ shll $21,%esi ++ xorl %edx,%eax ++ shll $21,%edi ++ xorl %esi,%eax ++ shrl $21,%ecx ++ xorl %edi,%ebx ++ shrl $21,%edx ++ xorl %ecx,%eax ++ shll $5,%esi ++ xorl %edx,%ebx ++ shll $5,%edi ++ xorl %esi,%eax ++ xorl %edi,%ebx ++ movl 8(%esp),%ecx ++ movl 12(%esp),%edx ++ movl 16(%esp),%esi ++ movl 20(%esp),%edi ++ addl (%esp),%eax ++ adcl 4(%esp),%ebx ++ orl %esi,%ecx ++ orl %edi,%edx ++ andl 24(%esp),%ecx ++ andl 28(%esp),%edx ++ andl 8(%esp),%esi ++ andl 12(%esp),%edi ++ orl %esi,%ecx ++ orl %edi,%edx ++ addl %ecx,%eax ++ adcl %edx,%ebx ++ movl %eax,(%esp) ++ movl %ebx,4(%esp) ++ movb (%ebp),%dl ++ subl $8,%esp ++ leal 8(%ebp),%ebp ++ cmpb $148,%dl ++ jne L00900_15_x86 ++.align 4,0x90 ++L01016_79_x86: ++ movl 312(%esp),%ecx ++ movl 316(%esp),%edx ++ movl %ecx,%esi ++ shrl $1,%ecx ++ movl %edx,%edi ++ shrl $1,%edx ++ movl %ecx,%eax ++ shll $24,%esi ++ movl %edx,%ebx ++ shll $24,%edi ++ xorl %esi,%ebx ++ shrl $6,%ecx ++ xorl 
%edi,%eax ++ shrl $6,%edx ++ xorl %ecx,%eax ++ shll $7,%esi ++ xorl %edx,%ebx ++ shll $1,%edi ++ xorl %esi,%ebx ++ shrl $1,%ecx ++ xorl %edi,%eax ++ shrl $1,%edx ++ xorl %ecx,%eax ++ shll $6,%edi ++ xorl %edx,%ebx ++ xorl %edi,%eax ++ movl %eax,(%esp) ++ movl %ebx,4(%esp) ++ movl 208(%esp),%ecx ++ movl 212(%esp),%edx ++ movl %ecx,%esi ++ shrl $6,%ecx ++ movl %edx,%edi ++ shrl $6,%edx ++ movl %ecx,%eax ++ shll $3,%esi ++ movl %edx,%ebx ++ shll $3,%edi ++ xorl %esi,%eax ++ shrl $13,%ecx ++ xorl %edi,%ebx ++ shrl $13,%edx ++ xorl %ecx,%eax ++ shll $10,%esi ++ xorl %edx,%ebx ++ shll $10,%edi ++ xorl %esi,%ebx ++ shrl $10,%ecx ++ xorl %edi,%eax ++ shrl $10,%edx ++ xorl %ecx,%ebx ++ shll $13,%edi ++ xorl %edx,%eax ++ xorl %edi,%eax ++ movl 320(%esp),%ecx ++ movl 324(%esp),%edx ++ addl (%esp),%eax ++ adcl 4(%esp),%ebx ++ movl 248(%esp),%esi ++ movl 252(%esp),%edi ++ addl %ecx,%eax ++ adcl %edx,%ebx ++ addl %esi,%eax ++ adcl %edi,%ebx ++ movl %eax,192(%esp) ++ movl %ebx,196(%esp) ++ movl 40(%esp),%ecx ++ movl 44(%esp),%edx ++ movl %ecx,%esi ++ shrl $9,%ecx ++ movl %edx,%edi ++ shrl $9,%edx ++ movl %ecx,%ebx ++ shll $14,%esi ++ movl %edx,%eax ++ shll $14,%edi ++ xorl %esi,%ebx ++ shrl $5,%ecx ++ xorl %edi,%eax ++ shrl $5,%edx ++ xorl %ecx,%eax ++ shll $4,%esi ++ xorl %edx,%ebx ++ shll $4,%edi ++ xorl %esi,%ebx ++ shrl $4,%ecx ++ xorl %edi,%eax ++ shrl $4,%edx ++ xorl %ecx,%eax ++ shll $5,%esi ++ xorl %edx,%ebx ++ shll $5,%edi ++ xorl %esi,%eax ++ xorl %edi,%ebx ++ movl 48(%esp),%ecx ++ movl 52(%esp),%edx ++ movl 56(%esp),%esi ++ movl 60(%esp),%edi ++ addl 64(%esp),%eax ++ adcl 68(%esp),%ebx ++ xorl %esi,%ecx ++ xorl %edi,%edx ++ andl 40(%esp),%ecx ++ andl 44(%esp),%edx ++ addl 192(%esp),%eax ++ adcl 196(%esp),%ebx ++ xorl %esi,%ecx ++ xorl %edi,%edx ++ movl (%ebp),%esi ++ movl 4(%ebp),%edi ++ addl %ecx,%eax ++ adcl %edx,%ebx ++ movl 32(%esp),%ecx ++ movl 36(%esp),%edx ++ addl %esi,%eax ++ adcl %edi,%ebx ++ movl %eax,(%esp) ++ movl %ebx,4(%esp) ++ addl %ecx,%eax ++ adcl %edx,%ebx ++ movl 8(%esp),%ecx ++ movl 12(%esp),%edx ++ movl %eax,32(%esp) ++ movl %ebx,36(%esp) ++ movl %ecx,%esi ++ shrl $2,%ecx ++ movl %edx,%edi ++ shrl $2,%edx ++ movl %ecx,%ebx ++ shll $4,%esi ++ movl %edx,%eax ++ shll $4,%edi ++ xorl %esi,%ebx ++ shrl $5,%ecx ++ xorl %edi,%eax ++ shrl $5,%edx ++ xorl %ecx,%ebx ++ shll $21,%esi ++ xorl %edx,%eax ++ shll $21,%edi ++ xorl %esi,%eax ++ shrl $21,%ecx ++ xorl %edi,%ebx ++ shrl $21,%edx ++ xorl %ecx,%eax ++ shll $5,%esi ++ xorl %edx,%ebx ++ shll $5,%edi ++ xorl %esi,%eax ++ xorl %edi,%ebx ++ movl 8(%esp),%ecx ++ movl 12(%esp),%edx ++ movl 16(%esp),%esi ++ movl 20(%esp),%edi ++ addl (%esp),%eax ++ adcl 4(%esp),%ebx ++ orl %esi,%ecx ++ orl %edi,%edx ++ andl 24(%esp),%ecx ++ andl 28(%esp),%edx ++ andl 8(%esp),%esi ++ andl 12(%esp),%edi ++ orl %esi,%ecx ++ orl %edi,%edx ++ addl %ecx,%eax ++ adcl %edx,%ebx ++ movl %eax,(%esp) ++ movl %ebx,4(%esp) ++ movb (%ebp),%dl ++ subl $8,%esp ++ leal 8(%ebp),%ebp ++ cmpb $23,%dl ++ jne L01016_79_x86 ++ movl 840(%esp),%esi ++ movl 844(%esp),%edi ++ movl (%esi),%eax ++ movl 4(%esi),%ebx ++ movl 8(%esi),%ecx ++ movl 12(%esi),%edx ++ addl 8(%esp),%eax ++ adcl 12(%esp),%ebx ++ movl %eax,(%esi) ++ movl %ebx,4(%esi) ++ addl 16(%esp),%ecx ++ adcl 20(%esp),%edx ++ movl %ecx,8(%esi) ++ movl %edx,12(%esi) ++ movl 16(%esi),%eax ++ movl 20(%esi),%ebx ++ movl 24(%esi),%ecx ++ movl 28(%esi),%edx ++ addl 24(%esp),%eax ++ adcl 28(%esp),%ebx ++ movl %eax,16(%esi) ++ movl %ebx,20(%esi) ++ addl 32(%esp),%ecx ++ adcl 36(%esp),%edx ++ movl %ecx,24(%esi) ++ movl 
%edx,28(%esi) ++ movl 32(%esi),%eax ++ movl 36(%esi),%ebx ++ movl 40(%esi),%ecx ++ movl 44(%esi),%edx ++ addl 40(%esp),%eax ++ adcl 44(%esp),%ebx ++ movl %eax,32(%esi) ++ movl %ebx,36(%esi) ++ addl 48(%esp),%ecx ++ adcl 52(%esp),%edx ++ movl %ecx,40(%esi) ++ movl %edx,44(%esi) ++ movl 48(%esi),%eax ++ movl 52(%esi),%ebx ++ movl 56(%esi),%ecx ++ movl 60(%esi),%edx ++ addl 56(%esp),%eax ++ adcl 60(%esp),%ebx ++ movl %eax,48(%esi) ++ movl %ebx,52(%esi) ++ addl 64(%esp),%ecx ++ adcl 68(%esp),%edx ++ movl %ecx,56(%esi) ++ movl %edx,60(%esi) ++ addl $840,%esp ++ subl $640,%ebp ++ cmpl 8(%esp),%edi ++ jb L002loop_x86 ++ movl 12(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.align 6,0x90 ++L001K512: ++.long 3609767458,1116352408 ++.long 602891725,1899447441 ++.long 3964484399,3049323471 ++.long 2173295548,3921009573 ++.long 4081628472,961987163 ++.long 3053834265,1508970993 ++.long 2937671579,2453635748 ++.long 3664609560,2870763221 ++.long 2734883394,3624381080 ++.long 1164996542,310598401 ++.long 1323610764,607225278 ++.long 3590304994,1426881987 ++.long 4068182383,1925078388 ++.long 991336113,2162078206 ++.long 633803317,2614888103 ++.long 3479774868,3248222580 ++.long 2666613458,3835390401 ++.long 944711139,4022224774 ++.long 2341262773,264347078 ++.long 2007800933,604807628 ++.long 1495990901,770255983 ++.long 1856431235,1249150122 ++.long 3175218132,1555081692 ++.long 2198950837,1996064986 ++.long 3999719339,2554220882 ++.long 766784016,2821834349 ++.long 2566594879,2952996808 ++.long 3203337956,3210313671 ++.long 1034457026,3336571891 ++.long 2466948901,3584528711 ++.long 3758326383,113926993 ++.long 168717936,338241895 ++.long 1188179964,666307205 ++.long 1546045734,773529912 ++.long 1522805485,1294757372 ++.long 2643833823,1396182291 ++.long 2343527390,1695183700 ++.long 1014477480,1986661051 ++.long 1206759142,2177026350 ++.long 344077627,2456956037 ++.long 1290863460,2730485921 ++.long 3158454273,2820302411 ++.long 3505952657,3259730800 ++.long 106217008,3345764771 ++.long 3606008344,3516065817 ++.long 1432725776,3600352804 ++.long 1467031594,4094571909 ++.long 851169720,275423344 ++.long 3100823752,430227734 ++.long 1363258195,506948616 ++.long 3750685593,659060556 ++.long 3785050280,883997877 ++.long 3318307427,958139571 ++.long 3812723403,1322822218 ++.long 2003034995,1537002063 ++.long 3602036899,1747873779 ++.long 1575990012,1955562222 ++.long 1125592928,2024104815 ++.long 2716904306,2227730452 ++.long 442776044,2361852424 ++.long 593698344,2428436474 ++.long 3733110249,2756734187 ++.long 2999351573,3204031479 ++.long 3815920427,3329325298 ++.long 3928383900,3391569614 ++.long 566280711,3515267271 ++.long 3454069534,3940187606 ++.long 4000239992,4118630271 ++.long 1914138554,116418474 ++.long 2731055270,174292421 ++.long 3203993006,289380356 ++.long 320620315,460393269 ++.long 587496836,685471733 ++.long 1086792851,852142971 ++.long 365543100,1017036298 ++.long 2618297676,1126000580 ++.long 3409855158,1288033470 ++.long 4234509866,1501505948 ++.long 987167468,1607167915 ++.long 1246189591,1816402316 ++.long 67438087,66051 ++.long 202182159,134810123 ++.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 ++.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 ++.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 ++.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 ++.byte 62,0 ++.section __IMPORT,__pointers,non_lazy_symbol_pointers ++L_OPENSSL_ia32cap_P$non_lazy_ptr: ++.indirect_symbol _OPENSSL_ia32cap_P ++.long 0 
++#endif +diff --git a/apple-x86/crypto/fipsmodule/vpaes-x86.S b/apple-x86/crypto/fipsmodule/vpaes-x86.S +new file mode 100644 +index 0000000..00c0190 +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/vpaes-x86.S +@@ -0,0 +1,681 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++#ifdef BORINGSSL_DISPATCH_TEST ++#endif ++.align 6,0x90 ++L_vpaes_consts: ++.long 218628480,235210255,168496130,67568393 ++.long 252381056,17041926,33884169,51187212 ++.long 252645135,252645135,252645135,252645135 ++.long 1512730624,3266504856,1377990664,3401244816 ++.long 830229760,1275146365,2969422977,3447763452 ++.long 3411033600,2979783055,338359620,2782886510 ++.long 4209124096,907596821,221174255,1006095553 ++.long 191964160,3799684038,3164090317,1589111125 ++.long 182528256,1777043520,2877432650,3265356744 ++.long 1874708224,3503451415,3305285752,363511674 ++.long 1606117888,3487855781,1093350906,2384367825 ++.long 197121,67569157,134941193,202313229 ++.long 67569157,134941193,202313229,197121 ++.long 134941193,202313229,197121,67569157 ++.long 202313229,197121,67569157,134941193 ++.long 33619971,100992007,168364043,235736079 ++.long 235736079,33619971,100992007,168364043 ++.long 168364043,235736079,33619971,100992007 ++.long 100992007,168364043,235736079,33619971 ++.long 50462976,117835012,185207048,252579084 ++.long 252314880,51251460,117574920,184942860 ++.long 184682752,252054788,50987272,118359308 ++.long 118099200,185467140,251790600,50727180 ++.long 2946363062,528716217,1300004225,1881839624 ++.long 1532713819,1532713819,1532713819,1532713819 ++.long 3602276352,4288629033,3737020424,4153884961 ++.long 1354558464,32357713,2958822624,3775749553 ++.long 1201988352,132424512,1572796698,503232858 ++.long 2213177600,1597421020,4103937655,675398315 ++.long 2749646592,4273543773,1511898873,121693092 ++.long 3040248576,1103263732,2871565598,1608280554 ++.long 2236667136,2588920351,482954393,64377734 ++.long 3069987328,291237287,2117370568,3650299247 ++.long 533321216,3573750986,2572112006,1401264716 ++.long 1339849704,2721158661,548607111,3445553514 ++.long 2128193280,3054596040,2183486460,1257083700 ++.long 655635200,1165381986,3923443150,2344132524 ++.long 190078720,256924420,290342170,357187870 ++.long 1610966272,2263057382,4103205268,309794674 ++.long 2592527872,2233205587,1335446729,3402964816 ++.long 3973531904,3225098121,3002836325,1918774430 ++.long 3870401024,2102906079,2284471353,4117666579 ++.long 617007872,1021508343,366931923,691083277 ++.long 2528395776,3491914898,2968704004,1613121270 ++.long 3445188352,3247741094,844474987,4093578302 ++.long 651481088,1190302358,1689581232,574775300 ++.long 4289380608,206939853,2555985458,2489840491 ++.long 2130264064,327674451,3566485037,3349835193 ++.long 2470714624,316102159,3636825756,3393945945 ++.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 ++.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 ++.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 ++.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 ++.byte 118,101,114,115,105,116,121,41,0 ++.align 6,0x90 ++.private_extern __vpaes_preheat ++.align 4 ++__vpaes_preheat: ++ addl (%esp),%ebp ++ movdqa -48(%ebp),%xmm7 ++ movdqa -16(%ebp),%xmm6 ++ ret ++.private_extern __vpaes_encrypt_core ++.align 4 ++__vpaes_encrypt_core: ++ movl $16,%ecx ++ movl 240(%edx),%eax ++ movdqa %xmm6,%xmm1 ++ movdqa (%ebp),%xmm2 
++ pandn %xmm0,%xmm1 ++ pand %xmm6,%xmm0 ++ movdqu (%edx),%xmm5 ++.byte 102,15,56,0,208 ++ movdqa 16(%ebp),%xmm0 ++ pxor %xmm5,%xmm2 ++ psrld $4,%xmm1 ++ addl $16,%edx ++.byte 102,15,56,0,193 ++ leal 192(%ebp),%ebx ++ pxor %xmm2,%xmm0 ++ jmp L000enc_entry ++.align 4,0x90 ++L001enc_loop: ++ movdqa 32(%ebp),%xmm4 ++ movdqa 48(%ebp),%xmm0 ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,195 ++ pxor %xmm5,%xmm4 ++ movdqa 64(%ebp),%xmm5 ++ pxor %xmm4,%xmm0 ++ movdqa -64(%ebx,%ecx,1),%xmm1 ++.byte 102,15,56,0,234 ++ movdqa 80(%ebp),%xmm2 ++ movdqa (%ebx,%ecx,1),%xmm4 ++.byte 102,15,56,0,211 ++ movdqa %xmm0,%xmm3 ++ pxor %xmm5,%xmm2 ++.byte 102,15,56,0,193 ++ addl $16,%edx ++ pxor %xmm2,%xmm0 ++.byte 102,15,56,0,220 ++ addl $16,%ecx ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,193 ++ andl $48,%ecx ++ subl $1,%eax ++ pxor %xmm3,%xmm0 ++L000enc_entry: ++ movdqa %xmm6,%xmm1 ++ movdqa -32(%ebp),%xmm5 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm6,%xmm0 ++.byte 102,15,56,0,232 ++ movdqa %xmm7,%xmm3 ++ pxor %xmm1,%xmm0 ++.byte 102,15,56,0,217 ++ movdqa %xmm7,%xmm4 ++ pxor %xmm5,%xmm3 ++.byte 102,15,56,0,224 ++ movdqa %xmm7,%xmm2 ++ pxor %xmm5,%xmm4 ++.byte 102,15,56,0,211 ++ movdqa %xmm7,%xmm3 ++ pxor %xmm0,%xmm2 ++.byte 102,15,56,0,220 ++ movdqu (%edx),%xmm5 ++ pxor %xmm1,%xmm3 ++ jnz L001enc_loop ++ movdqa 96(%ebp),%xmm4 ++ movdqa 112(%ebp),%xmm0 ++.byte 102,15,56,0,226 ++ pxor %xmm5,%xmm4 ++.byte 102,15,56,0,195 ++ movdqa 64(%ebx,%ecx,1),%xmm1 ++ pxor %xmm4,%xmm0 ++.byte 102,15,56,0,193 ++ ret ++.private_extern __vpaes_decrypt_core ++.align 4 ++__vpaes_decrypt_core: ++ leal 608(%ebp),%ebx ++ movl 240(%edx),%eax ++ movdqa %xmm6,%xmm1 ++ movdqa -64(%ebx),%xmm2 ++ pandn %xmm0,%xmm1 ++ movl %eax,%ecx ++ psrld $4,%xmm1 ++ movdqu (%edx),%xmm5 ++ shll $4,%ecx ++ pand %xmm6,%xmm0 ++.byte 102,15,56,0,208 ++ movdqa -48(%ebx),%xmm0 ++ xorl $48,%ecx ++.byte 102,15,56,0,193 ++ andl $48,%ecx ++ pxor %xmm5,%xmm2 ++ movdqa 176(%ebp),%xmm5 ++ pxor %xmm2,%xmm0 ++ addl $16,%edx ++ leal -352(%ebx,%ecx,1),%ecx ++ jmp L002dec_entry ++.align 4,0x90 ++L003dec_loop: ++ movdqa -32(%ebx),%xmm4 ++ movdqa -16(%ebx),%xmm1 ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,203 ++ pxor %xmm4,%xmm0 ++ movdqa (%ebx),%xmm4 ++ pxor %xmm1,%xmm0 ++ movdqa 16(%ebx),%xmm1 ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,197 ++.byte 102,15,56,0,203 ++ pxor %xmm4,%xmm0 ++ movdqa 32(%ebx),%xmm4 ++ pxor %xmm1,%xmm0 ++ movdqa 48(%ebx),%xmm1 ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,197 ++.byte 102,15,56,0,203 ++ pxor %xmm4,%xmm0 ++ movdqa 64(%ebx),%xmm4 ++ pxor %xmm1,%xmm0 ++ movdqa 80(%ebx),%xmm1 ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,197 ++.byte 102,15,56,0,203 ++ pxor %xmm4,%xmm0 ++ addl $16,%edx ++.byte 102,15,58,15,237,12 ++ pxor %xmm1,%xmm0 ++ subl $1,%eax ++L002dec_entry: ++ movdqa %xmm6,%xmm1 ++ movdqa -32(%ebp),%xmm2 ++ pandn %xmm0,%xmm1 ++ pand %xmm6,%xmm0 ++ psrld $4,%xmm1 ++.byte 102,15,56,0,208 ++ movdqa %xmm7,%xmm3 ++ pxor %xmm1,%xmm0 ++.byte 102,15,56,0,217 ++ movdqa %xmm7,%xmm4 ++ pxor %xmm2,%xmm3 ++.byte 102,15,56,0,224 ++ pxor %xmm2,%xmm4 ++ movdqa %xmm7,%xmm2 ++.byte 102,15,56,0,211 ++ movdqa %xmm7,%xmm3 ++ pxor %xmm0,%xmm2 ++.byte 102,15,56,0,220 ++ movdqu (%edx),%xmm0 ++ pxor %xmm1,%xmm3 ++ jnz L003dec_loop ++ movdqa 96(%ebx),%xmm4 ++.byte 102,15,56,0,226 ++ pxor %xmm0,%xmm4 ++ movdqa 112(%ebx),%xmm0 ++ movdqa (%ecx),%xmm2 ++.byte 102,15,56,0,195 ++ pxor %xmm4,%xmm0 ++.byte 102,15,56,0,194 ++ ret ++.private_extern __vpaes_schedule_core ++.align 4 ++__vpaes_schedule_core: ++ addl (%esp),%ebp ++ movdqu (%esi),%xmm0 ++ movdqa 
320(%ebp),%xmm2 ++ movdqa %xmm0,%xmm3 ++ leal (%ebp),%ebx ++ movdqa %xmm2,4(%esp) ++ call __vpaes_schedule_transform ++ movdqa %xmm0,%xmm7 ++ testl %edi,%edi ++ jnz L004schedule_am_decrypting ++ movdqu %xmm0,(%edx) ++ jmp L005schedule_go ++L004schedule_am_decrypting: ++ movdqa 256(%ebp,%ecx,1),%xmm1 ++.byte 102,15,56,0,217 ++ movdqu %xmm3,(%edx) ++ xorl $48,%ecx ++L005schedule_go: ++ cmpl $192,%eax ++ ja L006schedule_256 ++ je L007schedule_192 ++L008schedule_128: ++ movl $10,%eax ++L009loop_schedule_128: ++ call __vpaes_schedule_round ++ decl %eax ++ jz L010schedule_mangle_last ++ call __vpaes_schedule_mangle ++ jmp L009loop_schedule_128 ++.align 4,0x90 ++L007schedule_192: ++ movdqu 8(%esi),%xmm0 ++ call __vpaes_schedule_transform ++ movdqa %xmm0,%xmm6 ++ pxor %xmm4,%xmm4 ++ movhlps %xmm4,%xmm6 ++ movl $4,%eax ++L011loop_schedule_192: ++ call __vpaes_schedule_round ++.byte 102,15,58,15,198,8 ++ call __vpaes_schedule_mangle ++ call __vpaes_schedule_192_smear ++ call __vpaes_schedule_mangle ++ call __vpaes_schedule_round ++ decl %eax ++ jz L010schedule_mangle_last ++ call __vpaes_schedule_mangle ++ call __vpaes_schedule_192_smear ++ jmp L011loop_schedule_192 ++.align 4,0x90 ++L006schedule_256: ++ movdqu 16(%esi),%xmm0 ++ call __vpaes_schedule_transform ++ movl $7,%eax ++L012loop_schedule_256: ++ call __vpaes_schedule_mangle ++ movdqa %xmm0,%xmm6 ++ call __vpaes_schedule_round ++ decl %eax ++ jz L010schedule_mangle_last ++ call __vpaes_schedule_mangle ++ pshufd $255,%xmm0,%xmm0 ++ movdqa %xmm7,20(%esp) ++ movdqa %xmm6,%xmm7 ++ call L_vpaes_schedule_low_round ++ movdqa 20(%esp),%xmm7 ++ jmp L012loop_schedule_256 ++.align 4,0x90 ++L010schedule_mangle_last: ++ leal 384(%ebp),%ebx ++ testl %edi,%edi ++ jnz L013schedule_mangle_last_dec ++ movdqa 256(%ebp,%ecx,1),%xmm1 ++.byte 102,15,56,0,193 ++ leal 352(%ebp),%ebx ++ addl $32,%edx ++L013schedule_mangle_last_dec: ++ addl $-16,%edx ++ pxor 336(%ebp),%xmm0 ++ call __vpaes_schedule_transform ++ movdqu %xmm0,(%edx) ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ pxor %xmm7,%xmm7 ++ ret ++.private_extern __vpaes_schedule_192_smear ++.align 4 ++__vpaes_schedule_192_smear: ++ pshufd $128,%xmm6,%xmm1 ++ pshufd $254,%xmm7,%xmm0 ++ pxor %xmm1,%xmm6 ++ pxor %xmm1,%xmm1 ++ pxor %xmm0,%xmm6 ++ movdqa %xmm6,%xmm0 ++ movhlps %xmm1,%xmm6 ++ ret ++.private_extern __vpaes_schedule_round ++.align 4 ++__vpaes_schedule_round: ++ movdqa 8(%esp),%xmm2 ++ pxor %xmm1,%xmm1 ++.byte 102,15,58,15,202,15 ++.byte 102,15,58,15,210,15 ++ pxor %xmm1,%xmm7 ++ pshufd $255,%xmm0,%xmm0 ++.byte 102,15,58,15,192,1 ++ movdqa %xmm2,8(%esp) ++L_vpaes_schedule_low_round: ++ movdqa %xmm7,%xmm1 ++ pslldq $4,%xmm7 ++ pxor %xmm1,%xmm7 ++ movdqa %xmm7,%xmm1 ++ pslldq $8,%xmm7 ++ pxor %xmm1,%xmm7 ++ pxor 336(%ebp),%xmm7 ++ movdqa -16(%ebp),%xmm4 ++ movdqa -48(%ebp),%xmm5 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm4,%xmm0 ++ movdqa -32(%ebp),%xmm2 ++.byte 102,15,56,0,208 ++ pxor %xmm1,%xmm0 ++ movdqa %xmm5,%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++ movdqa %xmm5,%xmm4 ++.byte 102,15,56,0,224 ++ pxor %xmm2,%xmm4 ++ movdqa %xmm5,%xmm2 ++.byte 102,15,56,0,211 ++ pxor %xmm0,%xmm2 ++ movdqa %xmm5,%xmm3 ++.byte 102,15,56,0,220 ++ pxor %xmm1,%xmm3 ++ movdqa 32(%ebp),%xmm4 ++.byte 102,15,56,0,226 ++ movdqa 48(%ebp),%xmm0 ++.byte 102,15,56,0,195 ++ pxor %xmm4,%xmm0 ++ pxor %xmm7,%xmm0 ++ movdqa %xmm0,%xmm7 ++ ret ++.private_extern __vpaes_schedule_transform ++.align 4 
++__vpaes_schedule_transform: ++ movdqa -16(%ebp),%xmm2 ++ movdqa %xmm2,%xmm1 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm2,%xmm0 ++ movdqa (%ebx),%xmm2 ++.byte 102,15,56,0,208 ++ movdqa 16(%ebx),%xmm0 ++.byte 102,15,56,0,193 ++ pxor %xmm2,%xmm0 ++ ret ++.private_extern __vpaes_schedule_mangle ++.align 4 ++__vpaes_schedule_mangle: ++ movdqa %xmm0,%xmm4 ++ movdqa 128(%ebp),%xmm5 ++ testl %edi,%edi ++ jnz L014schedule_mangle_dec ++ addl $16,%edx ++ pxor 336(%ebp),%xmm4 ++.byte 102,15,56,0,229 ++ movdqa %xmm4,%xmm3 ++.byte 102,15,56,0,229 ++ pxor %xmm4,%xmm3 ++.byte 102,15,56,0,229 ++ pxor %xmm4,%xmm3 ++ jmp L015schedule_mangle_both ++.align 4,0x90 ++L014schedule_mangle_dec: ++ movdqa -16(%ebp),%xmm2 ++ leal 416(%ebp),%esi ++ movdqa %xmm2,%xmm1 ++ pandn %xmm4,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm2,%xmm4 ++ movdqa (%esi),%xmm2 ++.byte 102,15,56,0,212 ++ movdqa 16(%esi),%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++.byte 102,15,56,0,221 ++ movdqa 32(%esi),%xmm2 ++.byte 102,15,56,0,212 ++ pxor %xmm3,%xmm2 ++ movdqa 48(%esi),%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++.byte 102,15,56,0,221 ++ movdqa 64(%esi),%xmm2 ++.byte 102,15,56,0,212 ++ pxor %xmm3,%xmm2 ++ movdqa 80(%esi),%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++.byte 102,15,56,0,221 ++ movdqa 96(%esi),%xmm2 ++.byte 102,15,56,0,212 ++ pxor %xmm3,%xmm2 ++ movdqa 112(%esi),%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++ addl $-16,%edx ++L015schedule_mangle_both: ++ movdqa 256(%ebp,%ecx,1),%xmm1 ++.byte 102,15,56,0,217 ++ addl $-16,%ecx ++ andl $48,%ecx ++ movdqu %xmm3,(%edx) ++ ret ++.globl _vpaes_set_encrypt_key ++.private_extern _vpaes_set_encrypt_key ++.align 4 ++_vpaes_set_encrypt_key: ++L_vpaes_set_encrypt_key_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++#ifdef BORINGSSL_DISPATCH_TEST ++ pushl %ebx ++ pushl %edx ++ call L016pic ++L016pic: ++ popl %ebx ++ leal _BORINGSSL_function_hit+5-L016pic(%ebx),%ebx ++ movl $1,%edx ++ movb %dl,(%ebx) ++ popl %edx ++ popl %ebx ++#endif ++ movl 20(%esp),%esi ++ leal -56(%esp),%ebx ++ movl 24(%esp),%eax ++ andl $-16,%ebx ++ movl 28(%esp),%edx ++ xchgl %esp,%ebx ++ movl %ebx,48(%esp) ++ movl %eax,%ebx ++ shrl $5,%ebx ++ addl $5,%ebx ++ movl %ebx,240(%edx) ++ movl $48,%ecx ++ movl $0,%edi ++ leal L_vpaes_consts+0x30-L017pic_point,%ebp ++ call __vpaes_schedule_core ++L017pic_point: ++ movl 48(%esp),%esp ++ xorl %eax,%eax ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _vpaes_set_decrypt_key ++.private_extern _vpaes_set_decrypt_key ++.align 4 ++_vpaes_set_decrypt_key: ++L_vpaes_set_decrypt_key_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%esi ++ leal -56(%esp),%ebx ++ movl 24(%esp),%eax ++ andl $-16,%ebx ++ movl 28(%esp),%edx ++ xchgl %esp,%ebx ++ movl %ebx,48(%esp) ++ movl %eax,%ebx ++ shrl $5,%ebx ++ addl $5,%ebx ++ movl %ebx,240(%edx) ++ shll $4,%ebx ++ leal 16(%edx,%ebx,1),%edx ++ movl $1,%edi ++ movl %eax,%ecx ++ shrl $1,%ecx ++ andl $32,%ecx ++ xorl $32,%ecx ++ leal L_vpaes_consts+0x30-L018pic_point,%ebp ++ call __vpaes_schedule_core ++L018pic_point: ++ movl 48(%esp),%esp ++ xorl %eax,%eax ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _vpaes_encrypt ++.private_extern _vpaes_encrypt ++.align 4 ++_vpaes_encrypt: ++L_vpaes_encrypt_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++#ifdef BORINGSSL_DISPATCH_TEST ++ pushl %ebx ++ pushl %edx ++ call L019pic ++L019pic: ++ popl %ebx ++ leal _BORINGSSL_function_hit+4-L019pic(%ebx),%ebx ++ movl $1,%edx ++ movb 
%dl,(%ebx) ++ popl %edx ++ popl %ebx ++#endif ++ leal L_vpaes_consts+0x30-L020pic_point,%ebp ++ call __vpaes_preheat ++L020pic_point: ++ movl 20(%esp),%esi ++ leal -56(%esp),%ebx ++ movl 24(%esp),%edi ++ andl $-16,%ebx ++ movl 28(%esp),%edx ++ xchgl %esp,%ebx ++ movl %ebx,48(%esp) ++ movdqu (%esi),%xmm0 ++ call __vpaes_encrypt_core ++ movdqu %xmm0,(%edi) ++ movl 48(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _vpaes_decrypt ++.private_extern _vpaes_decrypt ++.align 4 ++_vpaes_decrypt: ++L_vpaes_decrypt_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ leal L_vpaes_consts+0x30-L021pic_point,%ebp ++ call __vpaes_preheat ++L021pic_point: ++ movl 20(%esp),%esi ++ leal -56(%esp),%ebx ++ movl 24(%esp),%edi ++ andl $-16,%ebx ++ movl 28(%esp),%edx ++ xchgl %esp,%ebx ++ movl %ebx,48(%esp) ++ movdqu (%esi),%xmm0 ++ call __vpaes_decrypt_core ++ movdqu %xmm0,(%edi) ++ movl 48(%esp),%esp ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _vpaes_cbc_encrypt ++.private_extern _vpaes_cbc_encrypt ++.align 4 ++_vpaes_cbc_encrypt: ++L_vpaes_cbc_encrypt_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 20(%esp),%esi ++ movl 24(%esp),%edi ++ movl 28(%esp),%eax ++ movl 32(%esp),%edx ++ subl $16,%eax ++ jc L022cbc_abort ++ leal -56(%esp),%ebx ++ movl 36(%esp),%ebp ++ andl $-16,%ebx ++ movl 40(%esp),%ecx ++ xchgl %esp,%ebx ++ movdqu (%ebp),%xmm1 ++ subl %esi,%edi ++ movl %ebx,48(%esp) ++ movl %edi,(%esp) ++ movl %edx,4(%esp) ++ movl %ebp,8(%esp) ++ movl %eax,%edi ++ leal L_vpaes_consts+0x30-L023pic_point,%ebp ++ call __vpaes_preheat ++L023pic_point: ++ cmpl $0,%ecx ++ je L024cbc_dec_loop ++ jmp L025cbc_enc_loop ++.align 4,0x90 ++L025cbc_enc_loop: ++ movdqu (%esi),%xmm0 ++ pxor %xmm1,%xmm0 ++ call __vpaes_encrypt_core ++ movl (%esp),%ebx ++ movl 4(%esp),%edx ++ movdqa %xmm0,%xmm1 ++ movdqu %xmm0,(%ebx,%esi,1) ++ leal 16(%esi),%esi ++ subl $16,%edi ++ jnc L025cbc_enc_loop ++ jmp L026cbc_done ++.align 4,0x90 ++L024cbc_dec_loop: ++ movdqu (%esi),%xmm0 ++ movdqa %xmm1,16(%esp) ++ movdqa %xmm0,32(%esp) ++ call __vpaes_decrypt_core ++ movl (%esp),%ebx ++ movl 4(%esp),%edx ++ pxor 16(%esp),%xmm0 ++ movdqa 32(%esp),%xmm1 ++ movdqu %xmm0,(%ebx,%esi,1) ++ leal 16(%esi),%esi ++ subl $16,%edi ++ jnc L024cbc_dec_loop ++L026cbc_done: ++ movl 8(%esp),%ebx ++ movl 48(%esp),%esp ++ movdqu %xmm1,(%ebx) ++L022cbc_abort: ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++#endif +diff --git a/apple-x86/crypto/fipsmodule/x86-mont.S b/apple-x86/crypto/fipsmodule/x86-mont.S +new file mode 100644 +index 0000000..7850a37 +--- /dev/null ++++ b/apple-x86/crypto/fipsmodule/x86-mont.S +@@ -0,0 +1,485 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _bn_mul_mont ++.private_extern _bn_mul_mont ++.align 4 ++_bn_mul_mont: ++L_bn_mul_mont_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ xorl %eax,%eax ++ movl 40(%esp),%edi ++ cmpl $4,%edi ++ jl L000just_leave ++ leal 20(%esp),%esi ++ leal 24(%esp),%edx ++ addl $2,%edi ++ negl %edi ++ leal -32(%esp,%edi,4),%ebp ++ negl %edi ++ movl %ebp,%eax ++ subl %edx,%eax ++ andl $2047,%eax ++ subl %eax,%ebp ++ xorl %ebp,%edx ++ andl $2048,%edx ++ xorl $2048,%edx ++ subl %edx,%ebp ++ andl $-64,%ebp ++ movl %esp,%eax ++ subl %ebp,%eax ++ andl $-4096,%eax ++ movl %esp,%edx ++ leal (%ebp,%eax,1),%esp ++ movl (%esp),%eax ++ cmpl %ebp,%esp ++ ja L001page_walk ++ jmp L002page_walk_done ++.align 4,0x90 ++L001page_walk: ++ leal -4096(%esp),%esp ++ movl (%esp),%eax ++ cmpl %ebp,%esp ++ ja L001page_walk ++L002page_walk_done: ++ movl (%esi),%eax ++ movl 4(%esi),%ebx ++ movl 8(%esi),%ecx ++ movl 12(%esi),%ebp ++ movl 16(%esi),%esi ++ movl (%esi),%esi ++ movl %eax,4(%esp) ++ movl %ebx,8(%esp) ++ movl %ecx,12(%esp) ++ movl %ebp,16(%esp) ++ movl %esi,20(%esp) ++ leal -3(%edi),%ebx ++ movl %edx,24(%esp) ++ call L003PIC_me_up ++L003PIC_me_up: ++ popl %eax ++ movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax ++ btl $26,(%eax) ++ jnc L004non_sse2 ++ movl $-1,%eax ++ movd %eax,%mm7 ++ movl 8(%esp),%esi ++ movl 12(%esp),%edi ++ movl 16(%esp),%ebp ++ xorl %edx,%edx ++ xorl %ecx,%ecx ++ movd (%edi),%mm4 ++ movd (%esi),%mm5 ++ movd (%ebp),%mm3 ++ pmuludq %mm4,%mm5 ++ movq %mm5,%mm2 ++ movq %mm5,%mm0 ++ pand %mm7,%mm0 ++ pmuludq 20(%esp),%mm5 ++ pmuludq %mm5,%mm3 ++ paddq %mm0,%mm3 ++ movd 4(%ebp),%mm1 ++ movd 4(%esi),%mm0 ++ psrlq $32,%mm2 ++ psrlq $32,%mm3 ++ incl %ecx ++.align 4,0x90 ++L0051st: ++ pmuludq %mm4,%mm0 ++ pmuludq %mm5,%mm1 ++ paddq %mm0,%mm2 ++ paddq %mm1,%mm3 ++ movq %mm2,%mm0 ++ pand %mm7,%mm0 ++ movd 4(%ebp,%ecx,4),%mm1 ++ paddq %mm0,%mm3 ++ movd 4(%esi,%ecx,4),%mm0 ++ psrlq $32,%mm2 ++ movd %mm3,28(%esp,%ecx,4) ++ psrlq $32,%mm3 ++ leal 1(%ecx),%ecx ++ cmpl %ebx,%ecx ++ jl L0051st ++ pmuludq %mm4,%mm0 ++ pmuludq %mm5,%mm1 ++ paddq %mm0,%mm2 ++ paddq %mm1,%mm3 ++ movq %mm2,%mm0 ++ pand %mm7,%mm0 ++ paddq %mm0,%mm3 ++ movd %mm3,28(%esp,%ecx,4) ++ psrlq $32,%mm2 ++ psrlq $32,%mm3 ++ paddq %mm2,%mm3 ++ movq %mm3,32(%esp,%ebx,4) ++ incl %edx ++L006outer: ++ xorl %ecx,%ecx ++ movd (%edi,%edx,4),%mm4 ++ movd (%esi),%mm5 ++ movd 32(%esp),%mm6 ++ movd (%ebp),%mm3 ++ pmuludq %mm4,%mm5 ++ paddq %mm6,%mm5 ++ movq %mm5,%mm0 ++ movq %mm5,%mm2 ++ pand %mm7,%mm0 ++ pmuludq 20(%esp),%mm5 ++ pmuludq %mm5,%mm3 ++ paddq %mm0,%mm3 ++ movd 36(%esp),%mm6 ++ movd 4(%ebp),%mm1 ++ movd 4(%esi),%mm0 ++ psrlq $32,%mm2 ++ psrlq $32,%mm3 ++ paddq %mm6,%mm2 ++ incl %ecx ++ decl %ebx ++L007inner: ++ pmuludq %mm4,%mm0 ++ pmuludq %mm5,%mm1 ++ paddq %mm0,%mm2 ++ paddq %mm1,%mm3 ++ movq %mm2,%mm0 ++ movd 36(%esp,%ecx,4),%mm6 ++ pand %mm7,%mm0 ++ movd 4(%ebp,%ecx,4),%mm1 ++ paddq %mm0,%mm3 ++ movd 4(%esi,%ecx,4),%mm0 ++ psrlq $32,%mm2 ++ movd %mm3,28(%esp,%ecx,4) ++ psrlq $32,%mm3 ++ paddq %mm6,%mm2 ++ decl %ebx ++ leal 1(%ecx),%ecx ++ jnz L007inner ++ movl %ecx,%ebx ++ pmuludq %mm4,%mm0 ++ pmuludq %mm5,%mm1 ++ paddq %mm0,%mm2 ++ paddq %mm1,%mm3 ++ movq %mm2,%mm0 ++ pand %mm7,%mm0 ++ paddq %mm0,%mm3 ++ movd %mm3,28(%esp,%ecx,4) ++ psrlq $32,%mm2 ++ psrlq $32,%mm3 ++ movd 36(%esp,%ebx,4),%mm6 ++ paddq %mm2,%mm3 ++ paddq %mm6,%mm3 ++ movq %mm3,32(%esp,%ebx,4) ++ leal 1(%edx),%edx ++ cmpl %ebx,%edx ++ jle L006outer ++ emms 
++ jmp L008common_tail ++.align 4,0x90 ++L004non_sse2: ++ movl 8(%esp),%esi ++ leal 1(%ebx),%ebp ++ movl 12(%esp),%edi ++ xorl %ecx,%ecx ++ movl %esi,%edx ++ andl $1,%ebp ++ subl %edi,%edx ++ leal 4(%edi,%ebx,4),%eax ++ orl %edx,%ebp ++ movl (%edi),%edi ++ jz L009bn_sqr_mont ++ movl %eax,28(%esp) ++ movl (%esi),%eax ++ xorl %edx,%edx ++.align 4,0x90 ++L010mull: ++ movl %edx,%ebp ++ mull %edi ++ addl %eax,%ebp ++ leal 1(%ecx),%ecx ++ adcl $0,%edx ++ movl (%esi,%ecx,4),%eax ++ cmpl %ebx,%ecx ++ movl %ebp,28(%esp,%ecx,4) ++ jl L010mull ++ movl %edx,%ebp ++ mull %edi ++ movl 20(%esp),%edi ++ addl %ebp,%eax ++ movl 16(%esp),%esi ++ adcl $0,%edx ++ imull 32(%esp),%edi ++ movl %eax,32(%esp,%ebx,4) ++ xorl %ecx,%ecx ++ movl %edx,36(%esp,%ebx,4) ++ movl %ecx,40(%esp,%ebx,4) ++ movl (%esi),%eax ++ mull %edi ++ addl 32(%esp),%eax ++ movl 4(%esi),%eax ++ adcl $0,%edx ++ incl %ecx ++ jmp L0112ndmadd ++.align 4,0x90 ++L0121stmadd: ++ movl %edx,%ebp ++ mull %edi ++ addl 32(%esp,%ecx,4),%ebp ++ leal 1(%ecx),%ecx ++ adcl $0,%edx ++ addl %eax,%ebp ++ movl (%esi,%ecx,4),%eax ++ adcl $0,%edx ++ cmpl %ebx,%ecx ++ movl %ebp,28(%esp,%ecx,4) ++ jl L0121stmadd ++ movl %edx,%ebp ++ mull %edi ++ addl 32(%esp,%ebx,4),%eax ++ movl 20(%esp),%edi ++ adcl $0,%edx ++ movl 16(%esp),%esi ++ addl %eax,%ebp ++ adcl $0,%edx ++ imull 32(%esp),%edi ++ xorl %ecx,%ecx ++ addl 36(%esp,%ebx,4),%edx ++ movl %ebp,32(%esp,%ebx,4) ++ adcl $0,%ecx ++ movl (%esi),%eax ++ movl %edx,36(%esp,%ebx,4) ++ movl %ecx,40(%esp,%ebx,4) ++ mull %edi ++ addl 32(%esp),%eax ++ movl 4(%esi),%eax ++ adcl $0,%edx ++ movl $1,%ecx ++.align 4,0x90 ++L0112ndmadd: ++ movl %edx,%ebp ++ mull %edi ++ addl 32(%esp,%ecx,4),%ebp ++ leal 1(%ecx),%ecx ++ adcl $0,%edx ++ addl %eax,%ebp ++ movl (%esi,%ecx,4),%eax ++ adcl $0,%edx ++ cmpl %ebx,%ecx ++ movl %ebp,24(%esp,%ecx,4) ++ jl L0112ndmadd ++ movl %edx,%ebp ++ mull %edi ++ addl 32(%esp,%ebx,4),%ebp ++ adcl $0,%edx ++ addl %eax,%ebp ++ adcl $0,%edx ++ movl %ebp,28(%esp,%ebx,4) ++ xorl %eax,%eax ++ movl 12(%esp),%ecx ++ addl 36(%esp,%ebx,4),%edx ++ adcl 40(%esp,%ebx,4),%eax ++ leal 4(%ecx),%ecx ++ movl %edx,32(%esp,%ebx,4) ++ cmpl 28(%esp),%ecx ++ movl %eax,36(%esp,%ebx,4) ++ je L008common_tail ++ movl (%ecx),%edi ++ movl 8(%esp),%esi ++ movl %ecx,12(%esp) ++ xorl %ecx,%ecx ++ xorl %edx,%edx ++ movl (%esi),%eax ++ jmp L0121stmadd ++.align 4,0x90 ++L009bn_sqr_mont: ++ movl %ebx,(%esp) ++ movl %ecx,12(%esp) ++ movl %edi,%eax ++ mull %edi ++ movl %eax,32(%esp) ++ movl %edx,%ebx ++ shrl $1,%edx ++ andl $1,%ebx ++ incl %ecx ++.align 4,0x90 ++L013sqr: ++ movl (%esi,%ecx,4),%eax ++ movl %edx,%ebp ++ mull %edi ++ addl %ebp,%eax ++ leal 1(%ecx),%ecx ++ adcl $0,%edx ++ leal (%ebx,%eax,2),%ebp ++ shrl $31,%eax ++ cmpl (%esp),%ecx ++ movl %eax,%ebx ++ movl %ebp,28(%esp,%ecx,4) ++ jl L013sqr ++ movl (%esi,%ecx,4),%eax ++ movl %edx,%ebp ++ mull %edi ++ addl %ebp,%eax ++ movl 20(%esp),%edi ++ adcl $0,%edx ++ movl 16(%esp),%esi ++ leal (%ebx,%eax,2),%ebp ++ imull 32(%esp),%edi ++ shrl $31,%eax ++ movl %ebp,32(%esp,%ecx,4) ++ leal (%eax,%edx,2),%ebp ++ movl (%esi),%eax ++ shrl $31,%edx ++ movl %ebp,36(%esp,%ecx,4) ++ movl %edx,40(%esp,%ecx,4) ++ mull %edi ++ addl 32(%esp),%eax ++ movl %ecx,%ebx ++ adcl $0,%edx ++ movl 4(%esi),%eax ++ movl $1,%ecx ++.align 4,0x90 ++L0143rdmadd: ++ movl %edx,%ebp ++ mull %edi ++ addl 32(%esp,%ecx,4),%ebp ++ adcl $0,%edx ++ addl %eax,%ebp ++ movl 4(%esi,%ecx,4),%eax ++ adcl $0,%edx ++ movl %ebp,28(%esp,%ecx,4) ++ movl %edx,%ebp ++ mull %edi ++ addl 36(%esp,%ecx,4),%ebp ++ leal 2(%ecx),%ecx ++ adcl $0,%edx 
++ addl %eax,%ebp ++ movl (%esi,%ecx,4),%eax ++ adcl $0,%edx ++ cmpl %ebx,%ecx ++ movl %ebp,24(%esp,%ecx,4) ++ jl L0143rdmadd ++ movl %edx,%ebp ++ mull %edi ++ addl 32(%esp,%ebx,4),%ebp ++ adcl $0,%edx ++ addl %eax,%ebp ++ adcl $0,%edx ++ movl %ebp,28(%esp,%ebx,4) ++ movl 12(%esp),%ecx ++ xorl %eax,%eax ++ movl 8(%esp),%esi ++ addl 36(%esp,%ebx,4),%edx ++ adcl 40(%esp,%ebx,4),%eax ++ movl %edx,32(%esp,%ebx,4) ++ cmpl %ebx,%ecx ++ movl %eax,36(%esp,%ebx,4) ++ je L008common_tail ++ movl 4(%esi,%ecx,4),%edi ++ leal 1(%ecx),%ecx ++ movl %edi,%eax ++ movl %ecx,12(%esp) ++ mull %edi ++ addl 32(%esp,%ecx,4),%eax ++ adcl $0,%edx ++ movl %eax,32(%esp,%ecx,4) ++ xorl %ebp,%ebp ++ cmpl %ebx,%ecx ++ leal 1(%ecx),%ecx ++ je L015sqrlast ++ movl %edx,%ebx ++ shrl $1,%edx ++ andl $1,%ebx ++.align 4,0x90 ++L016sqradd: ++ movl (%esi,%ecx,4),%eax ++ movl %edx,%ebp ++ mull %edi ++ addl %ebp,%eax ++ leal (%eax,%eax,1),%ebp ++ adcl $0,%edx ++ shrl $31,%eax ++ addl 32(%esp,%ecx,4),%ebp ++ leal 1(%ecx),%ecx ++ adcl $0,%eax ++ addl %ebx,%ebp ++ adcl $0,%eax ++ cmpl (%esp),%ecx ++ movl %ebp,28(%esp,%ecx,4) ++ movl %eax,%ebx ++ jle L016sqradd ++ movl %edx,%ebp ++ addl %edx,%edx ++ shrl $31,%ebp ++ addl %ebx,%edx ++ adcl $0,%ebp ++L015sqrlast: ++ movl 20(%esp),%edi ++ movl 16(%esp),%esi ++ imull 32(%esp),%edi ++ addl 32(%esp,%ecx,4),%edx ++ movl (%esi),%eax ++ adcl $0,%ebp ++ movl %edx,32(%esp,%ecx,4) ++ movl %ebp,36(%esp,%ecx,4) ++ mull %edi ++ addl 32(%esp),%eax ++ leal -1(%ecx),%ebx ++ adcl $0,%edx ++ movl $1,%ecx ++ movl 4(%esi),%eax ++ jmp L0143rdmadd ++.align 4,0x90 ++L008common_tail: ++ movl 16(%esp),%ebp ++ movl 4(%esp),%edi ++ leal 32(%esp),%esi ++ movl (%esi),%eax ++ movl %ebx,%ecx ++ xorl %edx,%edx ++.align 4,0x90 ++L017sub: ++ sbbl (%ebp,%edx,4),%eax ++ movl %eax,(%edi,%edx,4) ++ decl %ecx ++ movl 4(%esi,%edx,4),%eax ++ leal 1(%edx),%edx ++ jge L017sub ++ sbbl $0,%eax ++ movl $-1,%edx ++ xorl %eax,%edx ++ jmp L018copy ++.align 4,0x90 ++L018copy: ++ movl 32(%esp,%ebx,4),%esi ++ movl (%edi,%ebx,4),%ebp ++ movl %ecx,32(%esp,%ebx,4) ++ andl %eax,%esi ++ andl %edx,%ebp ++ orl %esi,%ebp ++ movl %ebp,(%edi,%ebx,4) ++ decl %ebx ++ jge L018copy ++ movl 24(%esp),%esp ++ movl $1,%eax ++L000just_leave: ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 ++.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 ++.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 ++.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 ++.byte 111,114,103,62,0 ++.section __IMPORT,__pointers,non_lazy_symbol_pointers ++L_OPENSSL_ia32cap_P$non_lazy_ptr: ++.indirect_symbol _OPENSSL_ia32cap_P ++.long 0 ++#endif +diff --git a/apple-x86/crypto/test/trampoline-x86.S b/apple-x86/crypto/test/trampoline-x86.S +new file mode 100644 +index 0000000..fd40b95 +--- /dev/null ++++ b/apple-x86/crypto/test/trampoline-x86.S +@@ -0,0 +1,169 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__i386__) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.globl _abi_test_trampoline ++.private_extern _abi_test_trampoline ++.align 4 ++_abi_test_trampoline: ++L_abi_test_trampoline_begin: ++ pushl %ebp ++ pushl %ebx ++ pushl %esi ++ pushl %edi ++ movl 24(%esp),%ecx ++ movl (%ecx),%esi ++ movl 4(%ecx),%edi ++ movl 8(%ecx),%ebx ++ movl 12(%ecx),%ebp ++ subl $44,%esp ++ movl 72(%esp),%eax ++ xorl %ecx,%ecx ++L000loop: ++ cmpl 76(%esp),%ecx ++ jae L001loop_done ++ movl (%eax,%ecx,4),%edx ++ movl %edx,(%esp,%ecx,4) ++ addl $1,%ecx ++ jmp L000loop ++L001loop_done: ++ call *64(%esp) ++ addl $44,%esp ++ movl 24(%esp),%ecx ++ movl %esi,(%ecx) ++ movl %edi,4(%ecx) ++ movl %ebx,8(%ecx) ++ movl %ebp,12(%ecx) ++ popl %edi ++ popl %esi ++ popl %ebx ++ popl %ebp ++ ret ++.globl _abi_test_get_and_clear_direction_flag ++.private_extern _abi_test_get_and_clear_direction_flag ++.align 4 ++_abi_test_get_and_clear_direction_flag: ++L_abi_test_get_and_clear_direction_flag_begin: ++ pushfl ++ popl %eax ++ andl $1024,%eax ++ shrl $10,%eax ++ cld ++ ret ++.globl _abi_test_set_direction_flag ++.private_extern _abi_test_set_direction_flag ++.align 4 ++_abi_test_set_direction_flag: ++L_abi_test_set_direction_flag_begin: ++ std ++ ret ++.globl _abi_test_clobber_eax ++.private_extern _abi_test_clobber_eax ++.align 4 ++_abi_test_clobber_eax: ++L_abi_test_clobber_eax_begin: ++ xorl %eax,%eax ++ ret ++.globl _abi_test_clobber_ebx ++.private_extern _abi_test_clobber_ebx ++.align 4 ++_abi_test_clobber_ebx: ++L_abi_test_clobber_ebx_begin: ++ xorl %ebx,%ebx ++ ret ++.globl _abi_test_clobber_ecx ++.private_extern _abi_test_clobber_ecx ++.align 4 ++_abi_test_clobber_ecx: ++L_abi_test_clobber_ecx_begin: ++ xorl %ecx,%ecx ++ ret ++.globl _abi_test_clobber_edx ++.private_extern _abi_test_clobber_edx ++.align 4 ++_abi_test_clobber_edx: ++L_abi_test_clobber_edx_begin: ++ xorl %edx,%edx ++ ret ++.globl _abi_test_clobber_edi ++.private_extern _abi_test_clobber_edi ++.align 4 ++_abi_test_clobber_edi: ++L_abi_test_clobber_edi_begin: ++ xorl %edi,%edi ++ ret ++.globl _abi_test_clobber_esi ++.private_extern _abi_test_clobber_esi ++.align 4 ++_abi_test_clobber_esi: ++L_abi_test_clobber_esi_begin: ++ xorl %esi,%esi ++ ret ++.globl _abi_test_clobber_ebp ++.private_extern _abi_test_clobber_ebp ++.align 4 ++_abi_test_clobber_ebp: ++L_abi_test_clobber_ebp_begin: ++ xorl %ebp,%ebp ++ ret ++.globl _abi_test_clobber_xmm0 ++.private_extern _abi_test_clobber_xmm0 ++.align 4 ++_abi_test_clobber_xmm0: ++L_abi_test_clobber_xmm0_begin: ++ pxor %xmm0,%xmm0 ++ ret ++.globl _abi_test_clobber_xmm1 ++.private_extern _abi_test_clobber_xmm1 ++.align 4 ++_abi_test_clobber_xmm1: ++L_abi_test_clobber_xmm1_begin: ++ pxor %xmm1,%xmm1 ++ ret ++.globl _abi_test_clobber_xmm2 ++.private_extern _abi_test_clobber_xmm2 ++.align 4 ++_abi_test_clobber_xmm2: ++L_abi_test_clobber_xmm2_begin: ++ pxor %xmm2,%xmm2 ++ ret ++.globl _abi_test_clobber_xmm3 ++.private_extern _abi_test_clobber_xmm3 ++.align 4 ++_abi_test_clobber_xmm3: ++L_abi_test_clobber_xmm3_begin: ++ pxor %xmm3,%xmm3 ++ ret ++.globl _abi_test_clobber_xmm4 ++.private_extern _abi_test_clobber_xmm4 ++.align 4 ++_abi_test_clobber_xmm4: ++L_abi_test_clobber_xmm4_begin: ++ pxor %xmm4,%xmm4 ++ ret ++.globl _abi_test_clobber_xmm5 ++.private_extern _abi_test_clobber_xmm5 ++.align 4 ++_abi_test_clobber_xmm5: ++L_abi_test_clobber_xmm5_begin: ++ pxor %xmm5,%xmm5 ++ ret ++.globl _abi_test_clobber_xmm6 ++.private_extern _abi_test_clobber_xmm6 ++.align 4 ++_abi_test_clobber_xmm6: 
++L_abi_test_clobber_xmm6_begin: ++ pxor %xmm6,%xmm6 ++ ret ++.globl _abi_test_clobber_xmm7 ++.private_extern _abi_test_clobber_xmm7 ++.align 4 ++_abi_test_clobber_xmm7: ++L_abi_test_clobber_xmm7_begin: ++ pxor %xmm7,%xmm7 ++ ret ++#endif +diff --git a/apple-x86_64/crypto/chacha/chacha-x86_64.S b/apple-x86_64/crypto/chacha/chacha-x86_64.S +new file mode 100644 +index 0000000..782ddf4 +--- /dev/null ++++ b/apple-x86_64/crypto/chacha/chacha-x86_64.S +@@ -0,0 +1,1625 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++ ++.p2align 6 ++L$zero: ++.long 0,0,0,0 ++L$one: ++.long 1,0,0,0 ++L$inc: ++.long 0,1,2,3 ++L$four: ++.long 4,4,4,4 ++L$incy: ++.long 0,2,4,6,1,3,5,7 ++L$eight: ++.long 8,8,8,8,8,8,8,8 ++L$rot16: ++.byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd ++L$rot24: ++.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe ++L$sigma: ++.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 ++.p2align 6 ++L$zeroz: ++.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0 ++L$fourz: ++.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0 ++L$incz: ++.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 ++L$sixteen: ++.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 ++.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.globl _ChaCha20_ctr32 ++.private_extern _ChaCha20_ctr32 ++ ++.p2align 6 ++_ChaCha20_ctr32: ++ ++ cmpq $0,%rdx ++ je L$no_data ++ movq _OPENSSL_ia32cap_P+4(%rip),%r10 ++ testl $512,%r10d ++ jnz L$ChaCha20_ssse3 ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ subq $64+24,%rsp ++ ++L$ctr32_body: ++ ++ ++ movdqu (%rcx),%xmm1 ++ movdqu 16(%rcx),%xmm2 ++ movdqu (%r8),%xmm3 ++ movdqa L$one(%rip),%xmm4 ++ ++ ++ movdqa %xmm1,16(%rsp) ++ movdqa %xmm2,32(%rsp) ++ movdqa %xmm3,48(%rsp) ++ movq %rdx,%rbp ++ jmp L$oop_outer ++ ++.p2align 5 ++L$oop_outer: ++ movl $0x61707865,%eax ++ movl $0x3320646e,%ebx ++ movl $0x79622d32,%ecx ++ movl $0x6b206574,%edx ++ movl 16(%rsp),%r8d ++ movl 20(%rsp),%r9d ++ movl 24(%rsp),%r10d ++ movl 28(%rsp),%r11d ++ movd %xmm3,%r12d ++ movl 52(%rsp),%r13d ++ movl 56(%rsp),%r14d ++ movl 60(%rsp),%r15d ++ ++ movq %rbp,64+0(%rsp) ++ movl $10,%ebp ++ movq %rsi,64+8(%rsp) ++.byte 102,72,15,126,214 ++ movq %rdi,64+16(%rsp) ++ movq %rsi,%rdi ++ shrq $32,%rdi ++ jmp L$oop ++ ++.p2align 5 ++L$oop: ++ addl %r8d,%eax ++ xorl %eax,%r12d ++ roll $16,%r12d ++ addl %r9d,%ebx ++ xorl %ebx,%r13d ++ roll $16,%r13d ++ addl %r12d,%esi ++ xorl %esi,%r8d ++ roll $12,%r8d ++ addl %r13d,%edi ++ xorl %edi,%r9d ++ roll $12,%r9d ++ addl %r8d,%eax ++ xorl %eax,%r12d ++ roll $8,%r12d ++ addl %r9d,%ebx ++ xorl %ebx,%r13d ++ roll $8,%r13d ++ addl %r12d,%esi ++ xorl %esi,%r8d ++ roll $7,%r8d ++ addl %r13d,%edi ++ xorl %edi,%r9d ++ roll $7,%r9d ++ movl %esi,32(%rsp) ++ movl %edi,36(%rsp) ++ movl 40(%rsp),%esi ++ movl 44(%rsp),%edi ++ addl %r10d,%ecx ++ xorl %ecx,%r14d ++ roll $16,%r14d ++ addl %r11d,%edx ++ xorl %edx,%r15d ++ roll $16,%r15d ++ addl %r14d,%esi ++ xorl %esi,%r10d ++ roll $12,%r10d ++ addl %r15d,%edi ++ xorl %edi,%r11d ++ roll 
$12,%r11d ++ addl %r10d,%ecx ++ xorl %ecx,%r14d ++ roll $8,%r14d ++ addl %r11d,%edx ++ xorl %edx,%r15d ++ roll $8,%r15d ++ addl %r14d,%esi ++ xorl %esi,%r10d ++ roll $7,%r10d ++ addl %r15d,%edi ++ xorl %edi,%r11d ++ roll $7,%r11d ++ addl %r9d,%eax ++ xorl %eax,%r15d ++ roll $16,%r15d ++ addl %r10d,%ebx ++ xorl %ebx,%r12d ++ roll $16,%r12d ++ addl %r15d,%esi ++ xorl %esi,%r9d ++ roll $12,%r9d ++ addl %r12d,%edi ++ xorl %edi,%r10d ++ roll $12,%r10d ++ addl %r9d,%eax ++ xorl %eax,%r15d ++ roll $8,%r15d ++ addl %r10d,%ebx ++ xorl %ebx,%r12d ++ roll $8,%r12d ++ addl %r15d,%esi ++ xorl %esi,%r9d ++ roll $7,%r9d ++ addl %r12d,%edi ++ xorl %edi,%r10d ++ roll $7,%r10d ++ movl %esi,40(%rsp) ++ movl %edi,44(%rsp) ++ movl 32(%rsp),%esi ++ movl 36(%rsp),%edi ++ addl %r11d,%ecx ++ xorl %ecx,%r13d ++ roll $16,%r13d ++ addl %r8d,%edx ++ xorl %edx,%r14d ++ roll $16,%r14d ++ addl %r13d,%esi ++ xorl %esi,%r11d ++ roll $12,%r11d ++ addl %r14d,%edi ++ xorl %edi,%r8d ++ roll $12,%r8d ++ addl %r11d,%ecx ++ xorl %ecx,%r13d ++ roll $8,%r13d ++ addl %r8d,%edx ++ xorl %edx,%r14d ++ roll $8,%r14d ++ addl %r13d,%esi ++ xorl %esi,%r11d ++ roll $7,%r11d ++ addl %r14d,%edi ++ xorl %edi,%r8d ++ roll $7,%r8d ++ decl %ebp ++ jnz L$oop ++ movl %edi,36(%rsp) ++ movl %esi,32(%rsp) ++ movq 64(%rsp),%rbp ++ movdqa %xmm2,%xmm1 ++ movq 64+8(%rsp),%rsi ++ paddd %xmm4,%xmm3 ++ movq 64+16(%rsp),%rdi ++ ++ addl $0x61707865,%eax ++ addl $0x3320646e,%ebx ++ addl $0x79622d32,%ecx ++ addl $0x6b206574,%edx ++ addl 16(%rsp),%r8d ++ addl 20(%rsp),%r9d ++ addl 24(%rsp),%r10d ++ addl 28(%rsp),%r11d ++ addl 48(%rsp),%r12d ++ addl 52(%rsp),%r13d ++ addl 56(%rsp),%r14d ++ addl 60(%rsp),%r15d ++ paddd 32(%rsp),%xmm1 ++ ++ cmpq $64,%rbp ++ jb L$tail ++ ++ xorl 0(%rsi),%eax ++ xorl 4(%rsi),%ebx ++ xorl 8(%rsi),%ecx ++ xorl 12(%rsi),%edx ++ xorl 16(%rsi),%r8d ++ xorl 20(%rsi),%r9d ++ xorl 24(%rsi),%r10d ++ xorl 28(%rsi),%r11d ++ movdqu 32(%rsi),%xmm0 ++ xorl 48(%rsi),%r12d ++ xorl 52(%rsi),%r13d ++ xorl 56(%rsi),%r14d ++ xorl 60(%rsi),%r15d ++ leaq 64(%rsi),%rsi ++ pxor %xmm1,%xmm0 ++ ++ movdqa %xmm2,32(%rsp) ++ movd %xmm3,48(%rsp) ++ ++ movl %eax,0(%rdi) ++ movl %ebx,4(%rdi) ++ movl %ecx,8(%rdi) ++ movl %edx,12(%rdi) ++ movl %r8d,16(%rdi) ++ movl %r9d,20(%rdi) ++ movl %r10d,24(%rdi) ++ movl %r11d,28(%rdi) ++ movdqu %xmm0,32(%rdi) ++ movl %r12d,48(%rdi) ++ movl %r13d,52(%rdi) ++ movl %r14d,56(%rdi) ++ movl %r15d,60(%rdi) ++ leaq 64(%rdi),%rdi ++ ++ subq $64,%rbp ++ jnz L$oop_outer ++ ++ jmp L$done ++ ++.p2align 4 ++L$tail: ++ movl %eax,0(%rsp) ++ movl %ebx,4(%rsp) ++ xorq %rbx,%rbx ++ movl %ecx,8(%rsp) ++ movl %edx,12(%rsp) ++ movl %r8d,16(%rsp) ++ movl %r9d,20(%rsp) ++ movl %r10d,24(%rsp) ++ movl %r11d,28(%rsp) ++ movdqa %xmm1,32(%rsp) ++ movl %r12d,48(%rsp) ++ movl %r13d,52(%rsp) ++ movl %r14d,56(%rsp) ++ movl %r15d,60(%rsp) ++ ++L$oop_tail: ++ movzbl (%rsi,%rbx,1),%eax ++ movzbl (%rsp,%rbx,1),%edx ++ leaq 1(%rbx),%rbx ++ xorl %edx,%eax ++ movb %al,-1(%rdi,%rbx,1) ++ decq %rbp ++ jnz L$oop_tail ++ ++L$done: ++ leaq 64+24+48(%rsp),%rsi ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$no_data: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++ChaCha20_ssse3: ++L$ChaCha20_ssse3: ++ ++ movq %rsp,%r9 ++ ++ cmpq $128,%rdx ++ ja L$ChaCha20_4x ++ ++L$do_sse3_after_all: ++ subq $64+8,%rsp ++ movdqa L$sigma(%rip),%xmm0 ++ movdqu (%rcx),%xmm1 ++ movdqu 16(%rcx),%xmm2 ++ movdqu (%r8),%xmm3 ++ movdqa L$rot16(%rip),%xmm6 ++ movdqa 
L$rot24(%rip),%xmm7 ++ ++ movdqa %xmm0,0(%rsp) ++ movdqa %xmm1,16(%rsp) ++ movdqa %xmm2,32(%rsp) ++ movdqa %xmm3,48(%rsp) ++ movq $10,%r8 ++ jmp L$oop_ssse3 ++ ++.p2align 5 ++L$oop_outer_ssse3: ++ movdqa L$one(%rip),%xmm3 ++ movdqa 0(%rsp),%xmm0 ++ movdqa 16(%rsp),%xmm1 ++ movdqa 32(%rsp),%xmm2 ++ paddd 48(%rsp),%xmm3 ++ movq $10,%r8 ++ movdqa %xmm3,48(%rsp) ++ jmp L$oop_ssse3 ++ ++.p2align 5 ++L$oop_ssse3: ++ paddd %xmm1,%xmm0 ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,222 ++ paddd %xmm3,%xmm2 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm1,%xmm4 ++ psrld $20,%xmm1 ++ pslld $12,%xmm4 ++ por %xmm4,%xmm1 ++ paddd %xmm1,%xmm0 ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,223 ++ paddd %xmm3,%xmm2 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm1,%xmm4 ++ psrld $25,%xmm1 ++ pslld $7,%xmm4 ++ por %xmm4,%xmm1 ++ pshufd $78,%xmm2,%xmm2 ++ pshufd $57,%xmm1,%xmm1 ++ pshufd $147,%xmm3,%xmm3 ++ nop ++ paddd %xmm1,%xmm0 ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,222 ++ paddd %xmm3,%xmm2 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm1,%xmm4 ++ psrld $20,%xmm1 ++ pslld $12,%xmm4 ++ por %xmm4,%xmm1 ++ paddd %xmm1,%xmm0 ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,223 ++ paddd %xmm3,%xmm2 ++ pxor %xmm2,%xmm1 ++ movdqa %xmm1,%xmm4 ++ psrld $25,%xmm1 ++ pslld $7,%xmm4 ++ por %xmm4,%xmm1 ++ pshufd $78,%xmm2,%xmm2 ++ pshufd $147,%xmm1,%xmm1 ++ pshufd $57,%xmm3,%xmm3 ++ decq %r8 ++ jnz L$oop_ssse3 ++ paddd 0(%rsp),%xmm0 ++ paddd 16(%rsp),%xmm1 ++ paddd 32(%rsp),%xmm2 ++ paddd 48(%rsp),%xmm3 ++ ++ cmpq $64,%rdx ++ jb L$tail_ssse3 ++ ++ movdqu 0(%rsi),%xmm4 ++ movdqu 16(%rsi),%xmm5 ++ pxor %xmm4,%xmm0 ++ movdqu 32(%rsi),%xmm4 ++ pxor %xmm5,%xmm1 ++ movdqu 48(%rsi),%xmm5 ++ leaq 64(%rsi),%rsi ++ pxor %xmm4,%xmm2 ++ pxor %xmm5,%xmm3 ++ ++ movdqu %xmm0,0(%rdi) ++ movdqu %xmm1,16(%rdi) ++ movdqu %xmm2,32(%rdi) ++ movdqu %xmm3,48(%rdi) ++ leaq 64(%rdi),%rdi ++ ++ subq $64,%rdx ++ jnz L$oop_outer_ssse3 ++ ++ jmp L$done_ssse3 ++ ++.p2align 4 ++L$tail_ssse3: ++ movdqa %xmm0,0(%rsp) ++ movdqa %xmm1,16(%rsp) ++ movdqa %xmm2,32(%rsp) ++ movdqa %xmm3,48(%rsp) ++ xorq %r8,%r8 ++ ++L$oop_tail_ssse3: ++ movzbl (%rsi,%r8,1),%eax ++ movzbl (%rsp,%r8,1),%ecx ++ leaq 1(%r8),%r8 ++ xorl %ecx,%eax ++ movb %al,-1(%rdi,%r8,1) ++ decq %rdx ++ jnz L$oop_tail_ssse3 ++ ++L$done_ssse3: ++ leaq (%r9),%rsp ++ ++L$ssse3_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++ChaCha20_4x: ++L$ChaCha20_4x: ++ ++ movq %rsp,%r9 ++ ++ movq %r10,%r11 ++ shrq $32,%r10 ++ testq $32,%r10 ++ jnz L$ChaCha20_8x ++ cmpq $192,%rdx ++ ja L$proceed4x ++ ++ andq $71303168,%r11 ++ cmpq $4194304,%r11 ++ je L$do_sse3_after_all ++ ++L$proceed4x: ++ subq $0x140+8,%rsp ++ movdqa L$sigma(%rip),%xmm11 ++ movdqu (%rcx),%xmm15 ++ movdqu 16(%rcx),%xmm7 ++ movdqu (%r8),%xmm3 ++ leaq 256(%rsp),%rcx ++ leaq L$rot16(%rip),%r10 ++ leaq L$rot24(%rip),%r11 ++ ++ pshufd $0x00,%xmm11,%xmm8 ++ pshufd $0x55,%xmm11,%xmm9 ++ movdqa %xmm8,64(%rsp) ++ pshufd $0xaa,%xmm11,%xmm10 ++ movdqa %xmm9,80(%rsp) ++ pshufd $0xff,%xmm11,%xmm11 ++ movdqa %xmm10,96(%rsp) ++ movdqa %xmm11,112(%rsp) ++ ++ pshufd $0x00,%xmm15,%xmm12 ++ pshufd $0x55,%xmm15,%xmm13 ++ movdqa %xmm12,128-256(%rcx) ++ pshufd $0xaa,%xmm15,%xmm14 ++ movdqa %xmm13,144-256(%rcx) ++ pshufd $0xff,%xmm15,%xmm15 ++ movdqa %xmm14,160-256(%rcx) ++ movdqa %xmm15,176-256(%rcx) ++ ++ pshufd $0x00,%xmm7,%xmm4 ++ pshufd $0x55,%xmm7,%xmm5 ++ movdqa %xmm4,192-256(%rcx) ++ pshufd $0xaa,%xmm7,%xmm6 ++ movdqa %xmm5,208-256(%rcx) ++ pshufd $0xff,%xmm7,%xmm7 ++ movdqa %xmm6,224-256(%rcx) ++ movdqa %xmm7,240-256(%rcx) ++ ++ pshufd $0x00,%xmm3,%xmm0 ++ pshufd $0x55,%xmm3,%xmm1 ++ paddd L$inc(%rip),%xmm0 
++ pshufd $0xaa,%xmm3,%xmm2 ++ movdqa %xmm1,272-256(%rcx) ++ pshufd $0xff,%xmm3,%xmm3 ++ movdqa %xmm2,288-256(%rcx) ++ movdqa %xmm3,304-256(%rcx) ++ ++ jmp L$oop_enter4x ++ ++.p2align 5 ++L$oop_outer4x: ++ movdqa 64(%rsp),%xmm8 ++ movdqa 80(%rsp),%xmm9 ++ movdqa 96(%rsp),%xmm10 ++ movdqa 112(%rsp),%xmm11 ++ movdqa 128-256(%rcx),%xmm12 ++ movdqa 144-256(%rcx),%xmm13 ++ movdqa 160-256(%rcx),%xmm14 ++ movdqa 176-256(%rcx),%xmm15 ++ movdqa 192-256(%rcx),%xmm4 ++ movdqa 208-256(%rcx),%xmm5 ++ movdqa 224-256(%rcx),%xmm6 ++ movdqa 240-256(%rcx),%xmm7 ++ movdqa 256-256(%rcx),%xmm0 ++ movdqa 272-256(%rcx),%xmm1 ++ movdqa 288-256(%rcx),%xmm2 ++ movdqa 304-256(%rcx),%xmm3 ++ paddd L$four(%rip),%xmm0 ++ ++L$oop_enter4x: ++ movdqa %xmm6,32(%rsp) ++ movdqa %xmm7,48(%rsp) ++ movdqa (%r10),%xmm7 ++ movl $10,%eax ++ movdqa %xmm0,256-256(%rcx) ++ jmp L$oop4x ++ ++.p2align 5 ++L$oop4x: ++ paddd %xmm12,%xmm8 ++ paddd %xmm13,%xmm9 ++ pxor %xmm8,%xmm0 ++ pxor %xmm9,%xmm1 ++.byte 102,15,56,0,199 ++.byte 102,15,56,0,207 ++ paddd %xmm0,%xmm4 ++ paddd %xmm1,%xmm5 ++ pxor %xmm4,%xmm12 ++ pxor %xmm5,%xmm13 ++ movdqa %xmm12,%xmm6 ++ pslld $12,%xmm12 ++ psrld $20,%xmm6 ++ movdqa %xmm13,%xmm7 ++ pslld $12,%xmm13 ++ por %xmm6,%xmm12 ++ psrld $20,%xmm7 ++ movdqa (%r11),%xmm6 ++ por %xmm7,%xmm13 ++ paddd %xmm12,%xmm8 ++ paddd %xmm13,%xmm9 ++ pxor %xmm8,%xmm0 ++ pxor %xmm9,%xmm1 ++.byte 102,15,56,0,198 ++.byte 102,15,56,0,206 ++ paddd %xmm0,%xmm4 ++ paddd %xmm1,%xmm5 ++ pxor %xmm4,%xmm12 ++ pxor %xmm5,%xmm13 ++ movdqa %xmm12,%xmm7 ++ pslld $7,%xmm12 ++ psrld $25,%xmm7 ++ movdqa %xmm13,%xmm6 ++ pslld $7,%xmm13 ++ por %xmm7,%xmm12 ++ psrld $25,%xmm6 ++ movdqa (%r10),%xmm7 ++ por %xmm6,%xmm13 ++ movdqa %xmm4,0(%rsp) ++ movdqa %xmm5,16(%rsp) ++ movdqa 32(%rsp),%xmm4 ++ movdqa 48(%rsp),%xmm5 ++ paddd %xmm14,%xmm10 ++ paddd %xmm15,%xmm11 ++ pxor %xmm10,%xmm2 ++ pxor %xmm11,%xmm3 ++.byte 102,15,56,0,215 ++.byte 102,15,56,0,223 ++ paddd %xmm2,%xmm4 ++ paddd %xmm3,%xmm5 ++ pxor %xmm4,%xmm14 ++ pxor %xmm5,%xmm15 ++ movdqa %xmm14,%xmm6 ++ pslld $12,%xmm14 ++ psrld $20,%xmm6 ++ movdqa %xmm15,%xmm7 ++ pslld $12,%xmm15 ++ por %xmm6,%xmm14 ++ psrld $20,%xmm7 ++ movdqa (%r11),%xmm6 ++ por %xmm7,%xmm15 ++ paddd %xmm14,%xmm10 ++ paddd %xmm15,%xmm11 ++ pxor %xmm10,%xmm2 ++ pxor %xmm11,%xmm3 ++.byte 102,15,56,0,214 ++.byte 102,15,56,0,222 ++ paddd %xmm2,%xmm4 ++ paddd %xmm3,%xmm5 ++ pxor %xmm4,%xmm14 ++ pxor %xmm5,%xmm15 ++ movdqa %xmm14,%xmm7 ++ pslld $7,%xmm14 ++ psrld $25,%xmm7 ++ movdqa %xmm15,%xmm6 ++ pslld $7,%xmm15 ++ por %xmm7,%xmm14 ++ psrld $25,%xmm6 ++ movdqa (%r10),%xmm7 ++ por %xmm6,%xmm15 ++ paddd %xmm13,%xmm8 ++ paddd %xmm14,%xmm9 ++ pxor %xmm8,%xmm3 ++ pxor %xmm9,%xmm0 ++.byte 102,15,56,0,223 ++.byte 102,15,56,0,199 ++ paddd %xmm3,%xmm4 ++ paddd %xmm0,%xmm5 ++ pxor %xmm4,%xmm13 ++ pxor %xmm5,%xmm14 ++ movdqa %xmm13,%xmm6 ++ pslld $12,%xmm13 ++ psrld $20,%xmm6 ++ movdqa %xmm14,%xmm7 ++ pslld $12,%xmm14 ++ por %xmm6,%xmm13 ++ psrld $20,%xmm7 ++ movdqa (%r11),%xmm6 ++ por %xmm7,%xmm14 ++ paddd %xmm13,%xmm8 ++ paddd %xmm14,%xmm9 ++ pxor %xmm8,%xmm3 ++ pxor %xmm9,%xmm0 ++.byte 102,15,56,0,222 ++.byte 102,15,56,0,198 ++ paddd %xmm3,%xmm4 ++ paddd %xmm0,%xmm5 ++ pxor %xmm4,%xmm13 ++ pxor %xmm5,%xmm14 ++ movdqa %xmm13,%xmm7 ++ pslld $7,%xmm13 ++ psrld $25,%xmm7 ++ movdqa %xmm14,%xmm6 ++ pslld $7,%xmm14 ++ por %xmm7,%xmm13 ++ psrld $25,%xmm6 ++ movdqa (%r10),%xmm7 ++ por %xmm6,%xmm14 ++ movdqa %xmm4,32(%rsp) ++ movdqa %xmm5,48(%rsp) ++ movdqa 0(%rsp),%xmm4 ++ movdqa 16(%rsp),%xmm5 ++ paddd %xmm15,%xmm10 ++ paddd %xmm12,%xmm11 ++ pxor 
%xmm10,%xmm1 ++ pxor %xmm11,%xmm2 ++.byte 102,15,56,0,207 ++.byte 102,15,56,0,215 ++ paddd %xmm1,%xmm4 ++ paddd %xmm2,%xmm5 ++ pxor %xmm4,%xmm15 ++ pxor %xmm5,%xmm12 ++ movdqa %xmm15,%xmm6 ++ pslld $12,%xmm15 ++ psrld $20,%xmm6 ++ movdqa %xmm12,%xmm7 ++ pslld $12,%xmm12 ++ por %xmm6,%xmm15 ++ psrld $20,%xmm7 ++ movdqa (%r11),%xmm6 ++ por %xmm7,%xmm12 ++ paddd %xmm15,%xmm10 ++ paddd %xmm12,%xmm11 ++ pxor %xmm10,%xmm1 ++ pxor %xmm11,%xmm2 ++.byte 102,15,56,0,206 ++.byte 102,15,56,0,214 ++ paddd %xmm1,%xmm4 ++ paddd %xmm2,%xmm5 ++ pxor %xmm4,%xmm15 ++ pxor %xmm5,%xmm12 ++ movdqa %xmm15,%xmm7 ++ pslld $7,%xmm15 ++ psrld $25,%xmm7 ++ movdqa %xmm12,%xmm6 ++ pslld $7,%xmm12 ++ por %xmm7,%xmm15 ++ psrld $25,%xmm6 ++ movdqa (%r10),%xmm7 ++ por %xmm6,%xmm12 ++ decl %eax ++ jnz L$oop4x ++ ++ paddd 64(%rsp),%xmm8 ++ paddd 80(%rsp),%xmm9 ++ paddd 96(%rsp),%xmm10 ++ paddd 112(%rsp),%xmm11 ++ ++ movdqa %xmm8,%xmm6 ++ punpckldq %xmm9,%xmm8 ++ movdqa %xmm10,%xmm7 ++ punpckldq %xmm11,%xmm10 ++ punpckhdq %xmm9,%xmm6 ++ punpckhdq %xmm11,%xmm7 ++ movdqa %xmm8,%xmm9 ++ punpcklqdq %xmm10,%xmm8 ++ movdqa %xmm6,%xmm11 ++ punpcklqdq %xmm7,%xmm6 ++ punpckhqdq %xmm10,%xmm9 ++ punpckhqdq %xmm7,%xmm11 ++ paddd 128-256(%rcx),%xmm12 ++ paddd 144-256(%rcx),%xmm13 ++ paddd 160-256(%rcx),%xmm14 ++ paddd 176-256(%rcx),%xmm15 ++ ++ movdqa %xmm8,0(%rsp) ++ movdqa %xmm9,16(%rsp) ++ movdqa 32(%rsp),%xmm8 ++ movdqa 48(%rsp),%xmm9 ++ ++ movdqa %xmm12,%xmm10 ++ punpckldq %xmm13,%xmm12 ++ movdqa %xmm14,%xmm7 ++ punpckldq %xmm15,%xmm14 ++ punpckhdq %xmm13,%xmm10 ++ punpckhdq %xmm15,%xmm7 ++ movdqa %xmm12,%xmm13 ++ punpcklqdq %xmm14,%xmm12 ++ movdqa %xmm10,%xmm15 ++ punpcklqdq %xmm7,%xmm10 ++ punpckhqdq %xmm14,%xmm13 ++ punpckhqdq %xmm7,%xmm15 ++ paddd 192-256(%rcx),%xmm4 ++ paddd 208-256(%rcx),%xmm5 ++ paddd 224-256(%rcx),%xmm8 ++ paddd 240-256(%rcx),%xmm9 ++ ++ movdqa %xmm6,32(%rsp) ++ movdqa %xmm11,48(%rsp) ++ ++ movdqa %xmm4,%xmm14 ++ punpckldq %xmm5,%xmm4 ++ movdqa %xmm8,%xmm7 ++ punpckldq %xmm9,%xmm8 ++ punpckhdq %xmm5,%xmm14 ++ punpckhdq %xmm9,%xmm7 ++ movdqa %xmm4,%xmm5 ++ punpcklqdq %xmm8,%xmm4 ++ movdqa %xmm14,%xmm9 ++ punpcklqdq %xmm7,%xmm14 ++ punpckhqdq %xmm8,%xmm5 ++ punpckhqdq %xmm7,%xmm9 ++ paddd 256-256(%rcx),%xmm0 ++ paddd 272-256(%rcx),%xmm1 ++ paddd 288-256(%rcx),%xmm2 ++ paddd 304-256(%rcx),%xmm3 ++ ++ movdqa %xmm0,%xmm8 ++ punpckldq %xmm1,%xmm0 ++ movdqa %xmm2,%xmm7 ++ punpckldq %xmm3,%xmm2 ++ punpckhdq %xmm1,%xmm8 ++ punpckhdq %xmm3,%xmm7 ++ movdqa %xmm0,%xmm1 ++ punpcklqdq %xmm2,%xmm0 ++ movdqa %xmm8,%xmm3 ++ punpcklqdq %xmm7,%xmm8 ++ punpckhqdq %xmm2,%xmm1 ++ punpckhqdq %xmm7,%xmm3 ++ cmpq $256,%rdx ++ jb L$tail4x ++ ++ movdqu 0(%rsi),%xmm6 ++ movdqu 16(%rsi),%xmm11 ++ movdqu 32(%rsi),%xmm2 ++ movdqu 48(%rsi),%xmm7 ++ pxor 0(%rsp),%xmm6 ++ pxor %xmm12,%xmm11 ++ pxor %xmm4,%xmm2 ++ pxor %xmm0,%xmm7 ++ ++ movdqu %xmm6,0(%rdi) ++ movdqu 64(%rsi),%xmm6 ++ movdqu %xmm11,16(%rdi) ++ movdqu 80(%rsi),%xmm11 ++ movdqu %xmm2,32(%rdi) ++ movdqu 96(%rsi),%xmm2 ++ movdqu %xmm7,48(%rdi) ++ movdqu 112(%rsi),%xmm7 ++ leaq 128(%rsi),%rsi ++ pxor 16(%rsp),%xmm6 ++ pxor %xmm13,%xmm11 ++ pxor %xmm5,%xmm2 ++ pxor %xmm1,%xmm7 ++ ++ movdqu %xmm6,64(%rdi) ++ movdqu 0(%rsi),%xmm6 ++ movdqu %xmm11,80(%rdi) ++ movdqu 16(%rsi),%xmm11 ++ movdqu %xmm2,96(%rdi) ++ movdqu 32(%rsi),%xmm2 ++ movdqu %xmm7,112(%rdi) ++ leaq 128(%rdi),%rdi ++ movdqu 48(%rsi),%xmm7 ++ pxor 32(%rsp),%xmm6 ++ pxor %xmm10,%xmm11 ++ pxor %xmm14,%xmm2 ++ pxor %xmm8,%xmm7 ++ ++ movdqu %xmm6,0(%rdi) ++ movdqu 64(%rsi),%xmm6 ++ movdqu %xmm11,16(%rdi) ++ movdqu 
80(%rsi),%xmm11 ++ movdqu %xmm2,32(%rdi) ++ movdqu 96(%rsi),%xmm2 ++ movdqu %xmm7,48(%rdi) ++ movdqu 112(%rsi),%xmm7 ++ leaq 128(%rsi),%rsi ++ pxor 48(%rsp),%xmm6 ++ pxor %xmm15,%xmm11 ++ pxor %xmm9,%xmm2 ++ pxor %xmm3,%xmm7 ++ movdqu %xmm6,64(%rdi) ++ movdqu %xmm11,80(%rdi) ++ movdqu %xmm2,96(%rdi) ++ movdqu %xmm7,112(%rdi) ++ leaq 128(%rdi),%rdi ++ ++ subq $256,%rdx ++ jnz L$oop_outer4x ++ ++ jmp L$done4x ++ ++L$tail4x: ++ cmpq $192,%rdx ++ jae L$192_or_more4x ++ cmpq $128,%rdx ++ jae L$128_or_more4x ++ cmpq $64,%rdx ++ jae L$64_or_more4x ++ ++ ++ xorq %r10,%r10 ++ ++ movdqa %xmm12,16(%rsp) ++ movdqa %xmm4,32(%rsp) ++ movdqa %xmm0,48(%rsp) ++ jmp L$oop_tail4x ++ ++.p2align 5 ++L$64_or_more4x: ++ movdqu 0(%rsi),%xmm6 ++ movdqu 16(%rsi),%xmm11 ++ movdqu 32(%rsi),%xmm2 ++ movdqu 48(%rsi),%xmm7 ++ pxor 0(%rsp),%xmm6 ++ pxor %xmm12,%xmm11 ++ pxor %xmm4,%xmm2 ++ pxor %xmm0,%xmm7 ++ movdqu %xmm6,0(%rdi) ++ movdqu %xmm11,16(%rdi) ++ movdqu %xmm2,32(%rdi) ++ movdqu %xmm7,48(%rdi) ++ je L$done4x ++ ++ movdqa 16(%rsp),%xmm6 ++ leaq 64(%rsi),%rsi ++ xorq %r10,%r10 ++ movdqa %xmm6,0(%rsp) ++ movdqa %xmm13,16(%rsp) ++ leaq 64(%rdi),%rdi ++ movdqa %xmm5,32(%rsp) ++ subq $64,%rdx ++ movdqa %xmm1,48(%rsp) ++ jmp L$oop_tail4x ++ ++.p2align 5 ++L$128_or_more4x: ++ movdqu 0(%rsi),%xmm6 ++ movdqu 16(%rsi),%xmm11 ++ movdqu 32(%rsi),%xmm2 ++ movdqu 48(%rsi),%xmm7 ++ pxor 0(%rsp),%xmm6 ++ pxor %xmm12,%xmm11 ++ pxor %xmm4,%xmm2 ++ pxor %xmm0,%xmm7 ++ ++ movdqu %xmm6,0(%rdi) ++ movdqu 64(%rsi),%xmm6 ++ movdqu %xmm11,16(%rdi) ++ movdqu 80(%rsi),%xmm11 ++ movdqu %xmm2,32(%rdi) ++ movdqu 96(%rsi),%xmm2 ++ movdqu %xmm7,48(%rdi) ++ movdqu 112(%rsi),%xmm7 ++ pxor 16(%rsp),%xmm6 ++ pxor %xmm13,%xmm11 ++ pxor %xmm5,%xmm2 ++ pxor %xmm1,%xmm7 ++ movdqu %xmm6,64(%rdi) ++ movdqu %xmm11,80(%rdi) ++ movdqu %xmm2,96(%rdi) ++ movdqu %xmm7,112(%rdi) ++ je L$done4x ++ ++ movdqa 32(%rsp),%xmm6 ++ leaq 128(%rsi),%rsi ++ xorq %r10,%r10 ++ movdqa %xmm6,0(%rsp) ++ movdqa %xmm10,16(%rsp) ++ leaq 128(%rdi),%rdi ++ movdqa %xmm14,32(%rsp) ++ subq $128,%rdx ++ movdqa %xmm8,48(%rsp) ++ jmp L$oop_tail4x ++ ++.p2align 5 ++L$192_or_more4x: ++ movdqu 0(%rsi),%xmm6 ++ movdqu 16(%rsi),%xmm11 ++ movdqu 32(%rsi),%xmm2 ++ movdqu 48(%rsi),%xmm7 ++ pxor 0(%rsp),%xmm6 ++ pxor %xmm12,%xmm11 ++ pxor %xmm4,%xmm2 ++ pxor %xmm0,%xmm7 ++ ++ movdqu %xmm6,0(%rdi) ++ movdqu 64(%rsi),%xmm6 ++ movdqu %xmm11,16(%rdi) ++ movdqu 80(%rsi),%xmm11 ++ movdqu %xmm2,32(%rdi) ++ movdqu 96(%rsi),%xmm2 ++ movdqu %xmm7,48(%rdi) ++ movdqu 112(%rsi),%xmm7 ++ leaq 128(%rsi),%rsi ++ pxor 16(%rsp),%xmm6 ++ pxor %xmm13,%xmm11 ++ pxor %xmm5,%xmm2 ++ pxor %xmm1,%xmm7 ++ ++ movdqu %xmm6,64(%rdi) ++ movdqu 0(%rsi),%xmm6 ++ movdqu %xmm11,80(%rdi) ++ movdqu 16(%rsi),%xmm11 ++ movdqu %xmm2,96(%rdi) ++ movdqu 32(%rsi),%xmm2 ++ movdqu %xmm7,112(%rdi) ++ leaq 128(%rdi),%rdi ++ movdqu 48(%rsi),%xmm7 ++ pxor 32(%rsp),%xmm6 ++ pxor %xmm10,%xmm11 ++ pxor %xmm14,%xmm2 ++ pxor %xmm8,%xmm7 ++ movdqu %xmm6,0(%rdi) ++ movdqu %xmm11,16(%rdi) ++ movdqu %xmm2,32(%rdi) ++ movdqu %xmm7,48(%rdi) ++ je L$done4x ++ ++ movdqa 48(%rsp),%xmm6 ++ leaq 64(%rsi),%rsi ++ xorq %r10,%r10 ++ movdqa %xmm6,0(%rsp) ++ movdqa %xmm15,16(%rsp) ++ leaq 64(%rdi),%rdi ++ movdqa %xmm9,32(%rsp) ++ subq $192,%rdx ++ movdqa %xmm3,48(%rsp) ++ ++L$oop_tail4x: ++ movzbl (%rsi,%r10,1),%eax ++ movzbl (%rsp,%r10,1),%ecx ++ leaq 1(%r10),%r10 ++ xorl %ecx,%eax ++ movb %al,-1(%rdi,%r10,1) ++ decq %rdx ++ jnz L$oop_tail4x ++ ++L$done4x: ++ leaq (%r9),%rsp ++ ++L$4x_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++ChaCha20_8x: 
++L$ChaCha20_8x: ++ ++ movq %rsp,%r9 ++ ++ subq $0x280+8,%rsp ++ andq $-32,%rsp ++ vzeroupper ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ vbroadcasti128 L$sigma(%rip),%ymm11 ++ vbroadcasti128 (%rcx),%ymm3 ++ vbroadcasti128 16(%rcx),%ymm15 ++ vbroadcasti128 (%r8),%ymm7 ++ leaq 256(%rsp),%rcx ++ leaq 512(%rsp),%rax ++ leaq L$rot16(%rip),%r10 ++ leaq L$rot24(%rip),%r11 ++ ++ vpshufd $0x00,%ymm11,%ymm8 ++ vpshufd $0x55,%ymm11,%ymm9 ++ vmovdqa %ymm8,128-256(%rcx) ++ vpshufd $0xaa,%ymm11,%ymm10 ++ vmovdqa %ymm9,160-256(%rcx) ++ vpshufd $0xff,%ymm11,%ymm11 ++ vmovdqa %ymm10,192-256(%rcx) ++ vmovdqa %ymm11,224-256(%rcx) ++ ++ vpshufd $0x00,%ymm3,%ymm0 ++ vpshufd $0x55,%ymm3,%ymm1 ++ vmovdqa %ymm0,256-256(%rcx) ++ vpshufd $0xaa,%ymm3,%ymm2 ++ vmovdqa %ymm1,288-256(%rcx) ++ vpshufd $0xff,%ymm3,%ymm3 ++ vmovdqa %ymm2,320-256(%rcx) ++ vmovdqa %ymm3,352-256(%rcx) ++ ++ vpshufd $0x00,%ymm15,%ymm12 ++ vpshufd $0x55,%ymm15,%ymm13 ++ vmovdqa %ymm12,384-512(%rax) ++ vpshufd $0xaa,%ymm15,%ymm14 ++ vmovdqa %ymm13,416-512(%rax) ++ vpshufd $0xff,%ymm15,%ymm15 ++ vmovdqa %ymm14,448-512(%rax) ++ vmovdqa %ymm15,480-512(%rax) ++ ++ vpshufd $0x00,%ymm7,%ymm4 ++ vpshufd $0x55,%ymm7,%ymm5 ++ vpaddd L$incy(%rip),%ymm4,%ymm4 ++ vpshufd $0xaa,%ymm7,%ymm6 ++ vmovdqa %ymm5,544-512(%rax) ++ vpshufd $0xff,%ymm7,%ymm7 ++ vmovdqa %ymm6,576-512(%rax) ++ vmovdqa %ymm7,608-512(%rax) ++ ++ jmp L$oop_enter8x ++ ++.p2align 5 ++L$oop_outer8x: ++ vmovdqa 128-256(%rcx),%ymm8 ++ vmovdqa 160-256(%rcx),%ymm9 ++ vmovdqa 192-256(%rcx),%ymm10 ++ vmovdqa 224-256(%rcx),%ymm11 ++ vmovdqa 256-256(%rcx),%ymm0 ++ vmovdqa 288-256(%rcx),%ymm1 ++ vmovdqa 320-256(%rcx),%ymm2 ++ vmovdqa 352-256(%rcx),%ymm3 ++ vmovdqa 384-512(%rax),%ymm12 ++ vmovdqa 416-512(%rax),%ymm13 ++ vmovdqa 448-512(%rax),%ymm14 ++ vmovdqa 480-512(%rax),%ymm15 ++ vmovdqa 512-512(%rax),%ymm4 ++ vmovdqa 544-512(%rax),%ymm5 ++ vmovdqa 576-512(%rax),%ymm6 ++ vmovdqa 608-512(%rax),%ymm7 ++ vpaddd L$eight(%rip),%ymm4,%ymm4 ++ ++L$oop_enter8x: ++ vmovdqa %ymm14,64(%rsp) ++ vmovdqa %ymm15,96(%rsp) ++ vbroadcasti128 (%r10),%ymm15 ++ vmovdqa %ymm4,512-512(%rax) ++ movl $10,%eax ++ jmp L$oop8x ++ ++.p2align 5 ++L$oop8x: ++ vpaddd %ymm0,%ymm8,%ymm8 ++ vpxor %ymm4,%ymm8,%ymm4 ++ vpshufb %ymm15,%ymm4,%ymm4 ++ vpaddd %ymm1,%ymm9,%ymm9 ++ vpxor %ymm5,%ymm9,%ymm5 ++ vpshufb %ymm15,%ymm5,%ymm5 ++ vpaddd %ymm4,%ymm12,%ymm12 ++ vpxor %ymm0,%ymm12,%ymm0 ++ vpslld $12,%ymm0,%ymm14 ++ vpsrld $20,%ymm0,%ymm0 ++ vpor %ymm0,%ymm14,%ymm0 ++ vbroadcasti128 (%r11),%ymm14 ++ vpaddd %ymm5,%ymm13,%ymm13 ++ vpxor %ymm1,%ymm13,%ymm1 ++ vpslld $12,%ymm1,%ymm15 ++ vpsrld $20,%ymm1,%ymm1 ++ vpor %ymm1,%ymm15,%ymm1 ++ vpaddd %ymm0,%ymm8,%ymm8 ++ vpxor %ymm4,%ymm8,%ymm4 ++ vpshufb %ymm14,%ymm4,%ymm4 ++ vpaddd %ymm1,%ymm9,%ymm9 ++ vpxor %ymm5,%ymm9,%ymm5 ++ vpshufb %ymm14,%ymm5,%ymm5 ++ vpaddd %ymm4,%ymm12,%ymm12 ++ vpxor %ymm0,%ymm12,%ymm0 ++ vpslld $7,%ymm0,%ymm15 ++ vpsrld $25,%ymm0,%ymm0 ++ vpor %ymm0,%ymm15,%ymm0 ++ vbroadcasti128 (%r10),%ymm15 ++ vpaddd %ymm5,%ymm13,%ymm13 ++ vpxor %ymm1,%ymm13,%ymm1 ++ vpslld $7,%ymm1,%ymm14 ++ vpsrld $25,%ymm1,%ymm1 ++ vpor %ymm1,%ymm14,%ymm1 ++ vmovdqa %ymm12,0(%rsp) ++ vmovdqa %ymm13,32(%rsp) ++ vmovdqa 64(%rsp),%ymm12 ++ vmovdqa 96(%rsp),%ymm13 ++ vpaddd %ymm2,%ymm10,%ymm10 ++ vpxor %ymm6,%ymm10,%ymm6 ++ vpshufb %ymm15,%ymm6,%ymm6 ++ vpaddd %ymm3,%ymm11,%ymm11 ++ vpxor %ymm7,%ymm11,%ymm7 ++ vpshufb %ymm15,%ymm7,%ymm7 ++ vpaddd %ymm6,%ymm12,%ymm12 ++ vpxor %ymm2,%ymm12,%ymm2 ++ vpslld $12,%ymm2,%ymm14 ++ vpsrld $20,%ymm2,%ymm2 ++ vpor %ymm2,%ymm14,%ymm2 ++ vbroadcasti128 (%r11),%ymm14 ++ 
vpaddd %ymm7,%ymm13,%ymm13 ++ vpxor %ymm3,%ymm13,%ymm3 ++ vpslld $12,%ymm3,%ymm15 ++ vpsrld $20,%ymm3,%ymm3 ++ vpor %ymm3,%ymm15,%ymm3 ++ vpaddd %ymm2,%ymm10,%ymm10 ++ vpxor %ymm6,%ymm10,%ymm6 ++ vpshufb %ymm14,%ymm6,%ymm6 ++ vpaddd %ymm3,%ymm11,%ymm11 ++ vpxor %ymm7,%ymm11,%ymm7 ++ vpshufb %ymm14,%ymm7,%ymm7 ++ vpaddd %ymm6,%ymm12,%ymm12 ++ vpxor %ymm2,%ymm12,%ymm2 ++ vpslld $7,%ymm2,%ymm15 ++ vpsrld $25,%ymm2,%ymm2 ++ vpor %ymm2,%ymm15,%ymm2 ++ vbroadcasti128 (%r10),%ymm15 ++ vpaddd %ymm7,%ymm13,%ymm13 ++ vpxor %ymm3,%ymm13,%ymm3 ++ vpslld $7,%ymm3,%ymm14 ++ vpsrld $25,%ymm3,%ymm3 ++ vpor %ymm3,%ymm14,%ymm3 ++ vpaddd %ymm1,%ymm8,%ymm8 ++ vpxor %ymm7,%ymm8,%ymm7 ++ vpshufb %ymm15,%ymm7,%ymm7 ++ vpaddd %ymm2,%ymm9,%ymm9 ++ vpxor %ymm4,%ymm9,%ymm4 ++ vpshufb %ymm15,%ymm4,%ymm4 ++ vpaddd %ymm7,%ymm12,%ymm12 ++ vpxor %ymm1,%ymm12,%ymm1 ++ vpslld $12,%ymm1,%ymm14 ++ vpsrld $20,%ymm1,%ymm1 ++ vpor %ymm1,%ymm14,%ymm1 ++ vbroadcasti128 (%r11),%ymm14 ++ vpaddd %ymm4,%ymm13,%ymm13 ++ vpxor %ymm2,%ymm13,%ymm2 ++ vpslld $12,%ymm2,%ymm15 ++ vpsrld $20,%ymm2,%ymm2 ++ vpor %ymm2,%ymm15,%ymm2 ++ vpaddd %ymm1,%ymm8,%ymm8 ++ vpxor %ymm7,%ymm8,%ymm7 ++ vpshufb %ymm14,%ymm7,%ymm7 ++ vpaddd %ymm2,%ymm9,%ymm9 ++ vpxor %ymm4,%ymm9,%ymm4 ++ vpshufb %ymm14,%ymm4,%ymm4 ++ vpaddd %ymm7,%ymm12,%ymm12 ++ vpxor %ymm1,%ymm12,%ymm1 ++ vpslld $7,%ymm1,%ymm15 ++ vpsrld $25,%ymm1,%ymm1 ++ vpor %ymm1,%ymm15,%ymm1 ++ vbroadcasti128 (%r10),%ymm15 ++ vpaddd %ymm4,%ymm13,%ymm13 ++ vpxor %ymm2,%ymm13,%ymm2 ++ vpslld $7,%ymm2,%ymm14 ++ vpsrld $25,%ymm2,%ymm2 ++ vpor %ymm2,%ymm14,%ymm2 ++ vmovdqa %ymm12,64(%rsp) ++ vmovdqa %ymm13,96(%rsp) ++ vmovdqa 0(%rsp),%ymm12 ++ vmovdqa 32(%rsp),%ymm13 ++ vpaddd %ymm3,%ymm10,%ymm10 ++ vpxor %ymm5,%ymm10,%ymm5 ++ vpshufb %ymm15,%ymm5,%ymm5 ++ vpaddd %ymm0,%ymm11,%ymm11 ++ vpxor %ymm6,%ymm11,%ymm6 ++ vpshufb %ymm15,%ymm6,%ymm6 ++ vpaddd %ymm5,%ymm12,%ymm12 ++ vpxor %ymm3,%ymm12,%ymm3 ++ vpslld $12,%ymm3,%ymm14 ++ vpsrld $20,%ymm3,%ymm3 ++ vpor %ymm3,%ymm14,%ymm3 ++ vbroadcasti128 (%r11),%ymm14 ++ vpaddd %ymm6,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm13,%ymm0 ++ vpslld $12,%ymm0,%ymm15 ++ vpsrld $20,%ymm0,%ymm0 ++ vpor %ymm0,%ymm15,%ymm0 ++ vpaddd %ymm3,%ymm10,%ymm10 ++ vpxor %ymm5,%ymm10,%ymm5 ++ vpshufb %ymm14,%ymm5,%ymm5 ++ vpaddd %ymm0,%ymm11,%ymm11 ++ vpxor %ymm6,%ymm11,%ymm6 ++ vpshufb %ymm14,%ymm6,%ymm6 ++ vpaddd %ymm5,%ymm12,%ymm12 ++ vpxor %ymm3,%ymm12,%ymm3 ++ vpslld $7,%ymm3,%ymm15 ++ vpsrld $25,%ymm3,%ymm3 ++ vpor %ymm3,%ymm15,%ymm3 ++ vbroadcasti128 (%r10),%ymm15 ++ vpaddd %ymm6,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm13,%ymm0 ++ vpslld $7,%ymm0,%ymm14 ++ vpsrld $25,%ymm0,%ymm0 ++ vpor %ymm0,%ymm14,%ymm0 ++ decl %eax ++ jnz L$oop8x ++ ++ leaq 512(%rsp),%rax ++ vpaddd 128-256(%rcx),%ymm8,%ymm8 ++ vpaddd 160-256(%rcx),%ymm9,%ymm9 ++ vpaddd 192-256(%rcx),%ymm10,%ymm10 ++ vpaddd 224-256(%rcx),%ymm11,%ymm11 ++ ++ vpunpckldq %ymm9,%ymm8,%ymm14 ++ vpunpckldq %ymm11,%ymm10,%ymm15 ++ vpunpckhdq %ymm9,%ymm8,%ymm8 ++ vpunpckhdq %ymm11,%ymm10,%ymm10 ++ vpunpcklqdq %ymm15,%ymm14,%ymm9 ++ vpunpckhqdq %ymm15,%ymm14,%ymm14 ++ vpunpcklqdq %ymm10,%ymm8,%ymm11 ++ vpunpckhqdq %ymm10,%ymm8,%ymm8 ++ vpaddd 256-256(%rcx),%ymm0,%ymm0 ++ vpaddd 288-256(%rcx),%ymm1,%ymm1 ++ vpaddd 320-256(%rcx),%ymm2,%ymm2 ++ vpaddd 352-256(%rcx),%ymm3,%ymm3 ++ ++ vpunpckldq %ymm1,%ymm0,%ymm10 ++ vpunpckldq %ymm3,%ymm2,%ymm15 ++ vpunpckhdq %ymm1,%ymm0,%ymm0 ++ vpunpckhdq %ymm3,%ymm2,%ymm2 ++ vpunpcklqdq %ymm15,%ymm10,%ymm1 ++ vpunpckhqdq %ymm15,%ymm10,%ymm10 ++ vpunpcklqdq %ymm2,%ymm0,%ymm3 ++ vpunpckhqdq %ymm2,%ymm0,%ymm0 ++ vperm2i128 
$0x20,%ymm1,%ymm9,%ymm15 ++ vperm2i128 $0x31,%ymm1,%ymm9,%ymm1 ++ vperm2i128 $0x20,%ymm10,%ymm14,%ymm9 ++ vperm2i128 $0x31,%ymm10,%ymm14,%ymm10 ++ vperm2i128 $0x20,%ymm3,%ymm11,%ymm14 ++ vperm2i128 $0x31,%ymm3,%ymm11,%ymm3 ++ vperm2i128 $0x20,%ymm0,%ymm8,%ymm11 ++ vperm2i128 $0x31,%ymm0,%ymm8,%ymm0 ++ vmovdqa %ymm15,0(%rsp) ++ vmovdqa %ymm9,32(%rsp) ++ vmovdqa 64(%rsp),%ymm15 ++ vmovdqa 96(%rsp),%ymm9 ++ ++ vpaddd 384-512(%rax),%ymm12,%ymm12 ++ vpaddd 416-512(%rax),%ymm13,%ymm13 ++ vpaddd 448-512(%rax),%ymm15,%ymm15 ++ vpaddd 480-512(%rax),%ymm9,%ymm9 ++ ++ vpunpckldq %ymm13,%ymm12,%ymm2 ++ vpunpckldq %ymm9,%ymm15,%ymm8 ++ vpunpckhdq %ymm13,%ymm12,%ymm12 ++ vpunpckhdq %ymm9,%ymm15,%ymm15 ++ vpunpcklqdq %ymm8,%ymm2,%ymm13 ++ vpunpckhqdq %ymm8,%ymm2,%ymm2 ++ vpunpcklqdq %ymm15,%ymm12,%ymm9 ++ vpunpckhqdq %ymm15,%ymm12,%ymm12 ++ vpaddd 512-512(%rax),%ymm4,%ymm4 ++ vpaddd 544-512(%rax),%ymm5,%ymm5 ++ vpaddd 576-512(%rax),%ymm6,%ymm6 ++ vpaddd 608-512(%rax),%ymm7,%ymm7 ++ ++ vpunpckldq %ymm5,%ymm4,%ymm15 ++ vpunpckldq %ymm7,%ymm6,%ymm8 ++ vpunpckhdq %ymm5,%ymm4,%ymm4 ++ vpunpckhdq %ymm7,%ymm6,%ymm6 ++ vpunpcklqdq %ymm8,%ymm15,%ymm5 ++ vpunpckhqdq %ymm8,%ymm15,%ymm15 ++ vpunpcklqdq %ymm6,%ymm4,%ymm7 ++ vpunpckhqdq %ymm6,%ymm4,%ymm4 ++ vperm2i128 $0x20,%ymm5,%ymm13,%ymm8 ++ vperm2i128 $0x31,%ymm5,%ymm13,%ymm5 ++ vperm2i128 $0x20,%ymm15,%ymm2,%ymm13 ++ vperm2i128 $0x31,%ymm15,%ymm2,%ymm15 ++ vperm2i128 $0x20,%ymm7,%ymm9,%ymm2 ++ vperm2i128 $0x31,%ymm7,%ymm9,%ymm7 ++ vperm2i128 $0x20,%ymm4,%ymm12,%ymm9 ++ vperm2i128 $0x31,%ymm4,%ymm12,%ymm4 ++ vmovdqa 0(%rsp),%ymm6 ++ vmovdqa 32(%rsp),%ymm12 ++ ++ cmpq $512,%rdx ++ jb L$tail8x ++ ++ vpxor 0(%rsi),%ymm6,%ymm6 ++ vpxor 32(%rsi),%ymm8,%ymm8 ++ vpxor 64(%rsi),%ymm1,%ymm1 ++ vpxor 96(%rsi),%ymm5,%ymm5 ++ leaq 128(%rsi),%rsi ++ vmovdqu %ymm6,0(%rdi) ++ vmovdqu %ymm8,32(%rdi) ++ vmovdqu %ymm1,64(%rdi) ++ vmovdqu %ymm5,96(%rdi) ++ leaq 128(%rdi),%rdi ++ ++ vpxor 0(%rsi),%ymm12,%ymm12 ++ vpxor 32(%rsi),%ymm13,%ymm13 ++ vpxor 64(%rsi),%ymm10,%ymm10 ++ vpxor 96(%rsi),%ymm15,%ymm15 ++ leaq 128(%rsi),%rsi ++ vmovdqu %ymm12,0(%rdi) ++ vmovdqu %ymm13,32(%rdi) ++ vmovdqu %ymm10,64(%rdi) ++ vmovdqu %ymm15,96(%rdi) ++ leaq 128(%rdi),%rdi ++ ++ vpxor 0(%rsi),%ymm14,%ymm14 ++ vpxor 32(%rsi),%ymm2,%ymm2 ++ vpxor 64(%rsi),%ymm3,%ymm3 ++ vpxor 96(%rsi),%ymm7,%ymm7 ++ leaq 128(%rsi),%rsi ++ vmovdqu %ymm14,0(%rdi) ++ vmovdqu %ymm2,32(%rdi) ++ vmovdqu %ymm3,64(%rdi) ++ vmovdqu %ymm7,96(%rdi) ++ leaq 128(%rdi),%rdi ++ ++ vpxor 0(%rsi),%ymm11,%ymm11 ++ vpxor 32(%rsi),%ymm9,%ymm9 ++ vpxor 64(%rsi),%ymm0,%ymm0 ++ vpxor 96(%rsi),%ymm4,%ymm4 ++ leaq 128(%rsi),%rsi ++ vmovdqu %ymm11,0(%rdi) ++ vmovdqu %ymm9,32(%rdi) ++ vmovdqu %ymm0,64(%rdi) ++ vmovdqu %ymm4,96(%rdi) ++ leaq 128(%rdi),%rdi ++ ++ subq $512,%rdx ++ jnz L$oop_outer8x ++ ++ jmp L$done8x ++ ++L$tail8x: ++ cmpq $448,%rdx ++ jae L$448_or_more8x ++ cmpq $384,%rdx ++ jae L$384_or_more8x ++ cmpq $320,%rdx ++ jae L$320_or_more8x ++ cmpq $256,%rdx ++ jae L$256_or_more8x ++ cmpq $192,%rdx ++ jae L$192_or_more8x ++ cmpq $128,%rdx ++ jae L$128_or_more8x ++ cmpq $64,%rdx ++ jae L$64_or_more8x ++ ++ xorq %r10,%r10 ++ vmovdqa %ymm6,0(%rsp) ++ vmovdqa %ymm8,32(%rsp) ++ jmp L$oop_tail8x ++ ++.p2align 5 ++L$64_or_more8x: ++ vpxor 0(%rsi),%ymm6,%ymm6 ++ vpxor 32(%rsi),%ymm8,%ymm8 ++ vmovdqu %ymm6,0(%rdi) ++ vmovdqu %ymm8,32(%rdi) ++ je L$done8x ++ ++ leaq 64(%rsi),%rsi ++ xorq %r10,%r10 ++ vmovdqa %ymm1,0(%rsp) ++ leaq 64(%rdi),%rdi ++ subq $64,%rdx ++ vmovdqa %ymm5,32(%rsp) ++ jmp L$oop_tail8x ++ ++.p2align 5 ++L$128_or_more8x: ++ 
vpxor 0(%rsi),%ymm6,%ymm6 ++ vpxor 32(%rsi),%ymm8,%ymm8 ++ vpxor 64(%rsi),%ymm1,%ymm1 ++ vpxor 96(%rsi),%ymm5,%ymm5 ++ vmovdqu %ymm6,0(%rdi) ++ vmovdqu %ymm8,32(%rdi) ++ vmovdqu %ymm1,64(%rdi) ++ vmovdqu %ymm5,96(%rdi) ++ je L$done8x ++ ++ leaq 128(%rsi),%rsi ++ xorq %r10,%r10 ++ vmovdqa %ymm12,0(%rsp) ++ leaq 128(%rdi),%rdi ++ subq $128,%rdx ++ vmovdqa %ymm13,32(%rsp) ++ jmp L$oop_tail8x ++ ++.p2align 5 ++L$192_or_more8x: ++ vpxor 0(%rsi),%ymm6,%ymm6 ++ vpxor 32(%rsi),%ymm8,%ymm8 ++ vpxor 64(%rsi),%ymm1,%ymm1 ++ vpxor 96(%rsi),%ymm5,%ymm5 ++ vpxor 128(%rsi),%ymm12,%ymm12 ++ vpxor 160(%rsi),%ymm13,%ymm13 ++ vmovdqu %ymm6,0(%rdi) ++ vmovdqu %ymm8,32(%rdi) ++ vmovdqu %ymm1,64(%rdi) ++ vmovdqu %ymm5,96(%rdi) ++ vmovdqu %ymm12,128(%rdi) ++ vmovdqu %ymm13,160(%rdi) ++ je L$done8x ++ ++ leaq 192(%rsi),%rsi ++ xorq %r10,%r10 ++ vmovdqa %ymm10,0(%rsp) ++ leaq 192(%rdi),%rdi ++ subq $192,%rdx ++ vmovdqa %ymm15,32(%rsp) ++ jmp L$oop_tail8x ++ ++.p2align 5 ++L$256_or_more8x: ++ vpxor 0(%rsi),%ymm6,%ymm6 ++ vpxor 32(%rsi),%ymm8,%ymm8 ++ vpxor 64(%rsi),%ymm1,%ymm1 ++ vpxor 96(%rsi),%ymm5,%ymm5 ++ vpxor 128(%rsi),%ymm12,%ymm12 ++ vpxor 160(%rsi),%ymm13,%ymm13 ++ vpxor 192(%rsi),%ymm10,%ymm10 ++ vpxor 224(%rsi),%ymm15,%ymm15 ++ vmovdqu %ymm6,0(%rdi) ++ vmovdqu %ymm8,32(%rdi) ++ vmovdqu %ymm1,64(%rdi) ++ vmovdqu %ymm5,96(%rdi) ++ vmovdqu %ymm12,128(%rdi) ++ vmovdqu %ymm13,160(%rdi) ++ vmovdqu %ymm10,192(%rdi) ++ vmovdqu %ymm15,224(%rdi) ++ je L$done8x ++ ++ leaq 256(%rsi),%rsi ++ xorq %r10,%r10 ++ vmovdqa %ymm14,0(%rsp) ++ leaq 256(%rdi),%rdi ++ subq $256,%rdx ++ vmovdqa %ymm2,32(%rsp) ++ jmp L$oop_tail8x ++ ++.p2align 5 ++L$320_or_more8x: ++ vpxor 0(%rsi),%ymm6,%ymm6 ++ vpxor 32(%rsi),%ymm8,%ymm8 ++ vpxor 64(%rsi),%ymm1,%ymm1 ++ vpxor 96(%rsi),%ymm5,%ymm5 ++ vpxor 128(%rsi),%ymm12,%ymm12 ++ vpxor 160(%rsi),%ymm13,%ymm13 ++ vpxor 192(%rsi),%ymm10,%ymm10 ++ vpxor 224(%rsi),%ymm15,%ymm15 ++ vpxor 256(%rsi),%ymm14,%ymm14 ++ vpxor 288(%rsi),%ymm2,%ymm2 ++ vmovdqu %ymm6,0(%rdi) ++ vmovdqu %ymm8,32(%rdi) ++ vmovdqu %ymm1,64(%rdi) ++ vmovdqu %ymm5,96(%rdi) ++ vmovdqu %ymm12,128(%rdi) ++ vmovdqu %ymm13,160(%rdi) ++ vmovdqu %ymm10,192(%rdi) ++ vmovdqu %ymm15,224(%rdi) ++ vmovdqu %ymm14,256(%rdi) ++ vmovdqu %ymm2,288(%rdi) ++ je L$done8x ++ ++ leaq 320(%rsi),%rsi ++ xorq %r10,%r10 ++ vmovdqa %ymm3,0(%rsp) ++ leaq 320(%rdi),%rdi ++ subq $320,%rdx ++ vmovdqa %ymm7,32(%rsp) ++ jmp L$oop_tail8x ++ ++.p2align 5 ++L$384_or_more8x: ++ vpxor 0(%rsi),%ymm6,%ymm6 ++ vpxor 32(%rsi),%ymm8,%ymm8 ++ vpxor 64(%rsi),%ymm1,%ymm1 ++ vpxor 96(%rsi),%ymm5,%ymm5 ++ vpxor 128(%rsi),%ymm12,%ymm12 ++ vpxor 160(%rsi),%ymm13,%ymm13 ++ vpxor 192(%rsi),%ymm10,%ymm10 ++ vpxor 224(%rsi),%ymm15,%ymm15 ++ vpxor 256(%rsi),%ymm14,%ymm14 ++ vpxor 288(%rsi),%ymm2,%ymm2 ++ vpxor 320(%rsi),%ymm3,%ymm3 ++ vpxor 352(%rsi),%ymm7,%ymm7 ++ vmovdqu %ymm6,0(%rdi) ++ vmovdqu %ymm8,32(%rdi) ++ vmovdqu %ymm1,64(%rdi) ++ vmovdqu %ymm5,96(%rdi) ++ vmovdqu %ymm12,128(%rdi) ++ vmovdqu %ymm13,160(%rdi) ++ vmovdqu %ymm10,192(%rdi) ++ vmovdqu %ymm15,224(%rdi) ++ vmovdqu %ymm14,256(%rdi) ++ vmovdqu %ymm2,288(%rdi) ++ vmovdqu %ymm3,320(%rdi) ++ vmovdqu %ymm7,352(%rdi) ++ je L$done8x ++ ++ leaq 384(%rsi),%rsi ++ xorq %r10,%r10 ++ vmovdqa %ymm11,0(%rsp) ++ leaq 384(%rdi),%rdi ++ subq $384,%rdx ++ vmovdqa %ymm9,32(%rsp) ++ jmp L$oop_tail8x ++ ++.p2align 5 ++L$448_or_more8x: ++ vpxor 0(%rsi),%ymm6,%ymm6 ++ vpxor 32(%rsi),%ymm8,%ymm8 ++ vpxor 64(%rsi),%ymm1,%ymm1 ++ vpxor 96(%rsi),%ymm5,%ymm5 ++ vpxor 128(%rsi),%ymm12,%ymm12 ++ vpxor 160(%rsi),%ymm13,%ymm13 ++ vpxor 
192(%rsi),%ymm10,%ymm10 ++ vpxor 224(%rsi),%ymm15,%ymm15 ++ vpxor 256(%rsi),%ymm14,%ymm14 ++ vpxor 288(%rsi),%ymm2,%ymm2 ++ vpxor 320(%rsi),%ymm3,%ymm3 ++ vpxor 352(%rsi),%ymm7,%ymm7 ++ vpxor 384(%rsi),%ymm11,%ymm11 ++ vpxor 416(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm6,0(%rdi) ++ vmovdqu %ymm8,32(%rdi) ++ vmovdqu %ymm1,64(%rdi) ++ vmovdqu %ymm5,96(%rdi) ++ vmovdqu %ymm12,128(%rdi) ++ vmovdqu %ymm13,160(%rdi) ++ vmovdqu %ymm10,192(%rdi) ++ vmovdqu %ymm15,224(%rdi) ++ vmovdqu %ymm14,256(%rdi) ++ vmovdqu %ymm2,288(%rdi) ++ vmovdqu %ymm3,320(%rdi) ++ vmovdqu %ymm7,352(%rdi) ++ vmovdqu %ymm11,384(%rdi) ++ vmovdqu %ymm9,416(%rdi) ++ je L$done8x ++ ++ leaq 448(%rsi),%rsi ++ xorq %r10,%r10 ++ vmovdqa %ymm0,0(%rsp) ++ leaq 448(%rdi),%rdi ++ subq $448,%rdx ++ vmovdqa %ymm4,32(%rsp) ++ ++L$oop_tail8x: ++ movzbl (%rsi,%r10,1),%eax ++ movzbl (%rsp,%r10,1),%ecx ++ leaq 1(%r10),%r10 ++ xorl %ecx,%eax ++ movb %al,-1(%rdi,%r10,1) ++ decq %rdx ++ jnz L$oop_tail8x ++ ++L$done8x: ++ vzeroall ++ leaq (%r9),%rsp ++ ++L$8x_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++#endif +diff --git a/apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S b/apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S +new file mode 100644 +index 0000000..f988089 +--- /dev/null ++++ b/apple-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S +@@ -0,0 +1,3068 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.data ++ ++.p2align 4 ++one: ++.quad 1,0 ++two: ++.quad 2,0 ++three: ++.quad 3,0 ++four: ++.quad 4,0 ++five: ++.quad 5,0 ++six: ++.quad 6,0 ++seven: ++.quad 7,0 ++eight: ++.quad 8,0 ++ ++OR_MASK: ++.long 0x00000000,0x00000000,0x00000000,0x80000000 ++poly: ++.quad 0x1, 0xc200000000000000 ++mask: ++.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d ++con1: ++.long 1,1,1,1 ++con2: ++.long 0x1b,0x1b,0x1b,0x1b ++con3: ++.byte -1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7 ++and_mask: ++.long 0,0xffffffff, 0xffffffff, 0xffffffff ++.text ++ ++.p2align 4 ++GFMUL: ++ ++ vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 ++ vpclmulqdq $0x11,%xmm1,%xmm0,%xmm5 ++ vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 ++ vpclmulqdq $0x01,%xmm1,%xmm0,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $8,%xmm3,%xmm4 ++ vpsrldq $8,%xmm3,%xmm3 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpxor %xmm3,%xmm5,%xmm5 ++ ++ vpclmulqdq $0x10,poly(%rip),%xmm2,%xmm3 ++ vpshufd $78,%xmm2,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm2 ++ ++ vpclmulqdq $0x10,poly(%rip),%xmm2,%xmm3 ++ vpshufd $78,%xmm2,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm2 ++ ++ vpxor %xmm5,%xmm2,%xmm0 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aesgcmsiv_htable_init ++.private_extern _aesgcmsiv_htable_init ++ ++.p2align 4 ++_aesgcmsiv_htable_init: ++ ++ vmovdqa (%rsi),%xmm0 ++ vmovdqa %xmm0,%xmm1 ++ vmovdqa %xmm0,(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,16(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,32(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,48(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,64(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,80(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,96(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,112(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aesgcmsiv_htable6_init ++.private_extern _aesgcmsiv_htable6_init ++ ++.p2align 4 ++_aesgcmsiv_htable6_init: ++ ++ vmovdqa (%rsi),%xmm0 ++ vmovdqa %xmm0,%xmm1 ++ vmovdqa %xmm0,(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,16(%rdi) ++ call GFMUL ++ vmovdqa 
%xmm0,32(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,48(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,64(%rdi) ++ call GFMUL ++ vmovdqa %xmm0,80(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aesgcmsiv_htable_polyval ++.private_extern _aesgcmsiv_htable_polyval ++ ++.p2align 4 ++_aesgcmsiv_htable_polyval: ++ ++ testq %rdx,%rdx ++ jnz L$htable_polyval_start ++ .byte 0xf3,0xc3 ++ ++L$htable_polyval_start: ++ vzeroall ++ ++ ++ ++ movq %rdx,%r11 ++ andq $127,%r11 ++ ++ jz L$htable_polyval_no_prefix ++ ++ vpxor %xmm9,%xmm9,%xmm9 ++ vmovdqa (%rcx),%xmm1 ++ subq %r11,%rdx ++ ++ subq $16,%r11 ++ ++ ++ vmovdqu (%rsi),%xmm0 ++ vpxor %xmm1,%xmm0,%xmm0 ++ ++ vpclmulqdq $0x01,(%rdi,%r11,1),%xmm0,%xmm5 ++ vpclmulqdq $0x00,(%rdi,%r11,1),%xmm0,%xmm3 ++ vpclmulqdq $0x11,(%rdi,%r11,1),%xmm0,%xmm4 ++ vpclmulqdq $0x10,(%rdi,%r11,1),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ leaq 16(%rsi),%rsi ++ testq %r11,%r11 ++ jnz L$htable_polyval_prefix_loop ++ jmp L$htable_polyval_prefix_complete ++ ++ ++.p2align 6 ++L$htable_polyval_prefix_loop: ++ subq $16,%r11 ++ ++ vmovdqu (%rsi),%xmm0 ++ ++ vpclmulqdq $0x00,(%rdi,%r11,1),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm3,%xmm3 ++ vpclmulqdq $0x11,(%rdi,%r11,1),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm4,%xmm4 ++ vpclmulqdq $0x01,(%rdi,%r11,1),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x10,(%rdi,%r11,1),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ testq %r11,%r11 ++ ++ leaq 16(%rsi),%rsi ++ ++ jnz L$htable_polyval_prefix_loop ++ ++L$htable_polyval_prefix_complete: ++ vpsrldq $8,%xmm5,%xmm6 ++ vpslldq $8,%xmm5,%xmm5 ++ ++ vpxor %xmm6,%xmm4,%xmm9 ++ vpxor %xmm5,%xmm3,%xmm1 ++ ++ jmp L$htable_polyval_main_loop ++ ++L$htable_polyval_no_prefix: ++ ++ ++ ++ ++ vpxor %xmm1,%xmm1,%xmm1 ++ vmovdqa (%rcx),%xmm9 ++ ++.p2align 6 ++L$htable_polyval_main_loop: ++ subq $0x80,%rdx ++ jb L$htable_polyval_out ++ ++ vmovdqu 112(%rsi),%xmm0 ++ ++ vpclmulqdq $0x01,(%rdi),%xmm0,%xmm5 ++ vpclmulqdq $0x00,(%rdi),%xmm0,%xmm3 ++ vpclmulqdq $0x11,(%rdi),%xmm0,%xmm4 ++ vpclmulqdq $0x10,(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ ++ vmovdqu 96(%rsi),%xmm0 ++ vpclmulqdq $0x01,16(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x00,16(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm3,%xmm3 ++ vpclmulqdq $0x11,16(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm4,%xmm4 ++ vpclmulqdq $0x10,16(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ ++ ++ vmovdqu 80(%rsi),%xmm0 ++ ++ vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm7 ++ vpalignr $8,%xmm1,%xmm1,%xmm1 ++ ++ vpclmulqdq $0x01,32(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x00,32(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm3,%xmm3 ++ vpclmulqdq $0x11,32(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm4,%xmm4 ++ vpclmulqdq $0x10,32(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ ++ vpxor %xmm7,%xmm1,%xmm1 ++ ++ vmovdqu 64(%rsi),%xmm0 ++ ++ vpclmulqdq $0x01,48(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x00,48(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm3,%xmm3 ++ vpclmulqdq $0x11,48(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm4,%xmm4 ++ vpclmulqdq $0x10,48(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ ++ vmovdqu 48(%rsi),%xmm0 ++ ++ vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm7 ++ vpalignr $8,%xmm1,%xmm1,%xmm1 ++ ++ vpclmulqdq $0x01,64(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x00,64(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm3,%xmm3 ++ vpclmulqdq $0x11,64(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm4,%xmm4 ++ vpclmulqdq $0x10,64(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ ++ vpxor %xmm7,%xmm1,%xmm1 ++ ++ vmovdqu 32(%rsi),%xmm0 ++ ++ vpclmulqdq $0x01,80(%rdi),%xmm0,%xmm6 ++ 
vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x00,80(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm3,%xmm3 ++ vpclmulqdq $0x11,80(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm4,%xmm4 ++ vpclmulqdq $0x10,80(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ ++ vpxor %xmm9,%xmm1,%xmm1 ++ ++ vmovdqu 16(%rsi),%xmm0 ++ ++ vpclmulqdq $0x01,96(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x00,96(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm3,%xmm3 ++ vpclmulqdq $0x11,96(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm4,%xmm4 ++ vpclmulqdq $0x10,96(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ ++ vmovdqu 0(%rsi),%xmm0 ++ vpxor %xmm1,%xmm0,%xmm0 ++ ++ vpclmulqdq $0x01,112(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x00,112(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm3,%xmm3 ++ vpclmulqdq $0x11,112(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm4,%xmm4 ++ vpclmulqdq $0x10,112(%rdi),%xmm0,%xmm6 ++ vpxor %xmm6,%xmm5,%xmm5 ++ ++ ++ vpsrldq $8,%xmm5,%xmm6 ++ vpslldq $8,%xmm5,%xmm5 ++ ++ vpxor %xmm6,%xmm4,%xmm9 ++ vpxor %xmm5,%xmm3,%xmm1 ++ ++ leaq 128(%rsi),%rsi ++ jmp L$htable_polyval_main_loop ++ ++ ++ ++L$htable_polyval_out: ++ vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm6 ++ vpalignr $8,%xmm1,%xmm1,%xmm1 ++ vpxor %xmm6,%xmm1,%xmm1 ++ ++ vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm6 ++ vpalignr $8,%xmm1,%xmm1,%xmm1 ++ vpxor %xmm6,%xmm1,%xmm1 ++ vpxor %xmm9,%xmm1,%xmm1 ++ ++ vmovdqu %xmm1,(%rcx) ++ vzeroupper ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aesgcmsiv_polyval_horner ++.private_extern _aesgcmsiv_polyval_horner ++ ++.p2align 4 ++_aesgcmsiv_polyval_horner: ++ ++ testq %rcx,%rcx ++ jnz L$polyval_horner_start ++ .byte 0xf3,0xc3 ++ ++L$polyval_horner_start: ++ ++ ++ ++ xorq %r10,%r10 ++ shlq $4,%rcx ++ ++ vmovdqa (%rsi),%xmm1 ++ vmovdqa (%rdi),%xmm0 ++ ++L$polyval_horner_loop: ++ vpxor (%rdx,%r10,1),%xmm0,%xmm0 ++ call GFMUL ++ ++ addq $16,%r10 ++ cmpq %r10,%rcx ++ jne L$polyval_horner_loop ++ ++ ++ vmovdqa %xmm0,(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes128gcmsiv_aes_ks ++.private_extern _aes128gcmsiv_aes_ks ++ ++.p2align 4 ++_aes128gcmsiv_aes_ks: ++ ++ vmovdqu (%rdi),%xmm1 ++ vmovdqa %xmm1,(%rsi) ++ ++ vmovdqa con1(%rip),%xmm0 ++ vmovdqa mask(%rip),%xmm15 ++ ++ movq $8,%rax ++ ++L$ks128_loop: ++ addq $16,%rsi ++ subq $1,%rax ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpslldq $4,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpslldq $4,%xmm3,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpslldq $4,%xmm3,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vmovdqa %xmm1,(%rsi) ++ jne L$ks128_loop ++ ++ vmovdqa con2(%rip),%xmm0 ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpslldq $4,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpslldq $4,%xmm3,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpslldq $4,%xmm3,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vmovdqa %xmm1,16(%rsi) ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslldq $4,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpslldq $4,%xmm3,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpslldq $4,%xmm3,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vmovdqa %xmm1,32(%rsi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes256gcmsiv_aes_ks ++.private_extern _aes256gcmsiv_aes_ks ++ ++.p2align 4 ++_aes256gcmsiv_aes_ks: ++ ++ vmovdqu (%rdi),%xmm1 ++ vmovdqu 16(%rdi),%xmm3 ++ vmovdqa %xmm1,(%rsi) ++ vmovdqa %xmm3,16(%rsi) ++ vmovdqa con1(%rip),%xmm0 ++ vmovdqa mask(%rip),%xmm15 ++ vpxor %xmm14,%xmm14,%xmm14 ++ movq $6,%rax ++ ++L$ks256_loop: ++ addq $32,%rsi ++ subq $1,%rax ++ vpshufb 
%xmm15,%xmm3,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vmovdqa %xmm1,(%rsi) ++ vpshufd $0xff,%xmm1,%xmm2 ++ vaesenclast %xmm14,%xmm2,%xmm2 ++ vpsllq $32,%xmm3,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpshufb con3(%rip),%xmm3,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpxor %xmm2,%xmm3,%xmm3 ++ vmovdqa %xmm3,16(%rsi) ++ jne L$ks256_loop ++ ++ vpshufb %xmm15,%xmm3,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpsllq $32,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vmovdqa %xmm1,32(%rsi) ++ .byte 0xf3,0xc3 ++ ++.globl _aes128gcmsiv_aes_ks_enc_x1 ++.private_extern _aes128gcmsiv_aes_ks_enc_x1 ++ ++.p2align 4 ++_aes128gcmsiv_aes_ks_enc_x1: ++ ++ vmovdqa (%rcx),%xmm1 ++ vmovdqa 0(%rdi),%xmm4 ++ ++ vmovdqa %xmm1,(%rdx) ++ vpxor %xmm1,%xmm4,%xmm4 ++ ++ vmovdqa con1(%rip),%xmm0 ++ vmovdqa mask(%rip),%xmm15 ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,16(%rdx) ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,32(%rdx) ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,48(%rdx) ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,64(%rdx) ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,80(%rdx) ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,96(%rdx) ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,112(%rdx) ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,128(%rdx) ++ ++ ++ vmovdqa con2(%rip),%xmm0 ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb 
con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,144(%rdx) ++ ++ vpshufb %xmm15,%xmm1,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpsllq $32,%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpshufb con3(%rip),%xmm1,%xmm3 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ ++ vaesenclast %xmm1,%xmm4,%xmm4 ++ vmovdqa %xmm1,160(%rdx) ++ ++ ++ vmovdqa %xmm4,0(%rsi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes128gcmsiv_kdf ++.private_extern _aes128gcmsiv_kdf ++ ++.p2align 4 ++_aes128gcmsiv_kdf: ++ ++ ++ ++ ++ ++ vmovdqa (%rdx),%xmm1 ++ vmovdqa 0(%rdi),%xmm9 ++ vmovdqa and_mask(%rip),%xmm12 ++ vmovdqa one(%rip),%xmm13 ++ vpshufd $0x90,%xmm9,%xmm9 ++ vpand %xmm12,%xmm9,%xmm9 ++ vpaddd %xmm13,%xmm9,%xmm10 ++ vpaddd %xmm13,%xmm10,%xmm11 ++ vpaddd %xmm13,%xmm11,%xmm12 ++ ++ vpxor %xmm1,%xmm9,%xmm9 ++ vpxor %xmm1,%xmm10,%xmm10 ++ vpxor %xmm1,%xmm11,%xmm11 ++ vpxor %xmm1,%xmm12,%xmm12 ++ ++ vmovdqa 16(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ ++ vmovdqa 32(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm9,%xmm9 ++ vaesenc %xmm2,%xmm10,%xmm10 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ ++ vmovdqa 48(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ ++ vmovdqa 64(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm9,%xmm9 ++ vaesenc %xmm2,%xmm10,%xmm10 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ ++ vmovdqa 80(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ ++ vmovdqa 96(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm9,%xmm9 ++ vaesenc %xmm2,%xmm10,%xmm10 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ ++ vmovdqa 112(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ ++ vmovdqa 128(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm9,%xmm9 ++ vaesenc %xmm2,%xmm10,%xmm10 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ ++ vmovdqa 144(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ ++ vmovdqa 160(%rdx),%xmm2 ++ vaesenclast %xmm2,%xmm9,%xmm9 ++ vaesenclast %xmm2,%xmm10,%xmm10 ++ vaesenclast %xmm2,%xmm11,%xmm11 ++ vaesenclast %xmm2,%xmm12,%xmm12 ++ ++ ++ vmovdqa %xmm9,0(%rsi) ++ vmovdqa %xmm10,16(%rsi) ++ vmovdqa %xmm11,32(%rsi) ++ vmovdqa %xmm12,48(%rsi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes128gcmsiv_enc_msg_x4 ++.private_extern _aes128gcmsiv_enc_msg_x4 ++ ++.p2align 4 ++_aes128gcmsiv_enc_msg_x4: ++ ++ testq %r8,%r8 ++ jnz L$128_enc_msg_x4_start ++ .byte 0xf3,0xc3 ++ ++L$128_enc_msg_x4_start: ++ pushq %r12 ++ ++ pushq %r13 ++ ++ ++ shrq $4,%r8 ++ movq %r8,%r10 ++ shlq $62,%r10 ++ shrq $62,%r10 ++ ++ ++ vmovdqa (%rdx),%xmm15 ++ vpor OR_MASK(%rip),%xmm15,%xmm15 ++ ++ vmovdqu four(%rip),%xmm4 ++ vmovdqa %xmm15,%xmm0 ++ vpaddd one(%rip),%xmm15,%xmm1 ++ vpaddd two(%rip),%xmm15,%xmm2 ++ vpaddd three(%rip),%xmm15,%xmm3 ++ ++ shrq $2,%r8 ++ je L$128_enc_msg_x4_check_remainder ++ ++ subq $64,%rsi ++ subq $64,%rdi ++ ++L$128_enc_msg_x4_loop1: ++ addq $64,%rsi ++ addq $64,%rdi ++ ++ vmovdqa %xmm0,%xmm5 ++ vmovdqa %xmm1,%xmm6 ++ vmovdqa %xmm2,%xmm7 ++ vmovdqa %xmm3,%xmm8 ++ ++ vpxor (%rcx),%xmm5,%xmm5 ++ vpxor (%rcx),%xmm6,%xmm6 ++ vpxor (%rcx),%xmm7,%xmm7 ++ vpxor (%rcx),%xmm8,%xmm8 ++ ++ vmovdqu 
16(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vpaddd %xmm4,%xmm0,%xmm0 ++ vmovdqu 32(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vpaddd %xmm4,%xmm1,%xmm1 ++ vmovdqu 48(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vpaddd %xmm4,%xmm2,%xmm2 ++ vmovdqu 64(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vpaddd %xmm4,%xmm3,%xmm3 ++ ++ vmovdqu 80(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 96(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 112(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 128(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 144(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 160(%rcx),%xmm12 ++ vaesenclast %xmm12,%xmm5,%xmm5 ++ vaesenclast %xmm12,%xmm6,%xmm6 ++ vaesenclast %xmm12,%xmm7,%xmm7 ++ vaesenclast %xmm12,%xmm8,%xmm8 ++ ++ ++ ++ vpxor 0(%rdi),%xmm5,%xmm5 ++ vpxor 16(%rdi),%xmm6,%xmm6 ++ vpxor 32(%rdi),%xmm7,%xmm7 ++ vpxor 48(%rdi),%xmm8,%xmm8 ++ ++ subq $1,%r8 ++ ++ vmovdqu %xmm5,0(%rsi) ++ vmovdqu %xmm6,16(%rsi) ++ vmovdqu %xmm7,32(%rsi) ++ vmovdqu %xmm8,48(%rsi) ++ ++ jne L$128_enc_msg_x4_loop1 ++ ++ addq $64,%rsi ++ addq $64,%rdi ++ ++L$128_enc_msg_x4_check_remainder: ++ cmpq $0,%r10 ++ je L$128_enc_msg_x4_out ++ ++L$128_enc_msg_x4_loop2: ++ ++ ++ vmovdqa %xmm0,%xmm5 ++ vpaddd one(%rip),%xmm0,%xmm0 ++ ++ vpxor (%rcx),%xmm5,%xmm5 ++ vaesenc 16(%rcx),%xmm5,%xmm5 ++ vaesenc 32(%rcx),%xmm5,%xmm5 ++ vaesenc 48(%rcx),%xmm5,%xmm5 ++ vaesenc 64(%rcx),%xmm5,%xmm5 ++ vaesenc 80(%rcx),%xmm5,%xmm5 ++ vaesenc 96(%rcx),%xmm5,%xmm5 ++ vaesenc 112(%rcx),%xmm5,%xmm5 ++ vaesenc 128(%rcx),%xmm5,%xmm5 ++ vaesenc 144(%rcx),%xmm5,%xmm5 ++ vaesenclast 160(%rcx),%xmm5,%xmm5 ++ ++ ++ vpxor (%rdi),%xmm5,%xmm5 ++ vmovdqu %xmm5,(%rsi) ++ ++ addq $16,%rdi ++ addq $16,%rsi ++ ++ subq $1,%r10 ++ jne L$128_enc_msg_x4_loop2 ++ ++L$128_enc_msg_x4_out: ++ popq %r13 ++ ++ popq %r12 ++ ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes128gcmsiv_enc_msg_x8 ++.private_extern _aes128gcmsiv_enc_msg_x8 ++ ++.p2align 4 ++_aes128gcmsiv_enc_msg_x8: ++ ++ testq %r8,%r8 ++ jnz L$128_enc_msg_x8_start ++ .byte 0xf3,0xc3 ++ ++L$128_enc_msg_x8_start: ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %rbp ++ ++ movq %rsp,%rbp ++ ++ ++ ++ subq $128,%rsp ++ andq $-64,%rsp ++ ++ shrq $4,%r8 ++ movq %r8,%r10 ++ shlq $61,%r10 ++ shrq $61,%r10 ++ ++ ++ vmovdqu (%rdx),%xmm1 ++ vpor OR_MASK(%rip),%xmm1,%xmm1 ++ ++ ++ vpaddd seven(%rip),%xmm1,%xmm0 ++ vmovdqu %xmm0,(%rsp) ++ vpaddd one(%rip),%xmm1,%xmm9 ++ vpaddd two(%rip),%xmm1,%xmm10 ++ vpaddd three(%rip),%xmm1,%xmm11 ++ vpaddd four(%rip),%xmm1,%xmm12 ++ vpaddd five(%rip),%xmm1,%xmm13 ++ vpaddd six(%rip),%xmm1,%xmm14 ++ vmovdqa %xmm1,%xmm0 ++ ++ shrq $3,%r8 ++ je L$128_enc_msg_x8_check_remainder ++ ++ subq $128,%rsi ++ subq $128,%rdi ++ 
++L$128_enc_msg_x8_loop1: ++ addq $128,%rsi ++ addq $128,%rdi ++ ++ vmovdqa %xmm0,%xmm1 ++ vmovdqa %xmm9,%xmm2 ++ vmovdqa %xmm10,%xmm3 ++ vmovdqa %xmm11,%xmm4 ++ vmovdqa %xmm12,%xmm5 ++ vmovdqa %xmm13,%xmm6 ++ vmovdqa %xmm14,%xmm7 ++ ++ vmovdqu (%rsp),%xmm8 ++ ++ vpxor (%rcx),%xmm1,%xmm1 ++ vpxor (%rcx),%xmm2,%xmm2 ++ vpxor (%rcx),%xmm3,%xmm3 ++ vpxor (%rcx),%xmm4,%xmm4 ++ vpxor (%rcx),%xmm5,%xmm5 ++ vpxor (%rcx),%xmm6,%xmm6 ++ vpxor (%rcx),%xmm7,%xmm7 ++ vpxor (%rcx),%xmm8,%xmm8 ++ ++ vmovdqu 16(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu (%rsp),%xmm14 ++ vpaddd eight(%rip),%xmm14,%xmm14 ++ vmovdqu %xmm14,(%rsp) ++ vmovdqu 32(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpsubd one(%rip),%xmm14,%xmm14 ++ vmovdqu 48(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm0,%xmm0 ++ vmovdqu 64(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm9,%xmm9 ++ vmovdqu 80(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm10,%xmm10 ++ vmovdqu 96(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm11,%xmm11 ++ vmovdqu 112(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm12,%xmm12 ++ vmovdqu 128(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm13,%xmm13 ++ vmovdqu 144(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 160(%rcx),%xmm15 ++ vaesenclast %xmm15,%xmm1,%xmm1 ++ vaesenclast %xmm15,%xmm2,%xmm2 ++ vaesenclast %xmm15,%xmm3,%xmm3 ++ vaesenclast %xmm15,%xmm4,%xmm4 ++ vaesenclast %xmm15,%xmm5,%xmm5 ++ vaesenclast %xmm15,%xmm6,%xmm6 ++ vaesenclast %xmm15,%xmm7,%xmm7 ++ vaesenclast %xmm15,%xmm8,%xmm8 
++ ++ ++ ++ vpxor 0(%rdi),%xmm1,%xmm1 ++ vpxor 16(%rdi),%xmm2,%xmm2 ++ vpxor 32(%rdi),%xmm3,%xmm3 ++ vpxor 48(%rdi),%xmm4,%xmm4 ++ vpxor 64(%rdi),%xmm5,%xmm5 ++ vpxor 80(%rdi),%xmm6,%xmm6 ++ vpxor 96(%rdi),%xmm7,%xmm7 ++ vpxor 112(%rdi),%xmm8,%xmm8 ++ ++ decq %r8 ++ ++ vmovdqu %xmm1,0(%rsi) ++ vmovdqu %xmm2,16(%rsi) ++ vmovdqu %xmm3,32(%rsi) ++ vmovdqu %xmm4,48(%rsi) ++ vmovdqu %xmm5,64(%rsi) ++ vmovdqu %xmm6,80(%rsi) ++ vmovdqu %xmm7,96(%rsi) ++ vmovdqu %xmm8,112(%rsi) ++ ++ jne L$128_enc_msg_x8_loop1 ++ ++ addq $128,%rsi ++ addq $128,%rdi ++ ++L$128_enc_msg_x8_check_remainder: ++ cmpq $0,%r10 ++ je L$128_enc_msg_x8_out ++ ++L$128_enc_msg_x8_loop2: ++ ++ ++ vmovdqa %xmm0,%xmm1 ++ vpaddd one(%rip),%xmm0,%xmm0 ++ ++ vpxor (%rcx),%xmm1,%xmm1 ++ vaesenc 16(%rcx),%xmm1,%xmm1 ++ vaesenc 32(%rcx),%xmm1,%xmm1 ++ vaesenc 48(%rcx),%xmm1,%xmm1 ++ vaesenc 64(%rcx),%xmm1,%xmm1 ++ vaesenc 80(%rcx),%xmm1,%xmm1 ++ vaesenc 96(%rcx),%xmm1,%xmm1 ++ vaesenc 112(%rcx),%xmm1,%xmm1 ++ vaesenc 128(%rcx),%xmm1,%xmm1 ++ vaesenc 144(%rcx),%xmm1,%xmm1 ++ vaesenclast 160(%rcx),%xmm1,%xmm1 ++ ++ ++ vpxor (%rdi),%xmm1,%xmm1 ++ ++ vmovdqu %xmm1,(%rsi) ++ ++ addq $16,%rdi ++ addq $16,%rsi ++ ++ decq %r10 ++ jne L$128_enc_msg_x8_loop2 ++ ++L$128_enc_msg_x8_out: ++ movq %rbp,%rsp ++ ++ popq %rbp ++ ++ popq %r13 ++ ++ popq %r12 ++ ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes128gcmsiv_dec ++.private_extern _aes128gcmsiv_dec ++ ++.p2align 4 ++_aes128gcmsiv_dec: ++ ++ testq $~15,%r9 ++ jnz L$128_dec_start ++ .byte 0xf3,0xc3 ++ ++L$128_dec_start: ++ vzeroupper ++ vmovdqa (%rdx),%xmm0 ++ movq %rdx,%rax ++ ++ leaq 32(%rax),%rax ++ leaq 32(%rcx),%rcx ++ ++ ++ vmovdqu (%rdi,%r9,1),%xmm15 ++ vpor OR_MASK(%rip),%xmm15,%xmm15 ++ andq $~15,%r9 ++ ++ ++ cmpq $96,%r9 ++ jb L$128_dec_loop2 ++ ++ ++ subq $96,%r9 ++ vmovdqa %xmm15,%xmm7 ++ vpaddd one(%rip),%xmm7,%xmm8 ++ vpaddd two(%rip),%xmm7,%xmm9 ++ vpaddd one(%rip),%xmm9,%xmm10 ++ vpaddd two(%rip),%xmm9,%xmm11 ++ vpaddd one(%rip),%xmm11,%xmm12 ++ vpaddd two(%rip),%xmm11,%xmm15 ++ ++ vpxor (%r8),%xmm7,%xmm7 ++ vpxor (%r8),%xmm8,%xmm8 ++ vpxor (%r8),%xmm9,%xmm9 ++ vpxor (%r8),%xmm10,%xmm10 ++ vpxor (%r8),%xmm11,%xmm11 ++ vpxor (%r8),%xmm12,%xmm12 ++ ++ vmovdqu 16(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 32(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 48(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 64(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 80(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 96(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 112(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc 
%xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 128(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 144(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 160(%r8),%xmm4 ++ vaesenclast %xmm4,%xmm7,%xmm7 ++ vaesenclast %xmm4,%xmm8,%xmm8 ++ vaesenclast %xmm4,%xmm9,%xmm9 ++ vaesenclast %xmm4,%xmm10,%xmm10 ++ vaesenclast %xmm4,%xmm11,%xmm11 ++ vaesenclast %xmm4,%xmm12,%xmm12 ++ ++ ++ vpxor 0(%rdi),%xmm7,%xmm7 ++ vpxor 16(%rdi),%xmm8,%xmm8 ++ vpxor 32(%rdi),%xmm9,%xmm9 ++ vpxor 48(%rdi),%xmm10,%xmm10 ++ vpxor 64(%rdi),%xmm11,%xmm11 ++ vpxor 80(%rdi),%xmm12,%xmm12 ++ ++ vmovdqu %xmm7,0(%rsi) ++ vmovdqu %xmm8,16(%rsi) ++ vmovdqu %xmm9,32(%rsi) ++ vmovdqu %xmm10,48(%rsi) ++ vmovdqu %xmm11,64(%rsi) ++ vmovdqu %xmm12,80(%rsi) ++ ++ addq $96,%rdi ++ addq $96,%rsi ++ jmp L$128_dec_loop1 ++ ++ ++.p2align 6 ++L$128_dec_loop1: ++ cmpq $96,%r9 ++ jb L$128_dec_finish_96 ++ subq $96,%r9 ++ ++ vmovdqa %xmm12,%xmm6 ++ vmovdqa %xmm11,16-32(%rax) ++ vmovdqa %xmm10,32-32(%rax) ++ vmovdqa %xmm9,48-32(%rax) ++ vmovdqa %xmm8,64-32(%rax) ++ vmovdqa %xmm7,80-32(%rax) ++ ++ vmovdqa %xmm15,%xmm7 ++ vpaddd one(%rip),%xmm7,%xmm8 ++ vpaddd two(%rip),%xmm7,%xmm9 ++ vpaddd one(%rip),%xmm9,%xmm10 ++ vpaddd two(%rip),%xmm9,%xmm11 ++ vpaddd one(%rip),%xmm11,%xmm12 ++ vpaddd two(%rip),%xmm11,%xmm15 ++ ++ vmovdqa (%r8),%xmm4 ++ vpxor %xmm4,%xmm7,%xmm7 ++ vpxor %xmm4,%xmm8,%xmm8 ++ vpxor %xmm4,%xmm9,%xmm9 ++ vpxor %xmm4,%xmm10,%xmm10 ++ vpxor %xmm4,%xmm11,%xmm11 ++ vpxor %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 0-32(%rcx),%xmm4 ++ vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 ++ vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 ++ vpclmulqdq $0x01,%xmm4,%xmm6,%xmm1 ++ vpclmulqdq $0x10,%xmm4,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 16(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu -16(%rax),%xmm6 ++ vmovdqu -16(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 32(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 0(%rax),%xmm6 ++ vmovdqu 0(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 48(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 16(%rax),%xmm6 ++ vmovdqu 16(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ 
vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 64(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 32(%rax),%xmm6 ++ vmovdqu 32(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 80(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 96(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 112(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ ++ vmovdqa 80-32(%rax),%xmm6 ++ vpxor %xmm0,%xmm6,%xmm6 ++ vmovdqu 80-32(%rcx),%xmm5 ++ ++ vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 128(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ ++ vpsrldq $8,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm5 ++ vpslldq $8,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm0 ++ ++ vmovdqa poly(%rip),%xmm3 ++ ++ vmovdqu 144(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 160(%r8),%xmm6 ++ vpalignr $8,%xmm0,%xmm0,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 ++ vpxor %xmm0,%xmm2,%xmm0 ++ ++ vpxor 0(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm7,%xmm7 ++ vpxor 16(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm8,%xmm8 ++ vpxor 32(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm9,%xmm9 ++ vpxor 48(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm10,%xmm10 ++ vpxor 64(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm11,%xmm11 ++ vpxor 80(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm12,%xmm12 ++ ++ vpalignr $8,%xmm0,%xmm0,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 ++ vpxor %xmm0,%xmm2,%xmm0 ++ ++ vmovdqu %xmm7,0(%rsi) ++ vmovdqu %xmm8,16(%rsi) ++ vmovdqu %xmm9,32(%rsi) ++ vmovdqu %xmm10,48(%rsi) ++ vmovdqu %xmm11,64(%rsi) ++ vmovdqu %xmm12,80(%rsi) ++ ++ vpxor %xmm5,%xmm0,%xmm0 ++ ++ leaq 96(%rdi),%rdi ++ leaq 96(%rsi),%rsi ++ jmp L$128_dec_loop1 ++ ++L$128_dec_finish_96: ++ vmovdqa %xmm12,%xmm6 ++ vmovdqa %xmm11,16-32(%rax) ++ vmovdqa %xmm10,32-32(%rax) ++ vmovdqa %xmm9,48-32(%rax) ++ vmovdqa %xmm8,64-32(%rax) ++ vmovdqa %xmm7,80-32(%rax) ++ ++ vmovdqu 0-32(%rcx),%xmm4 ++ vpclmulqdq $0x10,%xmm4,%xmm6,%xmm1 ++ vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 ++ vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 ++ vpclmulqdq $0x01,%xmm4,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu -16(%rax),%xmm6 ++ vmovdqu -16(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ 
vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 0(%rax),%xmm6 ++ vmovdqu 0(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 16(%rax),%xmm6 ++ vmovdqu 16(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 32(%rax),%xmm6 ++ vmovdqu 32(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 80-32(%rax),%xmm6 ++ vpxor %xmm0,%xmm6,%xmm6 ++ vmovdqu 80-32(%rcx),%xmm5 ++ vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vpsrldq $8,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm5 ++ vpslldq $8,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm0 ++ ++ vmovdqa poly(%rip),%xmm3 ++ ++ vpalignr $8,%xmm0,%xmm0,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 ++ vpxor %xmm0,%xmm2,%xmm0 ++ ++ vpalignr $8,%xmm0,%xmm0,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 ++ vpxor %xmm0,%xmm2,%xmm0 ++ ++ vpxor %xmm5,%xmm0,%xmm0 ++ ++L$128_dec_loop2: ++ ++ ++ ++ cmpq $16,%r9 ++ jb L$128_dec_out ++ subq $16,%r9 ++ ++ vmovdqa %xmm15,%xmm2 ++ vpaddd one(%rip),%xmm15,%xmm15 ++ ++ vpxor 0(%r8),%xmm2,%xmm2 ++ vaesenc 16(%r8),%xmm2,%xmm2 ++ vaesenc 32(%r8),%xmm2,%xmm2 ++ vaesenc 48(%r8),%xmm2,%xmm2 ++ vaesenc 64(%r8),%xmm2,%xmm2 ++ vaesenc 80(%r8),%xmm2,%xmm2 ++ vaesenc 96(%r8),%xmm2,%xmm2 ++ vaesenc 112(%r8),%xmm2,%xmm2 ++ vaesenc 128(%r8),%xmm2,%xmm2 ++ vaesenc 144(%r8),%xmm2,%xmm2 ++ vaesenclast 160(%r8),%xmm2,%xmm2 ++ vpxor (%rdi),%xmm2,%xmm2 ++ vmovdqu %xmm2,(%rsi) ++ addq $16,%rdi ++ addq $16,%rsi ++ ++ vpxor %xmm2,%xmm0,%xmm0 ++ vmovdqa -32(%rcx),%xmm1 ++ call GFMUL ++ ++ jmp L$128_dec_loop2 ++ ++L$128_dec_out: ++ vmovdqu %xmm0,(%rdx) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes128gcmsiv_ecb_enc_block ++.private_extern _aes128gcmsiv_ecb_enc_block ++ ++.p2align 4 ++_aes128gcmsiv_ecb_enc_block: ++ ++ vmovdqa (%rdi),%xmm1 ++ ++ vpxor (%rdx),%xmm1,%xmm1 ++ vaesenc 16(%rdx),%xmm1,%xmm1 ++ vaesenc 32(%rdx),%xmm1,%xmm1 ++ vaesenc 48(%rdx),%xmm1,%xmm1 ++ vaesenc 64(%rdx),%xmm1,%xmm1 ++ vaesenc 80(%rdx),%xmm1,%xmm1 ++ vaesenc 96(%rdx),%xmm1,%xmm1 ++ vaesenc 112(%rdx),%xmm1,%xmm1 ++ vaesenc 128(%rdx),%xmm1,%xmm1 ++ vaesenc 144(%rdx),%xmm1,%xmm1 ++ vaesenclast 160(%rdx),%xmm1,%xmm1 ++ ++ vmovdqa %xmm1,(%rsi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes256gcmsiv_aes_ks_enc_x1 ++.private_extern _aes256gcmsiv_aes_ks_enc_x1 ++ ++.p2align 4 ++_aes256gcmsiv_aes_ks_enc_x1: ++ ++ vmovdqa con1(%rip),%xmm0 ++ vmovdqa mask(%rip),%xmm15 ++ vmovdqa (%rdi),%xmm8 ++ vmovdqa (%rcx),%xmm1 ++ vmovdqa 16(%rcx),%xmm3 ++ vpxor %xmm1,%xmm8,%xmm8 ++ vaesenc %xmm3,%xmm8,%xmm8 ++ 
vmovdqu %xmm1,(%rdx) ++ vmovdqu %xmm3,16(%rdx) ++ vpxor %xmm14,%xmm14,%xmm14 ++ ++ vpshufb %xmm15,%xmm3,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpslldq $4,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vaesenc %xmm1,%xmm8,%xmm8 ++ vmovdqu %xmm1,32(%rdx) ++ ++ vpshufd $0xff,%xmm1,%xmm2 ++ vaesenclast %xmm14,%xmm2,%xmm2 ++ vpslldq $4,%xmm3,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpxor %xmm2,%xmm3,%xmm3 ++ vaesenc %xmm3,%xmm8,%xmm8 ++ vmovdqu %xmm3,48(%rdx) ++ ++ vpshufb %xmm15,%xmm3,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpslldq $4,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vaesenc %xmm1,%xmm8,%xmm8 ++ vmovdqu %xmm1,64(%rdx) ++ ++ vpshufd $0xff,%xmm1,%xmm2 ++ vaesenclast %xmm14,%xmm2,%xmm2 ++ vpslldq $4,%xmm3,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpxor %xmm2,%xmm3,%xmm3 ++ vaesenc %xmm3,%xmm8,%xmm8 ++ vmovdqu %xmm3,80(%rdx) ++ ++ vpshufb %xmm15,%xmm3,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpslldq $4,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vaesenc %xmm1,%xmm8,%xmm8 ++ vmovdqu %xmm1,96(%rdx) ++ ++ vpshufd $0xff,%xmm1,%xmm2 ++ vaesenclast %xmm14,%xmm2,%xmm2 ++ vpslldq $4,%xmm3,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpxor %xmm2,%xmm3,%xmm3 ++ vaesenc %xmm3,%xmm8,%xmm8 ++ vmovdqu %xmm3,112(%rdx) ++ ++ vpshufb %xmm15,%xmm3,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpslldq $4,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vaesenc %xmm1,%xmm8,%xmm8 ++ vmovdqu %xmm1,128(%rdx) ++ ++ vpshufd $0xff,%xmm1,%xmm2 ++ vaesenclast %xmm14,%xmm2,%xmm2 ++ vpslldq $4,%xmm3,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpxor %xmm2,%xmm3,%xmm3 ++ vaesenc %xmm3,%xmm8,%xmm8 ++ vmovdqu %xmm3,144(%rdx) ++ ++ vpshufb %xmm15,%xmm3,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpslldq $4,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vaesenc %xmm1,%xmm8,%xmm8 ++ vmovdqu %xmm1,160(%rdx) ++ ++ vpshufd $0xff,%xmm1,%xmm2 ++ vaesenclast %xmm14,%xmm2,%xmm2 ++ vpslldq $4,%xmm3,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpxor %xmm2,%xmm3,%xmm3 ++ vaesenc %xmm3,%xmm8,%xmm8 ++ vmovdqu %xmm3,176(%rdx) ++ ++ vpshufb %xmm15,%xmm3,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslld $1,%xmm0,%xmm0 ++ vpslldq $4,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 
++ vaesenc %xmm1,%xmm8,%xmm8 ++ vmovdqu %xmm1,192(%rdx) ++ ++ vpshufd $0xff,%xmm1,%xmm2 ++ vaesenclast %xmm14,%xmm2,%xmm2 ++ vpslldq $4,%xmm3,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpxor %xmm2,%xmm3,%xmm3 ++ vaesenc %xmm3,%xmm8,%xmm8 ++ vmovdqu %xmm3,208(%rdx) ++ ++ vpshufb %xmm15,%xmm3,%xmm2 ++ vaesenclast %xmm0,%xmm2,%xmm2 ++ vpslldq $4,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpslldq $4,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm1,%xmm1 ++ vaesenclast %xmm1,%xmm8,%xmm8 ++ vmovdqu %xmm1,224(%rdx) ++ ++ vmovdqa %xmm8,(%rsi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes256gcmsiv_ecb_enc_block ++.private_extern _aes256gcmsiv_ecb_enc_block ++ ++.p2align 4 ++_aes256gcmsiv_ecb_enc_block: ++ ++ vmovdqa (%rdi),%xmm1 ++ vpxor (%rdx),%xmm1,%xmm1 ++ vaesenc 16(%rdx),%xmm1,%xmm1 ++ vaesenc 32(%rdx),%xmm1,%xmm1 ++ vaesenc 48(%rdx),%xmm1,%xmm1 ++ vaesenc 64(%rdx),%xmm1,%xmm1 ++ vaesenc 80(%rdx),%xmm1,%xmm1 ++ vaesenc 96(%rdx),%xmm1,%xmm1 ++ vaesenc 112(%rdx),%xmm1,%xmm1 ++ vaesenc 128(%rdx),%xmm1,%xmm1 ++ vaesenc 144(%rdx),%xmm1,%xmm1 ++ vaesenc 160(%rdx),%xmm1,%xmm1 ++ vaesenc 176(%rdx),%xmm1,%xmm1 ++ vaesenc 192(%rdx),%xmm1,%xmm1 ++ vaesenc 208(%rdx),%xmm1,%xmm1 ++ vaesenclast 224(%rdx),%xmm1,%xmm1 ++ vmovdqa %xmm1,(%rsi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes256gcmsiv_enc_msg_x4 ++.private_extern _aes256gcmsiv_enc_msg_x4 ++ ++.p2align 4 ++_aes256gcmsiv_enc_msg_x4: ++ ++ testq %r8,%r8 ++ jnz L$256_enc_msg_x4_start ++ .byte 0xf3,0xc3 ++ ++L$256_enc_msg_x4_start: ++ movq %r8,%r10 ++ shrq $4,%r8 ++ shlq $60,%r10 ++ jz L$256_enc_msg_x4_start2 ++ addq $1,%r8 ++ ++L$256_enc_msg_x4_start2: ++ movq %r8,%r10 ++ shlq $62,%r10 ++ shrq $62,%r10 ++ ++ ++ vmovdqa (%rdx),%xmm15 ++ vpor OR_MASK(%rip),%xmm15,%xmm15 ++ ++ vmovdqa four(%rip),%xmm4 ++ vmovdqa %xmm15,%xmm0 ++ vpaddd one(%rip),%xmm15,%xmm1 ++ vpaddd two(%rip),%xmm15,%xmm2 ++ vpaddd three(%rip),%xmm15,%xmm3 ++ ++ shrq $2,%r8 ++ je L$256_enc_msg_x4_check_remainder ++ ++ subq $64,%rsi ++ subq $64,%rdi ++ ++L$256_enc_msg_x4_loop1: ++ addq $64,%rsi ++ addq $64,%rdi ++ ++ vmovdqa %xmm0,%xmm5 ++ vmovdqa %xmm1,%xmm6 ++ vmovdqa %xmm2,%xmm7 ++ vmovdqa %xmm3,%xmm8 ++ ++ vpxor (%rcx),%xmm5,%xmm5 ++ vpxor (%rcx),%xmm6,%xmm6 ++ vpxor (%rcx),%xmm7,%xmm7 ++ vpxor (%rcx),%xmm8,%xmm8 ++ ++ vmovdqu 16(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vpaddd %xmm4,%xmm0,%xmm0 ++ vmovdqu 32(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vpaddd %xmm4,%xmm1,%xmm1 ++ vmovdqu 48(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vpaddd %xmm4,%xmm2,%xmm2 ++ vmovdqu 64(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vpaddd %xmm4,%xmm3,%xmm3 ++ ++ vmovdqu 80(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 96(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 112(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc 
%xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 128(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 144(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 160(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 176(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 192(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 208(%rcx),%xmm12 ++ vaesenc %xmm12,%xmm5,%xmm5 ++ vaesenc %xmm12,%xmm6,%xmm6 ++ vaesenc %xmm12,%xmm7,%xmm7 ++ vaesenc %xmm12,%xmm8,%xmm8 ++ ++ vmovdqu 224(%rcx),%xmm12 ++ vaesenclast %xmm12,%xmm5,%xmm5 ++ vaesenclast %xmm12,%xmm6,%xmm6 ++ vaesenclast %xmm12,%xmm7,%xmm7 ++ vaesenclast %xmm12,%xmm8,%xmm8 ++ ++ ++ ++ vpxor 0(%rdi),%xmm5,%xmm5 ++ vpxor 16(%rdi),%xmm6,%xmm6 ++ vpxor 32(%rdi),%xmm7,%xmm7 ++ vpxor 48(%rdi),%xmm8,%xmm8 ++ ++ subq $1,%r8 ++ ++ vmovdqu %xmm5,0(%rsi) ++ vmovdqu %xmm6,16(%rsi) ++ vmovdqu %xmm7,32(%rsi) ++ vmovdqu %xmm8,48(%rsi) ++ ++ jne L$256_enc_msg_x4_loop1 ++ ++ addq $64,%rsi ++ addq $64,%rdi ++ ++L$256_enc_msg_x4_check_remainder: ++ cmpq $0,%r10 ++ je L$256_enc_msg_x4_out ++ ++L$256_enc_msg_x4_loop2: ++ ++ ++ ++ vmovdqa %xmm0,%xmm5 ++ vpaddd one(%rip),%xmm0,%xmm0 ++ vpxor (%rcx),%xmm5,%xmm5 ++ vaesenc 16(%rcx),%xmm5,%xmm5 ++ vaesenc 32(%rcx),%xmm5,%xmm5 ++ vaesenc 48(%rcx),%xmm5,%xmm5 ++ vaesenc 64(%rcx),%xmm5,%xmm5 ++ vaesenc 80(%rcx),%xmm5,%xmm5 ++ vaesenc 96(%rcx),%xmm5,%xmm5 ++ vaesenc 112(%rcx),%xmm5,%xmm5 ++ vaesenc 128(%rcx),%xmm5,%xmm5 ++ vaesenc 144(%rcx),%xmm5,%xmm5 ++ vaesenc 160(%rcx),%xmm5,%xmm5 ++ vaesenc 176(%rcx),%xmm5,%xmm5 ++ vaesenc 192(%rcx),%xmm5,%xmm5 ++ vaesenc 208(%rcx),%xmm5,%xmm5 ++ vaesenclast 224(%rcx),%xmm5,%xmm5 ++ ++ ++ vpxor (%rdi),%xmm5,%xmm5 ++ ++ vmovdqu %xmm5,(%rsi) ++ ++ addq $16,%rdi ++ addq $16,%rsi ++ ++ subq $1,%r10 ++ jne L$256_enc_msg_x4_loop2 ++ ++L$256_enc_msg_x4_out: ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes256gcmsiv_enc_msg_x8 ++.private_extern _aes256gcmsiv_enc_msg_x8 ++ ++.p2align 4 ++_aes256gcmsiv_enc_msg_x8: ++ ++ testq %r8,%r8 ++ jnz L$256_enc_msg_x8_start ++ .byte 0xf3,0xc3 ++ ++L$256_enc_msg_x8_start: ++ ++ movq %rsp,%r11 ++ subq $16,%r11 ++ andq $-64,%r11 ++ ++ movq %r8,%r10 ++ shrq $4,%r8 ++ shlq $60,%r10 ++ jz L$256_enc_msg_x8_start2 ++ addq $1,%r8 ++ ++L$256_enc_msg_x8_start2: ++ movq %r8,%r10 ++ shlq $61,%r10 ++ shrq $61,%r10 ++ ++ ++ vmovdqa (%rdx),%xmm1 ++ vpor OR_MASK(%rip),%xmm1,%xmm1 ++ ++ ++ vpaddd seven(%rip),%xmm1,%xmm0 ++ vmovdqa %xmm0,(%r11) ++ vpaddd one(%rip),%xmm1,%xmm9 ++ vpaddd two(%rip),%xmm1,%xmm10 ++ vpaddd three(%rip),%xmm1,%xmm11 ++ vpaddd four(%rip),%xmm1,%xmm12 ++ vpaddd five(%rip),%xmm1,%xmm13 ++ vpaddd six(%rip),%xmm1,%xmm14 ++ vmovdqa %xmm1,%xmm0 ++ ++ shrq $3,%r8 ++ jz L$256_enc_msg_x8_check_remainder ++ ++ subq $128,%rsi ++ subq $128,%rdi ++ ++L$256_enc_msg_x8_loop1: ++ addq $128,%rsi ++ addq $128,%rdi ++ ++ vmovdqa %xmm0,%xmm1 ++ vmovdqa %xmm9,%xmm2 ++ vmovdqa %xmm10,%xmm3 ++ vmovdqa %xmm11,%xmm4 ++ vmovdqa %xmm12,%xmm5 ++ vmovdqa %xmm13,%xmm6 ++ vmovdqa %xmm14,%xmm7 ++ ++ vmovdqa (%r11),%xmm8 ++ ++ vpxor (%rcx),%xmm1,%xmm1 ++ vpxor (%rcx),%xmm2,%xmm2 ++ vpxor (%rcx),%xmm3,%xmm3 ++ vpxor (%rcx),%xmm4,%xmm4 
++ vpxor (%rcx),%xmm5,%xmm5 ++ vpxor (%rcx),%xmm6,%xmm6 ++ vpxor (%rcx),%xmm7,%xmm7 ++ vpxor (%rcx),%xmm8,%xmm8 ++ ++ vmovdqu 16(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vmovdqa (%r11),%xmm14 ++ vpaddd eight(%rip),%xmm14,%xmm14 ++ vmovdqa %xmm14,(%r11) ++ vmovdqu 32(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpsubd one(%rip),%xmm14,%xmm14 ++ vmovdqu 48(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm0,%xmm0 ++ vmovdqu 64(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm9,%xmm9 ++ vmovdqu 80(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm10,%xmm10 ++ vmovdqu 96(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm11,%xmm11 ++ vmovdqu 112(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm12,%xmm12 ++ vmovdqu 128(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vpaddd eight(%rip),%xmm13,%xmm13 ++ vmovdqu 144(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 160(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 176(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 192(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc 
%xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 208(%rcx),%xmm15 ++ vaesenc %xmm15,%xmm1,%xmm1 ++ vaesenc %xmm15,%xmm2,%xmm2 ++ vaesenc %xmm15,%xmm3,%xmm3 ++ vaesenc %xmm15,%xmm4,%xmm4 ++ vaesenc %xmm15,%xmm5,%xmm5 ++ vaesenc %xmm15,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 224(%rcx),%xmm15 ++ vaesenclast %xmm15,%xmm1,%xmm1 ++ vaesenclast %xmm15,%xmm2,%xmm2 ++ vaesenclast %xmm15,%xmm3,%xmm3 ++ vaesenclast %xmm15,%xmm4,%xmm4 ++ vaesenclast %xmm15,%xmm5,%xmm5 ++ vaesenclast %xmm15,%xmm6,%xmm6 ++ vaesenclast %xmm15,%xmm7,%xmm7 ++ vaesenclast %xmm15,%xmm8,%xmm8 ++ ++ ++ ++ vpxor 0(%rdi),%xmm1,%xmm1 ++ vpxor 16(%rdi),%xmm2,%xmm2 ++ vpxor 32(%rdi),%xmm3,%xmm3 ++ vpxor 48(%rdi),%xmm4,%xmm4 ++ vpxor 64(%rdi),%xmm5,%xmm5 ++ vpxor 80(%rdi),%xmm6,%xmm6 ++ vpxor 96(%rdi),%xmm7,%xmm7 ++ vpxor 112(%rdi),%xmm8,%xmm8 ++ ++ subq $1,%r8 ++ ++ vmovdqu %xmm1,0(%rsi) ++ vmovdqu %xmm2,16(%rsi) ++ vmovdqu %xmm3,32(%rsi) ++ vmovdqu %xmm4,48(%rsi) ++ vmovdqu %xmm5,64(%rsi) ++ vmovdqu %xmm6,80(%rsi) ++ vmovdqu %xmm7,96(%rsi) ++ vmovdqu %xmm8,112(%rsi) ++ ++ jne L$256_enc_msg_x8_loop1 ++ ++ addq $128,%rsi ++ addq $128,%rdi ++ ++L$256_enc_msg_x8_check_remainder: ++ cmpq $0,%r10 ++ je L$256_enc_msg_x8_out ++ ++L$256_enc_msg_x8_loop2: ++ ++ ++ vmovdqa %xmm0,%xmm1 ++ vpaddd one(%rip),%xmm0,%xmm0 ++ ++ vpxor (%rcx),%xmm1,%xmm1 ++ vaesenc 16(%rcx),%xmm1,%xmm1 ++ vaesenc 32(%rcx),%xmm1,%xmm1 ++ vaesenc 48(%rcx),%xmm1,%xmm1 ++ vaesenc 64(%rcx),%xmm1,%xmm1 ++ vaesenc 80(%rcx),%xmm1,%xmm1 ++ vaesenc 96(%rcx),%xmm1,%xmm1 ++ vaesenc 112(%rcx),%xmm1,%xmm1 ++ vaesenc 128(%rcx),%xmm1,%xmm1 ++ vaesenc 144(%rcx),%xmm1,%xmm1 ++ vaesenc 160(%rcx),%xmm1,%xmm1 ++ vaesenc 176(%rcx),%xmm1,%xmm1 ++ vaesenc 192(%rcx),%xmm1,%xmm1 ++ vaesenc 208(%rcx),%xmm1,%xmm1 ++ vaesenclast 224(%rcx),%xmm1,%xmm1 ++ ++ ++ vpxor (%rdi),%xmm1,%xmm1 ++ ++ vmovdqu %xmm1,(%rsi) ++ ++ addq $16,%rdi ++ addq $16,%rsi ++ subq $1,%r10 ++ jnz L$256_enc_msg_x8_loop2 ++ ++L$256_enc_msg_x8_out: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _aes256gcmsiv_dec ++.private_extern _aes256gcmsiv_dec ++ ++.p2align 4 ++_aes256gcmsiv_dec: ++ ++ testq $~15,%r9 ++ jnz L$256_dec_start ++ .byte 0xf3,0xc3 ++ ++L$256_dec_start: ++ vzeroupper ++ vmovdqa (%rdx),%xmm0 ++ movq %rdx,%rax ++ ++ leaq 32(%rax),%rax ++ leaq 32(%rcx),%rcx ++ ++ ++ vmovdqu (%rdi,%r9,1),%xmm15 ++ vpor OR_MASK(%rip),%xmm15,%xmm15 ++ andq $~15,%r9 ++ ++ ++ cmpq $96,%r9 ++ jb L$256_dec_loop2 ++ ++ ++ subq $96,%r9 ++ vmovdqa %xmm15,%xmm7 ++ vpaddd one(%rip),%xmm7,%xmm8 ++ vpaddd two(%rip),%xmm7,%xmm9 ++ vpaddd one(%rip),%xmm9,%xmm10 ++ vpaddd two(%rip),%xmm9,%xmm11 ++ vpaddd one(%rip),%xmm11,%xmm12 ++ vpaddd two(%rip),%xmm11,%xmm15 ++ ++ vpxor (%r8),%xmm7,%xmm7 ++ vpxor (%r8),%xmm8,%xmm8 ++ vpxor (%r8),%xmm9,%xmm9 ++ vpxor (%r8),%xmm10,%xmm10 ++ vpxor (%r8),%xmm11,%xmm11 ++ vpxor (%r8),%xmm12,%xmm12 ++ ++ vmovdqu 16(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 32(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 48(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 
++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 64(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 80(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 96(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 112(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 128(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 144(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 160(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 176(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 192(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 208(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 224(%r8),%xmm4 ++ vaesenclast %xmm4,%xmm7,%xmm7 ++ vaesenclast %xmm4,%xmm8,%xmm8 ++ vaesenclast %xmm4,%xmm9,%xmm9 ++ vaesenclast %xmm4,%xmm10,%xmm10 ++ vaesenclast %xmm4,%xmm11,%xmm11 ++ vaesenclast %xmm4,%xmm12,%xmm12 ++ ++ ++ vpxor 0(%rdi),%xmm7,%xmm7 ++ vpxor 16(%rdi),%xmm8,%xmm8 ++ vpxor 32(%rdi),%xmm9,%xmm9 ++ vpxor 48(%rdi),%xmm10,%xmm10 ++ vpxor 64(%rdi),%xmm11,%xmm11 ++ vpxor 80(%rdi),%xmm12,%xmm12 ++ ++ vmovdqu %xmm7,0(%rsi) ++ vmovdqu %xmm8,16(%rsi) ++ vmovdqu %xmm9,32(%rsi) ++ vmovdqu %xmm10,48(%rsi) ++ vmovdqu %xmm11,64(%rsi) ++ vmovdqu %xmm12,80(%rsi) ++ ++ addq $96,%rdi ++ addq $96,%rsi ++ jmp L$256_dec_loop1 ++ ++ ++.p2align 6 ++L$256_dec_loop1: ++ cmpq $96,%r9 ++ jb L$256_dec_finish_96 ++ subq $96,%r9 ++ ++ vmovdqa %xmm12,%xmm6 ++ vmovdqa %xmm11,16-32(%rax) ++ vmovdqa %xmm10,32-32(%rax) ++ vmovdqa %xmm9,48-32(%rax) ++ vmovdqa %xmm8,64-32(%rax) ++ vmovdqa %xmm7,80-32(%rax) ++ ++ vmovdqa %xmm15,%xmm7 ++ vpaddd one(%rip),%xmm7,%xmm8 ++ vpaddd two(%rip),%xmm7,%xmm9 ++ vpaddd one(%rip),%xmm9,%xmm10 ++ vpaddd two(%rip),%xmm9,%xmm11 ++ vpaddd one(%rip),%xmm11,%xmm12 ++ vpaddd two(%rip),%xmm11,%xmm15 ++ ++ vmovdqa (%r8),%xmm4 ++ vpxor %xmm4,%xmm7,%xmm7 ++ vpxor %xmm4,%xmm8,%xmm8 ++ vpxor %xmm4,%xmm9,%xmm9 ++ vpxor %xmm4,%xmm10,%xmm10 ++ vpxor %xmm4,%xmm11,%xmm11 ++ vpxor %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 0-32(%rcx),%xmm4 ++ vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 ++ vpclmulqdq 
$0x00,%xmm4,%xmm6,%xmm3 ++ vpclmulqdq $0x01,%xmm4,%xmm6,%xmm1 ++ vpclmulqdq $0x10,%xmm4,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 16(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu -16(%rax),%xmm6 ++ vmovdqu -16(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 32(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 0(%rax),%xmm6 ++ vmovdqu 0(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 48(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 16(%rax),%xmm6 ++ vmovdqu 16(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 64(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 32(%rax),%xmm6 ++ vmovdqu 32(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 80(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 96(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 112(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ ++ vmovdqa 80-32(%rax),%xmm6 ++ vpxor %xmm0,%xmm6,%xmm6 ++ vmovdqu 80-32(%rcx),%xmm5 ++ ++ vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 128(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ ++ vpsrldq $8,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm5 ++ vpslldq $8,%xmm1,%xmm4 ++ vpxor 
%xmm4,%xmm3,%xmm0 ++ ++ vmovdqa poly(%rip),%xmm3 ++ ++ vmovdqu 144(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 160(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 176(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 192(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 208(%r8),%xmm4 ++ vaesenc %xmm4,%xmm7,%xmm7 ++ vaesenc %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm4,%xmm9,%xmm9 ++ vaesenc %xmm4,%xmm10,%xmm10 ++ vaesenc %xmm4,%xmm11,%xmm11 ++ vaesenc %xmm4,%xmm12,%xmm12 ++ ++ vmovdqu 224(%r8),%xmm6 ++ vpalignr $8,%xmm0,%xmm0,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 ++ vpxor %xmm0,%xmm2,%xmm0 ++ ++ vpxor 0(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm7,%xmm7 ++ vpxor 16(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm8,%xmm8 ++ vpxor 32(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm9,%xmm9 ++ vpxor 48(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm10,%xmm10 ++ vpxor 64(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm11,%xmm11 ++ vpxor 80(%rdi),%xmm6,%xmm4 ++ vaesenclast %xmm4,%xmm12,%xmm12 ++ ++ vpalignr $8,%xmm0,%xmm0,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 ++ vpxor %xmm0,%xmm2,%xmm0 ++ ++ vmovdqu %xmm7,0(%rsi) ++ vmovdqu %xmm8,16(%rsi) ++ vmovdqu %xmm9,32(%rsi) ++ vmovdqu %xmm10,48(%rsi) ++ vmovdqu %xmm11,64(%rsi) ++ vmovdqu %xmm12,80(%rsi) ++ ++ vpxor %xmm5,%xmm0,%xmm0 ++ ++ leaq 96(%rdi),%rdi ++ leaq 96(%rsi),%rsi ++ jmp L$256_dec_loop1 ++ ++L$256_dec_finish_96: ++ vmovdqa %xmm12,%xmm6 ++ vmovdqa %xmm11,16-32(%rax) ++ vmovdqa %xmm10,32-32(%rax) ++ vmovdqa %xmm9,48-32(%rax) ++ vmovdqa %xmm8,64-32(%rax) ++ vmovdqa %xmm7,80-32(%rax) ++ ++ vmovdqu 0-32(%rcx),%xmm4 ++ vpclmulqdq $0x10,%xmm4,%xmm6,%xmm1 ++ vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 ++ vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 ++ vpclmulqdq $0x01,%xmm4,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu -16(%rax),%xmm6 ++ vmovdqu -16(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 0(%rax),%xmm6 ++ vmovdqu 0(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 16(%rax),%xmm6 ++ vmovdqu 16(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vmovdqu 32(%rax),%xmm6 ++ vmovdqu 32(%rcx),%xmm13 ++ ++ vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq 
$0x00,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ ++ vmovdqu 80-32(%rax),%xmm6 ++ vpxor %xmm0,%xmm6,%xmm6 ++ vmovdqu 80-32(%rcx),%xmm5 ++ vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vpsrldq $8,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm2,%xmm5 ++ vpslldq $8,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm0 ++ ++ vmovdqa poly(%rip),%xmm3 ++ ++ vpalignr $8,%xmm0,%xmm0,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 ++ vpxor %xmm0,%xmm2,%xmm0 ++ ++ vpalignr $8,%xmm0,%xmm0,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 ++ vpxor %xmm0,%xmm2,%xmm0 ++ ++ vpxor %xmm5,%xmm0,%xmm0 ++ ++L$256_dec_loop2: ++ ++ ++ ++ cmpq $16,%r9 ++ jb L$256_dec_out ++ subq $16,%r9 ++ ++ vmovdqa %xmm15,%xmm2 ++ vpaddd one(%rip),%xmm15,%xmm15 ++ ++ vpxor 0(%r8),%xmm2,%xmm2 ++ vaesenc 16(%r8),%xmm2,%xmm2 ++ vaesenc 32(%r8),%xmm2,%xmm2 ++ vaesenc 48(%r8),%xmm2,%xmm2 ++ vaesenc 64(%r8),%xmm2,%xmm2 ++ vaesenc 80(%r8),%xmm2,%xmm2 ++ vaesenc 96(%r8),%xmm2,%xmm2 ++ vaesenc 112(%r8),%xmm2,%xmm2 ++ vaesenc 128(%r8),%xmm2,%xmm2 ++ vaesenc 144(%r8),%xmm2,%xmm2 ++ vaesenc 160(%r8),%xmm2,%xmm2 ++ vaesenc 176(%r8),%xmm2,%xmm2 ++ vaesenc 192(%r8),%xmm2,%xmm2 ++ vaesenc 208(%r8),%xmm2,%xmm2 ++ vaesenclast 224(%r8),%xmm2,%xmm2 ++ vpxor (%rdi),%xmm2,%xmm2 ++ vmovdqu %xmm2,(%rsi) ++ addq $16,%rdi ++ addq $16,%rsi ++ ++ vpxor %xmm2,%xmm0,%xmm0 ++ vmovdqa -32(%rcx),%xmm1 ++ call GFMUL ++ ++ jmp L$256_dec_loop2 ++ ++L$256_dec_out: ++ vmovdqu %xmm0,(%rdx) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes256gcmsiv_kdf ++.private_extern _aes256gcmsiv_kdf ++ ++.p2align 4 ++_aes256gcmsiv_kdf: ++ ++ ++ ++ ++ ++ vmovdqa (%rdx),%xmm1 ++ vmovdqa 0(%rdi),%xmm4 ++ vmovdqa and_mask(%rip),%xmm11 ++ vmovdqa one(%rip),%xmm8 ++ vpshufd $0x90,%xmm4,%xmm4 ++ vpand %xmm11,%xmm4,%xmm4 ++ vpaddd %xmm8,%xmm4,%xmm6 ++ vpaddd %xmm8,%xmm6,%xmm7 ++ vpaddd %xmm8,%xmm7,%xmm11 ++ vpaddd %xmm8,%xmm11,%xmm12 ++ vpaddd %xmm8,%xmm12,%xmm13 ++ ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpxor %xmm1,%xmm6,%xmm6 ++ vpxor %xmm1,%xmm7,%xmm7 ++ vpxor %xmm1,%xmm11,%xmm11 ++ vpxor %xmm1,%xmm12,%xmm12 ++ vpxor %xmm1,%xmm13,%xmm13 ++ ++ vmovdqa 16(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vaesenc %xmm1,%xmm6,%xmm6 ++ vaesenc %xmm1,%xmm7,%xmm7 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ ++ vmovdqa 32(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm4,%xmm4 ++ vaesenc %xmm2,%xmm6,%xmm6 ++ vaesenc %xmm2,%xmm7,%xmm7 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ vaesenc %xmm2,%xmm13,%xmm13 ++ ++ vmovdqa 48(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vaesenc %xmm1,%xmm6,%xmm6 ++ vaesenc %xmm1,%xmm7,%xmm7 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ ++ vmovdqa 64(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm4,%xmm4 ++ vaesenc %xmm2,%xmm6,%xmm6 ++ vaesenc %xmm2,%xmm7,%xmm7 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ vaesenc %xmm2,%xmm13,%xmm13 ++ ++ vmovdqa 80(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vaesenc %xmm1,%xmm6,%xmm6 ++ vaesenc %xmm1,%xmm7,%xmm7 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ ++ vmovdqa 96(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm4,%xmm4 ++ vaesenc %xmm2,%xmm6,%xmm6 ++ vaesenc %xmm2,%xmm7,%xmm7 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ vaesenc 
%xmm2,%xmm13,%xmm13 ++ ++ vmovdqa 112(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vaesenc %xmm1,%xmm6,%xmm6 ++ vaesenc %xmm1,%xmm7,%xmm7 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ ++ vmovdqa 128(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm4,%xmm4 ++ vaesenc %xmm2,%xmm6,%xmm6 ++ vaesenc %xmm2,%xmm7,%xmm7 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ vaesenc %xmm2,%xmm13,%xmm13 ++ ++ vmovdqa 144(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vaesenc %xmm1,%xmm6,%xmm6 ++ vaesenc %xmm1,%xmm7,%xmm7 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ ++ vmovdqa 160(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm4,%xmm4 ++ vaesenc %xmm2,%xmm6,%xmm6 ++ vaesenc %xmm2,%xmm7,%xmm7 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ vaesenc %xmm2,%xmm13,%xmm13 ++ ++ vmovdqa 176(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vaesenc %xmm1,%xmm6,%xmm6 ++ vaesenc %xmm1,%xmm7,%xmm7 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ ++ vmovdqa 192(%rdx),%xmm2 ++ vaesenc %xmm2,%xmm4,%xmm4 ++ vaesenc %xmm2,%xmm6,%xmm6 ++ vaesenc %xmm2,%xmm7,%xmm7 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ vaesenc %xmm2,%xmm13,%xmm13 ++ ++ vmovdqa 208(%rdx),%xmm1 ++ vaesenc %xmm1,%xmm4,%xmm4 ++ vaesenc %xmm1,%xmm6,%xmm6 ++ vaesenc %xmm1,%xmm7,%xmm7 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ ++ vmovdqa 224(%rdx),%xmm2 ++ vaesenclast %xmm2,%xmm4,%xmm4 ++ vaesenclast %xmm2,%xmm6,%xmm6 ++ vaesenclast %xmm2,%xmm7,%xmm7 ++ vaesenclast %xmm2,%xmm11,%xmm11 ++ vaesenclast %xmm2,%xmm12,%xmm12 ++ vaesenclast %xmm2,%xmm13,%xmm13 ++ ++ ++ vmovdqa %xmm4,0(%rsi) ++ vmovdqa %xmm6,16(%rsi) ++ vmovdqa %xmm7,32(%rsi) ++ vmovdqa %xmm11,48(%rsi) ++ vmovdqa %xmm12,64(%rsi) ++ vmovdqa %xmm13,80(%rsi) ++ .byte 0xf3,0xc3 ++ ++ ++#endif +diff --git a/apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S b/apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S +new file mode 100644 +index 0000000..6813510 +--- /dev/null ++++ b/apple-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S +@@ -0,0 +1,8878 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++chacha20_poly1305_constants: ++ ++.p2align 6 ++L$chacha20_consts: ++.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' ++.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' ++L$rol8: ++.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 ++.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 ++L$rol16: ++.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 ++.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 ++L$avx2_init: ++.long 0,0,0,0 ++L$sse_inc: ++.long 1,0,0,0 ++L$avx2_inc: ++.long 2,0,0,0,2,0,0,0 ++L$clamp: ++.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC ++.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF ++.p2align 4 ++L$and_masks: ++.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 ++.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff ++ ++ ++.p2align 6 ++poly_hash_ad_internal: ++ ++ ++ xorq %r10,%r10 ++ xorq %r11,%r11 ++ xorq %r12,%r12 ++ cmpq $13,%r8 ++ jne L$hash_ad_loop ++L$poly_fast_tls_ad: ++ ++ movq (%rcx),%r10 ++ movq 5(%rcx),%r11 ++ shrq $24,%r11 ++ movq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ .byte 0xf3,0xc3 ++L$hash_ad_loop: ++ ++ cmpq $16,%r8 ++ jb L$hash_ad_tail ++ addq 0+0(%rcx),%r10 ++ adcq 8+0(%rcx),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq 
%rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rcx),%rcx ++ subq $16,%r8 ++ jmp L$hash_ad_loop ++L$hash_ad_tail: ++ cmpq $0,%r8 ++ je L$hash_ad_done ++ ++ xorq %r13,%r13 ++ xorq %r14,%r14 ++ xorq %r15,%r15 ++ addq %r8,%rcx ++L$hash_ad_tail_loop: ++ shldq $8,%r13,%r14 ++ shlq $8,%r13 ++ movzbq -1(%rcx),%r15 ++ xorq %r15,%r13 ++ decq %rcx ++ decq %r8 ++ jne L$hash_ad_tail_loop ++ ++ addq %r13,%r10 ++ adcq %r14,%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ ++L$hash_ad_done: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _chacha20_poly1305_open ++.private_extern _chacha20_poly1305_open ++ ++.p2align 6 ++_chacha20_poly1305_open: ++ ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ ++ ++ pushq %r9 ++ ++ subq $288 + 0 + 32,%rsp ++ ++ ++ leaq 32(%rsp),%rbp ++ andq $-32,%rbp ++ ++ movq %rdx,%rbx ++ movq %r8,0+0+32(%rbp) ++ movq %rbx,8+0+32(%rbp) ++ ++ movl _OPENSSL_ia32cap_P+8(%rip),%eax ++ andl $288,%eax ++ xorl $288,%eax ++ jz chacha20_poly1305_open_avx2 ++ ++ cmpq $128,%rbx ++ jbe L$open_sse_128 ++ ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqu 0(%r9),%xmm4 ++ movdqu 16(%r9),%xmm8 ++ movdqu 32(%r9),%xmm12 ++ ++ movdqa %xmm12,%xmm7 ++ ++ movdqa %xmm4,0+48(%rbp) ++ movdqa %xmm8,0+64(%rbp) ++ movdqa %xmm12,0+96(%rbp) ++ movq $10,%r10 ++L$open_sse_init_rounds: ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ ++ decq %r10 ++ jne L$open_sse_init_rounds ++ ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 0+48(%rbp),%xmm4 ++ ++ pand L$clamp(%rip),%xmm0 ++ movdqa %xmm0,0+0(%rbp) ++ movdqa 
%xmm4,0+16(%rbp) ++ ++ movq %r8,%r8 ++ call poly_hash_ad_internal ++L$open_sse_main_loop: ++ cmpq $256,%rbx ++ jb L$open_sse_tail ++ ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqa 0+48(%rbp),%xmm4 ++ movdqa 0+64(%rbp),%xmm8 ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm8,%xmm9 ++ movdqa %xmm0,%xmm2 ++ movdqa %xmm4,%xmm6 ++ movdqa %xmm8,%xmm10 ++ movdqa %xmm0,%xmm3 ++ movdqa %xmm4,%xmm7 ++ movdqa %xmm8,%xmm11 ++ movdqa 0+96(%rbp),%xmm15 ++ paddd L$sse_inc(%rip),%xmm15 ++ movdqa %xmm15,%xmm14 ++ paddd L$sse_inc(%rip),%xmm14 ++ movdqa %xmm14,%xmm13 ++ paddd L$sse_inc(%rip),%xmm13 ++ movdqa %xmm13,%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,0+96(%rbp) ++ movdqa %xmm13,0+112(%rbp) ++ movdqa %xmm14,0+128(%rbp) ++ movdqa %xmm15,0+144(%rbp) ++ ++ ++ ++ movq $4,%rcx ++ movq %rsi,%r8 ++L$open_sse_main_loop_rounds: ++ movdqa %xmm8,0+80(%rbp) ++ movdqa L$rol16(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ addq 0+0(%r8),%r10 ++ adcq 8+0(%r8),%r11 ++ adcq $1,%r12 ++ ++ leaq 16(%r8),%r8 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm4 ++ pxor %xmm8,%xmm4 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movdqa L$rol8(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa 0+80(%rbp),%xmm8 ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++.byte 102,15,58,15,255,4 ++.byte 102,69,15,58,15,219,8 ++.byte 102,69,15,58,15,255,12 ++.byte 102,15,58,15,246,4 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,12 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa L$rol16(%rip),%xmm8 ++ paddd %xmm7,%xmm3 
++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa L$rol8(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa 0+80(%rbp),%xmm8 ++.byte 102,15,58,15,255,12 ++.byte 102,69,15,58,15,219,8 ++.byte 102,69,15,58,15,255,4 ++.byte 102,15,58,15,246,12 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,4 ++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ ++ decq %rcx ++ jge L$open_sse_main_loop_rounds ++ addq 0+0(%r8),%r10 ++ adcq 8+0(%r8),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%r8),%r8 ++ cmpq $-6,%rcx ++ jg L$open_sse_main_loop_rounds ++ paddd L$chacha20_consts(%rip),%xmm3 ++ paddd 0+48(%rbp),%xmm7 ++ paddd 0+64(%rbp),%xmm11 ++ paddd 0+144(%rbp),%xmm15 ++ paddd L$chacha20_consts(%rip),%xmm2 ++ paddd 0+48(%rbp),%xmm6 ++ paddd 0+64(%rbp),%xmm10 ++ paddd 0+128(%rbp),%xmm14 ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd 0+48(%rbp),%xmm5 ++ paddd 0+64(%rbp),%xmm9 ++ paddd 0+112(%rbp),%xmm13 ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 
0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ movdqa %xmm12,0+80(%rbp) ++ movdqu 0 + 0(%rsi),%xmm12 ++ pxor %xmm3,%xmm12 ++ movdqu %xmm12,0 + 0(%rdi) ++ movdqu 16 + 0(%rsi),%xmm12 ++ pxor %xmm7,%xmm12 ++ movdqu %xmm12,16 + 0(%rdi) ++ movdqu 32 + 0(%rsi),%xmm12 ++ pxor %xmm11,%xmm12 ++ movdqu %xmm12,32 + 0(%rdi) ++ movdqu 48 + 0(%rsi),%xmm12 ++ pxor %xmm15,%xmm12 ++ movdqu %xmm12,48 + 0(%rdi) ++ movdqu 0 + 64(%rsi),%xmm3 ++ movdqu 16 + 64(%rsi),%xmm7 ++ movdqu 32 + 64(%rsi),%xmm11 ++ movdqu 48 + 64(%rsi),%xmm15 ++ pxor %xmm3,%xmm2 ++ pxor %xmm7,%xmm6 ++ pxor %xmm11,%xmm10 ++ pxor %xmm14,%xmm15 ++ movdqu %xmm2,0 + 64(%rdi) ++ movdqu %xmm6,16 + 64(%rdi) ++ movdqu %xmm10,32 + 64(%rdi) ++ movdqu %xmm15,48 + 64(%rdi) ++ movdqu 0 + 128(%rsi),%xmm3 ++ movdqu 16 + 128(%rsi),%xmm7 ++ movdqu 32 + 128(%rsi),%xmm11 ++ movdqu 48 + 128(%rsi),%xmm15 ++ pxor %xmm3,%xmm1 ++ pxor %xmm7,%xmm5 ++ pxor %xmm11,%xmm9 ++ pxor %xmm13,%xmm15 ++ movdqu %xmm1,0 + 128(%rdi) ++ movdqu %xmm5,16 + 128(%rdi) ++ movdqu %xmm9,32 + 128(%rdi) ++ movdqu %xmm15,48 + 128(%rdi) ++ movdqu 0 + 192(%rsi),%xmm3 ++ movdqu 16 + 192(%rsi),%xmm7 ++ movdqu 32 + 192(%rsi),%xmm11 ++ movdqu 48 + 192(%rsi),%xmm15 ++ pxor %xmm3,%xmm0 ++ pxor %xmm7,%xmm4 ++ pxor %xmm11,%xmm8 ++ pxor 0+80(%rbp),%xmm15 ++ movdqu %xmm0,0 + 192(%rdi) ++ movdqu %xmm4,16 + 192(%rdi) ++ movdqu %xmm8,32 + 192(%rdi) ++ movdqu %xmm15,48 + 192(%rdi) ++ ++ leaq 256(%rsi),%rsi ++ leaq 256(%rdi),%rdi ++ subq $256,%rbx ++ jmp L$open_sse_main_loop ++L$open_sse_tail: ++ ++ testq %rbx,%rbx ++ jz L$open_sse_finalize ++ cmpq $192,%rbx ++ ja L$open_sse_tail_256 ++ cmpq $128,%rbx ++ ja L$open_sse_tail_192 ++ cmpq $64,%rbx ++ ja L$open_sse_tail_128 ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqa 0+48(%rbp),%xmm4 ++ movdqa 0+64(%rbp),%xmm8 ++ movdqa 0+96(%rbp),%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,0+96(%rbp) ++ ++ xorq %r8,%r8 ++ movq %rbx,%rcx ++ cmpq $16,%rcx ++ jb L$open_sse_tail_64_rounds ++L$open_sse_tail_64_rounds_and_x1hash: ++ addq 0+0(%rsi,%r8,1),%r10 ++ adcq 8+0(%rsi,%r8,1),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ subq $16,%rcx ++L$open_sse_tail_64_rounds: ++ addq $16,%r8 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb 
L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ ++ cmpq $16,%rcx ++ jae L$open_sse_tail_64_rounds_and_x1hash ++ cmpq $160,%r8 ++ jne L$open_sse_tail_64_rounds ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ ++ jmp L$open_sse_tail_64_dec_loop ++ ++L$open_sse_tail_128: ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqa 0+48(%rbp),%xmm4 ++ movdqa 0+64(%rbp),%xmm8 ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm8,%xmm9 ++ movdqa 0+96(%rbp),%xmm13 ++ paddd L$sse_inc(%rip),%xmm13 ++ movdqa %xmm13,%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,0+96(%rbp) ++ movdqa %xmm13,0+112(%rbp) ++ ++ movq %rbx,%rcx ++ andq $-16,%rcx ++ xorq %r8,%r8 ++L$open_sse_tail_128_rounds_and_x1hash: ++ addq 0+0(%rsi,%r8,1),%r10 ++ adcq 8+0(%rsi,%r8,1),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++L$open_sse_tail_128_rounds: ++ addq $16,%r8 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 
++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++ ++ cmpq %rcx,%r8 ++ jb L$open_sse_tail_128_rounds_and_x1hash ++ cmpq $160,%r8 ++ jne L$open_sse_tail_128_rounds ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd 0+48(%rbp),%xmm5 ++ paddd 0+64(%rbp),%xmm9 ++ paddd 0+112(%rbp),%xmm13 ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ movdqu 0 + 0(%rsi),%xmm3 ++ movdqu 16 + 0(%rsi),%xmm7 ++ movdqu 32 + 0(%rsi),%xmm11 ++ movdqu 48 + 0(%rsi),%xmm15 ++ pxor %xmm3,%xmm1 ++ pxor %xmm7,%xmm5 ++ pxor %xmm11,%xmm9 ++ pxor %xmm13,%xmm15 ++ movdqu %xmm1,0 + 0(%rdi) ++ movdqu %xmm5,16 + 0(%rdi) ++ movdqu %xmm9,32 + 0(%rdi) ++ movdqu %xmm15,48 + 0(%rdi) ++ ++ subq $64,%rbx ++ leaq 64(%rsi),%rsi ++ leaq 64(%rdi),%rdi ++ jmp L$open_sse_tail_64_dec_loop ++ ++L$open_sse_tail_192: ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqa 0+48(%rbp),%xmm4 ++ movdqa 0+64(%rbp),%xmm8 ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm8,%xmm9 ++ movdqa %xmm0,%xmm2 ++ movdqa %xmm4,%xmm6 ++ movdqa %xmm8,%xmm10 ++ movdqa 0+96(%rbp),%xmm14 ++ paddd L$sse_inc(%rip),%xmm14 ++ movdqa %xmm14,%xmm13 ++ paddd L$sse_inc(%rip),%xmm13 ++ movdqa %xmm13,%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,0+96(%rbp) ++ movdqa %xmm13,0+112(%rbp) ++ movdqa %xmm14,0+128(%rbp) ++ ++ movq %rbx,%rcx ++ movq $160,%r8 ++ cmpq $160,%rcx ++ cmovgq %r8,%rcx ++ andq $-16,%rcx ++ xorq %r8,%r8 ++L$open_sse_tail_192_rounds_and_x1hash: ++ addq 0+0(%rsi,%r8,1),%r10 ++ adcq 8+0(%rsi,%r8,1),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++L$open_sse_tail_192_rounds: ++ addq $16,%r8 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm6 ++ pxor %xmm3,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor 
%xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm6 ++ pxor %xmm3,%xmm6 ++.byte 102,15,58,15,246,4 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,12 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm6 ++ pxor %xmm3,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm6 ++ pxor %xmm3,%xmm6 ++.byte 102,15,58,15,246,12 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,4 ++ ++ cmpq %rcx,%r8 ++ jb L$open_sse_tail_192_rounds_and_x1hash ++ cmpq $160,%r8 ++ jne L$open_sse_tail_192_rounds ++ cmpq $176,%rbx ++ jb L$open_sse_tail_192_finish ++ addq 0+160(%rsi),%r10 ++ adcq 8+160(%rsi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ cmpq $192,%rbx ++ jb L$open_sse_tail_192_finish ++ addq 0+176(%rsi),%r10 ++ adcq 8+176(%rsi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++L$open_sse_tail_192_finish: ++ paddd L$chacha20_consts(%rip),%xmm2 ++ paddd 0+48(%rbp),%xmm6 ++ paddd 0+64(%rbp),%xmm10 ++ paddd 0+128(%rbp),%xmm14 ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd 0+48(%rbp),%xmm5 ++ paddd 0+64(%rbp),%xmm9 ++ paddd 
0+112(%rbp),%xmm13 ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ movdqu 0 + 0(%rsi),%xmm3 ++ movdqu 16 + 0(%rsi),%xmm7 ++ movdqu 32 + 0(%rsi),%xmm11 ++ movdqu 48 + 0(%rsi),%xmm15 ++ pxor %xmm3,%xmm2 ++ pxor %xmm7,%xmm6 ++ pxor %xmm11,%xmm10 ++ pxor %xmm14,%xmm15 ++ movdqu %xmm2,0 + 0(%rdi) ++ movdqu %xmm6,16 + 0(%rdi) ++ movdqu %xmm10,32 + 0(%rdi) ++ movdqu %xmm15,48 + 0(%rdi) ++ movdqu 0 + 64(%rsi),%xmm3 ++ movdqu 16 + 64(%rsi),%xmm7 ++ movdqu 32 + 64(%rsi),%xmm11 ++ movdqu 48 + 64(%rsi),%xmm15 ++ pxor %xmm3,%xmm1 ++ pxor %xmm7,%xmm5 ++ pxor %xmm11,%xmm9 ++ pxor %xmm13,%xmm15 ++ movdqu %xmm1,0 + 64(%rdi) ++ movdqu %xmm5,16 + 64(%rdi) ++ movdqu %xmm9,32 + 64(%rdi) ++ movdqu %xmm15,48 + 64(%rdi) ++ ++ subq $128,%rbx ++ leaq 128(%rsi),%rsi ++ leaq 128(%rdi),%rdi ++ jmp L$open_sse_tail_64_dec_loop ++ ++L$open_sse_tail_256: ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqa 0+48(%rbp),%xmm4 ++ movdqa 0+64(%rbp),%xmm8 ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm8,%xmm9 ++ movdqa %xmm0,%xmm2 ++ movdqa %xmm4,%xmm6 ++ movdqa %xmm8,%xmm10 ++ movdqa %xmm0,%xmm3 ++ movdqa %xmm4,%xmm7 ++ movdqa %xmm8,%xmm11 ++ movdqa 0+96(%rbp),%xmm15 ++ paddd L$sse_inc(%rip),%xmm15 ++ movdqa %xmm15,%xmm14 ++ paddd L$sse_inc(%rip),%xmm14 ++ movdqa %xmm14,%xmm13 ++ paddd L$sse_inc(%rip),%xmm13 ++ movdqa %xmm13,%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,0+96(%rbp) ++ movdqa %xmm13,0+112(%rbp) ++ movdqa %xmm14,0+128(%rbp) ++ movdqa %xmm15,0+144(%rbp) ++ ++ xorq %r8,%r8 ++L$open_sse_tail_256_rounds_and_x1hash: ++ addq 0+0(%rsi,%r8,1),%r10 ++ adcq 8+0(%rsi,%r8,1),%r11 ++ adcq $1,%r12 ++ movdqa %xmm11,0+80(%rbp) ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm11 ++ pslld $12,%xmm11 ++ psrld $20,%xmm4 ++ pxor %xmm11,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm11 ++ pslld $7,%xmm11 ++ psrld $25,%xmm4 ++ pxor %xmm11,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm11 ++ pslld $12,%xmm11 ++ psrld $20,%xmm5 ++ pxor %xmm11,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm11 ++ pslld $7,%xmm11 ++ psrld $25,%xmm5 ++ pxor %xmm11,%xmm5 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm11 ++ pslld $12,%xmm11 ++ psrld $20,%xmm6 ++ pxor %xmm11,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm11 ++ pslld $7,%xmm11 ++ psrld $25,%xmm6 ++ pxor %xmm11,%xmm6 ++.byte 102,15,58,15,246,4 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,12 ++ movdqa 0+80(%rbp),%xmm11 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movdqa %xmm9,0+80(%rbp) ++ paddd %xmm7,%xmm3 ++ pxor %xmm3,%xmm15 ++ pshufb L$rol16(%rip),%xmm15 ++ paddd %xmm15,%xmm11 ++ pxor %xmm11,%xmm7 ++ movdqa %xmm7,%xmm9 ++ pslld $12,%xmm9 
++ psrld $20,%xmm7 ++ pxor %xmm9,%xmm7 ++ paddd %xmm7,%xmm3 ++ pxor %xmm3,%xmm15 ++ pshufb L$rol8(%rip),%xmm15 ++ paddd %xmm15,%xmm11 ++ pxor %xmm11,%xmm7 ++ movdqa %xmm7,%xmm9 ++ pslld $7,%xmm9 ++ psrld $25,%xmm7 ++ pxor %xmm9,%xmm7 ++.byte 102,15,58,15,255,4 ++.byte 102,69,15,58,15,219,8 ++.byte 102,69,15,58,15,255,12 ++ movdqa 0+80(%rbp),%xmm9 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ movdqa %xmm11,0+80(%rbp) ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm11 ++ pslld $12,%xmm11 ++ psrld $20,%xmm4 ++ pxor %xmm11,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm11 ++ pslld $7,%xmm11 ++ psrld $25,%xmm4 ++ pxor %xmm11,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm11 ++ pslld $12,%xmm11 ++ psrld $20,%xmm5 ++ pxor %xmm11,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm11 ++ pslld $7,%xmm11 ++ psrld $25,%xmm5 ++ pxor %xmm11,%xmm5 ++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm11 ++ pslld $12,%xmm11 ++ psrld $20,%xmm6 ++ pxor %xmm11,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm11 ++ pslld $7,%xmm11 ++ psrld $25,%xmm6 ++ pxor %xmm11,%xmm6 ++.byte 102,15,58,15,246,12 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,4 ++ movdqa 0+80(%rbp),%xmm11 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ movdqa %xmm9,0+80(%rbp) ++ paddd %xmm7,%xmm3 ++ pxor %xmm3,%xmm15 ++ pshufb L$rol16(%rip),%xmm15 ++ paddd %xmm15,%xmm11 ++ pxor %xmm11,%xmm7 ++ movdqa %xmm7,%xmm9 ++ pslld $12,%xmm9 ++ psrld $20,%xmm7 ++ pxor %xmm9,%xmm7 ++ paddd %xmm7,%xmm3 ++ pxor %xmm3,%xmm15 ++ pshufb L$rol8(%rip),%xmm15 ++ paddd %xmm15,%xmm11 ++ pxor %xmm11,%xmm7 ++ movdqa %xmm7,%xmm9 ++ pslld $7,%xmm9 ++ psrld $25,%xmm7 ++ pxor %xmm9,%xmm7 ++.byte 102,15,58,15,255,12 ++.byte 102,69,15,58,15,219,8 ++.byte 102,69,15,58,15,255,4 ++ movdqa 0+80(%rbp),%xmm9 ++ ++ addq $16,%r8 ++ cmpq $160,%r8 ++ jb L$open_sse_tail_256_rounds_and_x1hash ++ ++ movq %rbx,%rcx ++ andq $-16,%rcx ++L$open_sse_tail_256_hash: ++ addq 0+0(%rsi,%r8,1),%r10 ++ adcq 8+0(%rsi,%r8,1),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq 
%r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ addq $16,%r8 ++ cmpq %rcx,%r8 ++ jb L$open_sse_tail_256_hash ++ paddd L$chacha20_consts(%rip),%xmm3 ++ paddd 0+48(%rbp),%xmm7 ++ paddd 0+64(%rbp),%xmm11 ++ paddd 0+144(%rbp),%xmm15 ++ paddd L$chacha20_consts(%rip),%xmm2 ++ paddd 0+48(%rbp),%xmm6 ++ paddd 0+64(%rbp),%xmm10 ++ paddd 0+128(%rbp),%xmm14 ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd 0+48(%rbp),%xmm5 ++ paddd 0+64(%rbp),%xmm9 ++ paddd 0+112(%rbp),%xmm13 ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ movdqa %xmm12,0+80(%rbp) ++ movdqu 0 + 0(%rsi),%xmm12 ++ pxor %xmm3,%xmm12 ++ movdqu %xmm12,0 + 0(%rdi) ++ movdqu 16 + 0(%rsi),%xmm12 ++ pxor %xmm7,%xmm12 ++ movdqu %xmm12,16 + 0(%rdi) ++ movdqu 32 + 0(%rsi),%xmm12 ++ pxor %xmm11,%xmm12 ++ movdqu %xmm12,32 + 0(%rdi) ++ movdqu 48 + 0(%rsi),%xmm12 ++ pxor %xmm15,%xmm12 ++ movdqu %xmm12,48 + 0(%rdi) ++ movdqu 0 + 64(%rsi),%xmm3 ++ movdqu 16 + 64(%rsi),%xmm7 ++ movdqu 32 + 64(%rsi),%xmm11 ++ movdqu 48 + 64(%rsi),%xmm15 ++ pxor %xmm3,%xmm2 ++ pxor %xmm7,%xmm6 ++ pxor %xmm11,%xmm10 ++ pxor %xmm14,%xmm15 ++ movdqu %xmm2,0 + 64(%rdi) ++ movdqu %xmm6,16 + 64(%rdi) ++ movdqu %xmm10,32 + 64(%rdi) ++ movdqu %xmm15,48 + 64(%rdi) ++ movdqu 0 + 128(%rsi),%xmm3 ++ movdqu 16 + 128(%rsi),%xmm7 ++ movdqu 32 + 128(%rsi),%xmm11 ++ movdqu 48 + 128(%rsi),%xmm15 ++ pxor %xmm3,%xmm1 ++ pxor %xmm7,%xmm5 ++ pxor %xmm11,%xmm9 ++ pxor %xmm13,%xmm15 ++ movdqu %xmm1,0 + 128(%rdi) ++ movdqu %xmm5,16 + 128(%rdi) ++ movdqu %xmm9,32 + 128(%rdi) ++ movdqu %xmm15,48 + 128(%rdi) ++ ++ movdqa 0+80(%rbp),%xmm12 ++ subq $192,%rbx ++ leaq 192(%rsi),%rsi ++ leaq 192(%rdi),%rdi ++ ++ ++L$open_sse_tail_64_dec_loop: ++ cmpq $16,%rbx ++ jb L$open_sse_tail_16_init ++ subq $16,%rbx ++ movdqu (%rsi),%xmm3 ++ pxor %xmm3,%xmm0 ++ movdqu %xmm0,(%rdi) ++ leaq 16(%rsi),%rsi ++ leaq 16(%rdi),%rdi ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm8,%xmm4 ++ movdqa %xmm12,%xmm8 ++ jmp L$open_sse_tail_64_dec_loop ++L$open_sse_tail_16_init: ++ movdqa %xmm0,%xmm1 ++ ++ ++L$open_sse_tail_16: ++ testq %rbx,%rbx ++ jz L$open_sse_finalize ++ ++ ++ ++ pxor %xmm3,%xmm3 ++ leaq -1(%rsi,%rbx,1),%rsi ++ movq %rbx,%r8 ++L$open_sse_tail_16_compose: ++ pslldq $1,%xmm3 ++ pinsrb $0,(%rsi),%xmm3 ++ subq $1,%rsi ++ subq $1,%r8 ++ jnz L$open_sse_tail_16_compose ++ ++.byte 102,73,15,126,221 ++ pextrq $1,%xmm3,%r14 ++ ++ pxor %xmm1,%xmm3 ++ ++ ++L$open_sse_tail_16_extract: ++ pextrb $0,%xmm3,(%rdi) ++ psrldq $1,%xmm3 ++ addq $1,%rdi ++ subq $1,%rbx ++ jne L$open_sse_tail_16_extract ++ ++ addq %r13,%r10 ++ adcq %r14,%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ ++L$open_sse_finalize: ++ addq 0+0+32(%rbp),%r10 ++ adcq 8+0+32(%rbp),%r11 ++ adcq $1,%r12 ++ movq 
0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ ++ movq %r10,%r13 ++ movq %r11,%r14 ++ movq %r12,%r15 ++ subq $-5,%r10 ++ sbbq $-1,%r11 ++ sbbq $3,%r12 ++ cmovcq %r13,%r10 ++ cmovcq %r14,%r11 ++ cmovcq %r15,%r12 ++ ++ addq 0+0+16(%rbp),%r10 ++ adcq 8+0+16(%rbp),%r11 ++ ++ ++ addq $288 + 0 + 32,%rsp ++ ++ ++ popq %r9 ++ ++ movq %r10,(%r9) ++ movq %r11,8(%r9) ++ popq %r15 ++ ++ popq %r14 ++ ++ popq %r13 ++ ++ popq %r12 ++ ++ popq %rbx ++ ++ popq %rbp ++ ++ .byte 0xf3,0xc3 ++ ++L$open_sse_128: ++ ++ movdqu L$chacha20_consts(%rip),%xmm0 ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm0,%xmm2 ++ movdqu 0(%r9),%xmm4 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm4,%xmm6 ++ movdqu 16(%r9),%xmm8 ++ movdqa %xmm8,%xmm9 ++ movdqa %xmm8,%xmm10 ++ movdqu 32(%r9),%xmm12 ++ movdqa %xmm12,%xmm13 ++ paddd L$sse_inc(%rip),%xmm13 ++ movdqa %xmm13,%xmm14 ++ paddd L$sse_inc(%rip),%xmm14 ++ movdqa %xmm4,%xmm7 ++ movdqa %xmm8,%xmm11 ++ movdqa %xmm13,%xmm15 ++ movq $10,%r10 ++ ++L$open_sse_128_rounds: ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm6 ++ pxor %xmm3,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm6 ++ pxor %xmm3,%xmm6 ++.byte 102,15,58,15,246,4 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,12 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb 
L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm6 ++ pxor %xmm3,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm6 ++ pxor %xmm3,%xmm6 ++.byte 102,15,58,15,246,12 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,4 ++ ++ decq %r10 ++ jnz L$open_sse_128_rounds ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd L$chacha20_consts(%rip),%xmm2 ++ paddd %xmm7,%xmm4 ++ paddd %xmm7,%xmm5 ++ paddd %xmm7,%xmm6 ++ paddd %xmm11,%xmm9 ++ paddd %xmm11,%xmm10 ++ paddd %xmm15,%xmm13 ++ paddd L$sse_inc(%rip),%xmm15 ++ paddd %xmm15,%xmm14 ++ ++ pand L$clamp(%rip),%xmm0 ++ movdqa %xmm0,0+0(%rbp) ++ movdqa %xmm4,0+16(%rbp) ++ ++ movq %r8,%r8 ++ call poly_hash_ad_internal ++L$open_sse_128_xor_hash: ++ cmpq $16,%rbx ++ jb L$open_sse_tail_16 ++ subq $16,%rbx ++ addq 0+0(%rsi),%r10 ++ adcq 8+0(%rsi),%r11 ++ adcq $1,%r12 ++ ++ ++ movdqu 0(%rsi),%xmm3 ++ pxor %xmm3,%xmm1 ++ movdqu %xmm1,0(%rdi) ++ leaq 16(%rsi),%rsi ++ leaq 16(%rdi),%rdi ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ ++ movdqa %xmm5,%xmm1 ++ movdqa %xmm9,%xmm5 ++ movdqa %xmm13,%xmm9 ++ movdqa %xmm2,%xmm13 ++ movdqa %xmm6,%xmm2 ++ movdqa %xmm10,%xmm6 ++ movdqa %xmm14,%xmm10 ++ jmp L$open_sse_128_xor_hash ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.globl _chacha20_poly1305_seal ++.private_extern _chacha20_poly1305_seal ++ ++.p2align 6 ++_chacha20_poly1305_seal: ++ ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ ++ ++ pushq %r9 ++ ++ subq $288 + 0 + 32,%rsp ++ ++ leaq 32(%rsp),%rbp ++ andq $-32,%rbp ++ ++ movq 56(%r9),%rbx ++ addq %rdx,%rbx ++ movq %r8,0+0+32(%rbp) ++ movq %rbx,8+0+32(%rbp) ++ movq %rdx,%rbx ++ ++ movl _OPENSSL_ia32cap_P+8(%rip),%eax ++ andl $288,%eax ++ xorl $288,%eax ++ jz chacha20_poly1305_seal_avx2 ++ ++ cmpq $128,%rbx ++ jbe L$seal_sse_128 ++ ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqu 0(%r9),%xmm4 ++ movdqu 16(%r9),%xmm8 ++ movdqu 32(%r9),%xmm12 ++ ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm0,%xmm2 ++ movdqa %xmm0,%xmm3 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm4,%xmm6 ++ movdqa %xmm4,%xmm7 ++ movdqa %xmm8,%xmm9 ++ movdqa %xmm8,%xmm10 ++ movdqa %xmm8,%xmm11 ++ movdqa %xmm12,%xmm15 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,%xmm14 ++ paddd L$sse_inc(%rip),%xmm12 ++ 
movdqa %xmm12,%xmm13 ++ paddd L$sse_inc(%rip),%xmm12 ++ ++ movdqa %xmm4,0+48(%rbp) ++ movdqa %xmm8,0+64(%rbp) ++ movdqa %xmm12,0+96(%rbp) ++ movdqa %xmm13,0+112(%rbp) ++ movdqa %xmm14,0+128(%rbp) ++ movdqa %xmm15,0+144(%rbp) ++ movq $10,%r10 ++L$seal_sse_init_rounds: ++ movdqa %xmm8,0+80(%rbp) ++ movdqa L$rol16(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa L$rol8(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa 0+80(%rbp),%xmm8 ++.byte 102,15,58,15,255,4 ++.byte 102,69,15,58,15,219,8 ++.byte 102,69,15,58,15,255,12 ++.byte 102,15,58,15,246,4 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,12 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa L$rol16(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa L$rol8(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 
102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa 0+80(%rbp),%xmm8 ++.byte 102,15,58,15,255,12 ++.byte 102,69,15,58,15,219,8 ++.byte 102,69,15,58,15,255,4 ++.byte 102,15,58,15,246,12 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,4 ++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ ++ decq %r10 ++ jnz L$seal_sse_init_rounds ++ paddd L$chacha20_consts(%rip),%xmm3 ++ paddd 0+48(%rbp),%xmm7 ++ paddd 0+64(%rbp),%xmm11 ++ paddd 0+144(%rbp),%xmm15 ++ paddd L$chacha20_consts(%rip),%xmm2 ++ paddd 0+48(%rbp),%xmm6 ++ paddd 0+64(%rbp),%xmm10 ++ paddd 0+128(%rbp),%xmm14 ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd 0+48(%rbp),%xmm5 ++ paddd 0+64(%rbp),%xmm9 ++ paddd 0+112(%rbp),%xmm13 ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ ++ ++ pand L$clamp(%rip),%xmm3 ++ movdqa %xmm3,0+0(%rbp) ++ movdqa %xmm7,0+16(%rbp) ++ ++ movq %r8,%r8 ++ call poly_hash_ad_internal ++ movdqu 0 + 0(%rsi),%xmm3 ++ movdqu 16 + 0(%rsi),%xmm7 ++ movdqu 32 + 0(%rsi),%xmm11 ++ movdqu 48 + 0(%rsi),%xmm15 ++ pxor %xmm3,%xmm2 ++ pxor %xmm7,%xmm6 ++ pxor %xmm11,%xmm10 ++ pxor %xmm14,%xmm15 ++ movdqu %xmm2,0 + 0(%rdi) ++ movdqu %xmm6,16 + 0(%rdi) ++ movdqu %xmm10,32 + 0(%rdi) ++ movdqu %xmm15,48 + 0(%rdi) ++ movdqu 0 + 64(%rsi),%xmm3 ++ movdqu 16 + 64(%rsi),%xmm7 ++ movdqu 32 + 64(%rsi),%xmm11 ++ movdqu 48 + 64(%rsi),%xmm15 ++ pxor %xmm3,%xmm1 ++ pxor %xmm7,%xmm5 ++ pxor %xmm11,%xmm9 ++ pxor %xmm13,%xmm15 ++ movdqu %xmm1,0 + 64(%rdi) ++ movdqu %xmm5,16 + 64(%rdi) ++ movdqu %xmm9,32 + 64(%rdi) ++ movdqu %xmm15,48 + 64(%rdi) ++ ++ cmpq $192,%rbx ++ ja L$seal_sse_main_init ++ movq $128,%rcx ++ subq $128,%rbx ++ leaq 128(%rsi),%rsi ++ jmp L$seal_sse_128_tail_hash ++L$seal_sse_main_init: ++ movdqu 0 + 128(%rsi),%xmm3 ++ movdqu 16 + 128(%rsi),%xmm7 ++ movdqu 32 + 128(%rsi),%xmm11 ++ movdqu 48 + 128(%rsi),%xmm15 ++ pxor %xmm3,%xmm0 ++ pxor %xmm7,%xmm4 ++ pxor %xmm11,%xmm8 ++ pxor %xmm12,%xmm15 ++ movdqu %xmm0,0 + 128(%rdi) ++ movdqu %xmm4,16 + 128(%rdi) ++ movdqu %xmm8,32 + 128(%rdi) ++ movdqu %xmm15,48 + 128(%rdi) ++ ++ movq $192,%rcx ++ subq $192,%rbx ++ leaq 192(%rsi),%rsi ++ movq $2,%rcx ++ movq $8,%r8 ++ cmpq $64,%rbx ++ jbe L$seal_sse_tail_64 ++ cmpq $128,%rbx ++ jbe L$seal_sse_tail_128 ++ cmpq $192,%rbx ++ jbe L$seal_sse_tail_192 ++ ++L$seal_sse_main_loop: ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqa 0+48(%rbp),%xmm4 ++ movdqa 0+64(%rbp),%xmm8 ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm8,%xmm9 ++ movdqa %xmm0,%xmm2 ++ movdqa %xmm4,%xmm6 ++ movdqa %xmm8,%xmm10 ++ movdqa %xmm0,%xmm3 ++ movdqa %xmm4,%xmm7 ++ movdqa %xmm8,%xmm11 ++ movdqa 0+96(%rbp),%xmm15 ++ paddd L$sse_inc(%rip),%xmm15 ++ movdqa %xmm15,%xmm14 ++ paddd L$sse_inc(%rip),%xmm14 ++ movdqa %xmm14,%xmm13 ++ paddd L$sse_inc(%rip),%xmm13 ++ movdqa %xmm13,%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 
++ movdqa %xmm12,0+96(%rbp) ++ movdqa %xmm13,0+112(%rbp) ++ movdqa %xmm14,0+128(%rbp) ++ movdqa %xmm15,0+144(%rbp) ++ ++.p2align 5 ++L$seal_sse_main_rounds: ++ movdqa %xmm8,0+80(%rbp) ++ movdqa L$rol16(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm4 ++ pxor %xmm8,%xmm4 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movdqa L$rol8(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa 0+80(%rbp),%xmm8 ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++.byte 102,15,58,15,255,4 ++.byte 102,69,15,58,15,219,8 ++.byte 102,69,15,58,15,255,12 ++.byte 102,15,58,15,246,4 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,12 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa L$rol16(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa 
%xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $20,%xmm8 ++ pslld $32-20,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa L$rol8(%rip),%xmm8 ++ paddd %xmm7,%xmm3 ++ paddd %xmm6,%xmm2 ++ paddd %xmm5,%xmm1 ++ paddd %xmm4,%xmm0 ++ pxor %xmm3,%xmm15 ++ pxor %xmm2,%xmm14 ++ pxor %xmm1,%xmm13 ++ pxor %xmm0,%xmm12 ++.byte 102,69,15,56,0,248 ++.byte 102,69,15,56,0,240 ++.byte 102,69,15,56,0,232 ++.byte 102,69,15,56,0,224 ++ movdqa 0+80(%rbp),%xmm8 ++ paddd %xmm15,%xmm11 ++ paddd %xmm14,%xmm10 ++ paddd %xmm13,%xmm9 ++ paddd %xmm12,%xmm8 ++ pxor %xmm11,%xmm7 ++ pxor %xmm10,%xmm6 ++ pxor %xmm9,%xmm5 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm8,0+80(%rbp) ++ movdqa %xmm7,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm7 ++ pxor %xmm8,%xmm7 ++ movdqa %xmm6,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm6 ++ pxor %xmm8,%xmm6 ++ movdqa %xmm5,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm5 ++ pxor %xmm8,%xmm5 ++ movdqa %xmm4,%xmm8 ++ psrld $25,%xmm8 ++ pslld $32-25,%xmm4 ++ pxor %xmm8,%xmm4 ++ movdqa 0+80(%rbp),%xmm8 ++.byte 102,15,58,15,255,12 ++.byte 102,69,15,58,15,219,8 ++.byte 102,69,15,58,15,255,4 ++.byte 102,15,58,15,246,12 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,4 ++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ ++ leaq 16(%rdi),%rdi ++ decq %r8 ++ jge L$seal_sse_main_rounds ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++ decq %rcx ++ jg L$seal_sse_main_rounds ++ paddd L$chacha20_consts(%rip),%xmm3 ++ paddd 0+48(%rbp),%xmm7 ++ paddd 0+64(%rbp),%xmm11 ++ paddd 0+144(%rbp),%xmm15 ++ paddd L$chacha20_consts(%rip),%xmm2 ++ paddd 0+48(%rbp),%xmm6 ++ paddd 0+64(%rbp),%xmm10 ++ paddd 0+128(%rbp),%xmm14 ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd 0+48(%rbp),%xmm5 ++ paddd 0+64(%rbp),%xmm9 ++ paddd 0+112(%rbp),%xmm13 ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ ++ movdqa %xmm14,0+80(%rbp) ++ movdqa %xmm14,0+80(%rbp) ++ movdqu 0 + 0(%rsi),%xmm14 ++ pxor %xmm3,%xmm14 ++ movdqu %xmm14,0 + 0(%rdi) ++ movdqu 16 + 0(%rsi),%xmm14 ++ pxor %xmm7,%xmm14 ++ movdqu %xmm14,16 + 0(%rdi) ++ movdqu 32 + 0(%rsi),%xmm14 ++ pxor %xmm11,%xmm14 ++ movdqu %xmm14,32 + 0(%rdi) ++ movdqu 48 + 0(%rsi),%xmm14 ++ pxor %xmm15,%xmm14 ++ movdqu %xmm14,48 + 0(%rdi) ++ ++ movdqa 0+80(%rbp),%xmm14 ++ movdqu 0 + 64(%rsi),%xmm3 ++ movdqu 16 + 64(%rsi),%xmm7 ++ movdqu 32 + 64(%rsi),%xmm11 ++ movdqu 48 + 64(%rsi),%xmm15 ++ pxor %xmm3,%xmm2 ++ pxor %xmm7,%xmm6 ++ pxor %xmm11,%xmm10 ++ pxor %xmm14,%xmm15 ++ movdqu 
%xmm2,0 + 64(%rdi) ++ movdqu %xmm6,16 + 64(%rdi) ++ movdqu %xmm10,32 + 64(%rdi) ++ movdqu %xmm15,48 + 64(%rdi) ++ movdqu 0 + 128(%rsi),%xmm3 ++ movdqu 16 + 128(%rsi),%xmm7 ++ movdqu 32 + 128(%rsi),%xmm11 ++ movdqu 48 + 128(%rsi),%xmm15 ++ pxor %xmm3,%xmm1 ++ pxor %xmm7,%xmm5 ++ pxor %xmm11,%xmm9 ++ pxor %xmm13,%xmm15 ++ movdqu %xmm1,0 + 128(%rdi) ++ movdqu %xmm5,16 + 128(%rdi) ++ movdqu %xmm9,32 + 128(%rdi) ++ movdqu %xmm15,48 + 128(%rdi) ++ ++ cmpq $256,%rbx ++ ja L$seal_sse_main_loop_xor ++ ++ movq $192,%rcx ++ subq $192,%rbx ++ leaq 192(%rsi),%rsi ++ jmp L$seal_sse_128_tail_hash ++L$seal_sse_main_loop_xor: ++ movdqu 0 + 192(%rsi),%xmm3 ++ movdqu 16 + 192(%rsi),%xmm7 ++ movdqu 32 + 192(%rsi),%xmm11 ++ movdqu 48 + 192(%rsi),%xmm15 ++ pxor %xmm3,%xmm0 ++ pxor %xmm7,%xmm4 ++ pxor %xmm11,%xmm8 ++ pxor %xmm12,%xmm15 ++ movdqu %xmm0,0 + 192(%rdi) ++ movdqu %xmm4,16 + 192(%rdi) ++ movdqu %xmm8,32 + 192(%rdi) ++ movdqu %xmm15,48 + 192(%rdi) ++ ++ leaq 256(%rsi),%rsi ++ subq $256,%rbx ++ movq $6,%rcx ++ movq $4,%r8 ++ cmpq $192,%rbx ++ jg L$seal_sse_main_loop ++ movq %rbx,%rcx ++ testq %rbx,%rbx ++ je L$seal_sse_128_tail_hash ++ movq $6,%rcx ++ cmpq $128,%rbx ++ ja L$seal_sse_tail_192 ++ cmpq $64,%rbx ++ ja L$seal_sse_tail_128 ++ ++L$seal_sse_tail_64: ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqa 0+48(%rbp),%xmm4 ++ movdqa 0+64(%rbp),%xmm8 ++ movdqa 0+96(%rbp),%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,0+96(%rbp) ++ ++L$seal_sse_tail_64_rounds_and_x2hash: ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++L$seal_sse_tail_64_rounds_and_x1hash: ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ 
adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++ decq %rcx ++ jg L$seal_sse_tail_64_rounds_and_x2hash ++ decq %r8 ++ jge L$seal_sse_tail_64_rounds_and_x1hash ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ ++ jmp L$seal_sse_128_tail_xor ++ ++L$seal_sse_tail_128: ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqa 0+48(%rbp),%xmm4 ++ movdqa 0+64(%rbp),%xmm8 ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm8,%xmm9 ++ movdqa 0+96(%rbp),%xmm13 ++ paddd L$sse_inc(%rip),%xmm13 ++ movdqa %xmm13,%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,0+96(%rbp) ++ movdqa %xmm13,0+112(%rbp) ++ ++L$seal_sse_tail_128_rounds_and_x2hash: ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++L$seal_sse_tail_128_rounds_and_x1hash: ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq 
$0,%r12 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++ ++ leaq 16(%rdi),%rdi ++ decq %rcx ++ jg L$seal_sse_tail_128_rounds_and_x2hash ++ decq %r8 ++ jge L$seal_sse_tail_128_rounds_and_x1hash ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd 0+48(%rbp),%xmm5 ++ paddd 0+64(%rbp),%xmm9 ++ paddd 0+112(%rbp),%xmm13 ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd 0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ movdqu 0 + 0(%rsi),%xmm3 ++ movdqu 16 + 0(%rsi),%xmm7 ++ movdqu 32 + 0(%rsi),%xmm11 ++ movdqu 48 + 0(%rsi),%xmm15 ++ pxor %xmm3,%xmm1 ++ pxor %xmm7,%xmm5 ++ pxor %xmm11,%xmm9 ++ pxor %xmm13,%xmm15 ++ movdqu %xmm1,0 + 0(%rdi) ++ movdqu %xmm5,16 + 0(%rdi) ++ movdqu %xmm9,32 + 0(%rdi) ++ movdqu %xmm15,48 + 0(%rdi) ++ ++ movq $64,%rcx ++ subq $64,%rbx ++ leaq 64(%rsi),%rsi ++ jmp L$seal_sse_128_tail_hash ++ ++L$seal_sse_tail_192: ++ movdqa L$chacha20_consts(%rip),%xmm0 ++ movdqa 0+48(%rbp),%xmm4 ++ movdqa 0+64(%rbp),%xmm8 ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm8,%xmm9 ++ movdqa %xmm0,%xmm2 ++ movdqa %xmm4,%xmm6 ++ movdqa %xmm8,%xmm10 ++ movdqa 0+96(%rbp),%xmm14 ++ paddd L$sse_inc(%rip),%xmm14 ++ movdqa %xmm14,%xmm13 ++ paddd L$sse_inc(%rip),%xmm13 ++ movdqa %xmm13,%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,0+96(%rbp) ++ movdqa %xmm13,0+112(%rbp) ++ movdqa %xmm14,0+128(%rbp) ++ ++L$seal_sse_tail_192_rounds_and_x2hash: ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++L$seal_sse_tail_192_rounds_and_x1hash: ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 
102,69,15,58,15,228,12 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm6 ++ pxor %xmm3,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm6 ++ pxor %xmm3,%xmm6 ++.byte 102,15,58,15,246,4 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,12 ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,4 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm6 ++ pxor %xmm3,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm6 ++ pxor %xmm3,%xmm6 ++.byte 102,15,58,15,246,12 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,4 ++ ++ leaq 16(%rdi),%rdi ++ decq %rcx ++ jg L$seal_sse_tail_192_rounds_and_x2hash ++ decq %r8 ++ jge L$seal_sse_tail_192_rounds_and_x1hash ++ paddd L$chacha20_consts(%rip),%xmm2 ++ paddd 0+48(%rbp),%xmm6 ++ paddd 0+64(%rbp),%xmm10 ++ paddd 0+128(%rbp),%xmm14 ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd 0+48(%rbp),%xmm5 ++ paddd 0+64(%rbp),%xmm9 ++ paddd 0+112(%rbp),%xmm13 ++ paddd L$chacha20_consts(%rip),%xmm0 ++ 
paddd 0+48(%rbp),%xmm4 ++ paddd 0+64(%rbp),%xmm8 ++ paddd 0+96(%rbp),%xmm12 ++ movdqu 0 + 0(%rsi),%xmm3 ++ movdqu 16 + 0(%rsi),%xmm7 ++ movdqu 32 + 0(%rsi),%xmm11 ++ movdqu 48 + 0(%rsi),%xmm15 ++ pxor %xmm3,%xmm2 ++ pxor %xmm7,%xmm6 ++ pxor %xmm11,%xmm10 ++ pxor %xmm14,%xmm15 ++ movdqu %xmm2,0 + 0(%rdi) ++ movdqu %xmm6,16 + 0(%rdi) ++ movdqu %xmm10,32 + 0(%rdi) ++ movdqu %xmm15,48 + 0(%rdi) ++ movdqu 0 + 64(%rsi),%xmm3 ++ movdqu 16 + 64(%rsi),%xmm7 ++ movdqu 32 + 64(%rsi),%xmm11 ++ movdqu 48 + 64(%rsi),%xmm15 ++ pxor %xmm3,%xmm1 ++ pxor %xmm7,%xmm5 ++ pxor %xmm11,%xmm9 ++ pxor %xmm13,%xmm15 ++ movdqu %xmm1,0 + 64(%rdi) ++ movdqu %xmm5,16 + 64(%rdi) ++ movdqu %xmm9,32 + 64(%rdi) ++ movdqu %xmm15,48 + 64(%rdi) ++ ++ movq $128,%rcx ++ subq $128,%rbx ++ leaq 128(%rsi),%rsi ++ ++L$seal_sse_128_tail_hash: ++ cmpq $16,%rcx ++ jb L$seal_sse_128_tail_xor ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ subq $16,%rcx ++ leaq 16(%rdi),%rdi ++ jmp L$seal_sse_128_tail_hash ++ ++L$seal_sse_128_tail_xor: ++ cmpq $16,%rbx ++ jb L$seal_sse_tail_16 ++ subq $16,%rbx ++ ++ movdqu 0(%rsi),%xmm3 ++ pxor %xmm3,%xmm0 ++ movdqu %xmm0,0(%rdi) ++ ++ addq 0(%rdi),%r10 ++ adcq 8(%rdi),%r11 ++ adcq $1,%r12 ++ leaq 16(%rsi),%rsi ++ leaq 16(%rdi),%rdi ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm8,%xmm4 ++ movdqa %xmm12,%xmm8 ++ movdqa %xmm1,%xmm12 ++ movdqa %xmm5,%xmm1 ++ movdqa %xmm9,%xmm5 ++ movdqa %xmm13,%xmm9 ++ jmp L$seal_sse_128_tail_xor ++ ++L$seal_sse_tail_16: ++ testq %rbx,%rbx ++ jz L$process_blocks_of_extra_in ++ ++ movq %rbx,%r8 ++ movq %rbx,%rcx ++ leaq -1(%rsi,%rbx,1),%rsi ++ pxor %xmm15,%xmm15 ++L$seal_sse_tail_16_compose: ++ pslldq $1,%xmm15 ++ pinsrb $0,(%rsi),%xmm15 ++ leaq -1(%rsi),%rsi ++ decq %rcx ++ jne L$seal_sse_tail_16_compose ++ ++ ++ pxor %xmm0,%xmm15 ++ ++ ++ movq %rbx,%rcx ++ movdqu %xmm15,%xmm0 ++L$seal_sse_tail_16_extract: ++ pextrb $0,%xmm0,(%rdi) ++ psrldq $1,%xmm0 ++ addq $1,%rdi ++ subq $1,%rcx ++ jnz L$seal_sse_tail_16_extract ++ ++ ++ ++ ++ ++ ++ ++ ++ movq 288 + 0 + 32(%rsp),%r9 ++ movq 56(%r9),%r14 ++ movq 48(%r9),%r13 ++ testq %r14,%r14 ++ jz L$process_partial_block ++ ++ movq $16,%r15 ++ subq %rbx,%r15 ++ cmpq %r15,%r14 ++ ++ jge L$load_extra_in ++ movq %r14,%r15 ++ 
++L$load_extra_in: ++ ++ ++ leaq -1(%r13,%r15,1),%rsi ++ ++ ++ addq %r15,%r13 ++ subq %r15,%r14 ++ movq %r13,48(%r9) ++ movq %r14,56(%r9) ++ ++ ++ ++ addq %r15,%r8 ++ ++ ++ pxor %xmm11,%xmm11 ++L$load_extra_load_loop: ++ pslldq $1,%xmm11 ++ pinsrb $0,(%rsi),%xmm11 ++ leaq -1(%rsi),%rsi ++ subq $1,%r15 ++ jnz L$load_extra_load_loop ++ ++ ++ ++ ++ movq %rbx,%r15 ++ ++L$load_extra_shift_loop: ++ pslldq $1,%xmm11 ++ subq $1,%r15 ++ jnz L$load_extra_shift_loop ++ ++ ++ ++ ++ leaq L$and_masks(%rip),%r15 ++ shlq $4,%rbx ++ pand -16(%r15,%rbx,1),%xmm15 ++ ++ ++ por %xmm11,%xmm15 ++ ++ ++ ++.byte 102,77,15,126,253 ++ pextrq $1,%xmm15,%r14 ++ addq %r13,%r10 ++ adcq %r14,%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ ++L$process_blocks_of_extra_in: ++ ++ movq 288+32+0 (%rsp),%r9 ++ movq 48(%r9),%rsi ++ movq 56(%r9),%r8 ++ movq %r8,%rcx ++ shrq $4,%r8 ++ ++L$process_extra_hash_loop: ++ jz process_extra_in_trailer ++ addq 0+0(%rsi),%r10 ++ adcq 8+0(%rsi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rsi),%rsi ++ subq $1,%r8 ++ jmp L$process_extra_hash_loop ++process_extra_in_trailer: ++ andq $15,%rcx ++ movq %rcx,%rbx ++ jz L$do_length_block ++ leaq -1(%rsi,%rcx,1),%rsi ++ ++L$process_extra_in_trailer_load: ++ pslldq $1,%xmm15 ++ pinsrb $0,(%rsi),%xmm15 ++ leaq -1(%rsi),%rsi ++ subq $1,%rcx ++ jnz L$process_extra_in_trailer_load ++ ++L$process_partial_block: ++ ++ leaq L$and_masks(%rip),%r15 ++ shlq $4,%rbx ++ pand -16(%r15,%rbx,1),%xmm15 ++.byte 102,77,15,126,253 ++ pextrq $1,%xmm15,%r14 ++ addq %r13,%r10 ++ adcq %r14,%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ ++L$do_length_block: ++ addq 
0+0+32(%rbp),%r10 ++ adcq 8+0+32(%rbp),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ ++ movq %r10,%r13 ++ movq %r11,%r14 ++ movq %r12,%r15 ++ subq $-5,%r10 ++ sbbq $-1,%r11 ++ sbbq $3,%r12 ++ cmovcq %r13,%r10 ++ cmovcq %r14,%r11 ++ cmovcq %r15,%r12 ++ ++ addq 0+0+16(%rbp),%r10 ++ adcq 8+0+16(%rbp),%r11 ++ ++ ++ addq $288 + 0 + 32,%rsp ++ ++ ++ popq %r9 ++ ++ movq %r10,(%r9) ++ movq %r11,8(%r9) ++ popq %r15 ++ ++ popq %r14 ++ ++ popq %r13 ++ ++ popq %r12 ++ ++ popq %rbx ++ ++ popq %rbp ++ ++ .byte 0xf3,0xc3 ++ ++L$seal_sse_128: ++ ++ movdqu L$chacha20_consts(%rip),%xmm0 ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm0,%xmm2 ++ movdqu 0(%r9),%xmm4 ++ movdqa %xmm4,%xmm5 ++ movdqa %xmm4,%xmm6 ++ movdqu 16(%r9),%xmm8 ++ movdqa %xmm8,%xmm9 ++ movdqa %xmm8,%xmm10 ++ movdqu 32(%r9),%xmm14 ++ movdqa %xmm14,%xmm12 ++ paddd L$sse_inc(%rip),%xmm12 ++ movdqa %xmm12,%xmm13 ++ paddd L$sse_inc(%rip),%xmm13 ++ movdqa %xmm4,%xmm7 ++ movdqa %xmm8,%xmm11 ++ movdqa %xmm12,%xmm15 ++ movq $10,%r10 ++ ++L$seal_sse_128_rounds: ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,4 ++.byte 102,69,15,58,15,192,8 ++.byte 102,69,15,58,15,228,12 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,4 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,12 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm6 ++ pxor %xmm3,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm6 ++ pxor %xmm3,%xmm6 ++.byte 102,15,58,15,246,4 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,12 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol16(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm4 ++ pxor %xmm3,%xmm4 ++ paddd %xmm4,%xmm0 ++ pxor %xmm0,%xmm12 ++ pshufb L$rol8(%rip),%xmm12 ++ paddd %xmm12,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,15,228,12 ++.byte 102,69,15,58,15,192,8 ++.byte 
102,69,15,58,15,228,4 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol16(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm5 ++ pxor %xmm3,%xmm5 ++ paddd %xmm5,%xmm1 ++ pxor %xmm1,%xmm13 ++ pshufb L$rol8(%rip),%xmm13 ++ paddd %xmm13,%xmm9 ++ pxor %xmm9,%xmm5 ++ movdqa %xmm5,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm5 ++ pxor %xmm3,%xmm5 ++.byte 102,15,58,15,237,12 ++.byte 102,69,15,58,15,201,8 ++.byte 102,69,15,58,15,237,4 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol16(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $12,%xmm3 ++ psrld $20,%xmm6 ++ pxor %xmm3,%xmm6 ++ paddd %xmm6,%xmm2 ++ pxor %xmm2,%xmm14 ++ pshufb L$rol8(%rip),%xmm14 ++ paddd %xmm14,%xmm10 ++ pxor %xmm10,%xmm6 ++ movdqa %xmm6,%xmm3 ++ pslld $7,%xmm3 ++ psrld $25,%xmm6 ++ pxor %xmm3,%xmm6 ++.byte 102,15,58,15,246,12 ++.byte 102,69,15,58,15,210,8 ++.byte 102,69,15,58,15,246,4 ++ ++ decq %r10 ++ jnz L$seal_sse_128_rounds ++ paddd L$chacha20_consts(%rip),%xmm0 ++ paddd L$chacha20_consts(%rip),%xmm1 ++ paddd L$chacha20_consts(%rip),%xmm2 ++ paddd %xmm7,%xmm4 ++ paddd %xmm7,%xmm5 ++ paddd %xmm7,%xmm6 ++ paddd %xmm11,%xmm8 ++ paddd %xmm11,%xmm9 ++ paddd %xmm15,%xmm12 ++ paddd L$sse_inc(%rip),%xmm15 ++ paddd %xmm15,%xmm13 ++ ++ pand L$clamp(%rip),%xmm2 ++ movdqa %xmm2,0+0(%rbp) ++ movdqa %xmm6,0+16(%rbp) ++ ++ movq %r8,%r8 ++ call poly_hash_ad_internal ++ jmp L$seal_sse_128_tail_xor ++ ++ ++ ++ ++ ++.p2align 6 ++chacha20_poly1305_open_avx2: ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ vzeroupper ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vbroadcasti128 0(%r9),%ymm4 ++ vbroadcasti128 16(%r9),%ymm8 ++ vbroadcasti128 32(%r9),%ymm12 ++ vpaddd L$avx2_init(%rip),%ymm12,%ymm12 ++ cmpq $192,%rbx ++ jbe L$open_avx2_192 ++ cmpq $320,%rbx ++ jbe L$open_avx2_320 ++ ++ vmovdqa %ymm4,0+64(%rbp) ++ vmovdqa %ymm8,0+96(%rbp) ++ vmovdqa %ymm12,0+160(%rbp) ++ movq $10,%r10 ++L$open_avx2_init_rounds: ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ ++ decq %r10 ++ jne L$open_avx2_init_rounds ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 ++ ++ vpand L$clamp(%rip),%ymm3,%ymm3 ++ vmovdqa %ymm3,0+0(%rbp) ++ ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 ++ ++ movq %r8,%r8 ++ call poly_hash_ad_internal ++ ++ 
xorq %rcx,%rcx ++L$open_avx2_init_hash: ++ addq 0+0(%rsi,%rcx,1),%r10 ++ adcq 8+0(%rsi,%rcx,1),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ addq $16,%rcx ++ cmpq $64,%rcx ++ jne L$open_avx2_init_hash ++ ++ vpxor 0(%rsi),%ymm0,%ymm0 ++ vpxor 32(%rsi),%ymm4,%ymm4 ++ ++ vmovdqu %ymm0,0(%rdi) ++ vmovdqu %ymm4,32(%rdi) ++ leaq 64(%rsi),%rsi ++ leaq 64(%rdi),%rdi ++ subq $64,%rbx ++L$open_avx2_main_loop: ++ ++ cmpq $512,%rbx ++ jb L$open_avx2_main_loop_done ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm10 ++ vmovdqa %ymm0,%ymm3 ++ vmovdqa %ymm4,%ymm7 ++ vmovdqa %ymm8,%ymm11 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm15 ++ vpaddd %ymm15,%ymm12,%ymm14 ++ vpaddd %ymm14,%ymm12,%ymm13 ++ vpaddd %ymm13,%ymm12,%ymm12 ++ vmovdqa %ymm15,0+256(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm12,0+160(%rbp) ++ ++ xorq %rcx,%rcx ++L$open_avx2_main_loop_rounds: ++ addq 0+0(%rsi,%rcx,1),%r10 ++ adcq 8+0(%rsi,%rcx,1),%r11 ++ adcq $1,%r12 ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq 
$0,%r12 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ addq 0+16(%rsi,%rcx,1),%r10 ++ adcq 8+16(%rsi,%rcx,1),%r11 ++ adcq $1,%r12 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $4,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $12,%ymm15,%ymm15,%ymm15 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ addq 0+32(%rsi,%rcx,1),%r10 ++ adcq 8+32(%rsi,%rcx,1),%r11 ++ adcq $1,%r12 ++ ++ leaq 48(%rcx),%rcx ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd 
%ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $12,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $4,%ymm15,%ymm15,%ymm15 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ ++ cmpq $60*8,%rcx ++ jne L$open_avx2_main_loop_rounds ++ vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 ++ vpaddd 0+64(%rbp),%ymm7,%ymm7 ++ vpaddd 0+96(%rbp),%ymm11,%ymm11 ++ vpaddd 0+256(%rbp),%ymm15,%ymm15 ++ vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 ++ vpaddd 0+64(%rbp),%ymm6,%ymm6 ++ vpaddd 0+96(%rbp),%ymm10,%ymm10 ++ vpaddd 0+224(%rbp),%ymm14,%ymm14 ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd 0+64(%rbp),%ymm5,%ymm5 ++ vpaddd 0+96(%rbp),%ymm9,%ymm9 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ ++ vmovdqa %ymm0,0+128(%rbp) ++ addq 0+60*8(%rsi),%r10 ++ adcq 8+60*8(%rsi),%r11 ++ adcq $1,%r12 ++ vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 ++ vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 ++ vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 ++ vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 ++ vpxor 0+0(%rsi),%ymm0,%ymm0 ++ vpxor 32+0(%rsi),%ymm3,%ymm3 ++ vpxor 64+0(%rsi),%ymm7,%ymm7 ++ vpxor 96+0(%rsi),%ymm11,%ymm11 ++ vmovdqu %ymm0,0+0(%rdi) ++ vmovdqu %ymm3,32+0(%rdi) ++ vmovdqu %ymm7,64+0(%rdi) ++ vmovdqu %ymm11,96+0(%rdi) ++ ++ vmovdqa 0+128(%rbp),%ymm0 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 ++ vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 ++ vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 ++ vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 ++ vpxor 0+128(%rsi),%ymm3,%ymm3 ++ vpxor 32+128(%rsi),%ymm2,%ymm2 ++ vpxor 64+128(%rsi),%ymm6,%ymm6 
++ vpxor 96+128(%rsi),%ymm10,%ymm10 ++ vmovdqu %ymm3,0+128(%rdi) ++ vmovdqu %ymm2,32+128(%rdi) ++ vmovdqu %ymm6,64+128(%rdi) ++ vmovdqu %ymm10,96+128(%rdi) ++ addq 0+60*8+16(%rsi),%r10 ++ adcq 8+60*8+16(%rsi),%r11 ++ adcq $1,%r12 ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 ++ vpxor 0+256(%rsi),%ymm3,%ymm3 ++ vpxor 32+256(%rsi),%ymm1,%ymm1 ++ vpxor 64+256(%rsi),%ymm5,%ymm5 ++ vpxor 96+256(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm3,0+256(%rdi) ++ vmovdqu %ymm1,32+256(%rdi) ++ vmovdqu %ymm5,64+256(%rdi) ++ vmovdqu %ymm9,96+256(%rdi) ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 ++ vpxor 0+384(%rsi),%ymm3,%ymm3 ++ vpxor 32+384(%rsi),%ymm0,%ymm0 ++ vpxor 64+384(%rsi),%ymm4,%ymm4 ++ vpxor 96+384(%rsi),%ymm8,%ymm8 ++ vmovdqu %ymm3,0+384(%rdi) ++ vmovdqu %ymm0,32+384(%rdi) ++ vmovdqu %ymm4,64+384(%rdi) ++ vmovdqu %ymm8,96+384(%rdi) ++ ++ leaq 512(%rsi),%rsi ++ leaq 512(%rdi),%rdi ++ subq $512,%rbx ++ jmp L$open_avx2_main_loop ++L$open_avx2_main_loop_done: ++ testq %rbx,%rbx ++ vzeroupper ++ je L$open_sse_finalize ++ ++ cmpq $384,%rbx ++ ja L$open_avx2_tail_512 ++ cmpq $256,%rbx ++ ja L$open_avx2_tail_384 ++ cmpq $128,%rbx ++ ja L$open_avx2_tail_256 ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vmovdqa %ymm12,0+160(%rbp) ++ ++ xorq %r8,%r8 ++ movq %rbx,%rcx ++ andq $-16,%rcx ++ testq %rcx,%rcx ++ je L$open_avx2_tail_128_rounds ++L$open_avx2_tail_128_rounds_and_x1hash: ++ addq 0+0(%rsi,%r8,1),%r10 ++ adcq 8+0(%rsi,%r8,1),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++L$open_avx2_tail_128_rounds: ++ addq $16,%r8 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld 
$7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ ++ cmpq %rcx,%r8 ++ jb L$open_avx2_tail_128_rounds_and_x1hash ++ cmpq $160,%r8 ++ jne L$open_avx2_tail_128_rounds ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 ++ vmovdqa %ymm3,%ymm8 ++ ++ jmp L$open_avx2_tail_128_xor ++ ++L$open_avx2_tail_256: ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm13 ++ vpaddd %ymm13,%ymm12,%ymm12 ++ vmovdqa %ymm12,0+160(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ ++ movq %rbx,0+128(%rbp) ++ movq %rbx,%rcx ++ subq $128,%rcx ++ shrq $4,%rcx ++ movq $10,%r8 ++ cmpq $10,%rcx ++ cmovgq %r8,%rcx ++ movq %rsi,%rbx ++ xorq %r8,%r8 ++L$open_avx2_tail_256_rounds_and_x1hash: ++ addq 0+0(%rbx),%r10 ++ adcq 8+0(%rbx),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rbx),%rbx ++L$open_avx2_tail_256_rounds: ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 
++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ ++ incq %r8 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol16(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpsrld $20,%ymm6,%ymm3 ++ vpslld $12,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol8(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpslld $7,%ymm6,%ymm3 ++ vpsrld $25,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ ++ cmpq %rcx,%r8 ++ jb L$open_avx2_tail_256_rounds_and_x1hash ++ cmpq $10,%r8 ++ jne L$open_avx2_tail_256_rounds ++ movq %rbx,%r8 ++ subq %rsi,%rbx ++ movq %rbx,%rcx ++ movq 0+128(%rbp),%rbx ++L$open_avx2_tail_256_hash: ++ addq $16,%rcx ++ cmpq %rbx,%rcx ++ jg L$open_avx2_tail_256_done ++ addq 0+0(%r8),%r10 ++ adcq 8+0(%r8),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%r8),%r8 ++ jmp L$open_avx2_tail_256_hash ++L$open_avx2_tail_256_done: ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd 0+64(%rbp),%ymm5,%ymm5 ++ vpaddd 0+96(%rbp),%ymm9,%ymm9 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 ++ vpxor 0+0(%rsi),%ymm3,%ymm3 ++ vpxor 32+0(%rsi),%ymm1,%ymm1 ++ vpxor 64+0(%rsi),%ymm5,%ymm5 ++ vpxor 96+0(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm3,0+0(%rdi) ++ vmovdqu %ymm1,32+0(%rdi) ++ vmovdqu %ymm5,64+0(%rdi) ++ vmovdqu %ymm9,96+0(%rdi) ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 ++ 
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 ++ vmovdqa %ymm3,%ymm8 ++ ++ leaq 128(%rsi),%rsi ++ leaq 128(%rdi),%rdi ++ subq $128,%rbx ++ jmp L$open_avx2_tail_128_xor ++ ++L$open_avx2_tail_384: ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm10 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm14 ++ vpaddd %ymm14,%ymm12,%ymm13 ++ vpaddd %ymm13,%ymm12,%ymm12 ++ vmovdqa %ymm12,0+160(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ ++ movq %rbx,0+128(%rbp) ++ movq %rbx,%rcx ++ subq $256,%rcx ++ shrq $4,%rcx ++ addq $6,%rcx ++ movq $10,%r8 ++ cmpq $10,%rcx ++ cmovgq %r8,%rcx ++ movq %rsi,%rbx ++ xorq %r8,%r8 ++L$open_avx2_tail_384_rounds_and_x2hash: ++ addq 0+0(%rbx),%r10 ++ adcq 8+0(%rbx),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rbx),%rbx ++L$open_avx2_tail_384_rounds_and_x1hash: ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol16(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpsrld $20,%ymm6,%ymm3 ++ vpslld $12,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol8(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpslld $7,%ymm6,%ymm3 ++ vpsrld $25,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ addq 0+0(%rbx),%r10 ++ adcq 8+0(%rbx),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ 
mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rbx),%rbx ++ incq %r8 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol16(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpsrld $20,%ymm6,%ymm3 ++ vpslld $12,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol8(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpslld $7,%ymm6,%ymm3 ++ vpsrld $25,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ ++ cmpq %rcx,%r8 ++ jb L$open_avx2_tail_384_rounds_and_x2hash ++ cmpq $10,%r8 ++ jne L$open_avx2_tail_384_rounds_and_x1hash ++ movq %rbx,%r8 ++ subq %rsi,%rbx ++ movq %rbx,%rcx ++ movq 0+128(%rbp),%rbx ++L$open_avx2_384_tail_hash: ++ addq $16,%rcx ++ cmpq %rbx,%rcx ++ jg L$open_avx2_384_tail_done ++ addq 0+0(%r8),%r10 ++ adcq 8+0(%r8),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%r8),%r8 ++ jmp L$open_avx2_384_tail_hash ++L$open_avx2_384_tail_done: ++ vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 ++ vpaddd 0+64(%rbp),%ymm6,%ymm6 ++ vpaddd 0+96(%rbp),%ymm10,%ymm10 ++ vpaddd 0+224(%rbp),%ymm14,%ymm14 ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd 0+64(%rbp),%ymm5,%ymm5 ++ vpaddd 0+96(%rbp),%ymm9,%ymm9 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 
0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 ++ vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 ++ vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 ++ vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 ++ vpxor 0+0(%rsi),%ymm3,%ymm3 ++ vpxor 32+0(%rsi),%ymm2,%ymm2 ++ vpxor 64+0(%rsi),%ymm6,%ymm6 ++ vpxor 96+0(%rsi),%ymm10,%ymm10 ++ vmovdqu %ymm3,0+0(%rdi) ++ vmovdqu %ymm2,32+0(%rdi) ++ vmovdqu %ymm6,64+0(%rdi) ++ vmovdqu %ymm10,96+0(%rdi) ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 ++ vpxor 0+128(%rsi),%ymm3,%ymm3 ++ vpxor 32+128(%rsi),%ymm1,%ymm1 ++ vpxor 64+128(%rsi),%ymm5,%ymm5 ++ vpxor 96+128(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm3,0+128(%rdi) ++ vmovdqu %ymm1,32+128(%rdi) ++ vmovdqu %ymm5,64+128(%rdi) ++ vmovdqu %ymm9,96+128(%rdi) ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 ++ vmovdqa %ymm3,%ymm8 ++ ++ leaq 256(%rsi),%rsi ++ leaq 256(%rdi),%rdi ++ subq $256,%rbx ++ jmp L$open_avx2_tail_128_xor ++ ++L$open_avx2_tail_512: ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm10 ++ vmovdqa %ymm0,%ymm3 ++ vmovdqa %ymm4,%ymm7 ++ vmovdqa %ymm8,%ymm11 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm15 ++ vpaddd %ymm15,%ymm12,%ymm14 ++ vpaddd %ymm14,%ymm12,%ymm13 ++ vpaddd %ymm13,%ymm12,%ymm12 ++ vmovdqa %ymm15,0+256(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm12,0+160(%rbp) ++ ++ xorq %rcx,%rcx ++ movq %rsi,%r8 ++L$open_avx2_tail_512_rounds_and_x2hash: ++ addq 0+0(%r8),%r10 ++ adcq 8+0(%r8),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%r8),%r8 ++L$open_avx2_tail_512_rounds_and_x1hash: ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 
++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ addq 0+0(%r8),%r10 ++ adcq 8+0(%r8),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $4,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $12,%ymm15,%ymm15,%ymm15 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ addq 0+16(%r8),%r10 ++ adcq 8+16(%r8),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 32(%r8),%r8 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor 
%ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $12,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $4,%ymm15,%ymm15,%ymm15 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ ++ incq %rcx ++ cmpq $4,%rcx ++ jl L$open_avx2_tail_512_rounds_and_x2hash ++ cmpq $10,%rcx ++ jne L$open_avx2_tail_512_rounds_and_x1hash ++ movq %rbx,%rcx ++ subq $384,%rcx ++ andq $-16,%rcx ++L$open_avx2_tail_512_hash: ++ testq %rcx,%rcx ++ je L$open_avx2_tail_512_done ++ addq 0+0(%r8),%r10 ++ adcq 8+0(%r8),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%r8),%r8 ++ subq $16,%rcx ++ jmp L$open_avx2_tail_512_hash ++L$open_avx2_tail_512_done: ++ vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 ++ vpaddd 0+64(%rbp),%ymm7,%ymm7 ++ vpaddd 0+96(%rbp),%ymm11,%ymm11 ++ vpaddd 0+256(%rbp),%ymm15,%ymm15 ++ vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 ++ vpaddd 0+64(%rbp),%ymm6,%ymm6 ++ vpaddd 0+96(%rbp),%ymm10,%ymm10 ++ vpaddd 0+224(%rbp),%ymm14,%ymm14 ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd 0+64(%rbp),%ymm5,%ymm5 ++ vpaddd 0+96(%rbp),%ymm9,%ymm9 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ ++ vmovdqa %ymm0,0+128(%rbp) ++ vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 ++ vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 ++ vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 ++ vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 ++ vpxor 0+0(%rsi),%ymm0,%ymm0 ++ vpxor 32+0(%rsi),%ymm3,%ymm3 ++ vpxor 
64+0(%rsi),%ymm7,%ymm7 ++ vpxor 96+0(%rsi),%ymm11,%ymm11 ++ vmovdqu %ymm0,0+0(%rdi) ++ vmovdqu %ymm3,32+0(%rdi) ++ vmovdqu %ymm7,64+0(%rdi) ++ vmovdqu %ymm11,96+0(%rdi) ++ ++ vmovdqa 0+128(%rbp),%ymm0 ++ vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 ++ vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 ++ vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 ++ vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 ++ vpxor 0+128(%rsi),%ymm3,%ymm3 ++ vpxor 32+128(%rsi),%ymm2,%ymm2 ++ vpxor 64+128(%rsi),%ymm6,%ymm6 ++ vpxor 96+128(%rsi),%ymm10,%ymm10 ++ vmovdqu %ymm3,0+128(%rdi) ++ vmovdqu %ymm2,32+128(%rdi) ++ vmovdqu %ymm6,64+128(%rdi) ++ vmovdqu %ymm10,96+128(%rdi) ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 ++ vpxor 0+256(%rsi),%ymm3,%ymm3 ++ vpxor 32+256(%rsi),%ymm1,%ymm1 ++ vpxor 64+256(%rsi),%ymm5,%ymm5 ++ vpxor 96+256(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm3,0+256(%rdi) ++ vmovdqu %ymm1,32+256(%rdi) ++ vmovdqu %ymm5,64+256(%rdi) ++ vmovdqu %ymm9,96+256(%rdi) ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 ++ vmovdqa %ymm3,%ymm8 ++ ++ leaq 384(%rsi),%rsi ++ leaq 384(%rdi),%rdi ++ subq $384,%rbx ++L$open_avx2_tail_128_xor: ++ cmpq $32,%rbx ++ jb L$open_avx2_tail_32_xor ++ subq $32,%rbx ++ vpxor (%rsi),%ymm0,%ymm0 ++ vmovdqu %ymm0,(%rdi) ++ leaq 32(%rsi),%rsi ++ leaq 32(%rdi),%rdi ++ vmovdqa %ymm4,%ymm0 ++ vmovdqa %ymm8,%ymm4 ++ vmovdqa %ymm12,%ymm8 ++ jmp L$open_avx2_tail_128_xor ++L$open_avx2_tail_32_xor: ++ cmpq $16,%rbx ++ vmovdqa %xmm0,%xmm1 ++ jb L$open_avx2_exit ++ subq $16,%rbx ++ ++ vpxor (%rsi),%xmm0,%xmm1 ++ vmovdqu %xmm1,(%rdi) ++ leaq 16(%rsi),%rsi ++ leaq 16(%rdi),%rdi ++ vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 ++ vmovdqa %xmm0,%xmm1 ++L$open_avx2_exit: ++ vzeroupper ++ jmp L$open_sse_tail_16 ++ ++L$open_avx2_192: ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm8,%ymm10 ++ vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 ++ vmovdqa %ymm12,%ymm11 ++ vmovdqa %ymm13,%ymm15 ++ movq $10,%r10 ++L$open_avx2_192_rounds: ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd 
%ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ ++ decq %r10 ++ jne L$open_avx2_192_rounds ++ vpaddd %ymm2,%ymm0,%ymm0 ++ vpaddd %ymm2,%ymm1,%ymm1 ++ vpaddd %ymm6,%ymm4,%ymm4 ++ vpaddd %ymm6,%ymm5,%ymm5 ++ vpaddd %ymm10,%ymm8,%ymm8 ++ vpaddd %ymm10,%ymm9,%ymm9 ++ vpaddd %ymm11,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm13,%ymm13 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 ++ ++ vpand L$clamp(%rip),%ymm3,%ymm3 ++ vmovdqa %ymm3,0+0(%rbp) ++ ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 ++L$open_avx2_short: ++ movq %r8,%r8 ++ call poly_hash_ad_internal ++L$open_avx2_short_hash_and_xor_loop: ++ cmpq $32,%rbx ++ jb L$open_avx2_short_tail_32 ++ subq $32,%rbx ++ addq 0+0(%rsi),%r10 ++ adcq 8+0(%rsi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ addq 0+16(%rsi),%r10 ++ adcq 8+16(%rsi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ ++ vpxor (%rsi),%ymm0,%ymm0 ++ vmovdqu %ymm0,(%rdi) ++ leaq 32(%rsi),%rsi ++ leaq 32(%rdi),%rdi ++ ++ vmovdqa %ymm4,%ymm0 ++ vmovdqa %ymm8,%ymm4 ++ vmovdqa %ymm12,%ymm8 ++ vmovdqa %ymm1,%ymm12 ++ vmovdqa %ymm5,%ymm1 ++ vmovdqa %ymm9,%ymm5 ++ vmovdqa %ymm13,%ymm9 ++ vmovdqa %ymm2,%ymm13 ++ vmovdqa %ymm6,%ymm2 ++ jmp L$open_avx2_short_hash_and_xor_loop ++L$open_avx2_short_tail_32: ++ cmpq $16,%rbx ++ vmovdqa %xmm0,%xmm1 ++ jb 
L$open_avx2_short_tail_32_exit ++ subq $16,%rbx ++ addq 0+0(%rsi),%r10 ++ adcq 8+0(%rsi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ vpxor (%rsi),%xmm0,%xmm3 ++ vmovdqu %xmm3,(%rdi) ++ leaq 16(%rsi),%rsi ++ leaq 16(%rdi),%rdi ++ vextracti128 $1,%ymm0,%xmm1 ++L$open_avx2_short_tail_32_exit: ++ vzeroupper ++ jmp L$open_sse_tail_16 ++ ++L$open_avx2_320: ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm8,%ymm10 ++ vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 ++ vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 ++ vmovdqa %ymm4,%ymm7 ++ vmovdqa %ymm8,%ymm11 ++ vmovdqa %ymm12,0+160(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ movq $10,%r10 ++L$open_avx2_320_rounds: ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol16(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpsrld $20,%ymm6,%ymm3 ++ vpslld $12,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol8(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpslld $7,%ymm6,%ymm3 ++ vpsrld $25,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor 
%ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol16(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpsrld $20,%ymm6,%ymm3 ++ vpslld $12,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol8(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpslld $7,%ymm6,%ymm3 ++ vpsrld $25,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ ++ decq %r10 ++ jne L$open_avx2_320_rounds ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 ++ vpaddd %ymm7,%ymm4,%ymm4 ++ vpaddd %ymm7,%ymm5,%ymm5 ++ vpaddd %ymm7,%ymm6,%ymm6 ++ vpaddd %ymm11,%ymm8,%ymm8 ++ vpaddd %ymm11,%ymm9,%ymm9 ++ vpaddd %ymm11,%ymm10,%ymm10 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd 0+224(%rbp),%ymm14,%ymm14 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 ++ ++ vpand L$clamp(%rip),%ymm3,%ymm3 ++ vmovdqa %ymm3,0+0(%rbp) ++ ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 ++ vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 ++ vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 ++ vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 ++ vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 ++ jmp L$open_avx2_short ++ ++ ++ ++ ++ ++.p2align 6 ++chacha20_poly1305_seal_avx2: ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ vzeroupper ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vbroadcasti128 0(%r9),%ymm4 ++ vbroadcasti128 16(%r9),%ymm8 ++ vbroadcasti128 32(%r9),%ymm12 ++ vpaddd L$avx2_init(%rip),%ymm12,%ymm12 ++ cmpq $192,%rbx ++ jbe L$seal_avx2_192 ++ cmpq $320,%rbx ++ jbe L$seal_avx2_320 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm0,%ymm3 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm4,%ymm7 ++ vmovdqa %ymm4,0+64(%rbp) ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm8,%ymm10 ++ vmovdqa %ymm8,%ymm11 ++ vmovdqa %ymm8,0+96(%rbp) ++ vmovdqa %ymm12,%ymm15 ++ vpaddd L$avx2_inc(%rip),%ymm15,%ymm14 ++ vpaddd L$avx2_inc(%rip),%ymm14,%ymm13 ++ vpaddd L$avx2_inc(%rip),%ymm13,%ymm12 ++ vmovdqa %ymm12,0+160(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ vmovdqa %ymm15,0+256(%rbp) ++ movq $10,%r10 ++L$seal_avx2_init_rounds: ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb 
%ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $4,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $12,%ymm15,%ymm15,%ymm15 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 
0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $12,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $4,%ymm15,%ymm15,%ymm15 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ ++ decq %r10 ++ jnz L$seal_avx2_init_rounds ++ vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 ++ vpaddd 0+64(%rbp),%ymm7,%ymm7 ++ vpaddd 0+96(%rbp),%ymm11,%ymm11 ++ vpaddd 0+256(%rbp),%ymm15,%ymm15 ++ vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 ++ vpaddd 0+64(%rbp),%ymm6,%ymm6 ++ vpaddd 0+96(%rbp),%ymm10,%ymm10 ++ vpaddd 0+224(%rbp),%ymm14,%ymm14 ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd 0+64(%rbp),%ymm5,%ymm5 ++ vpaddd 0+96(%rbp),%ymm9,%ymm9 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ ++ vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 ++ vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 ++ vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 ++ vpand L$clamp(%rip),%ymm15,%ymm15 ++ vmovdqa %ymm15,0+0(%rbp) ++ movq %r8,%r8 ++ call poly_hash_ad_internal ++ ++ vpxor 0(%rsi),%ymm3,%ymm3 ++ vpxor 32(%rsi),%ymm11,%ymm11 ++ vmovdqu %ymm3,0(%rdi) ++ vmovdqu %ymm11,32(%rdi) ++ vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 ++ vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 ++ vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 ++ vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 ++ vpxor 0+64(%rsi),%ymm15,%ymm15 ++ vpxor 32+64(%rsi),%ymm2,%ymm2 ++ vpxor 64+64(%rsi),%ymm6,%ymm6 ++ vpxor 96+64(%rsi),%ymm10,%ymm10 ++ vmovdqu %ymm15,0+64(%rdi) ++ vmovdqu %ymm2,32+64(%rdi) ++ vmovdqu %ymm6,64+64(%rdi) ++ vmovdqu %ymm10,96+64(%rdi) ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 ++ vpxor 0+192(%rsi),%ymm15,%ymm15 ++ vpxor 32+192(%rsi),%ymm1,%ymm1 ++ vpxor 64+192(%rsi),%ymm5,%ymm5 ++ vpxor 96+192(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm15,0+192(%rdi) ++ vmovdqu %ymm1,32+192(%rdi) ++ vmovdqu %ymm5,64+192(%rdi) ++ vmovdqu %ymm9,96+192(%rdi) ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 ++ vmovdqa %ymm15,%ymm8 ++ ++ leaq 320(%rsi),%rsi ++ subq $320,%rbx ++ movq $320,%rcx ++ cmpq $128,%rbx ++ jbe L$seal_avx2_short_hash_remainder ++ vpxor 0(%rsi),%ymm0,%ymm0 ++ vpxor 32(%rsi),%ymm4,%ymm4 ++ vpxor 64(%rsi),%ymm8,%ymm8 ++ vpxor 96(%rsi),%ymm12,%ymm12 ++ vmovdqu %ymm0,320(%rdi) ++ vmovdqu %ymm4,352(%rdi) ++ vmovdqu %ymm8,384(%rdi) ++ vmovdqu %ymm12,416(%rdi) ++ leaq 128(%rsi),%rsi ++ subq $128,%rbx ++ movq $8,%rcx ++ movq $2,%r8 ++ cmpq $128,%rbx ++ jbe L$seal_avx2_tail_128 ++ cmpq $256,%rbx ++ jbe L$seal_avx2_tail_256 ++ cmpq $384,%rbx ++ jbe L$seal_avx2_tail_384 ++ cmpq $512,%rbx ++ jbe L$seal_avx2_tail_512 ++ vmovdqa 
L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm10 ++ vmovdqa %ymm0,%ymm3 ++ vmovdqa %ymm4,%ymm7 ++ vmovdqa %ymm8,%ymm11 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm15 ++ vpaddd %ymm15,%ymm12,%ymm14 ++ vpaddd %ymm14,%ymm12,%ymm13 ++ vpaddd %ymm13,%ymm12,%ymm12 ++ vmovdqa %ymm15,0+256(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm12,0+160(%rbp) ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $4,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $12,%ymm15,%ymm15,%ymm15 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor 
%ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $12,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $4,%ymm15,%ymm15,%ymm15 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ ++ subq $16,%rdi ++ movq $9,%rcx ++ jmp L$seal_avx2_main_loop_rounds_entry ++.p2align 5 ++L$seal_avx2_main_loop: ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm10 ++ vmovdqa %ymm0,%ymm3 ++ vmovdqa %ymm4,%ymm7 ++ vmovdqa %ymm8,%ymm11 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm15 ++ 
vpaddd %ymm15,%ymm12,%ymm14 ++ vpaddd %ymm14,%ymm12,%ymm13 ++ vpaddd %ymm13,%ymm12,%ymm12 ++ vmovdqa %ymm15,0+256(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm12,0+160(%rbp) ++ ++ movq $10,%rcx ++.p2align 5 ++L$seal_avx2_main_loop_rounds: ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++L$seal_avx2_main_loop_rounds_entry: ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ addq 0+16(%rdi),%r10 ++ adcq 8+16(%rdi),%r11 ++ adcq $1,%r12 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $4,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $12,%ymm15,%ymm15,%ymm15 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ 
vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ addq 0+32(%rdi),%r10 ++ adcq 8+32(%rdi),%r11 ++ adcq $1,%r12 ++ ++ leaq 48(%rdi),%rdi ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $12,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $4,%ymm15,%ymm15,%ymm15 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ ++ decq %rcx ++ jne 
L$seal_avx2_main_loop_rounds ++ vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 ++ vpaddd 0+64(%rbp),%ymm7,%ymm7 ++ vpaddd 0+96(%rbp),%ymm11,%ymm11 ++ vpaddd 0+256(%rbp),%ymm15,%ymm15 ++ vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 ++ vpaddd 0+64(%rbp),%ymm6,%ymm6 ++ vpaddd 0+96(%rbp),%ymm10,%ymm10 ++ vpaddd 0+224(%rbp),%ymm14,%ymm14 ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd 0+64(%rbp),%ymm5,%ymm5 ++ vpaddd 0+96(%rbp),%ymm9,%ymm9 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ ++ vmovdqa %ymm0,0+128(%rbp) ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ addq 0+16(%rdi),%r10 ++ adcq 8+16(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 32(%rdi),%rdi ++ vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 ++ vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 ++ vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 ++ vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 ++ vpxor 0+0(%rsi),%ymm0,%ymm0 ++ vpxor 32+0(%rsi),%ymm3,%ymm3 ++ vpxor 64+0(%rsi),%ymm7,%ymm7 ++ vpxor 96+0(%rsi),%ymm11,%ymm11 ++ vmovdqu %ymm0,0+0(%rdi) ++ vmovdqu %ymm3,32+0(%rdi) ++ vmovdqu %ymm7,64+0(%rdi) ++ vmovdqu %ymm11,96+0(%rdi) ++ ++ vmovdqa 0+128(%rbp),%ymm0 ++ vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 ++ vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 ++ vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 ++ vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 ++ vpxor 0+128(%rsi),%ymm3,%ymm3 ++ vpxor 32+128(%rsi),%ymm2,%ymm2 ++ vpxor 64+128(%rsi),%ymm6,%ymm6 ++ vpxor 96+128(%rsi),%ymm10,%ymm10 ++ vmovdqu %ymm3,0+128(%rdi) ++ vmovdqu %ymm2,32+128(%rdi) ++ vmovdqu %ymm6,64+128(%rdi) ++ vmovdqu %ymm10,96+128(%rdi) ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 ++ vpxor 0+256(%rsi),%ymm3,%ymm3 ++ vpxor 32+256(%rsi),%ymm1,%ymm1 ++ vpxor 64+256(%rsi),%ymm5,%ymm5 ++ vpxor 96+256(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm3,0+256(%rdi) ++ vmovdqu %ymm1,32+256(%rdi) ++ vmovdqu %ymm5,64+256(%rdi) ++ vmovdqu %ymm9,96+256(%rdi) ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 ++ vpxor 0+384(%rsi),%ymm3,%ymm3 ++ vpxor 32+384(%rsi),%ymm0,%ymm0 ++ vpxor 64+384(%rsi),%ymm4,%ymm4 ++ vpxor 96+384(%rsi),%ymm8,%ymm8 ++ vmovdqu %ymm3,0+384(%rdi) ++ vmovdqu %ymm0,32+384(%rdi) ++ vmovdqu 
%ymm4,64+384(%rdi) ++ vmovdqu %ymm8,96+384(%rdi) ++ ++ leaq 512(%rsi),%rsi ++ subq $512,%rbx ++ cmpq $512,%rbx ++ jg L$seal_avx2_main_loop ++ ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ addq 0+16(%rdi),%r10 ++ adcq 8+16(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 32(%rdi),%rdi ++ movq $10,%rcx ++ xorq %r8,%r8 ++ ++ cmpq $384,%rbx ++ ja L$seal_avx2_tail_512 ++ cmpq $256,%rbx ++ ja L$seal_avx2_tail_384 ++ cmpq $128,%rbx ++ ja L$seal_avx2_tail_256 ++ ++L$seal_avx2_tail_128: ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vmovdqa %ymm12,0+160(%rbp) ++ ++L$seal_avx2_tail_128_rounds_and_3xhash: ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++L$seal_avx2_tail_128_rounds_and_2xhash: ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ 
movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ addq 0+16(%rdi),%r10 ++ adcq 8+16(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 32(%rdi),%rdi ++ decq %rcx ++ jg L$seal_avx2_tail_128_rounds_and_3xhash ++ decq %r8 ++ jge L$seal_avx2_tail_128_rounds_and_2xhash ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 ++ vmovdqa %ymm3,%ymm8 ++ ++ jmp L$seal_avx2_short_loop ++ ++L$seal_avx2_tail_256: ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm13 ++ vpaddd %ymm13,%ymm12,%ymm12 ++ vmovdqa %ymm12,0+160(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ ++L$seal_avx2_tail_256_rounds_and_3xhash: ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++L$seal_avx2_tail_256_rounds_and_2xhash: ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ 
vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ addq 0+16(%rdi),%r10 ++ adcq 8+16(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 32(%rdi),%rdi ++ decq %rcx ++ jg L$seal_avx2_tail_256_rounds_and_3xhash ++ decq %r8 ++ jge L$seal_avx2_tail_256_rounds_and_2xhash ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd 0+64(%rbp),%ymm5,%ymm5 ++ vpaddd 0+96(%rbp),%ymm9,%ymm9 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd 
L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 ++ vpxor 0+0(%rsi),%ymm3,%ymm3 ++ vpxor 32+0(%rsi),%ymm1,%ymm1 ++ vpxor 64+0(%rsi),%ymm5,%ymm5 ++ vpxor 96+0(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm3,0+0(%rdi) ++ vmovdqu %ymm1,32+0(%rdi) ++ vmovdqu %ymm5,64+0(%rdi) ++ vmovdqu %ymm9,96+0(%rdi) ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 ++ vmovdqa %ymm3,%ymm8 ++ ++ movq $128,%rcx ++ leaq 128(%rsi),%rsi ++ subq $128,%rbx ++ jmp L$seal_avx2_short_hash_remainder ++ ++L$seal_avx2_tail_384: ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm10 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm14 ++ vpaddd %ymm14,%ymm12,%ymm13 ++ vpaddd %ymm13,%ymm12,%ymm12 ++ vmovdqa %ymm12,0+160(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ ++L$seal_avx2_tail_384_rounds_and_3xhash: ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++L$seal_avx2_tail_384_rounds_and_2xhash: ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq 
%r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol16(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpsrld $20,%ymm6,%ymm3 ++ vpslld $12,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol8(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpslld $7,%ymm6,%ymm3 ++ vpsrld $25,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ addq 0+16(%rdi),%r10 ++ adcq 8+16(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol16(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpsrld $20,%ymm6,%ymm3 ++ vpslld $12,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol8(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpslld $7,%ymm6,%ymm3 ++ vpsrld $25,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ ++ leaq 32(%rdi),%rdi ++ decq %rcx ++ jg L$seal_avx2_tail_384_rounds_and_3xhash ++ decq %r8 ++ jge 
L$seal_avx2_tail_384_rounds_and_2xhash ++ vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 ++ vpaddd 0+64(%rbp),%ymm6,%ymm6 ++ vpaddd 0+96(%rbp),%ymm10,%ymm10 ++ vpaddd 0+224(%rbp),%ymm14,%ymm14 ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd 0+64(%rbp),%ymm5,%ymm5 ++ vpaddd 0+96(%rbp),%ymm9,%ymm9 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 ++ vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 ++ vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 ++ vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 ++ vpxor 0+0(%rsi),%ymm3,%ymm3 ++ vpxor 32+0(%rsi),%ymm2,%ymm2 ++ vpxor 64+0(%rsi),%ymm6,%ymm6 ++ vpxor 96+0(%rsi),%ymm10,%ymm10 ++ vmovdqu %ymm3,0+0(%rdi) ++ vmovdqu %ymm2,32+0(%rdi) ++ vmovdqu %ymm6,64+0(%rdi) ++ vmovdqu %ymm10,96+0(%rdi) ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 ++ vpxor 0+128(%rsi),%ymm3,%ymm3 ++ vpxor 32+128(%rsi),%ymm1,%ymm1 ++ vpxor 64+128(%rsi),%ymm5,%ymm5 ++ vpxor 96+128(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm3,0+128(%rdi) ++ vmovdqu %ymm1,32+128(%rdi) ++ vmovdqu %ymm5,64+128(%rdi) ++ vmovdqu %ymm9,96+128(%rdi) ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 ++ vmovdqa %ymm3,%ymm8 ++ ++ movq $256,%rcx ++ leaq 256(%rsi),%rsi ++ subq $256,%rbx ++ jmp L$seal_avx2_short_hash_remainder ++ ++L$seal_avx2_tail_512: ++ vmovdqa L$chacha20_consts(%rip),%ymm0 ++ vmovdqa 0+64(%rbp),%ymm4 ++ vmovdqa 0+96(%rbp),%ymm8 ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm10 ++ vmovdqa %ymm0,%ymm3 ++ vmovdqa %ymm4,%ymm7 ++ vmovdqa %ymm8,%ymm11 ++ vmovdqa L$avx2_inc(%rip),%ymm12 ++ vpaddd 0+160(%rbp),%ymm12,%ymm15 ++ vpaddd %ymm15,%ymm12,%ymm14 ++ vpaddd %ymm14,%ymm12,%ymm13 ++ vpaddd %ymm13,%ymm12,%ymm12 ++ vmovdqa %ymm15,0+256(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm12,0+160(%rbp) ++ ++L$seal_avx2_tail_512_rounds_and_3xhash: ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++L$seal_avx2_tail_512_rounds_and_2xhash: ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ addq 0+0(%rdi),%r10 ++ adcq 
8+0(%rdi),%r11 ++ adcq $1,%r12 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $4,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $12,%ymm15,%ymm15,%ymm15 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vmovdqa %ymm8,0+128(%rbp) ++ vmovdqa L$rol16(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $20,%ymm7,%ymm8 ++ vpslld $32-20,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $20,%ymm6,%ymm8 ++ vpslld $32-20,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $20,%ymm5,%ymm8 ++ vpslld $32-20,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $20,%ymm4,%ymm8 ++ vpslld $32-20,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa L$rol8(%rip),%ymm8 ++ vpaddd %ymm7,%ymm3,%ymm3 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ addq 0+16(%rdi),%r10 ++ adcq 
8+16(%rdi),%r11 ++ adcq $1,%r12 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm3,%ymm15,%ymm15 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb %ymm8,%ymm15,%ymm15 ++ vpshufb %ymm8,%ymm14,%ymm14 ++ vpshufb %ymm8,%ymm13,%ymm13 ++ vpshufb %ymm8,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm11,%ymm11 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpaddd 0+128(%rbp),%ymm12,%ymm8 ++ vpxor %ymm11,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa %ymm8,0+128(%rbp) ++ vpsrld $25,%ymm7,%ymm8 ++ movq 0+0+0(%rbp),%rdx ++ movq %rdx,%r15 ++ mulxq %r10,%r13,%r14 ++ mulxq %r11,%rax,%rdx ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ vpslld $32-25,%ymm7,%ymm7 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $25,%ymm6,%ymm8 ++ vpslld $32-25,%ymm6,%ymm6 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $25,%ymm5,%ymm8 ++ vpslld $32-25,%ymm5,%ymm5 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $25,%ymm4,%ymm8 ++ vpslld $32-25,%ymm4,%ymm4 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vmovdqa 0+128(%rbp),%ymm8 ++ vpalignr $12,%ymm7,%ymm7,%ymm7 ++ vpalignr $8,%ymm11,%ymm11,%ymm11 ++ vpalignr $4,%ymm15,%ymm15,%ymm15 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ movq 8+0+0(%rbp),%rdx ++ mulxq %r10,%r10,%rax ++ addq %r10,%r14 ++ mulxq %r11,%r11,%r9 ++ adcq %r11,%r15 ++ adcq $0,%r9 ++ imulq %r12,%rdx ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ addq %rax,%r15 ++ adcq %rdx,%r9 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 32(%rdi),%rdi ++ decq %rcx ++ jg L$seal_avx2_tail_512_rounds_and_3xhash ++ decq %r8 ++ jge L$seal_avx2_tail_512_rounds_and_2xhash ++ vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 ++ vpaddd 0+64(%rbp),%ymm7,%ymm7 ++ vpaddd 0+96(%rbp),%ymm11,%ymm11 ++ vpaddd 0+256(%rbp),%ymm15,%ymm15 ++ vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 ++ vpaddd 0+64(%rbp),%ymm6,%ymm6 ++ vpaddd 0+96(%rbp),%ymm10,%ymm10 ++ vpaddd 0+224(%rbp),%ymm14,%ymm14 ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd 0+64(%rbp),%ymm5,%ymm5 ++ vpaddd 0+96(%rbp),%ymm9,%ymm9 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd 0+64(%rbp),%ymm4,%ymm4 ++ vpaddd 0+96(%rbp),%ymm8,%ymm8 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ ++ vmovdqa %ymm0,0+128(%rbp) ++ vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 ++ vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 ++ vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 ++ vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 ++ vpxor 0+0(%rsi),%ymm0,%ymm0 ++ vpxor 32+0(%rsi),%ymm3,%ymm3 ++ vpxor 64+0(%rsi),%ymm7,%ymm7 ++ vpxor 96+0(%rsi),%ymm11,%ymm11 ++ vmovdqu %ymm0,0+0(%rdi) ++ vmovdqu %ymm3,32+0(%rdi) ++ vmovdqu %ymm7,64+0(%rdi) ++ vmovdqu %ymm11,96+0(%rdi) ++ ++ vmovdqa 0+128(%rbp),%ymm0 ++ vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 ++ vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 ++ vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 ++ vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 ++ vpxor 0+128(%rsi),%ymm3,%ymm3 ++ vpxor 32+128(%rsi),%ymm2,%ymm2 ++ vpxor 64+128(%rsi),%ymm6,%ymm6 ++ vpxor 96+128(%rsi),%ymm10,%ymm10 ++ vmovdqu 
%ymm3,0+128(%rdi) ++ vmovdqu %ymm2,32+128(%rdi) ++ vmovdqu %ymm6,64+128(%rdi) ++ vmovdqu %ymm10,96+128(%rdi) ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 ++ vpxor 0+256(%rsi),%ymm3,%ymm3 ++ vpxor 32+256(%rsi),%ymm1,%ymm1 ++ vpxor 64+256(%rsi),%ymm5,%ymm5 ++ vpxor 96+256(%rsi),%ymm9,%ymm9 ++ vmovdqu %ymm3,0+256(%rdi) ++ vmovdqu %ymm1,32+256(%rdi) ++ vmovdqu %ymm5,64+256(%rdi) ++ vmovdqu %ymm9,96+256(%rdi) ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 ++ vmovdqa %ymm3,%ymm8 ++ ++ movq $384,%rcx ++ leaq 384(%rsi),%rsi ++ subq $384,%rbx ++ jmp L$seal_avx2_short_hash_remainder ++ ++L$seal_avx2_320: ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm8,%ymm10 ++ vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 ++ vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 ++ vmovdqa %ymm4,%ymm7 ++ vmovdqa %ymm8,%ymm11 ++ vmovdqa %ymm12,0+160(%rbp) ++ vmovdqa %ymm13,0+192(%rbp) ++ vmovdqa %ymm14,0+224(%rbp) ++ movq $10,%r10 ++L$seal_avx2_320_rounds: ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol16(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpsrld $20,%ymm6,%ymm3 ++ vpslld $12,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol8(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpslld $7,%ymm6,%ymm3 ++ vpsrld $25,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpalignr $12,%ymm14,%ymm14,%ymm14 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $4,%ymm6,%ymm6,%ymm6 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb 
L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol16(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpsrld $20,%ymm6,%ymm3 ++ vpslld $12,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpaddd %ymm6,%ymm2,%ymm2 ++ vpxor %ymm2,%ymm14,%ymm14 ++ vpshufb L$rol8(%rip),%ymm14,%ymm14 ++ vpaddd %ymm14,%ymm10,%ymm10 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpslld $7,%ymm6,%ymm3 ++ vpsrld $25,%ymm6,%ymm6 ++ vpxor %ymm3,%ymm6,%ymm6 ++ vpalignr $4,%ymm14,%ymm14,%ymm14 ++ vpalignr $8,%ymm10,%ymm10,%ymm10 ++ vpalignr $12,%ymm6,%ymm6,%ymm6 ++ ++ decq %r10 ++ jne L$seal_avx2_320_rounds ++ vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 ++ vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 ++ vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 ++ vpaddd %ymm7,%ymm4,%ymm4 ++ vpaddd %ymm7,%ymm5,%ymm5 ++ vpaddd %ymm7,%ymm6,%ymm6 ++ vpaddd %ymm11,%ymm8,%ymm8 ++ vpaddd %ymm11,%ymm9,%ymm9 ++ vpaddd %ymm11,%ymm10,%ymm10 ++ vpaddd 0+160(%rbp),%ymm12,%ymm12 ++ vpaddd 0+192(%rbp),%ymm13,%ymm13 ++ vpaddd 0+224(%rbp),%ymm14,%ymm14 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 ++ ++ vpand L$clamp(%rip),%ymm3,%ymm3 ++ vmovdqa %ymm3,0+0(%rbp) ++ ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 ++ vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 ++ vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 ++ vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 ++ vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 ++ jmp L$seal_avx2_short ++ ++L$seal_avx2_192: ++ vmovdqa %ymm0,%ymm1 ++ vmovdqa %ymm0,%ymm2 ++ vmovdqa %ymm4,%ymm5 ++ vmovdqa %ymm4,%ymm6 ++ vmovdqa %ymm8,%ymm9 ++ vmovdqa %ymm8,%ymm10 ++ vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 ++ vmovdqa %ymm12,%ymm11 ++ vmovdqa %ymm13,%ymm15 ++ movq $10,%r10 ++L$seal_avx2_192_rounds: ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $12,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $4,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $12,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $4,%ymm5,%ymm5,%ymm5 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb 
L$rol16(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $20,%ymm4,%ymm3 ++ vpslld $12,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpaddd %ymm4,%ymm0,%ymm0 ++ vpxor %ymm0,%ymm12,%ymm12 ++ vpshufb L$rol8(%rip),%ymm12,%ymm12 ++ vpaddd %ymm12,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpslld $7,%ymm4,%ymm3 ++ vpsrld $25,%ymm4,%ymm4 ++ vpxor %ymm3,%ymm4,%ymm4 ++ vpalignr $4,%ymm12,%ymm12,%ymm12 ++ vpalignr $8,%ymm8,%ymm8,%ymm8 ++ vpalignr $12,%ymm4,%ymm4,%ymm4 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol16(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpsrld $20,%ymm5,%ymm3 ++ vpslld $12,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpaddd %ymm5,%ymm1,%ymm1 ++ vpxor %ymm1,%ymm13,%ymm13 ++ vpshufb L$rol8(%rip),%ymm13,%ymm13 ++ vpaddd %ymm13,%ymm9,%ymm9 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpslld $7,%ymm5,%ymm3 ++ vpsrld $25,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm5,%ymm5 ++ vpalignr $4,%ymm13,%ymm13,%ymm13 ++ vpalignr $8,%ymm9,%ymm9,%ymm9 ++ vpalignr $12,%ymm5,%ymm5,%ymm5 ++ ++ decq %r10 ++ jne L$seal_avx2_192_rounds ++ vpaddd %ymm2,%ymm0,%ymm0 ++ vpaddd %ymm2,%ymm1,%ymm1 ++ vpaddd %ymm6,%ymm4,%ymm4 ++ vpaddd %ymm6,%ymm5,%ymm5 ++ vpaddd %ymm10,%ymm8,%ymm8 ++ vpaddd %ymm10,%ymm9,%ymm9 ++ vpaddd %ymm11,%ymm12,%ymm12 ++ vpaddd %ymm15,%ymm13,%ymm13 ++ vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 ++ ++ vpand L$clamp(%rip),%ymm3,%ymm3 ++ vmovdqa %ymm3,0+0(%rbp) ++ ++ vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 ++ vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 ++ vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 ++ vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 ++ vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 ++ vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 ++L$seal_avx2_short: ++ movq %r8,%r8 ++ call poly_hash_ad_internal ++ xorq %rcx,%rcx ++L$seal_avx2_short_hash_remainder: ++ cmpq $16,%rcx ++ jb L$seal_avx2_short_loop ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ subq $16,%rcx ++ addq $16,%rdi ++ jmp L$seal_avx2_short_hash_remainder ++L$seal_avx2_short_loop: ++ cmpq $32,%rbx ++ jb L$seal_avx2_short_tail ++ subq $32,%rbx ++ ++ vpxor (%rsi),%ymm0,%ymm0 ++ vmovdqu %ymm0,(%rdi) ++ leaq 32(%rsi),%rsi ++ ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ addq 
0+16(%rdi),%r10 ++ adcq 8+16(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 32(%rdi),%rdi ++ ++ vmovdqa %ymm4,%ymm0 ++ vmovdqa %ymm8,%ymm4 ++ vmovdqa %ymm12,%ymm8 ++ vmovdqa %ymm1,%ymm12 ++ vmovdqa %ymm5,%ymm1 ++ vmovdqa %ymm9,%ymm5 ++ vmovdqa %ymm13,%ymm9 ++ vmovdqa %ymm2,%ymm13 ++ vmovdqa %ymm6,%ymm2 ++ jmp L$seal_avx2_short_loop ++L$seal_avx2_short_tail: ++ cmpq $16,%rbx ++ jb L$seal_avx2_exit ++ subq $16,%rbx ++ vpxor (%rsi),%xmm0,%xmm3 ++ vmovdqu %xmm3,(%rdi) ++ leaq 16(%rsi),%rsi ++ addq 0+0(%rdi),%r10 ++ adcq 8+0(%rdi),%r11 ++ adcq $1,%r12 ++ movq 0+0+0(%rbp),%rax ++ movq %rax,%r15 ++ mulq %r10 ++ movq %rax,%r13 ++ movq %rdx,%r14 ++ movq 0+0+0(%rbp),%rax ++ mulq %r11 ++ imulq %r12,%r15 ++ addq %rax,%r14 ++ adcq %rdx,%r15 ++ movq 8+0+0(%rbp),%rax ++ movq %rax,%r9 ++ mulq %r10 ++ addq %rax,%r14 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ movq 8+0+0(%rbp),%rax ++ mulq %r11 ++ addq %rax,%r15 ++ adcq $0,%rdx ++ imulq %r12,%r9 ++ addq %r10,%r15 ++ adcq %rdx,%r9 ++ movq %r13,%r10 ++ movq %r14,%r11 ++ movq %r15,%r12 ++ andq $3,%r12 ++ movq %r15,%r13 ++ andq $-4,%r13 ++ movq %r9,%r14 ++ shrdq $2,%r9,%r15 ++ shrq $2,%r9 ++ addq %r13,%r15 ++ adcq %r14,%r9 ++ addq %r15,%r10 ++ adcq %r9,%r11 ++ adcq $0,%r12 ++ ++ leaq 16(%rdi),%rdi ++ vextracti128 $1,%ymm0,%xmm0 ++L$seal_avx2_exit: ++ vzeroupper ++ jmp L$seal_sse_tail_16 ++ ++ ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S b/apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S +new file mode 100644 +index 0000000..e497c35 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S +@@ -0,0 +1,850 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++.p2align 5 ++_aesni_ctr32_ghash_6x: ++ ++ vmovdqu 32(%r11),%xmm2 ++ subq $6,%rdx ++ vpxor %xmm4,%xmm4,%xmm4 ++ vmovdqu 0-128(%rcx),%xmm15 ++ vpaddb %xmm2,%xmm1,%xmm10 ++ vpaddb %xmm2,%xmm10,%xmm11 ++ vpaddb %xmm2,%xmm11,%xmm12 ++ vpaddb %xmm2,%xmm12,%xmm13 ++ vpaddb %xmm2,%xmm13,%xmm14 ++ vpxor %xmm15,%xmm1,%xmm9 ++ vmovdqu %xmm4,16+8(%rsp) ++ jmp L$oop6x ++ ++.p2align 5 ++L$oop6x: ++ addl $100663296,%ebx ++ jc L$handle_ctr32 ++ vmovdqu 0-32(%r9),%xmm3 ++ vpaddb %xmm2,%xmm14,%xmm1 ++ vpxor %xmm15,%xmm10,%xmm10 ++ vpxor %xmm15,%xmm11,%xmm11 ++ ++L$resume_ctr32: ++ vmovdqu %xmm1,(%r8) ++ vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 ++ vpxor %xmm15,%xmm12,%xmm12 ++ vmovups 16-128(%rcx),%xmm2 ++ vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ xorq %r12,%r12 ++ cmpq %r14,%r15 ++ ++ vaesenc %xmm2,%xmm9,%xmm9 ++ vmovdqu 48+8(%rsp),%xmm0 ++ vpxor %xmm15,%xmm13,%xmm13 ++ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 ++ vaesenc %xmm2,%xmm10,%xmm10 ++ vpxor %xmm15,%xmm14,%xmm14 ++ setnc %r12b ++ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 ++ vaesenc %xmm2,%xmm11,%xmm11 ++ vmovdqu 16-32(%r9),%xmm3 ++ negq %r12 ++ vaesenc %xmm2,%xmm12,%xmm12 ++ vpxor %xmm5,%xmm6,%xmm6 ++ vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 ++ vpxor %xmm4,%xmm8,%xmm8 ++ vaesenc %xmm2,%xmm13,%xmm13 ++ vpxor %xmm5,%xmm1,%xmm4 ++ andq $0x60,%r12 ++ vmovups 32-128(%rcx),%xmm15 ++ vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 ++ vaesenc %xmm2,%xmm14,%xmm14 ++ ++ vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 ++ leaq (%r14,%r12,1),%r14 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor 16+8(%rsp),%xmm8,%xmm8 ++ vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 ++ vmovdqu 64+8(%rsp),%xmm0 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ movbeq 88(%r14),%r13 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 80(%r14),%r12 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r13,32+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ movq %r12,40+8(%rsp) ++ vmovdqu 48-32(%r9),%xmm5 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vmovups 48-128(%rcx),%xmm15 ++ vpxor %xmm1,%xmm6,%xmm6 ++ vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vpxor %xmm3,%xmm7,%xmm7 ++ vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 ++ vmovdqu 80+8(%rsp),%xmm0 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vmovdqu 64-32(%r9),%xmm1 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vmovups 64-128(%rcx),%xmm15 ++ vpxor %xmm2,%xmm6,%xmm6 ++ vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm3,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ movbeq 72(%r14),%r13 ++ vpxor %xmm5,%xmm7,%xmm7 ++ vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 64(%r14),%r12 ++ vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 ++ vmovdqu 96+8(%rsp),%xmm0 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r13,48+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ movq %r12,56+8(%rsp) ++ vpxor %xmm2,%xmm4,%xmm4 ++ vmovdqu 96-32(%r9),%xmm2 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vmovups 80-128(%rcx),%xmm15 ++ vpxor %xmm3,%xmm6,%xmm6 ++ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm5,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 ++ vaesenc 
%xmm15,%xmm10,%xmm10 ++ movbeq 56(%r14),%r13 ++ vpxor %xmm1,%xmm7,%xmm7 ++ vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 ++ vpxor 112+8(%rsp),%xmm8,%xmm8 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 48(%r14),%r12 ++ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r13,64+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ movq %r12,72+8(%rsp) ++ vpxor %xmm3,%xmm4,%xmm4 ++ vmovdqu 112-32(%r9),%xmm3 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vmovups 96-128(%rcx),%xmm15 ++ vpxor %xmm5,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm1,%xmm6,%xmm6 ++ vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ movbeq 40(%r14),%r13 ++ vpxor %xmm2,%xmm7,%xmm7 ++ vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 32(%r14),%r12 ++ vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r13,80+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ movq %r12,88+8(%rsp) ++ vpxor %xmm5,%xmm6,%xmm6 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ vpxor %xmm1,%xmm6,%xmm6 ++ ++ vmovups 112-128(%rcx),%xmm15 ++ vpslldq $8,%xmm6,%xmm5 ++ vpxor %xmm2,%xmm4,%xmm4 ++ vmovdqu 16(%r11),%xmm3 ++ ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor %xmm8,%xmm7,%xmm7 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vpxor %xmm5,%xmm4,%xmm4 ++ movbeq 24(%r14),%r13 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ movbeq 16(%r14),%r12 ++ vpalignr $8,%xmm4,%xmm4,%xmm0 ++ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 ++ movq %r13,96+8(%rsp) ++ vaesenc %xmm15,%xmm12,%xmm12 ++ movq %r12,104+8(%rsp) ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vmovups 128-128(%rcx),%xmm1 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vmovups 144-128(%rcx),%xmm15 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vpsrldq $8,%xmm6,%xmm6 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vpxor %xmm6,%xmm7,%xmm7 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vpxor %xmm0,%xmm4,%xmm4 ++ movbeq 8(%r14),%r13 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ movbeq 0(%r14),%r12 ++ vaesenc %xmm1,%xmm14,%xmm14 ++ vmovups 160-128(%rcx),%xmm1 ++ cmpl $11,%ebp ++ jb L$enc_tail ++ ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ vmovups 176-128(%rcx),%xmm15 ++ vaesenc %xmm1,%xmm14,%xmm14 ++ vmovups 192-128(%rcx),%xmm1 ++ je L$enc_tail ++ ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ ++ vaesenc %xmm1,%xmm9,%xmm9 ++ vaesenc %xmm1,%xmm10,%xmm10 ++ vaesenc %xmm1,%xmm11,%xmm11 ++ vaesenc %xmm1,%xmm12,%xmm12 ++ vaesenc %xmm1,%xmm13,%xmm13 ++ vmovups 208-128(%rcx),%xmm15 ++ vaesenc %xmm1,%xmm14,%xmm14 ++ vmovups 224-128(%rcx),%xmm1 ++ jmp L$enc_tail ++ ++.p2align 5 ++L$handle_ctr32: ++ vmovdqu (%r11),%xmm0 ++ vpshufb %xmm0,%xmm1,%xmm6 ++ vmovdqu 48(%r11),%xmm5 ++ vpaddd 64(%r11),%xmm6,%xmm10 ++ vpaddd %xmm5,%xmm6,%xmm11 ++ vmovdqu 0-32(%r9),%xmm3 ++ vpaddd %xmm5,%xmm10,%xmm12 ++ vpshufb %xmm0,%xmm10,%xmm10 ++ vpaddd %xmm5,%xmm11,%xmm13 ++ vpshufb %xmm0,%xmm11,%xmm11 ++ vpxor %xmm15,%xmm10,%xmm10 ++ vpaddd %xmm5,%xmm12,%xmm14 ++ vpshufb %xmm0,%xmm12,%xmm12 ++ vpxor %xmm15,%xmm11,%xmm11 ++ vpaddd %xmm5,%xmm13,%xmm1 ++ vpshufb %xmm0,%xmm13,%xmm13 ++ vpshufb %xmm0,%xmm14,%xmm14 ++ vpshufb %xmm0,%xmm1,%xmm1 ++ jmp L$resume_ctr32 ++ ++.p2align 5 ++L$enc_tail: ++ 
vaesenc %xmm15,%xmm9,%xmm9 ++ vmovdqu %xmm7,16+8(%rsp) ++ vpalignr $8,%xmm4,%xmm4,%xmm8 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 ++ vpxor 0(%rdi),%xmm1,%xmm2 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vpxor 16(%rdi),%xmm1,%xmm0 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vpxor 32(%rdi),%xmm1,%xmm5 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vpxor 48(%rdi),%xmm1,%xmm6 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ vpxor 64(%rdi),%xmm1,%xmm7 ++ vpxor 80(%rdi),%xmm1,%xmm3 ++ vmovdqu (%r8),%xmm1 ++ ++ vaesenclast %xmm2,%xmm9,%xmm9 ++ vmovdqu 32(%r11),%xmm2 ++ vaesenclast %xmm0,%xmm10,%xmm10 ++ vpaddb %xmm2,%xmm1,%xmm0 ++ movq %r13,112+8(%rsp) ++ leaq 96(%rdi),%rdi ++ vaesenclast %xmm5,%xmm11,%xmm11 ++ vpaddb %xmm2,%xmm0,%xmm5 ++ movq %r12,120+8(%rsp) ++ leaq 96(%rsi),%rsi ++ vmovdqu 0-128(%rcx),%xmm15 ++ vaesenclast %xmm6,%xmm12,%xmm12 ++ vpaddb %xmm2,%xmm5,%xmm6 ++ vaesenclast %xmm7,%xmm13,%xmm13 ++ vpaddb %xmm2,%xmm6,%xmm7 ++ vaesenclast %xmm3,%xmm14,%xmm14 ++ vpaddb %xmm2,%xmm7,%xmm3 ++ ++ addq $0x60,%r10 ++ subq $0x6,%rdx ++ jc L$6x_done ++ ++ vmovups %xmm9,-96(%rsi) ++ vpxor %xmm15,%xmm1,%xmm9 ++ vmovups %xmm10,-80(%rsi) ++ vmovdqa %xmm0,%xmm10 ++ vmovups %xmm11,-64(%rsi) ++ vmovdqa %xmm5,%xmm11 ++ vmovups %xmm12,-48(%rsi) ++ vmovdqa %xmm6,%xmm12 ++ vmovups %xmm13,-32(%rsi) ++ vmovdqa %xmm7,%xmm13 ++ vmovups %xmm14,-16(%rsi) ++ vmovdqa %xmm3,%xmm14 ++ vmovdqu 32+8(%rsp),%xmm7 ++ jmp L$oop6x ++ ++L$6x_done: ++ vpxor 16+8(%rsp),%xmm8,%xmm8 ++ vpxor %xmm4,%xmm8,%xmm8 ++ ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aesni_gcm_decrypt ++.private_extern _aesni_gcm_decrypt ++ ++.p2align 5 ++_aesni_gcm_decrypt: ++ ++ xorq %r10,%r10 ++ ++ ++ ++ cmpq $0x60,%rdx ++ jb L$gcm_dec_abort ++ ++ leaq (%rsp),%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ vzeroupper ++ ++ vmovdqu (%r8),%xmm1 ++ addq $-128,%rsp ++ movl 12(%r8),%ebx ++ leaq L$bswap_mask(%rip),%r11 ++ leaq -128(%rcx),%r14 ++ movq $0xf80,%r15 ++ vmovdqu (%r9),%xmm8 ++ andq $-128,%rsp ++ vmovdqu (%r11),%xmm0 ++ leaq 128(%rcx),%rcx ++ leaq 32+32(%r9),%r9 ++ movl 240-128(%rcx),%ebp ++ vpshufb %xmm0,%xmm8,%xmm8 ++ ++ andq %r15,%r14 ++ andq %rsp,%r15 ++ subq %r14,%r15 ++ jc L$dec_no_key_aliasing ++ cmpq $768,%r15 ++ jnc L$dec_no_key_aliasing ++ subq %r15,%rsp ++L$dec_no_key_aliasing: ++ ++ vmovdqu 80(%rdi),%xmm7 ++ leaq (%rdi),%r14 ++ vmovdqu 64(%rdi),%xmm4 ++ ++ ++ ++ ++ ++ ++ ++ leaq -192(%rdi,%rdx,1),%r15 ++ ++ vmovdqu 48(%rdi),%xmm5 ++ shrq $4,%rdx ++ xorq %r10,%r10 ++ vmovdqu 32(%rdi),%xmm6 ++ vpshufb %xmm0,%xmm7,%xmm7 ++ vmovdqu 16(%rdi),%xmm2 ++ vpshufb %xmm0,%xmm4,%xmm4 ++ vmovdqu (%rdi),%xmm3 ++ vpshufb %xmm0,%xmm5,%xmm5 ++ vmovdqu %xmm4,48(%rsp) ++ vpshufb %xmm0,%xmm6,%xmm6 ++ vmovdqu %xmm5,64(%rsp) ++ vpshufb %xmm0,%xmm2,%xmm2 ++ vmovdqu %xmm6,80(%rsp) ++ vpshufb %xmm0,%xmm3,%xmm3 ++ vmovdqu %xmm2,96(%rsp) ++ vmovdqu %xmm3,112(%rsp) ++ ++ call _aesni_ctr32_ghash_6x ++ ++ vmovups %xmm9,-96(%rsi) ++ vmovups %xmm10,-80(%rsi) ++ vmovups %xmm11,-64(%rsi) ++ vmovups %xmm12,-48(%rsi) ++ vmovups %xmm13,-32(%rsi) ++ vmovups %xmm14,-16(%rsi) ++ ++ vpshufb (%r11),%xmm8,%xmm8 ++ vmovdqu %xmm8,-64(%r9) ++ ++ vzeroupper ++ movq -48(%rax),%r15 ++ ++ movq -40(%rax),%r14 ++ ++ movq -32(%rax),%r13 ++ ++ movq -24(%rax),%r12 ++ ++ movq -16(%rax),%rbp ++ ++ movq -8(%rax),%rbx ++ ++ leaq (%rax),%rsp ++ ++L$gcm_dec_abort: ++ movq %r10,%rax ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++_aesni_ctr32_6x: ++ ++ vmovdqu 0-128(%rcx),%xmm4 ++ vmovdqu 32(%r11),%xmm2 ++ leaq -1(%rbp),%r13 ++ vmovups 16-128(%rcx),%xmm15 ++ leaq 
32-128(%rcx),%r12 ++ vpxor %xmm4,%xmm1,%xmm9 ++ addl $100663296,%ebx ++ jc L$handle_ctr32_2 ++ vpaddb %xmm2,%xmm1,%xmm10 ++ vpaddb %xmm2,%xmm10,%xmm11 ++ vpxor %xmm4,%xmm10,%xmm10 ++ vpaddb %xmm2,%xmm11,%xmm12 ++ vpxor %xmm4,%xmm11,%xmm11 ++ vpaddb %xmm2,%xmm12,%xmm13 ++ vpxor %xmm4,%xmm12,%xmm12 ++ vpaddb %xmm2,%xmm13,%xmm14 ++ vpxor %xmm4,%xmm13,%xmm13 ++ vpaddb %xmm2,%xmm14,%xmm1 ++ vpxor %xmm4,%xmm14,%xmm14 ++ jmp L$oop_ctr32 ++ ++.p2align 4 ++L$oop_ctr32: ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ vmovups (%r12),%xmm15 ++ leaq 16(%r12),%r12 ++ decl %r13d ++ jnz L$oop_ctr32 ++ ++ vmovdqu (%r12),%xmm3 ++ vaesenc %xmm15,%xmm9,%xmm9 ++ vpxor 0(%rdi),%xmm3,%xmm4 ++ vaesenc %xmm15,%xmm10,%xmm10 ++ vpxor 16(%rdi),%xmm3,%xmm5 ++ vaesenc %xmm15,%xmm11,%xmm11 ++ vpxor 32(%rdi),%xmm3,%xmm6 ++ vaesenc %xmm15,%xmm12,%xmm12 ++ vpxor 48(%rdi),%xmm3,%xmm8 ++ vaesenc %xmm15,%xmm13,%xmm13 ++ vpxor 64(%rdi),%xmm3,%xmm2 ++ vaesenc %xmm15,%xmm14,%xmm14 ++ vpxor 80(%rdi),%xmm3,%xmm3 ++ leaq 96(%rdi),%rdi ++ ++ vaesenclast %xmm4,%xmm9,%xmm9 ++ vaesenclast %xmm5,%xmm10,%xmm10 ++ vaesenclast %xmm6,%xmm11,%xmm11 ++ vaesenclast %xmm8,%xmm12,%xmm12 ++ vaesenclast %xmm2,%xmm13,%xmm13 ++ vaesenclast %xmm3,%xmm14,%xmm14 ++ vmovups %xmm9,0(%rsi) ++ vmovups %xmm10,16(%rsi) ++ vmovups %xmm11,32(%rsi) ++ vmovups %xmm12,48(%rsi) ++ vmovups %xmm13,64(%rsi) ++ vmovups %xmm14,80(%rsi) ++ leaq 96(%rsi),%rsi ++ ++ .byte 0xf3,0xc3 ++.p2align 5 ++L$handle_ctr32_2: ++ vpshufb %xmm0,%xmm1,%xmm6 ++ vmovdqu 48(%r11),%xmm5 ++ vpaddd 64(%r11),%xmm6,%xmm10 ++ vpaddd %xmm5,%xmm6,%xmm11 ++ vpaddd %xmm5,%xmm10,%xmm12 ++ vpshufb %xmm0,%xmm10,%xmm10 ++ vpaddd %xmm5,%xmm11,%xmm13 ++ vpshufb %xmm0,%xmm11,%xmm11 ++ vpxor %xmm4,%xmm10,%xmm10 ++ vpaddd %xmm5,%xmm12,%xmm14 ++ vpshufb %xmm0,%xmm12,%xmm12 ++ vpxor %xmm4,%xmm11,%xmm11 ++ vpaddd %xmm5,%xmm13,%xmm1 ++ vpshufb %xmm0,%xmm13,%xmm13 ++ vpxor %xmm4,%xmm12,%xmm12 ++ vpshufb %xmm0,%xmm14,%xmm14 ++ vpxor %xmm4,%xmm13,%xmm13 ++ vpshufb %xmm0,%xmm1,%xmm1 ++ vpxor %xmm4,%xmm14,%xmm14 ++ jmp L$oop_ctr32 ++ ++ ++ ++.globl _aesni_gcm_encrypt ++.private_extern _aesni_gcm_encrypt ++ ++.p2align 5 ++_aesni_gcm_encrypt: ++ ++#ifdef BORINGSSL_DISPATCH_TEST ++ ++ movb $1,_BORINGSSL_function_hit+2(%rip) ++#endif ++ xorq %r10,%r10 ++ ++ ++ ++ ++ cmpq $288,%rdx ++ jb L$gcm_enc_abort ++ ++ leaq (%rsp),%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ vzeroupper ++ ++ vmovdqu (%r8),%xmm1 ++ addq $-128,%rsp ++ movl 12(%r8),%ebx ++ leaq L$bswap_mask(%rip),%r11 ++ leaq -128(%rcx),%r14 ++ movq $0xf80,%r15 ++ leaq 128(%rcx),%rcx ++ vmovdqu (%r11),%xmm0 ++ andq $-128,%rsp ++ movl 240-128(%rcx),%ebp ++ ++ andq %r15,%r14 ++ andq %rsp,%r15 ++ subq %r14,%r15 ++ jc L$enc_no_key_aliasing ++ cmpq $768,%r15 ++ jnc L$enc_no_key_aliasing ++ subq %r15,%rsp ++L$enc_no_key_aliasing: ++ ++ leaq (%rsi),%r14 ++ ++ ++ ++ ++ ++ ++ ++ ++ leaq -192(%rsi,%rdx,1),%r15 ++ ++ shrq $4,%rdx ++ ++ call _aesni_ctr32_6x ++ vpshufb %xmm0,%xmm9,%xmm8 ++ vpshufb %xmm0,%xmm10,%xmm2 ++ vmovdqu %xmm8,112(%rsp) ++ vpshufb %xmm0,%xmm11,%xmm4 ++ vmovdqu %xmm2,96(%rsp) ++ vpshufb %xmm0,%xmm12,%xmm5 ++ vmovdqu %xmm4,80(%rsp) ++ vpshufb %xmm0,%xmm13,%xmm6 ++ vmovdqu %xmm5,64(%rsp) ++ vpshufb %xmm0,%xmm14,%xmm7 ++ vmovdqu %xmm6,48(%rsp) ++ ++ call _aesni_ctr32_6x ++ ++ vmovdqu (%r9),%xmm8 ++ leaq 32+32(%r9),%r9 ++ subq $12,%rdx ++ movq $192,%r10 
++ vpshufb %xmm0,%xmm8,%xmm8 ++ ++ call _aesni_ctr32_ghash_6x ++ vmovdqu 32(%rsp),%xmm7 ++ vmovdqu (%r11),%xmm0 ++ vmovdqu 0-32(%r9),%xmm3 ++ vpunpckhqdq %xmm7,%xmm7,%xmm1 ++ vmovdqu 32-32(%r9),%xmm15 ++ vmovups %xmm9,-96(%rsi) ++ vpshufb %xmm0,%xmm9,%xmm9 ++ vpxor %xmm7,%xmm1,%xmm1 ++ vmovups %xmm10,-80(%rsi) ++ vpshufb %xmm0,%xmm10,%xmm10 ++ vmovups %xmm11,-64(%rsi) ++ vpshufb %xmm0,%xmm11,%xmm11 ++ vmovups %xmm12,-48(%rsi) ++ vpshufb %xmm0,%xmm12,%xmm12 ++ vmovups %xmm13,-32(%rsi) ++ vpshufb %xmm0,%xmm13,%xmm13 ++ vmovups %xmm14,-16(%rsi) ++ vpshufb %xmm0,%xmm14,%xmm14 ++ vmovdqu %xmm9,16(%rsp) ++ vmovdqu 48(%rsp),%xmm6 ++ vmovdqu 16-32(%r9),%xmm0 ++ vpunpckhqdq %xmm6,%xmm6,%xmm2 ++ vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 ++ vpxor %xmm6,%xmm2,%xmm2 ++ vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 ++ vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 ++ ++ vmovdqu 64(%rsp),%xmm9 ++ vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 ++ vmovdqu 48-32(%r9),%xmm3 ++ vpxor %xmm5,%xmm4,%xmm4 ++ vpunpckhqdq %xmm9,%xmm9,%xmm5 ++ vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 ++ vpxor %xmm9,%xmm5,%xmm5 ++ vpxor %xmm7,%xmm6,%xmm6 ++ vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 ++ vmovdqu 80-32(%r9),%xmm15 ++ vpxor %xmm1,%xmm2,%xmm2 ++ ++ vmovdqu 80(%rsp),%xmm1 ++ vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 ++ vmovdqu 64-32(%r9),%xmm0 ++ vpxor %xmm4,%xmm7,%xmm7 ++ vpunpckhqdq %xmm1,%xmm1,%xmm4 ++ vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpxor %xmm6,%xmm9,%xmm9 ++ vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vmovdqu 96(%rsp),%xmm2 ++ vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 ++ vmovdqu 96-32(%r9),%xmm3 ++ vpxor %xmm7,%xmm6,%xmm6 ++ vpunpckhqdq %xmm2,%xmm2,%xmm7 ++ vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 ++ vpxor %xmm2,%xmm7,%xmm7 ++ vpxor %xmm9,%xmm1,%xmm1 ++ vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 ++ vmovdqu 128-32(%r9),%xmm15 ++ vpxor %xmm5,%xmm4,%xmm4 ++ ++ vpxor 112(%rsp),%xmm8,%xmm8 ++ vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 ++ vmovdqu 112-32(%r9),%xmm0 ++ vpunpckhqdq %xmm8,%xmm8,%xmm9 ++ vpxor %xmm6,%xmm5,%xmm5 ++ vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 ++ vpxor %xmm8,%xmm9,%xmm9 ++ vpxor %xmm1,%xmm2,%xmm2 ++ vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 ++ vpxor %xmm4,%xmm7,%xmm4 ++ ++ vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 ++ vmovdqu 0-32(%r9),%xmm3 ++ vpunpckhqdq %xmm14,%xmm14,%xmm1 ++ vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 ++ vpxor %xmm14,%xmm1,%xmm1 ++ vpxor %xmm5,%xmm6,%xmm5 ++ vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 ++ vmovdqu 32-32(%r9),%xmm15 ++ vpxor %xmm2,%xmm8,%xmm7 ++ vpxor %xmm4,%xmm9,%xmm6 ++ ++ vmovdqu 16-32(%r9),%xmm0 ++ vpxor %xmm5,%xmm7,%xmm9 ++ vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 ++ vpxor %xmm9,%xmm6,%xmm6 ++ vpunpckhqdq %xmm13,%xmm13,%xmm2 ++ vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 ++ vpxor %xmm13,%xmm2,%xmm2 ++ vpslldq $8,%xmm6,%xmm9 ++ vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 ++ vpxor %xmm9,%xmm5,%xmm8 ++ vpsrldq $8,%xmm6,%xmm6 ++ vpxor %xmm6,%xmm7,%xmm7 ++ ++ vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 ++ vmovdqu 48-32(%r9),%xmm3 ++ vpxor %xmm4,%xmm5,%xmm5 ++ vpunpckhqdq %xmm12,%xmm12,%xmm9 ++ vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 ++ vpxor %xmm12,%xmm9,%xmm9 ++ vpxor %xmm14,%xmm13,%xmm13 ++ vpalignr $8,%xmm8,%xmm8,%xmm14 ++ vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 ++ vmovdqu 80-32(%r9),%xmm15 ++ vpxor %xmm1,%xmm2,%xmm2 ++ ++ vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 ++ vmovdqu 64-32(%r9),%xmm0 ++ vpxor %xmm5,%xmm4,%xmm4 ++ vpunpckhqdq %xmm11,%xmm11,%xmm1 ++ vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 ++ vpxor %xmm11,%xmm1,%xmm1 ++ vpxor %xmm13,%xmm12,%xmm12 ++ vxorps 16(%rsp),%xmm7,%xmm7 ++ vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm9,%xmm9 ++ ++ vpclmulqdq 
$0x10,16(%r11),%xmm8,%xmm8 ++ vxorps %xmm14,%xmm8,%xmm8 ++ ++ vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 ++ vmovdqu 96-32(%r9),%xmm3 ++ vpxor %xmm4,%xmm5,%xmm5 ++ vpunpckhqdq %xmm10,%xmm10,%xmm2 ++ vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 ++ vpxor %xmm10,%xmm2,%xmm2 ++ vpalignr $8,%xmm8,%xmm8,%xmm14 ++ vpxor %xmm12,%xmm11,%xmm11 ++ vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 ++ vmovdqu 128-32(%r9),%xmm15 ++ vpxor %xmm9,%xmm1,%xmm1 ++ ++ vxorps %xmm7,%xmm14,%xmm14 ++ vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 ++ vxorps %xmm14,%xmm8,%xmm8 ++ ++ vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 ++ vmovdqu 112-32(%r9),%xmm0 ++ vpxor %xmm5,%xmm4,%xmm4 ++ vpunpckhqdq %xmm8,%xmm8,%xmm9 ++ vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 ++ vpxor %xmm8,%xmm9,%xmm9 ++ vpxor %xmm11,%xmm10,%xmm10 ++ vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 ++ vpxor %xmm1,%xmm2,%xmm2 ++ ++ vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 ++ vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 ++ vpxor %xmm4,%xmm5,%xmm5 ++ vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 ++ vpxor %xmm10,%xmm7,%xmm7 ++ vpxor %xmm2,%xmm6,%xmm6 ++ ++ vpxor %xmm5,%xmm7,%xmm4 ++ vpxor %xmm4,%xmm6,%xmm6 ++ vpslldq $8,%xmm6,%xmm1 ++ vmovdqu 16(%r11),%xmm3 ++ vpsrldq $8,%xmm6,%xmm6 ++ vpxor %xmm1,%xmm5,%xmm8 ++ vpxor %xmm6,%xmm7,%xmm7 ++ ++ vpalignr $8,%xmm8,%xmm8,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 ++ vpxor %xmm2,%xmm8,%xmm8 ++ ++ vpalignr $8,%xmm8,%xmm8,%xmm2 ++ vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 ++ vpxor %xmm7,%xmm2,%xmm2 ++ vpxor %xmm2,%xmm8,%xmm8 ++ vpshufb (%r11),%xmm8,%xmm8 ++ vmovdqu %xmm8,-64(%r9) ++ ++ vzeroupper ++ movq -48(%rax),%r15 ++ ++ movq -40(%rax),%r14 ++ ++ movq -32(%rax),%r13 ++ ++ movq -24(%rax),%r12 ++ ++ movq -16(%rax),%rbp ++ ++ movq -8(%rax),%rbx ++ ++ leaq (%rax),%rsp ++ ++L$gcm_enc_abort: ++ movq %r10,%rax ++ .byte 0xf3,0xc3 ++ ++ ++.p2align 6 ++L$bswap_mask: ++.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ++L$poly: ++.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 ++L$one_msb: ++.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 ++L$two_lsb: ++.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ++L$one_lsb: ++.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ++.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.p2align 6 ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/aesni-x86_64.S b/apple-x86_64/crypto/fipsmodule/aesni-x86_64.S +new file mode 100644 +index 0000000..7633880 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/aesni-x86_64.S +@@ -0,0 +1,2503 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++.globl _aes_hw_encrypt ++.private_extern _aes_hw_encrypt ++ ++.p2align 4 ++_aes_hw_encrypt: ++ ++#ifdef BORINGSSL_DISPATCH_TEST ++ ++ movb $1,_BORINGSSL_function_hit+1(%rip) ++#endif ++ movups (%rdi),%xmm2 ++ movl 240(%rdx),%eax ++ movups (%rdx),%xmm0 ++ movups 16(%rdx),%xmm1 ++ leaq 32(%rdx),%rdx ++ xorps %xmm0,%xmm2 ++L$oop_enc1_1: ++.byte 102,15,56,220,209 ++ decl %eax ++ movups (%rdx),%xmm1 ++ leaq 16(%rdx),%rdx ++ jnz L$oop_enc1_1 ++.byte 102,15,56,221,209 ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _aes_hw_decrypt ++.private_extern _aes_hw_decrypt ++ ++.p2align 4 ++_aes_hw_decrypt: ++ ++ movups (%rdi),%xmm2 ++ movl 240(%rdx),%eax ++ movups (%rdx),%xmm0 ++ movups 16(%rdx),%xmm1 ++ leaq 32(%rdx),%rdx ++ xorps %xmm0,%xmm2 ++L$oop_dec1_2: ++.byte 102,15,56,222,209 ++ decl %eax ++ movups (%rdx),%xmm1 ++ leaq 16(%rdx),%rdx ++ jnz L$oop_dec1_2 ++.byte 102,15,56,223,209 ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_encrypt2: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ xorps %xmm0,%xmm3 ++ movups 32(%rcx),%xmm0 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++ addq $16,%rax ++ ++L$enc_loop2: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++ movups (%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$enc_loop2 ++ ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_decrypt2: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ xorps %xmm0,%xmm3 ++ movups 32(%rcx),%xmm0 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++ addq $16,%rax ++ ++L$dec_loop2: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++ movups (%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$dec_loop2 ++ ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,223,208 ++.byte 102,15,56,223,216 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_encrypt3: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ xorps %xmm0,%xmm3 ++ xorps %xmm0,%xmm4 ++ movups 32(%rcx),%xmm0 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++ addq $16,%rax ++ ++L$enc_loop3: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++ movups (%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++.byte 102,15,56,220,224 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$enc_loop3 ++ ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++.byte 102,15,56,221,224 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_decrypt3: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ xorps %xmm0,%xmm3 ++ xorps %xmm0,%xmm4 ++ movups 32(%rcx),%xmm0 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++ addq $16,%rax ++ ++L$dec_loop3: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++ movups 
(%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$dec_loop3 ++ ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,223,208 ++.byte 102,15,56,223,216 ++.byte 102,15,56,223,224 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_encrypt4: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ xorps %xmm0,%xmm3 ++ xorps %xmm0,%xmm4 ++ xorps %xmm0,%xmm5 ++ movups 32(%rcx),%xmm0 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++.byte 0x0f,0x1f,0x00 ++ addq $16,%rax ++ ++L$enc_loop4: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++ movups (%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$enc_loop4 ++ ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++.byte 102,15,56,221,224 ++.byte 102,15,56,221,232 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_decrypt4: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ xorps %xmm0,%xmm3 ++ xorps %xmm0,%xmm4 ++ xorps %xmm0,%xmm5 ++ movups 32(%rcx),%xmm0 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++.byte 0x0f,0x1f,0x00 ++ addq $16,%rax ++ ++L$dec_loop4: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++ movups (%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$dec_loop4 ++ ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,223,208 ++.byte 102,15,56,223,216 ++.byte 102,15,56,223,224 ++.byte 102,15,56,223,232 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_encrypt6: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++.byte 102,15,56,220,209 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++.byte 102,15,56,220,217 ++ pxor %xmm0,%xmm5 ++ pxor %xmm0,%xmm6 ++.byte 102,15,56,220,225 ++ pxor %xmm0,%xmm7 ++ movups (%rcx,%rax,1),%xmm0 ++ addq $16,%rax ++ jmp L$enc_loop6_enter ++.p2align 4 ++L$enc_loop6: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++L$enc_loop6_enter: ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++ movups (%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$enc_loop6 ++ ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++.byte 102,15,56,221,224 ++.byte 102,15,56,221,232 ++.byte 102,15,56,221,240 ++.byte 102,15,56,221,248 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_decrypt6: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ pxor %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++.byte 102,15,56,222,209 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++.byte 102,15,56,222,217 ++ pxor %xmm0,%xmm5 ++ 
pxor %xmm0,%xmm6 ++.byte 102,15,56,222,225 ++ pxor %xmm0,%xmm7 ++ movups (%rcx,%rax,1),%xmm0 ++ addq $16,%rax ++ jmp L$dec_loop6_enter ++.p2align 4 ++L$dec_loop6: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++L$dec_loop6_enter: ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++ movups (%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++.byte 102,15,56,222,240 ++.byte 102,15,56,222,248 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$dec_loop6 ++ ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,15,56,223,208 ++.byte 102,15,56,223,216 ++.byte 102,15,56,223,224 ++.byte 102,15,56,223,232 ++.byte 102,15,56,223,240 ++.byte 102,15,56,223,248 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_encrypt8: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ xorps %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++ pxor %xmm0,%xmm5 ++ pxor %xmm0,%xmm6 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++.byte 102,15,56,220,209 ++ pxor %xmm0,%xmm7 ++ pxor %xmm0,%xmm8 ++.byte 102,15,56,220,217 ++ pxor %xmm0,%xmm9 ++ movups (%rcx,%rax,1),%xmm0 ++ addq $16,%rax ++ jmp L$enc_loop8_inner ++.p2align 4 ++L$enc_loop8: ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++L$enc_loop8_inner: ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++.byte 102,68,15,56,220,201 ++L$enc_loop8_enter: ++ movups (%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++.byte 102,68,15,56,220,192 ++.byte 102,68,15,56,220,200 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$enc_loop8 ++ ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++.byte 102,68,15,56,220,201 ++.byte 102,15,56,221,208 ++.byte 102,15,56,221,216 ++.byte 102,15,56,221,224 ++.byte 102,15,56,221,232 ++.byte 102,15,56,221,240 ++.byte 102,15,56,221,248 ++.byte 102,68,15,56,221,192 ++.byte 102,68,15,56,221,200 ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++_aesni_decrypt8: ++ ++ movups (%rcx),%xmm0 ++ shll $4,%eax ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm2 ++ xorps %xmm0,%xmm3 ++ pxor %xmm0,%xmm4 ++ pxor %xmm0,%xmm5 ++ pxor %xmm0,%xmm6 ++ leaq 32(%rcx,%rax,1),%rcx ++ negq %rax ++.byte 102,15,56,222,209 ++ pxor %xmm0,%xmm7 ++ pxor %xmm0,%xmm8 ++.byte 102,15,56,222,217 ++ pxor %xmm0,%xmm9 ++ movups (%rcx,%rax,1),%xmm0 ++ addq $16,%rax ++ jmp L$dec_loop8_inner ++.p2align 4 ++L$dec_loop8: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++L$dec_loop8_inner: ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,68,15,56,222,193 ++.byte 102,68,15,56,222,201 ++L$dec_loop8_enter: ++ movups (%rcx,%rax,1),%xmm1 ++ addq $32,%rax ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++.byte 102,15,56,222,240 ++.byte 102,15,56,222,248 ++.byte 102,68,15,56,222,192 ++.byte 102,68,15,56,222,200 ++ movups -16(%rcx,%rax,1),%xmm0 ++ jnz L$dec_loop8 ++ ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 
++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,68,15,56,222,193 ++.byte 102,68,15,56,222,201 ++.byte 102,15,56,223,208 ++.byte 102,15,56,223,216 ++.byte 102,15,56,223,224 ++.byte 102,15,56,223,232 ++.byte 102,15,56,223,240 ++.byte 102,15,56,223,248 ++.byte 102,68,15,56,223,192 ++.byte 102,68,15,56,223,200 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes_hw_ecb_encrypt ++.private_extern _aes_hw_ecb_encrypt ++ ++.p2align 4 ++_aes_hw_ecb_encrypt: ++ ++ andq $-16,%rdx ++ jz L$ecb_ret ++ ++ movl 240(%rcx),%eax ++ movups (%rcx),%xmm0 ++ movq %rcx,%r11 ++ movl %eax,%r10d ++ testl %r8d,%r8d ++ jz L$ecb_decrypt ++ ++ cmpq $0x80,%rdx ++ jb L$ecb_enc_tail ++ ++ movdqu (%rdi),%xmm2 ++ movdqu 16(%rdi),%xmm3 ++ movdqu 32(%rdi),%xmm4 ++ movdqu 48(%rdi),%xmm5 ++ movdqu 64(%rdi),%xmm6 ++ movdqu 80(%rdi),%xmm7 ++ movdqu 96(%rdi),%xmm8 ++ movdqu 112(%rdi),%xmm9 ++ leaq 128(%rdi),%rdi ++ subq $0x80,%rdx ++ jmp L$ecb_enc_loop8_enter ++.p2align 4 ++L$ecb_enc_loop8: ++ movups %xmm2,(%rsi) ++ movq %r11,%rcx ++ movdqu (%rdi),%xmm2 ++ movl %r10d,%eax ++ movups %xmm3,16(%rsi) ++ movdqu 16(%rdi),%xmm3 ++ movups %xmm4,32(%rsi) ++ movdqu 32(%rdi),%xmm4 ++ movups %xmm5,48(%rsi) ++ movdqu 48(%rdi),%xmm5 ++ movups %xmm6,64(%rsi) ++ movdqu 64(%rdi),%xmm6 ++ movups %xmm7,80(%rsi) ++ movdqu 80(%rdi),%xmm7 ++ movups %xmm8,96(%rsi) ++ movdqu 96(%rdi),%xmm8 ++ movups %xmm9,112(%rsi) ++ leaq 128(%rsi),%rsi ++ movdqu 112(%rdi),%xmm9 ++ leaq 128(%rdi),%rdi ++L$ecb_enc_loop8_enter: ++ ++ call _aesni_encrypt8 ++ ++ subq $0x80,%rdx ++ jnc L$ecb_enc_loop8 ++ ++ movups %xmm2,(%rsi) ++ movq %r11,%rcx ++ movups %xmm3,16(%rsi) ++ movl %r10d,%eax ++ movups %xmm4,32(%rsi) ++ movups %xmm5,48(%rsi) ++ movups %xmm6,64(%rsi) ++ movups %xmm7,80(%rsi) ++ movups %xmm8,96(%rsi) ++ movups %xmm9,112(%rsi) ++ leaq 128(%rsi),%rsi ++ addq $0x80,%rdx ++ jz L$ecb_ret ++ ++L$ecb_enc_tail: ++ movups (%rdi),%xmm2 ++ cmpq $0x20,%rdx ++ jb L$ecb_enc_one ++ movups 16(%rdi),%xmm3 ++ je L$ecb_enc_two ++ movups 32(%rdi),%xmm4 ++ cmpq $0x40,%rdx ++ jb L$ecb_enc_three ++ movups 48(%rdi),%xmm5 ++ je L$ecb_enc_four ++ movups 64(%rdi),%xmm6 ++ cmpq $0x60,%rdx ++ jb L$ecb_enc_five ++ movups 80(%rdi),%xmm7 ++ je L$ecb_enc_six ++ movdqu 96(%rdi),%xmm8 ++ xorps %xmm9,%xmm9 ++ call _aesni_encrypt8 ++ movups %xmm2,(%rsi) ++ movups %xmm3,16(%rsi) ++ movups %xmm4,32(%rsi) ++ movups %xmm5,48(%rsi) ++ movups %xmm6,64(%rsi) ++ movups %xmm7,80(%rsi) ++ movups %xmm8,96(%rsi) ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_enc_one: ++ movups (%rcx),%xmm0 ++ movups 16(%rcx),%xmm1 ++ leaq 32(%rcx),%rcx ++ xorps %xmm0,%xmm2 ++L$oop_enc1_3: ++.byte 102,15,56,220,209 ++ decl %eax ++ movups (%rcx),%xmm1 ++ leaq 16(%rcx),%rcx ++ jnz L$oop_enc1_3 ++.byte 102,15,56,221,209 ++ movups %xmm2,(%rsi) ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_enc_two: ++ call _aesni_encrypt2 ++ movups %xmm2,(%rsi) ++ movups %xmm3,16(%rsi) ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_enc_three: ++ call _aesni_encrypt3 ++ movups %xmm2,(%rsi) ++ movups %xmm3,16(%rsi) ++ movups %xmm4,32(%rsi) ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_enc_four: ++ call _aesni_encrypt4 ++ movups %xmm2,(%rsi) ++ movups %xmm3,16(%rsi) ++ movups %xmm4,32(%rsi) ++ movups %xmm5,48(%rsi) ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_enc_five: ++ xorps %xmm7,%xmm7 ++ call _aesni_encrypt6 ++ movups %xmm2,(%rsi) ++ movups %xmm3,16(%rsi) ++ movups %xmm4,32(%rsi) ++ movups %xmm5,48(%rsi) ++ movups %xmm6,64(%rsi) ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_enc_six: ++ call _aesni_encrypt6 ++ movups %xmm2,(%rsi) ++ movups %xmm3,16(%rsi) ++ movups %xmm4,32(%rsi) ++ movups 
%xmm5,48(%rsi) ++ movups %xmm6,64(%rsi) ++ movups %xmm7,80(%rsi) ++ jmp L$ecb_ret ++ ++.p2align 4 ++L$ecb_decrypt: ++ cmpq $0x80,%rdx ++ jb L$ecb_dec_tail ++ ++ movdqu (%rdi),%xmm2 ++ movdqu 16(%rdi),%xmm3 ++ movdqu 32(%rdi),%xmm4 ++ movdqu 48(%rdi),%xmm5 ++ movdqu 64(%rdi),%xmm6 ++ movdqu 80(%rdi),%xmm7 ++ movdqu 96(%rdi),%xmm8 ++ movdqu 112(%rdi),%xmm9 ++ leaq 128(%rdi),%rdi ++ subq $0x80,%rdx ++ jmp L$ecb_dec_loop8_enter ++.p2align 4 ++L$ecb_dec_loop8: ++ movups %xmm2,(%rsi) ++ movq %r11,%rcx ++ movdqu (%rdi),%xmm2 ++ movl %r10d,%eax ++ movups %xmm3,16(%rsi) ++ movdqu 16(%rdi),%xmm3 ++ movups %xmm4,32(%rsi) ++ movdqu 32(%rdi),%xmm4 ++ movups %xmm5,48(%rsi) ++ movdqu 48(%rdi),%xmm5 ++ movups %xmm6,64(%rsi) ++ movdqu 64(%rdi),%xmm6 ++ movups %xmm7,80(%rsi) ++ movdqu 80(%rdi),%xmm7 ++ movups %xmm8,96(%rsi) ++ movdqu 96(%rdi),%xmm8 ++ movups %xmm9,112(%rsi) ++ leaq 128(%rsi),%rsi ++ movdqu 112(%rdi),%xmm9 ++ leaq 128(%rdi),%rdi ++L$ecb_dec_loop8_enter: ++ ++ call _aesni_decrypt8 ++ ++ movups (%r11),%xmm0 ++ subq $0x80,%rdx ++ jnc L$ecb_dec_loop8 ++ ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ movq %r11,%rcx ++ movups %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ movl %r10d,%eax ++ movups %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ movups %xmm5,48(%rsi) ++ pxor %xmm5,%xmm5 ++ movups %xmm6,64(%rsi) ++ pxor %xmm6,%xmm6 ++ movups %xmm7,80(%rsi) ++ pxor %xmm7,%xmm7 ++ movups %xmm8,96(%rsi) ++ pxor %xmm8,%xmm8 ++ movups %xmm9,112(%rsi) ++ pxor %xmm9,%xmm9 ++ leaq 128(%rsi),%rsi ++ addq $0x80,%rdx ++ jz L$ecb_ret ++ ++L$ecb_dec_tail: ++ movups (%rdi),%xmm2 ++ cmpq $0x20,%rdx ++ jb L$ecb_dec_one ++ movups 16(%rdi),%xmm3 ++ je L$ecb_dec_two ++ movups 32(%rdi),%xmm4 ++ cmpq $0x40,%rdx ++ jb L$ecb_dec_three ++ movups 48(%rdi),%xmm5 ++ je L$ecb_dec_four ++ movups 64(%rdi),%xmm6 ++ cmpq $0x60,%rdx ++ jb L$ecb_dec_five ++ movups 80(%rdi),%xmm7 ++ je L$ecb_dec_six ++ movups 96(%rdi),%xmm8 ++ movups (%rcx),%xmm0 ++ xorps %xmm9,%xmm9 ++ call _aesni_decrypt8 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ movups %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ movups %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ movups %xmm5,48(%rsi) ++ pxor %xmm5,%xmm5 ++ movups %xmm6,64(%rsi) ++ pxor %xmm6,%xmm6 ++ movups %xmm7,80(%rsi) ++ pxor %xmm7,%xmm7 ++ movups %xmm8,96(%rsi) ++ pxor %xmm8,%xmm8 ++ pxor %xmm9,%xmm9 ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_dec_one: ++ movups (%rcx),%xmm0 ++ movups 16(%rcx),%xmm1 ++ leaq 32(%rcx),%rcx ++ xorps %xmm0,%xmm2 ++L$oop_dec1_4: ++.byte 102,15,56,222,209 ++ decl %eax ++ movups (%rcx),%xmm1 ++ leaq 16(%rcx),%rcx ++ jnz L$oop_dec1_4 ++.byte 102,15,56,223,209 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_dec_two: ++ call _aesni_decrypt2 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ movups %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_dec_three: ++ call _aesni_decrypt3 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ movups %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ movups %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_dec_four: ++ call _aesni_decrypt4 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ movups %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ movups %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ movups %xmm5,48(%rsi) ++ pxor %xmm5,%xmm5 ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_dec_five: ++ xorps %xmm7,%xmm7 ++ call _aesni_decrypt6 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ movups %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ movups %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ movups %xmm5,48(%rsi) ++ pxor %xmm5,%xmm5 ++ movups %xmm6,64(%rsi) ++ pxor %xmm6,%xmm6 ++ 
pxor %xmm7,%xmm7 ++ jmp L$ecb_ret ++.p2align 4 ++L$ecb_dec_six: ++ call _aesni_decrypt6 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ movups %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ movups %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ movups %xmm5,48(%rsi) ++ pxor %xmm5,%xmm5 ++ movups %xmm6,64(%rsi) ++ pxor %xmm6,%xmm6 ++ movups %xmm7,80(%rsi) ++ pxor %xmm7,%xmm7 ++ ++L$ecb_ret: ++ xorps %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes_hw_ctr32_encrypt_blocks ++.private_extern _aes_hw_ctr32_encrypt_blocks ++ ++.p2align 4 ++_aes_hw_ctr32_encrypt_blocks: ++ ++#ifdef BORINGSSL_DISPATCH_TEST ++ movb $1,_BORINGSSL_function_hit(%rip) ++#endif ++ cmpq $1,%rdx ++ jne L$ctr32_bulk ++ ++ ++ ++ movups (%r8),%xmm2 ++ movups (%rdi),%xmm3 ++ movl 240(%rcx),%edx ++ movups (%rcx),%xmm0 ++ movups 16(%rcx),%xmm1 ++ leaq 32(%rcx),%rcx ++ xorps %xmm0,%xmm2 ++L$oop_enc1_5: ++.byte 102,15,56,220,209 ++ decl %edx ++ movups (%rcx),%xmm1 ++ leaq 16(%rcx),%rcx ++ jnz L$oop_enc1_5 ++.byte 102,15,56,221,209 ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ xorps %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movups %xmm2,(%rsi) ++ xorps %xmm2,%xmm2 ++ jmp L$ctr32_epilogue ++ ++.p2align 4 ++L$ctr32_bulk: ++ leaq (%rsp),%r11 ++ ++ pushq %rbp ++ ++ subq $128,%rsp ++ andq $-16,%rsp ++ ++ ++ ++ ++ movdqu (%r8),%xmm2 ++ movdqu (%rcx),%xmm0 ++ movl 12(%r8),%r8d ++ pxor %xmm0,%xmm2 ++ movl 12(%rcx),%ebp ++ movdqa %xmm2,0(%rsp) ++ bswapl %r8d ++ movdqa %xmm2,%xmm3 ++ movdqa %xmm2,%xmm4 ++ movdqa %xmm2,%xmm5 ++ movdqa %xmm2,64(%rsp) ++ movdqa %xmm2,80(%rsp) ++ movdqa %xmm2,96(%rsp) ++ movq %rdx,%r10 ++ movdqa %xmm2,112(%rsp) ++ ++ leaq 1(%r8),%rax ++ leaq 2(%r8),%rdx ++ bswapl %eax ++ bswapl %edx ++ xorl %ebp,%eax ++ xorl %ebp,%edx ++.byte 102,15,58,34,216,3 ++ leaq 3(%r8),%rax ++ movdqa %xmm3,16(%rsp) ++.byte 102,15,58,34,226,3 ++ bswapl %eax ++ movq %r10,%rdx ++ leaq 4(%r8),%r10 ++ movdqa %xmm4,32(%rsp) ++ xorl %ebp,%eax ++ bswapl %r10d ++.byte 102,15,58,34,232,3 ++ xorl %ebp,%r10d ++ movdqa %xmm5,48(%rsp) ++ leaq 5(%r8),%r9 ++ movl %r10d,64+12(%rsp) ++ bswapl %r9d ++ leaq 6(%r8),%r10 ++ movl 240(%rcx),%eax ++ xorl %ebp,%r9d ++ bswapl %r10d ++ movl %r9d,80+12(%rsp) ++ xorl %ebp,%r10d ++ leaq 7(%r8),%r9 ++ movl %r10d,96+12(%rsp) ++ bswapl %r9d ++ leaq _OPENSSL_ia32cap_P(%rip),%r10 ++ movl 4(%r10),%r10d ++ xorl %ebp,%r9d ++ andl $71303168,%r10d ++ movl %r9d,112+12(%rsp) ++ ++ movups 16(%rcx),%xmm1 ++ ++ movdqa 64(%rsp),%xmm6 ++ movdqa 80(%rsp),%xmm7 ++ ++ cmpq $8,%rdx ++ jb L$ctr32_tail ++ ++ subq $6,%rdx ++ cmpl $4194304,%r10d ++ je L$ctr32_6x ++ ++ leaq 128(%rcx),%rcx ++ subq $2,%rdx ++ jmp L$ctr32_loop8 ++ ++.p2align 4 ++L$ctr32_6x: ++ shll $4,%eax ++ movl $48,%r10d ++ bswapl %ebp ++ leaq 32(%rcx,%rax,1),%rcx ++ subq %rax,%r10 ++ jmp L$ctr32_loop6 ++ ++.p2align 4 ++L$ctr32_loop6: ++ addl $6,%r8d ++ movups -48(%rcx,%r10,1),%xmm0 ++.byte 102,15,56,220,209 ++ movl %r8d,%eax ++ xorl %ebp,%eax ++.byte 102,15,56,220,217 ++.byte 0x0f,0x38,0xf1,0x44,0x24,12 ++ leal 1(%r8),%eax ++.byte 102,15,56,220,225 ++ xorl %ebp,%eax ++.byte 0x0f,0x38,0xf1,0x44,0x24,28 ++.byte 102,15,56,220,233 ++ leal 2(%r8),%eax ++ xorl %ebp,%eax ++.byte 102,15,56,220,241 ++.byte 0x0f,0x38,0xf1,0x44,0x24,44 ++ leal 3(%r8),%eax ++.byte 102,15,56,220,249 ++ movups -32(%rcx,%r10,1),%xmm1 ++ xorl %ebp,%eax ++ ++.byte 102,15,56,220,208 ++.byte 0x0f,0x38,0xf1,0x44,0x24,60 ++ leal 4(%r8),%eax ++.byte 102,15,56,220,216 ++ xorl %ebp,%eax ++.byte 0x0f,0x38,0xf1,0x44,0x24,76 ++.byte 102,15,56,220,224 ++ leal 5(%r8),%eax ++ xorl %ebp,%eax ++.byte 102,15,56,220,232 ++.byte 
0x0f,0x38,0xf1,0x44,0x24,92 ++ movq %r10,%rax ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++ movups -16(%rcx,%r10,1),%xmm0 ++ ++ call L$enc_loop6 ++ ++ movdqu (%rdi),%xmm8 ++ movdqu 16(%rdi),%xmm9 ++ movdqu 32(%rdi),%xmm10 ++ movdqu 48(%rdi),%xmm11 ++ movdqu 64(%rdi),%xmm12 ++ movdqu 80(%rdi),%xmm13 ++ leaq 96(%rdi),%rdi ++ movups -64(%rcx,%r10,1),%xmm1 ++ pxor %xmm2,%xmm8 ++ movaps 0(%rsp),%xmm2 ++ pxor %xmm3,%xmm9 ++ movaps 16(%rsp),%xmm3 ++ pxor %xmm4,%xmm10 ++ movaps 32(%rsp),%xmm4 ++ pxor %xmm5,%xmm11 ++ movaps 48(%rsp),%xmm5 ++ pxor %xmm6,%xmm12 ++ movaps 64(%rsp),%xmm6 ++ pxor %xmm7,%xmm13 ++ movaps 80(%rsp),%xmm7 ++ movdqu %xmm8,(%rsi) ++ movdqu %xmm9,16(%rsi) ++ movdqu %xmm10,32(%rsi) ++ movdqu %xmm11,48(%rsi) ++ movdqu %xmm12,64(%rsi) ++ movdqu %xmm13,80(%rsi) ++ leaq 96(%rsi),%rsi ++ ++ subq $6,%rdx ++ jnc L$ctr32_loop6 ++ ++ addq $6,%rdx ++ jz L$ctr32_done ++ ++ leal -48(%r10),%eax ++ leaq -80(%rcx,%r10,1),%rcx ++ negl %eax ++ shrl $4,%eax ++ jmp L$ctr32_tail ++ ++.p2align 5 ++L$ctr32_loop8: ++ addl $8,%r8d ++ movdqa 96(%rsp),%xmm8 ++.byte 102,15,56,220,209 ++ movl %r8d,%r9d ++ movdqa 112(%rsp),%xmm9 ++.byte 102,15,56,220,217 ++ bswapl %r9d ++ movups 32-128(%rcx),%xmm0 ++.byte 102,15,56,220,225 ++ xorl %ebp,%r9d ++ nop ++.byte 102,15,56,220,233 ++ movl %r9d,0+12(%rsp) ++ leaq 1(%r8),%r9 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++.byte 102,68,15,56,220,201 ++ movups 48-128(%rcx),%xmm1 ++ bswapl %r9d ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++ xorl %ebp,%r9d ++.byte 0x66,0x90 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++ movl %r9d,16+12(%rsp) ++ leaq 2(%r8),%r9 ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++.byte 102,68,15,56,220,192 ++.byte 102,68,15,56,220,200 ++ movups 64-128(%rcx),%xmm0 ++ bswapl %r9d ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++ xorl %ebp,%r9d ++.byte 0x66,0x90 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++ movl %r9d,32+12(%rsp) ++ leaq 3(%r8),%r9 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++.byte 102,68,15,56,220,201 ++ movups 80-128(%rcx),%xmm1 ++ bswapl %r9d ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++ xorl %ebp,%r9d ++.byte 0x66,0x90 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++ movl %r9d,48+12(%rsp) ++ leaq 4(%r8),%r9 ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++.byte 102,68,15,56,220,192 ++.byte 102,68,15,56,220,200 ++ movups 96-128(%rcx),%xmm0 ++ bswapl %r9d ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++ xorl %ebp,%r9d ++.byte 0x66,0x90 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++ movl %r9d,64+12(%rsp) ++ leaq 5(%r8),%r9 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++.byte 102,68,15,56,220,201 ++ movups 112-128(%rcx),%xmm1 ++ bswapl %r9d ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++ xorl %ebp,%r9d ++.byte 0x66,0x90 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++ movl %r9d,80+12(%rsp) ++ leaq 6(%r8),%r9 ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++.byte 102,68,15,56,220,192 ++.byte 102,68,15,56,220,200 ++ movups 128-128(%rcx),%xmm0 ++ bswapl %r9d ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++ xorl %ebp,%r9d ++.byte 0x66,0x90 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++ movl %r9d,96+12(%rsp) ++ leaq 7(%r8),%r9 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++.byte 102,68,15,56,220,201 ++ movups 144-128(%rcx),%xmm1 ++ bswapl %r9d ++.byte 102,15,56,220,208 ++.byte 
102,15,56,220,216 ++.byte 102,15,56,220,224 ++ xorl %ebp,%r9d ++ movdqu 0(%rdi),%xmm10 ++.byte 102,15,56,220,232 ++ movl %r9d,112+12(%rsp) ++ cmpl $11,%eax ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++.byte 102,68,15,56,220,192 ++.byte 102,68,15,56,220,200 ++ movups 160-128(%rcx),%xmm0 ++ ++ jb L$ctr32_enc_done ++ ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++.byte 102,68,15,56,220,201 ++ movups 176-128(%rcx),%xmm1 ++ ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++.byte 102,68,15,56,220,192 ++.byte 102,68,15,56,220,200 ++ movups 192-128(%rcx),%xmm0 ++ je L$ctr32_enc_done ++ ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++.byte 102,68,15,56,220,201 ++ movups 208-128(%rcx),%xmm1 ++ ++.byte 102,15,56,220,208 ++.byte 102,15,56,220,216 ++.byte 102,15,56,220,224 ++.byte 102,15,56,220,232 ++.byte 102,15,56,220,240 ++.byte 102,15,56,220,248 ++.byte 102,68,15,56,220,192 ++.byte 102,68,15,56,220,200 ++ movups 224-128(%rcx),%xmm0 ++ jmp L$ctr32_enc_done ++ ++.p2align 4 ++L$ctr32_enc_done: ++ movdqu 16(%rdi),%xmm11 ++ pxor %xmm0,%xmm10 ++ movdqu 32(%rdi),%xmm12 ++ pxor %xmm0,%xmm11 ++ movdqu 48(%rdi),%xmm13 ++ pxor %xmm0,%xmm12 ++ movdqu 64(%rdi),%xmm14 ++ pxor %xmm0,%xmm13 ++ movdqu 80(%rdi),%xmm15 ++ pxor %xmm0,%xmm14 ++ pxor %xmm0,%xmm15 ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++.byte 102,68,15,56,220,201 ++ movdqu 96(%rdi),%xmm1 ++ leaq 128(%rdi),%rdi ++ ++.byte 102,65,15,56,221,210 ++ pxor %xmm0,%xmm1 ++ movdqu 112-128(%rdi),%xmm10 ++.byte 102,65,15,56,221,219 ++ pxor %xmm0,%xmm10 ++ movdqa 0(%rsp),%xmm11 ++.byte 102,65,15,56,221,228 ++.byte 102,65,15,56,221,237 ++ movdqa 16(%rsp),%xmm12 ++ movdqa 32(%rsp),%xmm13 ++.byte 102,65,15,56,221,246 ++.byte 102,65,15,56,221,255 ++ movdqa 48(%rsp),%xmm14 ++ movdqa 64(%rsp),%xmm15 ++.byte 102,68,15,56,221,193 ++ movdqa 80(%rsp),%xmm0 ++ movups 16-128(%rcx),%xmm1 ++.byte 102,69,15,56,221,202 ++ ++ movups %xmm2,(%rsi) ++ movdqa %xmm11,%xmm2 ++ movups %xmm3,16(%rsi) ++ movdqa %xmm12,%xmm3 ++ movups %xmm4,32(%rsi) ++ movdqa %xmm13,%xmm4 ++ movups %xmm5,48(%rsi) ++ movdqa %xmm14,%xmm5 ++ movups %xmm6,64(%rsi) ++ movdqa %xmm15,%xmm6 ++ movups %xmm7,80(%rsi) ++ movdqa %xmm0,%xmm7 ++ movups %xmm8,96(%rsi) ++ movups %xmm9,112(%rsi) ++ leaq 128(%rsi),%rsi ++ ++ subq $8,%rdx ++ jnc L$ctr32_loop8 ++ ++ addq $8,%rdx ++ jz L$ctr32_done ++ leaq -128(%rcx),%rcx ++ ++L$ctr32_tail: ++ ++ ++ leaq 16(%rcx),%rcx ++ cmpq $4,%rdx ++ jb L$ctr32_loop3 ++ je L$ctr32_loop4 ++ ++ ++ shll $4,%eax ++ movdqa 96(%rsp),%xmm8 ++ pxor %xmm9,%xmm9 ++ ++ movups 16(%rcx),%xmm0 ++.byte 102,15,56,220,209 ++.byte 102,15,56,220,217 ++ leaq 32-16(%rcx,%rax,1),%rcx ++ negq %rax ++.byte 102,15,56,220,225 ++ addq $16,%rax ++ movups (%rdi),%xmm10 ++.byte 102,15,56,220,233 ++.byte 102,15,56,220,241 ++ movups 16(%rdi),%xmm11 ++ movups 32(%rdi),%xmm12 ++.byte 102,15,56,220,249 ++.byte 102,68,15,56,220,193 ++ ++ call L$enc_loop8_enter ++ ++ movdqu 48(%rdi),%xmm13 ++ pxor %xmm10,%xmm2 ++ movdqu 64(%rdi),%xmm10 ++ pxor %xmm11,%xmm3 ++ movdqu %xmm2,(%rsi) ++ pxor 
%xmm12,%xmm4 ++ movdqu %xmm3,16(%rsi) ++ pxor %xmm13,%xmm5 ++ movdqu %xmm4,32(%rsi) ++ pxor %xmm10,%xmm6 ++ movdqu %xmm5,48(%rsi) ++ movdqu %xmm6,64(%rsi) ++ cmpq $6,%rdx ++ jb L$ctr32_done ++ ++ movups 80(%rdi),%xmm11 ++ xorps %xmm11,%xmm7 ++ movups %xmm7,80(%rsi) ++ je L$ctr32_done ++ ++ movups 96(%rdi),%xmm12 ++ xorps %xmm12,%xmm8 ++ movups %xmm8,96(%rsi) ++ jmp L$ctr32_done ++ ++.p2align 5 ++L$ctr32_loop4: ++.byte 102,15,56,220,209 ++ leaq 16(%rcx),%rcx ++ decl %eax ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++.byte 102,15,56,220,233 ++ movups (%rcx),%xmm1 ++ jnz L$ctr32_loop4 ++.byte 102,15,56,221,209 ++.byte 102,15,56,221,217 ++ movups (%rdi),%xmm10 ++ movups 16(%rdi),%xmm11 ++.byte 102,15,56,221,225 ++.byte 102,15,56,221,233 ++ movups 32(%rdi),%xmm12 ++ movups 48(%rdi),%xmm13 ++ ++ xorps %xmm10,%xmm2 ++ movups %xmm2,(%rsi) ++ xorps %xmm11,%xmm3 ++ movups %xmm3,16(%rsi) ++ pxor %xmm12,%xmm4 ++ movdqu %xmm4,32(%rsi) ++ pxor %xmm13,%xmm5 ++ movdqu %xmm5,48(%rsi) ++ jmp L$ctr32_done ++ ++.p2align 5 ++L$ctr32_loop3: ++.byte 102,15,56,220,209 ++ leaq 16(%rcx),%rcx ++ decl %eax ++.byte 102,15,56,220,217 ++.byte 102,15,56,220,225 ++ movups (%rcx),%xmm1 ++ jnz L$ctr32_loop3 ++.byte 102,15,56,221,209 ++.byte 102,15,56,221,217 ++.byte 102,15,56,221,225 ++ ++ movups (%rdi),%xmm10 ++ xorps %xmm10,%xmm2 ++ movups %xmm2,(%rsi) ++ cmpq $2,%rdx ++ jb L$ctr32_done ++ ++ movups 16(%rdi),%xmm11 ++ xorps %xmm11,%xmm3 ++ movups %xmm3,16(%rsi) ++ je L$ctr32_done ++ ++ movups 32(%rdi),%xmm12 ++ xorps %xmm12,%xmm4 ++ movups %xmm4,32(%rsi) ++ ++L$ctr32_done: ++ xorps %xmm0,%xmm0 ++ xorl %ebp,%ebp ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ pxor %xmm7,%xmm7 ++ movaps %xmm0,0(%rsp) ++ pxor %xmm8,%xmm8 ++ movaps %xmm0,16(%rsp) ++ pxor %xmm9,%xmm9 ++ movaps %xmm0,32(%rsp) ++ pxor %xmm10,%xmm10 ++ movaps %xmm0,48(%rsp) ++ pxor %xmm11,%xmm11 ++ movaps %xmm0,64(%rsp) ++ pxor %xmm12,%xmm12 ++ movaps %xmm0,80(%rsp) ++ pxor %xmm13,%xmm13 ++ movaps %xmm0,96(%rsp) ++ pxor %xmm14,%xmm14 ++ movaps %xmm0,112(%rsp) ++ pxor %xmm15,%xmm15 ++ movq -8(%r11),%rbp ++ ++ leaq (%r11),%rsp ++ ++L$ctr32_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes_hw_cbc_encrypt ++.private_extern _aes_hw_cbc_encrypt ++ ++.p2align 4 ++_aes_hw_cbc_encrypt: ++ ++ testq %rdx,%rdx ++ jz L$cbc_ret ++ ++ movl 240(%rcx),%r10d ++ movq %rcx,%r11 ++ testl %r9d,%r9d ++ jz L$cbc_decrypt ++ ++ movups (%r8),%xmm2 ++ movl %r10d,%eax ++ cmpq $16,%rdx ++ jb L$cbc_enc_tail ++ subq $16,%rdx ++ jmp L$cbc_enc_loop ++.p2align 4 ++L$cbc_enc_loop: ++ movups (%rdi),%xmm3 ++ leaq 16(%rdi),%rdi ++ ++ movups (%rcx),%xmm0 ++ movups 16(%rcx),%xmm1 ++ xorps %xmm0,%xmm3 ++ leaq 32(%rcx),%rcx ++ xorps %xmm3,%xmm2 ++L$oop_enc1_6: ++.byte 102,15,56,220,209 ++ decl %eax ++ movups (%rcx),%xmm1 ++ leaq 16(%rcx),%rcx ++ jnz L$oop_enc1_6 ++.byte 102,15,56,221,209 ++ movl %r10d,%eax ++ movq %r11,%rcx ++ movups %xmm2,0(%rsi) ++ leaq 16(%rsi),%rsi ++ subq $16,%rdx ++ jnc L$cbc_enc_loop ++ addq $16,%rdx ++ jnz L$cbc_enc_tail ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ movups %xmm2,(%r8) ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ jmp L$cbc_ret ++ ++L$cbc_enc_tail: ++ movq %rdx,%rcx ++ xchgq %rdi,%rsi ++.long 0x9066A4F3 ++ movl $16,%ecx ++ subq %rdx,%rcx ++ xorl %eax,%eax ++.long 0x9066AAF3 ++ leaq -16(%rdi),%rdi ++ movl %r10d,%eax ++ movq %rdi,%rsi ++ movq %r11,%rcx ++ xorq %rdx,%rdx ++ jmp L$cbc_enc_loop ++ ++.p2align 4 ++L$cbc_decrypt: ++ cmpq $16,%rdx ++ jne L$cbc_decrypt_bulk ++ ++ ++ ++ movdqu 
(%rdi),%xmm2 ++ movdqu (%r8),%xmm3 ++ movdqa %xmm2,%xmm4 ++ movups (%rcx),%xmm0 ++ movups 16(%rcx),%xmm1 ++ leaq 32(%rcx),%rcx ++ xorps %xmm0,%xmm2 ++L$oop_dec1_7: ++.byte 102,15,56,222,209 ++ decl %r10d ++ movups (%rcx),%xmm1 ++ leaq 16(%rcx),%rcx ++ jnz L$oop_dec1_7 ++.byte 102,15,56,223,209 ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ movdqu %xmm4,(%r8) ++ xorps %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ jmp L$cbc_ret ++.p2align 4 ++L$cbc_decrypt_bulk: ++ leaq (%rsp),%r11 ++ ++ pushq %rbp ++ ++ subq $16,%rsp ++ andq $-16,%rsp ++ movq %rcx,%rbp ++ movups (%r8),%xmm10 ++ movl %r10d,%eax ++ cmpq $0x50,%rdx ++ jbe L$cbc_dec_tail ++ ++ movups (%rcx),%xmm0 ++ movdqu 0(%rdi),%xmm2 ++ movdqu 16(%rdi),%xmm3 ++ movdqa %xmm2,%xmm11 ++ movdqu 32(%rdi),%xmm4 ++ movdqa %xmm3,%xmm12 ++ movdqu 48(%rdi),%xmm5 ++ movdqa %xmm4,%xmm13 ++ movdqu 64(%rdi),%xmm6 ++ movdqa %xmm5,%xmm14 ++ movdqu 80(%rdi),%xmm7 ++ movdqa %xmm6,%xmm15 ++ leaq _OPENSSL_ia32cap_P(%rip),%r9 ++ movl 4(%r9),%r9d ++ cmpq $0x70,%rdx ++ jbe L$cbc_dec_six_or_seven ++ ++ andl $71303168,%r9d ++ subq $0x50,%rdx ++ cmpl $4194304,%r9d ++ je L$cbc_dec_loop6_enter ++ subq $0x20,%rdx ++ leaq 112(%rcx),%rcx ++ jmp L$cbc_dec_loop8_enter ++.p2align 4 ++L$cbc_dec_loop8: ++ movups %xmm9,(%rsi) ++ leaq 16(%rsi),%rsi ++L$cbc_dec_loop8_enter: ++ movdqu 96(%rdi),%xmm8 ++ pxor %xmm0,%xmm2 ++ movdqu 112(%rdi),%xmm9 ++ pxor %xmm0,%xmm3 ++ movups 16-112(%rcx),%xmm1 ++ pxor %xmm0,%xmm4 ++ movq $-1,%rbp ++ cmpq $0x70,%rdx ++ pxor %xmm0,%xmm5 ++ pxor %xmm0,%xmm6 ++ pxor %xmm0,%xmm7 ++ pxor %xmm0,%xmm8 ++ ++.byte 102,15,56,222,209 ++ pxor %xmm0,%xmm9 ++ movups 32-112(%rcx),%xmm0 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,68,15,56,222,193 ++ adcq $0,%rbp ++ andq $128,%rbp ++.byte 102,68,15,56,222,201 ++ addq %rdi,%rbp ++ movups 48-112(%rcx),%xmm1 ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++.byte 102,15,56,222,240 ++.byte 102,15,56,222,248 ++.byte 102,68,15,56,222,192 ++.byte 102,68,15,56,222,200 ++ movups 64-112(%rcx),%xmm0 ++ nop ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,68,15,56,222,193 ++.byte 102,68,15,56,222,201 ++ movups 80-112(%rcx),%xmm1 ++ nop ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++.byte 102,15,56,222,240 ++.byte 102,15,56,222,248 ++.byte 102,68,15,56,222,192 ++.byte 102,68,15,56,222,200 ++ movups 96-112(%rcx),%xmm0 ++ nop ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,68,15,56,222,193 ++.byte 102,68,15,56,222,201 ++ movups 112-112(%rcx),%xmm1 ++ nop ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++.byte 102,15,56,222,240 ++.byte 102,15,56,222,248 ++.byte 102,68,15,56,222,192 ++.byte 102,68,15,56,222,200 ++ movups 128-112(%rcx),%xmm0 ++ nop ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,68,15,56,222,193 ++.byte 102,68,15,56,222,201 ++ movups 144-112(%rcx),%xmm1 ++ cmpl $11,%eax ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 
++.byte 102,15,56,222,240 ++.byte 102,15,56,222,248 ++.byte 102,68,15,56,222,192 ++.byte 102,68,15,56,222,200 ++ movups 160-112(%rcx),%xmm0 ++ jb L$cbc_dec_done ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,68,15,56,222,193 ++.byte 102,68,15,56,222,201 ++ movups 176-112(%rcx),%xmm1 ++ nop ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++.byte 102,15,56,222,240 ++.byte 102,15,56,222,248 ++.byte 102,68,15,56,222,192 ++.byte 102,68,15,56,222,200 ++ movups 192-112(%rcx),%xmm0 ++ je L$cbc_dec_done ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++.byte 102,68,15,56,222,193 ++.byte 102,68,15,56,222,201 ++ movups 208-112(%rcx),%xmm1 ++ nop ++.byte 102,15,56,222,208 ++.byte 102,15,56,222,216 ++.byte 102,15,56,222,224 ++.byte 102,15,56,222,232 ++.byte 102,15,56,222,240 ++.byte 102,15,56,222,248 ++.byte 102,68,15,56,222,192 ++.byte 102,68,15,56,222,200 ++ movups 224-112(%rcx),%xmm0 ++ jmp L$cbc_dec_done ++.p2align 4 ++L$cbc_dec_done: ++.byte 102,15,56,222,209 ++.byte 102,15,56,222,217 ++ pxor %xmm0,%xmm10 ++ pxor %xmm0,%xmm11 ++.byte 102,15,56,222,225 ++.byte 102,15,56,222,233 ++ pxor %xmm0,%xmm12 ++ pxor %xmm0,%xmm13 ++.byte 102,15,56,222,241 ++.byte 102,15,56,222,249 ++ pxor %xmm0,%xmm14 ++ pxor %xmm0,%xmm15 ++.byte 102,68,15,56,222,193 ++.byte 102,68,15,56,222,201 ++ movdqu 80(%rdi),%xmm1 ++ ++.byte 102,65,15,56,223,210 ++ movdqu 96(%rdi),%xmm10 ++ pxor %xmm0,%xmm1 ++.byte 102,65,15,56,223,219 ++ pxor %xmm0,%xmm10 ++ movdqu 112(%rdi),%xmm0 ++.byte 102,65,15,56,223,228 ++ leaq 128(%rdi),%rdi ++ movdqu 0(%rbp),%xmm11 ++.byte 102,65,15,56,223,237 ++.byte 102,65,15,56,223,246 ++ movdqu 16(%rbp),%xmm12 ++ movdqu 32(%rbp),%xmm13 ++.byte 102,65,15,56,223,255 ++.byte 102,68,15,56,223,193 ++ movdqu 48(%rbp),%xmm14 ++ movdqu 64(%rbp),%xmm15 ++.byte 102,69,15,56,223,202 ++ movdqa %xmm0,%xmm10 ++ movdqu 80(%rbp),%xmm1 ++ movups -112(%rcx),%xmm0 ++ ++ movups %xmm2,(%rsi) ++ movdqa %xmm11,%xmm2 ++ movups %xmm3,16(%rsi) ++ movdqa %xmm12,%xmm3 ++ movups %xmm4,32(%rsi) ++ movdqa %xmm13,%xmm4 ++ movups %xmm5,48(%rsi) ++ movdqa %xmm14,%xmm5 ++ movups %xmm6,64(%rsi) ++ movdqa %xmm15,%xmm6 ++ movups %xmm7,80(%rsi) ++ movdqa %xmm1,%xmm7 ++ movups %xmm8,96(%rsi) ++ leaq 112(%rsi),%rsi ++ ++ subq $0x80,%rdx ++ ja L$cbc_dec_loop8 ++ ++ movaps %xmm9,%xmm2 ++ leaq -112(%rcx),%rcx ++ addq $0x70,%rdx ++ jle L$cbc_dec_clear_tail_collected ++ movups %xmm9,(%rsi) ++ leaq 16(%rsi),%rsi ++ cmpq $0x50,%rdx ++ jbe L$cbc_dec_tail ++ ++ movaps %xmm11,%xmm2 ++L$cbc_dec_six_or_seven: ++ cmpq $0x60,%rdx ++ ja L$cbc_dec_seven ++ ++ movaps %xmm7,%xmm8 ++ call _aesni_decrypt6 ++ pxor %xmm10,%xmm2 ++ movaps %xmm8,%xmm10 ++ pxor %xmm11,%xmm3 ++ movdqu %xmm2,(%rsi) ++ pxor %xmm12,%xmm4 ++ movdqu %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ pxor %xmm13,%xmm5 ++ movdqu %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ pxor %xmm14,%xmm6 ++ movdqu %xmm5,48(%rsi) ++ pxor %xmm5,%xmm5 ++ pxor %xmm15,%xmm7 ++ movdqu %xmm6,64(%rsi) ++ pxor %xmm6,%xmm6 ++ leaq 80(%rsi),%rsi ++ movdqa %xmm7,%xmm2 ++ pxor %xmm7,%xmm7 ++ jmp L$cbc_dec_tail_collected ++ ++.p2align 4 ++L$cbc_dec_seven: ++ movups 96(%rdi),%xmm8 ++ xorps %xmm9,%xmm9 ++ call _aesni_decrypt8 ++ movups 80(%rdi),%xmm9 ++ pxor %xmm10,%xmm2 ++ movups 96(%rdi),%xmm10 ++ pxor %xmm11,%xmm3 ++ movdqu %xmm2,(%rsi) ++ pxor %xmm12,%xmm4 ++ 
movdqu %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ pxor %xmm13,%xmm5 ++ movdqu %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ pxor %xmm14,%xmm6 ++ movdqu %xmm5,48(%rsi) ++ pxor %xmm5,%xmm5 ++ pxor %xmm15,%xmm7 ++ movdqu %xmm6,64(%rsi) ++ pxor %xmm6,%xmm6 ++ pxor %xmm9,%xmm8 ++ movdqu %xmm7,80(%rsi) ++ pxor %xmm7,%xmm7 ++ leaq 96(%rsi),%rsi ++ movdqa %xmm8,%xmm2 ++ pxor %xmm8,%xmm8 ++ pxor %xmm9,%xmm9 ++ jmp L$cbc_dec_tail_collected ++ ++.p2align 4 ++L$cbc_dec_loop6: ++ movups %xmm7,(%rsi) ++ leaq 16(%rsi),%rsi ++ movdqu 0(%rdi),%xmm2 ++ movdqu 16(%rdi),%xmm3 ++ movdqa %xmm2,%xmm11 ++ movdqu 32(%rdi),%xmm4 ++ movdqa %xmm3,%xmm12 ++ movdqu 48(%rdi),%xmm5 ++ movdqa %xmm4,%xmm13 ++ movdqu 64(%rdi),%xmm6 ++ movdqa %xmm5,%xmm14 ++ movdqu 80(%rdi),%xmm7 ++ movdqa %xmm6,%xmm15 ++L$cbc_dec_loop6_enter: ++ leaq 96(%rdi),%rdi ++ movdqa %xmm7,%xmm8 ++ ++ call _aesni_decrypt6 ++ ++ pxor %xmm10,%xmm2 ++ movdqa %xmm8,%xmm10 ++ pxor %xmm11,%xmm3 ++ movdqu %xmm2,(%rsi) ++ pxor %xmm12,%xmm4 ++ movdqu %xmm3,16(%rsi) ++ pxor %xmm13,%xmm5 ++ movdqu %xmm4,32(%rsi) ++ pxor %xmm14,%xmm6 ++ movq %rbp,%rcx ++ movdqu %xmm5,48(%rsi) ++ pxor %xmm15,%xmm7 ++ movl %r10d,%eax ++ movdqu %xmm6,64(%rsi) ++ leaq 80(%rsi),%rsi ++ subq $0x60,%rdx ++ ja L$cbc_dec_loop6 ++ ++ movdqa %xmm7,%xmm2 ++ addq $0x50,%rdx ++ jle L$cbc_dec_clear_tail_collected ++ movups %xmm7,(%rsi) ++ leaq 16(%rsi),%rsi ++ ++L$cbc_dec_tail: ++ movups (%rdi),%xmm2 ++ subq $0x10,%rdx ++ jbe L$cbc_dec_one ++ ++ movups 16(%rdi),%xmm3 ++ movaps %xmm2,%xmm11 ++ subq $0x10,%rdx ++ jbe L$cbc_dec_two ++ ++ movups 32(%rdi),%xmm4 ++ movaps %xmm3,%xmm12 ++ subq $0x10,%rdx ++ jbe L$cbc_dec_three ++ ++ movups 48(%rdi),%xmm5 ++ movaps %xmm4,%xmm13 ++ subq $0x10,%rdx ++ jbe L$cbc_dec_four ++ ++ movups 64(%rdi),%xmm6 ++ movaps %xmm5,%xmm14 ++ movaps %xmm6,%xmm15 ++ xorps %xmm7,%xmm7 ++ call _aesni_decrypt6 ++ pxor %xmm10,%xmm2 ++ movaps %xmm15,%xmm10 ++ pxor %xmm11,%xmm3 ++ movdqu %xmm2,(%rsi) ++ pxor %xmm12,%xmm4 ++ movdqu %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ pxor %xmm13,%xmm5 ++ movdqu %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ pxor %xmm14,%xmm6 ++ movdqu %xmm5,48(%rsi) ++ pxor %xmm5,%xmm5 ++ leaq 64(%rsi),%rsi ++ movdqa %xmm6,%xmm2 ++ pxor %xmm6,%xmm6 ++ pxor %xmm7,%xmm7 ++ subq $0x10,%rdx ++ jmp L$cbc_dec_tail_collected ++ ++.p2align 4 ++L$cbc_dec_one: ++ movaps %xmm2,%xmm11 ++ movups (%rcx),%xmm0 ++ movups 16(%rcx),%xmm1 ++ leaq 32(%rcx),%rcx ++ xorps %xmm0,%xmm2 ++L$oop_dec1_8: ++.byte 102,15,56,222,209 ++ decl %eax ++ movups (%rcx),%xmm1 ++ leaq 16(%rcx),%rcx ++ jnz L$oop_dec1_8 ++.byte 102,15,56,223,209 ++ xorps %xmm10,%xmm2 ++ movaps %xmm11,%xmm10 ++ jmp L$cbc_dec_tail_collected ++.p2align 4 ++L$cbc_dec_two: ++ movaps %xmm3,%xmm12 ++ call _aesni_decrypt2 ++ pxor %xmm10,%xmm2 ++ movaps %xmm12,%xmm10 ++ pxor %xmm11,%xmm3 ++ movdqu %xmm2,(%rsi) ++ movdqa %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ leaq 16(%rsi),%rsi ++ jmp L$cbc_dec_tail_collected ++.p2align 4 ++L$cbc_dec_three: ++ movaps %xmm4,%xmm13 ++ call _aesni_decrypt3 ++ pxor %xmm10,%xmm2 ++ movaps %xmm13,%xmm10 ++ pxor %xmm11,%xmm3 ++ movdqu %xmm2,(%rsi) ++ pxor %xmm12,%xmm4 ++ movdqu %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ movdqa %xmm4,%xmm2 ++ pxor %xmm4,%xmm4 ++ leaq 32(%rsi),%rsi ++ jmp L$cbc_dec_tail_collected ++.p2align 4 ++L$cbc_dec_four: ++ movaps %xmm5,%xmm14 ++ call _aesni_decrypt4 ++ pxor %xmm10,%xmm2 ++ movaps %xmm14,%xmm10 ++ pxor %xmm11,%xmm3 ++ movdqu %xmm2,(%rsi) ++ pxor %xmm12,%xmm4 ++ movdqu %xmm3,16(%rsi) ++ pxor %xmm3,%xmm3 ++ pxor %xmm13,%xmm5 ++ movdqu %xmm4,32(%rsi) ++ pxor %xmm4,%xmm4 ++ movdqa %xmm5,%xmm2 ++ 
pxor %xmm5,%xmm5 ++ leaq 48(%rsi),%rsi ++ jmp L$cbc_dec_tail_collected ++ ++.p2align 4 ++L$cbc_dec_clear_tail_collected: ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ pxor %xmm7,%xmm7 ++ pxor %xmm8,%xmm8 ++ pxor %xmm9,%xmm9 ++L$cbc_dec_tail_collected: ++ movups %xmm10,(%r8) ++ andq $15,%rdx ++ jnz L$cbc_dec_tail_partial ++ movups %xmm2,(%rsi) ++ pxor %xmm2,%xmm2 ++ jmp L$cbc_dec_ret ++.p2align 4 ++L$cbc_dec_tail_partial: ++ movaps %xmm2,(%rsp) ++ pxor %xmm2,%xmm2 ++ movq $16,%rcx ++ movq %rsi,%rdi ++ subq %rdx,%rcx ++ leaq (%rsp),%rsi ++.long 0x9066A4F3 ++ movdqa %xmm2,(%rsp) ++ ++L$cbc_dec_ret: ++ xorps %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ movq -8(%r11),%rbp ++ ++ leaq (%r11),%rsp ++ ++L$cbc_ret: ++ .byte 0xf3,0xc3 ++ ++ ++.globl _aes_hw_set_decrypt_key ++.private_extern _aes_hw_set_decrypt_key ++ ++.p2align 4 ++_aes_hw_set_decrypt_key: ++ ++.byte 0x48,0x83,0xEC,0x08 ++ ++ call __aesni_set_encrypt_key ++ shll $4,%esi ++ testl %eax,%eax ++ jnz L$dec_key_ret ++ leaq 16(%rdx,%rsi,1),%rdi ++ ++ movups (%rdx),%xmm0 ++ movups (%rdi),%xmm1 ++ movups %xmm0,(%rdi) ++ movups %xmm1,(%rdx) ++ leaq 16(%rdx),%rdx ++ leaq -16(%rdi),%rdi ++ ++L$dec_key_inverse: ++ movups (%rdx),%xmm0 ++ movups (%rdi),%xmm1 ++.byte 102,15,56,219,192 ++.byte 102,15,56,219,201 ++ leaq 16(%rdx),%rdx ++ leaq -16(%rdi),%rdi ++ movups %xmm0,16(%rdi) ++ movups %xmm1,-16(%rdx) ++ cmpq %rdx,%rdi ++ ja L$dec_key_inverse ++ ++ movups (%rdx),%xmm0 ++.byte 102,15,56,219,192 ++ pxor %xmm1,%xmm1 ++ movups %xmm0,(%rdi) ++ pxor %xmm0,%xmm0 ++L$dec_key_ret: ++ addq $8,%rsp ++ ++ .byte 0xf3,0xc3 ++ ++L$SEH_end_set_decrypt_key: ++ ++.globl _aes_hw_set_encrypt_key ++.private_extern _aes_hw_set_encrypt_key ++ ++.p2align 4 ++_aes_hw_set_encrypt_key: ++__aesni_set_encrypt_key: ++ ++#ifdef BORINGSSL_DISPATCH_TEST ++ movb $1,_BORINGSSL_function_hit+3(%rip) ++#endif ++.byte 0x48,0x83,0xEC,0x08 ++ ++ movq $-1,%rax ++ testq %rdi,%rdi ++ jz L$enc_key_ret ++ testq %rdx,%rdx ++ jz L$enc_key_ret ++ ++ movups (%rdi),%xmm0 ++ xorps %xmm4,%xmm4 ++ leaq _OPENSSL_ia32cap_P(%rip),%r10 ++ movl 4(%r10),%r10d ++ andl $268437504,%r10d ++ leaq 16(%rdx),%rax ++ cmpl $256,%esi ++ je L$14rounds ++ cmpl $192,%esi ++ je L$12rounds ++ cmpl $128,%esi ++ jne L$bad_keybits ++ ++L$10rounds: ++ movl $9,%esi ++ cmpl $268435456,%r10d ++ je L$10rounds_alt ++ ++ movups %xmm0,(%rdx) ++.byte 102,15,58,223,200,1 ++ call L$key_expansion_128_cold ++.byte 102,15,58,223,200,2 ++ call L$key_expansion_128 ++.byte 102,15,58,223,200,4 ++ call L$key_expansion_128 ++.byte 102,15,58,223,200,8 ++ call L$key_expansion_128 ++.byte 102,15,58,223,200,16 ++ call L$key_expansion_128 ++.byte 102,15,58,223,200,32 ++ call L$key_expansion_128 ++.byte 102,15,58,223,200,64 ++ call L$key_expansion_128 ++.byte 102,15,58,223,200,128 ++ call L$key_expansion_128 ++.byte 102,15,58,223,200,27 ++ call L$key_expansion_128 ++.byte 102,15,58,223,200,54 ++ call L$key_expansion_128 ++ movups %xmm0,(%rax) ++ movl %esi,80(%rax) ++ xorl %eax,%eax ++ jmp L$enc_key_ret ++ ++.p2align 4 ++L$10rounds_alt: ++ movdqa L$key_rotate(%rip),%xmm5 ++ movl $8,%r10d ++ movdqa L$key_rcon1(%rip),%xmm4 ++ movdqa %xmm0,%xmm2 ++ movdqu %xmm0,(%rdx) ++ jmp L$oop_key128 ++ ++.p2align 4 ++L$oop_key128: ++.byte 102,15,56,0,197 ++.byte 102,15,56,221,196 ++ pslld $1,%xmm4 ++ leaq 16(%rax),%rax ++ ++ movdqa %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm3,%xmm2 ++ ++ pxor %xmm2,%xmm0 ++ movdqu %xmm0,-16(%rax) ++ movdqa %xmm0,%xmm2 ++ ++ decl 
%r10d ++ jnz L$oop_key128 ++ ++ movdqa L$key_rcon1b(%rip),%xmm4 ++ ++.byte 102,15,56,0,197 ++.byte 102,15,56,221,196 ++ pslld $1,%xmm4 ++ ++ movdqa %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm3,%xmm2 ++ ++ pxor %xmm2,%xmm0 ++ movdqu %xmm0,(%rax) ++ ++ movdqa %xmm0,%xmm2 ++.byte 102,15,56,0,197 ++.byte 102,15,56,221,196 ++ ++ movdqa %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm2,%xmm3 ++ pslldq $4,%xmm2 ++ pxor %xmm3,%xmm2 ++ ++ pxor %xmm2,%xmm0 ++ movdqu %xmm0,16(%rax) ++ ++ movl %esi,96(%rax) ++ xorl %eax,%eax ++ jmp L$enc_key_ret ++ ++.p2align 4 ++L$12rounds: ++ movq 16(%rdi),%xmm2 ++ movl $11,%esi ++ cmpl $268435456,%r10d ++ je L$12rounds_alt ++ ++ movups %xmm0,(%rdx) ++.byte 102,15,58,223,202,1 ++ call L$key_expansion_192a_cold ++.byte 102,15,58,223,202,2 ++ call L$key_expansion_192b ++.byte 102,15,58,223,202,4 ++ call L$key_expansion_192a ++.byte 102,15,58,223,202,8 ++ call L$key_expansion_192b ++.byte 102,15,58,223,202,16 ++ call L$key_expansion_192a ++.byte 102,15,58,223,202,32 ++ call L$key_expansion_192b ++.byte 102,15,58,223,202,64 ++ call L$key_expansion_192a ++.byte 102,15,58,223,202,128 ++ call L$key_expansion_192b ++ movups %xmm0,(%rax) ++ movl %esi,48(%rax) ++ xorq %rax,%rax ++ jmp L$enc_key_ret ++ ++.p2align 4 ++L$12rounds_alt: ++ movdqa L$key_rotate192(%rip),%xmm5 ++ movdqa L$key_rcon1(%rip),%xmm4 ++ movl $8,%r10d ++ movdqu %xmm0,(%rdx) ++ jmp L$oop_key192 ++ ++.p2align 4 ++L$oop_key192: ++ movq %xmm2,0(%rax) ++ movdqa %xmm2,%xmm1 ++.byte 102,15,56,0,213 ++.byte 102,15,56,221,212 ++ pslld $1,%xmm4 ++ leaq 24(%rax),%rax ++ ++ movdqa %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm3,%xmm0 ++ ++ pshufd $0xff,%xmm0,%xmm3 ++ pxor %xmm1,%xmm3 ++ pslldq $4,%xmm1 ++ pxor %xmm1,%xmm3 ++ ++ pxor %xmm2,%xmm0 ++ pxor %xmm3,%xmm2 ++ movdqu %xmm0,-16(%rax) ++ ++ decl %r10d ++ jnz L$oop_key192 ++ ++ movl %esi,32(%rax) ++ xorl %eax,%eax ++ jmp L$enc_key_ret ++ ++.p2align 4 ++L$14rounds: ++ movups 16(%rdi),%xmm2 ++ movl $13,%esi ++ leaq 16(%rax),%rax ++ cmpl $268435456,%r10d ++ je L$14rounds_alt ++ ++ movups %xmm0,(%rdx) ++ movups %xmm2,16(%rdx) ++.byte 102,15,58,223,202,1 ++ call L$key_expansion_256a_cold ++.byte 102,15,58,223,200,1 ++ call L$key_expansion_256b ++.byte 102,15,58,223,202,2 ++ call L$key_expansion_256a ++.byte 102,15,58,223,200,2 ++ call L$key_expansion_256b ++.byte 102,15,58,223,202,4 ++ call L$key_expansion_256a ++.byte 102,15,58,223,200,4 ++ call L$key_expansion_256b ++.byte 102,15,58,223,202,8 ++ call L$key_expansion_256a ++.byte 102,15,58,223,200,8 ++ call L$key_expansion_256b ++.byte 102,15,58,223,202,16 ++ call L$key_expansion_256a ++.byte 102,15,58,223,200,16 ++ call L$key_expansion_256b ++.byte 102,15,58,223,202,32 ++ call L$key_expansion_256a ++.byte 102,15,58,223,200,32 ++ call L$key_expansion_256b ++.byte 102,15,58,223,202,64 ++ call L$key_expansion_256a ++ movups %xmm0,(%rax) ++ movl %esi,16(%rax) ++ xorq %rax,%rax ++ jmp L$enc_key_ret ++ ++.p2align 4 ++L$14rounds_alt: ++ movdqa L$key_rotate(%rip),%xmm5 ++ movdqa L$key_rcon1(%rip),%xmm4 ++ movl $7,%r10d ++ movdqu %xmm0,0(%rdx) ++ movdqa %xmm2,%xmm1 ++ movdqu %xmm2,16(%rdx) ++ jmp L$oop_key256 ++ ++.p2align 4 ++L$oop_key256: ++.byte 102,15,56,0,213 ++.byte 102,15,56,221,212 ++ ++ movdqa %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm0,%xmm3 ++ pslldq $4,%xmm0 ++ pxor %xmm3,%xmm0 ++ pslld 
$1,%xmm4 ++ ++ pxor %xmm2,%xmm0 ++ movdqu %xmm0,(%rax) ++ ++ decl %r10d ++ jz L$done_key256 ++ ++ pshufd $0xff,%xmm0,%xmm2 ++ pxor %xmm3,%xmm3 ++.byte 102,15,56,221,211 ++ ++ movdqa %xmm1,%xmm3 ++ pslldq $4,%xmm1 ++ pxor %xmm1,%xmm3 ++ pslldq $4,%xmm1 ++ pxor %xmm1,%xmm3 ++ pslldq $4,%xmm1 ++ pxor %xmm3,%xmm1 ++ ++ pxor %xmm1,%xmm2 ++ movdqu %xmm2,16(%rax) ++ leaq 32(%rax),%rax ++ movdqa %xmm2,%xmm1 ++ ++ jmp L$oop_key256 ++ ++L$done_key256: ++ movl %esi,16(%rax) ++ xorl %eax,%eax ++ jmp L$enc_key_ret ++ ++.p2align 4 ++L$bad_keybits: ++ movq $-2,%rax ++L$enc_key_ret: ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ addq $8,%rsp ++ ++ .byte 0xf3,0xc3 ++ ++L$SEH_end_set_encrypt_key: ++ ++.p2align 4 ++L$key_expansion_128: ++ movups %xmm0,(%rax) ++ leaq 16(%rax),%rax ++L$key_expansion_128_cold: ++ shufps $16,%xmm0,%xmm4 ++ xorps %xmm4,%xmm0 ++ shufps $140,%xmm0,%xmm4 ++ xorps %xmm4,%xmm0 ++ shufps $255,%xmm1,%xmm1 ++ xorps %xmm1,%xmm0 ++ .byte 0xf3,0xc3 ++ ++.p2align 4 ++L$key_expansion_192a: ++ movups %xmm0,(%rax) ++ leaq 16(%rax),%rax ++L$key_expansion_192a_cold: ++ movaps %xmm2,%xmm5 ++L$key_expansion_192b_warm: ++ shufps $16,%xmm0,%xmm4 ++ movdqa %xmm2,%xmm3 ++ xorps %xmm4,%xmm0 ++ shufps $140,%xmm0,%xmm4 ++ pslldq $4,%xmm3 ++ xorps %xmm4,%xmm0 ++ pshufd $85,%xmm1,%xmm1 ++ pxor %xmm3,%xmm2 ++ pxor %xmm1,%xmm0 ++ pshufd $255,%xmm0,%xmm3 ++ pxor %xmm3,%xmm2 ++ .byte 0xf3,0xc3 ++ ++.p2align 4 ++L$key_expansion_192b: ++ movaps %xmm0,%xmm3 ++ shufps $68,%xmm0,%xmm5 ++ movups %xmm5,(%rax) ++ shufps $78,%xmm2,%xmm3 ++ movups %xmm3,16(%rax) ++ leaq 32(%rax),%rax ++ jmp L$key_expansion_192b_warm ++ ++.p2align 4 ++L$key_expansion_256a: ++ movups %xmm2,(%rax) ++ leaq 16(%rax),%rax ++L$key_expansion_256a_cold: ++ shufps $16,%xmm0,%xmm4 ++ xorps %xmm4,%xmm0 ++ shufps $140,%xmm0,%xmm4 ++ xorps %xmm4,%xmm0 ++ shufps $255,%xmm1,%xmm1 ++ xorps %xmm1,%xmm0 ++ .byte 0xf3,0xc3 ++ ++.p2align 4 ++L$key_expansion_256b: ++ movups %xmm0,(%rax) ++ leaq 16(%rax),%rax ++ ++ shufps $16,%xmm2,%xmm4 ++ xorps %xmm4,%xmm2 ++ shufps $140,%xmm2,%xmm4 ++ xorps %xmm4,%xmm2 ++ shufps $170,%xmm1,%xmm1 ++ xorps %xmm1,%xmm2 ++ .byte 0xf3,0xc3 ++ ++ ++.p2align 6 ++L$bswap_mask: ++.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ++L$increment32: ++.long 6,6,6,0 ++L$increment64: ++.long 1,0,0,0 ++L$xts_magic: ++.long 0x87,0,1,0 ++L$increment1: ++.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 ++L$key_rotate: ++.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d ++L$key_rotate192: ++.long 0x04070605,0x04070605,0x04070605,0x04070605 ++L$key_rcon1: ++.long 1,1,1,1 ++L$key_rcon1b: ++.long 0x1b,0x1b,0x1b,0x1b ++ ++.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.p2align 6 ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S b/apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S +new file mode 100644 +index 0000000..7f92fc5 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S +@@ -0,0 +1,426 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++ ++ ++ ++ ++.globl _gcm_gmult_ssse3 ++.private_extern _gcm_gmult_ssse3 ++.p2align 4 ++_gcm_gmult_ssse3: ++ ++L$gmult_seh_begin: ++ movdqu (%rdi),%xmm0 ++ movdqa L$reverse_bytes(%rip),%xmm10 ++ movdqa L$low4_mask(%rip),%xmm2 ++ ++ ++.byte 102,65,15,56,0,194 ++ ++ ++ movdqa %xmm2,%xmm1 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm2,%xmm0 ++ ++ ++ ++ ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ movq $5,%rax ++L$oop_row_1: ++ movdqa (%rsi),%xmm4 ++ leaq 16(%rsi),%rsi ++ ++ ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ ++ ++ pxor %xmm5,%xmm2 ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ ++ ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ ++ subq $1,%rax ++ jnz L$oop_row_1 ++ ++ ++ ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movq $5,%rax ++L$oop_row_2: ++ movdqa (%rsi),%xmm4 ++ leaq 16(%rsi),%rsi ++ ++ ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ ++ ++ pxor %xmm5,%xmm2 ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ ++ ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ ++ subq $1,%rax ++ jnz L$oop_row_2 ++ ++ ++ ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movq $6,%rax ++L$oop_row_3: ++ movdqa (%rsi),%xmm4 ++ leaq 16(%rsi),%rsi ++ ++ ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ ++ ++ pxor %xmm5,%xmm2 ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ ++ ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ ++ subq $1,%rax ++ jnz L$oop_row_3 ++ ++ ++ ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ ++.byte 102,65,15,56,0,210 ++ movdqu %xmm2,(%rdi) ++ ++ ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ .byte 0xf3,0xc3 ++L$gmult_seh_end: ++ ++ ++ ++ ++ ++ ++ ++ ++.globl _gcm_ghash_ssse3 ++.private_extern _gcm_ghash_ssse3 ++.p2align 4 ++_gcm_ghash_ssse3: ++L$ghash_seh_begin: ++ ++ movdqu (%rdi),%xmm0 ++ movdqa L$reverse_bytes(%rip),%xmm10 ++ movdqa L$low4_mask(%rip),%xmm11 ++ ++ ++ andq $-16,%rcx ++ ++ ++ ++.byte 102,65,15,56,0,194 ++ ++ ++ pxor %xmm3,%xmm3 ++L$oop_ghash: ++ ++ movdqu (%rdx),%xmm1 ++.byte 102,65,15,56,0,202 ++ pxor %xmm1,%xmm0 ++ ++ ++ movdqa %xmm11,%xmm1 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm11,%xmm0 ++ ++ ++ ++ ++ pxor %xmm2,%xmm2 ++ ++ movq $5,%rax ++L$oop_row_4: ++ movdqa (%rsi),%xmm4 ++ leaq 16(%rsi),%rsi ++ ++ ++ movdqa 
%xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ ++ ++ pxor %xmm5,%xmm2 ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ ++ ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ ++ subq $1,%rax ++ jnz L$oop_row_4 ++ ++ ++ ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movq $5,%rax ++L$oop_row_5: ++ movdqa (%rsi),%xmm4 ++ leaq 16(%rsi),%rsi ++ ++ ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ ++ ++ pxor %xmm5,%xmm2 ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ ++ ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ ++ subq $1,%rax ++ jnz L$oop_row_5 ++ ++ ++ ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movq $6,%rax ++L$oop_row_6: ++ movdqa (%rsi),%xmm4 ++ leaq 16(%rsi),%rsi ++ ++ ++ movdqa %xmm2,%xmm6 ++.byte 102,15,58,15,243,1 ++ movdqa %xmm6,%xmm3 ++ psrldq $1,%xmm2 ++ ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++.byte 102,15,56,0,224 ++.byte 102,15,56,0,233 ++ ++ ++ pxor %xmm5,%xmm2 ++ ++ ++ ++ movdqa %xmm4,%xmm5 ++ psllq $60,%xmm5 ++ movdqa %xmm5,%xmm6 ++ pslldq $8,%xmm6 ++ pxor %xmm6,%xmm3 ++ ++ ++ psrldq $8,%xmm5 ++ pxor %xmm5,%xmm2 ++ psrlq $4,%xmm4 ++ pxor %xmm4,%xmm2 ++ ++ subq $1,%rax ++ jnz L$oop_row_6 ++ ++ ++ ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $1,%xmm3 ++ pxor %xmm3,%xmm2 ++ psrlq $5,%xmm3 ++ pxor %xmm3,%xmm2 ++ pxor %xmm3,%xmm3 ++ movdqa %xmm2,%xmm0 ++ ++ ++ leaq -256(%rsi),%rsi ++ ++ ++ leaq 16(%rdx),%rdx ++ subq $16,%rcx ++ jnz L$oop_ghash ++ ++ ++.byte 102,65,15,56,0,194 ++ movdqu %xmm0,(%rdi) ++ ++ ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ .byte 0xf3,0xc3 ++L$ghash_seh_end: ++ ++ ++ ++.p2align 4 ++ ++ ++L$reverse_bytes: ++.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 ++ ++L$low4_mask: ++.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/ghash-x86_64.S b/apple-x86_64/crypto/fipsmodule/ghash-x86_64.S +new file mode 100644 +index 0000000..fd767a0 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/ghash-x86_64.S +@@ -0,0 +1,1125 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++.globl _gcm_init_clmul ++.private_extern _gcm_init_clmul ++ ++.p2align 4 ++_gcm_init_clmul: ++ ++L$_init_clmul: ++ movdqu (%rsi),%xmm2 ++ pshufd $78,%xmm2,%xmm2 ++ ++ ++ pshufd $255,%xmm2,%xmm4 ++ movdqa %xmm2,%xmm3 ++ psllq $1,%xmm2 ++ pxor %xmm5,%xmm5 ++ psrlq $63,%xmm3 ++ pcmpgtd %xmm4,%xmm5 ++ pslldq $8,%xmm3 ++ por %xmm3,%xmm2 ++ ++ ++ pand L$0x1c2_polynomial(%rip),%xmm5 ++ pxor %xmm5,%xmm2 ++ ++ ++ pshufd $78,%xmm2,%xmm6 ++ movdqa %xmm2,%xmm0 ++ pxor %xmm2,%xmm6 ++ movdqa %xmm0,%xmm1 ++ pshufd $78,%xmm0,%xmm3 ++ pxor %xmm0,%xmm3 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,222,0 ++ pxor %xmm0,%xmm3 ++ pxor %xmm1,%xmm3 ++ ++ movdqa %xmm3,%xmm4 ++ psrldq $8,%xmm3 ++ pslldq $8,%xmm4 ++ pxor %xmm3,%xmm1 ++ pxor %xmm4,%xmm0 ++ ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ ++ ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++ pshufd $78,%xmm2,%xmm3 ++ pshufd $78,%xmm0,%xmm4 ++ pxor %xmm2,%xmm3 ++ movdqu %xmm2,0(%rdi) ++ pxor %xmm0,%xmm4 ++ movdqu %xmm0,16(%rdi) ++.byte 102,15,58,15,227,8 ++ movdqu %xmm4,32(%rdi) ++ movdqa %xmm0,%xmm1 ++ pshufd $78,%xmm0,%xmm3 ++ pxor %xmm0,%xmm3 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,222,0 ++ pxor %xmm0,%xmm3 ++ pxor %xmm1,%xmm3 ++ ++ movdqa %xmm3,%xmm4 ++ psrldq $8,%xmm3 ++ pslldq $8,%xmm4 ++ pxor %xmm3,%xmm1 ++ pxor %xmm4,%xmm0 ++ ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ ++ ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++ movdqa %xmm0,%xmm5 ++ movdqa %xmm0,%xmm1 ++ pshufd $78,%xmm0,%xmm3 ++ pxor %xmm0,%xmm3 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,222,0 ++ pxor %xmm0,%xmm3 ++ pxor %xmm1,%xmm3 ++ ++ movdqa %xmm3,%xmm4 ++ psrldq $8,%xmm3 ++ pslldq $8,%xmm4 ++ pxor %xmm3,%xmm1 ++ pxor %xmm4,%xmm0 ++ ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ ++ ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++ pshufd $78,%xmm5,%xmm3 ++ pshufd $78,%xmm0,%xmm4 ++ pxor %xmm5,%xmm3 ++ movdqu %xmm5,48(%rdi) ++ pxor %xmm0,%xmm4 ++ movdqu %xmm0,64(%rdi) ++.byte 102,15,58,15,227,8 ++ movdqu %xmm4,80(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _gcm_gmult_clmul ++.private_extern _gcm_gmult_clmul ++ ++.p2align 4 ++_gcm_gmult_clmul: ++ ++L$_gmult_clmul: ++ movdqu (%rdi),%xmm0 ++ movdqa L$bswap_mask(%rip),%xmm5 ++ movdqu (%rsi),%xmm2 ++ movdqu 32(%rsi),%xmm4 ++.byte 102,15,56,0,197 ++ movdqa %xmm0,%xmm1 ++ pshufd 
$78,%xmm0,%xmm3 ++ pxor %xmm0,%xmm3 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,220,0 ++ pxor %xmm0,%xmm3 ++ pxor %xmm1,%xmm3 ++ ++ movdqa %xmm3,%xmm4 ++ psrldq $8,%xmm3 ++ pslldq $8,%xmm4 ++ pxor %xmm3,%xmm1 ++ pxor %xmm4,%xmm0 ++ ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ ++ ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++.byte 102,15,56,0,197 ++ movdqu %xmm0,(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _gcm_ghash_clmul ++.private_extern _gcm_ghash_clmul ++ ++.p2align 5 ++_gcm_ghash_clmul: ++ ++L$_ghash_clmul: ++ movdqa L$bswap_mask(%rip),%xmm10 ++ ++ movdqu (%rdi),%xmm0 ++ movdqu (%rsi),%xmm2 ++ movdqu 32(%rsi),%xmm7 ++.byte 102,65,15,56,0,194 ++ ++ subq $0x10,%rcx ++ jz L$odd_tail ++ ++ movdqu 16(%rsi),%xmm6 ++ leaq _OPENSSL_ia32cap_P(%rip),%rax ++ movl 4(%rax),%eax ++ cmpq $0x30,%rcx ++ jb L$skip4x ++ ++ andl $71303168,%eax ++ cmpl $4194304,%eax ++ je L$skip4x ++ ++ subq $0x30,%rcx ++ movq $0xA040608020C0E000,%rax ++ movdqu 48(%rsi),%xmm14 ++ movdqu 64(%rsi),%xmm15 ++ ++ ++ ++ ++ movdqu 48(%rdx),%xmm3 ++ movdqu 32(%rdx),%xmm11 ++.byte 102,65,15,56,0,218 ++.byte 102,69,15,56,0,218 ++ movdqa %xmm3,%xmm5 ++ pshufd $78,%xmm3,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,68,218,0 ++.byte 102,15,58,68,234,17 ++.byte 102,15,58,68,231,0 ++ ++ movdqa %xmm11,%xmm13 ++ pshufd $78,%xmm11,%xmm12 ++ pxor %xmm11,%xmm12 ++.byte 102,68,15,58,68,222,0 ++.byte 102,68,15,58,68,238,17 ++.byte 102,68,15,58,68,231,16 ++ xorps %xmm11,%xmm3 ++ xorps %xmm13,%xmm5 ++ movups 80(%rsi),%xmm7 ++ xorps %xmm12,%xmm4 ++ ++ movdqu 16(%rdx),%xmm11 ++ movdqu 0(%rdx),%xmm8 ++.byte 102,69,15,56,0,218 ++.byte 102,69,15,56,0,194 ++ movdqa %xmm11,%xmm13 ++ pshufd $78,%xmm11,%xmm12 ++ pxor %xmm8,%xmm0 ++ pxor %xmm11,%xmm12 ++.byte 102,69,15,58,68,222,0 ++ movdqa %xmm0,%xmm1 ++ pshufd $78,%xmm0,%xmm8 ++ pxor %xmm0,%xmm8 ++.byte 102,69,15,58,68,238,17 ++.byte 102,68,15,58,68,231,0 ++ xorps %xmm11,%xmm3 ++ xorps %xmm13,%xmm5 ++ ++ leaq 64(%rdx),%rdx ++ subq $0x40,%rcx ++ jc L$tail4x ++ ++ jmp L$mod4_loop ++.p2align 5 ++L$mod4_loop: ++.byte 102,65,15,58,68,199,0 ++ xorps %xmm12,%xmm4 ++ movdqu 48(%rdx),%xmm11 ++.byte 102,69,15,56,0,218 ++.byte 102,65,15,58,68,207,17 ++ xorps %xmm3,%xmm0 ++ movdqu 32(%rdx),%xmm3 ++ movdqa %xmm11,%xmm13 ++.byte 102,68,15,58,68,199,16 ++ pshufd $78,%xmm11,%xmm12 ++ xorps %xmm5,%xmm1 ++ pxor %xmm11,%xmm12 ++.byte 102,65,15,56,0,218 ++ movups 32(%rsi),%xmm7 ++ xorps %xmm4,%xmm8 ++.byte 102,68,15,58,68,218,0 ++ pshufd $78,%xmm3,%xmm4 ++ ++ pxor %xmm0,%xmm8 ++ movdqa %xmm3,%xmm5 ++ pxor %xmm1,%xmm8 ++ pxor %xmm3,%xmm4 ++ movdqa %xmm8,%xmm9 ++.byte 102,68,15,58,68,234,17 ++ pslldq $8,%xmm8 ++ psrldq $8,%xmm9 ++ pxor %xmm8,%xmm0 ++ movdqa L$7_mask(%rip),%xmm8 ++ pxor %xmm9,%xmm1 ++.byte 102,76,15,110,200 ++ ++ pand %xmm0,%xmm8 ++.byte 102,69,15,56,0,200 ++ pxor %xmm0,%xmm9 ++.byte 102,68,15,58,68,231,0 ++ psllq $57,%xmm9 ++ movdqa %xmm9,%xmm8 ++ pslldq $8,%xmm9 ++.byte 102,15,58,68,222,0 ++ psrldq $8,%xmm8 ++ pxor %xmm9,%xmm0 ++ pxor %xmm8,%xmm1 ++ movdqu 0(%rdx),%xmm8 ++ ++ movdqa %xmm0,%xmm9 ++ psrlq $1,%xmm0 ++.byte 102,15,58,68,238,17 ++ xorps %xmm11,%xmm3 ++ movdqu 16(%rdx),%xmm11 ++.byte 102,69,15,56,0,218 ++.byte 102,15,58,68,231,16 ++ xorps %xmm13,%xmm5 ++ movups 
80(%rsi),%xmm7 ++.byte 102,69,15,56,0,194 ++ pxor %xmm9,%xmm1 ++ pxor %xmm0,%xmm9 ++ psrlq $5,%xmm0 ++ ++ movdqa %xmm11,%xmm13 ++ pxor %xmm12,%xmm4 ++ pshufd $78,%xmm11,%xmm12 ++ pxor %xmm9,%xmm0 ++ pxor %xmm8,%xmm1 ++ pxor %xmm11,%xmm12 ++.byte 102,69,15,58,68,222,0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++ movdqa %xmm0,%xmm1 ++.byte 102,69,15,58,68,238,17 ++ xorps %xmm11,%xmm3 ++ pshufd $78,%xmm0,%xmm8 ++ pxor %xmm0,%xmm8 ++ ++.byte 102,68,15,58,68,231,0 ++ xorps %xmm13,%xmm5 ++ ++ leaq 64(%rdx),%rdx ++ subq $0x40,%rcx ++ jnc L$mod4_loop ++ ++L$tail4x: ++.byte 102,65,15,58,68,199,0 ++.byte 102,65,15,58,68,207,17 ++.byte 102,68,15,58,68,199,16 ++ xorps %xmm12,%xmm4 ++ xorps %xmm3,%xmm0 ++ xorps %xmm5,%xmm1 ++ pxor %xmm0,%xmm1 ++ pxor %xmm4,%xmm8 ++ ++ pxor %xmm1,%xmm8 ++ pxor %xmm0,%xmm1 ++ ++ movdqa %xmm8,%xmm9 ++ psrldq $8,%xmm8 ++ pslldq $8,%xmm9 ++ pxor %xmm8,%xmm1 ++ pxor %xmm9,%xmm0 ++ ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ ++ ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++ addq $0x40,%rcx ++ jz L$done ++ movdqu 32(%rsi),%xmm7 ++ subq $0x10,%rcx ++ jz L$odd_tail ++L$skip4x: ++ ++ ++ ++ ++ ++ movdqu (%rdx),%xmm8 ++ movdqu 16(%rdx),%xmm3 ++.byte 102,69,15,56,0,194 ++.byte 102,65,15,56,0,218 ++ pxor %xmm8,%xmm0 ++ ++ movdqa %xmm3,%xmm5 ++ pshufd $78,%xmm3,%xmm4 ++ pxor %xmm3,%xmm4 ++.byte 102,15,58,68,218,0 ++.byte 102,15,58,68,234,17 ++.byte 102,15,58,68,231,0 ++ ++ leaq 32(%rdx),%rdx ++ nop ++ subq $0x20,%rcx ++ jbe L$even_tail ++ nop ++ jmp L$mod_loop ++ ++.p2align 5 ++L$mod_loop: ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm4,%xmm8 ++ pshufd $78,%xmm0,%xmm4 ++ pxor %xmm0,%xmm4 ++ ++.byte 102,15,58,68,198,0 ++.byte 102,15,58,68,206,17 ++.byte 102,15,58,68,231,16 ++ ++ pxor %xmm3,%xmm0 ++ pxor %xmm5,%xmm1 ++ movdqu (%rdx),%xmm9 ++ pxor %xmm0,%xmm8 ++.byte 102,69,15,56,0,202 ++ movdqu 16(%rdx),%xmm3 ++ ++ pxor %xmm1,%xmm8 ++ pxor %xmm9,%xmm1 ++ pxor %xmm8,%xmm4 ++.byte 102,65,15,56,0,218 ++ movdqa %xmm4,%xmm8 ++ psrldq $8,%xmm8 ++ pslldq $8,%xmm4 ++ pxor %xmm8,%xmm1 ++ pxor %xmm4,%xmm0 ++ ++ movdqa %xmm3,%xmm5 ++ ++ movdqa %xmm0,%xmm9 ++ movdqa %xmm0,%xmm8 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm8 ++.byte 102,15,58,68,218,0 ++ psllq $1,%xmm0 ++ pxor %xmm8,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm8 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm8 ++ pxor %xmm9,%xmm0 ++ pshufd $78,%xmm5,%xmm4 ++ pxor %xmm8,%xmm1 ++ pxor %xmm5,%xmm4 ++ ++ movdqa %xmm0,%xmm9 ++ psrlq $1,%xmm0 ++.byte 102,15,58,68,234,17 ++ pxor %xmm9,%xmm1 ++ pxor %xmm0,%xmm9 ++ psrlq $5,%xmm0 ++ pxor %xmm9,%xmm0 ++ leaq 32(%rdx),%rdx ++ psrlq $1,%xmm0 ++.byte 102,15,58,68,231,0 ++ pxor %xmm1,%xmm0 ++ ++ subq $0x20,%rcx ++ ja L$mod_loop ++ ++L$even_tail: ++ movdqa %xmm0,%xmm1 ++ movdqa %xmm4,%xmm8 ++ pshufd $78,%xmm0,%xmm4 ++ pxor %xmm0,%xmm4 ++ ++.byte 102,15,58,68,198,0 ++.byte 102,15,58,68,206,17 ++.byte 102,15,58,68,231,16 ++ ++ pxor %xmm3,%xmm0 ++ pxor %xmm5,%xmm1 ++ pxor %xmm0,%xmm8 ++ pxor %xmm1,%xmm8 ++ pxor %xmm8,%xmm4 ++ movdqa %xmm4,%xmm8 ++ psrldq $8,%xmm8 ++ pslldq $8,%xmm4 ++ pxor %xmm8,%xmm1 ++ pxor %xmm4,%xmm0 ++ ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor 
%xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ ++ ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++ testq %rcx,%rcx ++ jnz L$done ++ ++L$odd_tail: ++ movdqu (%rdx),%xmm8 ++.byte 102,69,15,56,0,194 ++ pxor %xmm8,%xmm0 ++ movdqa %xmm0,%xmm1 ++ pshufd $78,%xmm0,%xmm3 ++ pxor %xmm0,%xmm3 ++.byte 102,15,58,68,194,0 ++.byte 102,15,58,68,202,17 ++.byte 102,15,58,68,223,0 ++ pxor %xmm0,%xmm3 ++ pxor %xmm1,%xmm3 ++ ++ movdqa %xmm3,%xmm4 ++ psrldq $8,%xmm3 ++ pslldq $8,%xmm4 ++ pxor %xmm3,%xmm1 ++ pxor %xmm4,%xmm0 ++ ++ movdqa %xmm0,%xmm4 ++ movdqa %xmm0,%xmm3 ++ psllq $5,%xmm0 ++ pxor %xmm0,%xmm3 ++ psllq $1,%xmm0 ++ pxor %xmm3,%xmm0 ++ psllq $57,%xmm0 ++ movdqa %xmm0,%xmm3 ++ pslldq $8,%xmm0 ++ psrldq $8,%xmm3 ++ pxor %xmm4,%xmm0 ++ pxor %xmm3,%xmm1 ++ ++ ++ movdqa %xmm0,%xmm4 ++ psrlq $1,%xmm0 ++ pxor %xmm4,%xmm1 ++ pxor %xmm0,%xmm4 ++ psrlq $5,%xmm0 ++ pxor %xmm4,%xmm0 ++ psrlq $1,%xmm0 ++ pxor %xmm1,%xmm0 ++L$done: ++.byte 102,65,15,56,0,194 ++ movdqu %xmm0,(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _gcm_init_avx ++.private_extern _gcm_init_avx ++ ++.p2align 5 ++_gcm_init_avx: ++ ++ vzeroupper ++ ++ vmovdqu (%rsi),%xmm2 ++ vpshufd $78,%xmm2,%xmm2 ++ ++ ++ vpshufd $255,%xmm2,%xmm4 ++ vpsrlq $63,%xmm2,%xmm3 ++ vpsllq $1,%xmm2,%xmm2 ++ vpxor %xmm5,%xmm5,%xmm5 ++ vpcmpgtd %xmm4,%xmm5,%xmm5 ++ vpslldq $8,%xmm3,%xmm3 ++ vpor %xmm3,%xmm2,%xmm2 ++ ++ ++ vpand L$0x1c2_polynomial(%rip),%xmm5,%xmm5 ++ vpxor %xmm5,%xmm2,%xmm2 ++ ++ vpunpckhqdq %xmm2,%xmm2,%xmm6 ++ vmovdqa %xmm2,%xmm0 ++ vpxor %xmm2,%xmm6,%xmm6 ++ movq $4,%r10 ++ jmp L$init_start_avx ++.p2align 5 ++L$init_loop_avx: ++ vpalignr $8,%xmm3,%xmm4,%xmm5 ++ vmovdqu %xmm5,-16(%rdi) ++ vpunpckhqdq %xmm0,%xmm0,%xmm3 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 ++ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 ++ vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 ++ vpxor %xmm0,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ ++ vpslldq $8,%xmm3,%xmm4 ++ vpsrldq $8,%xmm3,%xmm3 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpsllq $57,%xmm0,%xmm3 ++ vpsllq $62,%xmm0,%xmm4 ++ vpxor %xmm3,%xmm4,%xmm4 ++ vpsllq $63,%xmm0,%xmm3 ++ vpxor %xmm3,%xmm4,%xmm4 ++ vpslldq $8,%xmm4,%xmm3 ++ vpsrldq $8,%xmm4,%xmm4 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vpsrlq $1,%xmm0,%xmm4 ++ vpxor %xmm0,%xmm1,%xmm1 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpsrlq $5,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpsrlq $1,%xmm0,%xmm0 ++ vpxor %xmm1,%xmm0,%xmm0 ++L$init_start_avx: ++ vmovdqa %xmm0,%xmm5 ++ vpunpckhqdq %xmm0,%xmm0,%xmm3 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 ++ vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 ++ vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 ++ vpxor %xmm0,%xmm1,%xmm4 ++ vpxor %xmm4,%xmm3,%xmm3 ++ ++ vpslldq $8,%xmm3,%xmm4 ++ vpsrldq $8,%xmm3,%xmm3 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpxor %xmm3,%xmm1,%xmm1 ++ vpsllq $57,%xmm0,%xmm3 ++ vpsllq $62,%xmm0,%xmm4 ++ vpxor %xmm3,%xmm4,%xmm4 ++ vpsllq $63,%xmm0,%xmm3 ++ vpxor %xmm3,%xmm4,%xmm4 ++ vpslldq $8,%xmm4,%xmm3 ++ vpsrldq $8,%xmm4,%xmm4 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpxor %xmm4,%xmm1,%xmm1 ++ ++ vpsrlq $1,%xmm0,%xmm4 ++ vpxor %xmm0,%xmm1,%xmm1 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpsrlq $5,%xmm4,%xmm4 ++ vpxor %xmm4,%xmm0,%xmm0 ++ vpsrlq $1,%xmm0,%xmm0 ++ vpxor %xmm1,%xmm0,%xmm0 ++ vpshufd $78,%xmm5,%xmm3 ++ vpshufd $78,%xmm0,%xmm4 ++ vpxor %xmm5,%xmm3,%xmm3 ++ vmovdqu %xmm5,0(%rdi) ++ vpxor %xmm0,%xmm4,%xmm4 ++ vmovdqu %xmm0,16(%rdi) ++ leaq 48(%rdi),%rdi ++ subq $1,%r10 ++ jnz L$init_loop_avx ++ ++ vpalignr 
$8,%xmm4,%xmm3,%xmm5 ++ vmovdqu %xmm5,-16(%rdi) ++ ++ vzeroupper ++ .byte 0xf3,0xc3 ++ ++ ++.globl _gcm_gmult_avx ++.private_extern _gcm_gmult_avx ++ ++.p2align 5 ++_gcm_gmult_avx: ++ ++ jmp L$_gmult_clmul ++ ++ ++.globl _gcm_ghash_avx ++.private_extern _gcm_ghash_avx ++ ++.p2align 5 ++_gcm_ghash_avx: ++ ++ vzeroupper ++ ++ vmovdqu (%rdi),%xmm10 ++ leaq L$0x1c2_polynomial(%rip),%r10 ++ leaq 64(%rsi),%rsi ++ vmovdqu L$bswap_mask(%rip),%xmm13 ++ vpshufb %xmm13,%xmm10,%xmm10 ++ cmpq $0x80,%rcx ++ jb L$short_avx ++ subq $0x80,%rcx ++ ++ vmovdqu 112(%rdx),%xmm14 ++ vmovdqu 0-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vmovdqu 32-64(%rsi),%xmm7 ++ ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vmovdqu 96(%rdx),%xmm15 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 16-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vmovdqu 80(%rdx),%xmm14 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm15,%xmm8,%xmm8 ++ ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 48-64(%rsi),%xmm6 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vmovdqu 64(%rdx),%xmm15 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 80-64(%rsi),%xmm7 ++ ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 64-64(%rsi),%xmm6 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 48(%rdx),%xmm14 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 96-64(%rsi),%xmm6 ++ vpxor %xmm5,%xmm2,%xmm2 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 128-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ ++ vmovdqu 32(%rdx),%xmm15 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 112-64(%rsi),%xmm6 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 16(%rdx),%xmm14 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 144-64(%rsi),%xmm6 ++ vpxor %xmm5,%xmm2,%xmm2 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 176-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ ++ vmovdqu (%rdx),%xmm15 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 160-64(%rsi),%xmm6 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 ++ ++ leaq 128(%rdx),%rdx ++ cmpq $0x80,%rcx ++ jb L$tail_avx ++ ++ vpxor %xmm10,%xmm15,%xmm15 ++ subq $0x80,%rcx ++ jmp L$oop8x_avx ++ ++.p2align 5 ++L$oop8x_avx: ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vmovdqu 112(%rdx),%xmm14 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 ++ vmovdqu 0-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpxor %xmm2,%xmm5,%xmm5 ++ 
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 ++ vmovdqu 32-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ ++ vmovdqu 96(%rdx),%xmm15 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpxor %xmm3,%xmm10,%xmm10 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vxorps %xmm4,%xmm11,%xmm11 ++ vmovdqu 16-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm5,%xmm12,%xmm12 ++ vxorps %xmm15,%xmm8,%xmm8 ++ ++ vmovdqu 80(%rdx),%xmm14 ++ vpxor %xmm10,%xmm12,%xmm12 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpxor %xmm11,%xmm12,%xmm12 ++ vpslldq $8,%xmm12,%xmm9 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vpsrldq $8,%xmm12,%xmm12 ++ vpxor %xmm9,%xmm10,%xmm10 ++ vmovdqu 48-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vxorps %xmm12,%xmm11,%xmm11 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 80-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vmovdqu 64(%rdx),%xmm15 ++ vpalignr $8,%xmm10,%xmm10,%xmm12 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 64-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vxorps %xmm15,%xmm8,%xmm8 ++ vpxor %xmm5,%xmm2,%xmm2 ++ ++ vmovdqu 48(%rdx),%xmm14 ++ vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 96-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 128-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vmovdqu 32(%rdx),%xmm15 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpxor %xmm3,%xmm0,%xmm0 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 112-64(%rsi),%xmm6 ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm4,%xmm1,%xmm1 ++ vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vpxor %xmm5,%xmm2,%xmm2 ++ vxorps %xmm12,%xmm10,%xmm10 ++ ++ vmovdqu 16(%rdx),%xmm14 ++ vpalignr $8,%xmm10,%xmm10,%xmm12 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 ++ vpshufb %xmm13,%xmm14,%xmm14 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 ++ vmovdqu 144-64(%rsi),%xmm6 ++ vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 ++ vxorps %xmm11,%xmm12,%xmm12 ++ vpunpckhqdq %xmm14,%xmm14,%xmm9 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 ++ vmovdqu 176-64(%rsi),%xmm7 ++ vpxor %xmm14,%xmm9,%xmm9 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vmovdqu (%rdx),%xmm15 ++ vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 ++ vpshufb %xmm13,%xmm15,%xmm15 ++ vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 ++ vmovdqu 160-64(%rsi),%xmm6 ++ vpxor %xmm12,%xmm15,%xmm15 ++ vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 ++ vpxor %xmm10,%xmm15,%xmm15 ++ ++ leaq 128(%rdx),%rdx ++ subq $0x80,%rcx ++ jnc L$oop8x_avx ++ ++ addq $0x80,%rcx ++ jmp L$tail_no_xor_avx ++ ++.p2align 5 ++L$short_avx: ++ vmovdqu -16(%rdx,%rcx,1),%xmm14 ++ leaq (%rdx,%rcx,1),%rdx ++ vmovdqu 0-64(%rsi),%xmm6 ++ vmovdqu 32-64(%rsi),%xmm7 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ ++ vmovdqa %xmm0,%xmm3 ++ vmovdqa %xmm1,%xmm4 ++ vmovdqa %xmm2,%xmm5 ++ subq $0x10,%rcx ++ jz L$tail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -32(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ 
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 16-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vpsrldq $8,%xmm7,%xmm7 ++ subq $0x10,%rcx ++ jz L$tail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -48(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 48-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vmovdqu 80-64(%rsi),%xmm7 ++ subq $0x10,%rcx ++ jz L$tail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -64(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 64-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vpsrldq $8,%xmm7,%xmm7 ++ subq $0x10,%rcx ++ jz L$tail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -80(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 96-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vmovdqu 128-64(%rsi),%xmm7 ++ subq $0x10,%rcx ++ jz L$tail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -96(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 112-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vpsrldq $8,%xmm7,%xmm7 ++ subq $0x10,%rcx ++ jz L$tail_avx ++ ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vmovdqu -112(%rdx),%xmm14 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vmovdqu 144-64(%rsi),%xmm6 ++ vpshufb %xmm13,%xmm14,%xmm15 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ vmovq 184-64(%rsi),%xmm7 ++ subq $0x10,%rcx ++ jmp L$tail_avx ++ ++.p2align 5 ++L$tail_avx: ++ vpxor %xmm10,%xmm15,%xmm15 ++L$tail_no_xor_avx: ++ vpunpckhqdq %xmm15,%xmm15,%xmm8 ++ vpxor %xmm0,%xmm3,%xmm3 ++ vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 ++ vpxor %xmm15,%xmm8,%xmm8 ++ vpxor %xmm1,%xmm4,%xmm4 ++ vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 ++ vpxor %xmm2,%xmm5,%xmm5 ++ vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 ++ ++ vmovdqu (%r10),%xmm12 ++ ++ vpxor %xmm0,%xmm3,%xmm10 ++ vpxor %xmm1,%xmm4,%xmm11 ++ vpxor %xmm2,%xmm5,%xmm5 ++ ++ vpxor %xmm10,%xmm5,%xmm5 ++ vpxor %xmm11,%xmm5,%xmm5 ++ vpslldq $8,%xmm5,%xmm9 ++ vpsrldq $8,%xmm5,%xmm5 ++ vpxor %xmm9,%xmm10,%xmm10 ++ vpxor %xmm5,%xmm11,%xmm11 ++ ++ vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 ++ vpalignr $8,%xmm10,%xmm10,%xmm10 ++ vpxor %xmm9,%xmm10,%xmm10 ++ ++ vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 ++ vpalignr $8,%xmm10,%xmm10,%xmm10 ++ vpxor %xmm11,%xmm10,%xmm10 ++ vpxor %xmm9,%xmm10,%xmm10 ++ ++ cmpq $0,%rcx ++ jne L$short_avx ++ ++ vpshufb %xmm13,%xmm10,%xmm10 ++ vmovdqu %xmm10,(%rdi) ++ vzeroupper ++ .byte 0xf3,0xc3 ++ ++ ++.p2align 6 ++L$bswap_mask: ++.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ++L$0x1c2_polynomial: ++.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 ++L$7_mask: ++.long 7,0,7,0 ++.p2align 6 ++ ++.byte 
71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.p2align 6 ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/md5-x86_64.S b/apple-x86_64/crypto/fipsmodule/md5-x86_64.S +new file mode 100644 +index 0000000..06e3ba0 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/md5-x86_64.S +@@ -0,0 +1,696 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++.p2align 4 ++ ++.globl _md5_block_asm_data_order ++.private_extern _md5_block_asm_data_order ++ ++_md5_block_asm_data_order: ++ ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$prologue: ++ ++ ++ ++ ++ movq %rdi,%rbp ++ shlq $6,%rdx ++ leaq (%rsi,%rdx,1),%rdi ++ movl 0(%rbp),%eax ++ movl 4(%rbp),%ebx ++ movl 8(%rbp),%ecx ++ movl 12(%rbp),%edx ++ ++ ++ ++ ++ ++ ++ ++ cmpq %rdi,%rsi ++ je L$end ++ ++ ++L$loop: ++ movl %eax,%r8d ++ movl %ebx,%r9d ++ movl %ecx,%r14d ++ movl %edx,%r15d ++ movl 0(%rsi),%r10d ++ movl %edx,%r11d ++ xorl %ecx,%r11d ++ leal -680876936(%rax,%r10,1),%eax ++ andl %ebx,%r11d ++ xorl %edx,%r11d ++ movl 4(%rsi),%r10d ++ addl %r11d,%eax ++ roll $7,%eax ++ movl %ecx,%r11d ++ addl %ebx,%eax ++ xorl %ebx,%r11d ++ leal -389564586(%rdx,%r10,1),%edx ++ andl %eax,%r11d ++ xorl %ecx,%r11d ++ movl 8(%rsi),%r10d ++ addl %r11d,%edx ++ roll $12,%edx ++ movl %ebx,%r11d ++ addl %eax,%edx ++ xorl %eax,%r11d ++ leal 606105819(%rcx,%r10,1),%ecx ++ andl %edx,%r11d ++ xorl %ebx,%r11d ++ movl 12(%rsi),%r10d ++ addl %r11d,%ecx ++ roll $17,%ecx ++ movl %eax,%r11d ++ addl %edx,%ecx ++ xorl %edx,%r11d ++ leal -1044525330(%rbx,%r10,1),%ebx ++ andl %ecx,%r11d ++ xorl %eax,%r11d ++ movl 16(%rsi),%r10d ++ addl %r11d,%ebx ++ roll $22,%ebx ++ movl %edx,%r11d ++ addl %ecx,%ebx ++ xorl %ecx,%r11d ++ leal -176418897(%rax,%r10,1),%eax ++ andl %ebx,%r11d ++ xorl %edx,%r11d ++ movl 20(%rsi),%r10d ++ addl %r11d,%eax ++ roll $7,%eax ++ movl %ecx,%r11d ++ addl %ebx,%eax ++ xorl %ebx,%r11d ++ leal 1200080426(%rdx,%r10,1),%edx ++ andl %eax,%r11d ++ xorl %ecx,%r11d ++ movl 24(%rsi),%r10d ++ addl %r11d,%edx ++ roll $12,%edx ++ movl %ebx,%r11d ++ addl %eax,%edx ++ xorl %eax,%r11d ++ leal -1473231341(%rcx,%r10,1),%ecx ++ andl %edx,%r11d ++ xorl %ebx,%r11d ++ movl 28(%rsi),%r10d ++ addl %r11d,%ecx ++ roll $17,%ecx ++ movl %eax,%r11d ++ addl %edx,%ecx ++ xorl %edx,%r11d ++ leal -45705983(%rbx,%r10,1),%ebx ++ andl %ecx,%r11d ++ xorl %eax,%r11d ++ movl 32(%rsi),%r10d ++ addl %r11d,%ebx ++ roll $22,%ebx ++ movl %edx,%r11d ++ addl %ecx,%ebx ++ xorl %ecx,%r11d ++ leal 1770035416(%rax,%r10,1),%eax ++ andl %ebx,%r11d ++ xorl %edx,%r11d ++ movl 36(%rsi),%r10d ++ addl %r11d,%eax ++ roll $7,%eax ++ movl %ecx,%r11d ++ addl %ebx,%eax ++ xorl %ebx,%r11d ++ leal -1958414417(%rdx,%r10,1),%edx ++ andl %eax,%r11d ++ xorl %ecx,%r11d ++ movl 40(%rsi),%r10d ++ addl %r11d,%edx ++ roll $12,%edx ++ movl %ebx,%r11d ++ addl %eax,%edx ++ xorl %eax,%r11d ++ leal -42063(%rcx,%r10,1),%ecx ++ andl %edx,%r11d ++ xorl %ebx,%r11d ++ movl 44(%rsi),%r10d ++ addl %r11d,%ecx ++ roll $17,%ecx ++ movl %eax,%r11d ++ addl %edx,%ecx ++ xorl %edx,%r11d ++ leal -1990404162(%rbx,%r10,1),%ebx ++ andl %ecx,%r11d ++ xorl %eax,%r11d 
++ movl 48(%rsi),%r10d ++ addl %r11d,%ebx ++ roll $22,%ebx ++ movl %edx,%r11d ++ addl %ecx,%ebx ++ xorl %ecx,%r11d ++ leal 1804603682(%rax,%r10,1),%eax ++ andl %ebx,%r11d ++ xorl %edx,%r11d ++ movl 52(%rsi),%r10d ++ addl %r11d,%eax ++ roll $7,%eax ++ movl %ecx,%r11d ++ addl %ebx,%eax ++ xorl %ebx,%r11d ++ leal -40341101(%rdx,%r10,1),%edx ++ andl %eax,%r11d ++ xorl %ecx,%r11d ++ movl 56(%rsi),%r10d ++ addl %r11d,%edx ++ roll $12,%edx ++ movl %ebx,%r11d ++ addl %eax,%edx ++ xorl %eax,%r11d ++ leal -1502002290(%rcx,%r10,1),%ecx ++ andl %edx,%r11d ++ xorl %ebx,%r11d ++ movl 60(%rsi),%r10d ++ addl %r11d,%ecx ++ roll $17,%ecx ++ movl %eax,%r11d ++ addl %edx,%ecx ++ xorl %edx,%r11d ++ leal 1236535329(%rbx,%r10,1),%ebx ++ andl %ecx,%r11d ++ xorl %eax,%r11d ++ movl 0(%rsi),%r10d ++ addl %r11d,%ebx ++ roll $22,%ebx ++ movl %edx,%r11d ++ addl %ecx,%ebx ++ movl 4(%rsi),%r10d ++ movl %edx,%r11d ++ movl %edx,%r12d ++ notl %r11d ++ leal -165796510(%rax,%r10,1),%eax ++ andl %ebx,%r12d ++ andl %ecx,%r11d ++ movl 24(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %ecx,%r11d ++ addl %r12d,%eax ++ movl %ecx,%r12d ++ roll $5,%eax ++ addl %ebx,%eax ++ notl %r11d ++ leal -1069501632(%rdx,%r10,1),%edx ++ andl %eax,%r12d ++ andl %ebx,%r11d ++ movl 44(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %ebx,%r11d ++ addl %r12d,%edx ++ movl %ebx,%r12d ++ roll $9,%edx ++ addl %eax,%edx ++ notl %r11d ++ leal 643717713(%rcx,%r10,1),%ecx ++ andl %edx,%r12d ++ andl %eax,%r11d ++ movl 0(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %eax,%r11d ++ addl %r12d,%ecx ++ movl %eax,%r12d ++ roll $14,%ecx ++ addl %edx,%ecx ++ notl %r11d ++ leal -373897302(%rbx,%r10,1),%ebx ++ andl %ecx,%r12d ++ andl %edx,%r11d ++ movl 20(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %edx,%r11d ++ addl %r12d,%ebx ++ movl %edx,%r12d ++ roll $20,%ebx ++ addl %ecx,%ebx ++ notl %r11d ++ leal -701558691(%rax,%r10,1),%eax ++ andl %ebx,%r12d ++ andl %ecx,%r11d ++ movl 40(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %ecx,%r11d ++ addl %r12d,%eax ++ movl %ecx,%r12d ++ roll $5,%eax ++ addl %ebx,%eax ++ notl %r11d ++ leal 38016083(%rdx,%r10,1),%edx ++ andl %eax,%r12d ++ andl %ebx,%r11d ++ movl 60(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %ebx,%r11d ++ addl %r12d,%edx ++ movl %ebx,%r12d ++ roll $9,%edx ++ addl %eax,%edx ++ notl %r11d ++ leal -660478335(%rcx,%r10,1),%ecx ++ andl %edx,%r12d ++ andl %eax,%r11d ++ movl 16(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %eax,%r11d ++ addl %r12d,%ecx ++ movl %eax,%r12d ++ roll $14,%ecx ++ addl %edx,%ecx ++ notl %r11d ++ leal -405537848(%rbx,%r10,1),%ebx ++ andl %ecx,%r12d ++ andl %edx,%r11d ++ movl 36(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %edx,%r11d ++ addl %r12d,%ebx ++ movl %edx,%r12d ++ roll $20,%ebx ++ addl %ecx,%ebx ++ notl %r11d ++ leal 568446438(%rax,%r10,1),%eax ++ andl %ebx,%r12d ++ andl %ecx,%r11d ++ movl 56(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %ecx,%r11d ++ addl %r12d,%eax ++ movl %ecx,%r12d ++ roll $5,%eax ++ addl %ebx,%eax ++ notl %r11d ++ leal -1019803690(%rdx,%r10,1),%edx ++ andl %eax,%r12d ++ andl %ebx,%r11d ++ movl 12(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %ebx,%r11d ++ addl %r12d,%edx ++ movl %ebx,%r12d ++ roll $9,%edx ++ addl %eax,%edx ++ notl %r11d ++ leal -187363961(%rcx,%r10,1),%ecx ++ andl %edx,%r12d ++ andl %eax,%r11d ++ movl 32(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %eax,%r11d ++ addl %r12d,%ecx ++ movl %eax,%r12d ++ roll $14,%ecx ++ addl %edx,%ecx ++ notl %r11d ++ leal 1163531501(%rbx,%r10,1),%ebx ++ andl %ecx,%r12d ++ andl %edx,%r11d ++ movl 52(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %edx,%r11d ++ addl %r12d,%ebx ++ movl %edx,%r12d ++ roll 
$20,%ebx ++ addl %ecx,%ebx ++ notl %r11d ++ leal -1444681467(%rax,%r10,1),%eax ++ andl %ebx,%r12d ++ andl %ecx,%r11d ++ movl 8(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %ecx,%r11d ++ addl %r12d,%eax ++ movl %ecx,%r12d ++ roll $5,%eax ++ addl %ebx,%eax ++ notl %r11d ++ leal -51403784(%rdx,%r10,1),%edx ++ andl %eax,%r12d ++ andl %ebx,%r11d ++ movl 28(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %ebx,%r11d ++ addl %r12d,%edx ++ movl %ebx,%r12d ++ roll $9,%edx ++ addl %eax,%edx ++ notl %r11d ++ leal 1735328473(%rcx,%r10,1),%ecx ++ andl %edx,%r12d ++ andl %eax,%r11d ++ movl 48(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %eax,%r11d ++ addl %r12d,%ecx ++ movl %eax,%r12d ++ roll $14,%ecx ++ addl %edx,%ecx ++ notl %r11d ++ leal -1926607734(%rbx,%r10,1),%ebx ++ andl %ecx,%r12d ++ andl %edx,%r11d ++ movl 0(%rsi),%r10d ++ orl %r11d,%r12d ++ movl %edx,%r11d ++ addl %r12d,%ebx ++ movl %edx,%r12d ++ roll $20,%ebx ++ addl %ecx,%ebx ++ movl 20(%rsi),%r10d ++ movl %ecx,%r11d ++ leal -378558(%rax,%r10,1),%eax ++ movl 32(%rsi),%r10d ++ xorl %edx,%r11d ++ xorl %ebx,%r11d ++ addl %r11d,%eax ++ roll $4,%eax ++ movl %ebx,%r11d ++ addl %ebx,%eax ++ leal -2022574463(%rdx,%r10,1),%edx ++ movl 44(%rsi),%r10d ++ xorl %ecx,%r11d ++ xorl %eax,%r11d ++ addl %r11d,%edx ++ roll $11,%edx ++ movl %eax,%r11d ++ addl %eax,%edx ++ leal 1839030562(%rcx,%r10,1),%ecx ++ movl 56(%rsi),%r10d ++ xorl %ebx,%r11d ++ xorl %edx,%r11d ++ addl %r11d,%ecx ++ roll $16,%ecx ++ movl %edx,%r11d ++ addl %edx,%ecx ++ leal -35309556(%rbx,%r10,1),%ebx ++ movl 4(%rsi),%r10d ++ xorl %eax,%r11d ++ xorl %ecx,%r11d ++ addl %r11d,%ebx ++ roll $23,%ebx ++ movl %ecx,%r11d ++ addl %ecx,%ebx ++ leal -1530992060(%rax,%r10,1),%eax ++ movl 16(%rsi),%r10d ++ xorl %edx,%r11d ++ xorl %ebx,%r11d ++ addl %r11d,%eax ++ roll $4,%eax ++ movl %ebx,%r11d ++ addl %ebx,%eax ++ leal 1272893353(%rdx,%r10,1),%edx ++ movl 28(%rsi),%r10d ++ xorl %ecx,%r11d ++ xorl %eax,%r11d ++ addl %r11d,%edx ++ roll $11,%edx ++ movl %eax,%r11d ++ addl %eax,%edx ++ leal -155497632(%rcx,%r10,1),%ecx ++ movl 40(%rsi),%r10d ++ xorl %ebx,%r11d ++ xorl %edx,%r11d ++ addl %r11d,%ecx ++ roll $16,%ecx ++ movl %edx,%r11d ++ addl %edx,%ecx ++ leal -1094730640(%rbx,%r10,1),%ebx ++ movl 52(%rsi),%r10d ++ xorl %eax,%r11d ++ xorl %ecx,%r11d ++ addl %r11d,%ebx ++ roll $23,%ebx ++ movl %ecx,%r11d ++ addl %ecx,%ebx ++ leal 681279174(%rax,%r10,1),%eax ++ movl 0(%rsi),%r10d ++ xorl %edx,%r11d ++ xorl %ebx,%r11d ++ addl %r11d,%eax ++ roll $4,%eax ++ movl %ebx,%r11d ++ addl %ebx,%eax ++ leal -358537222(%rdx,%r10,1),%edx ++ movl 12(%rsi),%r10d ++ xorl %ecx,%r11d ++ xorl %eax,%r11d ++ addl %r11d,%edx ++ roll $11,%edx ++ movl %eax,%r11d ++ addl %eax,%edx ++ leal -722521979(%rcx,%r10,1),%ecx ++ movl 24(%rsi),%r10d ++ xorl %ebx,%r11d ++ xorl %edx,%r11d ++ addl %r11d,%ecx ++ roll $16,%ecx ++ movl %edx,%r11d ++ addl %edx,%ecx ++ leal 76029189(%rbx,%r10,1),%ebx ++ movl 36(%rsi),%r10d ++ xorl %eax,%r11d ++ xorl %ecx,%r11d ++ addl %r11d,%ebx ++ roll $23,%ebx ++ movl %ecx,%r11d ++ addl %ecx,%ebx ++ leal -640364487(%rax,%r10,1),%eax ++ movl 48(%rsi),%r10d ++ xorl %edx,%r11d ++ xorl %ebx,%r11d ++ addl %r11d,%eax ++ roll $4,%eax ++ movl %ebx,%r11d ++ addl %ebx,%eax ++ leal -421815835(%rdx,%r10,1),%edx ++ movl 60(%rsi),%r10d ++ xorl %ecx,%r11d ++ xorl %eax,%r11d ++ addl %r11d,%edx ++ roll $11,%edx ++ movl %eax,%r11d ++ addl %eax,%edx ++ leal 530742520(%rcx,%r10,1),%ecx ++ movl 8(%rsi),%r10d ++ xorl %ebx,%r11d ++ xorl %edx,%r11d ++ addl %r11d,%ecx ++ roll $16,%ecx ++ movl %edx,%r11d ++ addl %edx,%ecx ++ leal -995338651(%rbx,%r10,1),%ebx ++ 
movl 0(%rsi),%r10d ++ xorl %eax,%r11d ++ xorl %ecx,%r11d ++ addl %r11d,%ebx ++ roll $23,%ebx ++ movl %ecx,%r11d ++ addl %ecx,%ebx ++ movl 0(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ xorl %edx,%r11d ++ leal -198630844(%rax,%r10,1),%eax ++ orl %ebx,%r11d ++ xorl %ecx,%r11d ++ addl %r11d,%eax ++ movl 28(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $6,%eax ++ xorl %ecx,%r11d ++ addl %ebx,%eax ++ leal 1126891415(%rdx,%r10,1),%edx ++ orl %eax,%r11d ++ xorl %ebx,%r11d ++ addl %r11d,%edx ++ movl 56(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $10,%edx ++ xorl %ebx,%r11d ++ addl %eax,%edx ++ leal -1416354905(%rcx,%r10,1),%ecx ++ orl %edx,%r11d ++ xorl %eax,%r11d ++ addl %r11d,%ecx ++ movl 20(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $15,%ecx ++ xorl %eax,%r11d ++ addl %edx,%ecx ++ leal -57434055(%rbx,%r10,1),%ebx ++ orl %ecx,%r11d ++ xorl %edx,%r11d ++ addl %r11d,%ebx ++ movl 48(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $21,%ebx ++ xorl %edx,%r11d ++ addl %ecx,%ebx ++ leal 1700485571(%rax,%r10,1),%eax ++ orl %ebx,%r11d ++ xorl %ecx,%r11d ++ addl %r11d,%eax ++ movl 12(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $6,%eax ++ xorl %ecx,%r11d ++ addl %ebx,%eax ++ leal -1894986606(%rdx,%r10,1),%edx ++ orl %eax,%r11d ++ xorl %ebx,%r11d ++ addl %r11d,%edx ++ movl 40(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $10,%edx ++ xorl %ebx,%r11d ++ addl %eax,%edx ++ leal -1051523(%rcx,%r10,1),%ecx ++ orl %edx,%r11d ++ xorl %eax,%r11d ++ addl %r11d,%ecx ++ movl 4(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $15,%ecx ++ xorl %eax,%r11d ++ addl %edx,%ecx ++ leal -2054922799(%rbx,%r10,1),%ebx ++ orl %ecx,%r11d ++ xorl %edx,%r11d ++ addl %r11d,%ebx ++ movl 32(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $21,%ebx ++ xorl %edx,%r11d ++ addl %ecx,%ebx ++ leal 1873313359(%rax,%r10,1),%eax ++ orl %ebx,%r11d ++ xorl %ecx,%r11d ++ addl %r11d,%eax ++ movl 60(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $6,%eax ++ xorl %ecx,%r11d ++ addl %ebx,%eax ++ leal -30611744(%rdx,%r10,1),%edx ++ orl %eax,%r11d ++ xorl %ebx,%r11d ++ addl %r11d,%edx ++ movl 24(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $10,%edx ++ xorl %ebx,%r11d ++ addl %eax,%edx ++ leal -1560198380(%rcx,%r10,1),%ecx ++ orl %edx,%r11d ++ xorl %eax,%r11d ++ addl %r11d,%ecx ++ movl 52(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $15,%ecx ++ xorl %eax,%r11d ++ addl %edx,%ecx ++ leal 1309151649(%rbx,%r10,1),%ebx ++ orl %ecx,%r11d ++ xorl %edx,%r11d ++ addl %r11d,%ebx ++ movl 16(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $21,%ebx ++ xorl %edx,%r11d ++ addl %ecx,%ebx ++ leal -145523070(%rax,%r10,1),%eax ++ orl %ebx,%r11d ++ xorl %ecx,%r11d ++ addl %r11d,%eax ++ movl 44(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $6,%eax ++ xorl %ecx,%r11d ++ addl %ebx,%eax ++ leal -1120210379(%rdx,%r10,1),%edx ++ orl %eax,%r11d ++ xorl %ebx,%r11d ++ addl %r11d,%edx ++ movl 8(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $10,%edx ++ xorl %ebx,%r11d ++ addl %eax,%edx ++ leal 718787259(%rcx,%r10,1),%ecx ++ orl %edx,%r11d ++ xorl %eax,%r11d ++ addl %r11d,%ecx ++ movl 36(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $15,%ecx ++ xorl %eax,%r11d ++ addl %edx,%ecx ++ leal -343485551(%rbx,%r10,1),%ebx ++ orl %ecx,%r11d ++ xorl %edx,%r11d ++ addl %r11d,%ebx ++ movl 0(%rsi),%r10d ++ movl $0xffffffff,%r11d ++ roll $21,%ebx ++ xorl %edx,%r11d ++ addl %ecx,%ebx ++ ++ addl %r8d,%eax ++ addl %r9d,%ebx ++ addl %r14d,%ecx ++ addl %r15d,%edx ++ ++ ++ addq $64,%rsi ++ cmpq %rdi,%rsi ++ jb L$loop ++ ++ ++L$end: ++ movl %eax,0(%rbp) ++ movl %ebx,4(%rbp) ++ movl %ecx,8(%rbp) ++ movl 
%edx,12(%rbp) ++ ++ movq (%rsp),%r15 ++ ++ movq 8(%rsp),%r14 ++ ++ movq 16(%rsp),%r12 ++ ++ movq 24(%rsp),%rbx ++ ++ movq 32(%rsp),%rbp ++ ++ addq $40,%rsp ++ ++L$epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/p256-x86_64-asm.S b/apple-x86_64/crypto/fipsmodule/p256-x86_64-asm.S +new file mode 100644 +index 0000000..36057aa +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/p256-x86_64-asm.S +@@ -0,0 +1,4467 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++ ++.p2align 6 ++L$poly: ++.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 ++ ++L$One: ++.long 1,1,1,1,1,1,1,1 ++L$Two: ++.long 2,2,2,2,2,2,2,2 ++L$Three: ++.long 3,3,3,3,3,3,3,3 ++L$ONE_mont: ++.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe ++ ++ ++L$ord: ++.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 ++L$ordK: ++.quad 0xccd1c8aaee00bc4f ++ ++ ++ ++.globl _ecp_nistz256_neg ++.private_extern _ecp_nistz256_neg ++ ++.p2align 5 ++_ecp_nistz256_neg: ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++L$neg_body: ++ ++ xorq %r8,%r8 ++ xorq %r9,%r9 ++ xorq %r10,%r10 ++ xorq %r11,%r11 ++ xorq %r13,%r13 ++ ++ subq 0(%rsi),%r8 ++ sbbq 8(%rsi),%r9 ++ sbbq 16(%rsi),%r10 ++ movq %r8,%rax ++ sbbq 24(%rsi),%r11 ++ leaq L$poly(%rip),%rsi ++ movq %r9,%rdx ++ sbbq $0,%r13 ++ ++ addq 0(%rsi),%r8 ++ movq %r10,%rcx ++ adcq 8(%rsi),%r9 ++ adcq 16(%rsi),%r10 ++ movq %r11,%r12 ++ adcq 24(%rsi),%r11 ++ testq %r13,%r13 ++ ++ cmovzq %rax,%r8 ++ cmovzq %rdx,%r9 ++ movq %r8,0(%rdi) ++ cmovzq %rcx,%r10 ++ movq %r9,8(%rdi) ++ cmovzq %r12,%r11 ++ movq %r10,16(%rdi) ++ movq %r11,24(%rdi) ++ ++ movq 0(%rsp),%r13 ++ ++ movq 8(%rsp),%r12 ++ ++ leaq 16(%rsp),%rsp ++ ++L$neg_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++.globl _ecp_nistz256_ord_mul_mont ++.private_extern _ecp_nistz256_ord_mul_mont ++ ++.p2align 5 ++_ecp_nistz256_ord_mul_mont: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%rcx ++ movq 8(%rcx),%rcx ++ andl $0x80100,%ecx ++ cmpl $0x80100,%ecx ++ je L$ecp_nistz256_ord_mul_montx ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$ord_mul_body: ++ ++ movq 0(%rdx),%rax ++ movq %rdx,%rbx ++ leaq L$ord(%rip),%r14 ++ movq L$ordK(%rip),%r15 ++ ++ ++ movq %rax,%rcx ++ mulq 0(%rsi) ++ movq %rax,%r8 ++ movq %rcx,%rax ++ movq %rdx,%r9 ++ ++ mulq 8(%rsi) ++ addq %rax,%r9 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq 16(%rsi) ++ addq %rax,%r10 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ ++ movq %r8,%r13 ++ imulq %r15,%r8 ++ ++ movq %rdx,%r11 ++ mulq 24(%rsi) ++ addq %rax,%r11 ++ movq %r8,%rax ++ adcq $0,%rdx ++ movq %rdx,%r12 ++ ++ ++ mulq 0(%r14) ++ movq %r8,%rbp ++ addq %rax,%r13 ++ movq %r8,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ subq %r8,%r10 ++ sbbq $0,%r8 ++ ++ mulq 8(%r14) ++ addq %rcx,%r9 ++ adcq $0,%rdx ++ addq %rax,%r9 ++ movq %rbp,%rax ++ adcq %rdx,%r10 ++ movq %rbp,%rdx ++ adcq $0,%r8 ++ ++ shlq $32,%rax ++ shrq $32,%rdx ++ subq %rax,%r11 ++ movq 8(%rbx),%rax ++ sbbq %rdx,%rbp ++ ++ addq %r8,%r11 ++ adcq %rbp,%r12 ++ adcq $0,%r13 ++ ++ ++ movq %rax,%rcx ++ mulq 0(%rsi) ++ addq %rax,%r9 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ 
movq %rdx,%rbp ++ ++ mulq 8(%rsi) ++ addq %rbp,%r10 ++ adcq $0,%rdx ++ addq %rax,%r10 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ movq %rdx,%rbp ++ ++ mulq 16(%rsi) ++ addq %rbp,%r11 ++ adcq $0,%rdx ++ addq %rax,%r11 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ ++ movq %r9,%rcx ++ imulq %r15,%r9 ++ ++ movq %rdx,%rbp ++ mulq 24(%rsi) ++ addq %rbp,%r12 ++ adcq $0,%rdx ++ xorq %r8,%r8 ++ addq %rax,%r12 ++ movq %r9,%rax ++ adcq %rdx,%r13 ++ adcq $0,%r8 ++ ++ ++ mulq 0(%r14) ++ movq %r9,%rbp ++ addq %rax,%rcx ++ movq %r9,%rax ++ adcq %rdx,%rcx ++ ++ subq %r9,%r11 ++ sbbq $0,%r9 ++ ++ mulq 8(%r14) ++ addq %rcx,%r10 ++ adcq $0,%rdx ++ addq %rax,%r10 ++ movq %rbp,%rax ++ adcq %rdx,%r11 ++ movq %rbp,%rdx ++ adcq $0,%r9 ++ ++ shlq $32,%rax ++ shrq $32,%rdx ++ subq %rax,%r12 ++ movq 16(%rbx),%rax ++ sbbq %rdx,%rbp ++ ++ addq %r9,%r12 ++ adcq %rbp,%r13 ++ adcq $0,%r8 ++ ++ ++ movq %rax,%rcx ++ mulq 0(%rsi) ++ addq %rax,%r10 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ movq %rdx,%rbp ++ ++ mulq 8(%rsi) ++ addq %rbp,%r11 ++ adcq $0,%rdx ++ addq %rax,%r11 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ movq %rdx,%rbp ++ ++ mulq 16(%rsi) ++ addq %rbp,%r12 ++ adcq $0,%rdx ++ addq %rax,%r12 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ ++ movq %r10,%rcx ++ imulq %r15,%r10 ++ ++ movq %rdx,%rbp ++ mulq 24(%rsi) ++ addq %rbp,%r13 ++ adcq $0,%rdx ++ xorq %r9,%r9 ++ addq %rax,%r13 ++ movq %r10,%rax ++ adcq %rdx,%r8 ++ adcq $0,%r9 ++ ++ ++ mulq 0(%r14) ++ movq %r10,%rbp ++ addq %rax,%rcx ++ movq %r10,%rax ++ adcq %rdx,%rcx ++ ++ subq %r10,%r12 ++ sbbq $0,%r10 ++ ++ mulq 8(%r14) ++ addq %rcx,%r11 ++ adcq $0,%rdx ++ addq %rax,%r11 ++ movq %rbp,%rax ++ adcq %rdx,%r12 ++ movq %rbp,%rdx ++ adcq $0,%r10 ++ ++ shlq $32,%rax ++ shrq $32,%rdx ++ subq %rax,%r13 ++ movq 24(%rbx),%rax ++ sbbq %rdx,%rbp ++ ++ addq %r10,%r13 ++ adcq %rbp,%r8 ++ adcq $0,%r9 ++ ++ ++ movq %rax,%rcx ++ mulq 0(%rsi) ++ addq %rax,%r11 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ movq %rdx,%rbp ++ ++ mulq 8(%rsi) ++ addq %rbp,%r12 ++ adcq $0,%rdx ++ addq %rax,%r12 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ movq %rdx,%rbp ++ ++ mulq 16(%rsi) ++ addq %rbp,%r13 ++ adcq $0,%rdx ++ addq %rax,%r13 ++ movq %rcx,%rax ++ adcq $0,%rdx ++ ++ movq %r11,%rcx ++ imulq %r15,%r11 ++ ++ movq %rdx,%rbp ++ mulq 24(%rsi) ++ addq %rbp,%r8 ++ adcq $0,%rdx ++ xorq %r10,%r10 ++ addq %rax,%r8 ++ movq %r11,%rax ++ adcq %rdx,%r9 ++ adcq $0,%r10 ++ ++ ++ mulq 0(%r14) ++ movq %r11,%rbp ++ addq %rax,%rcx ++ movq %r11,%rax ++ adcq %rdx,%rcx ++ ++ subq %r11,%r13 ++ sbbq $0,%r11 ++ ++ mulq 8(%r14) ++ addq %rcx,%r12 ++ adcq $0,%rdx ++ addq %rax,%r12 ++ movq %rbp,%rax ++ adcq %rdx,%r13 ++ movq %rbp,%rdx ++ adcq $0,%r11 ++ ++ shlq $32,%rax ++ shrq $32,%rdx ++ subq %rax,%r8 ++ sbbq %rdx,%rbp ++ ++ addq %r11,%r8 ++ adcq %rbp,%r9 ++ adcq $0,%r10 ++ ++ ++ movq %r12,%rsi ++ subq 0(%r14),%r12 ++ movq %r13,%r11 ++ sbbq 8(%r14),%r13 ++ movq %r8,%rcx ++ sbbq 16(%r14),%r8 ++ movq %r9,%rbp ++ sbbq 24(%r14),%r9 ++ sbbq $0,%r10 ++ ++ cmovcq %rsi,%r12 ++ cmovcq %r11,%r13 ++ cmovcq %rcx,%r8 ++ cmovcq %rbp,%r9 ++ ++ movq %r12,0(%rdi) ++ movq %r13,8(%rdi) ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ movq 0(%rsp),%r15 ++ ++ movq 8(%rsp),%r14 ++ ++ movq 16(%rsp),%r13 ++ ++ movq 24(%rsp),%r12 ++ ++ movq 32(%rsp),%rbx ++ ++ movq 40(%rsp),%rbp ++ ++ leaq 48(%rsp),%rsp ++ ++L$ord_mul_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.globl _ecp_nistz256_ord_sqr_mont ++.private_extern _ecp_nistz256_ord_sqr_mont ++ ++.p2align 5 ++_ecp_nistz256_ord_sqr_mont: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%rcx ++ movq 8(%rcx),%rcx ++ andl $0x80100,%ecx ++ cmpl 
$0x80100,%ecx ++ je L$ecp_nistz256_ord_sqr_montx ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$ord_sqr_body: ++ ++ movq 0(%rsi),%r8 ++ movq 8(%rsi),%rax ++ movq 16(%rsi),%r14 ++ movq 24(%rsi),%r15 ++ leaq L$ord(%rip),%rsi ++ movq %rdx,%rbx ++ jmp L$oop_ord_sqr ++ ++.p2align 5 ++L$oop_ord_sqr: ++ ++ movq %rax,%rbp ++ mulq %r8 ++ movq %rax,%r9 ++.byte 102,72,15,110,205 ++ movq %r14,%rax ++ movq %rdx,%r10 ++ ++ mulq %r8 ++ addq %rax,%r10 ++ movq %r15,%rax ++.byte 102,73,15,110,214 ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %r8 ++ addq %rax,%r11 ++ movq %r15,%rax ++.byte 102,73,15,110,223 ++ adcq $0,%rdx ++ movq %rdx,%r12 ++ ++ ++ mulq %r14 ++ movq %rax,%r13 ++ movq %r14,%rax ++ movq %rdx,%r14 ++ ++ ++ mulq %rbp ++ addq %rax,%r11 ++ movq %r15,%rax ++ adcq $0,%rdx ++ movq %rdx,%r15 ++ ++ mulq %rbp ++ addq %rax,%r12 ++ adcq $0,%rdx ++ ++ addq %r15,%r12 ++ adcq %rdx,%r13 ++ adcq $0,%r14 ++ ++ ++ xorq %r15,%r15 ++ movq %r8,%rax ++ addq %r9,%r9 ++ adcq %r10,%r10 ++ adcq %r11,%r11 ++ adcq %r12,%r12 ++ adcq %r13,%r13 ++ adcq %r14,%r14 ++ adcq $0,%r15 ++ ++ ++ mulq %rax ++ movq %rax,%r8 ++.byte 102,72,15,126,200 ++ movq %rdx,%rbp ++ ++ mulq %rax ++ addq %rbp,%r9 ++ adcq %rax,%r10 ++.byte 102,72,15,126,208 ++ adcq $0,%rdx ++ movq %rdx,%rbp ++ ++ mulq %rax ++ addq %rbp,%r11 ++ adcq %rax,%r12 ++.byte 102,72,15,126,216 ++ adcq $0,%rdx ++ movq %rdx,%rbp ++ ++ movq %r8,%rcx ++ imulq 32(%rsi),%r8 ++ ++ mulq %rax ++ addq %rbp,%r13 ++ adcq %rax,%r14 ++ movq 0(%rsi),%rax ++ adcq %rdx,%r15 ++ ++ ++ mulq %r8 ++ movq %r8,%rbp ++ addq %rax,%rcx ++ movq 8(%rsi),%rax ++ adcq %rdx,%rcx ++ ++ subq %r8,%r10 ++ sbbq $0,%rbp ++ ++ mulq %r8 ++ addq %rcx,%r9 ++ adcq $0,%rdx ++ addq %rax,%r9 ++ movq %r8,%rax ++ adcq %rdx,%r10 ++ movq %r8,%rdx ++ adcq $0,%rbp ++ ++ movq %r9,%rcx ++ imulq 32(%rsi),%r9 ++ ++ shlq $32,%rax ++ shrq $32,%rdx ++ subq %rax,%r11 ++ movq 0(%rsi),%rax ++ sbbq %rdx,%r8 ++ ++ addq %rbp,%r11 ++ adcq $0,%r8 ++ ++ ++ mulq %r9 ++ movq %r9,%rbp ++ addq %rax,%rcx ++ movq 8(%rsi),%rax ++ adcq %rdx,%rcx ++ ++ subq %r9,%r11 ++ sbbq $0,%rbp ++ ++ mulq %r9 ++ addq %rcx,%r10 ++ adcq $0,%rdx ++ addq %rax,%r10 ++ movq %r9,%rax ++ adcq %rdx,%r11 ++ movq %r9,%rdx ++ adcq $0,%rbp ++ ++ movq %r10,%rcx ++ imulq 32(%rsi),%r10 ++ ++ shlq $32,%rax ++ shrq $32,%rdx ++ subq %rax,%r8 ++ movq 0(%rsi),%rax ++ sbbq %rdx,%r9 ++ ++ addq %rbp,%r8 ++ adcq $0,%r9 ++ ++ ++ mulq %r10 ++ movq %r10,%rbp ++ addq %rax,%rcx ++ movq 8(%rsi),%rax ++ adcq %rdx,%rcx ++ ++ subq %r10,%r8 ++ sbbq $0,%rbp ++ ++ mulq %r10 ++ addq %rcx,%r11 ++ adcq $0,%rdx ++ addq %rax,%r11 ++ movq %r10,%rax ++ adcq %rdx,%r8 ++ movq %r10,%rdx ++ adcq $0,%rbp ++ ++ movq %r11,%rcx ++ imulq 32(%rsi),%r11 ++ ++ shlq $32,%rax ++ shrq $32,%rdx ++ subq %rax,%r9 ++ movq 0(%rsi),%rax ++ sbbq %rdx,%r10 ++ ++ addq %rbp,%r9 ++ adcq $0,%r10 ++ ++ ++ mulq %r11 ++ movq %r11,%rbp ++ addq %rax,%rcx ++ movq 8(%rsi),%rax ++ adcq %rdx,%rcx ++ ++ subq %r11,%r9 ++ sbbq $0,%rbp ++ ++ mulq %r11 ++ addq %rcx,%r8 ++ adcq $0,%rdx ++ addq %rax,%r8 ++ movq %r11,%rax ++ adcq %rdx,%r9 ++ movq %r11,%rdx ++ adcq $0,%rbp ++ ++ shlq $32,%rax ++ shrq $32,%rdx ++ subq %rax,%r10 ++ sbbq %rdx,%r11 ++ ++ addq %rbp,%r10 ++ adcq $0,%r11 ++ ++ ++ xorq %rdx,%rdx ++ addq %r12,%r8 ++ adcq %r13,%r9 ++ movq %r8,%r12 ++ adcq %r14,%r10 ++ adcq %r15,%r11 ++ movq %r9,%rax ++ adcq $0,%rdx ++ ++ ++ subq 0(%rsi),%r8 ++ movq %r10,%r14 ++ sbbq 8(%rsi),%r9 ++ sbbq 16(%rsi),%r10 ++ movq %r11,%r15 ++ sbbq 24(%rsi),%r11 ++ sbbq $0,%rdx ++ ++ cmovcq %r12,%r8 ++ cmovncq 
%r9,%rax ++ cmovncq %r10,%r14 ++ cmovncq %r11,%r15 ++ ++ decq %rbx ++ jnz L$oop_ord_sqr ++ ++ movq %r8,0(%rdi) ++ movq %rax,8(%rdi) ++ pxor %xmm1,%xmm1 ++ movq %r14,16(%rdi) ++ pxor %xmm2,%xmm2 ++ movq %r15,24(%rdi) ++ pxor %xmm3,%xmm3 ++ ++ movq 0(%rsp),%r15 ++ ++ movq 8(%rsp),%r14 ++ ++ movq 16(%rsp),%r13 ++ ++ movq 24(%rsp),%r12 ++ ++ movq 32(%rsp),%rbx ++ ++ movq 40(%rsp),%rbp ++ ++ leaq 48(%rsp),%rsp ++ ++L$ord_sqr_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++ecp_nistz256_ord_mul_montx: ++ ++L$ecp_nistz256_ord_mul_montx: ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$ord_mulx_body: ++ ++ movq %rdx,%rbx ++ movq 0(%rdx),%rdx ++ movq 0(%rsi),%r9 ++ movq 8(%rsi),%r10 ++ movq 16(%rsi),%r11 ++ movq 24(%rsi),%r12 ++ leaq -128(%rsi),%rsi ++ leaq L$ord-128(%rip),%r14 ++ movq L$ordK(%rip),%r15 ++ ++ ++ mulxq %r9,%r8,%r9 ++ mulxq %r10,%rcx,%r10 ++ mulxq %r11,%rbp,%r11 ++ addq %rcx,%r9 ++ mulxq %r12,%rcx,%r12 ++ movq %r8,%rdx ++ mulxq %r15,%rdx,%rax ++ adcq %rbp,%r10 ++ adcq %rcx,%r11 ++ adcq $0,%r12 ++ ++ ++ xorq %r13,%r13 ++ mulxq 0+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r8 ++ adoxq %rbp,%r9 ++ ++ mulxq 8+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r9 ++ adoxq %rbp,%r10 ++ ++ mulxq 16+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r10 ++ adoxq %rbp,%r11 ++ ++ mulxq 24+128(%r14),%rcx,%rbp ++ movq 8(%rbx),%rdx ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ adcxq %r8,%r12 ++ adoxq %r8,%r13 ++ adcq $0,%r13 ++ ++ ++ mulxq 0+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r9 ++ adoxq %rbp,%r10 ++ ++ mulxq 8+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r10 ++ adoxq %rbp,%r11 ++ ++ mulxq 16+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq 24+128(%rsi),%rcx,%rbp ++ movq %r9,%rdx ++ mulxq %r15,%rdx,%rax ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ ++ adcxq %r8,%r13 ++ adoxq %r8,%r8 ++ adcq $0,%r8 ++ ++ ++ mulxq 0+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r9 ++ adoxq %rbp,%r10 ++ ++ mulxq 8+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r10 ++ adoxq %rbp,%r11 ++ ++ mulxq 16+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq 24+128(%r14),%rcx,%rbp ++ movq 16(%rbx),%rdx ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ adcxq %r9,%r13 ++ adoxq %r9,%r8 ++ adcq $0,%r8 ++ ++ ++ mulxq 0+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r10 ++ adoxq %rbp,%r11 ++ ++ mulxq 8+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq 16+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ ++ mulxq 24+128(%rsi),%rcx,%rbp ++ movq %r10,%rdx ++ mulxq %r15,%rdx,%rax ++ adcxq %rcx,%r13 ++ adoxq %rbp,%r8 ++ ++ adcxq %r9,%r8 ++ adoxq %r9,%r9 ++ adcq $0,%r9 ++ ++ ++ mulxq 0+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r10 ++ adoxq %rbp,%r11 ++ ++ mulxq 8+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq 16+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ ++ mulxq 24+128(%r14),%rcx,%rbp ++ movq 24(%rbx),%rdx ++ adcxq %rcx,%r13 ++ adoxq %rbp,%r8 ++ adcxq %r10,%r8 ++ adoxq %r10,%r9 ++ adcq $0,%r9 ++ ++ ++ mulxq 0+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq 8+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ ++ mulxq 16+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r13 ++ adoxq %rbp,%r8 ++ ++ mulxq 24+128(%rsi),%rcx,%rbp ++ movq %r11,%rdx ++ mulxq %r15,%rdx,%rax ++ adcxq %rcx,%r8 ++ adoxq %rbp,%r9 ++ ++ adcxq %r10,%r9 ++ adoxq %r10,%r10 ++ adcq $0,%r10 ++ ++ ++ mulxq 0+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq 8+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ ++ mulxq 16+128(%r14),%rcx,%rbp ++ adcxq %rcx,%r13 ++ 
adoxq %rbp,%r8 ++ ++ mulxq 24+128(%r14),%rcx,%rbp ++ leaq 128(%r14),%r14 ++ movq %r12,%rbx ++ adcxq %rcx,%r8 ++ adoxq %rbp,%r9 ++ movq %r13,%rdx ++ adcxq %r11,%r9 ++ adoxq %r11,%r10 ++ adcq $0,%r10 ++ ++ ++ ++ movq %r8,%rcx ++ subq 0(%r14),%r12 ++ sbbq 8(%r14),%r13 ++ sbbq 16(%r14),%r8 ++ movq %r9,%rbp ++ sbbq 24(%r14),%r9 ++ sbbq $0,%r10 ++ ++ cmovcq %rbx,%r12 ++ cmovcq %rdx,%r13 ++ cmovcq %rcx,%r8 ++ cmovcq %rbp,%r9 ++ ++ movq %r12,0(%rdi) ++ movq %r13,8(%rdi) ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ movq 0(%rsp),%r15 ++ ++ movq 8(%rsp),%r14 ++ ++ movq 16(%rsp),%r13 ++ ++ movq 24(%rsp),%r12 ++ ++ movq 32(%rsp),%rbx ++ ++ movq 40(%rsp),%rbp ++ ++ leaq 48(%rsp),%rsp ++ ++L$ord_mulx_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++ecp_nistz256_ord_sqr_montx: ++ ++L$ecp_nistz256_ord_sqr_montx: ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$ord_sqrx_body: ++ ++ movq %rdx,%rbx ++ movq 0(%rsi),%rdx ++ movq 8(%rsi),%r14 ++ movq 16(%rsi),%r15 ++ movq 24(%rsi),%r8 ++ leaq L$ord(%rip),%rsi ++ jmp L$oop_ord_sqrx ++ ++.p2align 5 ++L$oop_ord_sqrx: ++ mulxq %r14,%r9,%r10 ++ mulxq %r15,%rcx,%r11 ++ movq %rdx,%rax ++.byte 102,73,15,110,206 ++ mulxq %r8,%rbp,%r12 ++ movq %r14,%rdx ++ addq %rcx,%r10 ++.byte 102,73,15,110,215 ++ adcq %rbp,%r11 ++ adcq $0,%r12 ++ xorq %r13,%r13 ++ ++ mulxq %r15,%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq %r8,%rcx,%rbp ++ movq %r15,%rdx ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ adcq $0,%r13 ++ ++ mulxq %r8,%rcx,%r14 ++ movq %rax,%rdx ++.byte 102,73,15,110,216 ++ xorq %r15,%r15 ++ adcxq %r9,%r9 ++ adoxq %rcx,%r13 ++ adcxq %r10,%r10 ++ adoxq %r15,%r14 ++ ++ ++ mulxq %rdx,%r8,%rbp ++.byte 102,72,15,126,202 ++ adcxq %r11,%r11 ++ adoxq %rbp,%r9 ++ adcxq %r12,%r12 ++ mulxq %rdx,%rcx,%rax ++.byte 102,72,15,126,210 ++ adcxq %r13,%r13 ++ adoxq %rcx,%r10 ++ adcxq %r14,%r14 ++ mulxq %rdx,%rcx,%rbp ++.byte 0x67 ++.byte 102,72,15,126,218 ++ adoxq %rax,%r11 ++ adcxq %r15,%r15 ++ adoxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ mulxq %rdx,%rcx,%rax ++ adoxq %rcx,%r14 ++ adoxq %rax,%r15 ++ ++ ++ movq %r8,%rdx ++ mulxq 32(%rsi),%rdx,%rcx ++ ++ xorq %rax,%rax ++ mulxq 0(%rsi),%rcx,%rbp ++ adcxq %rcx,%r8 ++ adoxq %rbp,%r9 ++ mulxq 8(%rsi),%rcx,%rbp ++ adcxq %rcx,%r9 ++ adoxq %rbp,%r10 ++ mulxq 16(%rsi),%rcx,%rbp ++ adcxq %rcx,%r10 ++ adoxq %rbp,%r11 ++ mulxq 24(%rsi),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r8 ++ adcxq %rax,%r8 ++ ++ ++ movq %r9,%rdx ++ mulxq 32(%rsi),%rdx,%rcx ++ ++ mulxq 0(%rsi),%rcx,%rbp ++ adoxq %rcx,%r9 ++ adcxq %rbp,%r10 ++ mulxq 8(%rsi),%rcx,%rbp ++ adoxq %rcx,%r10 ++ adcxq %rbp,%r11 ++ mulxq 16(%rsi),%rcx,%rbp ++ adoxq %rcx,%r11 ++ adcxq %rbp,%r8 ++ mulxq 24(%rsi),%rcx,%rbp ++ adoxq %rcx,%r8 ++ adcxq %rbp,%r9 ++ adoxq %rax,%r9 ++ ++ ++ movq %r10,%rdx ++ mulxq 32(%rsi),%rdx,%rcx ++ ++ mulxq 0(%rsi),%rcx,%rbp ++ adcxq %rcx,%r10 ++ adoxq %rbp,%r11 ++ mulxq 8(%rsi),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r8 ++ mulxq 16(%rsi),%rcx,%rbp ++ adcxq %rcx,%r8 ++ adoxq %rbp,%r9 ++ mulxq 24(%rsi),%rcx,%rbp ++ adcxq %rcx,%r9 ++ adoxq %rbp,%r10 ++ adcxq %rax,%r10 ++ ++ ++ movq %r11,%rdx ++ mulxq 32(%rsi),%rdx,%rcx ++ ++ mulxq 0(%rsi),%rcx,%rbp ++ adoxq %rcx,%r11 ++ adcxq %rbp,%r8 ++ mulxq 8(%rsi),%rcx,%rbp ++ adoxq %rcx,%r8 ++ adcxq %rbp,%r9 ++ mulxq 16(%rsi),%rcx,%rbp ++ adoxq %rcx,%r9 ++ adcxq %rbp,%r10 ++ mulxq 24(%rsi),%rcx,%rbp ++ adoxq %rcx,%r10 ++ adcxq %rbp,%r11 ++ adoxq %rax,%r11 ++ ++ ++ addq %r8,%r12 ++ adcq %r13,%r9 ++ movq %r12,%rdx ++ adcq %r14,%r10 ++ adcq %r15,%r11 ++ movq %r9,%r14 ++ 
adcq $0,%rax ++ ++ ++ subq 0(%rsi),%r12 ++ movq %r10,%r15 ++ sbbq 8(%rsi),%r9 ++ sbbq 16(%rsi),%r10 ++ movq %r11,%r8 ++ sbbq 24(%rsi),%r11 ++ sbbq $0,%rax ++ ++ cmovncq %r12,%rdx ++ cmovncq %r9,%r14 ++ cmovncq %r10,%r15 ++ cmovncq %r11,%r8 ++ ++ decq %rbx ++ jnz L$oop_ord_sqrx ++ ++ movq %rdx,0(%rdi) ++ movq %r14,8(%rdi) ++ pxor %xmm1,%xmm1 ++ movq %r15,16(%rdi) ++ pxor %xmm2,%xmm2 ++ movq %r8,24(%rdi) ++ pxor %xmm3,%xmm3 ++ ++ movq 0(%rsp),%r15 ++ ++ movq 8(%rsp),%r14 ++ ++ movq 16(%rsp),%r13 ++ ++ movq 24(%rsp),%r12 ++ ++ movq 32(%rsp),%rbx ++ ++ movq 40(%rsp),%rbp ++ ++ leaq 48(%rsp),%rsp ++ ++L$ord_sqrx_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++.globl _ecp_nistz256_mul_mont ++.private_extern _ecp_nistz256_mul_mont ++ ++.p2align 5 ++_ecp_nistz256_mul_mont: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%rcx ++ movq 8(%rcx),%rcx ++ andl $0x80100,%ecx ++L$mul_mont: ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$mul_body: ++ cmpl $0x80100,%ecx ++ je L$mul_montx ++ movq %rdx,%rbx ++ movq 0(%rdx),%rax ++ movq 0(%rsi),%r9 ++ movq 8(%rsi),%r10 ++ movq 16(%rsi),%r11 ++ movq 24(%rsi),%r12 ++ ++ call __ecp_nistz256_mul_montq ++ jmp L$mul_mont_done ++ ++.p2align 5 ++L$mul_montx: ++ movq %rdx,%rbx ++ movq 0(%rdx),%rdx ++ movq 0(%rsi),%r9 ++ movq 8(%rsi),%r10 ++ movq 16(%rsi),%r11 ++ movq 24(%rsi),%r12 ++ leaq -128(%rsi),%rsi ++ ++ call __ecp_nistz256_mul_montx ++L$mul_mont_done: ++ movq 0(%rsp),%r15 ++ ++ movq 8(%rsp),%r14 ++ ++ movq 16(%rsp),%r13 ++ ++ movq 24(%rsp),%r12 ++ ++ movq 32(%rsp),%rbx ++ ++ movq 40(%rsp),%rbp ++ ++ leaq 48(%rsp),%rsp ++ ++L$mul_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_mul_montq: ++ ++ ++ ++ movq %rax,%rbp ++ mulq %r9 ++ movq L$poly+8(%rip),%r14 ++ movq %rax,%r8 ++ movq %rbp,%rax ++ movq %rdx,%r9 ++ ++ mulq %r10 ++ movq L$poly+24(%rip),%r15 ++ addq %rax,%r9 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %r11 ++ addq %rax,%r10 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %r12 ++ addq %rax,%r11 ++ movq %r8,%rax ++ adcq $0,%rdx ++ xorq %r13,%r13 ++ movq %rdx,%r12 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ movq %r8,%rbp ++ shlq $32,%r8 ++ mulq %r15 ++ shrq $32,%rbp ++ addq %r8,%r9 ++ adcq %rbp,%r10 ++ adcq %rax,%r11 ++ movq 8(%rbx),%rax ++ adcq %rdx,%r12 ++ adcq $0,%r13 ++ xorq %r8,%r8 ++ ++ ++ ++ movq %rax,%rbp ++ mulq 0(%rsi) ++ addq %rax,%r9 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq 8(%rsi) ++ addq %rcx,%r10 ++ adcq $0,%rdx ++ addq %rax,%r10 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq 16(%rsi) ++ addq %rcx,%r11 ++ adcq $0,%rdx ++ addq %rax,%r11 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq 24(%rsi) ++ addq %rcx,%r12 ++ adcq $0,%rdx ++ addq %rax,%r12 ++ movq %r9,%rax ++ adcq %rdx,%r13 ++ adcq $0,%r8 ++ ++ ++ ++ movq %r9,%rbp ++ shlq $32,%r9 ++ mulq %r15 ++ shrq $32,%rbp ++ addq %r9,%r10 ++ adcq %rbp,%r11 ++ adcq %rax,%r12 ++ movq 16(%rbx),%rax ++ adcq %rdx,%r13 ++ adcq $0,%r8 ++ xorq %r9,%r9 ++ ++ ++ ++ movq %rax,%rbp ++ mulq 0(%rsi) ++ addq %rax,%r10 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq 8(%rsi) ++ addq %rcx,%r11 ++ adcq $0,%rdx ++ addq %rax,%r11 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq 16(%rsi) ++ addq %rcx,%r12 ++ adcq $0,%rdx ++ addq %rax,%r12 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq 24(%rsi) ++ addq %rcx,%r13 ++ adcq $0,%rdx ++ addq %rax,%r13 ++ movq %r10,%rax ++ adcq %rdx,%r8 ++ adcq $0,%r9 ++ ++ ++ ++ movq %r10,%rbp ++ shlq 
$32,%r10 ++ mulq %r15 ++ shrq $32,%rbp ++ addq %r10,%r11 ++ adcq %rbp,%r12 ++ adcq %rax,%r13 ++ movq 24(%rbx),%rax ++ adcq %rdx,%r8 ++ adcq $0,%r9 ++ xorq %r10,%r10 ++ ++ ++ ++ movq %rax,%rbp ++ mulq 0(%rsi) ++ addq %rax,%r11 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq 8(%rsi) ++ addq %rcx,%r12 ++ adcq $0,%rdx ++ addq %rax,%r12 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq 16(%rsi) ++ addq %rcx,%r13 ++ adcq $0,%rdx ++ addq %rax,%r13 ++ movq %rbp,%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq 24(%rsi) ++ addq %rcx,%r8 ++ adcq $0,%rdx ++ addq %rax,%r8 ++ movq %r11,%rax ++ adcq %rdx,%r9 ++ adcq $0,%r10 ++ ++ ++ ++ movq %r11,%rbp ++ shlq $32,%r11 ++ mulq %r15 ++ shrq $32,%rbp ++ addq %r11,%r12 ++ adcq %rbp,%r13 ++ movq %r12,%rcx ++ adcq %rax,%r8 ++ adcq %rdx,%r9 ++ movq %r13,%rbp ++ adcq $0,%r10 ++ ++ ++ ++ subq $-1,%r12 ++ movq %r8,%rbx ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%rdx ++ sbbq %r15,%r9 ++ sbbq $0,%r10 ++ ++ cmovcq %rcx,%r12 ++ cmovcq %rbp,%r13 ++ movq %r12,0(%rdi) ++ cmovcq %rbx,%r8 ++ movq %r13,8(%rdi) ++ cmovcq %rdx,%r9 ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.globl _ecp_nistz256_sqr_mont ++.private_extern _ecp_nistz256_sqr_mont ++ ++.p2align 5 ++_ecp_nistz256_sqr_mont: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%rcx ++ movq 8(%rcx),%rcx ++ andl $0x80100,%ecx ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$sqr_body: ++ cmpl $0x80100,%ecx ++ je L$sqr_montx ++ movq 0(%rsi),%rax ++ movq 8(%rsi),%r14 ++ movq 16(%rsi),%r15 ++ movq 24(%rsi),%r8 ++ ++ call __ecp_nistz256_sqr_montq ++ jmp L$sqr_mont_done ++ ++.p2align 5 ++L$sqr_montx: ++ movq 0(%rsi),%rdx ++ movq 8(%rsi),%r14 ++ movq 16(%rsi),%r15 ++ movq 24(%rsi),%r8 ++ leaq -128(%rsi),%rsi ++ ++ call __ecp_nistz256_sqr_montx ++L$sqr_mont_done: ++ movq 0(%rsp),%r15 ++ ++ movq 8(%rsp),%r14 ++ ++ movq 16(%rsp),%r13 ++ ++ movq 24(%rsp),%r12 ++ ++ movq 32(%rsp),%rbx ++ ++ movq 40(%rsp),%rbp ++ ++ leaq 48(%rsp),%rsp ++ ++L$sqr_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_sqr_montq: ++ ++ movq %rax,%r13 ++ mulq %r14 ++ movq %rax,%r9 ++ movq %r15,%rax ++ movq %rdx,%r10 ++ ++ mulq %r13 ++ addq %rax,%r10 ++ movq %r8,%rax ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %r13 ++ addq %rax,%r11 ++ movq %r15,%rax ++ adcq $0,%rdx ++ movq %rdx,%r12 ++ ++ ++ mulq %r14 ++ addq %rax,%r11 ++ movq %r8,%rax ++ adcq $0,%rdx ++ movq %rdx,%rbp ++ ++ mulq %r14 ++ addq %rax,%r12 ++ movq %r8,%rax ++ adcq $0,%rdx ++ addq %rbp,%r12 ++ movq %rdx,%r13 ++ adcq $0,%r13 ++ ++ ++ mulq %r15 ++ xorq %r15,%r15 ++ addq %rax,%r13 ++ movq 0(%rsi),%rax ++ movq %rdx,%r14 ++ adcq $0,%r14 ++ ++ addq %r9,%r9 ++ adcq %r10,%r10 ++ adcq %r11,%r11 ++ adcq %r12,%r12 ++ adcq %r13,%r13 ++ adcq %r14,%r14 ++ adcq $0,%r15 ++ ++ mulq %rax ++ movq %rax,%r8 ++ movq 8(%rsi),%rax ++ movq %rdx,%rcx ++ ++ mulq %rax ++ addq %rcx,%r9 ++ adcq %rax,%r10 ++ movq 16(%rsi),%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq %rax ++ addq %rcx,%r11 ++ adcq %rax,%r12 ++ movq 24(%rsi),%rax ++ adcq $0,%rdx ++ movq %rdx,%rcx ++ ++ mulq %rax ++ addq %rcx,%r13 ++ adcq %rax,%r14 ++ movq %r8,%rax ++ adcq %rdx,%r15 ++ ++ movq L$poly+8(%rip),%rsi ++ movq L$poly+24(%rip),%rbp ++ ++ ++ ++ ++ movq %r8,%rcx ++ shlq $32,%r8 ++ mulq %rbp ++ shrq $32,%rcx ++ addq %r8,%r9 ++ adcq %rcx,%r10 ++ adcq %rax,%r11 ++ movq %r9,%rax ++ adcq $0,%rdx ++ ++ ++ ++ movq %r9,%rcx ++ shlq $32,%r9 ++ movq %rdx,%r8 ++ mulq %rbp ++ shrq $32,%rcx ++ addq %r9,%r10 ++ adcq 
%rcx,%r11 ++ adcq %rax,%r8 ++ movq %r10,%rax ++ adcq $0,%rdx ++ ++ ++ ++ movq %r10,%rcx ++ shlq $32,%r10 ++ movq %rdx,%r9 ++ mulq %rbp ++ shrq $32,%rcx ++ addq %r10,%r11 ++ adcq %rcx,%r8 ++ adcq %rax,%r9 ++ movq %r11,%rax ++ adcq $0,%rdx ++ ++ ++ ++ movq %r11,%rcx ++ shlq $32,%r11 ++ movq %rdx,%r10 ++ mulq %rbp ++ shrq $32,%rcx ++ addq %r11,%r8 ++ adcq %rcx,%r9 ++ adcq %rax,%r10 ++ adcq $0,%rdx ++ xorq %r11,%r11 ++ ++ ++ ++ addq %r8,%r12 ++ adcq %r9,%r13 ++ movq %r12,%r8 ++ adcq %r10,%r14 ++ adcq %rdx,%r15 ++ movq %r13,%r9 ++ adcq $0,%r11 ++ ++ subq $-1,%r12 ++ movq %r14,%r10 ++ sbbq %rsi,%r13 ++ sbbq $0,%r14 ++ movq %r15,%rcx ++ sbbq %rbp,%r15 ++ sbbq $0,%r11 ++ ++ cmovcq %r8,%r12 ++ cmovcq %r9,%r13 ++ movq %r12,0(%rdi) ++ cmovcq %r10,%r14 ++ movq %r13,8(%rdi) ++ cmovcq %rcx,%r15 ++ movq %r14,16(%rdi) ++ movq %r15,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_mul_montx: ++ ++ ++ ++ mulxq %r9,%r8,%r9 ++ mulxq %r10,%rcx,%r10 ++ movq $32,%r14 ++ xorq %r13,%r13 ++ mulxq %r11,%rbp,%r11 ++ movq L$poly+24(%rip),%r15 ++ adcq %rcx,%r9 ++ mulxq %r12,%rcx,%r12 ++ movq %r8,%rdx ++ adcq %rbp,%r10 ++ shlxq %r14,%r8,%rbp ++ adcq %rcx,%r11 ++ shrxq %r14,%r8,%rcx ++ adcq $0,%r12 ++ ++ ++ ++ addq %rbp,%r9 ++ adcq %rcx,%r10 ++ ++ mulxq %r15,%rcx,%rbp ++ movq 8(%rbx),%rdx ++ adcq %rcx,%r11 ++ adcq %rbp,%r12 ++ adcq $0,%r13 ++ xorq %r8,%r8 ++ ++ ++ ++ mulxq 0+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r9 ++ adoxq %rbp,%r10 ++ ++ mulxq 8+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r10 ++ adoxq %rbp,%r11 ++ ++ mulxq 16+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq 24+128(%rsi),%rcx,%rbp ++ movq %r9,%rdx ++ adcxq %rcx,%r12 ++ shlxq %r14,%r9,%rcx ++ adoxq %rbp,%r13 ++ shrxq %r14,%r9,%rbp ++ ++ adcxq %r8,%r13 ++ adoxq %r8,%r8 ++ adcq $0,%r8 ++ ++ ++ ++ addq %rcx,%r10 ++ adcq %rbp,%r11 ++ ++ mulxq %r15,%rcx,%rbp ++ movq 16(%rbx),%rdx ++ adcq %rcx,%r12 ++ adcq %rbp,%r13 ++ adcq $0,%r8 ++ xorq %r9,%r9 ++ ++ ++ ++ mulxq 0+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r10 ++ adoxq %rbp,%r11 ++ ++ mulxq 8+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq 16+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ ++ mulxq 24+128(%rsi),%rcx,%rbp ++ movq %r10,%rdx ++ adcxq %rcx,%r13 ++ shlxq %r14,%r10,%rcx ++ adoxq %rbp,%r8 ++ shrxq %r14,%r10,%rbp ++ ++ adcxq %r9,%r8 ++ adoxq %r9,%r9 ++ adcq $0,%r9 ++ ++ ++ ++ addq %rcx,%r11 ++ adcq %rbp,%r12 ++ ++ mulxq %r15,%rcx,%rbp ++ movq 24(%rbx),%rdx ++ adcq %rcx,%r13 ++ adcq %rbp,%r8 ++ adcq $0,%r9 ++ xorq %r10,%r10 ++ ++ ++ ++ mulxq 0+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq 8+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ ++ mulxq 16+128(%rsi),%rcx,%rbp ++ adcxq %rcx,%r13 ++ adoxq %rbp,%r8 ++ ++ mulxq 24+128(%rsi),%rcx,%rbp ++ movq %r11,%rdx ++ adcxq %rcx,%r8 ++ shlxq %r14,%r11,%rcx ++ adoxq %rbp,%r9 ++ shrxq %r14,%r11,%rbp ++ ++ adcxq %r10,%r9 ++ adoxq %r10,%r10 ++ adcq $0,%r10 ++ ++ ++ ++ addq %rcx,%r12 ++ adcq %rbp,%r13 ++ ++ mulxq %r15,%rcx,%rbp ++ movq %r12,%rbx ++ movq L$poly+8(%rip),%r14 ++ adcq %rcx,%r8 ++ movq %r13,%rdx ++ adcq %rbp,%r9 ++ adcq $0,%r10 ++ ++ ++ ++ xorl %eax,%eax ++ movq %r8,%rcx ++ sbbq $-1,%r12 ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%rbp ++ sbbq %r15,%r9 ++ sbbq $0,%r10 ++ ++ cmovcq %rbx,%r12 ++ cmovcq %rdx,%r13 ++ movq %r12,0(%rdi) ++ cmovcq %rcx,%r8 ++ movq %r13,8(%rdi) ++ cmovcq %rbp,%r9 ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_sqr_montx: ++ ++ mulxq %r14,%r9,%r10 ++ mulxq %r15,%rcx,%r11 ++ 
xorl %eax,%eax ++ adcq %rcx,%r10 ++ mulxq %r8,%rbp,%r12 ++ movq %r14,%rdx ++ adcq %rbp,%r11 ++ adcq $0,%r12 ++ xorq %r13,%r13 ++ ++ ++ mulxq %r15,%rcx,%rbp ++ adcxq %rcx,%r11 ++ adoxq %rbp,%r12 ++ ++ mulxq %r8,%rcx,%rbp ++ movq %r15,%rdx ++ adcxq %rcx,%r12 ++ adoxq %rbp,%r13 ++ adcq $0,%r13 ++ ++ ++ mulxq %r8,%rcx,%r14 ++ movq 0+128(%rsi),%rdx ++ xorq %r15,%r15 ++ adcxq %r9,%r9 ++ adoxq %rcx,%r13 ++ adcxq %r10,%r10 ++ adoxq %r15,%r14 ++ ++ mulxq %rdx,%r8,%rbp ++ movq 8+128(%rsi),%rdx ++ adcxq %r11,%r11 ++ adoxq %rbp,%r9 ++ adcxq %r12,%r12 ++ mulxq %rdx,%rcx,%rax ++ movq 16+128(%rsi),%rdx ++ adcxq %r13,%r13 ++ adoxq %rcx,%r10 ++ adcxq %r14,%r14 ++.byte 0x67 ++ mulxq %rdx,%rcx,%rbp ++ movq 24+128(%rsi),%rdx ++ adoxq %rax,%r11 ++ adcxq %r15,%r15 ++ adoxq %rcx,%r12 ++ movq $32,%rsi ++ adoxq %rbp,%r13 ++.byte 0x67,0x67 ++ mulxq %rdx,%rcx,%rax ++ movq L$poly+24(%rip),%rdx ++ adoxq %rcx,%r14 ++ shlxq %rsi,%r8,%rcx ++ adoxq %rax,%r15 ++ shrxq %rsi,%r8,%rax ++ movq %rdx,%rbp ++ ++ ++ addq %rcx,%r9 ++ adcq %rax,%r10 ++ ++ mulxq %r8,%rcx,%r8 ++ adcq %rcx,%r11 ++ shlxq %rsi,%r9,%rcx ++ adcq $0,%r8 ++ shrxq %rsi,%r9,%rax ++ ++ ++ addq %rcx,%r10 ++ adcq %rax,%r11 ++ ++ mulxq %r9,%rcx,%r9 ++ adcq %rcx,%r8 ++ shlxq %rsi,%r10,%rcx ++ adcq $0,%r9 ++ shrxq %rsi,%r10,%rax ++ ++ ++ addq %rcx,%r11 ++ adcq %rax,%r8 ++ ++ mulxq %r10,%rcx,%r10 ++ adcq %rcx,%r9 ++ shlxq %rsi,%r11,%rcx ++ adcq $0,%r10 ++ shrxq %rsi,%r11,%rax ++ ++ ++ addq %rcx,%r8 ++ adcq %rax,%r9 ++ ++ mulxq %r11,%rcx,%r11 ++ adcq %rcx,%r10 ++ adcq $0,%r11 ++ ++ xorq %rdx,%rdx ++ addq %r8,%r12 ++ movq L$poly+8(%rip),%rsi ++ adcq %r9,%r13 ++ movq %r12,%r8 ++ adcq %r10,%r14 ++ adcq %r11,%r15 ++ movq %r13,%r9 ++ adcq $0,%rdx ++ ++ subq $-1,%r12 ++ movq %r14,%r10 ++ sbbq %rsi,%r13 ++ sbbq $0,%r14 ++ movq %r15,%r11 ++ sbbq %rbp,%r15 ++ sbbq $0,%rdx ++ ++ cmovcq %r8,%r12 ++ cmovcq %r9,%r13 ++ movq %r12,0(%rdi) ++ cmovcq %r10,%r14 ++ movq %r13,8(%rdi) ++ cmovcq %r11,%r15 ++ movq %r14,16(%rdi) ++ movq %r15,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.globl _ecp_nistz256_select_w5 ++.private_extern _ecp_nistz256_select_w5 ++ ++.p2align 5 ++_ecp_nistz256_select_w5: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%rax ++ movq 8(%rax),%rax ++ testl $32,%eax ++ jnz L$avx2_select_w5 ++ movdqa L$One(%rip),%xmm0 ++ movd %edx,%xmm1 ++ ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ pxor %xmm7,%xmm7 ++ ++ movdqa %xmm0,%xmm8 ++ pshufd $0,%xmm1,%xmm1 ++ ++ movq $16,%rax ++L$select_loop_sse_w5: ++ ++ movdqa %xmm8,%xmm15 ++ paddd %xmm0,%xmm8 ++ pcmpeqd %xmm1,%xmm15 ++ ++ movdqa 0(%rsi),%xmm9 ++ movdqa 16(%rsi),%xmm10 ++ movdqa 32(%rsi),%xmm11 ++ movdqa 48(%rsi),%xmm12 ++ movdqa 64(%rsi),%xmm13 ++ movdqa 80(%rsi),%xmm14 ++ leaq 96(%rsi),%rsi ++ ++ pand %xmm15,%xmm9 ++ pand %xmm15,%xmm10 ++ por %xmm9,%xmm2 ++ pand %xmm15,%xmm11 ++ por %xmm10,%xmm3 ++ pand %xmm15,%xmm12 ++ por %xmm11,%xmm4 ++ pand %xmm15,%xmm13 ++ por %xmm12,%xmm5 ++ pand %xmm15,%xmm14 ++ por %xmm13,%xmm6 ++ por %xmm14,%xmm7 ++ ++ decq %rax ++ jnz L$select_loop_sse_w5 ++ ++ movdqu %xmm2,0(%rdi) ++ movdqu %xmm3,16(%rdi) ++ movdqu %xmm4,32(%rdi) ++ movdqu %xmm5,48(%rdi) ++ movdqu %xmm6,64(%rdi) ++ movdqu %xmm7,80(%rdi) ++ .byte 0xf3,0xc3 ++ ++L$SEH_end_ecp_nistz256_select_w5: ++ ++ ++ ++ ++.globl _ecp_nistz256_select_w7 ++.private_extern _ecp_nistz256_select_w7 ++ ++.p2align 5 ++_ecp_nistz256_select_w7: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%rax ++ movq 8(%rax),%rax ++ testl $32,%eax ++ jnz L$avx2_select_w7 ++ movdqa L$One(%rip),%xmm8 ++ movd %edx,%xmm1 ++ ++ pxor 
%xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ ++ movdqa %xmm8,%xmm0 ++ pshufd $0,%xmm1,%xmm1 ++ movq $64,%rax ++ ++L$select_loop_sse_w7: ++ movdqa %xmm8,%xmm15 ++ paddd %xmm0,%xmm8 ++ movdqa 0(%rsi),%xmm9 ++ movdqa 16(%rsi),%xmm10 ++ pcmpeqd %xmm1,%xmm15 ++ movdqa 32(%rsi),%xmm11 ++ movdqa 48(%rsi),%xmm12 ++ leaq 64(%rsi),%rsi ++ ++ pand %xmm15,%xmm9 ++ pand %xmm15,%xmm10 ++ por %xmm9,%xmm2 ++ pand %xmm15,%xmm11 ++ por %xmm10,%xmm3 ++ pand %xmm15,%xmm12 ++ por %xmm11,%xmm4 ++ prefetcht0 255(%rsi) ++ por %xmm12,%xmm5 ++ ++ decq %rax ++ jnz L$select_loop_sse_w7 ++ ++ movdqu %xmm2,0(%rdi) ++ movdqu %xmm3,16(%rdi) ++ movdqu %xmm4,32(%rdi) ++ movdqu %xmm5,48(%rdi) ++ .byte 0xf3,0xc3 ++ ++L$SEH_end_ecp_nistz256_select_w7: ++ ++ ++ ++ ++.p2align 5 ++ecp_nistz256_avx2_select_w5: ++ ++L$avx2_select_w5: ++ vzeroupper ++ vmovdqa L$Two(%rip),%ymm0 ++ ++ vpxor %ymm2,%ymm2,%ymm2 ++ vpxor %ymm3,%ymm3,%ymm3 ++ vpxor %ymm4,%ymm4,%ymm4 ++ ++ vmovdqa L$One(%rip),%ymm5 ++ vmovdqa L$Two(%rip),%ymm10 ++ ++ vmovd %edx,%xmm1 ++ vpermd %ymm1,%ymm2,%ymm1 ++ ++ movq $8,%rax ++L$select_loop_avx2_w5: ++ ++ vmovdqa 0(%rsi),%ymm6 ++ vmovdqa 32(%rsi),%ymm7 ++ vmovdqa 64(%rsi),%ymm8 ++ ++ vmovdqa 96(%rsi),%ymm11 ++ vmovdqa 128(%rsi),%ymm12 ++ vmovdqa 160(%rsi),%ymm13 ++ ++ vpcmpeqd %ymm1,%ymm5,%ymm9 ++ vpcmpeqd %ymm1,%ymm10,%ymm14 ++ ++ vpaddd %ymm0,%ymm5,%ymm5 ++ vpaddd %ymm0,%ymm10,%ymm10 ++ leaq 192(%rsi),%rsi ++ ++ vpand %ymm9,%ymm6,%ymm6 ++ vpand %ymm9,%ymm7,%ymm7 ++ vpand %ymm9,%ymm8,%ymm8 ++ vpand %ymm14,%ymm11,%ymm11 ++ vpand %ymm14,%ymm12,%ymm12 ++ vpand %ymm14,%ymm13,%ymm13 ++ ++ vpxor %ymm6,%ymm2,%ymm2 ++ vpxor %ymm7,%ymm3,%ymm3 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpxor %ymm11,%ymm2,%ymm2 ++ vpxor %ymm12,%ymm3,%ymm3 ++ vpxor %ymm13,%ymm4,%ymm4 ++ ++ decq %rax ++ jnz L$select_loop_avx2_w5 ++ ++ vmovdqu %ymm2,0(%rdi) ++ vmovdqu %ymm3,32(%rdi) ++ vmovdqu %ymm4,64(%rdi) ++ vzeroupper ++ .byte 0xf3,0xc3 ++ ++L$SEH_end_ecp_nistz256_avx2_select_w5: ++ ++ ++ ++ ++.globl _ecp_nistz256_avx2_select_w7 ++.private_extern _ecp_nistz256_avx2_select_w7 ++ ++.p2align 5 ++_ecp_nistz256_avx2_select_w7: ++ ++L$avx2_select_w7: ++ vzeroupper ++ vmovdqa L$Three(%rip),%ymm0 ++ ++ vpxor %ymm2,%ymm2,%ymm2 ++ vpxor %ymm3,%ymm3,%ymm3 ++ ++ vmovdqa L$One(%rip),%ymm4 ++ vmovdqa L$Two(%rip),%ymm8 ++ vmovdqa L$Three(%rip),%ymm12 ++ ++ vmovd %edx,%xmm1 ++ vpermd %ymm1,%ymm2,%ymm1 ++ ++ ++ movq $21,%rax ++L$select_loop_avx2_w7: ++ ++ vmovdqa 0(%rsi),%ymm5 ++ vmovdqa 32(%rsi),%ymm6 ++ ++ vmovdqa 64(%rsi),%ymm9 ++ vmovdqa 96(%rsi),%ymm10 ++ ++ vmovdqa 128(%rsi),%ymm13 ++ vmovdqa 160(%rsi),%ymm14 ++ ++ vpcmpeqd %ymm1,%ymm4,%ymm7 ++ vpcmpeqd %ymm1,%ymm8,%ymm11 ++ vpcmpeqd %ymm1,%ymm12,%ymm15 ++ ++ vpaddd %ymm0,%ymm4,%ymm4 ++ vpaddd %ymm0,%ymm8,%ymm8 ++ vpaddd %ymm0,%ymm12,%ymm12 ++ leaq 192(%rsi),%rsi ++ ++ vpand %ymm7,%ymm5,%ymm5 ++ vpand %ymm7,%ymm6,%ymm6 ++ vpand %ymm11,%ymm9,%ymm9 ++ vpand %ymm11,%ymm10,%ymm10 ++ vpand %ymm15,%ymm13,%ymm13 ++ vpand %ymm15,%ymm14,%ymm14 ++ ++ vpxor %ymm5,%ymm2,%ymm2 ++ vpxor %ymm6,%ymm3,%ymm3 ++ vpxor %ymm9,%ymm2,%ymm2 ++ vpxor %ymm10,%ymm3,%ymm3 ++ vpxor %ymm13,%ymm2,%ymm2 ++ vpxor %ymm14,%ymm3,%ymm3 ++ ++ decq %rax ++ jnz L$select_loop_avx2_w7 ++ ++ ++ vmovdqa 0(%rsi),%ymm5 ++ vmovdqa 32(%rsi),%ymm6 ++ ++ vpcmpeqd %ymm1,%ymm4,%ymm7 ++ ++ vpand %ymm7,%ymm5,%ymm5 ++ vpand %ymm7,%ymm6,%ymm6 ++ ++ vpxor %ymm5,%ymm2,%ymm2 ++ vpxor %ymm6,%ymm3,%ymm3 ++ ++ vmovdqu %ymm2,0(%rdi) ++ vmovdqu %ymm3,32(%rdi) ++ vzeroupper ++ .byte 0xf3,0xc3 ++ ++L$SEH_end_ecp_nistz256_avx2_select_w7: ++ ++ 
++.p2align 5 ++__ecp_nistz256_add_toq: ++ ++ xorq %r11,%r11 ++ addq 0(%rbx),%r12 ++ adcq 8(%rbx),%r13 ++ movq %r12,%rax ++ adcq 16(%rbx),%r8 ++ adcq 24(%rbx),%r9 ++ movq %r13,%rbp ++ adcq $0,%r11 ++ ++ subq $-1,%r12 ++ movq %r8,%rcx ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%r10 ++ sbbq %r15,%r9 ++ sbbq $0,%r11 ++ ++ cmovcq %rax,%r12 ++ cmovcq %rbp,%r13 ++ movq %r12,0(%rdi) ++ cmovcq %rcx,%r8 ++ movq %r13,8(%rdi) ++ cmovcq %r10,%r9 ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_sub_fromq: ++ ++ subq 0(%rbx),%r12 ++ sbbq 8(%rbx),%r13 ++ movq %r12,%rax ++ sbbq 16(%rbx),%r8 ++ sbbq 24(%rbx),%r9 ++ movq %r13,%rbp ++ sbbq %r11,%r11 ++ ++ addq $-1,%r12 ++ movq %r8,%rcx ++ adcq %r14,%r13 ++ adcq $0,%r8 ++ movq %r9,%r10 ++ adcq %r15,%r9 ++ testq %r11,%r11 ++ ++ cmovzq %rax,%r12 ++ cmovzq %rbp,%r13 ++ movq %r12,0(%rdi) ++ cmovzq %rcx,%r8 ++ movq %r13,8(%rdi) ++ cmovzq %r10,%r9 ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_subq: ++ ++ subq %r12,%rax ++ sbbq %r13,%rbp ++ movq %rax,%r12 ++ sbbq %r8,%rcx ++ sbbq %r9,%r10 ++ movq %rbp,%r13 ++ sbbq %r11,%r11 ++ ++ addq $-1,%rax ++ movq %rcx,%r8 ++ adcq %r14,%rbp ++ adcq $0,%rcx ++ movq %r10,%r9 ++ adcq %r15,%r10 ++ testq %r11,%r11 ++ ++ cmovnzq %rax,%r12 ++ cmovnzq %rbp,%r13 ++ cmovnzq %rcx,%r8 ++ cmovnzq %r10,%r9 ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_mul_by_2q: ++ ++ xorq %r11,%r11 ++ addq %r12,%r12 ++ adcq %r13,%r13 ++ movq %r12,%rax ++ adcq %r8,%r8 ++ adcq %r9,%r9 ++ movq %r13,%rbp ++ adcq $0,%r11 ++ ++ subq $-1,%r12 ++ movq %r8,%rcx ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%r10 ++ sbbq %r15,%r9 ++ sbbq $0,%r11 ++ ++ cmovcq %rax,%r12 ++ cmovcq %rbp,%r13 ++ movq %r12,0(%rdi) ++ cmovcq %rcx,%r8 ++ movq %r13,8(%rdi) ++ cmovcq %r10,%r9 ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++.globl _ecp_nistz256_point_double ++.private_extern _ecp_nistz256_point_double ++ ++.p2align 5 ++_ecp_nistz256_point_double: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%rcx ++ movq 8(%rcx),%rcx ++ andl $0x80100,%ecx ++ cmpl $0x80100,%ecx ++ je L$point_doublex ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ subq $160+8,%rsp ++ ++L$point_doubleq_body: ++ ++L$point_double_shortcutq: ++ movdqu 0(%rsi),%xmm0 ++ movq %rsi,%rbx ++ movdqu 16(%rsi),%xmm1 ++ movq 32+0(%rsi),%r12 ++ movq 32+8(%rsi),%r13 ++ movq 32+16(%rsi),%r8 ++ movq 32+24(%rsi),%r9 ++ movq L$poly+8(%rip),%r14 ++ movq L$poly+24(%rip),%r15 ++ movdqa %xmm0,96(%rsp) ++ movdqa %xmm1,96+16(%rsp) ++ leaq 32(%rdi),%r10 ++ leaq 64(%rdi),%r11 ++.byte 102,72,15,110,199 ++.byte 102,73,15,110,202 ++.byte 102,73,15,110,211 ++ ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_by_2q ++ ++ movq 64+0(%rsi),%rax ++ movq 64+8(%rsi),%r14 ++ movq 64+16(%rsi),%r15 ++ movq 64+24(%rsi),%r8 ++ leaq 64-0(%rsi),%rsi ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_sqr_montq ++ ++ movq 0+0(%rsp),%rax ++ movq 8+0(%rsp),%r14 ++ leaq 0+0(%rsp),%rsi ++ movq 16+0(%rsp),%r15 ++ movq 24+0(%rsp),%r8 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_sqr_montq ++ ++ movq 32(%rbx),%rax ++ movq 64+0(%rbx),%r9 ++ movq 64+8(%rbx),%r10 ++ movq 64+16(%rbx),%r11 ++ movq 64+24(%rbx),%r12 ++ leaq 64-0(%rbx),%rsi ++ leaq 32(%rbx),%rbx ++.byte 102,72,15,126,215 ++ call __ecp_nistz256_mul_montq ++ call __ecp_nistz256_mul_by_2q ++ ++ movq 96+0(%rsp),%r12 ++ movq 96+8(%rsp),%r13 ++ leaq 64(%rsp),%rbx ++ movq 96+16(%rsp),%r8 ++ movq 96+24(%rsp),%r9 ++ leaq 
32(%rsp),%rdi ++ call __ecp_nistz256_add_toq ++ ++ movq 96+0(%rsp),%r12 ++ movq 96+8(%rsp),%r13 ++ leaq 64(%rsp),%rbx ++ movq 96+16(%rsp),%r8 ++ movq 96+24(%rsp),%r9 ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_sub_fromq ++ ++ movq 0+0(%rsp),%rax ++ movq 8+0(%rsp),%r14 ++ leaq 0+0(%rsp),%rsi ++ movq 16+0(%rsp),%r15 ++ movq 24+0(%rsp),%r8 ++.byte 102,72,15,126,207 ++ call __ecp_nistz256_sqr_montq ++ xorq %r9,%r9 ++ movq %r12,%rax ++ addq $-1,%r12 ++ movq %r13,%r10 ++ adcq %rsi,%r13 ++ movq %r14,%rcx ++ adcq $0,%r14 ++ movq %r15,%r8 ++ adcq %rbp,%r15 ++ adcq $0,%r9 ++ xorq %rsi,%rsi ++ testq $1,%rax ++ ++ cmovzq %rax,%r12 ++ cmovzq %r10,%r13 ++ cmovzq %rcx,%r14 ++ cmovzq %r8,%r15 ++ cmovzq %rsi,%r9 ++ ++ movq %r13,%rax ++ shrq $1,%r12 ++ shlq $63,%rax ++ movq %r14,%r10 ++ shrq $1,%r13 ++ orq %rax,%r12 ++ shlq $63,%r10 ++ movq %r15,%rcx ++ shrq $1,%r14 ++ orq %r10,%r13 ++ shlq $63,%rcx ++ movq %r12,0(%rdi) ++ shrq $1,%r15 ++ movq %r13,8(%rdi) ++ shlq $63,%r9 ++ orq %rcx,%r14 ++ orq %r9,%r15 ++ movq %r14,16(%rdi) ++ movq %r15,24(%rdi) ++ movq 64(%rsp),%rax ++ leaq 64(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq 0+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ leaq 128(%rsp),%rdi ++ call __ecp_nistz256_mul_by_2q ++ ++ leaq 32(%rsp),%rbx ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_add_toq ++ ++ movq 96(%rsp),%rax ++ leaq 96(%rsp),%rbx ++ movq 0+0(%rsp),%r9 ++ movq 8+0(%rsp),%r10 ++ leaq 0+0(%rsp),%rsi ++ movq 16+0(%rsp),%r11 ++ movq 24+0(%rsp),%r12 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ leaq 128(%rsp),%rdi ++ call __ecp_nistz256_mul_by_2q ++ ++ movq 0+32(%rsp),%rax ++ movq 8+32(%rsp),%r14 ++ leaq 0+32(%rsp),%rsi ++ movq 16+32(%rsp),%r15 ++ movq 24+32(%rsp),%r8 ++.byte 102,72,15,126,199 ++ call __ecp_nistz256_sqr_montq ++ ++ leaq 128(%rsp),%rbx ++ movq %r14,%r8 ++ movq %r15,%r9 ++ movq %rsi,%r14 ++ movq %rbp,%r15 ++ call __ecp_nistz256_sub_fromq ++ ++ movq 0+0(%rsp),%rax ++ movq 0+8(%rsp),%rbp ++ movq 0+16(%rsp),%rcx ++ movq 0+24(%rsp),%r10 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_subq ++ ++ movq 32(%rsp),%rax ++ leaq 32(%rsp),%rbx ++ movq %r12,%r14 ++ xorl %ecx,%ecx ++ movq %r12,0+0(%rsp) ++ movq %r13,%r10 ++ movq %r13,0+8(%rsp) ++ cmovzq %r8,%r11 ++ movq %r8,0+16(%rsp) ++ leaq 0-0(%rsp),%rsi ++ cmovzq %r9,%r12 ++ movq %r9,0+24(%rsp) ++ movq %r14,%r9 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++.byte 102,72,15,126,203 ++.byte 102,72,15,126,207 ++ call __ecp_nistz256_sub_fromq ++ ++ leaq 160+56(%rsp),%rsi ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbx ++ ++ movq -8(%rsi),%rbp ++ ++ leaq (%rsi),%rsp ++ ++L$point_doubleq_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++.globl _ecp_nistz256_point_add ++.private_extern _ecp_nistz256_point_add ++ ++.p2align 5 ++_ecp_nistz256_point_add: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%rcx ++ movq 8(%rcx),%rcx ++ andl $0x80100,%ecx ++ cmpl $0x80100,%ecx ++ je L$point_addx ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ subq $576+8,%rsp ++ ++L$point_addq_body: ++ ++ movdqu 0(%rsi),%xmm0 ++ movdqu 16(%rsi),%xmm1 ++ movdqu 32(%rsi),%xmm2 ++ movdqu 48(%rsi),%xmm3 ++ movdqu 64(%rsi),%xmm4 ++ movdqu 80(%rsi),%xmm5 ++ movq %rsi,%rbx ++ movq %rdx,%rsi ++ movdqa %xmm0,384(%rsp) ++ movdqa %xmm1,384+16(%rsp) ++ movdqa %xmm2,416(%rsp) ++ movdqa %xmm3,416+16(%rsp) ++ movdqa %xmm4,448(%rsp) ++ movdqa 
%xmm5,448+16(%rsp) ++ por %xmm4,%xmm5 ++ ++ movdqu 0(%rsi),%xmm0 ++ pshufd $0xb1,%xmm5,%xmm3 ++ movdqu 16(%rsi),%xmm1 ++ movdqu 32(%rsi),%xmm2 ++ por %xmm3,%xmm5 ++ movdqu 48(%rsi),%xmm3 ++ movq 64+0(%rsi),%rax ++ movq 64+8(%rsi),%r14 ++ movq 64+16(%rsi),%r15 ++ movq 64+24(%rsi),%r8 ++ movdqa %xmm0,480(%rsp) ++ pshufd $0x1e,%xmm5,%xmm4 ++ movdqa %xmm1,480+16(%rsp) ++ movdqu 64(%rsi),%xmm0 ++ movdqu 80(%rsi),%xmm1 ++ movdqa %xmm2,512(%rsp) ++ movdqa %xmm3,512+16(%rsp) ++ por %xmm4,%xmm5 ++ pxor %xmm4,%xmm4 ++ por %xmm0,%xmm1 ++.byte 102,72,15,110,199 ++ ++ leaq 64-0(%rsi),%rsi ++ movq %rax,544+0(%rsp) ++ movq %r14,544+8(%rsp) ++ movq %r15,544+16(%rsp) ++ movq %r8,544+24(%rsp) ++ leaq 96(%rsp),%rdi ++ call __ecp_nistz256_sqr_montq ++ ++ pcmpeqd %xmm4,%xmm5 ++ pshufd $0xb1,%xmm1,%xmm4 ++ por %xmm1,%xmm4 ++ pshufd $0,%xmm5,%xmm5 ++ pshufd $0x1e,%xmm4,%xmm3 ++ por %xmm3,%xmm4 ++ pxor %xmm3,%xmm3 ++ pcmpeqd %xmm3,%xmm4 ++ pshufd $0,%xmm4,%xmm4 ++ movq 64+0(%rbx),%rax ++ movq 64+8(%rbx),%r14 ++ movq 64+16(%rbx),%r15 ++ movq 64+24(%rbx),%r8 ++.byte 102,72,15,110,203 ++ ++ leaq 64-0(%rbx),%rsi ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_sqr_montq ++ ++ movq 544(%rsp),%rax ++ leaq 544(%rsp),%rbx ++ movq 0+96(%rsp),%r9 ++ movq 8+96(%rsp),%r10 ++ leaq 0+96(%rsp),%rsi ++ movq 16+96(%rsp),%r11 ++ movq 24+96(%rsp),%r12 ++ leaq 224(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 448(%rsp),%rax ++ leaq 448(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq 0+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 256(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 416(%rsp),%rax ++ leaq 416(%rsp),%rbx ++ movq 0+224(%rsp),%r9 ++ movq 8+224(%rsp),%r10 ++ leaq 0+224(%rsp),%rsi ++ movq 16+224(%rsp),%r11 ++ movq 24+224(%rsp),%r12 ++ leaq 224(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 512(%rsp),%rax ++ leaq 512(%rsp),%rbx ++ movq 0+256(%rsp),%r9 ++ movq 8+256(%rsp),%r10 ++ leaq 0+256(%rsp),%rsi ++ movq 16+256(%rsp),%r11 ++ movq 24+256(%rsp),%r12 ++ leaq 256(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ leaq 224(%rsp),%rbx ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_sub_fromq ++ ++ orq %r13,%r12 ++ movdqa %xmm4,%xmm2 ++ orq %r8,%r12 ++ orq %r9,%r12 ++ por %xmm5,%xmm2 ++.byte 102,73,15,110,220 ++ ++ movq 384(%rsp),%rax ++ leaq 384(%rsp),%rbx ++ movq 0+96(%rsp),%r9 ++ movq 8+96(%rsp),%r10 ++ leaq 0+96(%rsp),%rsi ++ movq 16+96(%rsp),%r11 ++ movq 24+96(%rsp),%r12 ++ leaq 160(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 480(%rsp),%rax ++ leaq 480(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq 0+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 192(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ leaq 160(%rsp),%rbx ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_sub_fromq ++ ++ orq %r13,%r12 ++ orq %r8,%r12 ++ orq %r9,%r12 ++ ++.byte 102,73,15,126,208 ++.byte 102,73,15,126,217 ++ orq %r8,%r12 ++.byte 0x3e ++ jnz L$add_proceedq ++ ++ ++ ++ testq %r9,%r9 ++ jz L$add_doubleq ++ ++ ++ ++ ++ ++ ++.byte 102,72,15,126,199 ++ pxor %xmm0,%xmm0 ++ movdqu %xmm0,0(%rdi) ++ movdqu %xmm0,16(%rdi) ++ movdqu %xmm0,32(%rdi) ++ movdqu %xmm0,48(%rdi) ++ movdqu %xmm0,64(%rdi) ++ movdqu %xmm0,80(%rdi) ++ jmp L$add_doneq ++ ++.p2align 5 ++L$add_doubleq: ++.byte 102,72,15,126,206 ++.byte 102,72,15,126,199 ++ addq $416,%rsp ++ ++ jmp L$point_double_shortcutq ++ ++ ++.p2align 5 ++L$add_proceedq: ++ movq 0+64(%rsp),%rax ++ movq 8+64(%rsp),%r14 ++ leaq 0+64(%rsp),%rsi ++ movq 16+64(%rsp),%r15 ++ movq 24+64(%rsp),%r8 ++ leaq 
96(%rsp),%rdi ++ call __ecp_nistz256_sqr_montq ++ ++ movq 448(%rsp),%rax ++ leaq 448(%rsp),%rbx ++ movq 0+0(%rsp),%r9 ++ movq 8+0(%rsp),%r10 ++ leaq 0+0(%rsp),%rsi ++ movq 16+0(%rsp),%r11 ++ movq 24+0(%rsp),%r12 ++ leaq 352(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 0+0(%rsp),%rax ++ movq 8+0(%rsp),%r14 ++ leaq 0+0(%rsp),%rsi ++ movq 16+0(%rsp),%r15 ++ movq 24+0(%rsp),%r8 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_sqr_montq ++ ++ movq 544(%rsp),%rax ++ leaq 544(%rsp),%rbx ++ movq 0+352(%rsp),%r9 ++ movq 8+352(%rsp),%r10 ++ leaq 0+352(%rsp),%rsi ++ movq 16+352(%rsp),%r11 ++ movq 24+352(%rsp),%r12 ++ leaq 352(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 0(%rsp),%rax ++ leaq 0(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq 0+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 128(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 160(%rsp),%rax ++ leaq 160(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq 0+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 192(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ ++ ++ ++ xorq %r11,%r11 ++ addq %r12,%r12 ++ leaq 96(%rsp),%rsi ++ adcq %r13,%r13 ++ movq %r12,%rax ++ adcq %r8,%r8 ++ adcq %r9,%r9 ++ movq %r13,%rbp ++ adcq $0,%r11 ++ ++ subq $-1,%r12 ++ movq %r8,%rcx ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%r10 ++ sbbq %r15,%r9 ++ sbbq $0,%r11 ++ ++ cmovcq %rax,%r12 ++ movq 0(%rsi),%rax ++ cmovcq %rbp,%r13 ++ movq 8(%rsi),%rbp ++ cmovcq %rcx,%r8 ++ movq 16(%rsi),%rcx ++ cmovcq %r10,%r9 ++ movq 24(%rsi),%r10 ++ ++ call __ecp_nistz256_subq ++ ++ leaq 128(%rsp),%rbx ++ leaq 288(%rsp),%rdi ++ call __ecp_nistz256_sub_fromq ++ ++ movq 192+0(%rsp),%rax ++ movq 192+8(%rsp),%rbp ++ movq 192+16(%rsp),%rcx ++ movq 192+24(%rsp),%r10 ++ leaq 320(%rsp),%rdi ++ ++ call __ecp_nistz256_subq ++ ++ movq %r12,0(%rdi) ++ movq %r13,8(%rdi) ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ movq 128(%rsp),%rax ++ leaq 128(%rsp),%rbx ++ movq 0+224(%rsp),%r9 ++ movq 8+224(%rsp),%r10 ++ leaq 0+224(%rsp),%rsi ++ movq 16+224(%rsp),%r11 ++ movq 24+224(%rsp),%r12 ++ leaq 256(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 320(%rsp),%rax ++ leaq 320(%rsp),%rbx ++ movq 0+64(%rsp),%r9 ++ movq 8+64(%rsp),%r10 ++ leaq 0+64(%rsp),%rsi ++ movq 16+64(%rsp),%r11 ++ movq 24+64(%rsp),%r12 ++ leaq 320(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ leaq 256(%rsp),%rbx ++ leaq 320(%rsp),%rdi ++ call __ecp_nistz256_sub_fromq ++ ++.byte 102,72,15,126,199 ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 352(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 352+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 544(%rsp),%xmm2 ++ pand 544+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 448(%rsp),%xmm2 ++ pand 448+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,64(%rdi) ++ movdqu %xmm3,80(%rdi) ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 288(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 288+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 480(%rsp),%xmm2 ++ pand 480+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 384(%rsp),%xmm2 ++ pand 384+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,0(%rdi) ++ movdqu %xmm3,16(%rdi) ++ ++ movdqa %xmm5,%xmm0 ++ movdqa 
%xmm5,%xmm1 ++ pandn 320(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 320+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 512(%rsp),%xmm2 ++ pand 512+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 416(%rsp),%xmm2 ++ pand 416+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,32(%rdi) ++ movdqu %xmm3,48(%rdi) ++ ++L$add_doneq: ++ leaq 576+56(%rsp),%rsi ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbx ++ ++ movq -8(%rsi),%rbp ++ ++ leaq (%rsi),%rsp ++ ++L$point_addq_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++.globl _ecp_nistz256_point_add_affine ++.private_extern _ecp_nistz256_point_add_affine ++ ++.p2align 5 ++_ecp_nistz256_point_add_affine: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%rcx ++ movq 8(%rcx),%rcx ++ andl $0x80100,%ecx ++ cmpl $0x80100,%ecx ++ je L$point_add_affinex ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ subq $480+8,%rsp ++ ++L$add_affineq_body: ++ ++ movdqu 0(%rsi),%xmm0 ++ movq %rdx,%rbx ++ movdqu 16(%rsi),%xmm1 ++ movdqu 32(%rsi),%xmm2 ++ movdqu 48(%rsi),%xmm3 ++ movdqu 64(%rsi),%xmm4 ++ movdqu 80(%rsi),%xmm5 ++ movq 64+0(%rsi),%rax ++ movq 64+8(%rsi),%r14 ++ movq 64+16(%rsi),%r15 ++ movq 64+24(%rsi),%r8 ++ movdqa %xmm0,320(%rsp) ++ movdqa %xmm1,320+16(%rsp) ++ movdqa %xmm2,352(%rsp) ++ movdqa %xmm3,352+16(%rsp) ++ movdqa %xmm4,384(%rsp) ++ movdqa %xmm5,384+16(%rsp) ++ por %xmm4,%xmm5 ++ ++ movdqu 0(%rbx),%xmm0 ++ pshufd $0xb1,%xmm5,%xmm3 ++ movdqu 16(%rbx),%xmm1 ++ movdqu 32(%rbx),%xmm2 ++ por %xmm3,%xmm5 ++ movdqu 48(%rbx),%xmm3 ++ movdqa %xmm0,416(%rsp) ++ pshufd $0x1e,%xmm5,%xmm4 ++ movdqa %xmm1,416+16(%rsp) ++ por %xmm0,%xmm1 ++.byte 102,72,15,110,199 ++ movdqa %xmm2,448(%rsp) ++ movdqa %xmm3,448+16(%rsp) ++ por %xmm2,%xmm3 ++ por %xmm4,%xmm5 ++ pxor %xmm4,%xmm4 ++ por %xmm1,%xmm3 ++ ++ leaq 64-0(%rsi),%rsi ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_sqr_montq ++ ++ pcmpeqd %xmm4,%xmm5 ++ pshufd $0xb1,%xmm3,%xmm4 ++ movq 0(%rbx),%rax ++ ++ movq %r12,%r9 ++ por %xmm3,%xmm4 ++ pshufd $0,%xmm5,%xmm5 ++ pshufd $0x1e,%xmm4,%xmm3 ++ movq %r13,%r10 ++ por %xmm3,%xmm4 ++ pxor %xmm3,%xmm3 ++ movq %r14,%r11 ++ pcmpeqd %xmm3,%xmm4 ++ pshufd $0,%xmm4,%xmm4 ++ ++ leaq 32-0(%rsp),%rsi ++ movq %r15,%r12 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ leaq 320(%rsp),%rbx ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_sub_fromq ++ ++ movq 384(%rsp),%rax ++ leaq 384(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq 0+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 384(%rsp),%rax ++ leaq 384(%rsp),%rbx ++ movq 0+64(%rsp),%r9 ++ movq 8+64(%rsp),%r10 ++ leaq 0+64(%rsp),%rsi ++ movq 16+64(%rsp),%r11 ++ movq 24+64(%rsp),%r12 ++ leaq 288(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 448(%rsp),%rax ++ leaq 448(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq 0+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ leaq 352(%rsp),%rbx ++ leaq 96(%rsp),%rdi ++ call __ecp_nistz256_sub_fromq ++ ++ movq 0+64(%rsp),%rax ++ movq 8+64(%rsp),%r14 ++ leaq 0+64(%rsp),%rsi ++ movq 16+64(%rsp),%r15 ++ movq 24+64(%rsp),%r8 ++ leaq 128(%rsp),%rdi ++ call __ecp_nistz256_sqr_montq ++ ++ movq 0+96(%rsp),%rax ++ movq 
8+96(%rsp),%r14 ++ leaq 0+96(%rsp),%rsi ++ movq 16+96(%rsp),%r15 ++ movq 24+96(%rsp),%r8 ++ leaq 192(%rsp),%rdi ++ call __ecp_nistz256_sqr_montq ++ ++ movq 128(%rsp),%rax ++ leaq 128(%rsp),%rbx ++ movq 0+64(%rsp),%r9 ++ movq 8+64(%rsp),%r10 ++ leaq 0+64(%rsp),%rsi ++ movq 16+64(%rsp),%r11 ++ movq 24+64(%rsp),%r12 ++ leaq 160(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 320(%rsp),%rax ++ leaq 320(%rsp),%rbx ++ movq 0+128(%rsp),%r9 ++ movq 8+128(%rsp),%r10 ++ leaq 0+128(%rsp),%rsi ++ movq 16+128(%rsp),%r11 ++ movq 24+128(%rsp),%r12 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ ++ ++ ++ xorq %r11,%r11 ++ addq %r12,%r12 ++ leaq 192(%rsp),%rsi ++ adcq %r13,%r13 ++ movq %r12,%rax ++ adcq %r8,%r8 ++ adcq %r9,%r9 ++ movq %r13,%rbp ++ adcq $0,%r11 ++ ++ subq $-1,%r12 ++ movq %r8,%rcx ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%r10 ++ sbbq %r15,%r9 ++ sbbq $0,%r11 ++ ++ cmovcq %rax,%r12 ++ movq 0(%rsi),%rax ++ cmovcq %rbp,%r13 ++ movq 8(%rsi),%rbp ++ cmovcq %rcx,%r8 ++ movq 16(%rsi),%rcx ++ cmovcq %r10,%r9 ++ movq 24(%rsi),%r10 ++ ++ call __ecp_nistz256_subq ++ ++ leaq 160(%rsp),%rbx ++ leaq 224(%rsp),%rdi ++ call __ecp_nistz256_sub_fromq ++ ++ movq 0+0(%rsp),%rax ++ movq 0+8(%rsp),%rbp ++ movq 0+16(%rsp),%rcx ++ movq 0+24(%rsp),%r10 ++ leaq 64(%rsp),%rdi ++ ++ call __ecp_nistz256_subq ++ ++ movq %r12,0(%rdi) ++ movq %r13,8(%rdi) ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ movq 352(%rsp),%rax ++ leaq 352(%rsp),%rbx ++ movq 0+160(%rsp),%r9 ++ movq 8+160(%rsp),%r10 ++ leaq 0+160(%rsp),%rsi ++ movq 16+160(%rsp),%r11 ++ movq 24+160(%rsp),%r12 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ movq 96(%rsp),%rax ++ leaq 96(%rsp),%rbx ++ movq 0+64(%rsp),%r9 ++ movq 8+64(%rsp),%r10 ++ leaq 0+64(%rsp),%rsi ++ movq 16+64(%rsp),%r11 ++ movq 24+64(%rsp),%r12 ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_mul_montq ++ ++ leaq 32(%rsp),%rbx ++ leaq 256(%rsp),%rdi ++ call __ecp_nistz256_sub_fromq ++ ++.byte 102,72,15,126,199 ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 288(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 288+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand L$ONE_mont(%rip),%xmm2 ++ pand L$ONE_mont+16(%rip),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 384(%rsp),%xmm2 ++ pand 384+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,64(%rdi) ++ movdqu %xmm3,80(%rdi) ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 224(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 224+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 416(%rsp),%xmm2 ++ pand 416+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 320(%rsp),%xmm2 ++ pand 320+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,0(%rdi) ++ movdqu %xmm3,16(%rdi) ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 256(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 256+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 448(%rsp),%xmm2 ++ pand 448+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 352(%rsp),%xmm2 ++ pand 352+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,32(%rdi) ++ movdqu %xmm3,48(%rdi) ++ ++ leaq 480+56(%rsp),%rsi ++ ++ movq -48(%rsi),%r15 
++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbx ++ ++ movq -8(%rsi),%rbp ++ ++ leaq (%rsi),%rsp ++ ++L$add_affineq_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_add_tox: ++ ++ xorq %r11,%r11 ++ adcq 0(%rbx),%r12 ++ adcq 8(%rbx),%r13 ++ movq %r12,%rax ++ adcq 16(%rbx),%r8 ++ adcq 24(%rbx),%r9 ++ movq %r13,%rbp ++ adcq $0,%r11 ++ ++ xorq %r10,%r10 ++ sbbq $-1,%r12 ++ movq %r8,%rcx ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%r10 ++ sbbq %r15,%r9 ++ sbbq $0,%r11 ++ ++ cmovcq %rax,%r12 ++ cmovcq %rbp,%r13 ++ movq %r12,0(%rdi) ++ cmovcq %rcx,%r8 ++ movq %r13,8(%rdi) ++ cmovcq %r10,%r9 ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_sub_fromx: ++ ++ xorq %r11,%r11 ++ sbbq 0(%rbx),%r12 ++ sbbq 8(%rbx),%r13 ++ movq %r12,%rax ++ sbbq 16(%rbx),%r8 ++ sbbq 24(%rbx),%r9 ++ movq %r13,%rbp ++ sbbq $0,%r11 ++ ++ xorq %r10,%r10 ++ adcq $-1,%r12 ++ movq %r8,%rcx ++ adcq %r14,%r13 ++ adcq $0,%r8 ++ movq %r9,%r10 ++ adcq %r15,%r9 ++ ++ btq $0,%r11 ++ cmovncq %rax,%r12 ++ cmovncq %rbp,%r13 ++ movq %r12,0(%rdi) ++ cmovncq %rcx,%r8 ++ movq %r13,8(%rdi) ++ cmovncq %r10,%r9 ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_subx: ++ ++ xorq %r11,%r11 ++ sbbq %r12,%rax ++ sbbq %r13,%rbp ++ movq %rax,%r12 ++ sbbq %r8,%rcx ++ sbbq %r9,%r10 ++ movq %rbp,%r13 ++ sbbq $0,%r11 ++ ++ xorq %r9,%r9 ++ adcq $-1,%rax ++ movq %rcx,%r8 ++ adcq %r14,%rbp ++ adcq $0,%rcx ++ movq %r10,%r9 ++ adcq %r15,%r10 ++ ++ btq $0,%r11 ++ cmovcq %rax,%r12 ++ cmovcq %rbp,%r13 ++ cmovcq %rcx,%r8 ++ cmovcq %r10,%r9 ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++__ecp_nistz256_mul_by_2x: ++ ++ xorq %r11,%r11 ++ adcq %r12,%r12 ++ adcq %r13,%r13 ++ movq %r12,%rax ++ adcq %r8,%r8 ++ adcq %r9,%r9 ++ movq %r13,%rbp ++ adcq $0,%r11 ++ ++ xorq %r10,%r10 ++ sbbq $-1,%r12 ++ movq %r8,%rcx ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%r10 ++ sbbq %r15,%r9 ++ sbbq $0,%r11 ++ ++ cmovcq %rax,%r12 ++ cmovcq %rbp,%r13 ++ movq %r12,0(%rdi) ++ cmovcq %rcx,%r8 ++ movq %r13,8(%rdi) ++ cmovcq %r10,%r9 ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++ecp_nistz256_point_doublex: ++ ++L$point_doublex: ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ subq $160+8,%rsp ++ ++L$point_doublex_body: ++ ++L$point_double_shortcutx: ++ movdqu 0(%rsi),%xmm0 ++ movq %rsi,%rbx ++ movdqu 16(%rsi),%xmm1 ++ movq 32+0(%rsi),%r12 ++ movq 32+8(%rsi),%r13 ++ movq 32+16(%rsi),%r8 ++ movq 32+24(%rsi),%r9 ++ movq L$poly+8(%rip),%r14 ++ movq L$poly+24(%rip),%r15 ++ movdqa %xmm0,96(%rsp) ++ movdqa %xmm1,96+16(%rsp) ++ leaq 32(%rdi),%r10 ++ leaq 64(%rdi),%r11 ++.byte 102,72,15,110,199 ++.byte 102,73,15,110,202 ++.byte 102,73,15,110,211 ++ ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_by_2x ++ ++ movq 64+0(%rsi),%rdx ++ movq 64+8(%rsi),%r14 ++ movq 64+16(%rsi),%r15 ++ movq 64+24(%rsi),%r8 ++ leaq 64-128(%rsi),%rsi ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_sqr_montx ++ ++ movq 0+0(%rsp),%rdx ++ movq 8+0(%rsp),%r14 ++ leaq -128+0(%rsp),%rsi ++ movq 16+0(%rsp),%r15 ++ movq 24+0(%rsp),%r8 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_sqr_montx ++ ++ movq 32(%rbx),%rdx ++ movq 64+0(%rbx),%r9 ++ movq 64+8(%rbx),%r10 ++ movq 64+16(%rbx),%r11 ++ movq 64+24(%rbx),%r12 ++ leaq 64-128(%rbx),%rsi ++ leaq 32(%rbx),%rbx ++.byte 102,72,15,126,215 ++ call __ecp_nistz256_mul_montx ++ call __ecp_nistz256_mul_by_2x ++ ++ movq 
96+0(%rsp),%r12 ++ movq 96+8(%rsp),%r13 ++ leaq 64(%rsp),%rbx ++ movq 96+16(%rsp),%r8 ++ movq 96+24(%rsp),%r9 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_add_tox ++ ++ movq 96+0(%rsp),%r12 ++ movq 96+8(%rsp),%r13 ++ leaq 64(%rsp),%rbx ++ movq 96+16(%rsp),%r8 ++ movq 96+24(%rsp),%r9 ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_sub_fromx ++ ++ movq 0+0(%rsp),%rdx ++ movq 8+0(%rsp),%r14 ++ leaq -128+0(%rsp),%rsi ++ movq 16+0(%rsp),%r15 ++ movq 24+0(%rsp),%r8 ++.byte 102,72,15,126,207 ++ call __ecp_nistz256_sqr_montx ++ xorq %r9,%r9 ++ movq %r12,%rax ++ addq $-1,%r12 ++ movq %r13,%r10 ++ adcq %rsi,%r13 ++ movq %r14,%rcx ++ adcq $0,%r14 ++ movq %r15,%r8 ++ adcq %rbp,%r15 ++ adcq $0,%r9 ++ xorq %rsi,%rsi ++ testq $1,%rax ++ ++ cmovzq %rax,%r12 ++ cmovzq %r10,%r13 ++ cmovzq %rcx,%r14 ++ cmovzq %r8,%r15 ++ cmovzq %rsi,%r9 ++ ++ movq %r13,%rax ++ shrq $1,%r12 ++ shlq $63,%rax ++ movq %r14,%r10 ++ shrq $1,%r13 ++ orq %rax,%r12 ++ shlq $63,%r10 ++ movq %r15,%rcx ++ shrq $1,%r14 ++ orq %r10,%r13 ++ shlq $63,%rcx ++ movq %r12,0(%rdi) ++ shrq $1,%r15 ++ movq %r13,8(%rdi) ++ shlq $63,%r9 ++ orq %rcx,%r14 ++ orq %r9,%r15 ++ movq %r14,16(%rdi) ++ movq %r15,24(%rdi) ++ movq 64(%rsp),%rdx ++ leaq 64(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq -128+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ leaq 128(%rsp),%rdi ++ call __ecp_nistz256_mul_by_2x ++ ++ leaq 32(%rsp),%rbx ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_add_tox ++ ++ movq 96(%rsp),%rdx ++ leaq 96(%rsp),%rbx ++ movq 0+0(%rsp),%r9 ++ movq 8+0(%rsp),%r10 ++ leaq -128+0(%rsp),%rsi ++ movq 16+0(%rsp),%r11 ++ movq 24+0(%rsp),%r12 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ leaq 128(%rsp),%rdi ++ call __ecp_nistz256_mul_by_2x ++ ++ movq 0+32(%rsp),%rdx ++ movq 8+32(%rsp),%r14 ++ leaq -128+32(%rsp),%rsi ++ movq 16+32(%rsp),%r15 ++ movq 24+32(%rsp),%r8 ++.byte 102,72,15,126,199 ++ call __ecp_nistz256_sqr_montx ++ ++ leaq 128(%rsp),%rbx ++ movq %r14,%r8 ++ movq %r15,%r9 ++ movq %rsi,%r14 ++ movq %rbp,%r15 ++ call __ecp_nistz256_sub_fromx ++ ++ movq 0+0(%rsp),%rax ++ movq 0+8(%rsp),%rbp ++ movq 0+16(%rsp),%rcx ++ movq 0+24(%rsp),%r10 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_subx ++ ++ movq 32(%rsp),%rdx ++ leaq 32(%rsp),%rbx ++ movq %r12,%r14 ++ xorl %ecx,%ecx ++ movq %r12,0+0(%rsp) ++ movq %r13,%r10 ++ movq %r13,0+8(%rsp) ++ cmovzq %r8,%r11 ++ movq %r8,0+16(%rsp) ++ leaq 0-128(%rsp),%rsi ++ cmovzq %r9,%r12 ++ movq %r9,0+24(%rsp) ++ movq %r14,%r9 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++.byte 102,72,15,126,203 ++.byte 102,72,15,126,207 ++ call __ecp_nistz256_sub_fromx ++ ++ leaq 160+56(%rsp),%rsi ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbx ++ ++ movq -8(%rsi),%rbp ++ ++ leaq (%rsi),%rsp ++ ++L$point_doublex_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++ecp_nistz256_point_addx: ++ ++L$point_addx: ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ subq $576+8,%rsp ++ ++L$point_addx_body: ++ ++ movdqu 0(%rsi),%xmm0 ++ movdqu 16(%rsi),%xmm1 ++ movdqu 32(%rsi),%xmm2 ++ movdqu 48(%rsi),%xmm3 ++ movdqu 64(%rsi),%xmm4 ++ movdqu 80(%rsi),%xmm5 ++ movq %rsi,%rbx ++ movq %rdx,%rsi ++ movdqa %xmm0,384(%rsp) ++ movdqa %xmm1,384+16(%rsp) ++ movdqa %xmm2,416(%rsp) ++ movdqa %xmm3,416+16(%rsp) ++ movdqa %xmm4,448(%rsp) ++ movdqa %xmm5,448+16(%rsp) ++ por %xmm4,%xmm5 ++ ++ movdqu 0(%rsi),%xmm0 
++ pshufd $0xb1,%xmm5,%xmm3 ++ movdqu 16(%rsi),%xmm1 ++ movdqu 32(%rsi),%xmm2 ++ por %xmm3,%xmm5 ++ movdqu 48(%rsi),%xmm3 ++ movq 64+0(%rsi),%rdx ++ movq 64+8(%rsi),%r14 ++ movq 64+16(%rsi),%r15 ++ movq 64+24(%rsi),%r8 ++ movdqa %xmm0,480(%rsp) ++ pshufd $0x1e,%xmm5,%xmm4 ++ movdqa %xmm1,480+16(%rsp) ++ movdqu 64(%rsi),%xmm0 ++ movdqu 80(%rsi),%xmm1 ++ movdqa %xmm2,512(%rsp) ++ movdqa %xmm3,512+16(%rsp) ++ por %xmm4,%xmm5 ++ pxor %xmm4,%xmm4 ++ por %xmm0,%xmm1 ++.byte 102,72,15,110,199 ++ ++ leaq 64-128(%rsi),%rsi ++ movq %rdx,544+0(%rsp) ++ movq %r14,544+8(%rsp) ++ movq %r15,544+16(%rsp) ++ movq %r8,544+24(%rsp) ++ leaq 96(%rsp),%rdi ++ call __ecp_nistz256_sqr_montx ++ ++ pcmpeqd %xmm4,%xmm5 ++ pshufd $0xb1,%xmm1,%xmm4 ++ por %xmm1,%xmm4 ++ pshufd $0,%xmm5,%xmm5 ++ pshufd $0x1e,%xmm4,%xmm3 ++ por %xmm3,%xmm4 ++ pxor %xmm3,%xmm3 ++ pcmpeqd %xmm3,%xmm4 ++ pshufd $0,%xmm4,%xmm4 ++ movq 64+0(%rbx),%rdx ++ movq 64+8(%rbx),%r14 ++ movq 64+16(%rbx),%r15 ++ movq 64+24(%rbx),%r8 ++.byte 102,72,15,110,203 ++ ++ leaq 64-128(%rbx),%rsi ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_sqr_montx ++ ++ movq 544(%rsp),%rdx ++ leaq 544(%rsp),%rbx ++ movq 0+96(%rsp),%r9 ++ movq 8+96(%rsp),%r10 ++ leaq -128+96(%rsp),%rsi ++ movq 16+96(%rsp),%r11 ++ movq 24+96(%rsp),%r12 ++ leaq 224(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 448(%rsp),%rdx ++ leaq 448(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq -128+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 256(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 416(%rsp),%rdx ++ leaq 416(%rsp),%rbx ++ movq 0+224(%rsp),%r9 ++ movq 8+224(%rsp),%r10 ++ leaq -128+224(%rsp),%rsi ++ movq 16+224(%rsp),%r11 ++ movq 24+224(%rsp),%r12 ++ leaq 224(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 512(%rsp),%rdx ++ leaq 512(%rsp),%rbx ++ movq 0+256(%rsp),%r9 ++ movq 8+256(%rsp),%r10 ++ leaq -128+256(%rsp),%rsi ++ movq 16+256(%rsp),%r11 ++ movq 24+256(%rsp),%r12 ++ leaq 256(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ leaq 224(%rsp),%rbx ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_sub_fromx ++ ++ orq %r13,%r12 ++ movdqa %xmm4,%xmm2 ++ orq %r8,%r12 ++ orq %r9,%r12 ++ por %xmm5,%xmm2 ++.byte 102,73,15,110,220 ++ ++ movq 384(%rsp),%rdx ++ leaq 384(%rsp),%rbx ++ movq 0+96(%rsp),%r9 ++ movq 8+96(%rsp),%r10 ++ leaq -128+96(%rsp),%rsi ++ movq 16+96(%rsp),%r11 ++ movq 24+96(%rsp),%r12 ++ leaq 160(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 480(%rsp),%rdx ++ leaq 480(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq -128+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 192(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ leaq 160(%rsp),%rbx ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_sub_fromx ++ ++ orq %r13,%r12 ++ orq %r8,%r12 ++ orq %r9,%r12 ++ ++.byte 102,73,15,126,208 ++.byte 102,73,15,126,217 ++ orq %r8,%r12 ++.byte 0x3e ++ jnz L$add_proceedx ++ ++ ++ ++ testq %r9,%r9 ++ jz L$add_doublex ++ ++ ++ ++ ++ ++ ++.byte 102,72,15,126,199 ++ pxor %xmm0,%xmm0 ++ movdqu %xmm0,0(%rdi) ++ movdqu %xmm0,16(%rdi) ++ movdqu %xmm0,32(%rdi) ++ movdqu %xmm0,48(%rdi) ++ movdqu %xmm0,64(%rdi) ++ movdqu %xmm0,80(%rdi) ++ jmp L$add_donex ++ ++.p2align 5 ++L$add_doublex: ++.byte 102,72,15,126,206 ++.byte 102,72,15,126,199 ++ addq $416,%rsp ++ ++ jmp L$point_double_shortcutx ++ ++ ++.p2align 5 ++L$add_proceedx: ++ movq 0+64(%rsp),%rdx ++ movq 8+64(%rsp),%r14 ++ leaq -128+64(%rsp),%rsi ++ movq 16+64(%rsp),%r15 ++ movq 24+64(%rsp),%r8 ++ leaq 96(%rsp),%rdi ++ call __ecp_nistz256_sqr_montx ++ 
++ movq 448(%rsp),%rdx ++ leaq 448(%rsp),%rbx ++ movq 0+0(%rsp),%r9 ++ movq 8+0(%rsp),%r10 ++ leaq -128+0(%rsp),%rsi ++ movq 16+0(%rsp),%r11 ++ movq 24+0(%rsp),%r12 ++ leaq 352(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 0+0(%rsp),%rdx ++ movq 8+0(%rsp),%r14 ++ leaq -128+0(%rsp),%rsi ++ movq 16+0(%rsp),%r15 ++ movq 24+0(%rsp),%r8 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_sqr_montx ++ ++ movq 544(%rsp),%rdx ++ leaq 544(%rsp),%rbx ++ movq 0+352(%rsp),%r9 ++ movq 8+352(%rsp),%r10 ++ leaq -128+352(%rsp),%rsi ++ movq 16+352(%rsp),%r11 ++ movq 24+352(%rsp),%r12 ++ leaq 352(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 0(%rsp),%rdx ++ leaq 0(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq -128+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 128(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 160(%rsp),%rdx ++ leaq 160(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq -128+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 192(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ ++ ++ ++ xorq %r11,%r11 ++ addq %r12,%r12 ++ leaq 96(%rsp),%rsi ++ adcq %r13,%r13 ++ movq %r12,%rax ++ adcq %r8,%r8 ++ adcq %r9,%r9 ++ movq %r13,%rbp ++ adcq $0,%r11 ++ ++ subq $-1,%r12 ++ movq %r8,%rcx ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%r10 ++ sbbq %r15,%r9 ++ sbbq $0,%r11 ++ ++ cmovcq %rax,%r12 ++ movq 0(%rsi),%rax ++ cmovcq %rbp,%r13 ++ movq 8(%rsi),%rbp ++ cmovcq %rcx,%r8 ++ movq 16(%rsi),%rcx ++ cmovcq %r10,%r9 ++ movq 24(%rsi),%r10 ++ ++ call __ecp_nistz256_subx ++ ++ leaq 128(%rsp),%rbx ++ leaq 288(%rsp),%rdi ++ call __ecp_nistz256_sub_fromx ++ ++ movq 192+0(%rsp),%rax ++ movq 192+8(%rsp),%rbp ++ movq 192+16(%rsp),%rcx ++ movq 192+24(%rsp),%r10 ++ leaq 320(%rsp),%rdi ++ ++ call __ecp_nistz256_subx ++ ++ movq %r12,0(%rdi) ++ movq %r13,8(%rdi) ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ movq 128(%rsp),%rdx ++ leaq 128(%rsp),%rbx ++ movq 0+224(%rsp),%r9 ++ movq 8+224(%rsp),%r10 ++ leaq -128+224(%rsp),%rsi ++ movq 16+224(%rsp),%r11 ++ movq 24+224(%rsp),%r12 ++ leaq 256(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 320(%rsp),%rdx ++ leaq 320(%rsp),%rbx ++ movq 0+64(%rsp),%r9 ++ movq 8+64(%rsp),%r10 ++ leaq -128+64(%rsp),%rsi ++ movq 16+64(%rsp),%r11 ++ movq 24+64(%rsp),%r12 ++ leaq 320(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ leaq 256(%rsp),%rbx ++ leaq 320(%rsp),%rdi ++ call __ecp_nistz256_sub_fromx ++ ++.byte 102,72,15,126,199 ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 352(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 352+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 544(%rsp),%xmm2 ++ pand 544+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 448(%rsp),%xmm2 ++ pand 448+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,64(%rdi) ++ movdqu %xmm3,80(%rdi) ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 288(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 288+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 480(%rsp),%xmm2 ++ pand 480+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 384(%rsp),%xmm2 ++ pand 384+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,0(%rdi) ++ movdqu %xmm3,16(%rdi) ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 
320(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 320+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 512(%rsp),%xmm2 ++ pand 512+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 416(%rsp),%xmm2 ++ pand 416+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,32(%rdi) ++ movdqu %xmm3,48(%rdi) ++ ++L$add_donex: ++ leaq 576+56(%rsp),%rsi ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbx ++ ++ movq -8(%rsi),%rbp ++ ++ leaq (%rsi),%rsp ++ ++L$point_addx_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++ecp_nistz256_point_add_affinex: ++ ++L$point_add_affinex: ++ pushq %rbp ++ ++ pushq %rbx ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ subq $480+8,%rsp ++ ++L$add_affinex_body: ++ ++ movdqu 0(%rsi),%xmm0 ++ movq %rdx,%rbx ++ movdqu 16(%rsi),%xmm1 ++ movdqu 32(%rsi),%xmm2 ++ movdqu 48(%rsi),%xmm3 ++ movdqu 64(%rsi),%xmm4 ++ movdqu 80(%rsi),%xmm5 ++ movq 64+0(%rsi),%rdx ++ movq 64+8(%rsi),%r14 ++ movq 64+16(%rsi),%r15 ++ movq 64+24(%rsi),%r8 ++ movdqa %xmm0,320(%rsp) ++ movdqa %xmm1,320+16(%rsp) ++ movdqa %xmm2,352(%rsp) ++ movdqa %xmm3,352+16(%rsp) ++ movdqa %xmm4,384(%rsp) ++ movdqa %xmm5,384+16(%rsp) ++ por %xmm4,%xmm5 ++ ++ movdqu 0(%rbx),%xmm0 ++ pshufd $0xb1,%xmm5,%xmm3 ++ movdqu 16(%rbx),%xmm1 ++ movdqu 32(%rbx),%xmm2 ++ por %xmm3,%xmm5 ++ movdqu 48(%rbx),%xmm3 ++ movdqa %xmm0,416(%rsp) ++ pshufd $0x1e,%xmm5,%xmm4 ++ movdqa %xmm1,416+16(%rsp) ++ por %xmm0,%xmm1 ++.byte 102,72,15,110,199 ++ movdqa %xmm2,448(%rsp) ++ movdqa %xmm3,448+16(%rsp) ++ por %xmm2,%xmm3 ++ por %xmm4,%xmm5 ++ pxor %xmm4,%xmm4 ++ por %xmm1,%xmm3 ++ ++ leaq 64-128(%rsi),%rsi ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_sqr_montx ++ ++ pcmpeqd %xmm4,%xmm5 ++ pshufd $0xb1,%xmm3,%xmm4 ++ movq 0(%rbx),%rdx ++ ++ movq %r12,%r9 ++ por %xmm3,%xmm4 ++ pshufd $0,%xmm5,%xmm5 ++ pshufd $0x1e,%xmm4,%xmm3 ++ movq %r13,%r10 ++ por %xmm3,%xmm4 ++ pxor %xmm3,%xmm3 ++ movq %r14,%r11 ++ pcmpeqd %xmm3,%xmm4 ++ pshufd $0,%xmm4,%xmm4 ++ ++ leaq 32-128(%rsp),%rsi ++ movq %r15,%r12 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ leaq 320(%rsp),%rbx ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_sub_fromx ++ ++ movq 384(%rsp),%rdx ++ leaq 384(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq -128+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 384(%rsp),%rdx ++ leaq 384(%rsp),%rbx ++ movq 0+64(%rsp),%r9 ++ movq 8+64(%rsp),%r10 ++ leaq -128+64(%rsp),%rsi ++ movq 16+64(%rsp),%r11 ++ movq 24+64(%rsp),%r12 ++ leaq 288(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 448(%rsp),%rdx ++ leaq 448(%rsp),%rbx ++ movq 0+32(%rsp),%r9 ++ movq 8+32(%rsp),%r10 ++ leaq -128+32(%rsp),%rsi ++ movq 16+32(%rsp),%r11 ++ movq 24+32(%rsp),%r12 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ leaq 352(%rsp),%rbx ++ leaq 96(%rsp),%rdi ++ call __ecp_nistz256_sub_fromx ++ ++ movq 0+64(%rsp),%rdx ++ movq 8+64(%rsp),%r14 ++ leaq -128+64(%rsp),%rsi ++ movq 16+64(%rsp),%r15 ++ movq 24+64(%rsp),%r8 ++ leaq 128(%rsp),%rdi ++ call __ecp_nistz256_sqr_montx ++ ++ movq 0+96(%rsp),%rdx ++ movq 8+96(%rsp),%r14 ++ leaq -128+96(%rsp),%rsi ++ movq 16+96(%rsp),%r15 ++ movq 24+96(%rsp),%r8 ++ leaq 192(%rsp),%rdi ++ call __ecp_nistz256_sqr_montx ++ ++ movq 128(%rsp),%rdx ++ leaq 128(%rsp),%rbx ++ movq 
0+64(%rsp),%r9 ++ movq 8+64(%rsp),%r10 ++ leaq -128+64(%rsp),%rsi ++ movq 16+64(%rsp),%r11 ++ movq 24+64(%rsp),%r12 ++ leaq 160(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 320(%rsp),%rdx ++ leaq 320(%rsp),%rbx ++ movq 0+128(%rsp),%r9 ++ movq 8+128(%rsp),%r10 ++ leaq -128+128(%rsp),%rsi ++ movq 16+128(%rsp),%r11 ++ movq 24+128(%rsp),%r12 ++ leaq 0(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ ++ ++ ++ xorq %r11,%r11 ++ addq %r12,%r12 ++ leaq 192(%rsp),%rsi ++ adcq %r13,%r13 ++ movq %r12,%rax ++ adcq %r8,%r8 ++ adcq %r9,%r9 ++ movq %r13,%rbp ++ adcq $0,%r11 ++ ++ subq $-1,%r12 ++ movq %r8,%rcx ++ sbbq %r14,%r13 ++ sbbq $0,%r8 ++ movq %r9,%r10 ++ sbbq %r15,%r9 ++ sbbq $0,%r11 ++ ++ cmovcq %rax,%r12 ++ movq 0(%rsi),%rax ++ cmovcq %rbp,%r13 ++ movq 8(%rsi),%rbp ++ cmovcq %rcx,%r8 ++ movq 16(%rsi),%rcx ++ cmovcq %r10,%r9 ++ movq 24(%rsi),%r10 ++ ++ call __ecp_nistz256_subx ++ ++ leaq 160(%rsp),%rbx ++ leaq 224(%rsp),%rdi ++ call __ecp_nistz256_sub_fromx ++ ++ movq 0+0(%rsp),%rax ++ movq 0+8(%rsp),%rbp ++ movq 0+16(%rsp),%rcx ++ movq 0+24(%rsp),%r10 ++ leaq 64(%rsp),%rdi ++ ++ call __ecp_nistz256_subx ++ ++ movq %r12,0(%rdi) ++ movq %r13,8(%rdi) ++ movq %r8,16(%rdi) ++ movq %r9,24(%rdi) ++ movq 352(%rsp),%rdx ++ leaq 352(%rsp),%rbx ++ movq 0+160(%rsp),%r9 ++ movq 8+160(%rsp),%r10 ++ leaq -128+160(%rsp),%rsi ++ movq 16+160(%rsp),%r11 ++ movq 24+160(%rsp),%r12 ++ leaq 32(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ movq 96(%rsp),%rdx ++ leaq 96(%rsp),%rbx ++ movq 0+64(%rsp),%r9 ++ movq 8+64(%rsp),%r10 ++ leaq -128+64(%rsp),%rsi ++ movq 16+64(%rsp),%r11 ++ movq 24+64(%rsp),%r12 ++ leaq 64(%rsp),%rdi ++ call __ecp_nistz256_mul_montx ++ ++ leaq 32(%rsp),%rbx ++ leaq 256(%rsp),%rdi ++ call __ecp_nistz256_sub_fromx ++ ++.byte 102,72,15,126,199 ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 288(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 288+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand L$ONE_mont(%rip),%xmm2 ++ pand L$ONE_mont+16(%rip),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 384(%rsp),%xmm2 ++ pand 384+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,64(%rdi) ++ movdqu %xmm3,80(%rdi) ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 224(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 224+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 416(%rsp),%xmm2 ++ pand 416+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 320(%rsp),%xmm2 ++ pand 320+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,0(%rdi) ++ movdqu %xmm3,16(%rdi) ++ ++ movdqa %xmm5,%xmm0 ++ movdqa %xmm5,%xmm1 ++ pandn 256(%rsp),%xmm0 ++ movdqa %xmm5,%xmm2 ++ pandn 256+16(%rsp),%xmm1 ++ movdqa %xmm5,%xmm3 ++ pand 448(%rsp),%xmm2 ++ pand 448+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ ++ movdqa %xmm4,%xmm0 ++ movdqa %xmm4,%xmm1 ++ pandn %xmm2,%xmm0 ++ movdqa %xmm4,%xmm2 ++ pandn %xmm3,%xmm1 ++ movdqa %xmm4,%xmm3 ++ pand 352(%rsp),%xmm2 ++ pand 352+16(%rsp),%xmm3 ++ por %xmm0,%xmm2 ++ por %xmm1,%xmm3 ++ movdqu %xmm2,32(%rdi) ++ movdqu %xmm3,48(%rdi) ++ ++ leaq 480+56(%rsp),%rsi ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbx ++ ++ movq -8(%rsi),%rbp ++ ++ leaq (%rsi),%rsp ++ ++L$add_affinex_epilogue: ++ .byte 
0xf3,0xc3 ++ ++ ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S b/apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S +new file mode 100644 +index 0000000..ae7293a +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S +@@ -0,0 +1,328 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++.private_extern _beeu_mod_inverse_vartime ++.globl _beeu_mod_inverse_vartime ++.private_extern _beeu_mod_inverse_vartime ++.p2align 5 ++_beeu_mod_inverse_vartime: ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ pushq %rbx ++ ++ pushq %rsi ++ ++ ++ subq $80,%rsp ++ ++ movq %rdi,0(%rsp) ++ ++ ++ movq $1,%r8 ++ xorq %r9,%r9 ++ xorq %r10,%r10 ++ xorq %r11,%r11 ++ xorq %rdi,%rdi ++ ++ xorq %r12,%r12 ++ xorq %r13,%r13 ++ xorq %r14,%r14 ++ xorq %r15,%r15 ++ xorq %rbp,%rbp ++ ++ ++ vmovdqu 0(%rsi),%xmm0 ++ vmovdqu 16(%rsi),%xmm1 ++ vmovdqu %xmm0,48(%rsp) ++ vmovdqu %xmm1,64(%rsp) ++ ++ vmovdqu 0(%rdx),%xmm0 ++ vmovdqu 16(%rdx),%xmm1 ++ vmovdqu %xmm0,16(%rsp) ++ vmovdqu %xmm1,32(%rsp) ++ ++L$beeu_loop: ++ xorq %rbx,%rbx ++ orq 48(%rsp),%rbx ++ orq 56(%rsp),%rbx ++ orq 64(%rsp),%rbx ++ orq 72(%rsp),%rbx ++ jz L$beeu_loop_end ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ movq $1,%rcx ++ ++ ++L$beeu_shift_loop_XB: ++ movq %rcx,%rbx ++ andq 48(%rsp),%rbx ++ jnz L$beeu_shift_loop_end_XB ++ ++ ++ movq $1,%rbx ++ andq %r8,%rbx ++ jz L$shift1_0 ++ addq 0(%rdx),%r8 ++ adcq 8(%rdx),%r9 ++ adcq 16(%rdx),%r10 ++ adcq 24(%rdx),%r11 ++ adcq $0,%rdi ++ ++L$shift1_0: ++ shrdq $1,%r9,%r8 ++ shrdq $1,%r10,%r9 ++ shrdq $1,%r11,%r10 ++ shrdq $1,%rdi,%r11 ++ shrq $1,%rdi ++ ++ shlq $1,%rcx ++ ++ ++ ++ ++ ++ cmpq $0x8000000,%rcx ++ jne L$beeu_shift_loop_XB ++ ++L$beeu_shift_loop_end_XB: ++ bsfq %rcx,%rcx ++ testq %rcx,%rcx ++ jz L$beeu_no_shift_XB ++ ++ ++ ++ movq 8+48(%rsp),%rax ++ movq 16+48(%rsp),%rbx ++ movq 24+48(%rsp),%rsi ++ ++ shrdq %cl,%rax,0+48(%rsp) ++ shrdq %cl,%rbx,8+48(%rsp) ++ shrdq %cl,%rsi,16+48(%rsp) ++ ++ shrq %cl,%rsi ++ movq %rsi,24+48(%rsp) ++ ++ ++L$beeu_no_shift_XB: ++ ++ movq $1,%rcx ++ ++ ++L$beeu_shift_loop_YA: ++ movq %rcx,%rbx ++ andq 16(%rsp),%rbx ++ jnz L$beeu_shift_loop_end_YA ++ ++ ++ movq $1,%rbx ++ andq %r12,%rbx ++ jz L$shift1_1 ++ addq 0(%rdx),%r12 ++ adcq 8(%rdx),%r13 ++ adcq 16(%rdx),%r14 ++ adcq 24(%rdx),%r15 ++ adcq $0,%rbp ++ ++L$shift1_1: ++ shrdq $1,%r13,%r12 ++ shrdq $1,%r14,%r13 ++ shrdq $1,%r15,%r14 ++ shrdq $1,%rbp,%r15 ++ shrq $1,%rbp ++ ++ shlq $1,%rcx ++ ++ ++ ++ ++ ++ cmpq $0x8000000,%rcx ++ jne L$beeu_shift_loop_YA ++ ++L$beeu_shift_loop_end_YA: ++ bsfq %rcx,%rcx ++ testq %rcx,%rcx ++ jz L$beeu_no_shift_YA ++ ++ ++ ++ movq 8+16(%rsp),%rax ++ movq 16+16(%rsp),%rbx ++ movq 24+16(%rsp),%rsi ++ ++ shrdq %cl,%rax,0+16(%rsp) ++ shrdq %cl,%rbx,8+16(%rsp) ++ shrdq %cl,%rsi,16+16(%rsp) ++ ++ shrq %cl,%rsi ++ movq %rsi,24+16(%rsp) ++ ++ ++L$beeu_no_shift_YA: ++ ++ movq 48(%rsp),%rax ++ movq 56(%rsp),%rbx ++ movq 64(%rsp),%rsi ++ movq 72(%rsp),%rcx ++ subq 16(%rsp),%rax ++ sbbq 24(%rsp),%rbx ++ sbbq 32(%rsp),%rsi ++ sbbq 40(%rsp),%rcx ++ jnc L$beeu_B_bigger_than_A ++ ++ ++ movq 16(%rsp),%rax ++ movq 24(%rsp),%rbx ++ movq 32(%rsp),%rsi ++ movq 40(%rsp),%rcx ++ subq 48(%rsp),%rax ++ sbbq 
56(%rsp),%rbx ++ sbbq 64(%rsp),%rsi ++ sbbq 72(%rsp),%rcx ++ movq %rax,16(%rsp) ++ movq %rbx,24(%rsp) ++ movq %rsi,32(%rsp) ++ movq %rcx,40(%rsp) ++ ++ ++ addq %r8,%r12 ++ adcq %r9,%r13 ++ adcq %r10,%r14 ++ adcq %r11,%r15 ++ adcq %rdi,%rbp ++ jmp L$beeu_loop ++ ++L$beeu_B_bigger_than_A: ++ ++ movq %rax,48(%rsp) ++ movq %rbx,56(%rsp) ++ movq %rsi,64(%rsp) ++ movq %rcx,72(%rsp) ++ ++ ++ addq %r12,%r8 ++ adcq %r13,%r9 ++ adcq %r14,%r10 ++ adcq %r15,%r11 ++ adcq %rbp,%rdi ++ ++ jmp L$beeu_loop ++ ++L$beeu_loop_end: ++ ++ ++ ++ ++ movq 16(%rsp),%rbx ++ subq $1,%rbx ++ orq 24(%rsp),%rbx ++ orq 32(%rsp),%rbx ++ orq 40(%rsp),%rbx ++ ++ jnz L$beeu_err ++ ++ ++ ++ ++ movq 0(%rdx),%r8 ++ movq 8(%rdx),%r9 ++ movq 16(%rdx),%r10 ++ movq 24(%rdx),%r11 ++ xorq %rdi,%rdi ++ ++L$beeu_reduction_loop: ++ movq %r12,16(%rsp) ++ movq %r13,24(%rsp) ++ movq %r14,32(%rsp) ++ movq %r15,40(%rsp) ++ movq %rbp,48(%rsp) ++ ++ ++ subq %r8,%r12 ++ sbbq %r9,%r13 ++ sbbq %r10,%r14 ++ sbbq %r11,%r15 ++ sbbq $0,%rbp ++ ++ ++ cmovcq 16(%rsp),%r12 ++ cmovcq 24(%rsp),%r13 ++ cmovcq 32(%rsp),%r14 ++ cmovcq 40(%rsp),%r15 ++ jnc L$beeu_reduction_loop ++ ++ ++ subq %r12,%r8 ++ sbbq %r13,%r9 ++ sbbq %r14,%r10 ++ sbbq %r15,%r11 ++ ++L$beeu_save: ++ ++ movq 0(%rsp),%rdi ++ ++ movq %r8,0(%rdi) ++ movq %r9,8(%rdi) ++ movq %r10,16(%rdi) ++ movq %r11,24(%rdi) ++ ++ ++ movq $1,%rax ++ jmp L$beeu_finish ++ ++L$beeu_err: ++ ++ xorq %rax,%rax ++ ++L$beeu_finish: ++ addq $80,%rsp ++ ++ popq %rsi ++ ++ popq %rbx ++ ++ popq %r15 ++ ++ popq %r14 ++ ++ popq %r13 ++ ++ popq %r12 ++ ++ popq %rbp ++ ++ .byte 0xf3,0xc3 ++ ++ ++ ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/rdrand-x86_64.S b/apple-x86_64/crypto/fipsmodule/rdrand-x86_64.S +new file mode 100644 +index 0000000..664c067 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/rdrand-x86_64.S +@@ -0,0 +1,62 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++ ++ ++.globl _CRYPTO_rdrand ++.private_extern _CRYPTO_rdrand ++ ++.p2align 4 ++_CRYPTO_rdrand: ++ ++ xorq %rax,%rax ++.byte 72,15,199,242 ++ ++ adcq %rax,%rax ++ movq %rdx,0(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++.globl _CRYPTO_rdrand_multiple8_buf ++.private_extern _CRYPTO_rdrand_multiple8_buf ++ ++.p2align 4 ++_CRYPTO_rdrand_multiple8_buf: ++ ++ testq %rsi,%rsi ++ jz L$out ++ movq $8,%rdx ++L$loop: ++.byte 72,15,199,241 ++ jnc L$err ++ movq %rcx,0(%rdi) ++ addq %rdx,%rdi ++ subq %rdx,%rsi ++ jnz L$loop ++L$out: ++ movq $1,%rax ++ .byte 0xf3,0xc3 ++L$err: ++ xorq %rax,%rax ++ .byte 0xf3,0xc3 ++ ++ ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/rsaz-avx2.S b/apple-x86_64/crypto/fipsmodule/rsaz-avx2.S +new file mode 100644 +index 0000000..bebc699 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/rsaz-avx2.S +@@ -0,0 +1,1748 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++.globl _rsaz_1024_sqr_avx2 ++.private_extern _rsaz_1024_sqr_avx2 ++ ++.p2align 6 ++_rsaz_1024_sqr_avx2: ++ ++ leaq (%rsp),%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ vzeroupper ++ movq %rax,%rbp ++ ++ movq %rdx,%r13 ++ subq $832,%rsp ++ movq %r13,%r15 ++ subq $-128,%rdi ++ subq $-128,%rsi ++ subq $-128,%r13 ++ ++ andq $4095,%r15 ++ addq $320,%r15 ++ shrq $12,%r15 ++ vpxor %ymm9,%ymm9,%ymm9 ++ jz L$sqr_1024_no_n_copy ++ ++ ++ ++ ++ ++ subq $320,%rsp ++ vmovdqu 0-128(%r13),%ymm0 ++ andq $-2048,%rsp ++ vmovdqu 32-128(%r13),%ymm1 ++ vmovdqu 64-128(%r13),%ymm2 ++ vmovdqu 96-128(%r13),%ymm3 ++ vmovdqu 128-128(%r13),%ymm4 ++ vmovdqu 160-128(%r13),%ymm5 ++ vmovdqu 192-128(%r13),%ymm6 ++ vmovdqu 224-128(%r13),%ymm7 ++ vmovdqu 256-128(%r13),%ymm8 ++ leaq 832+128(%rsp),%r13 ++ vmovdqu %ymm0,0-128(%r13) ++ vmovdqu %ymm1,32-128(%r13) ++ vmovdqu %ymm2,64-128(%r13) ++ vmovdqu %ymm3,96-128(%r13) ++ vmovdqu %ymm4,128-128(%r13) ++ vmovdqu %ymm5,160-128(%r13) ++ vmovdqu %ymm6,192-128(%r13) ++ vmovdqu %ymm7,224-128(%r13) ++ vmovdqu %ymm8,256-128(%r13) ++ vmovdqu %ymm9,288-128(%r13) ++ ++L$sqr_1024_no_n_copy: ++ andq $-1024,%rsp ++ ++ vmovdqu 32-128(%rsi),%ymm1 ++ vmovdqu 64-128(%rsi),%ymm2 ++ vmovdqu 96-128(%rsi),%ymm3 ++ vmovdqu 128-128(%rsi),%ymm4 ++ vmovdqu 160-128(%rsi),%ymm5 ++ vmovdqu 192-128(%rsi),%ymm6 ++ vmovdqu 224-128(%rsi),%ymm7 ++ vmovdqu 256-128(%rsi),%ymm8 ++ ++ leaq 192(%rsp),%rbx ++ vmovdqu L$and_mask(%rip),%ymm15 ++ jmp L$OOP_GRANDE_SQR_1024 ++ ++.p2align 5 ++L$OOP_GRANDE_SQR_1024: ++ leaq 576+128(%rsp),%r9 ++ leaq 448(%rsp),%r12 ++ ++ ++ ++ ++ vpaddq %ymm1,%ymm1,%ymm1 ++ vpbroadcastq 0-128(%rsi),%ymm10 ++ vpaddq %ymm2,%ymm2,%ymm2 ++ vmovdqa %ymm1,0-128(%r9) ++ vpaddq %ymm3,%ymm3,%ymm3 ++ vmovdqa %ymm2,32-128(%r9) ++ vpaddq %ymm4,%ymm4,%ymm4 ++ vmovdqa %ymm3,64-128(%r9) ++ vpaddq %ymm5,%ymm5,%ymm5 ++ vmovdqa %ymm4,96-128(%r9) ++ vpaddq %ymm6,%ymm6,%ymm6 ++ vmovdqa %ymm5,128-128(%r9) ++ vpaddq %ymm7,%ymm7,%ymm7 ++ vmovdqa %ymm6,160-128(%r9) ++ vpaddq %ymm8,%ymm8,%ymm8 ++ vmovdqa %ymm7,192-128(%r9) ++ vpxor %ymm9,%ymm9,%ymm9 ++ vmovdqa %ymm8,224-128(%r9) ++ ++ vpmuludq 0-128(%rsi),%ymm10,%ymm0 ++ vpbroadcastq 32-128(%rsi),%ymm11 ++ vmovdqu %ymm9,288-192(%rbx) ++ vpmuludq %ymm10,%ymm1,%ymm1 ++ vmovdqu %ymm9,320-448(%r12) ++ vpmuludq %ymm10,%ymm2,%ymm2 ++ vmovdqu %ymm9,352-448(%r12) ++ vpmuludq %ymm10,%ymm3,%ymm3 ++ vmovdqu %ymm9,384-448(%r12) ++ vpmuludq %ymm10,%ymm4,%ymm4 ++ vmovdqu %ymm9,416-448(%r12) ++ vpmuludq %ymm10,%ymm5,%ymm5 ++ vmovdqu %ymm9,448-448(%r12) ++ vpmuludq %ymm10,%ymm6,%ymm6 ++ vmovdqu %ymm9,480-448(%r12) ++ vpmuludq %ymm10,%ymm7,%ymm7 ++ vmovdqu %ymm9,512-448(%r12) ++ vpmuludq %ymm10,%ymm8,%ymm8 ++ vpbroadcastq 64-128(%rsi),%ymm10 ++ vmovdqu %ymm9,544-448(%r12) ++ ++ movq %rsi,%r15 ++ movl $4,%r14d ++ jmp L$sqr_entry_1024 ++.p2align 5 ++L$OOP_SQR_1024: ++ vpbroadcastq 32-128(%r15),%ymm11 ++ vpmuludq 0-128(%rsi),%ymm10,%ymm0 ++ vpaddq 0-192(%rbx),%ymm0,%ymm0 ++ vpmuludq 0-128(%r9),%ymm10,%ymm1 ++ vpaddq 32-192(%rbx),%ymm1,%ymm1 ++ vpmuludq 32-128(%r9),%ymm10,%ymm2 ++ vpaddq 64-192(%rbx),%ymm2,%ymm2 ++ vpmuludq 64-128(%r9),%ymm10,%ymm3 ++ vpaddq 96-192(%rbx),%ymm3,%ymm3 ++ vpmuludq 96-128(%r9),%ymm10,%ymm4 ++ vpaddq 128-192(%rbx),%ymm4,%ymm4 ++ 
vpmuludq 128-128(%r9),%ymm10,%ymm5 ++ vpaddq 160-192(%rbx),%ymm5,%ymm5 ++ vpmuludq 160-128(%r9),%ymm10,%ymm6 ++ vpaddq 192-192(%rbx),%ymm6,%ymm6 ++ vpmuludq 192-128(%r9),%ymm10,%ymm7 ++ vpaddq 224-192(%rbx),%ymm7,%ymm7 ++ vpmuludq 224-128(%r9),%ymm10,%ymm8 ++ vpbroadcastq 64-128(%r15),%ymm10 ++ vpaddq 256-192(%rbx),%ymm8,%ymm8 ++L$sqr_entry_1024: ++ vmovdqu %ymm0,0-192(%rbx) ++ vmovdqu %ymm1,32-192(%rbx) ++ ++ vpmuludq 32-128(%rsi),%ymm11,%ymm12 ++ vpaddq %ymm12,%ymm2,%ymm2 ++ vpmuludq 32-128(%r9),%ymm11,%ymm14 ++ vpaddq %ymm14,%ymm3,%ymm3 ++ vpmuludq 64-128(%r9),%ymm11,%ymm13 ++ vpaddq %ymm13,%ymm4,%ymm4 ++ vpmuludq 96-128(%r9),%ymm11,%ymm12 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpmuludq 128-128(%r9),%ymm11,%ymm14 ++ vpaddq %ymm14,%ymm6,%ymm6 ++ vpmuludq 160-128(%r9),%ymm11,%ymm13 ++ vpaddq %ymm13,%ymm7,%ymm7 ++ vpmuludq 192-128(%r9),%ymm11,%ymm12 ++ vpaddq %ymm12,%ymm8,%ymm8 ++ vpmuludq 224-128(%r9),%ymm11,%ymm0 ++ vpbroadcastq 96-128(%r15),%ymm11 ++ vpaddq 288-192(%rbx),%ymm0,%ymm0 ++ ++ vmovdqu %ymm2,64-192(%rbx) ++ vmovdqu %ymm3,96-192(%rbx) ++ ++ vpmuludq 64-128(%rsi),%ymm10,%ymm13 ++ vpaddq %ymm13,%ymm4,%ymm4 ++ vpmuludq 64-128(%r9),%ymm10,%ymm12 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpmuludq 96-128(%r9),%ymm10,%ymm14 ++ vpaddq %ymm14,%ymm6,%ymm6 ++ vpmuludq 128-128(%r9),%ymm10,%ymm13 ++ vpaddq %ymm13,%ymm7,%ymm7 ++ vpmuludq 160-128(%r9),%ymm10,%ymm12 ++ vpaddq %ymm12,%ymm8,%ymm8 ++ vpmuludq 192-128(%r9),%ymm10,%ymm14 ++ vpaddq %ymm14,%ymm0,%ymm0 ++ vpmuludq 224-128(%r9),%ymm10,%ymm1 ++ vpbroadcastq 128-128(%r15),%ymm10 ++ vpaddq 320-448(%r12),%ymm1,%ymm1 ++ ++ vmovdqu %ymm4,128-192(%rbx) ++ vmovdqu %ymm5,160-192(%rbx) ++ ++ vpmuludq 96-128(%rsi),%ymm11,%ymm12 ++ vpaddq %ymm12,%ymm6,%ymm6 ++ vpmuludq 96-128(%r9),%ymm11,%ymm14 ++ vpaddq %ymm14,%ymm7,%ymm7 ++ vpmuludq 128-128(%r9),%ymm11,%ymm13 ++ vpaddq %ymm13,%ymm8,%ymm8 ++ vpmuludq 160-128(%r9),%ymm11,%ymm12 ++ vpaddq %ymm12,%ymm0,%ymm0 ++ vpmuludq 192-128(%r9),%ymm11,%ymm14 ++ vpaddq %ymm14,%ymm1,%ymm1 ++ vpmuludq 224-128(%r9),%ymm11,%ymm2 ++ vpbroadcastq 160-128(%r15),%ymm11 ++ vpaddq 352-448(%r12),%ymm2,%ymm2 ++ ++ vmovdqu %ymm6,192-192(%rbx) ++ vmovdqu %ymm7,224-192(%rbx) ++ ++ vpmuludq 128-128(%rsi),%ymm10,%ymm12 ++ vpaddq %ymm12,%ymm8,%ymm8 ++ vpmuludq 128-128(%r9),%ymm10,%ymm14 ++ vpaddq %ymm14,%ymm0,%ymm0 ++ vpmuludq 160-128(%r9),%ymm10,%ymm13 ++ vpaddq %ymm13,%ymm1,%ymm1 ++ vpmuludq 192-128(%r9),%ymm10,%ymm12 ++ vpaddq %ymm12,%ymm2,%ymm2 ++ vpmuludq 224-128(%r9),%ymm10,%ymm3 ++ vpbroadcastq 192-128(%r15),%ymm10 ++ vpaddq 384-448(%r12),%ymm3,%ymm3 ++ ++ vmovdqu %ymm8,256-192(%rbx) ++ vmovdqu %ymm0,288-192(%rbx) ++ leaq 8(%rbx),%rbx ++ ++ vpmuludq 160-128(%rsi),%ymm11,%ymm13 ++ vpaddq %ymm13,%ymm1,%ymm1 ++ vpmuludq 160-128(%r9),%ymm11,%ymm12 ++ vpaddq %ymm12,%ymm2,%ymm2 ++ vpmuludq 192-128(%r9),%ymm11,%ymm14 ++ vpaddq %ymm14,%ymm3,%ymm3 ++ vpmuludq 224-128(%r9),%ymm11,%ymm4 ++ vpbroadcastq 224-128(%r15),%ymm11 ++ vpaddq 416-448(%r12),%ymm4,%ymm4 ++ ++ vmovdqu %ymm1,320-448(%r12) ++ vmovdqu %ymm2,352-448(%r12) ++ ++ vpmuludq 192-128(%rsi),%ymm10,%ymm12 ++ vpaddq %ymm12,%ymm3,%ymm3 ++ vpmuludq 192-128(%r9),%ymm10,%ymm14 ++ vpbroadcastq 256-128(%r15),%ymm0 ++ vpaddq %ymm14,%ymm4,%ymm4 ++ vpmuludq 224-128(%r9),%ymm10,%ymm5 ++ vpbroadcastq 0+8-128(%r15),%ymm10 ++ vpaddq 448-448(%r12),%ymm5,%ymm5 ++ ++ vmovdqu %ymm3,384-448(%r12) ++ vmovdqu %ymm4,416-448(%r12) ++ leaq 8(%r15),%r15 ++ ++ vpmuludq 224-128(%rsi),%ymm11,%ymm12 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpmuludq 224-128(%r9),%ymm11,%ymm6 ++ vpaddq 480-448(%r12),%ymm6,%ymm6 ++ ++ vpmuludq 
256-128(%rsi),%ymm0,%ymm7 ++ vmovdqu %ymm5,448-448(%r12) ++ vpaddq 512-448(%r12),%ymm7,%ymm7 ++ vmovdqu %ymm6,480-448(%r12) ++ vmovdqu %ymm7,512-448(%r12) ++ leaq 8(%r12),%r12 ++ ++ decl %r14d ++ jnz L$OOP_SQR_1024 ++ ++ vmovdqu 256(%rsp),%ymm8 ++ vmovdqu 288(%rsp),%ymm1 ++ vmovdqu 320(%rsp),%ymm2 ++ leaq 192(%rsp),%rbx ++ ++ vpsrlq $29,%ymm8,%ymm14 ++ vpand %ymm15,%ymm8,%ymm8 ++ vpsrlq $29,%ymm1,%ymm11 ++ vpand %ymm15,%ymm1,%ymm1 ++ ++ vpermq $0x93,%ymm14,%ymm14 ++ vpxor %ymm9,%ymm9,%ymm9 ++ vpermq $0x93,%ymm11,%ymm11 ++ ++ vpblendd $3,%ymm9,%ymm14,%ymm10 ++ vpblendd $3,%ymm14,%ymm11,%ymm14 ++ vpaddq %ymm10,%ymm8,%ymm8 ++ vpblendd $3,%ymm11,%ymm9,%ymm11 ++ vpaddq %ymm14,%ymm1,%ymm1 ++ vpaddq %ymm11,%ymm2,%ymm2 ++ vmovdqu %ymm1,288-192(%rbx) ++ vmovdqu %ymm2,320-192(%rbx) ++ ++ movq (%rsp),%rax ++ movq 8(%rsp),%r10 ++ movq 16(%rsp),%r11 ++ movq 24(%rsp),%r12 ++ vmovdqu 32(%rsp),%ymm1 ++ vmovdqu 64-192(%rbx),%ymm2 ++ vmovdqu 96-192(%rbx),%ymm3 ++ vmovdqu 128-192(%rbx),%ymm4 ++ vmovdqu 160-192(%rbx),%ymm5 ++ vmovdqu 192-192(%rbx),%ymm6 ++ vmovdqu 224-192(%rbx),%ymm7 ++ ++ movq %rax,%r9 ++ imull %ecx,%eax ++ andl $0x1fffffff,%eax ++ vmovd %eax,%xmm12 ++ ++ movq %rax,%rdx ++ imulq -128(%r13),%rax ++ vpbroadcastq %xmm12,%ymm12 ++ addq %rax,%r9 ++ movq %rdx,%rax ++ imulq 8-128(%r13),%rax ++ shrq $29,%r9 ++ addq %rax,%r10 ++ movq %rdx,%rax ++ imulq 16-128(%r13),%rax ++ addq %r9,%r10 ++ addq %rax,%r11 ++ imulq 24-128(%r13),%rdx ++ addq %rdx,%r12 ++ ++ movq %r10,%rax ++ imull %ecx,%eax ++ andl $0x1fffffff,%eax ++ ++ movl $9,%r14d ++ jmp L$OOP_REDUCE_1024 ++ ++.p2align 5 ++L$OOP_REDUCE_1024: ++ vmovd %eax,%xmm13 ++ vpbroadcastq %xmm13,%ymm13 ++ ++ vpmuludq 32-128(%r13),%ymm12,%ymm10 ++ movq %rax,%rdx ++ imulq -128(%r13),%rax ++ vpaddq %ymm10,%ymm1,%ymm1 ++ addq %rax,%r10 ++ vpmuludq 64-128(%r13),%ymm12,%ymm14 ++ movq %rdx,%rax ++ imulq 8-128(%r13),%rax ++ vpaddq %ymm14,%ymm2,%ymm2 ++ vpmuludq 96-128(%r13),%ymm12,%ymm11 ++.byte 0x67 ++ addq %rax,%r11 ++.byte 0x67 ++ movq %rdx,%rax ++ imulq 16-128(%r13),%rax ++ shrq $29,%r10 ++ vpaddq %ymm11,%ymm3,%ymm3 ++ vpmuludq 128-128(%r13),%ymm12,%ymm10 ++ addq %rax,%r12 ++ addq %r10,%r11 ++ vpaddq %ymm10,%ymm4,%ymm4 ++ vpmuludq 160-128(%r13),%ymm12,%ymm14 ++ movq %r11,%rax ++ imull %ecx,%eax ++ vpaddq %ymm14,%ymm5,%ymm5 ++ vpmuludq 192-128(%r13),%ymm12,%ymm11 ++ andl $0x1fffffff,%eax ++ vpaddq %ymm11,%ymm6,%ymm6 ++ vpmuludq 224-128(%r13),%ymm12,%ymm10 ++ vpaddq %ymm10,%ymm7,%ymm7 ++ vpmuludq 256-128(%r13),%ymm12,%ymm14 ++ vmovd %eax,%xmm12 ++ ++ vpaddq %ymm14,%ymm8,%ymm8 ++ ++ vpbroadcastq %xmm12,%ymm12 ++ ++ vpmuludq 32-8-128(%r13),%ymm13,%ymm11 ++ vmovdqu 96-8-128(%r13),%ymm14 ++ movq %rax,%rdx ++ imulq -128(%r13),%rax ++ vpaddq %ymm11,%ymm1,%ymm1 ++ vpmuludq 64-8-128(%r13),%ymm13,%ymm10 ++ vmovdqu 128-8-128(%r13),%ymm11 ++ addq %rax,%r11 ++ movq %rdx,%rax ++ imulq 8-128(%r13),%rax ++ vpaddq %ymm10,%ymm2,%ymm2 ++ addq %r12,%rax ++ shrq $29,%r11 ++ vpmuludq %ymm13,%ymm14,%ymm14 ++ vmovdqu 160-8-128(%r13),%ymm10 ++ addq %r11,%rax ++ vpaddq %ymm14,%ymm3,%ymm3 ++ vpmuludq %ymm13,%ymm11,%ymm11 ++ vmovdqu 192-8-128(%r13),%ymm14 ++.byte 0x67 ++ movq %rax,%r12 ++ imull %ecx,%eax ++ vpaddq %ymm11,%ymm4,%ymm4 ++ vpmuludq %ymm13,%ymm10,%ymm10 ++.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 ++ andl $0x1fffffff,%eax ++ vpaddq %ymm10,%ymm5,%ymm5 ++ vpmuludq %ymm13,%ymm14,%ymm14 ++ vmovdqu 256-8-128(%r13),%ymm10 ++ vpaddq %ymm14,%ymm6,%ymm6 ++ vpmuludq %ymm13,%ymm11,%ymm11 ++ vmovdqu 288-8-128(%r13),%ymm9 ++ vmovd %eax,%xmm0 ++ imulq -128(%r13),%rax ++ vpaddq 
%ymm11,%ymm7,%ymm7 ++ vpmuludq %ymm13,%ymm10,%ymm10 ++ vmovdqu 32-16-128(%r13),%ymm14 ++ vpbroadcastq %xmm0,%ymm0 ++ vpaddq %ymm10,%ymm8,%ymm8 ++ vpmuludq %ymm13,%ymm9,%ymm9 ++ vmovdqu 64-16-128(%r13),%ymm11 ++ addq %rax,%r12 ++ ++ vmovdqu 32-24-128(%r13),%ymm13 ++ vpmuludq %ymm12,%ymm14,%ymm14 ++ vmovdqu 96-16-128(%r13),%ymm10 ++ vpaddq %ymm14,%ymm1,%ymm1 ++ vpmuludq %ymm0,%ymm13,%ymm13 ++ vpmuludq %ymm12,%ymm11,%ymm11 ++.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff ++ vpaddq %ymm1,%ymm13,%ymm13 ++ vpaddq %ymm11,%ymm2,%ymm2 ++ vpmuludq %ymm12,%ymm10,%ymm10 ++ vmovdqu 160-16-128(%r13),%ymm11 ++.byte 0x67 ++ vmovq %xmm13,%rax ++ vmovdqu %ymm13,(%rsp) ++ vpaddq %ymm10,%ymm3,%ymm3 ++ vpmuludq %ymm12,%ymm14,%ymm14 ++ vmovdqu 192-16-128(%r13),%ymm10 ++ vpaddq %ymm14,%ymm4,%ymm4 ++ vpmuludq %ymm12,%ymm11,%ymm11 ++ vmovdqu 224-16-128(%r13),%ymm14 ++ vpaddq %ymm11,%ymm5,%ymm5 ++ vpmuludq %ymm12,%ymm10,%ymm10 ++ vmovdqu 256-16-128(%r13),%ymm11 ++ vpaddq %ymm10,%ymm6,%ymm6 ++ vpmuludq %ymm12,%ymm14,%ymm14 ++ shrq $29,%r12 ++ vmovdqu 288-16-128(%r13),%ymm10 ++ addq %r12,%rax ++ vpaddq %ymm14,%ymm7,%ymm7 ++ vpmuludq %ymm12,%ymm11,%ymm11 ++ ++ movq %rax,%r9 ++ imull %ecx,%eax ++ vpaddq %ymm11,%ymm8,%ymm8 ++ vpmuludq %ymm12,%ymm10,%ymm10 ++ andl $0x1fffffff,%eax ++ vmovd %eax,%xmm12 ++ vmovdqu 96-24-128(%r13),%ymm11 ++.byte 0x67 ++ vpaddq %ymm10,%ymm9,%ymm9 ++ vpbroadcastq %xmm12,%ymm12 ++ ++ vpmuludq 64-24-128(%r13),%ymm0,%ymm14 ++ vmovdqu 128-24-128(%r13),%ymm10 ++ movq %rax,%rdx ++ imulq -128(%r13),%rax ++ movq 8(%rsp),%r10 ++ vpaddq %ymm14,%ymm2,%ymm1 ++ vpmuludq %ymm0,%ymm11,%ymm11 ++ vmovdqu 160-24-128(%r13),%ymm14 ++ addq %rax,%r9 ++ movq %rdx,%rax ++ imulq 8-128(%r13),%rax ++.byte 0x67 ++ shrq $29,%r9 ++ movq 16(%rsp),%r11 ++ vpaddq %ymm11,%ymm3,%ymm2 ++ vpmuludq %ymm0,%ymm10,%ymm10 ++ vmovdqu 192-24-128(%r13),%ymm11 ++ addq %rax,%r10 ++ movq %rdx,%rax ++ imulq 16-128(%r13),%rax ++ vpaddq %ymm10,%ymm4,%ymm3 ++ vpmuludq %ymm0,%ymm14,%ymm14 ++ vmovdqu 224-24-128(%r13),%ymm10 ++ imulq 24-128(%r13),%rdx ++ addq %rax,%r11 ++ leaq (%r9,%r10,1),%rax ++ vpaddq %ymm14,%ymm5,%ymm4 ++ vpmuludq %ymm0,%ymm11,%ymm11 ++ vmovdqu 256-24-128(%r13),%ymm14 ++ movq %rax,%r10 ++ imull %ecx,%eax ++ vpmuludq %ymm0,%ymm10,%ymm10 ++ vpaddq %ymm11,%ymm6,%ymm5 ++ vmovdqu 288-24-128(%r13),%ymm11 ++ andl $0x1fffffff,%eax ++ vpaddq %ymm10,%ymm7,%ymm6 ++ vpmuludq %ymm0,%ymm14,%ymm14 ++ addq 24(%rsp),%rdx ++ vpaddq %ymm14,%ymm8,%ymm7 ++ vpmuludq %ymm0,%ymm11,%ymm11 ++ vpaddq %ymm11,%ymm9,%ymm8 ++ vmovq %r12,%xmm9 ++ movq %rdx,%r12 ++ ++ decl %r14d ++ jnz L$OOP_REDUCE_1024 ++ leaq 448(%rsp),%r12 ++ vpaddq %ymm9,%ymm13,%ymm0 ++ vpxor %ymm9,%ymm9,%ymm9 ++ ++ vpaddq 288-192(%rbx),%ymm0,%ymm0 ++ vpaddq 320-448(%r12),%ymm1,%ymm1 ++ vpaddq 352-448(%r12),%ymm2,%ymm2 ++ vpaddq 384-448(%r12),%ymm3,%ymm3 ++ vpaddq 416-448(%r12),%ymm4,%ymm4 ++ vpaddq 448-448(%r12),%ymm5,%ymm5 ++ vpaddq 480-448(%r12),%ymm6,%ymm6 ++ vpaddq 512-448(%r12),%ymm7,%ymm7 ++ vpaddq 544-448(%r12),%ymm8,%ymm8 ++ ++ vpsrlq $29,%ymm0,%ymm14 ++ vpand %ymm15,%ymm0,%ymm0 ++ vpsrlq $29,%ymm1,%ymm11 ++ vpand %ymm15,%ymm1,%ymm1 ++ vpsrlq $29,%ymm2,%ymm12 ++ vpermq $0x93,%ymm14,%ymm14 ++ vpand %ymm15,%ymm2,%ymm2 ++ vpsrlq $29,%ymm3,%ymm13 ++ vpermq $0x93,%ymm11,%ymm11 ++ vpand %ymm15,%ymm3,%ymm3 ++ vpermq $0x93,%ymm12,%ymm12 ++ ++ vpblendd $3,%ymm9,%ymm14,%ymm10 ++ vpermq $0x93,%ymm13,%ymm13 ++ vpblendd $3,%ymm14,%ymm11,%ymm14 ++ vpaddq %ymm10,%ymm0,%ymm0 ++ vpblendd $3,%ymm11,%ymm12,%ymm11 ++ vpaddq %ymm14,%ymm1,%ymm1 ++ vpblendd $3,%ymm12,%ymm13,%ymm12 ++ vpaddq 
%ymm11,%ymm2,%ymm2 ++ vpblendd $3,%ymm13,%ymm9,%ymm13 ++ vpaddq %ymm12,%ymm3,%ymm3 ++ vpaddq %ymm13,%ymm4,%ymm4 ++ ++ vpsrlq $29,%ymm0,%ymm14 ++ vpand %ymm15,%ymm0,%ymm0 ++ vpsrlq $29,%ymm1,%ymm11 ++ vpand %ymm15,%ymm1,%ymm1 ++ vpsrlq $29,%ymm2,%ymm12 ++ vpermq $0x93,%ymm14,%ymm14 ++ vpand %ymm15,%ymm2,%ymm2 ++ vpsrlq $29,%ymm3,%ymm13 ++ vpermq $0x93,%ymm11,%ymm11 ++ vpand %ymm15,%ymm3,%ymm3 ++ vpermq $0x93,%ymm12,%ymm12 ++ ++ vpblendd $3,%ymm9,%ymm14,%ymm10 ++ vpermq $0x93,%ymm13,%ymm13 ++ vpblendd $3,%ymm14,%ymm11,%ymm14 ++ vpaddq %ymm10,%ymm0,%ymm0 ++ vpblendd $3,%ymm11,%ymm12,%ymm11 ++ vpaddq %ymm14,%ymm1,%ymm1 ++ vmovdqu %ymm0,0-128(%rdi) ++ vpblendd $3,%ymm12,%ymm13,%ymm12 ++ vpaddq %ymm11,%ymm2,%ymm2 ++ vmovdqu %ymm1,32-128(%rdi) ++ vpblendd $3,%ymm13,%ymm9,%ymm13 ++ vpaddq %ymm12,%ymm3,%ymm3 ++ vmovdqu %ymm2,64-128(%rdi) ++ vpaddq %ymm13,%ymm4,%ymm4 ++ vmovdqu %ymm3,96-128(%rdi) ++ vpsrlq $29,%ymm4,%ymm14 ++ vpand %ymm15,%ymm4,%ymm4 ++ vpsrlq $29,%ymm5,%ymm11 ++ vpand %ymm15,%ymm5,%ymm5 ++ vpsrlq $29,%ymm6,%ymm12 ++ vpermq $0x93,%ymm14,%ymm14 ++ vpand %ymm15,%ymm6,%ymm6 ++ vpsrlq $29,%ymm7,%ymm13 ++ vpermq $0x93,%ymm11,%ymm11 ++ vpand %ymm15,%ymm7,%ymm7 ++ vpsrlq $29,%ymm8,%ymm0 ++ vpermq $0x93,%ymm12,%ymm12 ++ vpand %ymm15,%ymm8,%ymm8 ++ vpermq $0x93,%ymm13,%ymm13 ++ ++ vpblendd $3,%ymm9,%ymm14,%ymm10 ++ vpermq $0x93,%ymm0,%ymm0 ++ vpblendd $3,%ymm14,%ymm11,%ymm14 ++ vpaddq %ymm10,%ymm4,%ymm4 ++ vpblendd $3,%ymm11,%ymm12,%ymm11 ++ vpaddq %ymm14,%ymm5,%ymm5 ++ vpblendd $3,%ymm12,%ymm13,%ymm12 ++ vpaddq %ymm11,%ymm6,%ymm6 ++ vpblendd $3,%ymm13,%ymm0,%ymm13 ++ vpaddq %ymm12,%ymm7,%ymm7 ++ vpaddq %ymm13,%ymm8,%ymm8 ++ ++ vpsrlq $29,%ymm4,%ymm14 ++ vpand %ymm15,%ymm4,%ymm4 ++ vpsrlq $29,%ymm5,%ymm11 ++ vpand %ymm15,%ymm5,%ymm5 ++ vpsrlq $29,%ymm6,%ymm12 ++ vpermq $0x93,%ymm14,%ymm14 ++ vpand %ymm15,%ymm6,%ymm6 ++ vpsrlq $29,%ymm7,%ymm13 ++ vpermq $0x93,%ymm11,%ymm11 ++ vpand %ymm15,%ymm7,%ymm7 ++ vpsrlq $29,%ymm8,%ymm0 ++ vpermq $0x93,%ymm12,%ymm12 ++ vpand %ymm15,%ymm8,%ymm8 ++ vpermq $0x93,%ymm13,%ymm13 ++ ++ vpblendd $3,%ymm9,%ymm14,%ymm10 ++ vpermq $0x93,%ymm0,%ymm0 ++ vpblendd $3,%ymm14,%ymm11,%ymm14 ++ vpaddq %ymm10,%ymm4,%ymm4 ++ vpblendd $3,%ymm11,%ymm12,%ymm11 ++ vpaddq %ymm14,%ymm5,%ymm5 ++ vmovdqu %ymm4,128-128(%rdi) ++ vpblendd $3,%ymm12,%ymm13,%ymm12 ++ vpaddq %ymm11,%ymm6,%ymm6 ++ vmovdqu %ymm5,160-128(%rdi) ++ vpblendd $3,%ymm13,%ymm0,%ymm13 ++ vpaddq %ymm12,%ymm7,%ymm7 ++ vmovdqu %ymm6,192-128(%rdi) ++ vpaddq %ymm13,%ymm8,%ymm8 ++ vmovdqu %ymm7,224-128(%rdi) ++ vmovdqu %ymm8,256-128(%rdi) ++ ++ movq %rdi,%rsi ++ decl %r8d ++ jne L$OOP_GRANDE_SQR_1024 ++ ++ vzeroall ++ movq %rbp,%rax ++ ++ movq -48(%rax),%r15 ++ ++ movq -40(%rax),%r14 ++ ++ movq -32(%rax),%r13 ++ ++ movq -24(%rax),%r12 ++ ++ movq -16(%rax),%rbp ++ ++ movq -8(%rax),%rbx ++ ++ leaq (%rax),%rsp ++ ++L$sqr_1024_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++.globl _rsaz_1024_mul_avx2 ++.private_extern _rsaz_1024_mul_avx2 ++ ++.p2align 6 ++_rsaz_1024_mul_avx2: ++ ++ leaq (%rsp),%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ movq %rax,%rbp ++ ++ vzeroall ++ movq %rdx,%r13 ++ subq $64,%rsp ++ ++ ++ ++ ++ ++ ++.byte 0x67,0x67 ++ movq %rsi,%r15 ++ andq $4095,%r15 ++ addq $320,%r15 ++ shrq $12,%r15 ++ movq %rsi,%r15 ++ cmovnzq %r13,%rsi ++ cmovnzq %r15,%r13 ++ ++ movq %rcx,%r15 ++ subq $-128,%rsi ++ subq $-128,%rcx ++ subq $-128,%rdi ++ ++ andq $4095,%r15 ++ addq $320,%r15 ++.byte 0x67,0x67 ++ shrq $12,%r15 ++ jz L$mul_1024_no_n_copy ++ ++ ++ ++ ++ ++ subq 
$320,%rsp ++ vmovdqu 0-128(%rcx),%ymm0 ++ andq $-512,%rsp ++ vmovdqu 32-128(%rcx),%ymm1 ++ vmovdqu 64-128(%rcx),%ymm2 ++ vmovdqu 96-128(%rcx),%ymm3 ++ vmovdqu 128-128(%rcx),%ymm4 ++ vmovdqu 160-128(%rcx),%ymm5 ++ vmovdqu 192-128(%rcx),%ymm6 ++ vmovdqu 224-128(%rcx),%ymm7 ++ vmovdqu 256-128(%rcx),%ymm8 ++ leaq 64+128(%rsp),%rcx ++ vmovdqu %ymm0,0-128(%rcx) ++ vpxor %ymm0,%ymm0,%ymm0 ++ vmovdqu %ymm1,32-128(%rcx) ++ vpxor %ymm1,%ymm1,%ymm1 ++ vmovdqu %ymm2,64-128(%rcx) ++ vpxor %ymm2,%ymm2,%ymm2 ++ vmovdqu %ymm3,96-128(%rcx) ++ vpxor %ymm3,%ymm3,%ymm3 ++ vmovdqu %ymm4,128-128(%rcx) ++ vpxor %ymm4,%ymm4,%ymm4 ++ vmovdqu %ymm5,160-128(%rcx) ++ vpxor %ymm5,%ymm5,%ymm5 ++ vmovdqu %ymm6,192-128(%rcx) ++ vpxor %ymm6,%ymm6,%ymm6 ++ vmovdqu %ymm7,224-128(%rcx) ++ vpxor %ymm7,%ymm7,%ymm7 ++ vmovdqu %ymm8,256-128(%rcx) ++ vmovdqa %ymm0,%ymm8 ++ vmovdqu %ymm9,288-128(%rcx) ++L$mul_1024_no_n_copy: ++ andq $-64,%rsp ++ ++ movq (%r13),%rbx ++ vpbroadcastq (%r13),%ymm10 ++ vmovdqu %ymm0,(%rsp) ++ xorq %r9,%r9 ++.byte 0x67 ++ xorq %r10,%r10 ++ xorq %r11,%r11 ++ xorq %r12,%r12 ++ ++ vmovdqu L$and_mask(%rip),%ymm15 ++ movl $9,%r14d ++ vmovdqu %ymm9,288-128(%rdi) ++ jmp L$oop_mul_1024 ++ ++.p2align 5 ++L$oop_mul_1024: ++ vpsrlq $29,%ymm3,%ymm9 ++ movq %rbx,%rax ++ imulq -128(%rsi),%rax ++ addq %r9,%rax ++ movq %rbx,%r10 ++ imulq 8-128(%rsi),%r10 ++ addq 8(%rsp),%r10 ++ ++ movq %rax,%r9 ++ imull %r8d,%eax ++ andl $0x1fffffff,%eax ++ ++ movq %rbx,%r11 ++ imulq 16-128(%rsi),%r11 ++ addq 16(%rsp),%r11 ++ ++ movq %rbx,%r12 ++ imulq 24-128(%rsi),%r12 ++ addq 24(%rsp),%r12 ++ vpmuludq 32-128(%rsi),%ymm10,%ymm0 ++ vmovd %eax,%xmm11 ++ vpaddq %ymm0,%ymm1,%ymm1 ++ vpmuludq 64-128(%rsi),%ymm10,%ymm12 ++ vpbroadcastq %xmm11,%ymm11 ++ vpaddq %ymm12,%ymm2,%ymm2 ++ vpmuludq 96-128(%rsi),%ymm10,%ymm13 ++ vpand %ymm15,%ymm3,%ymm3 ++ vpaddq %ymm13,%ymm3,%ymm3 ++ vpmuludq 128-128(%rsi),%ymm10,%ymm0 ++ vpaddq %ymm0,%ymm4,%ymm4 ++ vpmuludq 160-128(%rsi),%ymm10,%ymm12 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpmuludq 192-128(%rsi),%ymm10,%ymm13 ++ vpaddq %ymm13,%ymm6,%ymm6 ++ vpmuludq 224-128(%rsi),%ymm10,%ymm0 ++ vpermq $0x93,%ymm9,%ymm9 ++ vpaddq %ymm0,%ymm7,%ymm7 ++ vpmuludq 256-128(%rsi),%ymm10,%ymm12 ++ vpbroadcastq 8(%r13),%ymm10 ++ vpaddq %ymm12,%ymm8,%ymm8 ++ ++ movq %rax,%rdx ++ imulq -128(%rcx),%rax ++ addq %rax,%r9 ++ movq %rdx,%rax ++ imulq 8-128(%rcx),%rax ++ addq %rax,%r10 ++ movq %rdx,%rax ++ imulq 16-128(%rcx),%rax ++ addq %rax,%r11 ++ shrq $29,%r9 ++ imulq 24-128(%rcx),%rdx ++ addq %rdx,%r12 ++ addq %r9,%r10 ++ ++ vpmuludq 32-128(%rcx),%ymm11,%ymm13 ++ vmovq %xmm10,%rbx ++ vpaddq %ymm13,%ymm1,%ymm1 ++ vpmuludq 64-128(%rcx),%ymm11,%ymm0 ++ vpaddq %ymm0,%ymm2,%ymm2 ++ vpmuludq 96-128(%rcx),%ymm11,%ymm12 ++ vpaddq %ymm12,%ymm3,%ymm3 ++ vpmuludq 128-128(%rcx),%ymm11,%ymm13 ++ vpaddq %ymm13,%ymm4,%ymm4 ++ vpmuludq 160-128(%rcx),%ymm11,%ymm0 ++ vpaddq %ymm0,%ymm5,%ymm5 ++ vpmuludq 192-128(%rcx),%ymm11,%ymm12 ++ vpaddq %ymm12,%ymm6,%ymm6 ++ vpmuludq 224-128(%rcx),%ymm11,%ymm13 ++ vpblendd $3,%ymm14,%ymm9,%ymm12 ++ vpaddq %ymm13,%ymm7,%ymm7 ++ vpmuludq 256-128(%rcx),%ymm11,%ymm0 ++ vpaddq %ymm12,%ymm3,%ymm3 ++ vpaddq %ymm0,%ymm8,%ymm8 ++ ++ movq %rbx,%rax ++ imulq -128(%rsi),%rax ++ addq %rax,%r10 ++ vmovdqu -8+32-128(%rsi),%ymm12 ++ movq %rbx,%rax ++ imulq 8-128(%rsi),%rax ++ addq %rax,%r11 ++ vmovdqu -8+64-128(%rsi),%ymm13 ++ ++ movq %r10,%rax ++ vpblendd $0xfc,%ymm14,%ymm9,%ymm9 ++ imull %r8d,%eax ++ vpaddq %ymm9,%ymm4,%ymm4 ++ andl $0x1fffffff,%eax ++ ++ imulq 16-128(%rsi),%rbx ++ addq %rbx,%r12 ++ vpmuludq %ymm10,%ymm12,%ymm12 
++ vmovd %eax,%xmm11 ++ vmovdqu -8+96-128(%rsi),%ymm0 ++ vpaddq %ymm12,%ymm1,%ymm1 ++ vpmuludq %ymm10,%ymm13,%ymm13 ++ vpbroadcastq %xmm11,%ymm11 ++ vmovdqu -8+128-128(%rsi),%ymm12 ++ vpaddq %ymm13,%ymm2,%ymm2 ++ vpmuludq %ymm10,%ymm0,%ymm0 ++ vmovdqu -8+160-128(%rsi),%ymm13 ++ vpaddq %ymm0,%ymm3,%ymm3 ++ vpmuludq %ymm10,%ymm12,%ymm12 ++ vmovdqu -8+192-128(%rsi),%ymm0 ++ vpaddq %ymm12,%ymm4,%ymm4 ++ vpmuludq %ymm10,%ymm13,%ymm13 ++ vmovdqu -8+224-128(%rsi),%ymm12 ++ vpaddq %ymm13,%ymm5,%ymm5 ++ vpmuludq %ymm10,%ymm0,%ymm0 ++ vmovdqu -8+256-128(%rsi),%ymm13 ++ vpaddq %ymm0,%ymm6,%ymm6 ++ vpmuludq %ymm10,%ymm12,%ymm12 ++ vmovdqu -8+288-128(%rsi),%ymm9 ++ vpaddq %ymm12,%ymm7,%ymm7 ++ vpmuludq %ymm10,%ymm13,%ymm13 ++ vpaddq %ymm13,%ymm8,%ymm8 ++ vpmuludq %ymm10,%ymm9,%ymm9 ++ vpbroadcastq 16(%r13),%ymm10 ++ ++ movq %rax,%rdx ++ imulq -128(%rcx),%rax ++ addq %rax,%r10 ++ vmovdqu -8+32-128(%rcx),%ymm0 ++ movq %rdx,%rax ++ imulq 8-128(%rcx),%rax ++ addq %rax,%r11 ++ vmovdqu -8+64-128(%rcx),%ymm12 ++ shrq $29,%r10 ++ imulq 16-128(%rcx),%rdx ++ addq %rdx,%r12 ++ addq %r10,%r11 ++ ++ vpmuludq %ymm11,%ymm0,%ymm0 ++ vmovq %xmm10,%rbx ++ vmovdqu -8+96-128(%rcx),%ymm13 ++ vpaddq %ymm0,%ymm1,%ymm1 ++ vpmuludq %ymm11,%ymm12,%ymm12 ++ vmovdqu -8+128-128(%rcx),%ymm0 ++ vpaddq %ymm12,%ymm2,%ymm2 ++ vpmuludq %ymm11,%ymm13,%ymm13 ++ vmovdqu -8+160-128(%rcx),%ymm12 ++ vpaddq %ymm13,%ymm3,%ymm3 ++ vpmuludq %ymm11,%ymm0,%ymm0 ++ vmovdqu -8+192-128(%rcx),%ymm13 ++ vpaddq %ymm0,%ymm4,%ymm4 ++ vpmuludq %ymm11,%ymm12,%ymm12 ++ vmovdqu -8+224-128(%rcx),%ymm0 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpmuludq %ymm11,%ymm13,%ymm13 ++ vmovdqu -8+256-128(%rcx),%ymm12 ++ vpaddq %ymm13,%ymm6,%ymm6 ++ vpmuludq %ymm11,%ymm0,%ymm0 ++ vmovdqu -8+288-128(%rcx),%ymm13 ++ vpaddq %ymm0,%ymm7,%ymm7 ++ vpmuludq %ymm11,%ymm12,%ymm12 ++ vpaddq %ymm12,%ymm8,%ymm8 ++ vpmuludq %ymm11,%ymm13,%ymm13 ++ vpaddq %ymm13,%ymm9,%ymm9 ++ ++ vmovdqu -16+32-128(%rsi),%ymm0 ++ movq %rbx,%rax ++ imulq -128(%rsi),%rax ++ addq %r11,%rax ++ ++ vmovdqu -16+64-128(%rsi),%ymm12 ++ movq %rax,%r11 ++ imull %r8d,%eax ++ andl $0x1fffffff,%eax ++ ++ imulq 8-128(%rsi),%rbx ++ addq %rbx,%r12 ++ vpmuludq %ymm10,%ymm0,%ymm0 ++ vmovd %eax,%xmm11 ++ vmovdqu -16+96-128(%rsi),%ymm13 ++ vpaddq %ymm0,%ymm1,%ymm1 ++ vpmuludq %ymm10,%ymm12,%ymm12 ++ vpbroadcastq %xmm11,%ymm11 ++ vmovdqu -16+128-128(%rsi),%ymm0 ++ vpaddq %ymm12,%ymm2,%ymm2 ++ vpmuludq %ymm10,%ymm13,%ymm13 ++ vmovdqu -16+160-128(%rsi),%ymm12 ++ vpaddq %ymm13,%ymm3,%ymm3 ++ vpmuludq %ymm10,%ymm0,%ymm0 ++ vmovdqu -16+192-128(%rsi),%ymm13 ++ vpaddq %ymm0,%ymm4,%ymm4 ++ vpmuludq %ymm10,%ymm12,%ymm12 ++ vmovdqu -16+224-128(%rsi),%ymm0 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpmuludq %ymm10,%ymm13,%ymm13 ++ vmovdqu -16+256-128(%rsi),%ymm12 ++ vpaddq %ymm13,%ymm6,%ymm6 ++ vpmuludq %ymm10,%ymm0,%ymm0 ++ vmovdqu -16+288-128(%rsi),%ymm13 ++ vpaddq %ymm0,%ymm7,%ymm7 ++ vpmuludq %ymm10,%ymm12,%ymm12 ++ vpaddq %ymm12,%ymm8,%ymm8 ++ vpmuludq %ymm10,%ymm13,%ymm13 ++ vpbroadcastq 24(%r13),%ymm10 ++ vpaddq %ymm13,%ymm9,%ymm9 ++ ++ vmovdqu -16+32-128(%rcx),%ymm0 ++ movq %rax,%rdx ++ imulq -128(%rcx),%rax ++ addq %rax,%r11 ++ vmovdqu -16+64-128(%rcx),%ymm12 ++ imulq 8-128(%rcx),%rdx ++ addq %rdx,%r12 ++ shrq $29,%r11 ++ ++ vpmuludq %ymm11,%ymm0,%ymm0 ++ vmovq %xmm10,%rbx ++ vmovdqu -16+96-128(%rcx),%ymm13 ++ vpaddq %ymm0,%ymm1,%ymm1 ++ vpmuludq %ymm11,%ymm12,%ymm12 ++ vmovdqu -16+128-128(%rcx),%ymm0 ++ vpaddq %ymm12,%ymm2,%ymm2 ++ vpmuludq %ymm11,%ymm13,%ymm13 ++ vmovdqu -16+160-128(%rcx),%ymm12 ++ vpaddq %ymm13,%ymm3,%ymm3 ++ vpmuludq 
%ymm11,%ymm0,%ymm0 ++ vmovdqu -16+192-128(%rcx),%ymm13 ++ vpaddq %ymm0,%ymm4,%ymm4 ++ vpmuludq %ymm11,%ymm12,%ymm12 ++ vmovdqu -16+224-128(%rcx),%ymm0 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpmuludq %ymm11,%ymm13,%ymm13 ++ vmovdqu -16+256-128(%rcx),%ymm12 ++ vpaddq %ymm13,%ymm6,%ymm6 ++ vpmuludq %ymm11,%ymm0,%ymm0 ++ vmovdqu -16+288-128(%rcx),%ymm13 ++ vpaddq %ymm0,%ymm7,%ymm7 ++ vpmuludq %ymm11,%ymm12,%ymm12 ++ vmovdqu -24+32-128(%rsi),%ymm0 ++ vpaddq %ymm12,%ymm8,%ymm8 ++ vpmuludq %ymm11,%ymm13,%ymm13 ++ vmovdqu -24+64-128(%rsi),%ymm12 ++ vpaddq %ymm13,%ymm9,%ymm9 ++ ++ addq %r11,%r12 ++ imulq -128(%rsi),%rbx ++ addq %rbx,%r12 ++ ++ movq %r12,%rax ++ imull %r8d,%eax ++ andl $0x1fffffff,%eax ++ ++ vpmuludq %ymm10,%ymm0,%ymm0 ++ vmovd %eax,%xmm11 ++ vmovdqu -24+96-128(%rsi),%ymm13 ++ vpaddq %ymm0,%ymm1,%ymm1 ++ vpmuludq %ymm10,%ymm12,%ymm12 ++ vpbroadcastq %xmm11,%ymm11 ++ vmovdqu -24+128-128(%rsi),%ymm0 ++ vpaddq %ymm12,%ymm2,%ymm2 ++ vpmuludq %ymm10,%ymm13,%ymm13 ++ vmovdqu -24+160-128(%rsi),%ymm12 ++ vpaddq %ymm13,%ymm3,%ymm3 ++ vpmuludq %ymm10,%ymm0,%ymm0 ++ vmovdqu -24+192-128(%rsi),%ymm13 ++ vpaddq %ymm0,%ymm4,%ymm4 ++ vpmuludq %ymm10,%ymm12,%ymm12 ++ vmovdqu -24+224-128(%rsi),%ymm0 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpmuludq %ymm10,%ymm13,%ymm13 ++ vmovdqu -24+256-128(%rsi),%ymm12 ++ vpaddq %ymm13,%ymm6,%ymm6 ++ vpmuludq %ymm10,%ymm0,%ymm0 ++ vmovdqu -24+288-128(%rsi),%ymm13 ++ vpaddq %ymm0,%ymm7,%ymm7 ++ vpmuludq %ymm10,%ymm12,%ymm12 ++ vpaddq %ymm12,%ymm8,%ymm8 ++ vpmuludq %ymm10,%ymm13,%ymm13 ++ vpbroadcastq 32(%r13),%ymm10 ++ vpaddq %ymm13,%ymm9,%ymm9 ++ addq $32,%r13 ++ ++ vmovdqu -24+32-128(%rcx),%ymm0 ++ imulq -128(%rcx),%rax ++ addq %rax,%r12 ++ shrq $29,%r12 ++ ++ vmovdqu -24+64-128(%rcx),%ymm12 ++ vpmuludq %ymm11,%ymm0,%ymm0 ++ vmovq %xmm10,%rbx ++ vmovdqu -24+96-128(%rcx),%ymm13 ++ vpaddq %ymm0,%ymm1,%ymm0 ++ vpmuludq %ymm11,%ymm12,%ymm12 ++ vmovdqu %ymm0,(%rsp) ++ vpaddq %ymm12,%ymm2,%ymm1 ++ vmovdqu -24+128-128(%rcx),%ymm0 ++ vpmuludq %ymm11,%ymm13,%ymm13 ++ vmovdqu -24+160-128(%rcx),%ymm12 ++ vpaddq %ymm13,%ymm3,%ymm2 ++ vpmuludq %ymm11,%ymm0,%ymm0 ++ vmovdqu -24+192-128(%rcx),%ymm13 ++ vpaddq %ymm0,%ymm4,%ymm3 ++ vpmuludq %ymm11,%ymm12,%ymm12 ++ vmovdqu -24+224-128(%rcx),%ymm0 ++ vpaddq %ymm12,%ymm5,%ymm4 ++ vpmuludq %ymm11,%ymm13,%ymm13 ++ vmovdqu -24+256-128(%rcx),%ymm12 ++ vpaddq %ymm13,%ymm6,%ymm5 ++ vpmuludq %ymm11,%ymm0,%ymm0 ++ vmovdqu -24+288-128(%rcx),%ymm13 ++ movq %r12,%r9 ++ vpaddq %ymm0,%ymm7,%ymm6 ++ vpmuludq %ymm11,%ymm12,%ymm12 ++ addq (%rsp),%r9 ++ vpaddq %ymm12,%ymm8,%ymm7 ++ vpmuludq %ymm11,%ymm13,%ymm13 ++ vmovq %r12,%xmm12 ++ vpaddq %ymm13,%ymm9,%ymm8 ++ ++ decl %r14d ++ jnz L$oop_mul_1024 ++ vpaddq (%rsp),%ymm12,%ymm0 ++ ++ vpsrlq $29,%ymm0,%ymm12 ++ vpand %ymm15,%ymm0,%ymm0 ++ vpsrlq $29,%ymm1,%ymm13 ++ vpand %ymm15,%ymm1,%ymm1 ++ vpsrlq $29,%ymm2,%ymm10 ++ vpermq $0x93,%ymm12,%ymm12 ++ vpand %ymm15,%ymm2,%ymm2 ++ vpsrlq $29,%ymm3,%ymm11 ++ vpermq $0x93,%ymm13,%ymm13 ++ vpand %ymm15,%ymm3,%ymm3 ++ ++ vpblendd $3,%ymm14,%ymm12,%ymm9 ++ vpermq $0x93,%ymm10,%ymm10 ++ vpblendd $3,%ymm12,%ymm13,%ymm12 ++ vpermq $0x93,%ymm11,%ymm11 ++ vpaddq %ymm9,%ymm0,%ymm0 ++ vpblendd $3,%ymm13,%ymm10,%ymm13 ++ vpaddq %ymm12,%ymm1,%ymm1 ++ vpblendd $3,%ymm10,%ymm11,%ymm10 ++ vpaddq %ymm13,%ymm2,%ymm2 ++ vpblendd $3,%ymm11,%ymm14,%ymm11 ++ vpaddq %ymm10,%ymm3,%ymm3 ++ vpaddq %ymm11,%ymm4,%ymm4 ++ ++ vpsrlq $29,%ymm0,%ymm12 ++ vpand %ymm15,%ymm0,%ymm0 ++ vpsrlq $29,%ymm1,%ymm13 ++ vpand %ymm15,%ymm1,%ymm1 ++ vpsrlq $29,%ymm2,%ymm10 ++ vpermq $0x93,%ymm12,%ymm12 ++ vpand 
%ymm15,%ymm2,%ymm2 ++ vpsrlq $29,%ymm3,%ymm11 ++ vpermq $0x93,%ymm13,%ymm13 ++ vpand %ymm15,%ymm3,%ymm3 ++ vpermq $0x93,%ymm10,%ymm10 ++ ++ vpblendd $3,%ymm14,%ymm12,%ymm9 ++ vpermq $0x93,%ymm11,%ymm11 ++ vpblendd $3,%ymm12,%ymm13,%ymm12 ++ vpaddq %ymm9,%ymm0,%ymm0 ++ vpblendd $3,%ymm13,%ymm10,%ymm13 ++ vpaddq %ymm12,%ymm1,%ymm1 ++ vpblendd $3,%ymm10,%ymm11,%ymm10 ++ vpaddq %ymm13,%ymm2,%ymm2 ++ vpblendd $3,%ymm11,%ymm14,%ymm11 ++ vpaddq %ymm10,%ymm3,%ymm3 ++ vpaddq %ymm11,%ymm4,%ymm4 ++ ++ vmovdqu %ymm0,0-128(%rdi) ++ vmovdqu %ymm1,32-128(%rdi) ++ vmovdqu %ymm2,64-128(%rdi) ++ vmovdqu %ymm3,96-128(%rdi) ++ vpsrlq $29,%ymm4,%ymm12 ++ vpand %ymm15,%ymm4,%ymm4 ++ vpsrlq $29,%ymm5,%ymm13 ++ vpand %ymm15,%ymm5,%ymm5 ++ vpsrlq $29,%ymm6,%ymm10 ++ vpermq $0x93,%ymm12,%ymm12 ++ vpand %ymm15,%ymm6,%ymm6 ++ vpsrlq $29,%ymm7,%ymm11 ++ vpermq $0x93,%ymm13,%ymm13 ++ vpand %ymm15,%ymm7,%ymm7 ++ vpsrlq $29,%ymm8,%ymm0 ++ vpermq $0x93,%ymm10,%ymm10 ++ vpand %ymm15,%ymm8,%ymm8 ++ vpermq $0x93,%ymm11,%ymm11 ++ ++ vpblendd $3,%ymm14,%ymm12,%ymm9 ++ vpermq $0x93,%ymm0,%ymm0 ++ vpblendd $3,%ymm12,%ymm13,%ymm12 ++ vpaddq %ymm9,%ymm4,%ymm4 ++ vpblendd $3,%ymm13,%ymm10,%ymm13 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpblendd $3,%ymm10,%ymm11,%ymm10 ++ vpaddq %ymm13,%ymm6,%ymm6 ++ vpblendd $3,%ymm11,%ymm0,%ymm11 ++ vpaddq %ymm10,%ymm7,%ymm7 ++ vpaddq %ymm11,%ymm8,%ymm8 ++ ++ vpsrlq $29,%ymm4,%ymm12 ++ vpand %ymm15,%ymm4,%ymm4 ++ vpsrlq $29,%ymm5,%ymm13 ++ vpand %ymm15,%ymm5,%ymm5 ++ vpsrlq $29,%ymm6,%ymm10 ++ vpermq $0x93,%ymm12,%ymm12 ++ vpand %ymm15,%ymm6,%ymm6 ++ vpsrlq $29,%ymm7,%ymm11 ++ vpermq $0x93,%ymm13,%ymm13 ++ vpand %ymm15,%ymm7,%ymm7 ++ vpsrlq $29,%ymm8,%ymm0 ++ vpermq $0x93,%ymm10,%ymm10 ++ vpand %ymm15,%ymm8,%ymm8 ++ vpermq $0x93,%ymm11,%ymm11 ++ ++ vpblendd $3,%ymm14,%ymm12,%ymm9 ++ vpermq $0x93,%ymm0,%ymm0 ++ vpblendd $3,%ymm12,%ymm13,%ymm12 ++ vpaddq %ymm9,%ymm4,%ymm4 ++ vpblendd $3,%ymm13,%ymm10,%ymm13 ++ vpaddq %ymm12,%ymm5,%ymm5 ++ vpblendd $3,%ymm10,%ymm11,%ymm10 ++ vpaddq %ymm13,%ymm6,%ymm6 ++ vpblendd $3,%ymm11,%ymm0,%ymm11 ++ vpaddq %ymm10,%ymm7,%ymm7 ++ vpaddq %ymm11,%ymm8,%ymm8 ++ ++ vmovdqu %ymm4,128-128(%rdi) ++ vmovdqu %ymm5,160-128(%rdi) ++ vmovdqu %ymm6,192-128(%rdi) ++ vmovdqu %ymm7,224-128(%rdi) ++ vmovdqu %ymm8,256-128(%rdi) ++ vzeroupper ++ ++ movq %rbp,%rax ++ ++ movq -48(%rax),%r15 ++ ++ movq -40(%rax),%r14 ++ ++ movq -32(%rax),%r13 ++ ++ movq -24(%rax),%r12 ++ ++ movq -16(%rax),%rbp ++ ++ movq -8(%rax),%rbx ++ ++ leaq (%rax),%rsp ++ ++L$mul_1024_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++.globl _rsaz_1024_red2norm_avx2 ++.private_extern _rsaz_1024_red2norm_avx2 ++ ++.p2align 5 ++_rsaz_1024_red2norm_avx2: ++ ++ subq $-128,%rsi ++ xorq %rax,%rax ++ movq -128(%rsi),%r8 ++ movq -120(%rsi),%r9 ++ movq -112(%rsi),%r10 ++ shlq $0,%r8 ++ shlq $29,%r9 ++ movq %r10,%r11 ++ shlq $58,%r10 ++ shrq $6,%r11 ++ addq %r8,%rax ++ addq %r9,%rax ++ addq %r10,%rax ++ adcq $0,%r11 ++ movq %rax,0(%rdi) ++ movq %r11,%rax ++ movq -104(%rsi),%r8 ++ movq -96(%rsi),%r9 ++ shlq $23,%r8 ++ movq %r9,%r10 ++ shlq $52,%r9 ++ shrq $12,%r10 ++ addq %r8,%rax ++ addq %r9,%rax ++ adcq $0,%r10 ++ movq %rax,8(%rdi) ++ movq %r10,%rax ++ movq -88(%rsi),%r11 ++ movq -80(%rsi),%r8 ++ shlq $17,%r11 ++ movq %r8,%r9 ++ shlq $46,%r8 ++ shrq $18,%r9 ++ addq %r11,%rax ++ addq %r8,%rax ++ adcq $0,%r9 ++ movq %rax,16(%rdi) ++ movq %r9,%rax ++ movq -72(%rsi),%r10 ++ movq -64(%rsi),%r11 ++ shlq $11,%r10 ++ movq %r11,%r8 ++ shlq $40,%r11 ++ shrq $24,%r8 ++ addq %r10,%rax ++ addq %r11,%rax ++ adcq $0,%r8 ++ movq %rax,24(%rdi) ++ movq %r8,%rax 
++ movq -56(%rsi),%r9 ++ movq -48(%rsi),%r10 ++ movq -40(%rsi),%r11 ++ shlq $5,%r9 ++ shlq $34,%r10 ++ movq %r11,%r8 ++ shlq $63,%r11 ++ shrq $1,%r8 ++ addq %r9,%rax ++ addq %r10,%rax ++ addq %r11,%rax ++ adcq $0,%r8 ++ movq %rax,32(%rdi) ++ movq %r8,%rax ++ movq -32(%rsi),%r9 ++ movq -24(%rsi),%r10 ++ shlq $28,%r9 ++ movq %r10,%r11 ++ shlq $57,%r10 ++ shrq $7,%r11 ++ addq %r9,%rax ++ addq %r10,%rax ++ adcq $0,%r11 ++ movq %rax,40(%rdi) ++ movq %r11,%rax ++ movq -16(%rsi),%r8 ++ movq -8(%rsi),%r9 ++ shlq $22,%r8 ++ movq %r9,%r10 ++ shlq $51,%r9 ++ shrq $13,%r10 ++ addq %r8,%rax ++ addq %r9,%rax ++ adcq $0,%r10 ++ movq %rax,48(%rdi) ++ movq %r10,%rax ++ movq 0(%rsi),%r11 ++ movq 8(%rsi),%r8 ++ shlq $16,%r11 ++ movq %r8,%r9 ++ shlq $45,%r8 ++ shrq $19,%r9 ++ addq %r11,%rax ++ addq %r8,%rax ++ adcq $0,%r9 ++ movq %rax,56(%rdi) ++ movq %r9,%rax ++ movq 16(%rsi),%r10 ++ movq 24(%rsi),%r11 ++ shlq $10,%r10 ++ movq %r11,%r8 ++ shlq $39,%r11 ++ shrq $25,%r8 ++ addq %r10,%rax ++ addq %r11,%rax ++ adcq $0,%r8 ++ movq %rax,64(%rdi) ++ movq %r8,%rax ++ movq 32(%rsi),%r9 ++ movq 40(%rsi),%r10 ++ movq 48(%rsi),%r11 ++ shlq $4,%r9 ++ shlq $33,%r10 ++ movq %r11,%r8 ++ shlq $62,%r11 ++ shrq $2,%r8 ++ addq %r9,%rax ++ addq %r10,%rax ++ addq %r11,%rax ++ adcq $0,%r8 ++ movq %rax,72(%rdi) ++ movq %r8,%rax ++ movq 56(%rsi),%r9 ++ movq 64(%rsi),%r10 ++ shlq $27,%r9 ++ movq %r10,%r11 ++ shlq $56,%r10 ++ shrq $8,%r11 ++ addq %r9,%rax ++ addq %r10,%rax ++ adcq $0,%r11 ++ movq %rax,80(%rdi) ++ movq %r11,%rax ++ movq 72(%rsi),%r8 ++ movq 80(%rsi),%r9 ++ shlq $21,%r8 ++ movq %r9,%r10 ++ shlq $50,%r9 ++ shrq $14,%r10 ++ addq %r8,%rax ++ addq %r9,%rax ++ adcq $0,%r10 ++ movq %rax,88(%rdi) ++ movq %r10,%rax ++ movq 88(%rsi),%r11 ++ movq 96(%rsi),%r8 ++ shlq $15,%r11 ++ movq %r8,%r9 ++ shlq $44,%r8 ++ shrq $20,%r9 ++ addq %r11,%rax ++ addq %r8,%rax ++ adcq $0,%r9 ++ movq %rax,96(%rdi) ++ movq %r9,%rax ++ movq 104(%rsi),%r10 ++ movq 112(%rsi),%r11 ++ shlq $9,%r10 ++ movq %r11,%r8 ++ shlq $38,%r11 ++ shrq $26,%r8 ++ addq %r10,%rax ++ addq %r11,%rax ++ adcq $0,%r8 ++ movq %rax,104(%rdi) ++ movq %r8,%rax ++ movq 120(%rsi),%r9 ++ movq 128(%rsi),%r10 ++ movq 136(%rsi),%r11 ++ shlq $3,%r9 ++ shlq $32,%r10 ++ movq %r11,%r8 ++ shlq $61,%r11 ++ shrq $3,%r8 ++ addq %r9,%rax ++ addq %r10,%rax ++ addq %r11,%rax ++ adcq $0,%r8 ++ movq %rax,112(%rdi) ++ movq %r8,%rax ++ movq 144(%rsi),%r9 ++ movq 152(%rsi),%r10 ++ shlq $26,%r9 ++ movq %r10,%r11 ++ shlq $55,%r10 ++ shrq $9,%r11 ++ addq %r9,%rax ++ addq %r10,%rax ++ adcq $0,%r11 ++ movq %rax,120(%rdi) ++ movq %r11,%rax ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _rsaz_1024_norm2red_avx2 ++.private_extern _rsaz_1024_norm2red_avx2 ++ ++.p2align 5 ++_rsaz_1024_norm2red_avx2: ++ ++ subq $-128,%rdi ++ movq (%rsi),%r8 ++ movl $0x1fffffff,%eax ++ movq 8(%rsi),%r9 ++ movq %r8,%r11 ++ shrq $0,%r11 ++ andq %rax,%r11 ++ movq %r11,-128(%rdi) ++ movq %r8,%r10 ++ shrq $29,%r10 ++ andq %rax,%r10 ++ movq %r10,-120(%rdi) ++ shrdq $58,%r9,%r8 ++ andq %rax,%r8 ++ movq %r8,-112(%rdi) ++ movq 16(%rsi),%r10 ++ movq %r9,%r8 ++ shrq $23,%r8 ++ andq %rax,%r8 ++ movq %r8,-104(%rdi) ++ shrdq $52,%r10,%r9 ++ andq %rax,%r9 ++ movq %r9,-96(%rdi) ++ movq 24(%rsi),%r11 ++ movq %r10,%r9 ++ shrq $17,%r9 ++ andq %rax,%r9 ++ movq %r9,-88(%rdi) ++ shrdq $46,%r11,%r10 ++ andq %rax,%r10 ++ movq %r10,-80(%rdi) ++ movq 32(%rsi),%r8 ++ movq %r11,%r10 ++ shrq $11,%r10 ++ andq %rax,%r10 ++ movq %r10,-72(%rdi) ++ shrdq $40,%r8,%r11 ++ andq %rax,%r11 ++ movq %r11,-64(%rdi) ++ movq 40(%rsi),%r9 ++ movq %r8,%r11 ++ shrq $5,%r11 ++ andq 
%rax,%r11 ++ movq %r11,-56(%rdi) ++ movq %r8,%r10 ++ shrq $34,%r10 ++ andq %rax,%r10 ++ movq %r10,-48(%rdi) ++ shrdq $63,%r9,%r8 ++ andq %rax,%r8 ++ movq %r8,-40(%rdi) ++ movq 48(%rsi),%r10 ++ movq %r9,%r8 ++ shrq $28,%r8 ++ andq %rax,%r8 ++ movq %r8,-32(%rdi) ++ shrdq $57,%r10,%r9 ++ andq %rax,%r9 ++ movq %r9,-24(%rdi) ++ movq 56(%rsi),%r11 ++ movq %r10,%r9 ++ shrq $22,%r9 ++ andq %rax,%r9 ++ movq %r9,-16(%rdi) ++ shrdq $51,%r11,%r10 ++ andq %rax,%r10 ++ movq %r10,-8(%rdi) ++ movq 64(%rsi),%r8 ++ movq %r11,%r10 ++ shrq $16,%r10 ++ andq %rax,%r10 ++ movq %r10,0(%rdi) ++ shrdq $45,%r8,%r11 ++ andq %rax,%r11 ++ movq %r11,8(%rdi) ++ movq 72(%rsi),%r9 ++ movq %r8,%r11 ++ shrq $10,%r11 ++ andq %rax,%r11 ++ movq %r11,16(%rdi) ++ shrdq $39,%r9,%r8 ++ andq %rax,%r8 ++ movq %r8,24(%rdi) ++ movq 80(%rsi),%r10 ++ movq %r9,%r8 ++ shrq $4,%r8 ++ andq %rax,%r8 ++ movq %r8,32(%rdi) ++ movq %r9,%r11 ++ shrq $33,%r11 ++ andq %rax,%r11 ++ movq %r11,40(%rdi) ++ shrdq $62,%r10,%r9 ++ andq %rax,%r9 ++ movq %r9,48(%rdi) ++ movq 88(%rsi),%r11 ++ movq %r10,%r9 ++ shrq $27,%r9 ++ andq %rax,%r9 ++ movq %r9,56(%rdi) ++ shrdq $56,%r11,%r10 ++ andq %rax,%r10 ++ movq %r10,64(%rdi) ++ movq 96(%rsi),%r8 ++ movq %r11,%r10 ++ shrq $21,%r10 ++ andq %rax,%r10 ++ movq %r10,72(%rdi) ++ shrdq $50,%r8,%r11 ++ andq %rax,%r11 ++ movq %r11,80(%rdi) ++ movq 104(%rsi),%r9 ++ movq %r8,%r11 ++ shrq $15,%r11 ++ andq %rax,%r11 ++ movq %r11,88(%rdi) ++ shrdq $44,%r9,%r8 ++ andq %rax,%r8 ++ movq %r8,96(%rdi) ++ movq 112(%rsi),%r10 ++ movq %r9,%r8 ++ shrq $9,%r8 ++ andq %rax,%r8 ++ movq %r8,104(%rdi) ++ shrdq $38,%r10,%r9 ++ andq %rax,%r9 ++ movq %r9,112(%rdi) ++ movq 120(%rsi),%r11 ++ movq %r10,%r9 ++ shrq $3,%r9 ++ andq %rax,%r9 ++ movq %r9,120(%rdi) ++ movq %r10,%r8 ++ shrq $32,%r8 ++ andq %rax,%r8 ++ movq %r8,128(%rdi) ++ shrdq $61,%r11,%r10 ++ andq %rax,%r10 ++ movq %r10,136(%rdi) ++ xorq %r8,%r8 ++ movq %r11,%r10 ++ shrq $26,%r10 ++ andq %rax,%r10 ++ movq %r10,144(%rdi) ++ shrdq $55,%r8,%r11 ++ andq %rax,%r11 ++ movq %r11,152(%rdi) ++ movq %r8,160(%rdi) ++ movq %r8,168(%rdi) ++ movq %r8,176(%rdi) ++ movq %r8,184(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _rsaz_1024_scatter5_avx2 ++.private_extern _rsaz_1024_scatter5_avx2 ++ ++.p2align 5 ++_rsaz_1024_scatter5_avx2: ++ ++ vzeroupper ++ vmovdqu L$scatter_permd(%rip),%ymm5 ++ shll $4,%edx ++ leaq (%rdi,%rdx,1),%rdi ++ movl $9,%eax ++ jmp L$oop_scatter_1024 ++ ++.p2align 5 ++L$oop_scatter_1024: ++ vmovdqu (%rsi),%ymm0 ++ leaq 32(%rsi),%rsi ++ vpermd %ymm0,%ymm5,%ymm0 ++ vmovdqu %xmm0,(%rdi) ++ leaq 512(%rdi),%rdi ++ decl %eax ++ jnz L$oop_scatter_1024 ++ ++ vzeroupper ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _rsaz_1024_gather5_avx2 ++.private_extern _rsaz_1024_gather5_avx2 ++ ++.p2align 5 ++_rsaz_1024_gather5_avx2: ++ ++ vzeroupper ++ movq %rsp,%r11 ++ ++ leaq -256(%rsp),%rsp ++ andq $-32,%rsp ++ leaq L$inc(%rip),%r10 ++ leaq -128(%rsp),%rax ++ ++ vmovd %edx,%xmm4 ++ vmovdqa (%r10),%ymm0 ++ vmovdqa 32(%r10),%ymm1 ++ vmovdqa 64(%r10),%ymm5 ++ vpbroadcastd %xmm4,%ymm4 ++ ++ vpaddd %ymm5,%ymm0,%ymm2 ++ vpcmpeqd %ymm4,%ymm0,%ymm0 ++ vpaddd %ymm5,%ymm1,%ymm3 ++ vpcmpeqd %ymm4,%ymm1,%ymm1 ++ vmovdqa %ymm0,0+128(%rax) ++ vpaddd %ymm5,%ymm2,%ymm0 ++ vpcmpeqd %ymm4,%ymm2,%ymm2 ++ vmovdqa %ymm1,32+128(%rax) ++ vpaddd %ymm5,%ymm3,%ymm1 ++ vpcmpeqd %ymm4,%ymm3,%ymm3 ++ vmovdqa %ymm2,64+128(%rax) ++ vpaddd %ymm5,%ymm0,%ymm2 ++ vpcmpeqd %ymm4,%ymm0,%ymm0 ++ vmovdqa %ymm3,96+128(%rax) ++ vpaddd %ymm5,%ymm1,%ymm3 ++ vpcmpeqd %ymm4,%ymm1,%ymm1 ++ vmovdqa %ymm0,128+128(%rax) ++ vpaddd %ymm5,%ymm2,%ymm8 ++ vpcmpeqd 
%ymm4,%ymm2,%ymm2 ++ vmovdqa %ymm1,160+128(%rax) ++ vpaddd %ymm5,%ymm3,%ymm9 ++ vpcmpeqd %ymm4,%ymm3,%ymm3 ++ vmovdqa %ymm2,192+128(%rax) ++ vpaddd %ymm5,%ymm8,%ymm10 ++ vpcmpeqd %ymm4,%ymm8,%ymm8 ++ vmovdqa %ymm3,224+128(%rax) ++ vpaddd %ymm5,%ymm9,%ymm11 ++ vpcmpeqd %ymm4,%ymm9,%ymm9 ++ vpaddd %ymm5,%ymm10,%ymm12 ++ vpcmpeqd %ymm4,%ymm10,%ymm10 ++ vpaddd %ymm5,%ymm11,%ymm13 ++ vpcmpeqd %ymm4,%ymm11,%ymm11 ++ vpaddd %ymm5,%ymm12,%ymm14 ++ vpcmpeqd %ymm4,%ymm12,%ymm12 ++ vpaddd %ymm5,%ymm13,%ymm15 ++ vpcmpeqd %ymm4,%ymm13,%ymm13 ++ vpcmpeqd %ymm4,%ymm14,%ymm14 ++ vpcmpeqd %ymm4,%ymm15,%ymm15 ++ ++ vmovdqa -32(%r10),%ymm7 ++ leaq 128(%rsi),%rsi ++ movl $9,%edx ++ ++L$oop_gather_1024: ++ vmovdqa 0-128(%rsi),%ymm0 ++ vmovdqa 32-128(%rsi),%ymm1 ++ vmovdqa 64-128(%rsi),%ymm2 ++ vmovdqa 96-128(%rsi),%ymm3 ++ vpand 0+128(%rax),%ymm0,%ymm0 ++ vpand 32+128(%rax),%ymm1,%ymm1 ++ vpand 64+128(%rax),%ymm2,%ymm2 ++ vpor %ymm0,%ymm1,%ymm4 ++ vpand 96+128(%rax),%ymm3,%ymm3 ++ vmovdqa 128-128(%rsi),%ymm0 ++ vmovdqa 160-128(%rsi),%ymm1 ++ vpor %ymm2,%ymm3,%ymm5 ++ vmovdqa 192-128(%rsi),%ymm2 ++ vmovdqa 224-128(%rsi),%ymm3 ++ vpand 128+128(%rax),%ymm0,%ymm0 ++ vpand 160+128(%rax),%ymm1,%ymm1 ++ vpand 192+128(%rax),%ymm2,%ymm2 ++ vpor %ymm0,%ymm4,%ymm4 ++ vpand 224+128(%rax),%ymm3,%ymm3 ++ vpand 256-128(%rsi),%ymm8,%ymm0 ++ vpor %ymm1,%ymm5,%ymm5 ++ vpand 288-128(%rsi),%ymm9,%ymm1 ++ vpor %ymm2,%ymm4,%ymm4 ++ vpand 320-128(%rsi),%ymm10,%ymm2 ++ vpor %ymm3,%ymm5,%ymm5 ++ vpand 352-128(%rsi),%ymm11,%ymm3 ++ vpor %ymm0,%ymm4,%ymm4 ++ vpand 384-128(%rsi),%ymm12,%ymm0 ++ vpor %ymm1,%ymm5,%ymm5 ++ vpand 416-128(%rsi),%ymm13,%ymm1 ++ vpor %ymm2,%ymm4,%ymm4 ++ vpand 448-128(%rsi),%ymm14,%ymm2 ++ vpor %ymm3,%ymm5,%ymm5 ++ vpand 480-128(%rsi),%ymm15,%ymm3 ++ leaq 512(%rsi),%rsi ++ vpor %ymm0,%ymm4,%ymm4 ++ vpor %ymm1,%ymm5,%ymm5 ++ vpor %ymm2,%ymm4,%ymm4 ++ vpor %ymm3,%ymm5,%ymm5 ++ ++ vpor %ymm5,%ymm4,%ymm4 ++ vextracti128 $1,%ymm4,%xmm5 ++ vpor %xmm4,%xmm5,%xmm5 ++ vpermd %ymm5,%ymm7,%ymm5 ++ vmovdqu %ymm5,(%rdi) ++ leaq 32(%rdi),%rdi ++ decl %edx ++ jnz L$oop_gather_1024 ++ ++ vpxor %ymm0,%ymm0,%ymm0 ++ vmovdqu %ymm0,(%rdi) ++ vzeroupper ++ leaq (%r11),%rsp ++ ++ .byte 0xf3,0xc3 ++ ++L$SEH_end_rsaz_1024_gather5: ++ ++.p2align 6 ++L$and_mask: ++.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff ++L$scatter_permd: ++.long 0,2,4,6,7,7,7,7 ++L$gather_permd: ++.long 0,7,1,7,2,7,3,7 ++L$inc: ++.long 0,0,0,0, 1,1,1,1 ++.long 2,2,2,2, 3,3,3,3 ++.long 4,4,4,4, 4,4,4,4 ++.p2align 6 ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/sha1-x86_64.S b/apple-x86_64/crypto/fipsmodule/sha1-x86_64.S +new file mode 100644 +index 0000000..d50851e +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/sha1-x86_64.S +@@ -0,0 +1,5466 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++.globl _sha1_block_data_order ++.private_extern _sha1_block_data_order ++ ++.p2align 4 ++_sha1_block_data_order: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%r10 ++ movl 0(%r10),%r9d ++ movl 4(%r10),%r8d ++ movl 8(%r10),%r10d ++ testl $512,%r8d ++ jz L$ialu ++ testl $536870912,%r10d ++ jnz _shaext_shortcut ++ andl $296,%r10d ++ cmpl $296,%r10d ++ je _avx2_shortcut ++ andl $268435456,%r8d ++ andl $1073741824,%r9d ++ orl %r9d,%r8d ++ cmpl $1342177280,%r8d ++ je _avx_shortcut ++ jmp _ssse3_shortcut ++ ++.p2align 4 ++L$ialu: ++ movq %rsp,%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ movq %rdi,%r8 ++ subq $72,%rsp ++ movq %rsi,%r9 ++ andq $-64,%rsp ++ movq %rdx,%r10 ++ movq %rax,64(%rsp) ++ ++L$prologue: ++ ++ movl 0(%r8),%esi ++ movl 4(%r8),%edi ++ movl 8(%r8),%r11d ++ movl 12(%r8),%r12d ++ movl 16(%r8),%r13d ++ jmp L$loop ++ ++.p2align 4 ++L$loop: ++ movl 0(%r9),%edx ++ bswapl %edx ++ movl 4(%r9),%ebp ++ movl %r12d,%eax ++ movl %edx,0(%rsp) ++ movl %esi,%ecx ++ bswapl %ebp ++ xorl %r11d,%eax ++ roll $5,%ecx ++ andl %edi,%eax ++ leal 1518500249(%rdx,%r13,1),%r13d ++ addl %ecx,%r13d ++ xorl %r12d,%eax ++ roll $30,%edi ++ addl %eax,%r13d ++ movl 8(%r9),%r14d ++ movl %r11d,%eax ++ movl %ebp,4(%rsp) ++ movl %r13d,%ecx ++ bswapl %r14d ++ xorl %edi,%eax ++ roll $5,%ecx ++ andl %esi,%eax ++ leal 1518500249(%rbp,%r12,1),%r12d ++ addl %ecx,%r12d ++ xorl %r11d,%eax ++ roll $30,%esi ++ addl %eax,%r12d ++ movl 12(%r9),%edx ++ movl %edi,%eax ++ movl %r14d,8(%rsp) ++ movl %r12d,%ecx ++ bswapl %edx ++ xorl %esi,%eax ++ roll $5,%ecx ++ andl %r13d,%eax ++ leal 1518500249(%r14,%r11,1),%r11d ++ addl %ecx,%r11d ++ xorl %edi,%eax ++ roll $30,%r13d ++ addl %eax,%r11d ++ movl 16(%r9),%ebp ++ movl %esi,%eax ++ movl %edx,12(%rsp) ++ movl %r11d,%ecx ++ bswapl %ebp ++ xorl %r13d,%eax ++ roll $5,%ecx ++ andl %r12d,%eax ++ leal 1518500249(%rdx,%rdi,1),%edi ++ addl %ecx,%edi ++ xorl %esi,%eax ++ roll $30,%r12d ++ addl %eax,%edi ++ movl 20(%r9),%r14d ++ movl %r13d,%eax ++ movl %ebp,16(%rsp) ++ movl %edi,%ecx ++ bswapl %r14d ++ xorl %r12d,%eax ++ roll $5,%ecx ++ andl %r11d,%eax ++ leal 1518500249(%rbp,%rsi,1),%esi ++ addl %ecx,%esi ++ xorl %r13d,%eax ++ roll $30,%r11d ++ addl %eax,%esi ++ movl 24(%r9),%edx ++ movl %r12d,%eax ++ movl %r14d,20(%rsp) ++ movl %esi,%ecx ++ bswapl %edx ++ xorl %r11d,%eax ++ roll $5,%ecx ++ andl %edi,%eax ++ leal 1518500249(%r14,%r13,1),%r13d ++ addl %ecx,%r13d ++ xorl %r12d,%eax ++ roll $30,%edi ++ addl %eax,%r13d ++ movl 28(%r9),%ebp ++ movl %r11d,%eax ++ movl %edx,24(%rsp) ++ movl %r13d,%ecx ++ bswapl %ebp ++ xorl %edi,%eax ++ roll $5,%ecx ++ andl %esi,%eax ++ leal 1518500249(%rdx,%r12,1),%r12d ++ addl %ecx,%r12d ++ xorl %r11d,%eax ++ roll $30,%esi ++ addl %eax,%r12d ++ movl 32(%r9),%r14d ++ movl %edi,%eax ++ movl %ebp,28(%rsp) ++ movl %r12d,%ecx ++ bswapl %r14d ++ xorl %esi,%eax ++ roll $5,%ecx ++ andl %r13d,%eax ++ leal 1518500249(%rbp,%r11,1),%r11d ++ addl %ecx,%r11d ++ xorl %edi,%eax ++ roll $30,%r13d ++ addl %eax,%r11d ++ movl 36(%r9),%edx ++ movl %esi,%eax ++ movl %r14d,32(%rsp) ++ movl %r11d,%ecx ++ bswapl %edx ++ xorl %r13d,%eax ++ roll $5,%ecx ++ andl %r12d,%eax ++ leal 1518500249(%r14,%rdi,1),%edi ++ addl %ecx,%edi ++ xorl %esi,%eax ++ roll $30,%r12d ++ addl %eax,%edi ++ movl 
40(%r9),%ebp ++ movl %r13d,%eax ++ movl %edx,36(%rsp) ++ movl %edi,%ecx ++ bswapl %ebp ++ xorl %r12d,%eax ++ roll $5,%ecx ++ andl %r11d,%eax ++ leal 1518500249(%rdx,%rsi,1),%esi ++ addl %ecx,%esi ++ xorl %r13d,%eax ++ roll $30,%r11d ++ addl %eax,%esi ++ movl 44(%r9),%r14d ++ movl %r12d,%eax ++ movl %ebp,40(%rsp) ++ movl %esi,%ecx ++ bswapl %r14d ++ xorl %r11d,%eax ++ roll $5,%ecx ++ andl %edi,%eax ++ leal 1518500249(%rbp,%r13,1),%r13d ++ addl %ecx,%r13d ++ xorl %r12d,%eax ++ roll $30,%edi ++ addl %eax,%r13d ++ movl 48(%r9),%edx ++ movl %r11d,%eax ++ movl %r14d,44(%rsp) ++ movl %r13d,%ecx ++ bswapl %edx ++ xorl %edi,%eax ++ roll $5,%ecx ++ andl %esi,%eax ++ leal 1518500249(%r14,%r12,1),%r12d ++ addl %ecx,%r12d ++ xorl %r11d,%eax ++ roll $30,%esi ++ addl %eax,%r12d ++ movl 52(%r9),%ebp ++ movl %edi,%eax ++ movl %edx,48(%rsp) ++ movl %r12d,%ecx ++ bswapl %ebp ++ xorl %esi,%eax ++ roll $5,%ecx ++ andl %r13d,%eax ++ leal 1518500249(%rdx,%r11,1),%r11d ++ addl %ecx,%r11d ++ xorl %edi,%eax ++ roll $30,%r13d ++ addl %eax,%r11d ++ movl 56(%r9),%r14d ++ movl %esi,%eax ++ movl %ebp,52(%rsp) ++ movl %r11d,%ecx ++ bswapl %r14d ++ xorl %r13d,%eax ++ roll $5,%ecx ++ andl %r12d,%eax ++ leal 1518500249(%rbp,%rdi,1),%edi ++ addl %ecx,%edi ++ xorl %esi,%eax ++ roll $30,%r12d ++ addl %eax,%edi ++ movl 60(%r9),%edx ++ movl %r13d,%eax ++ movl %r14d,56(%rsp) ++ movl %edi,%ecx ++ bswapl %edx ++ xorl %r12d,%eax ++ roll $5,%ecx ++ andl %r11d,%eax ++ leal 1518500249(%r14,%rsi,1),%esi ++ addl %ecx,%esi ++ xorl %r13d,%eax ++ roll $30,%r11d ++ addl %eax,%esi ++ xorl 0(%rsp),%ebp ++ movl %r12d,%eax ++ movl %edx,60(%rsp) ++ movl %esi,%ecx ++ xorl 8(%rsp),%ebp ++ xorl %r11d,%eax ++ roll $5,%ecx ++ xorl 32(%rsp),%ebp ++ andl %edi,%eax ++ leal 1518500249(%rdx,%r13,1),%r13d ++ roll $30,%edi ++ xorl %r12d,%eax ++ addl %ecx,%r13d ++ roll $1,%ebp ++ addl %eax,%r13d ++ xorl 4(%rsp),%r14d ++ movl %r11d,%eax ++ movl %ebp,0(%rsp) ++ movl %r13d,%ecx ++ xorl 12(%rsp),%r14d ++ xorl %edi,%eax ++ roll $5,%ecx ++ xorl 36(%rsp),%r14d ++ andl %esi,%eax ++ leal 1518500249(%rbp,%r12,1),%r12d ++ roll $30,%esi ++ xorl %r11d,%eax ++ addl %ecx,%r12d ++ roll $1,%r14d ++ addl %eax,%r12d ++ xorl 8(%rsp),%edx ++ movl %edi,%eax ++ movl %r14d,4(%rsp) ++ movl %r12d,%ecx ++ xorl 16(%rsp),%edx ++ xorl %esi,%eax ++ roll $5,%ecx ++ xorl 40(%rsp),%edx ++ andl %r13d,%eax ++ leal 1518500249(%r14,%r11,1),%r11d ++ roll $30,%r13d ++ xorl %edi,%eax ++ addl %ecx,%r11d ++ roll $1,%edx ++ addl %eax,%r11d ++ xorl 12(%rsp),%ebp ++ movl %esi,%eax ++ movl %edx,8(%rsp) ++ movl %r11d,%ecx ++ xorl 20(%rsp),%ebp ++ xorl %r13d,%eax ++ roll $5,%ecx ++ xorl 44(%rsp),%ebp ++ andl %r12d,%eax ++ leal 1518500249(%rdx,%rdi,1),%edi ++ roll $30,%r12d ++ xorl %esi,%eax ++ addl %ecx,%edi ++ roll $1,%ebp ++ addl %eax,%edi ++ xorl 16(%rsp),%r14d ++ movl %r13d,%eax ++ movl %ebp,12(%rsp) ++ movl %edi,%ecx ++ xorl 24(%rsp),%r14d ++ xorl %r12d,%eax ++ roll $5,%ecx ++ xorl 48(%rsp),%r14d ++ andl %r11d,%eax ++ leal 1518500249(%rbp,%rsi,1),%esi ++ roll $30,%r11d ++ xorl %r13d,%eax ++ addl %ecx,%esi ++ roll $1,%r14d ++ addl %eax,%esi ++ xorl 20(%rsp),%edx ++ movl %edi,%eax ++ movl %r14d,16(%rsp) ++ movl %esi,%ecx ++ xorl 28(%rsp),%edx ++ xorl %r12d,%eax ++ roll $5,%ecx ++ xorl 52(%rsp),%edx ++ leal 1859775393(%r14,%r13,1),%r13d ++ xorl %r11d,%eax ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %eax,%r13d ++ roll $1,%edx ++ xorl 24(%rsp),%ebp ++ movl %esi,%eax ++ movl %edx,20(%rsp) ++ movl %r13d,%ecx ++ xorl 32(%rsp),%ebp ++ xorl %r11d,%eax ++ roll $5,%ecx ++ xorl 56(%rsp),%ebp ++ leal 
1859775393(%rdx,%r12,1),%r12d ++ xorl %edi,%eax ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %eax,%r12d ++ roll $1,%ebp ++ xorl 28(%rsp),%r14d ++ movl %r13d,%eax ++ movl %ebp,24(%rsp) ++ movl %r12d,%ecx ++ xorl 36(%rsp),%r14d ++ xorl %edi,%eax ++ roll $5,%ecx ++ xorl 60(%rsp),%r14d ++ leal 1859775393(%rbp,%r11,1),%r11d ++ xorl %esi,%eax ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %eax,%r11d ++ roll $1,%r14d ++ xorl 32(%rsp),%edx ++ movl %r12d,%eax ++ movl %r14d,28(%rsp) ++ movl %r11d,%ecx ++ xorl 40(%rsp),%edx ++ xorl %esi,%eax ++ roll $5,%ecx ++ xorl 0(%rsp),%edx ++ leal 1859775393(%r14,%rdi,1),%edi ++ xorl %r13d,%eax ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %eax,%edi ++ roll $1,%edx ++ xorl 36(%rsp),%ebp ++ movl %r11d,%eax ++ movl %edx,32(%rsp) ++ movl %edi,%ecx ++ xorl 44(%rsp),%ebp ++ xorl %r13d,%eax ++ roll $5,%ecx ++ xorl 4(%rsp),%ebp ++ leal 1859775393(%rdx,%rsi,1),%esi ++ xorl %r12d,%eax ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %eax,%esi ++ roll $1,%ebp ++ xorl 40(%rsp),%r14d ++ movl %edi,%eax ++ movl %ebp,36(%rsp) ++ movl %esi,%ecx ++ xorl 48(%rsp),%r14d ++ xorl %r12d,%eax ++ roll $5,%ecx ++ xorl 8(%rsp),%r14d ++ leal 1859775393(%rbp,%r13,1),%r13d ++ xorl %r11d,%eax ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %eax,%r13d ++ roll $1,%r14d ++ xorl 44(%rsp),%edx ++ movl %esi,%eax ++ movl %r14d,40(%rsp) ++ movl %r13d,%ecx ++ xorl 52(%rsp),%edx ++ xorl %r11d,%eax ++ roll $5,%ecx ++ xorl 12(%rsp),%edx ++ leal 1859775393(%r14,%r12,1),%r12d ++ xorl %edi,%eax ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %eax,%r12d ++ roll $1,%edx ++ xorl 48(%rsp),%ebp ++ movl %r13d,%eax ++ movl %edx,44(%rsp) ++ movl %r12d,%ecx ++ xorl 56(%rsp),%ebp ++ xorl %edi,%eax ++ roll $5,%ecx ++ xorl 16(%rsp),%ebp ++ leal 1859775393(%rdx,%r11,1),%r11d ++ xorl %esi,%eax ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %eax,%r11d ++ roll $1,%ebp ++ xorl 52(%rsp),%r14d ++ movl %r12d,%eax ++ movl %ebp,48(%rsp) ++ movl %r11d,%ecx ++ xorl 60(%rsp),%r14d ++ xorl %esi,%eax ++ roll $5,%ecx ++ xorl 20(%rsp),%r14d ++ leal 1859775393(%rbp,%rdi,1),%edi ++ xorl %r13d,%eax ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %eax,%edi ++ roll $1,%r14d ++ xorl 56(%rsp),%edx ++ movl %r11d,%eax ++ movl %r14d,52(%rsp) ++ movl %edi,%ecx ++ xorl 0(%rsp),%edx ++ xorl %r13d,%eax ++ roll $5,%ecx ++ xorl 24(%rsp),%edx ++ leal 1859775393(%r14,%rsi,1),%esi ++ xorl %r12d,%eax ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %eax,%esi ++ roll $1,%edx ++ xorl 60(%rsp),%ebp ++ movl %edi,%eax ++ movl %edx,56(%rsp) ++ movl %esi,%ecx ++ xorl 4(%rsp),%ebp ++ xorl %r12d,%eax ++ roll $5,%ecx ++ xorl 28(%rsp),%ebp ++ leal 1859775393(%rdx,%r13,1),%r13d ++ xorl %r11d,%eax ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %eax,%r13d ++ roll $1,%ebp ++ xorl 0(%rsp),%r14d ++ movl %esi,%eax ++ movl %ebp,60(%rsp) ++ movl %r13d,%ecx ++ xorl 8(%rsp),%r14d ++ xorl %r11d,%eax ++ roll $5,%ecx ++ xorl 32(%rsp),%r14d ++ leal 1859775393(%rbp,%r12,1),%r12d ++ xorl %edi,%eax ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %eax,%r12d ++ roll $1,%r14d ++ xorl 4(%rsp),%edx ++ movl %r13d,%eax ++ movl %r14d,0(%rsp) ++ movl %r12d,%ecx ++ xorl 12(%rsp),%edx ++ xorl %edi,%eax ++ roll $5,%ecx ++ xorl 36(%rsp),%edx ++ leal 1859775393(%r14,%r11,1),%r11d ++ xorl %esi,%eax ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %eax,%r11d ++ roll $1,%edx ++ xorl 8(%rsp),%ebp ++ movl %r12d,%eax ++ movl %edx,4(%rsp) ++ movl %r11d,%ecx ++ xorl 16(%rsp),%ebp ++ xorl %esi,%eax ++ roll $5,%ecx ++ xorl 40(%rsp),%ebp ++ leal 1859775393(%rdx,%rdi,1),%edi ++ xorl %r13d,%eax ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %eax,%edi 
++ roll $1,%ebp ++ xorl 12(%rsp),%r14d ++ movl %r11d,%eax ++ movl %ebp,8(%rsp) ++ movl %edi,%ecx ++ xorl 20(%rsp),%r14d ++ xorl %r13d,%eax ++ roll $5,%ecx ++ xorl 44(%rsp),%r14d ++ leal 1859775393(%rbp,%rsi,1),%esi ++ xorl %r12d,%eax ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %eax,%esi ++ roll $1,%r14d ++ xorl 16(%rsp),%edx ++ movl %edi,%eax ++ movl %r14d,12(%rsp) ++ movl %esi,%ecx ++ xorl 24(%rsp),%edx ++ xorl %r12d,%eax ++ roll $5,%ecx ++ xorl 48(%rsp),%edx ++ leal 1859775393(%r14,%r13,1),%r13d ++ xorl %r11d,%eax ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %eax,%r13d ++ roll $1,%edx ++ xorl 20(%rsp),%ebp ++ movl %esi,%eax ++ movl %edx,16(%rsp) ++ movl %r13d,%ecx ++ xorl 28(%rsp),%ebp ++ xorl %r11d,%eax ++ roll $5,%ecx ++ xorl 52(%rsp),%ebp ++ leal 1859775393(%rdx,%r12,1),%r12d ++ xorl %edi,%eax ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %eax,%r12d ++ roll $1,%ebp ++ xorl 24(%rsp),%r14d ++ movl %r13d,%eax ++ movl %ebp,20(%rsp) ++ movl %r12d,%ecx ++ xorl 32(%rsp),%r14d ++ xorl %edi,%eax ++ roll $5,%ecx ++ xorl 56(%rsp),%r14d ++ leal 1859775393(%rbp,%r11,1),%r11d ++ xorl %esi,%eax ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %eax,%r11d ++ roll $1,%r14d ++ xorl 28(%rsp),%edx ++ movl %r12d,%eax ++ movl %r14d,24(%rsp) ++ movl %r11d,%ecx ++ xorl 36(%rsp),%edx ++ xorl %esi,%eax ++ roll $5,%ecx ++ xorl 60(%rsp),%edx ++ leal 1859775393(%r14,%rdi,1),%edi ++ xorl %r13d,%eax ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %eax,%edi ++ roll $1,%edx ++ xorl 32(%rsp),%ebp ++ movl %r11d,%eax ++ movl %edx,28(%rsp) ++ movl %edi,%ecx ++ xorl 40(%rsp),%ebp ++ xorl %r13d,%eax ++ roll $5,%ecx ++ xorl 0(%rsp),%ebp ++ leal 1859775393(%rdx,%rsi,1),%esi ++ xorl %r12d,%eax ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %eax,%esi ++ roll $1,%ebp ++ xorl 36(%rsp),%r14d ++ movl %r12d,%eax ++ movl %ebp,32(%rsp) ++ movl %r12d,%ebx ++ xorl 44(%rsp),%r14d ++ andl %r11d,%eax ++ movl %esi,%ecx ++ xorl 4(%rsp),%r14d ++ leal -1894007588(%rbp,%r13,1),%r13d ++ xorl %r11d,%ebx ++ roll $5,%ecx ++ addl %eax,%r13d ++ roll $1,%r14d ++ andl %edi,%ebx ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %ebx,%r13d ++ xorl 40(%rsp),%edx ++ movl %r11d,%eax ++ movl %r14d,36(%rsp) ++ movl %r11d,%ebx ++ xorl 48(%rsp),%edx ++ andl %edi,%eax ++ movl %r13d,%ecx ++ xorl 8(%rsp),%edx ++ leal -1894007588(%r14,%r12,1),%r12d ++ xorl %edi,%ebx ++ roll $5,%ecx ++ addl %eax,%r12d ++ roll $1,%edx ++ andl %esi,%ebx ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %ebx,%r12d ++ xorl 44(%rsp),%ebp ++ movl %edi,%eax ++ movl %edx,40(%rsp) ++ movl %edi,%ebx ++ xorl 52(%rsp),%ebp ++ andl %esi,%eax ++ movl %r12d,%ecx ++ xorl 12(%rsp),%ebp ++ leal -1894007588(%rdx,%r11,1),%r11d ++ xorl %esi,%ebx ++ roll $5,%ecx ++ addl %eax,%r11d ++ roll $1,%ebp ++ andl %r13d,%ebx ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %ebx,%r11d ++ xorl 48(%rsp),%r14d ++ movl %esi,%eax ++ movl %ebp,44(%rsp) ++ movl %esi,%ebx ++ xorl 56(%rsp),%r14d ++ andl %r13d,%eax ++ movl %r11d,%ecx ++ xorl 16(%rsp),%r14d ++ leal -1894007588(%rbp,%rdi,1),%edi ++ xorl %r13d,%ebx ++ roll $5,%ecx ++ addl %eax,%edi ++ roll $1,%r14d ++ andl %r12d,%ebx ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %ebx,%edi ++ xorl 52(%rsp),%edx ++ movl %r13d,%eax ++ movl %r14d,48(%rsp) ++ movl %r13d,%ebx ++ xorl 60(%rsp),%edx ++ andl %r12d,%eax ++ movl %edi,%ecx ++ xorl 20(%rsp),%edx ++ leal -1894007588(%r14,%rsi,1),%esi ++ xorl %r12d,%ebx ++ roll $5,%ecx ++ addl %eax,%esi ++ roll $1,%edx ++ andl %r11d,%ebx ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %ebx,%esi ++ xorl 56(%rsp),%ebp ++ movl %r12d,%eax ++ movl %edx,52(%rsp) ++ movl %r12d,%ebx ++ 
xorl 0(%rsp),%ebp ++ andl %r11d,%eax ++ movl %esi,%ecx ++ xorl 24(%rsp),%ebp ++ leal -1894007588(%rdx,%r13,1),%r13d ++ xorl %r11d,%ebx ++ roll $5,%ecx ++ addl %eax,%r13d ++ roll $1,%ebp ++ andl %edi,%ebx ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %ebx,%r13d ++ xorl 60(%rsp),%r14d ++ movl %r11d,%eax ++ movl %ebp,56(%rsp) ++ movl %r11d,%ebx ++ xorl 4(%rsp),%r14d ++ andl %edi,%eax ++ movl %r13d,%ecx ++ xorl 28(%rsp),%r14d ++ leal -1894007588(%rbp,%r12,1),%r12d ++ xorl %edi,%ebx ++ roll $5,%ecx ++ addl %eax,%r12d ++ roll $1,%r14d ++ andl %esi,%ebx ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %ebx,%r12d ++ xorl 0(%rsp),%edx ++ movl %edi,%eax ++ movl %r14d,60(%rsp) ++ movl %edi,%ebx ++ xorl 8(%rsp),%edx ++ andl %esi,%eax ++ movl %r12d,%ecx ++ xorl 32(%rsp),%edx ++ leal -1894007588(%r14,%r11,1),%r11d ++ xorl %esi,%ebx ++ roll $5,%ecx ++ addl %eax,%r11d ++ roll $1,%edx ++ andl %r13d,%ebx ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %ebx,%r11d ++ xorl 4(%rsp),%ebp ++ movl %esi,%eax ++ movl %edx,0(%rsp) ++ movl %esi,%ebx ++ xorl 12(%rsp),%ebp ++ andl %r13d,%eax ++ movl %r11d,%ecx ++ xorl 36(%rsp),%ebp ++ leal -1894007588(%rdx,%rdi,1),%edi ++ xorl %r13d,%ebx ++ roll $5,%ecx ++ addl %eax,%edi ++ roll $1,%ebp ++ andl %r12d,%ebx ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %ebx,%edi ++ xorl 8(%rsp),%r14d ++ movl %r13d,%eax ++ movl %ebp,4(%rsp) ++ movl %r13d,%ebx ++ xorl 16(%rsp),%r14d ++ andl %r12d,%eax ++ movl %edi,%ecx ++ xorl 40(%rsp),%r14d ++ leal -1894007588(%rbp,%rsi,1),%esi ++ xorl %r12d,%ebx ++ roll $5,%ecx ++ addl %eax,%esi ++ roll $1,%r14d ++ andl %r11d,%ebx ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %ebx,%esi ++ xorl 12(%rsp),%edx ++ movl %r12d,%eax ++ movl %r14d,8(%rsp) ++ movl %r12d,%ebx ++ xorl 20(%rsp),%edx ++ andl %r11d,%eax ++ movl %esi,%ecx ++ xorl 44(%rsp),%edx ++ leal -1894007588(%r14,%r13,1),%r13d ++ xorl %r11d,%ebx ++ roll $5,%ecx ++ addl %eax,%r13d ++ roll $1,%edx ++ andl %edi,%ebx ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %ebx,%r13d ++ xorl 16(%rsp),%ebp ++ movl %r11d,%eax ++ movl %edx,12(%rsp) ++ movl %r11d,%ebx ++ xorl 24(%rsp),%ebp ++ andl %edi,%eax ++ movl %r13d,%ecx ++ xorl 48(%rsp),%ebp ++ leal -1894007588(%rdx,%r12,1),%r12d ++ xorl %edi,%ebx ++ roll $5,%ecx ++ addl %eax,%r12d ++ roll $1,%ebp ++ andl %esi,%ebx ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %ebx,%r12d ++ xorl 20(%rsp),%r14d ++ movl %edi,%eax ++ movl %ebp,16(%rsp) ++ movl %edi,%ebx ++ xorl 28(%rsp),%r14d ++ andl %esi,%eax ++ movl %r12d,%ecx ++ xorl 52(%rsp),%r14d ++ leal -1894007588(%rbp,%r11,1),%r11d ++ xorl %esi,%ebx ++ roll $5,%ecx ++ addl %eax,%r11d ++ roll $1,%r14d ++ andl %r13d,%ebx ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %ebx,%r11d ++ xorl 24(%rsp),%edx ++ movl %esi,%eax ++ movl %r14d,20(%rsp) ++ movl %esi,%ebx ++ xorl 32(%rsp),%edx ++ andl %r13d,%eax ++ movl %r11d,%ecx ++ xorl 56(%rsp),%edx ++ leal -1894007588(%r14,%rdi,1),%edi ++ xorl %r13d,%ebx ++ roll $5,%ecx ++ addl %eax,%edi ++ roll $1,%edx ++ andl %r12d,%ebx ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %ebx,%edi ++ xorl 28(%rsp),%ebp ++ movl %r13d,%eax ++ movl %edx,24(%rsp) ++ movl %r13d,%ebx ++ xorl 36(%rsp),%ebp ++ andl %r12d,%eax ++ movl %edi,%ecx ++ xorl 60(%rsp),%ebp ++ leal -1894007588(%rdx,%rsi,1),%esi ++ xorl %r12d,%ebx ++ roll $5,%ecx ++ addl %eax,%esi ++ roll $1,%ebp ++ andl %r11d,%ebx ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %ebx,%esi ++ xorl 32(%rsp),%r14d ++ movl %r12d,%eax ++ movl %ebp,28(%rsp) ++ movl %r12d,%ebx ++ xorl 40(%rsp),%r14d ++ andl %r11d,%eax ++ movl %esi,%ecx ++ xorl 0(%rsp),%r14d ++ leal 
-1894007588(%rbp,%r13,1),%r13d ++ xorl %r11d,%ebx ++ roll $5,%ecx ++ addl %eax,%r13d ++ roll $1,%r14d ++ andl %edi,%ebx ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %ebx,%r13d ++ xorl 36(%rsp),%edx ++ movl %r11d,%eax ++ movl %r14d,32(%rsp) ++ movl %r11d,%ebx ++ xorl 44(%rsp),%edx ++ andl %edi,%eax ++ movl %r13d,%ecx ++ xorl 4(%rsp),%edx ++ leal -1894007588(%r14,%r12,1),%r12d ++ xorl %edi,%ebx ++ roll $5,%ecx ++ addl %eax,%r12d ++ roll $1,%edx ++ andl %esi,%ebx ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %ebx,%r12d ++ xorl 40(%rsp),%ebp ++ movl %edi,%eax ++ movl %edx,36(%rsp) ++ movl %edi,%ebx ++ xorl 48(%rsp),%ebp ++ andl %esi,%eax ++ movl %r12d,%ecx ++ xorl 8(%rsp),%ebp ++ leal -1894007588(%rdx,%r11,1),%r11d ++ xorl %esi,%ebx ++ roll $5,%ecx ++ addl %eax,%r11d ++ roll $1,%ebp ++ andl %r13d,%ebx ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %ebx,%r11d ++ xorl 44(%rsp),%r14d ++ movl %esi,%eax ++ movl %ebp,40(%rsp) ++ movl %esi,%ebx ++ xorl 52(%rsp),%r14d ++ andl %r13d,%eax ++ movl %r11d,%ecx ++ xorl 12(%rsp),%r14d ++ leal -1894007588(%rbp,%rdi,1),%edi ++ xorl %r13d,%ebx ++ roll $5,%ecx ++ addl %eax,%edi ++ roll $1,%r14d ++ andl %r12d,%ebx ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %ebx,%edi ++ xorl 48(%rsp),%edx ++ movl %r13d,%eax ++ movl %r14d,44(%rsp) ++ movl %r13d,%ebx ++ xorl 56(%rsp),%edx ++ andl %r12d,%eax ++ movl %edi,%ecx ++ xorl 16(%rsp),%edx ++ leal -1894007588(%r14,%rsi,1),%esi ++ xorl %r12d,%ebx ++ roll $5,%ecx ++ addl %eax,%esi ++ roll $1,%edx ++ andl %r11d,%ebx ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %ebx,%esi ++ xorl 52(%rsp),%ebp ++ movl %edi,%eax ++ movl %edx,48(%rsp) ++ movl %esi,%ecx ++ xorl 60(%rsp),%ebp ++ xorl %r12d,%eax ++ roll $5,%ecx ++ xorl 20(%rsp),%ebp ++ leal -899497514(%rdx,%r13,1),%r13d ++ xorl %r11d,%eax ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %eax,%r13d ++ roll $1,%ebp ++ xorl 56(%rsp),%r14d ++ movl %esi,%eax ++ movl %ebp,52(%rsp) ++ movl %r13d,%ecx ++ xorl 0(%rsp),%r14d ++ xorl %r11d,%eax ++ roll $5,%ecx ++ xorl 24(%rsp),%r14d ++ leal -899497514(%rbp,%r12,1),%r12d ++ xorl %edi,%eax ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %eax,%r12d ++ roll $1,%r14d ++ xorl 60(%rsp),%edx ++ movl %r13d,%eax ++ movl %r14d,56(%rsp) ++ movl %r12d,%ecx ++ xorl 4(%rsp),%edx ++ xorl %edi,%eax ++ roll $5,%ecx ++ xorl 28(%rsp),%edx ++ leal -899497514(%r14,%r11,1),%r11d ++ xorl %esi,%eax ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %eax,%r11d ++ roll $1,%edx ++ xorl 0(%rsp),%ebp ++ movl %r12d,%eax ++ movl %edx,60(%rsp) ++ movl %r11d,%ecx ++ xorl 8(%rsp),%ebp ++ xorl %esi,%eax ++ roll $5,%ecx ++ xorl 32(%rsp),%ebp ++ leal -899497514(%rdx,%rdi,1),%edi ++ xorl %r13d,%eax ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %eax,%edi ++ roll $1,%ebp ++ xorl 4(%rsp),%r14d ++ movl %r11d,%eax ++ movl %ebp,0(%rsp) ++ movl %edi,%ecx ++ xorl 12(%rsp),%r14d ++ xorl %r13d,%eax ++ roll $5,%ecx ++ xorl 36(%rsp),%r14d ++ leal -899497514(%rbp,%rsi,1),%esi ++ xorl %r12d,%eax ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %eax,%esi ++ roll $1,%r14d ++ xorl 8(%rsp),%edx ++ movl %edi,%eax ++ movl %r14d,4(%rsp) ++ movl %esi,%ecx ++ xorl 16(%rsp),%edx ++ xorl %r12d,%eax ++ roll $5,%ecx ++ xorl 40(%rsp),%edx ++ leal -899497514(%r14,%r13,1),%r13d ++ xorl %r11d,%eax ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %eax,%r13d ++ roll $1,%edx ++ xorl 12(%rsp),%ebp ++ movl %esi,%eax ++ movl %edx,8(%rsp) ++ movl %r13d,%ecx ++ xorl 20(%rsp),%ebp ++ xorl %r11d,%eax ++ roll $5,%ecx ++ xorl 44(%rsp),%ebp ++ leal -899497514(%rdx,%r12,1),%r12d ++ xorl %edi,%eax ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %eax,%r12d ++ roll 
$1,%ebp ++ xorl 16(%rsp),%r14d ++ movl %r13d,%eax ++ movl %ebp,12(%rsp) ++ movl %r12d,%ecx ++ xorl 24(%rsp),%r14d ++ xorl %edi,%eax ++ roll $5,%ecx ++ xorl 48(%rsp),%r14d ++ leal -899497514(%rbp,%r11,1),%r11d ++ xorl %esi,%eax ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %eax,%r11d ++ roll $1,%r14d ++ xorl 20(%rsp),%edx ++ movl %r12d,%eax ++ movl %r14d,16(%rsp) ++ movl %r11d,%ecx ++ xorl 28(%rsp),%edx ++ xorl %esi,%eax ++ roll $5,%ecx ++ xorl 52(%rsp),%edx ++ leal -899497514(%r14,%rdi,1),%edi ++ xorl %r13d,%eax ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %eax,%edi ++ roll $1,%edx ++ xorl 24(%rsp),%ebp ++ movl %r11d,%eax ++ movl %edx,20(%rsp) ++ movl %edi,%ecx ++ xorl 32(%rsp),%ebp ++ xorl %r13d,%eax ++ roll $5,%ecx ++ xorl 56(%rsp),%ebp ++ leal -899497514(%rdx,%rsi,1),%esi ++ xorl %r12d,%eax ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %eax,%esi ++ roll $1,%ebp ++ xorl 28(%rsp),%r14d ++ movl %edi,%eax ++ movl %ebp,24(%rsp) ++ movl %esi,%ecx ++ xorl 36(%rsp),%r14d ++ xorl %r12d,%eax ++ roll $5,%ecx ++ xorl 60(%rsp),%r14d ++ leal -899497514(%rbp,%r13,1),%r13d ++ xorl %r11d,%eax ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %eax,%r13d ++ roll $1,%r14d ++ xorl 32(%rsp),%edx ++ movl %esi,%eax ++ movl %r14d,28(%rsp) ++ movl %r13d,%ecx ++ xorl 40(%rsp),%edx ++ xorl %r11d,%eax ++ roll $5,%ecx ++ xorl 0(%rsp),%edx ++ leal -899497514(%r14,%r12,1),%r12d ++ xorl %edi,%eax ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %eax,%r12d ++ roll $1,%edx ++ xorl 36(%rsp),%ebp ++ movl %r13d,%eax ++ ++ movl %r12d,%ecx ++ xorl 44(%rsp),%ebp ++ xorl %edi,%eax ++ roll $5,%ecx ++ xorl 4(%rsp),%ebp ++ leal -899497514(%rdx,%r11,1),%r11d ++ xorl %esi,%eax ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %eax,%r11d ++ roll $1,%ebp ++ xorl 40(%rsp),%r14d ++ movl %r12d,%eax ++ ++ movl %r11d,%ecx ++ xorl 48(%rsp),%r14d ++ xorl %esi,%eax ++ roll $5,%ecx ++ xorl 8(%rsp),%r14d ++ leal -899497514(%rbp,%rdi,1),%edi ++ xorl %r13d,%eax ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %eax,%edi ++ roll $1,%r14d ++ xorl 44(%rsp),%edx ++ movl %r11d,%eax ++ ++ movl %edi,%ecx ++ xorl 52(%rsp),%edx ++ xorl %r13d,%eax ++ roll $5,%ecx ++ xorl 12(%rsp),%edx ++ leal -899497514(%r14,%rsi,1),%esi ++ xorl %r12d,%eax ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %eax,%esi ++ roll $1,%edx ++ xorl 48(%rsp),%ebp ++ movl %edi,%eax ++ ++ movl %esi,%ecx ++ xorl 56(%rsp),%ebp ++ xorl %r12d,%eax ++ roll $5,%ecx ++ xorl 16(%rsp),%ebp ++ leal -899497514(%rdx,%r13,1),%r13d ++ xorl %r11d,%eax ++ addl %ecx,%r13d ++ roll $30,%edi ++ addl %eax,%r13d ++ roll $1,%ebp ++ xorl 52(%rsp),%r14d ++ movl %esi,%eax ++ ++ movl %r13d,%ecx ++ xorl 60(%rsp),%r14d ++ xorl %r11d,%eax ++ roll $5,%ecx ++ xorl 20(%rsp),%r14d ++ leal -899497514(%rbp,%r12,1),%r12d ++ xorl %edi,%eax ++ addl %ecx,%r12d ++ roll $30,%esi ++ addl %eax,%r12d ++ roll $1,%r14d ++ xorl 56(%rsp),%edx ++ movl %r13d,%eax ++ ++ movl %r12d,%ecx ++ xorl 0(%rsp),%edx ++ xorl %edi,%eax ++ roll $5,%ecx ++ xorl 24(%rsp),%edx ++ leal -899497514(%r14,%r11,1),%r11d ++ xorl %esi,%eax ++ addl %ecx,%r11d ++ roll $30,%r13d ++ addl %eax,%r11d ++ roll $1,%edx ++ xorl 60(%rsp),%ebp ++ movl %r12d,%eax ++ ++ movl %r11d,%ecx ++ xorl 4(%rsp),%ebp ++ xorl %esi,%eax ++ roll $5,%ecx ++ xorl 28(%rsp),%ebp ++ leal -899497514(%rdx,%rdi,1),%edi ++ xorl %r13d,%eax ++ addl %ecx,%edi ++ roll $30,%r12d ++ addl %eax,%edi ++ roll $1,%ebp ++ movl %r11d,%eax ++ movl %edi,%ecx ++ xorl %r13d,%eax ++ leal -899497514(%rbp,%rsi,1),%esi ++ roll $5,%ecx ++ xorl %r12d,%eax ++ addl %ecx,%esi ++ roll $30,%r11d ++ addl %eax,%esi ++ addl 0(%r8),%esi ++ addl 4(%r8),%edi ++ 
addl 8(%r8),%r11d ++ addl 12(%r8),%r12d ++ addl 16(%r8),%r13d ++ movl %esi,0(%r8) ++ movl %edi,4(%r8) ++ movl %r11d,8(%r8) ++ movl %r12d,12(%r8) ++ movl %r13d,16(%r8) ++ ++ subq $1,%r10 ++ leaq 64(%r9),%r9 ++ jnz L$loop ++ ++ movq 64(%rsp),%rsi ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++sha1_block_data_order_shaext: ++_shaext_shortcut: ++ ++ movdqu (%rdi),%xmm0 ++ movd 16(%rdi),%xmm1 ++ movdqa K_XX_XX+160(%rip),%xmm3 ++ ++ movdqu (%rsi),%xmm4 ++ pshufd $27,%xmm0,%xmm0 ++ movdqu 16(%rsi),%xmm5 ++ pshufd $27,%xmm1,%xmm1 ++ movdqu 32(%rsi),%xmm6 ++.byte 102,15,56,0,227 ++ movdqu 48(%rsi),%xmm7 ++.byte 102,15,56,0,235 ++.byte 102,15,56,0,243 ++ movdqa %xmm1,%xmm9 ++.byte 102,15,56,0,251 ++ jmp L$oop_shaext ++ ++.p2align 4 ++L$oop_shaext: ++ decq %rdx ++ leaq 64(%rsi),%r8 ++ paddd %xmm4,%xmm1 ++ cmovneq %r8,%rsi ++ movdqa %xmm0,%xmm8 ++.byte 15,56,201,229 ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,0 ++.byte 15,56,200,213 ++ pxor %xmm6,%xmm4 ++.byte 15,56,201,238 ++.byte 15,56,202,231 ++ ++ movdqa %xmm0,%xmm1 ++.byte 15,58,204,194,0 ++.byte 15,56,200,206 ++ pxor %xmm7,%xmm5 ++.byte 15,56,202,236 ++.byte 15,56,201,247 ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,0 ++.byte 15,56,200,215 ++ pxor %xmm4,%xmm6 ++.byte 15,56,201,252 ++.byte 15,56,202,245 ++ ++ movdqa %xmm0,%xmm1 ++.byte 15,58,204,194,0 ++.byte 15,56,200,204 ++ pxor %xmm5,%xmm7 ++.byte 15,56,202,254 ++.byte 15,56,201,229 ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,0 ++.byte 15,56,200,213 ++ pxor %xmm6,%xmm4 ++.byte 15,56,201,238 ++.byte 15,56,202,231 ++ ++ movdqa %xmm0,%xmm1 ++.byte 15,58,204,194,1 ++.byte 15,56,200,206 ++ pxor %xmm7,%xmm5 ++.byte 15,56,202,236 ++.byte 15,56,201,247 ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,1 ++.byte 15,56,200,215 ++ pxor %xmm4,%xmm6 ++.byte 15,56,201,252 ++.byte 15,56,202,245 ++ ++ movdqa %xmm0,%xmm1 ++.byte 15,58,204,194,1 ++.byte 15,56,200,204 ++ pxor %xmm5,%xmm7 ++.byte 15,56,202,254 ++.byte 15,56,201,229 ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,1 ++.byte 15,56,200,213 ++ pxor %xmm6,%xmm4 ++.byte 15,56,201,238 ++.byte 15,56,202,231 ++ ++ movdqa %xmm0,%xmm1 ++.byte 15,58,204,194,1 ++.byte 15,56,200,206 ++ pxor %xmm7,%xmm5 ++.byte 15,56,202,236 ++.byte 15,56,201,247 ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,2 ++.byte 15,56,200,215 ++ pxor %xmm4,%xmm6 ++.byte 15,56,201,252 ++.byte 15,56,202,245 ++ ++ movdqa %xmm0,%xmm1 ++.byte 15,58,204,194,2 ++.byte 15,56,200,204 ++ pxor %xmm5,%xmm7 ++.byte 15,56,202,254 ++.byte 15,56,201,229 ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,2 ++.byte 15,56,200,213 ++ pxor %xmm6,%xmm4 ++.byte 15,56,201,238 ++.byte 15,56,202,231 ++ ++ movdqa %xmm0,%xmm1 ++.byte 15,58,204,194,2 ++.byte 15,56,200,206 ++ pxor %xmm7,%xmm5 ++.byte 15,56,202,236 ++.byte 15,56,201,247 ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,2 ++.byte 15,56,200,215 ++ pxor %xmm4,%xmm6 ++.byte 15,56,201,252 ++.byte 15,56,202,245 ++ ++ movdqa %xmm0,%xmm1 ++.byte 15,58,204,194,3 ++.byte 15,56,200,204 ++ pxor %xmm5,%xmm7 ++.byte 15,56,202,254 ++ movdqu (%rsi),%xmm4 ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,3 ++.byte 15,56,200,213 ++ movdqu 16(%rsi),%xmm5 ++.byte 102,15,56,0,227 ++ ++ movdqa %xmm0,%xmm1 ++.byte 15,58,204,194,3 ++.byte 15,56,200,206 ++ movdqu 32(%rsi),%xmm6 ++.byte 102,15,56,0,235 ++ ++ movdqa %xmm0,%xmm2 ++.byte 15,58,204,193,3 ++.byte 15,56,200,215 ++ movdqu 48(%rsi),%xmm7 ++.byte 102,15,56,0,243 ++ ++ movdqa %xmm0,%xmm1 
++.byte 15,58,204,194,3 ++.byte 65,15,56,200,201 ++.byte 102,15,56,0,251 ++ ++ paddd %xmm8,%xmm0 ++ movdqa %xmm1,%xmm9 ++ ++ jnz L$oop_shaext ++ ++ pshufd $27,%xmm0,%xmm0 ++ pshufd $27,%xmm1,%xmm1 ++ movdqu %xmm0,(%rdi) ++ movd %xmm1,16(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++sha1_block_data_order_ssse3: ++_ssse3_shortcut: ++ ++ movq %rsp,%r11 ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ leaq -64(%rsp),%rsp ++ andq $-64,%rsp ++ movq %rdi,%r8 ++ movq %rsi,%r9 ++ movq %rdx,%r10 ++ ++ shlq $6,%r10 ++ addq %r9,%r10 ++ leaq K_XX_XX+64(%rip),%r14 ++ ++ movl 0(%r8),%eax ++ movl 4(%r8),%ebx ++ movl 8(%r8),%ecx ++ movl 12(%r8),%edx ++ movl %ebx,%esi ++ movl 16(%r8),%ebp ++ movl %ecx,%edi ++ xorl %edx,%edi ++ andl %edi,%esi ++ ++ movdqa 64(%r14),%xmm6 ++ movdqa -64(%r14),%xmm9 ++ movdqu 0(%r9),%xmm0 ++ movdqu 16(%r9),%xmm1 ++ movdqu 32(%r9),%xmm2 ++ movdqu 48(%r9),%xmm3 ++.byte 102,15,56,0,198 ++.byte 102,15,56,0,206 ++.byte 102,15,56,0,214 ++ addq $64,%r9 ++ paddd %xmm9,%xmm0 ++.byte 102,15,56,0,222 ++ paddd %xmm9,%xmm1 ++ paddd %xmm9,%xmm2 ++ movdqa %xmm0,0(%rsp) ++ psubd %xmm9,%xmm0 ++ movdqa %xmm1,16(%rsp) ++ psubd %xmm9,%xmm1 ++ movdqa %xmm2,32(%rsp) ++ psubd %xmm9,%xmm2 ++ jmp L$oop_ssse3 ++.p2align 4 ++L$oop_ssse3: ++ rorl $2,%ebx ++ pshufd $238,%xmm0,%xmm4 ++ xorl %edx,%esi ++ movdqa %xmm3,%xmm8 ++ paddd %xmm3,%xmm9 ++ movl %eax,%edi ++ addl 0(%rsp),%ebp ++ punpcklqdq %xmm1,%xmm4 ++ xorl %ecx,%ebx ++ roll $5,%eax ++ addl %esi,%ebp ++ psrldq $4,%xmm8 ++ andl %ebx,%edi ++ xorl %ecx,%ebx ++ pxor %xmm0,%xmm4 ++ addl %eax,%ebp ++ rorl $7,%eax ++ pxor %xmm2,%xmm8 ++ xorl %ecx,%edi ++ movl %ebp,%esi ++ addl 4(%rsp),%edx ++ pxor %xmm8,%xmm4 ++ xorl %ebx,%eax ++ roll $5,%ebp ++ movdqa %xmm9,48(%rsp) ++ addl %edi,%edx ++ andl %eax,%esi ++ movdqa %xmm4,%xmm10 ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ rorl $7,%ebp ++ movdqa %xmm4,%xmm8 ++ xorl %ebx,%esi ++ pslldq $12,%xmm10 ++ paddd %xmm4,%xmm4 ++ movl %edx,%edi ++ addl 8(%rsp),%ecx ++ psrld $31,%xmm8 ++ xorl %eax,%ebp ++ roll $5,%edx ++ addl %esi,%ecx ++ movdqa %xmm10,%xmm9 ++ andl %ebp,%edi ++ xorl %eax,%ebp ++ psrld $30,%xmm10 ++ addl %edx,%ecx ++ rorl $7,%edx ++ por %xmm8,%xmm4 ++ xorl %eax,%edi ++ movl %ecx,%esi ++ addl 12(%rsp),%ebx ++ pslld $2,%xmm9 ++ pxor %xmm10,%xmm4 ++ xorl %ebp,%edx ++ movdqa -64(%r14),%xmm10 ++ roll $5,%ecx ++ addl %edi,%ebx ++ andl %edx,%esi ++ pxor %xmm9,%xmm4 ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ rorl $7,%ecx ++ pshufd $238,%xmm1,%xmm5 ++ xorl %ebp,%esi ++ movdqa %xmm4,%xmm9 ++ paddd %xmm4,%xmm10 ++ movl %ebx,%edi ++ addl 16(%rsp),%eax ++ punpcklqdq %xmm2,%xmm5 ++ xorl %edx,%ecx ++ roll $5,%ebx ++ addl %esi,%eax ++ psrldq $4,%xmm9 ++ andl %ecx,%edi ++ xorl %edx,%ecx ++ pxor %xmm1,%xmm5 ++ addl %ebx,%eax ++ rorl $7,%ebx ++ pxor %xmm3,%xmm9 ++ xorl %edx,%edi ++ movl %eax,%esi ++ addl 20(%rsp),%ebp ++ pxor %xmm9,%xmm5 ++ xorl %ecx,%ebx ++ roll $5,%eax ++ movdqa %xmm10,0(%rsp) ++ addl %edi,%ebp ++ andl %ebx,%esi ++ movdqa %xmm5,%xmm8 ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ rorl $7,%eax ++ movdqa %xmm5,%xmm9 ++ xorl %ecx,%esi ++ pslldq $12,%xmm8 ++ paddd %xmm5,%xmm5 ++ movl %ebp,%edi ++ addl 24(%rsp),%edx ++ psrld $31,%xmm9 ++ xorl %ebx,%eax ++ roll $5,%ebp ++ addl %esi,%edx ++ movdqa %xmm8,%xmm10 ++ andl %eax,%edi ++ xorl %ebx,%eax ++ psrld $30,%xmm8 ++ addl %ebp,%edx ++ rorl $7,%ebp ++ por %xmm9,%xmm5 ++ xorl %ebx,%edi ++ movl %edx,%esi ++ addl 28(%rsp),%ecx ++ pslld $2,%xmm10 ++ pxor %xmm8,%xmm5 ++ xorl %eax,%ebp ++ movdqa -32(%r14),%xmm8 ++ roll $5,%edx ++ addl %edi,%ecx 
++ andl %ebp,%esi ++ pxor %xmm10,%xmm5 ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ rorl $7,%edx ++ pshufd $238,%xmm2,%xmm6 ++ xorl %eax,%esi ++ movdqa %xmm5,%xmm10 ++ paddd %xmm5,%xmm8 ++ movl %ecx,%edi ++ addl 32(%rsp),%ebx ++ punpcklqdq %xmm3,%xmm6 ++ xorl %ebp,%edx ++ roll $5,%ecx ++ addl %esi,%ebx ++ psrldq $4,%xmm10 ++ andl %edx,%edi ++ xorl %ebp,%edx ++ pxor %xmm2,%xmm6 ++ addl %ecx,%ebx ++ rorl $7,%ecx ++ pxor %xmm4,%xmm10 ++ xorl %ebp,%edi ++ movl %ebx,%esi ++ addl 36(%rsp),%eax ++ pxor %xmm10,%xmm6 ++ xorl %edx,%ecx ++ roll $5,%ebx ++ movdqa %xmm8,16(%rsp) ++ addl %edi,%eax ++ andl %ecx,%esi ++ movdqa %xmm6,%xmm9 ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ rorl $7,%ebx ++ movdqa %xmm6,%xmm10 ++ xorl %edx,%esi ++ pslldq $12,%xmm9 ++ paddd %xmm6,%xmm6 ++ movl %eax,%edi ++ addl 40(%rsp),%ebp ++ psrld $31,%xmm10 ++ xorl %ecx,%ebx ++ roll $5,%eax ++ addl %esi,%ebp ++ movdqa %xmm9,%xmm8 ++ andl %ebx,%edi ++ xorl %ecx,%ebx ++ psrld $30,%xmm9 ++ addl %eax,%ebp ++ rorl $7,%eax ++ por %xmm10,%xmm6 ++ xorl %ecx,%edi ++ movl %ebp,%esi ++ addl 44(%rsp),%edx ++ pslld $2,%xmm8 ++ pxor %xmm9,%xmm6 ++ xorl %ebx,%eax ++ movdqa -32(%r14),%xmm9 ++ roll $5,%ebp ++ addl %edi,%edx ++ andl %eax,%esi ++ pxor %xmm8,%xmm6 ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ rorl $7,%ebp ++ pshufd $238,%xmm3,%xmm7 ++ xorl %ebx,%esi ++ movdqa %xmm6,%xmm8 ++ paddd %xmm6,%xmm9 ++ movl %edx,%edi ++ addl 48(%rsp),%ecx ++ punpcklqdq %xmm4,%xmm7 ++ xorl %eax,%ebp ++ roll $5,%edx ++ addl %esi,%ecx ++ psrldq $4,%xmm8 ++ andl %ebp,%edi ++ xorl %eax,%ebp ++ pxor %xmm3,%xmm7 ++ addl %edx,%ecx ++ rorl $7,%edx ++ pxor %xmm5,%xmm8 ++ xorl %eax,%edi ++ movl %ecx,%esi ++ addl 52(%rsp),%ebx ++ pxor %xmm8,%xmm7 ++ xorl %ebp,%edx ++ roll $5,%ecx ++ movdqa %xmm9,32(%rsp) ++ addl %edi,%ebx ++ andl %edx,%esi ++ movdqa %xmm7,%xmm10 ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ rorl $7,%ecx ++ movdqa %xmm7,%xmm8 ++ xorl %ebp,%esi ++ pslldq $12,%xmm10 ++ paddd %xmm7,%xmm7 ++ movl %ebx,%edi ++ addl 56(%rsp),%eax ++ psrld $31,%xmm8 ++ xorl %edx,%ecx ++ roll $5,%ebx ++ addl %esi,%eax ++ movdqa %xmm10,%xmm9 ++ andl %ecx,%edi ++ xorl %edx,%ecx ++ psrld $30,%xmm10 ++ addl %ebx,%eax ++ rorl $7,%ebx ++ por %xmm8,%xmm7 ++ xorl %edx,%edi ++ movl %eax,%esi ++ addl 60(%rsp),%ebp ++ pslld $2,%xmm9 ++ pxor %xmm10,%xmm7 ++ xorl %ecx,%ebx ++ movdqa -32(%r14),%xmm10 ++ roll $5,%eax ++ addl %edi,%ebp ++ andl %ebx,%esi ++ pxor %xmm9,%xmm7 ++ pshufd $238,%xmm6,%xmm9 ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ rorl $7,%eax ++ pxor %xmm4,%xmm0 ++ xorl %ecx,%esi ++ movl %ebp,%edi ++ addl 0(%rsp),%edx ++ punpcklqdq %xmm7,%xmm9 ++ xorl %ebx,%eax ++ roll $5,%ebp ++ pxor %xmm1,%xmm0 ++ addl %esi,%edx ++ andl %eax,%edi ++ movdqa %xmm10,%xmm8 ++ xorl %ebx,%eax ++ paddd %xmm7,%xmm10 ++ addl %ebp,%edx ++ pxor %xmm9,%xmm0 ++ rorl $7,%ebp ++ xorl %ebx,%edi ++ movl %edx,%esi ++ addl 4(%rsp),%ecx ++ movdqa %xmm0,%xmm9 ++ xorl %eax,%ebp ++ roll $5,%edx ++ movdqa %xmm10,48(%rsp) ++ addl %edi,%ecx ++ andl %ebp,%esi ++ xorl %eax,%ebp ++ pslld $2,%xmm0 ++ addl %edx,%ecx ++ rorl $7,%edx ++ psrld $30,%xmm9 ++ xorl %eax,%esi ++ movl %ecx,%edi ++ addl 8(%rsp),%ebx ++ por %xmm9,%xmm0 ++ xorl %ebp,%edx ++ roll $5,%ecx ++ pshufd $238,%xmm7,%xmm10 ++ addl %esi,%ebx ++ andl %edx,%edi ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ addl 12(%rsp),%eax ++ xorl %ebp,%edi ++ movl %ebx,%esi ++ roll $5,%ebx ++ addl %edi,%eax ++ xorl %edx,%esi ++ rorl $7,%ecx ++ addl %ebx,%eax ++ pxor %xmm5,%xmm1 ++ addl 16(%rsp),%ebp ++ xorl %ecx,%esi ++ punpcklqdq %xmm0,%xmm10 ++ movl %eax,%edi ++ roll $5,%eax ++ pxor %xmm2,%xmm1 ++ addl %esi,%ebp 
++ xorl %ecx,%edi ++ movdqa %xmm8,%xmm9 ++ rorl $7,%ebx ++ paddd %xmm0,%xmm8 ++ addl %eax,%ebp ++ pxor %xmm10,%xmm1 ++ addl 20(%rsp),%edx ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ roll $5,%ebp ++ movdqa %xmm1,%xmm10 ++ addl %edi,%edx ++ xorl %ebx,%esi ++ movdqa %xmm8,0(%rsp) ++ rorl $7,%eax ++ addl %ebp,%edx ++ addl 24(%rsp),%ecx ++ pslld $2,%xmm1 ++ xorl %eax,%esi ++ movl %edx,%edi ++ psrld $30,%xmm10 ++ roll $5,%edx ++ addl %esi,%ecx ++ xorl %eax,%edi ++ rorl $7,%ebp ++ por %xmm10,%xmm1 ++ addl %edx,%ecx ++ addl 28(%rsp),%ebx ++ pshufd $238,%xmm0,%xmm8 ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ roll $5,%ecx ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ rorl $7,%edx ++ addl %ecx,%ebx ++ pxor %xmm6,%xmm2 ++ addl 32(%rsp),%eax ++ xorl %edx,%esi ++ punpcklqdq %xmm1,%xmm8 ++ movl %ebx,%edi ++ roll $5,%ebx ++ pxor %xmm3,%xmm2 ++ addl %esi,%eax ++ xorl %edx,%edi ++ movdqa 0(%r14),%xmm10 ++ rorl $7,%ecx ++ paddd %xmm1,%xmm9 ++ addl %ebx,%eax ++ pxor %xmm8,%xmm2 ++ addl 36(%rsp),%ebp ++ xorl %ecx,%edi ++ movl %eax,%esi ++ roll $5,%eax ++ movdqa %xmm2,%xmm8 ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ movdqa %xmm9,16(%rsp) ++ rorl $7,%ebx ++ addl %eax,%ebp ++ addl 40(%rsp),%edx ++ pslld $2,%xmm2 ++ xorl %ebx,%esi ++ movl %ebp,%edi ++ psrld $30,%xmm8 ++ roll $5,%ebp ++ addl %esi,%edx ++ xorl %ebx,%edi ++ rorl $7,%eax ++ por %xmm8,%xmm2 ++ addl %ebp,%edx ++ addl 44(%rsp),%ecx ++ pshufd $238,%xmm1,%xmm9 ++ xorl %eax,%edi ++ movl %edx,%esi ++ roll $5,%edx ++ addl %edi,%ecx ++ xorl %eax,%esi ++ rorl $7,%ebp ++ addl %edx,%ecx ++ pxor %xmm7,%xmm3 ++ addl 48(%rsp),%ebx ++ xorl %ebp,%esi ++ punpcklqdq %xmm2,%xmm9 ++ movl %ecx,%edi ++ roll $5,%ecx ++ pxor %xmm4,%xmm3 ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ movdqa %xmm10,%xmm8 ++ rorl $7,%edx ++ paddd %xmm2,%xmm10 ++ addl %ecx,%ebx ++ pxor %xmm9,%xmm3 ++ addl 52(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ roll $5,%ebx ++ movdqa %xmm3,%xmm9 ++ addl %edi,%eax ++ xorl %edx,%esi ++ movdqa %xmm10,32(%rsp) ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 56(%rsp),%ebp ++ pslld $2,%xmm3 ++ xorl %ecx,%esi ++ movl %eax,%edi ++ psrld $30,%xmm9 ++ roll $5,%eax ++ addl %esi,%ebp ++ xorl %ecx,%edi ++ rorl $7,%ebx ++ por %xmm9,%xmm3 ++ addl %eax,%ebp ++ addl 60(%rsp),%edx ++ pshufd $238,%xmm2,%xmm10 ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ roll $5,%ebp ++ addl %edi,%edx ++ xorl %ebx,%esi ++ rorl $7,%eax ++ addl %ebp,%edx ++ pxor %xmm0,%xmm4 ++ addl 0(%rsp),%ecx ++ xorl %eax,%esi ++ punpcklqdq %xmm3,%xmm10 ++ movl %edx,%edi ++ roll $5,%edx ++ pxor %xmm5,%xmm4 ++ addl %esi,%ecx ++ xorl %eax,%edi ++ movdqa %xmm8,%xmm9 ++ rorl $7,%ebp ++ paddd %xmm3,%xmm8 ++ addl %edx,%ecx ++ pxor %xmm10,%xmm4 ++ addl 4(%rsp),%ebx ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ roll $5,%ecx ++ movdqa %xmm4,%xmm10 ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ movdqa %xmm8,48(%rsp) ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 8(%rsp),%eax ++ pslld $2,%xmm4 ++ xorl %edx,%esi ++ movl %ebx,%edi ++ psrld $30,%xmm10 ++ roll $5,%ebx ++ addl %esi,%eax ++ xorl %edx,%edi ++ rorl $7,%ecx ++ por %xmm10,%xmm4 ++ addl %ebx,%eax ++ addl 12(%rsp),%ebp ++ pshufd $238,%xmm3,%xmm8 ++ xorl %ecx,%edi ++ movl %eax,%esi ++ roll $5,%eax ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ rorl $7,%ebx ++ addl %eax,%ebp ++ pxor %xmm1,%xmm5 ++ addl 16(%rsp),%edx ++ xorl %ebx,%esi ++ punpcklqdq %xmm4,%xmm8 ++ movl %ebp,%edi ++ roll $5,%ebp ++ pxor %xmm6,%xmm5 ++ addl %esi,%edx ++ xorl %ebx,%edi ++ movdqa %xmm9,%xmm10 ++ rorl $7,%eax ++ paddd %xmm4,%xmm9 ++ addl %ebp,%edx ++ pxor %xmm8,%xmm5 ++ addl 20(%rsp),%ecx ++ xorl %eax,%edi ++ movl %edx,%esi ++ roll $5,%edx ++ movdqa 
%xmm5,%xmm8 ++ addl %edi,%ecx ++ xorl %eax,%esi ++ movdqa %xmm9,0(%rsp) ++ rorl $7,%ebp ++ addl %edx,%ecx ++ addl 24(%rsp),%ebx ++ pslld $2,%xmm5 ++ xorl %ebp,%esi ++ movl %ecx,%edi ++ psrld $30,%xmm8 ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ rorl $7,%edx ++ por %xmm8,%xmm5 ++ addl %ecx,%ebx ++ addl 28(%rsp),%eax ++ pshufd $238,%xmm4,%xmm9 ++ rorl $7,%ecx ++ movl %ebx,%esi ++ xorl %edx,%edi ++ roll $5,%ebx ++ addl %edi,%eax ++ xorl %ecx,%esi ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ pxor %xmm2,%xmm6 ++ addl 32(%rsp),%ebp ++ andl %ecx,%esi ++ xorl %edx,%ecx ++ rorl $7,%ebx ++ punpcklqdq %xmm5,%xmm9 ++ movl %eax,%edi ++ xorl %ecx,%esi ++ pxor %xmm7,%xmm6 ++ roll $5,%eax ++ addl %esi,%ebp ++ movdqa %xmm10,%xmm8 ++ xorl %ebx,%edi ++ paddd %xmm5,%xmm10 ++ xorl %ecx,%ebx ++ pxor %xmm9,%xmm6 ++ addl %eax,%ebp ++ addl 36(%rsp),%edx ++ andl %ebx,%edi ++ xorl %ecx,%ebx ++ rorl $7,%eax ++ movdqa %xmm6,%xmm9 ++ movl %ebp,%esi ++ xorl %ebx,%edi ++ movdqa %xmm10,16(%rsp) ++ roll $5,%ebp ++ addl %edi,%edx ++ xorl %eax,%esi ++ pslld $2,%xmm6 ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ psrld $30,%xmm9 ++ addl 40(%rsp),%ecx ++ andl %eax,%esi ++ xorl %ebx,%eax ++ por %xmm9,%xmm6 ++ rorl $7,%ebp ++ movl %edx,%edi ++ xorl %eax,%esi ++ roll $5,%edx ++ pshufd $238,%xmm5,%xmm10 ++ addl %esi,%ecx ++ xorl %ebp,%edi ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ addl 44(%rsp),%ebx ++ andl %ebp,%edi ++ xorl %eax,%ebp ++ rorl $7,%edx ++ movl %ecx,%esi ++ xorl %ebp,%edi ++ roll $5,%ecx ++ addl %edi,%ebx ++ xorl %edx,%esi ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ pxor %xmm3,%xmm7 ++ addl 48(%rsp),%eax ++ andl %edx,%esi ++ xorl %ebp,%edx ++ rorl $7,%ecx ++ punpcklqdq %xmm6,%xmm10 ++ movl %ebx,%edi ++ xorl %edx,%esi ++ pxor %xmm0,%xmm7 ++ roll $5,%ebx ++ addl %esi,%eax ++ movdqa 32(%r14),%xmm9 ++ xorl %ecx,%edi ++ paddd %xmm6,%xmm8 ++ xorl %edx,%ecx ++ pxor %xmm10,%xmm7 ++ addl %ebx,%eax ++ addl 52(%rsp),%ebp ++ andl %ecx,%edi ++ xorl %edx,%ecx ++ rorl $7,%ebx ++ movdqa %xmm7,%xmm10 ++ movl %eax,%esi ++ xorl %ecx,%edi ++ movdqa %xmm8,32(%rsp) ++ roll $5,%eax ++ addl %edi,%ebp ++ xorl %ebx,%esi ++ pslld $2,%xmm7 ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ psrld $30,%xmm10 ++ addl 56(%rsp),%edx ++ andl %ebx,%esi ++ xorl %ecx,%ebx ++ por %xmm10,%xmm7 ++ rorl $7,%eax ++ movl %ebp,%edi ++ xorl %ebx,%esi ++ roll $5,%ebp ++ pshufd $238,%xmm6,%xmm8 ++ addl %esi,%edx ++ xorl %eax,%edi ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ addl 60(%rsp),%ecx ++ andl %eax,%edi ++ xorl %ebx,%eax ++ rorl $7,%ebp ++ movl %edx,%esi ++ xorl %eax,%edi ++ roll $5,%edx ++ addl %edi,%ecx ++ xorl %ebp,%esi ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ pxor %xmm4,%xmm0 ++ addl 0(%rsp),%ebx ++ andl %ebp,%esi ++ xorl %eax,%ebp ++ rorl $7,%edx ++ punpcklqdq %xmm7,%xmm8 ++ movl %ecx,%edi ++ xorl %ebp,%esi ++ pxor %xmm1,%xmm0 ++ roll $5,%ecx ++ addl %esi,%ebx ++ movdqa %xmm9,%xmm10 ++ xorl %edx,%edi ++ paddd %xmm7,%xmm9 ++ xorl %ebp,%edx ++ pxor %xmm8,%xmm0 ++ addl %ecx,%ebx ++ addl 4(%rsp),%eax ++ andl %edx,%edi ++ xorl %ebp,%edx ++ rorl $7,%ecx ++ movdqa %xmm0,%xmm8 ++ movl %ebx,%esi ++ xorl %edx,%edi ++ movdqa %xmm9,48(%rsp) ++ roll $5,%ebx ++ addl %edi,%eax ++ xorl %ecx,%esi ++ pslld $2,%xmm0 ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ psrld $30,%xmm8 ++ addl 8(%rsp),%ebp ++ andl %ecx,%esi ++ xorl %edx,%ecx ++ por %xmm8,%xmm0 ++ rorl $7,%ebx ++ movl %eax,%edi ++ xorl %ecx,%esi ++ roll $5,%eax ++ pshufd $238,%xmm7,%xmm9 ++ addl %esi,%ebp ++ xorl %ebx,%edi ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ addl 12(%rsp),%edx ++ andl %ebx,%edi ++ xorl %ecx,%ebx ++ rorl $7,%eax ++ movl %ebp,%esi ++ 
xorl %ebx,%edi ++ roll $5,%ebp ++ addl %edi,%edx ++ xorl %eax,%esi ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ pxor %xmm5,%xmm1 ++ addl 16(%rsp),%ecx ++ andl %eax,%esi ++ xorl %ebx,%eax ++ rorl $7,%ebp ++ punpcklqdq %xmm0,%xmm9 ++ movl %edx,%edi ++ xorl %eax,%esi ++ pxor %xmm2,%xmm1 ++ roll $5,%edx ++ addl %esi,%ecx ++ movdqa %xmm10,%xmm8 ++ xorl %ebp,%edi ++ paddd %xmm0,%xmm10 ++ xorl %eax,%ebp ++ pxor %xmm9,%xmm1 ++ addl %edx,%ecx ++ addl 20(%rsp),%ebx ++ andl %ebp,%edi ++ xorl %eax,%ebp ++ rorl $7,%edx ++ movdqa %xmm1,%xmm9 ++ movl %ecx,%esi ++ xorl %ebp,%edi ++ movdqa %xmm10,0(%rsp) ++ roll $5,%ecx ++ addl %edi,%ebx ++ xorl %edx,%esi ++ pslld $2,%xmm1 ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ psrld $30,%xmm9 ++ addl 24(%rsp),%eax ++ andl %edx,%esi ++ xorl %ebp,%edx ++ por %xmm9,%xmm1 ++ rorl $7,%ecx ++ movl %ebx,%edi ++ xorl %edx,%esi ++ roll $5,%ebx ++ pshufd $238,%xmm0,%xmm10 ++ addl %esi,%eax ++ xorl %ecx,%edi ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ addl 28(%rsp),%ebp ++ andl %ecx,%edi ++ xorl %edx,%ecx ++ rorl $7,%ebx ++ movl %eax,%esi ++ xorl %ecx,%edi ++ roll $5,%eax ++ addl %edi,%ebp ++ xorl %ebx,%esi ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ pxor %xmm6,%xmm2 ++ addl 32(%rsp),%edx ++ andl %ebx,%esi ++ xorl %ecx,%ebx ++ rorl $7,%eax ++ punpcklqdq %xmm1,%xmm10 ++ movl %ebp,%edi ++ xorl %ebx,%esi ++ pxor %xmm3,%xmm2 ++ roll $5,%ebp ++ addl %esi,%edx ++ movdqa %xmm8,%xmm9 ++ xorl %eax,%edi ++ paddd %xmm1,%xmm8 ++ xorl %ebx,%eax ++ pxor %xmm10,%xmm2 ++ addl %ebp,%edx ++ addl 36(%rsp),%ecx ++ andl %eax,%edi ++ xorl %ebx,%eax ++ rorl $7,%ebp ++ movdqa %xmm2,%xmm10 ++ movl %edx,%esi ++ xorl %eax,%edi ++ movdqa %xmm8,16(%rsp) ++ roll $5,%edx ++ addl %edi,%ecx ++ xorl %ebp,%esi ++ pslld $2,%xmm2 ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ psrld $30,%xmm10 ++ addl 40(%rsp),%ebx ++ andl %ebp,%esi ++ xorl %eax,%ebp ++ por %xmm10,%xmm2 ++ rorl $7,%edx ++ movl %ecx,%edi ++ xorl %ebp,%esi ++ roll $5,%ecx ++ pshufd $238,%xmm1,%xmm8 ++ addl %esi,%ebx ++ xorl %edx,%edi ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ addl 44(%rsp),%eax ++ andl %edx,%edi ++ xorl %ebp,%edx ++ rorl $7,%ecx ++ movl %ebx,%esi ++ xorl %edx,%edi ++ roll $5,%ebx ++ addl %edi,%eax ++ xorl %edx,%esi ++ addl %ebx,%eax ++ pxor %xmm7,%xmm3 ++ addl 48(%rsp),%ebp ++ xorl %ecx,%esi ++ punpcklqdq %xmm2,%xmm8 ++ movl %eax,%edi ++ roll $5,%eax ++ pxor %xmm4,%xmm3 ++ addl %esi,%ebp ++ xorl %ecx,%edi ++ movdqa %xmm9,%xmm10 ++ rorl $7,%ebx ++ paddd %xmm2,%xmm9 ++ addl %eax,%ebp ++ pxor %xmm8,%xmm3 ++ addl 52(%rsp),%edx ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ roll $5,%ebp ++ movdqa %xmm3,%xmm8 ++ addl %edi,%edx ++ xorl %ebx,%esi ++ movdqa %xmm9,32(%rsp) ++ rorl $7,%eax ++ addl %ebp,%edx ++ addl 56(%rsp),%ecx ++ pslld $2,%xmm3 ++ xorl %eax,%esi ++ movl %edx,%edi ++ psrld $30,%xmm8 ++ roll $5,%edx ++ addl %esi,%ecx ++ xorl %eax,%edi ++ rorl $7,%ebp ++ por %xmm8,%xmm3 ++ addl %edx,%ecx ++ addl 60(%rsp),%ebx ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ roll $5,%ecx ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 0(%rsp),%eax ++ xorl %edx,%esi ++ movl %ebx,%edi ++ roll $5,%ebx ++ paddd %xmm3,%xmm10 ++ addl %esi,%eax ++ xorl %edx,%edi ++ movdqa %xmm10,48(%rsp) ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 4(%rsp),%ebp ++ xorl %ecx,%edi ++ movl %eax,%esi ++ roll $5,%eax ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ rorl $7,%ebx ++ addl %eax,%ebp ++ addl 8(%rsp),%edx ++ xorl %ebx,%esi ++ movl %ebp,%edi ++ roll $5,%ebp ++ addl %esi,%edx ++ xorl %ebx,%edi ++ rorl $7,%eax ++ addl %ebp,%edx ++ addl 12(%rsp),%ecx ++ xorl %eax,%edi ++ movl %edx,%esi ++ roll $5,%edx 
++ addl %edi,%ecx ++ xorl %eax,%esi ++ rorl $7,%ebp ++ addl %edx,%ecx ++ cmpq %r10,%r9 ++ je L$done_ssse3 ++ movdqa 64(%r14),%xmm6 ++ movdqa -64(%r14),%xmm9 ++ movdqu 0(%r9),%xmm0 ++ movdqu 16(%r9),%xmm1 ++ movdqu 32(%r9),%xmm2 ++ movdqu 48(%r9),%xmm3 ++.byte 102,15,56,0,198 ++ addq $64,%r9 ++ addl 16(%rsp),%ebx ++ xorl %ebp,%esi ++ movl %ecx,%edi ++.byte 102,15,56,0,206 ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ rorl $7,%edx ++ paddd %xmm9,%xmm0 ++ addl %ecx,%ebx ++ addl 20(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ movdqa %xmm0,0(%rsp) ++ roll $5,%ebx ++ addl %edi,%eax ++ xorl %edx,%esi ++ rorl $7,%ecx ++ psubd %xmm9,%xmm0 ++ addl %ebx,%eax ++ addl 24(%rsp),%ebp ++ xorl %ecx,%esi ++ movl %eax,%edi ++ roll $5,%eax ++ addl %esi,%ebp ++ xorl %ecx,%edi ++ rorl $7,%ebx ++ addl %eax,%ebp ++ addl 28(%rsp),%edx ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ roll $5,%ebp ++ addl %edi,%edx ++ xorl %ebx,%esi ++ rorl $7,%eax ++ addl %ebp,%edx ++ addl 32(%rsp),%ecx ++ xorl %eax,%esi ++ movl %edx,%edi ++.byte 102,15,56,0,214 ++ roll $5,%edx ++ addl %esi,%ecx ++ xorl %eax,%edi ++ rorl $7,%ebp ++ paddd %xmm9,%xmm1 ++ addl %edx,%ecx ++ addl 36(%rsp),%ebx ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ movdqa %xmm1,16(%rsp) ++ roll $5,%ecx ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ rorl $7,%edx ++ psubd %xmm9,%xmm1 ++ addl %ecx,%ebx ++ addl 40(%rsp),%eax ++ xorl %edx,%esi ++ movl %ebx,%edi ++ roll $5,%ebx ++ addl %esi,%eax ++ xorl %edx,%edi ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 44(%rsp),%ebp ++ xorl %ecx,%edi ++ movl %eax,%esi ++ roll $5,%eax ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ rorl $7,%ebx ++ addl %eax,%ebp ++ addl 48(%rsp),%edx ++ xorl %ebx,%esi ++ movl %ebp,%edi ++.byte 102,15,56,0,222 ++ roll $5,%ebp ++ addl %esi,%edx ++ xorl %ebx,%edi ++ rorl $7,%eax ++ paddd %xmm9,%xmm2 ++ addl %ebp,%edx ++ addl 52(%rsp),%ecx ++ xorl %eax,%edi ++ movl %edx,%esi ++ movdqa %xmm2,32(%rsp) ++ roll $5,%edx ++ addl %edi,%ecx ++ xorl %eax,%esi ++ rorl $7,%ebp ++ psubd %xmm9,%xmm2 ++ addl %edx,%ecx ++ addl 56(%rsp),%ebx ++ xorl %ebp,%esi ++ movl %ecx,%edi ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 60(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ roll $5,%ebx ++ addl %edi,%eax ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 0(%r8),%eax ++ addl 4(%r8),%esi ++ addl 8(%r8),%ecx ++ addl 12(%r8),%edx ++ movl %eax,0(%r8) ++ addl 16(%r8),%ebp ++ movl %esi,4(%r8) ++ movl %esi,%ebx ++ movl %ecx,8(%r8) ++ movl %ecx,%edi ++ movl %edx,12(%r8) ++ xorl %edx,%edi ++ movl %ebp,16(%r8) ++ andl %edi,%esi ++ jmp L$oop_ssse3 ++ ++.p2align 4 ++L$done_ssse3: ++ addl 16(%rsp),%ebx ++ xorl %ebp,%esi ++ movl %ecx,%edi ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 20(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ roll $5,%ebx ++ addl %edi,%eax ++ xorl %edx,%esi ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 24(%rsp),%ebp ++ xorl %ecx,%esi ++ movl %eax,%edi ++ roll $5,%eax ++ addl %esi,%ebp ++ xorl %ecx,%edi ++ rorl $7,%ebx ++ addl %eax,%ebp ++ addl 28(%rsp),%edx ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ roll $5,%ebp ++ addl %edi,%edx ++ xorl %ebx,%esi ++ rorl $7,%eax ++ addl %ebp,%edx ++ addl 32(%rsp),%ecx ++ xorl %eax,%esi ++ movl %edx,%edi ++ roll $5,%edx ++ addl %esi,%ecx ++ xorl %eax,%edi ++ rorl $7,%ebp ++ addl %edx,%ecx ++ addl 36(%rsp),%ebx ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ roll $5,%ecx ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 40(%rsp),%eax ++ xorl %edx,%esi ++ movl %ebx,%edi ++ roll $5,%ebx ++ addl 
%esi,%eax ++ xorl %edx,%edi ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 44(%rsp),%ebp ++ xorl %ecx,%edi ++ movl %eax,%esi ++ roll $5,%eax ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ rorl $7,%ebx ++ addl %eax,%ebp ++ addl 48(%rsp),%edx ++ xorl %ebx,%esi ++ movl %ebp,%edi ++ roll $5,%ebp ++ addl %esi,%edx ++ xorl %ebx,%edi ++ rorl $7,%eax ++ addl %ebp,%edx ++ addl 52(%rsp),%ecx ++ xorl %eax,%edi ++ movl %edx,%esi ++ roll $5,%edx ++ addl %edi,%ecx ++ xorl %eax,%esi ++ rorl $7,%ebp ++ addl %edx,%ecx ++ addl 56(%rsp),%ebx ++ xorl %ebp,%esi ++ movl %ecx,%edi ++ roll $5,%ecx ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ rorl $7,%edx ++ addl %ecx,%ebx ++ addl 60(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ roll $5,%ebx ++ addl %edi,%eax ++ rorl $7,%ecx ++ addl %ebx,%eax ++ addl 0(%r8),%eax ++ addl 4(%r8),%esi ++ addl 8(%r8),%ecx ++ movl %eax,0(%r8) ++ addl 12(%r8),%edx ++ movl %esi,4(%r8) ++ addl 16(%r8),%ebp ++ movl %ecx,8(%r8) ++ movl %edx,12(%r8) ++ movl %ebp,16(%r8) ++ movq -40(%r11),%r14 ++ ++ movq -32(%r11),%r13 ++ ++ movq -24(%r11),%r12 ++ ++ movq -16(%r11),%rbp ++ ++ movq -8(%r11),%rbx ++ ++ leaq (%r11),%rsp ++ ++L$epilogue_ssse3: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++sha1_block_data_order_avx: ++_avx_shortcut: ++ ++ movq %rsp,%r11 ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ leaq -64(%rsp),%rsp ++ vzeroupper ++ andq $-64,%rsp ++ movq %rdi,%r8 ++ movq %rsi,%r9 ++ movq %rdx,%r10 ++ ++ shlq $6,%r10 ++ addq %r9,%r10 ++ leaq K_XX_XX+64(%rip),%r14 ++ ++ movl 0(%r8),%eax ++ movl 4(%r8),%ebx ++ movl 8(%r8),%ecx ++ movl 12(%r8),%edx ++ movl %ebx,%esi ++ movl 16(%r8),%ebp ++ movl %ecx,%edi ++ xorl %edx,%edi ++ andl %edi,%esi ++ ++ vmovdqa 64(%r14),%xmm6 ++ vmovdqa -64(%r14),%xmm11 ++ vmovdqu 0(%r9),%xmm0 ++ vmovdqu 16(%r9),%xmm1 ++ vmovdqu 32(%r9),%xmm2 ++ vmovdqu 48(%r9),%xmm3 ++ vpshufb %xmm6,%xmm0,%xmm0 ++ addq $64,%r9 ++ vpshufb %xmm6,%xmm1,%xmm1 ++ vpshufb %xmm6,%xmm2,%xmm2 ++ vpshufb %xmm6,%xmm3,%xmm3 ++ vpaddd %xmm11,%xmm0,%xmm4 ++ vpaddd %xmm11,%xmm1,%xmm5 ++ vpaddd %xmm11,%xmm2,%xmm6 ++ vmovdqa %xmm4,0(%rsp) ++ vmovdqa %xmm5,16(%rsp) ++ vmovdqa %xmm6,32(%rsp) ++ jmp L$oop_avx ++.p2align 4 ++L$oop_avx: ++ shrdl $2,%ebx,%ebx ++ xorl %edx,%esi ++ vpalignr $8,%xmm0,%xmm1,%xmm4 ++ movl %eax,%edi ++ addl 0(%rsp),%ebp ++ vpaddd %xmm3,%xmm11,%xmm9 ++ xorl %ecx,%ebx ++ shldl $5,%eax,%eax ++ vpsrldq $4,%xmm3,%xmm8 ++ addl %esi,%ebp ++ andl %ebx,%edi ++ vpxor %xmm0,%xmm4,%xmm4 ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ vpxor %xmm2,%xmm8,%xmm8 ++ shrdl $7,%eax,%eax ++ xorl %ecx,%edi ++ movl %ebp,%esi ++ addl 4(%rsp),%edx ++ vpxor %xmm8,%xmm4,%xmm4 ++ xorl %ebx,%eax ++ shldl $5,%ebp,%ebp ++ vmovdqa %xmm9,48(%rsp) ++ addl %edi,%edx ++ andl %eax,%esi ++ vpsrld $31,%xmm4,%xmm8 ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ shrdl $7,%ebp,%ebp ++ xorl %ebx,%esi ++ vpslldq $12,%xmm4,%xmm10 ++ vpaddd %xmm4,%xmm4,%xmm4 ++ movl %edx,%edi ++ addl 8(%rsp),%ecx ++ xorl %eax,%ebp ++ shldl $5,%edx,%edx ++ vpsrld $30,%xmm10,%xmm9 ++ vpor %xmm8,%xmm4,%xmm4 ++ addl %esi,%ecx ++ andl %ebp,%edi ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ vpslld $2,%xmm10,%xmm10 ++ vpxor %xmm9,%xmm4,%xmm4 ++ shrdl $7,%edx,%edx ++ xorl %eax,%edi ++ movl %ecx,%esi ++ addl 12(%rsp),%ebx ++ vpxor %xmm10,%xmm4,%xmm4 ++ xorl %ebp,%edx ++ shldl $5,%ecx,%ecx ++ addl %edi,%ebx ++ andl %edx,%esi ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ shrdl $7,%ecx,%ecx ++ xorl %ebp,%esi ++ vpalignr $8,%xmm1,%xmm2,%xmm5 ++ movl %ebx,%edi ++ addl 16(%rsp),%eax ++ vpaddd %xmm4,%xmm11,%xmm9 ++ xorl %edx,%ecx ++ shldl $5,%ebx,%ebx ++ vpsrldq 
$4,%xmm4,%xmm8 ++ addl %esi,%eax ++ andl %ecx,%edi ++ vpxor %xmm1,%xmm5,%xmm5 ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ vpxor %xmm3,%xmm8,%xmm8 ++ shrdl $7,%ebx,%ebx ++ xorl %edx,%edi ++ movl %eax,%esi ++ addl 20(%rsp),%ebp ++ vpxor %xmm8,%xmm5,%xmm5 ++ xorl %ecx,%ebx ++ shldl $5,%eax,%eax ++ vmovdqa %xmm9,0(%rsp) ++ addl %edi,%ebp ++ andl %ebx,%esi ++ vpsrld $31,%xmm5,%xmm8 ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ shrdl $7,%eax,%eax ++ xorl %ecx,%esi ++ vpslldq $12,%xmm5,%xmm10 ++ vpaddd %xmm5,%xmm5,%xmm5 ++ movl %ebp,%edi ++ addl 24(%rsp),%edx ++ xorl %ebx,%eax ++ shldl $5,%ebp,%ebp ++ vpsrld $30,%xmm10,%xmm9 ++ vpor %xmm8,%xmm5,%xmm5 ++ addl %esi,%edx ++ andl %eax,%edi ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ vpslld $2,%xmm10,%xmm10 ++ vpxor %xmm9,%xmm5,%xmm5 ++ shrdl $7,%ebp,%ebp ++ xorl %ebx,%edi ++ movl %edx,%esi ++ addl 28(%rsp),%ecx ++ vpxor %xmm10,%xmm5,%xmm5 ++ xorl %eax,%ebp ++ shldl $5,%edx,%edx ++ vmovdqa -32(%r14),%xmm11 ++ addl %edi,%ecx ++ andl %ebp,%esi ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ shrdl $7,%edx,%edx ++ xorl %eax,%esi ++ vpalignr $8,%xmm2,%xmm3,%xmm6 ++ movl %ecx,%edi ++ addl 32(%rsp),%ebx ++ vpaddd %xmm5,%xmm11,%xmm9 ++ xorl %ebp,%edx ++ shldl $5,%ecx,%ecx ++ vpsrldq $4,%xmm5,%xmm8 ++ addl %esi,%ebx ++ andl %edx,%edi ++ vpxor %xmm2,%xmm6,%xmm6 ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ vpxor %xmm4,%xmm8,%xmm8 ++ shrdl $7,%ecx,%ecx ++ xorl %ebp,%edi ++ movl %ebx,%esi ++ addl 36(%rsp),%eax ++ vpxor %xmm8,%xmm6,%xmm6 ++ xorl %edx,%ecx ++ shldl $5,%ebx,%ebx ++ vmovdqa %xmm9,16(%rsp) ++ addl %edi,%eax ++ andl %ecx,%esi ++ vpsrld $31,%xmm6,%xmm8 ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ shrdl $7,%ebx,%ebx ++ xorl %edx,%esi ++ vpslldq $12,%xmm6,%xmm10 ++ vpaddd %xmm6,%xmm6,%xmm6 ++ movl %eax,%edi ++ addl 40(%rsp),%ebp ++ xorl %ecx,%ebx ++ shldl $5,%eax,%eax ++ vpsrld $30,%xmm10,%xmm9 ++ vpor %xmm8,%xmm6,%xmm6 ++ addl %esi,%ebp ++ andl %ebx,%edi ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ vpslld $2,%xmm10,%xmm10 ++ vpxor %xmm9,%xmm6,%xmm6 ++ shrdl $7,%eax,%eax ++ xorl %ecx,%edi ++ movl %ebp,%esi ++ addl 44(%rsp),%edx ++ vpxor %xmm10,%xmm6,%xmm6 ++ xorl %ebx,%eax ++ shldl $5,%ebp,%ebp ++ addl %edi,%edx ++ andl %eax,%esi ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ shrdl $7,%ebp,%ebp ++ xorl %ebx,%esi ++ vpalignr $8,%xmm3,%xmm4,%xmm7 ++ movl %edx,%edi ++ addl 48(%rsp),%ecx ++ vpaddd %xmm6,%xmm11,%xmm9 ++ xorl %eax,%ebp ++ shldl $5,%edx,%edx ++ vpsrldq $4,%xmm6,%xmm8 ++ addl %esi,%ecx ++ andl %ebp,%edi ++ vpxor %xmm3,%xmm7,%xmm7 ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ vpxor %xmm5,%xmm8,%xmm8 ++ shrdl $7,%edx,%edx ++ xorl %eax,%edi ++ movl %ecx,%esi ++ addl 52(%rsp),%ebx ++ vpxor %xmm8,%xmm7,%xmm7 ++ xorl %ebp,%edx ++ shldl $5,%ecx,%ecx ++ vmovdqa %xmm9,32(%rsp) ++ addl %edi,%ebx ++ andl %edx,%esi ++ vpsrld $31,%xmm7,%xmm8 ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ shrdl $7,%ecx,%ecx ++ xorl %ebp,%esi ++ vpslldq $12,%xmm7,%xmm10 ++ vpaddd %xmm7,%xmm7,%xmm7 ++ movl %ebx,%edi ++ addl 56(%rsp),%eax ++ xorl %edx,%ecx ++ shldl $5,%ebx,%ebx ++ vpsrld $30,%xmm10,%xmm9 ++ vpor %xmm8,%xmm7,%xmm7 ++ addl %esi,%eax ++ andl %ecx,%edi ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ vpslld $2,%xmm10,%xmm10 ++ vpxor %xmm9,%xmm7,%xmm7 ++ shrdl $7,%ebx,%ebx ++ xorl %edx,%edi ++ movl %eax,%esi ++ addl 60(%rsp),%ebp ++ vpxor %xmm10,%xmm7,%xmm7 ++ xorl %ecx,%ebx ++ shldl $5,%eax,%eax ++ addl %edi,%ebp ++ andl %ebx,%esi ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ vpalignr $8,%xmm6,%xmm7,%xmm8 ++ vpxor %xmm4,%xmm0,%xmm0 ++ shrdl $7,%eax,%eax ++ xorl %ecx,%esi ++ movl %ebp,%edi ++ addl 0(%rsp),%edx ++ vpxor %xmm1,%xmm0,%xmm0 ++ xorl 
%ebx,%eax ++ shldl $5,%ebp,%ebp ++ vpaddd %xmm7,%xmm11,%xmm9 ++ addl %esi,%edx ++ andl %eax,%edi ++ vpxor %xmm8,%xmm0,%xmm0 ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ shrdl $7,%ebp,%ebp ++ xorl %ebx,%edi ++ vpsrld $30,%xmm0,%xmm8 ++ vmovdqa %xmm9,48(%rsp) ++ movl %edx,%esi ++ addl 4(%rsp),%ecx ++ xorl %eax,%ebp ++ shldl $5,%edx,%edx ++ vpslld $2,%xmm0,%xmm0 ++ addl %edi,%ecx ++ andl %ebp,%esi ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ shrdl $7,%edx,%edx ++ xorl %eax,%esi ++ movl %ecx,%edi ++ addl 8(%rsp),%ebx ++ vpor %xmm8,%xmm0,%xmm0 ++ xorl %ebp,%edx ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ andl %edx,%edi ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ addl 12(%rsp),%eax ++ xorl %ebp,%edi ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %edi,%eax ++ xorl %edx,%esi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vpalignr $8,%xmm7,%xmm0,%xmm8 ++ vpxor %xmm5,%xmm1,%xmm1 ++ addl 16(%rsp),%ebp ++ xorl %ecx,%esi ++ movl %eax,%edi ++ shldl $5,%eax,%eax ++ vpxor %xmm2,%xmm1,%xmm1 ++ addl %esi,%ebp ++ xorl %ecx,%edi ++ vpaddd %xmm0,%xmm11,%xmm9 ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ vpxor %xmm8,%xmm1,%xmm1 ++ addl 20(%rsp),%edx ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ shldl $5,%ebp,%ebp ++ vpsrld $30,%xmm1,%xmm8 ++ vmovdqa %xmm9,0(%rsp) ++ addl %edi,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ vpslld $2,%xmm1,%xmm1 ++ addl 24(%rsp),%ecx ++ xorl %eax,%esi ++ movl %edx,%edi ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ xorl %eax,%edi ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx ++ vpor %xmm8,%xmm1,%xmm1 ++ addl 28(%rsp),%ebx ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ vpalignr $8,%xmm0,%xmm1,%xmm8 ++ vpxor %xmm6,%xmm2,%xmm2 ++ addl 32(%rsp),%eax ++ xorl %edx,%esi ++ movl %ebx,%edi ++ shldl $5,%ebx,%ebx ++ vpxor %xmm3,%xmm2,%xmm2 ++ addl %esi,%eax ++ xorl %edx,%edi ++ vpaddd %xmm1,%xmm11,%xmm9 ++ vmovdqa 0(%r14),%xmm11 ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vpxor %xmm8,%xmm2,%xmm2 ++ addl 36(%rsp),%ebp ++ xorl %ecx,%edi ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ vpsrld $30,%xmm2,%xmm8 ++ vmovdqa %xmm9,16(%rsp) ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ vpslld $2,%xmm2,%xmm2 ++ addl 40(%rsp),%edx ++ xorl %ebx,%esi ++ movl %ebp,%edi ++ shldl $5,%ebp,%ebp ++ addl %esi,%edx ++ xorl %ebx,%edi ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ vpor %xmm8,%xmm2,%xmm2 ++ addl 44(%rsp),%ecx ++ xorl %eax,%edi ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ addl %edi,%ecx ++ xorl %eax,%esi ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx ++ vpalignr $8,%xmm1,%xmm2,%xmm8 ++ vpxor %xmm7,%xmm3,%xmm3 ++ addl 48(%rsp),%ebx ++ xorl %ebp,%esi ++ movl %ecx,%edi ++ shldl $5,%ecx,%ecx ++ vpxor %xmm4,%xmm3,%xmm3 ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ vpaddd %xmm2,%xmm11,%xmm9 ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ vpxor %xmm8,%xmm3,%xmm3 ++ addl 52(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ vpsrld $30,%xmm3,%xmm8 ++ vmovdqa %xmm9,32(%rsp) ++ addl %edi,%eax ++ xorl %edx,%esi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vpslld $2,%xmm3,%xmm3 ++ addl 56(%rsp),%ebp ++ xorl %ecx,%esi ++ movl %eax,%edi ++ shldl $5,%eax,%eax ++ addl %esi,%ebp ++ xorl %ecx,%edi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ vpor %xmm8,%xmm3,%xmm3 ++ addl 60(%rsp),%edx ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ shldl $5,%ebp,%ebp ++ addl %edi,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ vpalignr $8,%xmm2,%xmm3,%xmm8 ++ vpxor %xmm0,%xmm4,%xmm4 ++ addl 0(%rsp),%ecx ++ xorl %eax,%esi ++ movl %edx,%edi 
++ shldl $5,%edx,%edx ++ vpxor %xmm5,%xmm4,%xmm4 ++ addl %esi,%ecx ++ xorl %eax,%edi ++ vpaddd %xmm3,%xmm11,%xmm9 ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx ++ vpxor %xmm8,%xmm4,%xmm4 ++ addl 4(%rsp),%ebx ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ vpsrld $30,%xmm4,%xmm8 ++ vmovdqa %xmm9,48(%rsp) ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ vpslld $2,%xmm4,%xmm4 ++ addl 8(%rsp),%eax ++ xorl %edx,%esi ++ movl %ebx,%edi ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ xorl %edx,%edi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vpor %xmm8,%xmm4,%xmm4 ++ addl 12(%rsp),%ebp ++ xorl %ecx,%edi ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ vpalignr $8,%xmm3,%xmm4,%xmm8 ++ vpxor %xmm1,%xmm5,%xmm5 ++ addl 16(%rsp),%edx ++ xorl %ebx,%esi ++ movl %ebp,%edi ++ shldl $5,%ebp,%ebp ++ vpxor %xmm6,%xmm5,%xmm5 ++ addl %esi,%edx ++ xorl %ebx,%edi ++ vpaddd %xmm4,%xmm11,%xmm9 ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ vpxor %xmm8,%xmm5,%xmm5 ++ addl 20(%rsp),%ecx ++ xorl %eax,%edi ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ vpsrld $30,%xmm5,%xmm8 ++ vmovdqa %xmm9,0(%rsp) ++ addl %edi,%ecx ++ xorl %eax,%esi ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx ++ vpslld $2,%xmm5,%xmm5 ++ addl 24(%rsp),%ebx ++ xorl %ebp,%esi ++ movl %ecx,%edi ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ vpor %xmm8,%xmm5,%xmm5 ++ addl 28(%rsp),%eax ++ shrdl $7,%ecx,%ecx ++ movl %ebx,%esi ++ xorl %edx,%edi ++ shldl $5,%ebx,%ebx ++ addl %edi,%eax ++ xorl %ecx,%esi ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ vpalignr $8,%xmm4,%xmm5,%xmm8 ++ vpxor %xmm2,%xmm6,%xmm6 ++ addl 32(%rsp),%ebp ++ andl %ecx,%esi ++ xorl %edx,%ecx ++ shrdl $7,%ebx,%ebx ++ vpxor %xmm7,%xmm6,%xmm6 ++ movl %eax,%edi ++ xorl %ecx,%esi ++ vpaddd %xmm5,%xmm11,%xmm9 ++ shldl $5,%eax,%eax ++ addl %esi,%ebp ++ vpxor %xmm8,%xmm6,%xmm6 ++ xorl %ebx,%edi ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ addl 36(%rsp),%edx ++ vpsrld $30,%xmm6,%xmm8 ++ vmovdqa %xmm9,16(%rsp) ++ andl %ebx,%edi ++ xorl %ecx,%ebx ++ shrdl $7,%eax,%eax ++ movl %ebp,%esi ++ vpslld $2,%xmm6,%xmm6 ++ xorl %ebx,%edi ++ shldl $5,%ebp,%ebp ++ addl %edi,%edx ++ xorl %eax,%esi ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ addl 40(%rsp),%ecx ++ andl %eax,%esi ++ vpor %xmm8,%xmm6,%xmm6 ++ xorl %ebx,%eax ++ shrdl $7,%ebp,%ebp ++ movl %edx,%edi ++ xorl %eax,%esi ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ xorl %ebp,%edi ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ addl 44(%rsp),%ebx ++ andl %ebp,%edi ++ xorl %eax,%ebp ++ shrdl $7,%edx,%edx ++ movl %ecx,%esi ++ xorl %ebp,%edi ++ shldl $5,%ecx,%ecx ++ addl %edi,%ebx ++ xorl %edx,%esi ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ vpalignr $8,%xmm5,%xmm6,%xmm8 ++ vpxor %xmm3,%xmm7,%xmm7 ++ addl 48(%rsp),%eax ++ andl %edx,%esi ++ xorl %ebp,%edx ++ shrdl $7,%ecx,%ecx ++ vpxor %xmm0,%xmm7,%xmm7 ++ movl %ebx,%edi ++ xorl %edx,%esi ++ vpaddd %xmm6,%xmm11,%xmm9 ++ vmovdqa 32(%r14),%xmm11 ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ vpxor %xmm8,%xmm7,%xmm7 ++ xorl %ecx,%edi ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ addl 52(%rsp),%ebp ++ vpsrld $30,%xmm7,%xmm8 ++ vmovdqa %xmm9,32(%rsp) ++ andl %ecx,%edi ++ xorl %edx,%ecx ++ shrdl $7,%ebx,%ebx ++ movl %eax,%esi ++ vpslld $2,%xmm7,%xmm7 ++ xorl %ecx,%edi ++ shldl $5,%eax,%eax ++ addl %edi,%ebp ++ xorl %ebx,%esi ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ addl 56(%rsp),%edx ++ andl %ebx,%esi ++ vpor %xmm8,%xmm7,%xmm7 ++ xorl %ecx,%ebx ++ shrdl $7,%eax,%eax ++ movl %ebp,%edi ++ xorl %ebx,%esi ++ shldl $5,%ebp,%ebp 
++ addl %esi,%edx ++ xorl %eax,%edi ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ addl 60(%rsp),%ecx ++ andl %eax,%edi ++ xorl %ebx,%eax ++ shrdl $7,%ebp,%ebp ++ movl %edx,%esi ++ xorl %eax,%edi ++ shldl $5,%edx,%edx ++ addl %edi,%ecx ++ xorl %ebp,%esi ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ vpalignr $8,%xmm6,%xmm7,%xmm8 ++ vpxor %xmm4,%xmm0,%xmm0 ++ addl 0(%rsp),%ebx ++ andl %ebp,%esi ++ xorl %eax,%ebp ++ shrdl $7,%edx,%edx ++ vpxor %xmm1,%xmm0,%xmm0 ++ movl %ecx,%edi ++ xorl %ebp,%esi ++ vpaddd %xmm7,%xmm11,%xmm9 ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ vpxor %xmm8,%xmm0,%xmm0 ++ xorl %edx,%edi ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ addl 4(%rsp),%eax ++ vpsrld $30,%xmm0,%xmm8 ++ vmovdqa %xmm9,48(%rsp) ++ andl %edx,%edi ++ xorl %ebp,%edx ++ shrdl $7,%ecx,%ecx ++ movl %ebx,%esi ++ vpslld $2,%xmm0,%xmm0 ++ xorl %edx,%edi ++ shldl $5,%ebx,%ebx ++ addl %edi,%eax ++ xorl %ecx,%esi ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ addl 8(%rsp),%ebp ++ andl %ecx,%esi ++ vpor %xmm8,%xmm0,%xmm0 ++ xorl %edx,%ecx ++ shrdl $7,%ebx,%ebx ++ movl %eax,%edi ++ xorl %ecx,%esi ++ shldl $5,%eax,%eax ++ addl %esi,%ebp ++ xorl %ebx,%edi ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ addl 12(%rsp),%edx ++ andl %ebx,%edi ++ xorl %ecx,%ebx ++ shrdl $7,%eax,%eax ++ movl %ebp,%esi ++ xorl %ebx,%edi ++ shldl $5,%ebp,%ebp ++ addl %edi,%edx ++ xorl %eax,%esi ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ vpalignr $8,%xmm7,%xmm0,%xmm8 ++ vpxor %xmm5,%xmm1,%xmm1 ++ addl 16(%rsp),%ecx ++ andl %eax,%esi ++ xorl %ebx,%eax ++ shrdl $7,%ebp,%ebp ++ vpxor %xmm2,%xmm1,%xmm1 ++ movl %edx,%edi ++ xorl %eax,%esi ++ vpaddd %xmm0,%xmm11,%xmm9 ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ vpxor %xmm8,%xmm1,%xmm1 ++ xorl %ebp,%edi ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ addl 20(%rsp),%ebx ++ vpsrld $30,%xmm1,%xmm8 ++ vmovdqa %xmm9,0(%rsp) ++ andl %ebp,%edi ++ xorl %eax,%ebp ++ shrdl $7,%edx,%edx ++ movl %ecx,%esi ++ vpslld $2,%xmm1,%xmm1 ++ xorl %ebp,%edi ++ shldl $5,%ecx,%ecx ++ addl %edi,%ebx ++ xorl %edx,%esi ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ addl 24(%rsp),%eax ++ andl %edx,%esi ++ vpor %xmm8,%xmm1,%xmm1 ++ xorl %ebp,%edx ++ shrdl $7,%ecx,%ecx ++ movl %ebx,%edi ++ xorl %edx,%esi ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ xorl %ecx,%edi ++ xorl %edx,%ecx ++ addl %ebx,%eax ++ addl 28(%rsp),%ebp ++ andl %ecx,%edi ++ xorl %edx,%ecx ++ shrdl $7,%ebx,%ebx ++ movl %eax,%esi ++ xorl %ecx,%edi ++ shldl $5,%eax,%eax ++ addl %edi,%ebp ++ xorl %ebx,%esi ++ xorl %ecx,%ebx ++ addl %eax,%ebp ++ vpalignr $8,%xmm0,%xmm1,%xmm8 ++ vpxor %xmm6,%xmm2,%xmm2 ++ addl 32(%rsp),%edx ++ andl %ebx,%esi ++ xorl %ecx,%ebx ++ shrdl $7,%eax,%eax ++ vpxor %xmm3,%xmm2,%xmm2 ++ movl %ebp,%edi ++ xorl %ebx,%esi ++ vpaddd %xmm1,%xmm11,%xmm9 ++ shldl $5,%ebp,%ebp ++ addl %esi,%edx ++ vpxor %xmm8,%xmm2,%xmm2 ++ xorl %eax,%edi ++ xorl %ebx,%eax ++ addl %ebp,%edx ++ addl 36(%rsp),%ecx ++ vpsrld $30,%xmm2,%xmm8 ++ vmovdqa %xmm9,16(%rsp) ++ andl %eax,%edi ++ xorl %ebx,%eax ++ shrdl $7,%ebp,%ebp ++ movl %edx,%esi ++ vpslld $2,%xmm2,%xmm2 ++ xorl %eax,%edi ++ shldl $5,%edx,%edx ++ addl %edi,%ecx ++ xorl %ebp,%esi ++ xorl %eax,%ebp ++ addl %edx,%ecx ++ addl 40(%rsp),%ebx ++ andl %ebp,%esi ++ vpor %xmm8,%xmm2,%xmm2 ++ xorl %eax,%ebp ++ shrdl $7,%edx,%edx ++ movl %ecx,%edi ++ xorl %ebp,%esi ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %edx,%edi ++ xorl %ebp,%edx ++ addl %ecx,%ebx ++ addl 44(%rsp),%eax ++ andl %edx,%edi ++ xorl %ebp,%edx ++ shrdl $7,%ecx,%ecx ++ movl %ebx,%esi ++ xorl %edx,%edi ++ shldl $5,%ebx,%ebx ++ addl %edi,%eax ++ xorl %edx,%esi ++ addl %ebx,%eax ++ vpalignr $8,%xmm1,%xmm2,%xmm8 ++ 
vpxor %xmm7,%xmm3,%xmm3 ++ addl 48(%rsp),%ebp ++ xorl %ecx,%esi ++ movl %eax,%edi ++ shldl $5,%eax,%eax ++ vpxor %xmm4,%xmm3,%xmm3 ++ addl %esi,%ebp ++ xorl %ecx,%edi ++ vpaddd %xmm2,%xmm11,%xmm9 ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ vpxor %xmm8,%xmm3,%xmm3 ++ addl 52(%rsp),%edx ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ shldl $5,%ebp,%ebp ++ vpsrld $30,%xmm3,%xmm8 ++ vmovdqa %xmm9,32(%rsp) ++ addl %edi,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ vpslld $2,%xmm3,%xmm3 ++ addl 56(%rsp),%ecx ++ xorl %eax,%esi ++ movl %edx,%edi ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ xorl %eax,%edi ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx ++ vpor %xmm8,%xmm3,%xmm3 ++ addl 60(%rsp),%ebx ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 0(%rsp),%eax ++ vpaddd %xmm3,%xmm11,%xmm9 ++ xorl %edx,%esi ++ movl %ebx,%edi ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ vmovdqa %xmm9,48(%rsp) ++ xorl %edx,%edi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 4(%rsp),%ebp ++ xorl %ecx,%edi ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ addl 8(%rsp),%edx ++ xorl %ebx,%esi ++ movl %ebp,%edi ++ shldl $5,%ebp,%ebp ++ addl %esi,%edx ++ xorl %ebx,%edi ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ addl 12(%rsp),%ecx ++ xorl %eax,%edi ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ addl %edi,%ecx ++ xorl %eax,%esi ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx ++ cmpq %r10,%r9 ++ je L$done_avx ++ vmovdqa 64(%r14),%xmm6 ++ vmovdqa -64(%r14),%xmm11 ++ vmovdqu 0(%r9),%xmm0 ++ vmovdqu 16(%r9),%xmm1 ++ vmovdqu 32(%r9),%xmm2 ++ vmovdqu 48(%r9),%xmm3 ++ vpshufb %xmm6,%xmm0,%xmm0 ++ addq $64,%r9 ++ addl 16(%rsp),%ebx ++ xorl %ebp,%esi ++ vpshufb %xmm6,%xmm1,%xmm1 ++ movl %ecx,%edi ++ shldl $5,%ecx,%ecx ++ vpaddd %xmm11,%xmm0,%xmm4 ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ vmovdqa %xmm4,0(%rsp) ++ addl 20(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %edi,%eax ++ xorl %edx,%esi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 24(%rsp),%ebp ++ xorl %ecx,%esi ++ movl %eax,%edi ++ shldl $5,%eax,%eax ++ addl %esi,%ebp ++ xorl %ecx,%edi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ addl 28(%rsp),%edx ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ shldl $5,%ebp,%ebp ++ addl %edi,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ addl 32(%rsp),%ecx ++ xorl %eax,%esi ++ vpshufb %xmm6,%xmm2,%xmm2 ++ movl %edx,%edi ++ shldl $5,%edx,%edx ++ vpaddd %xmm11,%xmm1,%xmm5 ++ addl %esi,%ecx ++ xorl %eax,%edi ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx ++ vmovdqa %xmm5,16(%rsp) ++ addl 36(%rsp),%ebx ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 40(%rsp),%eax ++ xorl %edx,%esi ++ movl %ebx,%edi ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ xorl %edx,%edi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 44(%rsp),%ebp ++ xorl %ecx,%edi ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ addl 48(%rsp),%edx ++ xorl %ebx,%esi ++ vpshufb %xmm6,%xmm3,%xmm3 ++ movl %ebp,%edi ++ shldl $5,%ebp,%ebp ++ vpaddd %xmm11,%xmm2,%xmm6 ++ addl %esi,%edx ++ xorl %ebx,%edi ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ vmovdqa %xmm6,32(%rsp) ++ addl 52(%rsp),%ecx ++ xorl %eax,%edi ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ addl %edi,%ecx ++ xorl %eax,%esi ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx 
++ addl 56(%rsp),%ebx ++ xorl %ebp,%esi ++ movl %ecx,%edi ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 60(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %edi,%eax ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 0(%r8),%eax ++ addl 4(%r8),%esi ++ addl 8(%r8),%ecx ++ addl 12(%r8),%edx ++ movl %eax,0(%r8) ++ addl 16(%r8),%ebp ++ movl %esi,4(%r8) ++ movl %esi,%ebx ++ movl %ecx,8(%r8) ++ movl %ecx,%edi ++ movl %edx,12(%r8) ++ xorl %edx,%edi ++ movl %ebp,16(%r8) ++ andl %edi,%esi ++ jmp L$oop_avx ++ ++.p2align 4 ++L$done_avx: ++ addl 16(%rsp),%ebx ++ xorl %ebp,%esi ++ movl %ecx,%edi ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 20(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %edi,%eax ++ xorl %edx,%esi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 24(%rsp),%ebp ++ xorl %ecx,%esi ++ movl %eax,%edi ++ shldl $5,%eax,%eax ++ addl %esi,%ebp ++ xorl %ecx,%edi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ addl 28(%rsp),%edx ++ xorl %ebx,%edi ++ movl %ebp,%esi ++ shldl $5,%ebp,%ebp ++ addl %edi,%edx ++ xorl %ebx,%esi ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ addl 32(%rsp),%ecx ++ xorl %eax,%esi ++ movl %edx,%edi ++ shldl $5,%edx,%edx ++ addl %esi,%ecx ++ xorl %eax,%edi ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx ++ addl 36(%rsp),%ebx ++ xorl %ebp,%edi ++ movl %ecx,%esi ++ shldl $5,%ecx,%ecx ++ addl %edi,%ebx ++ xorl %ebp,%esi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 40(%rsp),%eax ++ xorl %edx,%esi ++ movl %ebx,%edi ++ shldl $5,%ebx,%ebx ++ addl %esi,%eax ++ xorl %edx,%edi ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ addl 44(%rsp),%ebp ++ xorl %ecx,%edi ++ movl %eax,%esi ++ shldl $5,%eax,%eax ++ addl %edi,%ebp ++ xorl %ecx,%esi ++ shrdl $7,%ebx,%ebx ++ addl %eax,%ebp ++ addl 48(%rsp),%edx ++ xorl %ebx,%esi ++ movl %ebp,%edi ++ shldl $5,%ebp,%ebp ++ addl %esi,%edx ++ xorl %ebx,%edi ++ shrdl $7,%eax,%eax ++ addl %ebp,%edx ++ addl 52(%rsp),%ecx ++ xorl %eax,%edi ++ movl %edx,%esi ++ shldl $5,%edx,%edx ++ addl %edi,%ecx ++ xorl %eax,%esi ++ shrdl $7,%ebp,%ebp ++ addl %edx,%ecx ++ addl 56(%rsp),%ebx ++ xorl %ebp,%esi ++ movl %ecx,%edi ++ shldl $5,%ecx,%ecx ++ addl %esi,%ebx ++ xorl %ebp,%edi ++ shrdl $7,%edx,%edx ++ addl %ecx,%ebx ++ addl 60(%rsp),%eax ++ xorl %edx,%edi ++ movl %ebx,%esi ++ shldl $5,%ebx,%ebx ++ addl %edi,%eax ++ shrdl $7,%ecx,%ecx ++ addl %ebx,%eax ++ vzeroupper ++ ++ addl 0(%r8),%eax ++ addl 4(%r8),%esi ++ addl 8(%r8),%ecx ++ movl %eax,0(%r8) ++ addl 12(%r8),%edx ++ movl %esi,4(%r8) ++ addl 16(%r8),%ebp ++ movl %ecx,8(%r8) ++ movl %edx,12(%r8) ++ movl %ebp,16(%r8) ++ movq -40(%r11),%r14 ++ ++ movq -32(%r11),%r13 ++ ++ movq -24(%r11),%r12 ++ ++ movq -16(%r11),%rbp ++ ++ movq -8(%r11),%rbx ++ ++ leaq (%r11),%rsp ++ ++L$epilogue_avx: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++sha1_block_data_order_avx2: ++_avx2_shortcut: ++ ++ movq %rsp,%r11 ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ vzeroupper ++ movq %rdi,%r8 ++ movq %rsi,%r9 ++ movq %rdx,%r10 ++ ++ leaq -640(%rsp),%rsp ++ shlq $6,%r10 ++ leaq 64(%r9),%r13 ++ andq $-128,%rsp ++ addq %r9,%r10 ++ leaq K_XX_XX+64(%rip),%r14 ++ ++ movl 0(%r8),%eax ++ cmpq %r10,%r13 ++ cmovaeq %r9,%r13 ++ movl 4(%r8),%ebp ++ movl 8(%r8),%ecx ++ movl 12(%r8),%edx ++ movl 16(%r8),%esi ++ vmovdqu 64(%r14),%ymm6 ++ ++ vmovdqu (%r9),%xmm0 ++ vmovdqu 16(%r9),%xmm1 ++ vmovdqu 32(%r9),%xmm2 ++ vmovdqu 48(%r9),%xmm3 ++ leaq 64(%r9),%r9 ++ 
vinserti128 $1,(%r13),%ymm0,%ymm0 ++ vinserti128 $1,16(%r13),%ymm1,%ymm1 ++ vpshufb %ymm6,%ymm0,%ymm0 ++ vinserti128 $1,32(%r13),%ymm2,%ymm2 ++ vpshufb %ymm6,%ymm1,%ymm1 ++ vinserti128 $1,48(%r13),%ymm3,%ymm3 ++ vpshufb %ymm6,%ymm2,%ymm2 ++ vmovdqu -64(%r14),%ymm11 ++ vpshufb %ymm6,%ymm3,%ymm3 ++ ++ vpaddd %ymm11,%ymm0,%ymm4 ++ vpaddd %ymm11,%ymm1,%ymm5 ++ vmovdqu %ymm4,0(%rsp) ++ vpaddd %ymm11,%ymm2,%ymm6 ++ vmovdqu %ymm5,32(%rsp) ++ vpaddd %ymm11,%ymm3,%ymm7 ++ vmovdqu %ymm6,64(%rsp) ++ vmovdqu %ymm7,96(%rsp) ++ vpalignr $8,%ymm0,%ymm1,%ymm4 ++ vpsrldq $4,%ymm3,%ymm8 ++ vpxor %ymm0,%ymm4,%ymm4 ++ vpxor %ymm2,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm4,%ymm4 ++ vpsrld $31,%ymm4,%ymm8 ++ vpslldq $12,%ymm4,%ymm10 ++ vpaddd %ymm4,%ymm4,%ymm4 ++ vpsrld $30,%ymm10,%ymm9 ++ vpor %ymm8,%ymm4,%ymm4 ++ vpslld $2,%ymm10,%ymm10 ++ vpxor %ymm9,%ymm4,%ymm4 ++ vpxor %ymm10,%ymm4,%ymm4 ++ vpaddd %ymm11,%ymm4,%ymm9 ++ vmovdqu %ymm9,128(%rsp) ++ vpalignr $8,%ymm1,%ymm2,%ymm5 ++ vpsrldq $4,%ymm4,%ymm8 ++ vpxor %ymm1,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm5,%ymm5 ++ vpsrld $31,%ymm5,%ymm8 ++ vmovdqu -32(%r14),%ymm11 ++ vpslldq $12,%ymm5,%ymm10 ++ vpaddd %ymm5,%ymm5,%ymm5 ++ vpsrld $30,%ymm10,%ymm9 ++ vpor %ymm8,%ymm5,%ymm5 ++ vpslld $2,%ymm10,%ymm10 ++ vpxor %ymm9,%ymm5,%ymm5 ++ vpxor %ymm10,%ymm5,%ymm5 ++ vpaddd %ymm11,%ymm5,%ymm9 ++ vmovdqu %ymm9,160(%rsp) ++ vpalignr $8,%ymm2,%ymm3,%ymm6 ++ vpsrldq $4,%ymm5,%ymm8 ++ vpxor %ymm2,%ymm6,%ymm6 ++ vpxor %ymm4,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm6,%ymm6 ++ vpsrld $31,%ymm6,%ymm8 ++ vpslldq $12,%ymm6,%ymm10 ++ vpaddd %ymm6,%ymm6,%ymm6 ++ vpsrld $30,%ymm10,%ymm9 ++ vpor %ymm8,%ymm6,%ymm6 ++ vpslld $2,%ymm10,%ymm10 ++ vpxor %ymm9,%ymm6,%ymm6 ++ vpxor %ymm10,%ymm6,%ymm6 ++ vpaddd %ymm11,%ymm6,%ymm9 ++ vmovdqu %ymm9,192(%rsp) ++ vpalignr $8,%ymm3,%ymm4,%ymm7 ++ vpsrldq $4,%ymm6,%ymm8 ++ vpxor %ymm3,%ymm7,%ymm7 ++ vpxor %ymm5,%ymm8,%ymm8 ++ vpxor %ymm8,%ymm7,%ymm7 ++ vpsrld $31,%ymm7,%ymm8 ++ vpslldq $12,%ymm7,%ymm10 ++ vpaddd %ymm7,%ymm7,%ymm7 ++ vpsrld $30,%ymm10,%ymm9 ++ vpor %ymm8,%ymm7,%ymm7 ++ vpslld $2,%ymm10,%ymm10 ++ vpxor %ymm9,%ymm7,%ymm7 ++ vpxor %ymm10,%ymm7,%ymm7 ++ vpaddd %ymm11,%ymm7,%ymm9 ++ vmovdqu %ymm9,224(%rsp) ++ leaq 128(%rsp),%r13 ++ jmp L$oop_avx2 ++.p2align 5 ++L$oop_avx2: ++ rorxl $2,%ebp,%ebx ++ andnl %edx,%ebp,%edi ++ andl %ecx,%ebp ++ xorl %edi,%ebp ++ jmp L$align32_1 ++.p2align 5 ++L$align32_1: ++ vpalignr $8,%ymm6,%ymm7,%ymm8 ++ vpxor %ymm4,%ymm0,%ymm0 ++ addl -128(%r13),%esi ++ andnl %ecx,%eax,%edi ++ vpxor %ymm1,%ymm0,%ymm0 ++ addl %ebp,%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ vpxor %ymm8,%ymm0,%ymm0 ++ andl %ebx,%eax ++ addl %r12d,%esi ++ xorl %edi,%eax ++ vpsrld $30,%ymm0,%ymm8 ++ vpslld $2,%ymm0,%ymm0 ++ addl -124(%r13),%edx ++ andnl %ebx,%esi,%edi ++ addl %eax,%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ andl %ebp,%esi ++ vpor %ymm8,%ymm0,%ymm0 ++ addl %r12d,%edx ++ xorl %edi,%esi ++ addl -120(%r13),%ecx ++ andnl %ebp,%edx,%edi ++ vpaddd %ymm11,%ymm0,%ymm9 ++ addl %esi,%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ andl %eax,%edx ++ vmovdqu %ymm9,256(%rsp) ++ addl %r12d,%ecx ++ xorl %edi,%edx ++ addl -116(%r13),%ebx ++ andnl %eax,%ecx,%edi ++ addl %edx,%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ andl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %edi,%ecx ++ addl -96(%r13),%ebp ++ andnl %esi,%ebx,%edi ++ addl %ecx,%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ andl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %edi,%ebx ++ vpalignr $8,%ymm7,%ymm0,%ymm8 ++ vpxor %ymm5,%ymm1,%ymm1 ++ addl 
-92(%r13),%eax ++ andnl %edx,%ebp,%edi ++ vpxor %ymm2,%ymm1,%ymm1 ++ addl %ebx,%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ vpxor %ymm8,%ymm1,%ymm1 ++ andl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edi,%ebp ++ vpsrld $30,%ymm1,%ymm8 ++ vpslld $2,%ymm1,%ymm1 ++ addl -88(%r13),%esi ++ andnl %ecx,%eax,%edi ++ addl %ebp,%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ andl %ebx,%eax ++ vpor %ymm8,%ymm1,%ymm1 ++ addl %r12d,%esi ++ xorl %edi,%eax ++ addl -84(%r13),%edx ++ andnl %ebx,%esi,%edi ++ vpaddd %ymm11,%ymm1,%ymm9 ++ addl %eax,%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ andl %ebp,%esi ++ vmovdqu %ymm9,288(%rsp) ++ addl %r12d,%edx ++ xorl %edi,%esi ++ addl -64(%r13),%ecx ++ andnl %ebp,%edx,%edi ++ addl %esi,%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ andl %eax,%edx ++ addl %r12d,%ecx ++ xorl %edi,%edx ++ addl -60(%r13),%ebx ++ andnl %eax,%ecx,%edi ++ addl %edx,%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ andl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %edi,%ecx ++ vpalignr $8,%ymm0,%ymm1,%ymm8 ++ vpxor %ymm6,%ymm2,%ymm2 ++ addl -56(%r13),%ebp ++ andnl %esi,%ebx,%edi ++ vpxor %ymm3,%ymm2,%ymm2 ++ vmovdqu 0(%r14),%ymm11 ++ addl %ecx,%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ vpxor %ymm8,%ymm2,%ymm2 ++ andl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %edi,%ebx ++ vpsrld $30,%ymm2,%ymm8 ++ vpslld $2,%ymm2,%ymm2 ++ addl -52(%r13),%eax ++ andnl %edx,%ebp,%edi ++ addl %ebx,%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ andl %ecx,%ebp ++ vpor %ymm8,%ymm2,%ymm2 ++ addl %r12d,%eax ++ xorl %edi,%ebp ++ addl -32(%r13),%esi ++ andnl %ecx,%eax,%edi ++ vpaddd %ymm11,%ymm2,%ymm9 ++ addl %ebp,%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ andl %ebx,%eax ++ vmovdqu %ymm9,320(%rsp) ++ addl %r12d,%esi ++ xorl %edi,%eax ++ addl -28(%r13),%edx ++ andnl %ebx,%esi,%edi ++ addl %eax,%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ andl %ebp,%esi ++ addl %r12d,%edx ++ xorl %edi,%esi ++ addl -24(%r13),%ecx ++ andnl %ebp,%edx,%edi ++ addl %esi,%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ andl %eax,%edx ++ addl %r12d,%ecx ++ xorl %edi,%edx ++ vpalignr $8,%ymm1,%ymm2,%ymm8 ++ vpxor %ymm7,%ymm3,%ymm3 ++ addl -20(%r13),%ebx ++ andnl %eax,%ecx,%edi ++ vpxor %ymm4,%ymm3,%ymm3 ++ addl %edx,%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ vpxor %ymm8,%ymm3,%ymm3 ++ andl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %edi,%ecx ++ vpsrld $30,%ymm3,%ymm8 ++ vpslld $2,%ymm3,%ymm3 ++ addl 0(%r13),%ebp ++ andnl %esi,%ebx,%edi ++ addl %ecx,%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ andl %edx,%ebx ++ vpor %ymm8,%ymm3,%ymm3 ++ addl %r12d,%ebp ++ xorl %edi,%ebx ++ addl 4(%r13),%eax ++ andnl %edx,%ebp,%edi ++ vpaddd %ymm11,%ymm3,%ymm9 ++ addl %ebx,%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ andl %ecx,%ebp ++ vmovdqu %ymm9,352(%rsp) ++ addl %r12d,%eax ++ xorl %edi,%ebp ++ addl 8(%r13),%esi ++ andnl %ecx,%eax,%edi ++ addl %ebp,%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ andl %ebx,%eax ++ addl %r12d,%esi ++ xorl %edi,%eax ++ addl 12(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ vpalignr $8,%ymm2,%ymm3,%ymm8 ++ vpxor %ymm0,%ymm4,%ymm4 ++ addl 32(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ vpxor %ymm5,%ymm4,%ymm4 ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ vpxor %ymm8,%ymm4,%ymm4 ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl 36(%r13),%ebx ++ vpsrld $30,%ymm4,%ymm8 ++ vpslld $2,%ymm4,%ymm4 ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d 
++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ vpor %ymm8,%ymm4,%ymm4 ++ addl 40(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ vpaddd %ymm11,%ymm4,%ymm9 ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ addl 44(%r13),%eax ++ vmovdqu %ymm9,384(%rsp) ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ addl 64(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ vpalignr $8,%ymm3,%ymm4,%ymm8 ++ vpxor %ymm1,%ymm5,%ymm5 ++ addl 68(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ vpxor %ymm6,%ymm5,%ymm5 ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ vpxor %ymm8,%ymm5,%ymm5 ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ addl 72(%r13),%ecx ++ vpsrld $30,%ymm5,%ymm8 ++ vpslld $2,%ymm5,%ymm5 ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ vpor %ymm8,%ymm5,%ymm5 ++ addl 76(%r13),%ebx ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ vpaddd %ymm11,%ymm5,%ymm9 ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ addl 96(%r13),%ebp ++ vmovdqu %ymm9,416(%rsp) ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ addl 100(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ vpalignr $8,%ymm4,%ymm5,%ymm8 ++ vpxor %ymm2,%ymm6,%ymm6 ++ addl 104(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ vpxor %ymm7,%ymm6,%ymm6 ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ vpxor %ymm8,%ymm6,%ymm6 ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ addl 108(%r13),%edx ++ leaq 256(%r13),%r13 ++ vpsrld $30,%ymm6,%ymm8 ++ vpslld $2,%ymm6,%ymm6 ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ vpor %ymm8,%ymm6,%ymm6 ++ addl -128(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ vpaddd %ymm11,%ymm6,%ymm9 ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl -124(%r13),%ebx ++ vmovdqu %ymm9,448(%rsp) ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ addl -120(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ vpalignr $8,%ymm5,%ymm6,%ymm8 ++ vpxor %ymm3,%ymm7,%ymm7 ++ addl -116(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ vpxor %ymm0,%ymm7,%ymm7 ++ vmovdqu 32(%r14),%ymm11 ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ vpxor %ymm8,%ymm7,%ymm7 ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ addl -96(%r13),%esi ++ vpsrld $30,%ymm7,%ymm8 ++ vpslld $2,%ymm7,%ymm7 ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ vpor %ymm8,%ymm7,%ymm7 ++ addl -92(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ vpaddd %ymm11,%ymm7,%ymm9 ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ addl -88(%r13),%ecx ++ vmovdqu %ymm9,480(%rsp) ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl -84(%r13),%ebx ++ movl %esi,%edi 
++ xorl %eax,%edi ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ andl %edi,%ecx ++ jmp L$align32_2 ++.p2align 5 ++L$align32_2: ++ vpalignr $8,%ymm6,%ymm7,%ymm8 ++ vpxor %ymm4,%ymm0,%ymm0 ++ addl -64(%r13),%ebp ++ xorl %esi,%ecx ++ vpxor %ymm1,%ymm0,%ymm0 ++ movl %edx,%edi ++ xorl %esi,%edi ++ leal (%rcx,%rbp,1),%ebp ++ vpxor %ymm8,%ymm0,%ymm0 ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ vpsrld $30,%ymm0,%ymm8 ++ vpslld $2,%ymm0,%ymm0 ++ addl %r12d,%ebp ++ andl %edi,%ebx ++ addl -60(%r13),%eax ++ xorl %edx,%ebx ++ movl %ecx,%edi ++ xorl %edx,%edi ++ vpor %ymm8,%ymm0,%ymm0 ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ vpaddd %ymm11,%ymm0,%ymm9 ++ addl %r12d,%eax ++ andl %edi,%ebp ++ addl -56(%r13),%esi ++ xorl %ecx,%ebp ++ vmovdqu %ymm9,512(%rsp) ++ movl %ebx,%edi ++ xorl %ecx,%edi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ andl %edi,%eax ++ addl -52(%r13),%edx ++ xorl %ebx,%eax ++ movl %ebp,%edi ++ xorl %ebx,%edi ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ andl %edi,%esi ++ addl -32(%r13),%ecx ++ xorl %ebp,%esi ++ movl %eax,%edi ++ xorl %ebp,%edi ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ andl %edi,%edx ++ vpalignr $8,%ymm7,%ymm0,%ymm8 ++ vpxor %ymm5,%ymm1,%ymm1 ++ addl -28(%r13),%ebx ++ xorl %eax,%edx ++ vpxor %ymm2,%ymm1,%ymm1 ++ movl %esi,%edi ++ xorl %eax,%edi ++ leal (%rbx,%rdx,1),%ebx ++ vpxor %ymm8,%ymm1,%ymm1 ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ vpsrld $30,%ymm1,%ymm8 ++ vpslld $2,%ymm1,%ymm1 ++ addl %r12d,%ebx ++ andl %edi,%ecx ++ addl -24(%r13),%ebp ++ xorl %esi,%ecx ++ movl %edx,%edi ++ xorl %esi,%edi ++ vpor %ymm8,%ymm1,%ymm1 ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ vpaddd %ymm11,%ymm1,%ymm9 ++ addl %r12d,%ebp ++ andl %edi,%ebx ++ addl -20(%r13),%eax ++ xorl %edx,%ebx ++ vmovdqu %ymm9,544(%rsp) ++ movl %ecx,%edi ++ xorl %edx,%edi ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ andl %edi,%ebp ++ addl 0(%r13),%esi ++ xorl %ecx,%ebp ++ movl %ebx,%edi ++ xorl %ecx,%edi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ andl %edi,%eax ++ addl 4(%r13),%edx ++ xorl %ebx,%eax ++ movl %ebp,%edi ++ xorl %ebx,%edi ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ andl %edi,%esi ++ vpalignr $8,%ymm0,%ymm1,%ymm8 ++ vpxor %ymm6,%ymm2,%ymm2 ++ addl 8(%r13),%ecx ++ xorl %ebp,%esi ++ vpxor %ymm3,%ymm2,%ymm2 ++ movl %eax,%edi ++ xorl %ebp,%edi ++ leal (%rcx,%rsi,1),%ecx ++ vpxor %ymm8,%ymm2,%ymm2 ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ vpsrld $30,%ymm2,%ymm8 ++ vpslld $2,%ymm2,%ymm2 ++ addl %r12d,%ecx ++ andl %edi,%edx ++ addl 12(%r13),%ebx ++ xorl %eax,%edx ++ movl %esi,%edi ++ xorl %eax,%edi ++ vpor %ymm8,%ymm2,%ymm2 ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ vpaddd %ymm11,%ymm2,%ymm9 ++ addl %r12d,%ebx ++ andl %edi,%ecx ++ addl 32(%r13),%ebp ++ xorl %esi,%ecx ++ vmovdqu %ymm9,576(%rsp) ++ movl %edx,%edi ++ xorl %esi,%edi ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx 
++ addl %r12d,%ebp ++ andl %edi,%ebx ++ addl 36(%r13),%eax ++ xorl %edx,%ebx ++ movl %ecx,%edi ++ xorl %edx,%edi ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ andl %edi,%ebp ++ addl 40(%r13),%esi ++ xorl %ecx,%ebp ++ movl %ebx,%edi ++ xorl %ecx,%edi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ andl %edi,%eax ++ vpalignr $8,%ymm1,%ymm2,%ymm8 ++ vpxor %ymm7,%ymm3,%ymm3 ++ addl 44(%r13),%edx ++ xorl %ebx,%eax ++ vpxor %ymm4,%ymm3,%ymm3 ++ movl %ebp,%edi ++ xorl %ebx,%edi ++ leal (%rdx,%rax,1),%edx ++ vpxor %ymm8,%ymm3,%ymm3 ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ vpsrld $30,%ymm3,%ymm8 ++ vpslld $2,%ymm3,%ymm3 ++ addl %r12d,%edx ++ andl %edi,%esi ++ addl 64(%r13),%ecx ++ xorl %ebp,%esi ++ movl %eax,%edi ++ xorl %ebp,%edi ++ vpor %ymm8,%ymm3,%ymm3 ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ vpaddd %ymm11,%ymm3,%ymm9 ++ addl %r12d,%ecx ++ andl %edi,%edx ++ addl 68(%r13),%ebx ++ xorl %eax,%edx ++ vmovdqu %ymm9,608(%rsp) ++ movl %esi,%edi ++ xorl %eax,%edi ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ andl %edi,%ecx ++ addl 72(%r13),%ebp ++ xorl %esi,%ecx ++ movl %edx,%edi ++ xorl %esi,%edi ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ andl %edi,%ebx ++ addl 76(%r13),%eax ++ xorl %edx,%ebx ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ addl 96(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ addl 100(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ addl 104(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl 108(%r13),%ebx ++ leaq 256(%r13),%r13 ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ addl -128(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ addl -124(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ addl -120(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ addl -116(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ addl -96(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl -92(%r13),%ebx ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ addl -88(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ addl -84(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ addl -64(%r13),%esi ++ leal 
(%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ addl -60(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ addl -56(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl -52(%r13),%ebx ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ addl -32(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ addl -28(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ addl -24(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ addl -20(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ addl %r12d,%edx ++ leaq 128(%r9),%r13 ++ leaq 128(%r9),%rdi ++ cmpq %r10,%r13 ++ cmovaeq %r9,%r13 ++ ++ ++ addl 0(%r8),%edx ++ addl 4(%r8),%esi ++ addl 8(%r8),%ebp ++ movl %edx,0(%r8) ++ addl 12(%r8),%ebx ++ movl %esi,4(%r8) ++ movl %edx,%eax ++ addl 16(%r8),%ecx ++ movl %ebp,%r12d ++ movl %ebp,8(%r8) ++ movl %ebx,%edx ++ ++ movl %ebx,12(%r8) ++ movl %esi,%ebp ++ movl %ecx,16(%r8) ++ ++ movl %ecx,%esi ++ movl %r12d,%ecx ++ ++ ++ cmpq %r10,%r9 ++ je L$done_avx2 ++ vmovdqu 64(%r14),%ymm6 ++ cmpq %r10,%rdi ++ ja L$ast_avx2 ++ ++ vmovdqu -64(%rdi),%xmm0 ++ vmovdqu -48(%rdi),%xmm1 ++ vmovdqu -32(%rdi),%xmm2 ++ vmovdqu -16(%rdi),%xmm3 ++ vinserti128 $1,0(%r13),%ymm0,%ymm0 ++ vinserti128 $1,16(%r13),%ymm1,%ymm1 ++ vinserti128 $1,32(%r13),%ymm2,%ymm2 ++ vinserti128 $1,48(%r13),%ymm3,%ymm3 ++ jmp L$ast_avx2 ++ ++.p2align 5 ++L$ast_avx2: ++ leaq 128+16(%rsp),%r13 ++ rorxl $2,%ebp,%ebx ++ andnl %edx,%ebp,%edi ++ andl %ecx,%ebp ++ xorl %edi,%ebp ++ subq $-128,%r9 ++ addl -128(%r13),%esi ++ andnl %ecx,%eax,%edi ++ addl %ebp,%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ andl %ebx,%eax ++ addl %r12d,%esi ++ xorl %edi,%eax ++ addl -124(%r13),%edx ++ andnl %ebx,%esi,%edi ++ addl %eax,%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ andl %ebp,%esi ++ addl %r12d,%edx ++ xorl %edi,%esi ++ addl -120(%r13),%ecx ++ andnl %ebp,%edx,%edi ++ addl %esi,%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ andl %eax,%edx ++ addl %r12d,%ecx ++ xorl %edi,%edx ++ addl -116(%r13),%ebx ++ andnl %eax,%ecx,%edi ++ addl %edx,%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ andl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %edi,%ecx ++ addl -96(%r13),%ebp ++ andnl %esi,%ebx,%edi ++ addl %ecx,%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ andl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %edi,%ebx ++ addl -92(%r13),%eax ++ andnl %edx,%ebp,%edi ++ addl %ebx,%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ andl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edi,%ebp ++ addl -88(%r13),%esi ++ andnl %ecx,%eax,%edi ++ addl %ebp,%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ andl %ebx,%eax ++ addl %r12d,%esi ++ xorl %edi,%eax ++ addl -84(%r13),%edx ++ andnl %ebx,%esi,%edi ++ addl %eax,%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ andl %ebp,%esi ++ addl %r12d,%edx ++ xorl %edi,%esi ++ addl -64(%r13),%ecx ++ andnl %ebp,%edx,%edi ++ addl %esi,%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ andl %eax,%edx ++ addl %r12d,%ecx ++ 
xorl %edi,%edx ++ addl -60(%r13),%ebx ++ andnl %eax,%ecx,%edi ++ addl %edx,%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ andl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %edi,%ecx ++ addl -56(%r13),%ebp ++ andnl %esi,%ebx,%edi ++ addl %ecx,%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ andl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %edi,%ebx ++ addl -52(%r13),%eax ++ andnl %edx,%ebp,%edi ++ addl %ebx,%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ andl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edi,%ebp ++ addl -32(%r13),%esi ++ andnl %ecx,%eax,%edi ++ addl %ebp,%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ andl %ebx,%eax ++ addl %r12d,%esi ++ xorl %edi,%eax ++ addl -28(%r13),%edx ++ andnl %ebx,%esi,%edi ++ addl %eax,%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ andl %ebp,%esi ++ addl %r12d,%edx ++ xorl %edi,%esi ++ addl -24(%r13),%ecx ++ andnl %ebp,%edx,%edi ++ addl %esi,%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ andl %eax,%edx ++ addl %r12d,%ecx ++ xorl %edi,%edx ++ addl -20(%r13),%ebx ++ andnl %eax,%ecx,%edi ++ addl %edx,%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ andl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %edi,%ecx ++ addl 0(%r13),%ebp ++ andnl %esi,%ebx,%edi ++ addl %ecx,%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ andl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %edi,%ebx ++ addl 4(%r13),%eax ++ andnl %edx,%ebp,%edi ++ addl %ebx,%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ andl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edi,%ebp ++ addl 8(%r13),%esi ++ andnl %ecx,%eax,%edi ++ addl %ebp,%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ andl %ebx,%eax ++ addl %r12d,%esi ++ xorl %edi,%eax ++ addl 12(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ addl 32(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl 36(%r13),%ebx ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ addl 40(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ addl 44(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ addl 64(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ vmovdqu -64(%r14),%ymm11 ++ vpshufb %ymm6,%ymm0,%ymm0 ++ addl 68(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ addl 72(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl 76(%r13),%ebx ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ addl 96(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ addl 100(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ vpshufb %ymm6,%ymm1,%ymm1 ++ vpaddd %ymm11,%ymm0,%ymm8 ++ addl 104(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ 
addl %r12d,%esi ++ xorl %ecx,%eax ++ addl 108(%r13),%edx ++ leaq 256(%r13),%r13 ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ addl -128(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl -124(%r13),%ebx ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ addl -120(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ vmovdqu %ymm8,0(%rsp) ++ vpshufb %ymm6,%ymm2,%ymm2 ++ vpaddd %ymm11,%ymm1,%ymm9 ++ addl -116(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ addl -96(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ addl -92(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ addl -88(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl -84(%r13),%ebx ++ movl %esi,%edi ++ xorl %eax,%edi ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ andl %edi,%ecx ++ vmovdqu %ymm9,32(%rsp) ++ vpshufb %ymm6,%ymm3,%ymm3 ++ vpaddd %ymm11,%ymm2,%ymm6 ++ addl -64(%r13),%ebp ++ xorl %esi,%ecx ++ movl %edx,%edi ++ xorl %esi,%edi ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ andl %edi,%ebx ++ addl -60(%r13),%eax ++ xorl %edx,%ebx ++ movl %ecx,%edi ++ xorl %edx,%edi ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ andl %edi,%ebp ++ addl -56(%r13),%esi ++ xorl %ecx,%ebp ++ movl %ebx,%edi ++ xorl %ecx,%edi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ andl %edi,%eax ++ addl -52(%r13),%edx ++ xorl %ebx,%eax ++ movl %ebp,%edi ++ xorl %ebx,%edi ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ andl %edi,%esi ++ addl -32(%r13),%ecx ++ xorl %ebp,%esi ++ movl %eax,%edi ++ xorl %ebp,%edi ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ andl %edi,%edx ++ jmp L$align32_3 ++.p2align 5 ++L$align32_3: ++ vmovdqu %ymm6,64(%rsp) ++ vpaddd %ymm11,%ymm3,%ymm7 ++ addl -28(%r13),%ebx ++ xorl %eax,%edx ++ movl %esi,%edi ++ xorl %eax,%edi ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ andl %edi,%ecx ++ addl -24(%r13),%ebp ++ xorl %esi,%ecx ++ movl %edx,%edi ++ xorl %esi,%edi ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ andl %edi,%ebx ++ addl -20(%r13),%eax ++ xorl %edx,%ebx ++ movl %ecx,%edi ++ xorl %edx,%edi ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ andl %edi,%ebp ++ addl 0(%r13),%esi ++ xorl %ecx,%ebp ++ movl %ebx,%edi ++ xorl %ecx,%edi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ 
andl %edi,%eax ++ addl 4(%r13),%edx ++ xorl %ebx,%eax ++ movl %ebp,%edi ++ xorl %ebx,%edi ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ andl %edi,%esi ++ vmovdqu %ymm7,96(%rsp) ++ addl 8(%r13),%ecx ++ xorl %ebp,%esi ++ movl %eax,%edi ++ xorl %ebp,%edi ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ andl %edi,%edx ++ addl 12(%r13),%ebx ++ xorl %eax,%edx ++ movl %esi,%edi ++ xorl %eax,%edi ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ andl %edi,%ecx ++ addl 32(%r13),%ebp ++ xorl %esi,%ecx ++ movl %edx,%edi ++ xorl %esi,%edi ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ andl %edi,%ebx ++ addl 36(%r13),%eax ++ xorl %edx,%ebx ++ movl %ecx,%edi ++ xorl %edx,%edi ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ andl %edi,%ebp ++ addl 40(%r13),%esi ++ xorl %ecx,%ebp ++ movl %ebx,%edi ++ xorl %ecx,%edi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ andl %edi,%eax ++ vpalignr $8,%ymm0,%ymm1,%ymm4 ++ addl 44(%r13),%edx ++ xorl %ebx,%eax ++ movl %ebp,%edi ++ xorl %ebx,%edi ++ vpsrldq $4,%ymm3,%ymm8 ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ vpxor %ymm0,%ymm4,%ymm4 ++ vpxor %ymm2,%ymm8,%ymm8 ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ vpxor %ymm8,%ymm4,%ymm4 ++ andl %edi,%esi ++ addl 64(%r13),%ecx ++ xorl %ebp,%esi ++ movl %eax,%edi ++ vpsrld $31,%ymm4,%ymm8 ++ xorl %ebp,%edi ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ vpslldq $12,%ymm4,%ymm10 ++ vpaddd %ymm4,%ymm4,%ymm4 ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ vpsrld $30,%ymm10,%ymm9 ++ vpor %ymm8,%ymm4,%ymm4 ++ addl %r12d,%ecx ++ andl %edi,%edx ++ vpslld $2,%ymm10,%ymm10 ++ vpxor %ymm9,%ymm4,%ymm4 ++ addl 68(%r13),%ebx ++ xorl %eax,%edx ++ vpxor %ymm10,%ymm4,%ymm4 ++ movl %esi,%edi ++ xorl %eax,%edi ++ leal (%rbx,%rdx,1),%ebx ++ vpaddd %ymm11,%ymm4,%ymm9 ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ vmovdqu %ymm9,128(%rsp) ++ addl %r12d,%ebx ++ andl %edi,%ecx ++ addl 72(%r13),%ebp ++ xorl %esi,%ecx ++ movl %edx,%edi ++ xorl %esi,%edi ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ andl %edi,%ebx ++ addl 76(%r13),%eax ++ xorl %edx,%ebx ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ vpalignr $8,%ymm1,%ymm2,%ymm5 ++ addl 96(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ vpsrldq $4,%ymm4,%ymm8 ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ vpxor %ymm1,%ymm5,%ymm5 ++ vpxor %ymm3,%ymm8,%ymm8 ++ addl 100(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ vpxor %ymm8,%ymm5,%ymm5 ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ vpsrld $31,%ymm5,%ymm8 ++ vmovdqu -32(%r14),%ymm11 ++ xorl %ebx,%esi ++ addl 104(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ vpslldq $12,%ymm5,%ymm10 ++ vpaddd %ymm5,%ymm5,%ymm5 ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ vpsrld $30,%ymm10,%ymm9 ++ vpor %ymm8,%ymm5,%ymm5 ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ vpslld $2,%ymm10,%ymm10 ++ vpxor %ymm9,%ymm5,%ymm5 ++ xorl %ebp,%edx ++ addl 108(%r13),%ebx ++ leaq 256(%r13),%r13 ++ vpxor %ymm10,%ymm5,%ymm5 ++ leal 
(%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ vpaddd %ymm11,%ymm5,%ymm9 ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ vmovdqu %ymm9,160(%rsp) ++ addl -128(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ vpalignr $8,%ymm2,%ymm3,%ymm6 ++ addl -124(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ vpsrldq $4,%ymm5,%ymm8 ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ vpxor %ymm2,%ymm6,%ymm6 ++ vpxor %ymm4,%ymm8,%ymm8 ++ addl -120(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ vpxor %ymm8,%ymm6,%ymm6 ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ vpsrld $31,%ymm6,%ymm8 ++ xorl %ecx,%eax ++ addl -116(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ vpslldq $12,%ymm6,%ymm10 ++ vpaddd %ymm6,%ymm6,%ymm6 ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ vpsrld $30,%ymm10,%ymm9 ++ vpor %ymm8,%ymm6,%ymm6 ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ vpslld $2,%ymm10,%ymm10 ++ vpxor %ymm9,%ymm6,%ymm6 ++ xorl %ebx,%esi ++ addl -96(%r13),%ecx ++ vpxor %ymm10,%ymm6,%ymm6 ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ vpaddd %ymm11,%ymm6,%ymm9 ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ vmovdqu %ymm9,192(%rsp) ++ addl -92(%r13),%ebx ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ vpalignr $8,%ymm3,%ymm4,%ymm7 ++ addl -88(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ vpsrldq $4,%ymm6,%ymm8 ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ vpxor %ymm3,%ymm7,%ymm7 ++ vpxor %ymm5,%ymm8,%ymm8 ++ addl -84(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ vpxor %ymm8,%ymm7,%ymm7 ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ vpsrld $31,%ymm7,%ymm8 ++ xorl %edx,%ebp ++ addl -64(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ vpslldq $12,%ymm7,%ymm10 ++ vpaddd %ymm7,%ymm7,%ymm7 ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ vpsrld $30,%ymm10,%ymm9 ++ vpor %ymm8,%ymm7,%ymm7 ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ vpslld $2,%ymm10,%ymm10 ++ vpxor %ymm9,%ymm7,%ymm7 ++ xorl %ecx,%eax ++ addl -60(%r13),%edx ++ vpxor %ymm10,%ymm7,%ymm7 ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ rorxl $2,%esi,%eax ++ vpaddd %ymm11,%ymm7,%ymm9 ++ xorl %ebp,%esi ++ addl %r12d,%edx ++ xorl %ebx,%esi ++ vmovdqu %ymm9,224(%rsp) ++ addl -56(%r13),%ecx ++ leal (%rcx,%rsi,1),%ecx ++ rorxl $27,%edx,%r12d ++ rorxl $2,%edx,%esi ++ xorl %eax,%edx ++ addl %r12d,%ecx ++ xorl %ebp,%edx ++ addl -52(%r13),%ebx ++ leal (%rbx,%rdx,1),%ebx ++ rorxl $27,%ecx,%r12d ++ rorxl $2,%ecx,%edx ++ xorl %esi,%ecx ++ addl %r12d,%ebx ++ xorl %eax,%ecx ++ addl -32(%r13),%ebp ++ leal (%rcx,%rbp,1),%ebp ++ rorxl $27,%ebx,%r12d ++ rorxl $2,%ebx,%ecx ++ xorl %edx,%ebx ++ addl %r12d,%ebp ++ xorl %esi,%ebx ++ addl -28(%r13),%eax ++ leal (%rax,%rbx,1),%eax ++ rorxl $27,%ebp,%r12d ++ rorxl $2,%ebp,%ebx ++ xorl %ecx,%ebp ++ addl %r12d,%eax ++ xorl %edx,%ebp ++ addl -24(%r13),%esi ++ leal (%rsi,%rbp,1),%esi ++ rorxl $27,%eax,%r12d ++ rorxl $2,%eax,%ebp ++ xorl %ebx,%eax ++ addl %r12d,%esi ++ xorl %ecx,%eax ++ addl -20(%r13),%edx ++ leal (%rdx,%rax,1),%edx ++ rorxl $27,%esi,%r12d ++ addl %r12d,%edx ++ leaq 128(%rsp),%r13 ++ ++ ++ addl 0(%r8),%edx ++ addl 4(%r8),%esi ++ addl 8(%r8),%ebp ++ movl %edx,0(%r8) ++ addl 12(%r8),%ebx ++ movl %esi,4(%r8) ++ movl %edx,%eax ++ addl 16(%r8),%ecx 
++ movl %ebp,%r12d ++ movl %ebp,8(%r8) ++ movl %ebx,%edx ++ ++ movl %ebx,12(%r8) ++ movl %esi,%ebp ++ movl %ecx,16(%r8) ++ ++ movl %ecx,%esi ++ movl %r12d,%ecx ++ ++ ++ cmpq %r10,%r9 ++ jbe L$oop_avx2 ++ ++L$done_avx2: ++ vzeroupper ++ movq -40(%r11),%r14 ++ ++ movq -32(%r11),%r13 ++ ++ movq -24(%r11),%r12 ++ ++ movq -16(%r11),%rbp ++ ++ movq -8(%r11),%rbx ++ ++ leaq (%r11),%rsp ++ ++L$epilogue_avx2: ++ .byte 0xf3,0xc3 ++ ++ ++.p2align 6 ++K_XX_XX: ++.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 ++.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 ++.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 ++.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 ++.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc ++.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc ++.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 ++.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 ++.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f ++.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f ++.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 ++.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.p2align 6 ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/sha256-x86_64.S b/apple-x86_64/crypto/fipsmodule/sha256-x86_64.S +new file mode 100644 +index 0000000..00dc01c +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/sha256-x86_64.S +@@ -0,0 +1,4182 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++.globl _sha256_block_data_order ++.private_extern _sha256_block_data_order ++ ++.p2align 4 ++_sha256_block_data_order: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%r11 ++ movl 0(%r11),%r9d ++ movl 4(%r11),%r10d ++ movl 8(%r11),%r11d ++ testl $536870912,%r11d ++ jnz L$shaext_shortcut ++ andl $1073741824,%r9d ++ andl $268435968,%r10d ++ orl %r9d,%r10d ++ cmpl $1342177792,%r10d ++ je L$avx_shortcut ++ testl $512,%r10d ++ jnz L$ssse3_shortcut ++ movq %rsp,%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ shlq $4,%rdx ++ subq $64+32,%rsp ++ leaq (%rsi,%rdx,4),%rdx ++ andq $-64,%rsp ++ movq %rdi,64+0(%rsp) ++ movq %rsi,64+8(%rsp) ++ movq %rdx,64+16(%rsp) ++ movq %rax,88(%rsp) ++ ++L$prologue: ++ ++ movl 0(%rdi),%eax ++ movl 4(%rdi),%ebx ++ movl 8(%rdi),%ecx ++ movl 12(%rdi),%edx ++ movl 16(%rdi),%r8d ++ movl 20(%rdi),%r9d ++ movl 24(%rdi),%r10d ++ movl 28(%rdi),%r11d ++ jmp L$loop ++ ++.p2align 4 ++L$loop: ++ movl %ebx,%edi ++ leaq K256(%rip),%rbp ++ xorl %ecx,%edi ++ movl 0(%rsi),%r12d ++ movl %r8d,%r13d ++ movl %eax,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %r9d,%r15d ++ ++ xorl %r8d,%r13d ++ rorl $9,%r14d ++ xorl %r10d,%r15d ++ ++ movl %r12d,0(%rsp) ++ xorl %eax,%r14d ++ andl %r8d,%r15d ++ ++ rorl $5,%r13d ++ addl %r11d,%r12d ++ xorl %r10d,%r15d ++ ++ rorl $11,%r14d ++ xorl %r8d,%r13d ++ addl %r15d,%r12d ++ ++ movl %eax,%r15d ++ addl (%rbp),%r12d ++ xorl %eax,%r14d ++ ++ xorl %ebx,%r15d ++ rorl $6,%r13d ++ movl %ebx,%r11d ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%r11d ++ addl %r12d,%edx ++ addl 
%r12d,%r11d ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%r11d ++ movl 4(%rsi),%r12d ++ movl %edx,%r13d ++ movl %r11d,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %r8d,%edi ++ ++ xorl %edx,%r13d ++ rorl $9,%r14d ++ xorl %r9d,%edi ++ ++ movl %r12d,4(%rsp) ++ xorl %r11d,%r14d ++ andl %edx,%edi ++ ++ rorl $5,%r13d ++ addl %r10d,%r12d ++ xorl %r9d,%edi ++ ++ rorl $11,%r14d ++ xorl %edx,%r13d ++ addl %edi,%r12d ++ ++ movl %r11d,%edi ++ addl (%rbp),%r12d ++ xorl %r11d,%r14d ++ ++ xorl %eax,%edi ++ rorl $6,%r13d ++ movl %eax,%r10d ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%r10d ++ addl %r12d,%ecx ++ addl %r12d,%r10d ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%r10d ++ movl 8(%rsi),%r12d ++ movl %ecx,%r13d ++ movl %r10d,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %edx,%r15d ++ ++ xorl %ecx,%r13d ++ rorl $9,%r14d ++ xorl %r8d,%r15d ++ ++ movl %r12d,8(%rsp) ++ xorl %r10d,%r14d ++ andl %ecx,%r15d ++ ++ rorl $5,%r13d ++ addl %r9d,%r12d ++ xorl %r8d,%r15d ++ ++ rorl $11,%r14d ++ xorl %ecx,%r13d ++ addl %r15d,%r12d ++ ++ movl %r10d,%r15d ++ addl (%rbp),%r12d ++ xorl %r10d,%r14d ++ ++ xorl %r11d,%r15d ++ rorl $6,%r13d ++ movl %r11d,%r9d ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%r9d ++ addl %r12d,%ebx ++ addl %r12d,%r9d ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%r9d ++ movl 12(%rsi),%r12d ++ movl %ebx,%r13d ++ movl %r9d,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %ecx,%edi ++ ++ xorl %ebx,%r13d ++ rorl $9,%r14d ++ xorl %edx,%edi ++ ++ movl %r12d,12(%rsp) ++ xorl %r9d,%r14d ++ andl %ebx,%edi ++ ++ rorl $5,%r13d ++ addl %r8d,%r12d ++ xorl %edx,%edi ++ ++ rorl $11,%r14d ++ xorl %ebx,%r13d ++ addl %edi,%r12d ++ ++ movl %r9d,%edi ++ addl (%rbp),%r12d ++ xorl %r9d,%r14d ++ ++ xorl %r10d,%edi ++ rorl $6,%r13d ++ movl %r10d,%r8d ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%r8d ++ addl %r12d,%eax ++ addl %r12d,%r8d ++ ++ leaq 20(%rbp),%rbp ++ addl %r14d,%r8d ++ movl 16(%rsi),%r12d ++ movl %eax,%r13d ++ movl %r8d,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %ebx,%r15d ++ ++ xorl %eax,%r13d ++ rorl $9,%r14d ++ xorl %ecx,%r15d ++ ++ movl %r12d,16(%rsp) ++ xorl %r8d,%r14d ++ andl %eax,%r15d ++ ++ rorl $5,%r13d ++ addl %edx,%r12d ++ xorl %ecx,%r15d ++ ++ rorl $11,%r14d ++ xorl %eax,%r13d ++ addl %r15d,%r12d ++ ++ movl %r8d,%r15d ++ addl (%rbp),%r12d ++ xorl %r8d,%r14d ++ ++ xorl %r9d,%r15d ++ rorl $6,%r13d ++ movl %r9d,%edx ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%edx ++ addl %r12d,%r11d ++ addl %r12d,%edx ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%edx ++ movl 20(%rsi),%r12d ++ movl %r11d,%r13d ++ movl %edx,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %eax,%edi ++ ++ xorl %r11d,%r13d ++ rorl $9,%r14d ++ xorl %ebx,%edi ++ ++ movl %r12d,20(%rsp) ++ xorl %edx,%r14d ++ andl %r11d,%edi ++ ++ rorl $5,%r13d ++ addl %ecx,%r12d ++ xorl %ebx,%edi ++ ++ rorl $11,%r14d ++ xorl %r11d,%r13d ++ addl %edi,%r12d ++ ++ movl %edx,%edi ++ addl (%rbp),%r12d ++ xorl %edx,%r14d ++ ++ xorl %r8d,%edi ++ rorl $6,%r13d ++ movl %r8d,%ecx ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%ecx ++ addl %r12d,%r10d ++ addl %r12d,%ecx ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%ecx ++ movl 24(%rsi),%r12d ++ movl %r10d,%r13d ++ movl %ecx,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %r11d,%r15d ++ ++ xorl %r10d,%r13d ++ rorl $9,%r14d ++ xorl %eax,%r15d ++ ++ movl %r12d,24(%rsp) ++ xorl %ecx,%r14d ++ andl %r10d,%r15d ++ ++ rorl $5,%r13d ++ addl %ebx,%r12d ++ xorl %eax,%r15d ++ ++ rorl $11,%r14d ++ xorl 
%r10d,%r13d ++ addl %r15d,%r12d ++ ++ movl %ecx,%r15d ++ addl (%rbp),%r12d ++ xorl %ecx,%r14d ++ ++ xorl %edx,%r15d ++ rorl $6,%r13d ++ movl %edx,%ebx ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%ebx ++ addl %r12d,%r9d ++ addl %r12d,%ebx ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%ebx ++ movl 28(%rsi),%r12d ++ movl %r9d,%r13d ++ movl %ebx,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %r10d,%edi ++ ++ xorl %r9d,%r13d ++ rorl $9,%r14d ++ xorl %r11d,%edi ++ ++ movl %r12d,28(%rsp) ++ xorl %ebx,%r14d ++ andl %r9d,%edi ++ ++ rorl $5,%r13d ++ addl %eax,%r12d ++ xorl %r11d,%edi ++ ++ rorl $11,%r14d ++ xorl %r9d,%r13d ++ addl %edi,%r12d ++ ++ movl %ebx,%edi ++ addl (%rbp),%r12d ++ xorl %ebx,%r14d ++ ++ xorl %ecx,%edi ++ rorl $6,%r13d ++ movl %ecx,%eax ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%eax ++ addl %r12d,%r8d ++ addl %r12d,%eax ++ ++ leaq 20(%rbp),%rbp ++ addl %r14d,%eax ++ movl 32(%rsi),%r12d ++ movl %r8d,%r13d ++ movl %eax,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %r9d,%r15d ++ ++ xorl %r8d,%r13d ++ rorl $9,%r14d ++ xorl %r10d,%r15d ++ ++ movl %r12d,32(%rsp) ++ xorl %eax,%r14d ++ andl %r8d,%r15d ++ ++ rorl $5,%r13d ++ addl %r11d,%r12d ++ xorl %r10d,%r15d ++ ++ rorl $11,%r14d ++ xorl %r8d,%r13d ++ addl %r15d,%r12d ++ ++ movl %eax,%r15d ++ addl (%rbp),%r12d ++ xorl %eax,%r14d ++ ++ xorl %ebx,%r15d ++ rorl $6,%r13d ++ movl %ebx,%r11d ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%r11d ++ addl %r12d,%edx ++ addl %r12d,%r11d ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%r11d ++ movl 36(%rsi),%r12d ++ movl %edx,%r13d ++ movl %r11d,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %r8d,%edi ++ ++ xorl %edx,%r13d ++ rorl $9,%r14d ++ xorl %r9d,%edi ++ ++ movl %r12d,36(%rsp) ++ xorl %r11d,%r14d ++ andl %edx,%edi ++ ++ rorl $5,%r13d ++ addl %r10d,%r12d ++ xorl %r9d,%edi ++ ++ rorl $11,%r14d ++ xorl %edx,%r13d ++ addl %edi,%r12d ++ ++ movl %r11d,%edi ++ addl (%rbp),%r12d ++ xorl %r11d,%r14d ++ ++ xorl %eax,%edi ++ rorl $6,%r13d ++ movl %eax,%r10d ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%r10d ++ addl %r12d,%ecx ++ addl %r12d,%r10d ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%r10d ++ movl 40(%rsi),%r12d ++ movl %ecx,%r13d ++ movl %r10d,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %edx,%r15d ++ ++ xorl %ecx,%r13d ++ rorl $9,%r14d ++ xorl %r8d,%r15d ++ ++ movl %r12d,40(%rsp) ++ xorl %r10d,%r14d ++ andl %ecx,%r15d ++ ++ rorl $5,%r13d ++ addl %r9d,%r12d ++ xorl %r8d,%r15d ++ ++ rorl $11,%r14d ++ xorl %ecx,%r13d ++ addl %r15d,%r12d ++ ++ movl %r10d,%r15d ++ addl (%rbp),%r12d ++ xorl %r10d,%r14d ++ ++ xorl %r11d,%r15d ++ rorl $6,%r13d ++ movl %r11d,%r9d ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%r9d ++ addl %r12d,%ebx ++ addl %r12d,%r9d ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%r9d ++ movl 44(%rsi),%r12d ++ movl %ebx,%r13d ++ movl %r9d,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %ecx,%edi ++ ++ xorl %ebx,%r13d ++ rorl $9,%r14d ++ xorl %edx,%edi ++ ++ movl %r12d,44(%rsp) ++ xorl %r9d,%r14d ++ andl %ebx,%edi ++ ++ rorl $5,%r13d ++ addl %r8d,%r12d ++ xorl %edx,%edi ++ ++ rorl $11,%r14d ++ xorl %ebx,%r13d ++ addl %edi,%r12d ++ ++ movl %r9d,%edi ++ addl (%rbp),%r12d ++ xorl %r9d,%r14d ++ ++ xorl %r10d,%edi ++ rorl $6,%r13d ++ movl %r10d,%r8d ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%r8d ++ addl %r12d,%eax ++ addl %r12d,%r8d ++ ++ leaq 20(%rbp),%rbp ++ addl %r14d,%r8d ++ movl 48(%rsi),%r12d ++ movl %eax,%r13d ++ movl %r8d,%r14d 
++ bswapl %r12d ++ rorl $14,%r13d ++ movl %ebx,%r15d ++ ++ xorl %eax,%r13d ++ rorl $9,%r14d ++ xorl %ecx,%r15d ++ ++ movl %r12d,48(%rsp) ++ xorl %r8d,%r14d ++ andl %eax,%r15d ++ ++ rorl $5,%r13d ++ addl %edx,%r12d ++ xorl %ecx,%r15d ++ ++ rorl $11,%r14d ++ xorl %eax,%r13d ++ addl %r15d,%r12d ++ ++ movl %r8d,%r15d ++ addl (%rbp),%r12d ++ xorl %r8d,%r14d ++ ++ xorl %r9d,%r15d ++ rorl $6,%r13d ++ movl %r9d,%edx ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%edx ++ addl %r12d,%r11d ++ addl %r12d,%edx ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%edx ++ movl 52(%rsi),%r12d ++ movl %r11d,%r13d ++ movl %edx,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %eax,%edi ++ ++ xorl %r11d,%r13d ++ rorl $9,%r14d ++ xorl %ebx,%edi ++ ++ movl %r12d,52(%rsp) ++ xorl %edx,%r14d ++ andl %r11d,%edi ++ ++ rorl $5,%r13d ++ addl %ecx,%r12d ++ xorl %ebx,%edi ++ ++ rorl $11,%r14d ++ xorl %r11d,%r13d ++ addl %edi,%r12d ++ ++ movl %edx,%edi ++ addl (%rbp),%r12d ++ xorl %edx,%r14d ++ ++ xorl %r8d,%edi ++ rorl $6,%r13d ++ movl %r8d,%ecx ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%ecx ++ addl %r12d,%r10d ++ addl %r12d,%ecx ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%ecx ++ movl 56(%rsi),%r12d ++ movl %r10d,%r13d ++ movl %ecx,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %r11d,%r15d ++ ++ xorl %r10d,%r13d ++ rorl $9,%r14d ++ xorl %eax,%r15d ++ ++ movl %r12d,56(%rsp) ++ xorl %ecx,%r14d ++ andl %r10d,%r15d ++ ++ rorl $5,%r13d ++ addl %ebx,%r12d ++ xorl %eax,%r15d ++ ++ rorl $11,%r14d ++ xorl %r10d,%r13d ++ addl %r15d,%r12d ++ ++ movl %ecx,%r15d ++ addl (%rbp),%r12d ++ xorl %ecx,%r14d ++ ++ xorl %edx,%r15d ++ rorl $6,%r13d ++ movl %edx,%ebx ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%ebx ++ addl %r12d,%r9d ++ addl %r12d,%ebx ++ ++ leaq 4(%rbp),%rbp ++ addl %r14d,%ebx ++ movl 60(%rsi),%r12d ++ movl %r9d,%r13d ++ movl %ebx,%r14d ++ bswapl %r12d ++ rorl $14,%r13d ++ movl %r10d,%edi ++ ++ xorl %r9d,%r13d ++ rorl $9,%r14d ++ xorl %r11d,%edi ++ ++ movl %r12d,60(%rsp) ++ xorl %ebx,%r14d ++ andl %r9d,%edi ++ ++ rorl $5,%r13d ++ addl %eax,%r12d ++ xorl %r11d,%edi ++ ++ rorl $11,%r14d ++ xorl %r9d,%r13d ++ addl %edi,%r12d ++ ++ movl %ebx,%edi ++ addl (%rbp),%r12d ++ xorl %ebx,%r14d ++ ++ xorl %ecx,%edi ++ rorl $6,%r13d ++ movl %ecx,%eax ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%eax ++ addl %r12d,%r8d ++ addl %r12d,%eax ++ ++ leaq 20(%rbp),%rbp ++ jmp L$rounds_16_xx ++.p2align 4 ++L$rounds_16_xx: ++ movl 4(%rsp),%r13d ++ movl 56(%rsp),%r15d ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%eax ++ movl %r15d,%r14d ++ rorl $2,%r15d ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%r15d ++ shrl $10,%r14d ++ ++ rorl $17,%r15d ++ xorl %r13d,%r12d ++ xorl %r14d,%r15d ++ addl 36(%rsp),%r12d ++ ++ addl 0(%rsp),%r12d ++ movl %r8d,%r13d ++ addl %r15d,%r12d ++ movl %eax,%r14d ++ rorl $14,%r13d ++ movl %r9d,%r15d ++ ++ xorl %r8d,%r13d ++ rorl $9,%r14d ++ xorl %r10d,%r15d ++ ++ movl %r12d,0(%rsp) ++ xorl %eax,%r14d ++ andl %r8d,%r15d ++ ++ rorl $5,%r13d ++ addl %r11d,%r12d ++ xorl %r10d,%r15d ++ ++ rorl $11,%r14d ++ xorl %r8d,%r13d ++ addl %r15d,%r12d ++ ++ movl %eax,%r15d ++ addl (%rbp),%r12d ++ xorl %eax,%r14d ++ ++ xorl %ebx,%r15d ++ rorl $6,%r13d ++ movl %ebx,%r11d ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%r11d ++ addl %r12d,%edx ++ addl %r12d,%r11d ++ ++ leaq 4(%rbp),%rbp ++ movl 8(%rsp),%r13d ++ movl 60(%rsp),%edi ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl 
%r14d,%r11d ++ movl %edi,%r14d ++ rorl $2,%edi ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%edi ++ shrl $10,%r14d ++ ++ rorl $17,%edi ++ xorl %r13d,%r12d ++ xorl %r14d,%edi ++ addl 40(%rsp),%r12d ++ ++ addl 4(%rsp),%r12d ++ movl %edx,%r13d ++ addl %edi,%r12d ++ movl %r11d,%r14d ++ rorl $14,%r13d ++ movl %r8d,%edi ++ ++ xorl %edx,%r13d ++ rorl $9,%r14d ++ xorl %r9d,%edi ++ ++ movl %r12d,4(%rsp) ++ xorl %r11d,%r14d ++ andl %edx,%edi ++ ++ rorl $5,%r13d ++ addl %r10d,%r12d ++ xorl %r9d,%edi ++ ++ rorl $11,%r14d ++ xorl %edx,%r13d ++ addl %edi,%r12d ++ ++ movl %r11d,%edi ++ addl (%rbp),%r12d ++ xorl %r11d,%r14d ++ ++ xorl %eax,%edi ++ rorl $6,%r13d ++ movl %eax,%r10d ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%r10d ++ addl %r12d,%ecx ++ addl %r12d,%r10d ++ ++ leaq 4(%rbp),%rbp ++ movl 12(%rsp),%r13d ++ movl 0(%rsp),%r15d ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%r10d ++ movl %r15d,%r14d ++ rorl $2,%r15d ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%r15d ++ shrl $10,%r14d ++ ++ rorl $17,%r15d ++ xorl %r13d,%r12d ++ xorl %r14d,%r15d ++ addl 44(%rsp),%r12d ++ ++ addl 8(%rsp),%r12d ++ movl %ecx,%r13d ++ addl %r15d,%r12d ++ movl %r10d,%r14d ++ rorl $14,%r13d ++ movl %edx,%r15d ++ ++ xorl %ecx,%r13d ++ rorl $9,%r14d ++ xorl %r8d,%r15d ++ ++ movl %r12d,8(%rsp) ++ xorl %r10d,%r14d ++ andl %ecx,%r15d ++ ++ rorl $5,%r13d ++ addl %r9d,%r12d ++ xorl %r8d,%r15d ++ ++ rorl $11,%r14d ++ xorl %ecx,%r13d ++ addl %r15d,%r12d ++ ++ movl %r10d,%r15d ++ addl (%rbp),%r12d ++ xorl %r10d,%r14d ++ ++ xorl %r11d,%r15d ++ rorl $6,%r13d ++ movl %r11d,%r9d ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%r9d ++ addl %r12d,%ebx ++ addl %r12d,%r9d ++ ++ leaq 4(%rbp),%rbp ++ movl 16(%rsp),%r13d ++ movl 4(%rsp),%edi ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%r9d ++ movl %edi,%r14d ++ rorl $2,%edi ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%edi ++ shrl $10,%r14d ++ ++ rorl $17,%edi ++ xorl %r13d,%r12d ++ xorl %r14d,%edi ++ addl 48(%rsp),%r12d ++ ++ addl 12(%rsp),%r12d ++ movl %ebx,%r13d ++ addl %edi,%r12d ++ movl %r9d,%r14d ++ rorl $14,%r13d ++ movl %ecx,%edi ++ ++ xorl %ebx,%r13d ++ rorl $9,%r14d ++ xorl %edx,%edi ++ ++ movl %r12d,12(%rsp) ++ xorl %r9d,%r14d ++ andl %ebx,%edi ++ ++ rorl $5,%r13d ++ addl %r8d,%r12d ++ xorl %edx,%edi ++ ++ rorl $11,%r14d ++ xorl %ebx,%r13d ++ addl %edi,%r12d ++ ++ movl %r9d,%edi ++ addl (%rbp),%r12d ++ xorl %r9d,%r14d ++ ++ xorl %r10d,%edi ++ rorl $6,%r13d ++ movl %r10d,%r8d ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%r8d ++ addl %r12d,%eax ++ addl %r12d,%r8d ++ ++ leaq 20(%rbp),%rbp ++ movl 20(%rsp),%r13d ++ movl 8(%rsp),%r15d ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%r8d ++ movl %r15d,%r14d ++ rorl $2,%r15d ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%r15d ++ shrl $10,%r14d ++ ++ rorl $17,%r15d ++ xorl %r13d,%r12d ++ xorl %r14d,%r15d ++ addl 52(%rsp),%r12d ++ ++ addl 16(%rsp),%r12d ++ movl %eax,%r13d ++ addl %r15d,%r12d ++ movl %r8d,%r14d ++ rorl $14,%r13d ++ movl %ebx,%r15d ++ ++ xorl %eax,%r13d ++ rorl $9,%r14d ++ xorl %ecx,%r15d ++ ++ movl %r12d,16(%rsp) ++ xorl %r8d,%r14d ++ andl %eax,%r15d ++ ++ rorl $5,%r13d ++ addl %edx,%r12d ++ xorl %ecx,%r15d ++ ++ rorl $11,%r14d ++ xorl %eax,%r13d ++ addl %r15d,%r12d ++ ++ movl %r8d,%r15d ++ addl (%rbp),%r12d ++ xorl %r8d,%r14d ++ ++ xorl %r9d,%r15d ++ rorl $6,%r13d ++ movl %r9d,%edx ++ ++ andl %r15d,%edi ++ 
rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%edx ++ addl %r12d,%r11d ++ addl %r12d,%edx ++ ++ leaq 4(%rbp),%rbp ++ movl 24(%rsp),%r13d ++ movl 12(%rsp),%edi ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%edx ++ movl %edi,%r14d ++ rorl $2,%edi ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%edi ++ shrl $10,%r14d ++ ++ rorl $17,%edi ++ xorl %r13d,%r12d ++ xorl %r14d,%edi ++ addl 56(%rsp),%r12d ++ ++ addl 20(%rsp),%r12d ++ movl %r11d,%r13d ++ addl %edi,%r12d ++ movl %edx,%r14d ++ rorl $14,%r13d ++ movl %eax,%edi ++ ++ xorl %r11d,%r13d ++ rorl $9,%r14d ++ xorl %ebx,%edi ++ ++ movl %r12d,20(%rsp) ++ xorl %edx,%r14d ++ andl %r11d,%edi ++ ++ rorl $5,%r13d ++ addl %ecx,%r12d ++ xorl %ebx,%edi ++ ++ rorl $11,%r14d ++ xorl %r11d,%r13d ++ addl %edi,%r12d ++ ++ movl %edx,%edi ++ addl (%rbp),%r12d ++ xorl %edx,%r14d ++ ++ xorl %r8d,%edi ++ rorl $6,%r13d ++ movl %r8d,%ecx ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%ecx ++ addl %r12d,%r10d ++ addl %r12d,%ecx ++ ++ leaq 4(%rbp),%rbp ++ movl 28(%rsp),%r13d ++ movl 16(%rsp),%r15d ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%ecx ++ movl %r15d,%r14d ++ rorl $2,%r15d ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%r15d ++ shrl $10,%r14d ++ ++ rorl $17,%r15d ++ xorl %r13d,%r12d ++ xorl %r14d,%r15d ++ addl 60(%rsp),%r12d ++ ++ addl 24(%rsp),%r12d ++ movl %r10d,%r13d ++ addl %r15d,%r12d ++ movl %ecx,%r14d ++ rorl $14,%r13d ++ movl %r11d,%r15d ++ ++ xorl %r10d,%r13d ++ rorl $9,%r14d ++ xorl %eax,%r15d ++ ++ movl %r12d,24(%rsp) ++ xorl %ecx,%r14d ++ andl %r10d,%r15d ++ ++ rorl $5,%r13d ++ addl %ebx,%r12d ++ xorl %eax,%r15d ++ ++ rorl $11,%r14d ++ xorl %r10d,%r13d ++ addl %r15d,%r12d ++ ++ movl %ecx,%r15d ++ addl (%rbp),%r12d ++ xorl %ecx,%r14d ++ ++ xorl %edx,%r15d ++ rorl $6,%r13d ++ movl %edx,%ebx ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%ebx ++ addl %r12d,%r9d ++ addl %r12d,%ebx ++ ++ leaq 4(%rbp),%rbp ++ movl 32(%rsp),%r13d ++ movl 20(%rsp),%edi ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%ebx ++ movl %edi,%r14d ++ rorl $2,%edi ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%edi ++ shrl $10,%r14d ++ ++ rorl $17,%edi ++ xorl %r13d,%r12d ++ xorl %r14d,%edi ++ addl 0(%rsp),%r12d ++ ++ addl 28(%rsp),%r12d ++ movl %r9d,%r13d ++ addl %edi,%r12d ++ movl %ebx,%r14d ++ rorl $14,%r13d ++ movl %r10d,%edi ++ ++ xorl %r9d,%r13d ++ rorl $9,%r14d ++ xorl %r11d,%edi ++ ++ movl %r12d,28(%rsp) ++ xorl %ebx,%r14d ++ andl %r9d,%edi ++ ++ rorl $5,%r13d ++ addl %eax,%r12d ++ xorl %r11d,%edi ++ ++ rorl $11,%r14d ++ xorl %r9d,%r13d ++ addl %edi,%r12d ++ ++ movl %ebx,%edi ++ addl (%rbp),%r12d ++ xorl %ebx,%r14d ++ ++ xorl %ecx,%edi ++ rorl $6,%r13d ++ movl %ecx,%eax ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%eax ++ addl %r12d,%r8d ++ addl %r12d,%eax ++ ++ leaq 20(%rbp),%rbp ++ movl 36(%rsp),%r13d ++ movl 24(%rsp),%r15d ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%eax ++ movl %r15d,%r14d ++ rorl $2,%r15d ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%r15d ++ shrl $10,%r14d ++ ++ rorl $17,%r15d ++ xorl %r13d,%r12d ++ xorl %r14d,%r15d ++ addl 4(%rsp),%r12d ++ ++ addl 32(%rsp),%r12d ++ movl %r8d,%r13d ++ addl %r15d,%r12d ++ movl %eax,%r14d ++ rorl $14,%r13d ++ movl %r9d,%r15d ++ ++ xorl %r8d,%r13d ++ rorl $9,%r14d ++ xorl %r10d,%r15d ++ ++ movl %r12d,32(%rsp) ++ xorl %eax,%r14d ++ andl %r8d,%r15d ++ ++ rorl $5,%r13d ++ addl %r11d,%r12d ++ xorl 
%r10d,%r15d ++ ++ rorl $11,%r14d ++ xorl %r8d,%r13d ++ addl %r15d,%r12d ++ ++ movl %eax,%r15d ++ addl (%rbp),%r12d ++ xorl %eax,%r14d ++ ++ xorl %ebx,%r15d ++ rorl $6,%r13d ++ movl %ebx,%r11d ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%r11d ++ addl %r12d,%edx ++ addl %r12d,%r11d ++ ++ leaq 4(%rbp),%rbp ++ movl 40(%rsp),%r13d ++ movl 28(%rsp),%edi ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%r11d ++ movl %edi,%r14d ++ rorl $2,%edi ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%edi ++ shrl $10,%r14d ++ ++ rorl $17,%edi ++ xorl %r13d,%r12d ++ xorl %r14d,%edi ++ addl 8(%rsp),%r12d ++ ++ addl 36(%rsp),%r12d ++ movl %edx,%r13d ++ addl %edi,%r12d ++ movl %r11d,%r14d ++ rorl $14,%r13d ++ movl %r8d,%edi ++ ++ xorl %edx,%r13d ++ rorl $9,%r14d ++ xorl %r9d,%edi ++ ++ movl %r12d,36(%rsp) ++ xorl %r11d,%r14d ++ andl %edx,%edi ++ ++ rorl $5,%r13d ++ addl %r10d,%r12d ++ xorl %r9d,%edi ++ ++ rorl $11,%r14d ++ xorl %edx,%r13d ++ addl %edi,%r12d ++ ++ movl %r11d,%edi ++ addl (%rbp),%r12d ++ xorl %r11d,%r14d ++ ++ xorl %eax,%edi ++ rorl $6,%r13d ++ movl %eax,%r10d ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%r10d ++ addl %r12d,%ecx ++ addl %r12d,%r10d ++ ++ leaq 4(%rbp),%rbp ++ movl 44(%rsp),%r13d ++ movl 32(%rsp),%r15d ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%r10d ++ movl %r15d,%r14d ++ rorl $2,%r15d ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%r15d ++ shrl $10,%r14d ++ ++ rorl $17,%r15d ++ xorl %r13d,%r12d ++ xorl %r14d,%r15d ++ addl 12(%rsp),%r12d ++ ++ addl 40(%rsp),%r12d ++ movl %ecx,%r13d ++ addl %r15d,%r12d ++ movl %r10d,%r14d ++ rorl $14,%r13d ++ movl %edx,%r15d ++ ++ xorl %ecx,%r13d ++ rorl $9,%r14d ++ xorl %r8d,%r15d ++ ++ movl %r12d,40(%rsp) ++ xorl %r10d,%r14d ++ andl %ecx,%r15d ++ ++ rorl $5,%r13d ++ addl %r9d,%r12d ++ xorl %r8d,%r15d ++ ++ rorl $11,%r14d ++ xorl %ecx,%r13d ++ addl %r15d,%r12d ++ ++ movl %r10d,%r15d ++ addl (%rbp),%r12d ++ xorl %r10d,%r14d ++ ++ xorl %r11d,%r15d ++ rorl $6,%r13d ++ movl %r11d,%r9d ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%r9d ++ addl %r12d,%ebx ++ addl %r12d,%r9d ++ ++ leaq 4(%rbp),%rbp ++ movl 48(%rsp),%r13d ++ movl 36(%rsp),%edi ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%r9d ++ movl %edi,%r14d ++ rorl $2,%edi ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%edi ++ shrl $10,%r14d ++ ++ rorl $17,%edi ++ xorl %r13d,%r12d ++ xorl %r14d,%edi ++ addl 16(%rsp),%r12d ++ ++ addl 44(%rsp),%r12d ++ movl %ebx,%r13d ++ addl %edi,%r12d ++ movl %r9d,%r14d ++ rorl $14,%r13d ++ movl %ecx,%edi ++ ++ xorl %ebx,%r13d ++ rorl $9,%r14d ++ xorl %edx,%edi ++ ++ movl %r12d,44(%rsp) ++ xorl %r9d,%r14d ++ andl %ebx,%edi ++ ++ rorl $5,%r13d ++ addl %r8d,%r12d ++ xorl %edx,%edi ++ ++ rorl $11,%r14d ++ xorl %ebx,%r13d ++ addl %edi,%r12d ++ ++ movl %r9d,%edi ++ addl (%rbp),%r12d ++ xorl %r9d,%r14d ++ ++ xorl %r10d,%edi ++ rorl $6,%r13d ++ movl %r10d,%r8d ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%r8d ++ addl %r12d,%eax ++ addl %r12d,%r8d ++ ++ leaq 20(%rbp),%rbp ++ movl 52(%rsp),%r13d ++ movl 40(%rsp),%r15d ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%r8d ++ movl %r15d,%r14d ++ rorl $2,%r15d ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%r15d ++ shrl $10,%r14d ++ ++ rorl $17,%r15d ++ xorl %r13d,%r12d ++ xorl %r14d,%r15d ++ addl 20(%rsp),%r12d ++ ++ addl 48(%rsp),%r12d ++ movl %eax,%r13d ++ addl %r15d,%r12d ++ movl 
%r8d,%r14d ++ rorl $14,%r13d ++ movl %ebx,%r15d ++ ++ xorl %eax,%r13d ++ rorl $9,%r14d ++ xorl %ecx,%r15d ++ ++ movl %r12d,48(%rsp) ++ xorl %r8d,%r14d ++ andl %eax,%r15d ++ ++ rorl $5,%r13d ++ addl %edx,%r12d ++ xorl %ecx,%r15d ++ ++ rorl $11,%r14d ++ xorl %eax,%r13d ++ addl %r15d,%r12d ++ ++ movl %r8d,%r15d ++ addl (%rbp),%r12d ++ xorl %r8d,%r14d ++ ++ xorl %r9d,%r15d ++ rorl $6,%r13d ++ movl %r9d,%edx ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%edx ++ addl %r12d,%r11d ++ addl %r12d,%edx ++ ++ leaq 4(%rbp),%rbp ++ movl 56(%rsp),%r13d ++ movl 44(%rsp),%edi ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%edx ++ movl %edi,%r14d ++ rorl $2,%edi ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%edi ++ shrl $10,%r14d ++ ++ rorl $17,%edi ++ xorl %r13d,%r12d ++ xorl %r14d,%edi ++ addl 24(%rsp),%r12d ++ ++ addl 52(%rsp),%r12d ++ movl %r11d,%r13d ++ addl %edi,%r12d ++ movl %edx,%r14d ++ rorl $14,%r13d ++ movl %eax,%edi ++ ++ xorl %r11d,%r13d ++ rorl $9,%r14d ++ xorl %ebx,%edi ++ ++ movl %r12d,52(%rsp) ++ xorl %edx,%r14d ++ andl %r11d,%edi ++ ++ rorl $5,%r13d ++ addl %ecx,%r12d ++ xorl %ebx,%edi ++ ++ rorl $11,%r14d ++ xorl %r11d,%r13d ++ addl %edi,%r12d ++ ++ movl %edx,%edi ++ addl (%rbp),%r12d ++ xorl %edx,%r14d ++ ++ xorl %r8d,%edi ++ rorl $6,%r13d ++ movl %r8d,%ecx ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%ecx ++ addl %r12d,%r10d ++ addl %r12d,%ecx ++ ++ leaq 4(%rbp),%rbp ++ movl 60(%rsp),%r13d ++ movl 48(%rsp),%r15d ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%ecx ++ movl %r15d,%r14d ++ rorl $2,%r15d ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%r15d ++ shrl $10,%r14d ++ ++ rorl $17,%r15d ++ xorl %r13d,%r12d ++ xorl %r14d,%r15d ++ addl 28(%rsp),%r12d ++ ++ addl 56(%rsp),%r12d ++ movl %r10d,%r13d ++ addl %r15d,%r12d ++ movl %ecx,%r14d ++ rorl $14,%r13d ++ movl %r11d,%r15d ++ ++ xorl %r10d,%r13d ++ rorl $9,%r14d ++ xorl %eax,%r15d ++ ++ movl %r12d,56(%rsp) ++ xorl %ecx,%r14d ++ andl %r10d,%r15d ++ ++ rorl $5,%r13d ++ addl %ebx,%r12d ++ xorl %eax,%r15d ++ ++ rorl $11,%r14d ++ xorl %r10d,%r13d ++ addl %r15d,%r12d ++ ++ movl %ecx,%r15d ++ addl (%rbp),%r12d ++ xorl %ecx,%r14d ++ ++ xorl %edx,%r15d ++ rorl $6,%r13d ++ movl %edx,%ebx ++ ++ andl %r15d,%edi ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %edi,%ebx ++ addl %r12d,%r9d ++ addl %r12d,%ebx ++ ++ leaq 4(%rbp),%rbp ++ movl 0(%rsp),%r13d ++ movl 52(%rsp),%edi ++ ++ movl %r13d,%r12d ++ rorl $11,%r13d ++ addl %r14d,%ebx ++ movl %edi,%r14d ++ rorl $2,%edi ++ ++ xorl %r12d,%r13d ++ shrl $3,%r12d ++ rorl $7,%r13d ++ xorl %r14d,%edi ++ shrl $10,%r14d ++ ++ rorl $17,%edi ++ xorl %r13d,%r12d ++ xorl %r14d,%edi ++ addl 32(%rsp),%r12d ++ ++ addl 60(%rsp),%r12d ++ movl %r9d,%r13d ++ addl %edi,%r12d ++ movl %ebx,%r14d ++ rorl $14,%r13d ++ movl %r10d,%edi ++ ++ xorl %r9d,%r13d ++ rorl $9,%r14d ++ xorl %r11d,%edi ++ ++ movl %r12d,60(%rsp) ++ xorl %ebx,%r14d ++ andl %r9d,%edi ++ ++ rorl $5,%r13d ++ addl %eax,%r12d ++ xorl %r11d,%edi ++ ++ rorl $11,%r14d ++ xorl %r9d,%r13d ++ addl %edi,%r12d ++ ++ movl %ebx,%edi ++ addl (%rbp),%r12d ++ xorl %ebx,%r14d ++ ++ xorl %ecx,%edi ++ rorl $6,%r13d ++ movl %ecx,%eax ++ ++ andl %edi,%r15d ++ rorl $2,%r14d ++ addl %r13d,%r12d ++ ++ xorl %r15d,%eax ++ addl %r12d,%r8d ++ addl %r12d,%eax ++ ++ leaq 20(%rbp),%rbp ++ cmpb $0,3(%rbp) ++ jnz L$rounds_16_xx ++ ++ movq 64+0(%rsp),%rdi ++ addl %r14d,%eax ++ leaq 64(%rsi),%rsi ++ ++ addl 0(%rdi),%eax ++ addl 4(%rdi),%ebx ++ addl 8(%rdi),%ecx ++ addl 
12(%rdi),%edx ++ addl 16(%rdi),%r8d ++ addl 20(%rdi),%r9d ++ addl 24(%rdi),%r10d ++ addl 28(%rdi),%r11d ++ ++ cmpq 64+16(%rsp),%rsi ++ ++ movl %eax,0(%rdi) ++ movl %ebx,4(%rdi) ++ movl %ecx,8(%rdi) ++ movl %edx,12(%rdi) ++ movl %r8d,16(%rdi) ++ movl %r9d,20(%rdi) ++ movl %r10d,24(%rdi) ++ movl %r11d,28(%rdi) ++ jb L$loop ++ ++ movq 88(%rsp),%rsi ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++.p2align 6 ++ ++K256: ++.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 ++.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 ++.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 ++.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 ++.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 ++.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 ++.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 ++.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 ++.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc ++.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc ++.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da ++.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da ++.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 ++.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 ++.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 ++.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 ++.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 ++.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 ++.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 ++.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 ++.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 ++.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 ++.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 ++.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 ++.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 ++.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 ++.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 ++.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 ++.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 ++.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 ++.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 ++.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 ++ ++.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f ++.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f ++.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff ++.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff ++.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 ++.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 ++.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++ ++.p2align 6 ++sha256_block_data_order_shaext: ++ ++L$shaext_shortcut: ++ leaq K256+128(%rip),%rcx ++ movdqu (%rdi),%xmm1 ++ movdqu 16(%rdi),%xmm2 ++ movdqa 512-128(%rcx),%xmm7 ++ ++ pshufd $0x1b,%xmm1,%xmm0 ++ pshufd $0xb1,%xmm1,%xmm1 ++ pshufd $0x1b,%xmm2,%xmm2 ++ movdqa %xmm7,%xmm8 ++.byte 102,15,58,15,202,8 ++ punpcklqdq %xmm0,%xmm2 ++ jmp L$oop_shaext ++ ++.p2align 4 ++L$oop_shaext: ++ movdqu (%rsi),%xmm3 ++ movdqu 16(%rsi),%xmm4 ++ movdqu 32(%rsi),%xmm5 ++.byte 102,15,56,0,223 ++ movdqu 48(%rsi),%xmm6 ++ ++ movdqa 0-128(%rcx),%xmm0 ++ paddd %xmm3,%xmm0 ++.byte 102,15,56,0,231 ++ movdqa %xmm2,%xmm10 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ nop ++ movdqa %xmm1,%xmm9 ++.byte 15,56,203,202 ++ ++ movdqa 
32-128(%rcx),%xmm0 ++ paddd %xmm4,%xmm0 ++.byte 102,15,56,0,239 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ leaq 64(%rsi),%rsi ++.byte 15,56,204,220 ++.byte 15,56,203,202 ++ ++ movdqa 64-128(%rcx),%xmm0 ++ paddd %xmm5,%xmm0 ++.byte 102,15,56,0,247 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm6,%xmm7 ++.byte 102,15,58,15,253,4 ++ nop ++ paddd %xmm7,%xmm3 ++.byte 15,56,204,229 ++.byte 15,56,203,202 ++ ++ movdqa 96-128(%rcx),%xmm0 ++ paddd %xmm6,%xmm0 ++.byte 15,56,205,222 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm3,%xmm7 ++.byte 102,15,58,15,254,4 ++ nop ++ paddd %xmm7,%xmm4 ++.byte 15,56,204,238 ++.byte 15,56,203,202 ++ movdqa 128-128(%rcx),%xmm0 ++ paddd %xmm3,%xmm0 ++.byte 15,56,205,227 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm4,%xmm7 ++.byte 102,15,58,15,251,4 ++ nop ++ paddd %xmm7,%xmm5 ++.byte 15,56,204,243 ++.byte 15,56,203,202 ++ movdqa 160-128(%rcx),%xmm0 ++ paddd %xmm4,%xmm0 ++.byte 15,56,205,236 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm5,%xmm7 ++.byte 102,15,58,15,252,4 ++ nop ++ paddd %xmm7,%xmm6 ++.byte 15,56,204,220 ++.byte 15,56,203,202 ++ movdqa 192-128(%rcx),%xmm0 ++ paddd %xmm5,%xmm0 ++.byte 15,56,205,245 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm6,%xmm7 ++.byte 102,15,58,15,253,4 ++ nop ++ paddd %xmm7,%xmm3 ++.byte 15,56,204,229 ++.byte 15,56,203,202 ++ movdqa 224-128(%rcx),%xmm0 ++ paddd %xmm6,%xmm0 ++.byte 15,56,205,222 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm3,%xmm7 ++.byte 102,15,58,15,254,4 ++ nop ++ paddd %xmm7,%xmm4 ++.byte 15,56,204,238 ++.byte 15,56,203,202 ++ movdqa 256-128(%rcx),%xmm0 ++ paddd %xmm3,%xmm0 ++.byte 15,56,205,227 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm4,%xmm7 ++.byte 102,15,58,15,251,4 ++ nop ++ paddd %xmm7,%xmm5 ++.byte 15,56,204,243 ++.byte 15,56,203,202 ++ movdqa 288-128(%rcx),%xmm0 ++ paddd %xmm4,%xmm0 ++.byte 15,56,205,236 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm5,%xmm7 ++.byte 102,15,58,15,252,4 ++ nop ++ paddd %xmm7,%xmm6 ++.byte 15,56,204,220 ++.byte 15,56,203,202 ++ movdqa 320-128(%rcx),%xmm0 ++ paddd %xmm5,%xmm0 ++.byte 15,56,205,245 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm6,%xmm7 ++.byte 102,15,58,15,253,4 ++ nop ++ paddd %xmm7,%xmm3 ++.byte 15,56,204,229 ++.byte 15,56,203,202 ++ movdqa 352-128(%rcx),%xmm0 ++ paddd %xmm6,%xmm0 ++.byte 15,56,205,222 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm3,%xmm7 ++.byte 102,15,58,15,254,4 ++ nop ++ paddd %xmm7,%xmm4 ++.byte 15,56,204,238 ++.byte 15,56,203,202 ++ movdqa 384-128(%rcx),%xmm0 ++ paddd %xmm3,%xmm0 ++.byte 15,56,205,227 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm4,%xmm7 ++.byte 102,15,58,15,251,4 ++ nop ++ paddd %xmm7,%xmm5 ++.byte 15,56,204,243 ++.byte 15,56,203,202 ++ movdqa 416-128(%rcx),%xmm0 ++ paddd %xmm4,%xmm0 ++.byte 15,56,205,236 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm5,%xmm7 ++.byte 102,15,58,15,252,4 ++.byte 15,56,203,202 ++ paddd %xmm7,%xmm6 ++ ++ movdqa 448-128(%rcx),%xmm0 ++ paddd %xmm5,%xmm0 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++.byte 15,56,205,245 ++ movdqa %xmm8,%xmm7 ++.byte 15,56,203,202 ++ ++ movdqa 480-128(%rcx),%xmm0 ++ paddd %xmm6,%xmm0 ++ nop ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ decq %rdx ++ nop ++.byte 15,56,203,202 ++ ++ paddd %xmm10,%xmm2 ++ paddd %xmm9,%xmm1 ++ jnz L$oop_shaext ++ ++ pshufd $0xb1,%xmm2,%xmm2 ++ pshufd $0x1b,%xmm1,%xmm7 ++ pshufd 
$0xb1,%xmm1,%xmm1 ++ punpckhqdq %xmm2,%xmm1 ++.byte 102,15,58,15,215,8 ++ ++ movdqu %xmm1,(%rdi) ++ movdqu %xmm2,16(%rdi) ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 6 ++sha256_block_data_order_ssse3: ++ ++L$ssse3_shortcut: ++ movq %rsp,%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ shlq $4,%rdx ++ subq $96,%rsp ++ leaq (%rsi,%rdx,4),%rdx ++ andq $-64,%rsp ++ movq %rdi,64+0(%rsp) ++ movq %rsi,64+8(%rsp) ++ movq %rdx,64+16(%rsp) ++ movq %rax,88(%rsp) ++ ++L$prologue_ssse3: ++ ++ movl 0(%rdi),%eax ++ movl 4(%rdi),%ebx ++ movl 8(%rdi),%ecx ++ movl 12(%rdi),%edx ++ movl 16(%rdi),%r8d ++ movl 20(%rdi),%r9d ++ movl 24(%rdi),%r10d ++ movl 28(%rdi),%r11d ++ ++ ++ jmp L$loop_ssse3 ++.p2align 4 ++L$loop_ssse3: ++ movdqa K256+512(%rip),%xmm7 ++ movdqu 0(%rsi),%xmm0 ++ movdqu 16(%rsi),%xmm1 ++ movdqu 32(%rsi),%xmm2 ++.byte 102,15,56,0,199 ++ movdqu 48(%rsi),%xmm3 ++ leaq K256(%rip),%rbp ++.byte 102,15,56,0,207 ++ movdqa 0(%rbp),%xmm4 ++ movdqa 32(%rbp),%xmm5 ++.byte 102,15,56,0,215 ++ paddd %xmm0,%xmm4 ++ movdqa 64(%rbp),%xmm6 ++.byte 102,15,56,0,223 ++ movdqa 96(%rbp),%xmm7 ++ paddd %xmm1,%xmm5 ++ paddd %xmm2,%xmm6 ++ paddd %xmm3,%xmm7 ++ movdqa %xmm4,0(%rsp) ++ movl %eax,%r14d ++ movdqa %xmm5,16(%rsp) ++ movl %ebx,%edi ++ movdqa %xmm6,32(%rsp) ++ xorl %ecx,%edi ++ movdqa %xmm7,48(%rsp) ++ movl %r8d,%r13d ++ jmp L$ssse3_00_47 ++ ++.p2align 4 ++L$ssse3_00_47: ++ subq $-128,%rbp ++ rorl $14,%r13d ++ movdqa %xmm1,%xmm4 ++ movl %r14d,%eax ++ movl %r9d,%r12d ++ movdqa %xmm3,%xmm7 ++ rorl $9,%r14d ++ xorl %r8d,%r13d ++ xorl %r10d,%r12d ++ rorl $5,%r13d ++ xorl %eax,%r14d ++.byte 102,15,58,15,224,4 ++ andl %r8d,%r12d ++ xorl %r8d,%r13d ++.byte 102,15,58,15,250,4 ++ addl 0(%rsp),%r11d ++ movl %eax,%r15d ++ xorl %r10d,%r12d ++ rorl $11,%r14d ++ movdqa %xmm4,%xmm5 ++ xorl %ebx,%r15d ++ addl %r12d,%r11d ++ movdqa %xmm4,%xmm6 ++ rorl $6,%r13d ++ andl %r15d,%edi ++ psrld $3,%xmm4 ++ xorl %eax,%r14d ++ addl %r13d,%r11d ++ xorl %ebx,%edi ++ paddd %xmm7,%xmm0 ++ rorl $2,%r14d ++ addl %r11d,%edx ++ psrld $7,%xmm6 ++ addl %edi,%r11d ++ movl %edx,%r13d ++ pshufd $250,%xmm3,%xmm7 ++ addl %r11d,%r14d ++ rorl $14,%r13d ++ pslld $14,%xmm5 ++ movl %r14d,%r11d ++ movl %r8d,%r12d ++ pxor %xmm6,%xmm4 ++ rorl $9,%r14d ++ xorl %edx,%r13d ++ xorl %r9d,%r12d ++ rorl $5,%r13d ++ psrld $11,%xmm6 ++ xorl %r11d,%r14d ++ pxor %xmm5,%xmm4 ++ andl %edx,%r12d ++ xorl %edx,%r13d ++ pslld $11,%xmm5 ++ addl 4(%rsp),%r10d ++ movl %r11d,%edi ++ pxor %xmm6,%xmm4 ++ xorl %r9d,%r12d ++ rorl $11,%r14d ++ movdqa %xmm7,%xmm6 ++ xorl %eax,%edi ++ addl %r12d,%r10d ++ pxor %xmm5,%xmm4 ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %r11d,%r14d ++ psrld $10,%xmm7 ++ addl %r13d,%r10d ++ xorl %eax,%r15d ++ paddd %xmm4,%xmm0 ++ rorl $2,%r14d ++ addl %r10d,%ecx ++ psrlq $17,%xmm6 ++ addl %r15d,%r10d ++ movl %ecx,%r13d ++ addl %r10d,%r14d ++ pxor %xmm6,%xmm7 ++ rorl $14,%r13d ++ movl %r14d,%r10d ++ movl %edx,%r12d ++ rorl $9,%r14d ++ psrlq $2,%xmm6 ++ xorl %ecx,%r13d ++ xorl %r8d,%r12d ++ pxor %xmm6,%xmm7 ++ rorl $5,%r13d ++ xorl %r10d,%r14d ++ andl %ecx,%r12d ++ pshufd $128,%xmm7,%xmm7 ++ xorl %ecx,%r13d ++ addl 8(%rsp),%r9d ++ movl %r10d,%r15d ++ psrldq $8,%xmm7 ++ xorl %r8d,%r12d ++ rorl $11,%r14d ++ xorl %r11d,%r15d ++ addl %r12d,%r9d ++ rorl $6,%r13d ++ paddd %xmm7,%xmm0 ++ andl %r15d,%edi ++ xorl %r10d,%r14d ++ addl %r13d,%r9d ++ pshufd $80,%xmm0,%xmm7 ++ xorl %r11d,%edi ++ rorl $2,%r14d ++ addl %r9d,%ebx ++ movdqa %xmm7,%xmm6 ++ addl %edi,%r9d ++ movl %ebx,%r13d ++ psrld $10,%xmm7 ++ addl 
%r9d,%r14d ++ rorl $14,%r13d ++ psrlq $17,%xmm6 ++ movl %r14d,%r9d ++ movl %ecx,%r12d ++ pxor %xmm6,%xmm7 ++ rorl $9,%r14d ++ xorl %ebx,%r13d ++ xorl %edx,%r12d ++ rorl $5,%r13d ++ xorl %r9d,%r14d ++ psrlq $2,%xmm6 ++ andl %ebx,%r12d ++ xorl %ebx,%r13d ++ addl 12(%rsp),%r8d ++ pxor %xmm6,%xmm7 ++ movl %r9d,%edi ++ xorl %edx,%r12d ++ rorl $11,%r14d ++ pshufd $8,%xmm7,%xmm7 ++ xorl %r10d,%edi ++ addl %r12d,%r8d ++ movdqa 0(%rbp),%xmm6 ++ rorl $6,%r13d ++ andl %edi,%r15d ++ pslldq $8,%xmm7 ++ xorl %r9d,%r14d ++ addl %r13d,%r8d ++ xorl %r10d,%r15d ++ paddd %xmm7,%xmm0 ++ rorl $2,%r14d ++ addl %r8d,%eax ++ addl %r15d,%r8d ++ paddd %xmm0,%xmm6 ++ movl %eax,%r13d ++ addl %r8d,%r14d ++ movdqa %xmm6,0(%rsp) ++ rorl $14,%r13d ++ movdqa %xmm2,%xmm4 ++ movl %r14d,%r8d ++ movl %ebx,%r12d ++ movdqa %xmm0,%xmm7 ++ rorl $9,%r14d ++ xorl %eax,%r13d ++ xorl %ecx,%r12d ++ rorl $5,%r13d ++ xorl %r8d,%r14d ++.byte 102,15,58,15,225,4 ++ andl %eax,%r12d ++ xorl %eax,%r13d ++.byte 102,15,58,15,251,4 ++ addl 16(%rsp),%edx ++ movl %r8d,%r15d ++ xorl %ecx,%r12d ++ rorl $11,%r14d ++ movdqa %xmm4,%xmm5 ++ xorl %r9d,%r15d ++ addl %r12d,%edx ++ movdqa %xmm4,%xmm6 ++ rorl $6,%r13d ++ andl %r15d,%edi ++ psrld $3,%xmm4 ++ xorl %r8d,%r14d ++ addl %r13d,%edx ++ xorl %r9d,%edi ++ paddd %xmm7,%xmm1 ++ rorl $2,%r14d ++ addl %edx,%r11d ++ psrld $7,%xmm6 ++ addl %edi,%edx ++ movl %r11d,%r13d ++ pshufd $250,%xmm0,%xmm7 ++ addl %edx,%r14d ++ rorl $14,%r13d ++ pslld $14,%xmm5 ++ movl %r14d,%edx ++ movl %eax,%r12d ++ pxor %xmm6,%xmm4 ++ rorl $9,%r14d ++ xorl %r11d,%r13d ++ xorl %ebx,%r12d ++ rorl $5,%r13d ++ psrld $11,%xmm6 ++ xorl %edx,%r14d ++ pxor %xmm5,%xmm4 ++ andl %r11d,%r12d ++ xorl %r11d,%r13d ++ pslld $11,%xmm5 ++ addl 20(%rsp),%ecx ++ movl %edx,%edi ++ pxor %xmm6,%xmm4 ++ xorl %ebx,%r12d ++ rorl $11,%r14d ++ movdqa %xmm7,%xmm6 ++ xorl %r8d,%edi ++ addl %r12d,%ecx ++ pxor %xmm5,%xmm4 ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %edx,%r14d ++ psrld $10,%xmm7 ++ addl %r13d,%ecx ++ xorl %r8d,%r15d ++ paddd %xmm4,%xmm1 ++ rorl $2,%r14d ++ addl %ecx,%r10d ++ psrlq $17,%xmm6 ++ addl %r15d,%ecx ++ movl %r10d,%r13d ++ addl %ecx,%r14d ++ pxor %xmm6,%xmm7 ++ rorl $14,%r13d ++ movl %r14d,%ecx ++ movl %r11d,%r12d ++ rorl $9,%r14d ++ psrlq $2,%xmm6 ++ xorl %r10d,%r13d ++ xorl %eax,%r12d ++ pxor %xmm6,%xmm7 ++ rorl $5,%r13d ++ xorl %ecx,%r14d ++ andl %r10d,%r12d ++ pshufd $128,%xmm7,%xmm7 ++ xorl %r10d,%r13d ++ addl 24(%rsp),%ebx ++ movl %ecx,%r15d ++ psrldq $8,%xmm7 ++ xorl %eax,%r12d ++ rorl $11,%r14d ++ xorl %edx,%r15d ++ addl %r12d,%ebx ++ rorl $6,%r13d ++ paddd %xmm7,%xmm1 ++ andl %r15d,%edi ++ xorl %ecx,%r14d ++ addl %r13d,%ebx ++ pshufd $80,%xmm1,%xmm7 ++ xorl %edx,%edi ++ rorl $2,%r14d ++ addl %ebx,%r9d ++ movdqa %xmm7,%xmm6 ++ addl %edi,%ebx ++ movl %r9d,%r13d ++ psrld $10,%xmm7 ++ addl %ebx,%r14d ++ rorl $14,%r13d ++ psrlq $17,%xmm6 ++ movl %r14d,%ebx ++ movl %r10d,%r12d ++ pxor %xmm6,%xmm7 ++ rorl $9,%r14d ++ xorl %r9d,%r13d ++ xorl %r11d,%r12d ++ rorl $5,%r13d ++ xorl %ebx,%r14d ++ psrlq $2,%xmm6 ++ andl %r9d,%r12d ++ xorl %r9d,%r13d ++ addl 28(%rsp),%eax ++ pxor %xmm6,%xmm7 ++ movl %ebx,%edi ++ xorl %r11d,%r12d ++ rorl $11,%r14d ++ pshufd $8,%xmm7,%xmm7 ++ xorl %ecx,%edi ++ addl %r12d,%eax ++ movdqa 32(%rbp),%xmm6 ++ rorl $6,%r13d ++ andl %edi,%r15d ++ pslldq $8,%xmm7 ++ xorl %ebx,%r14d ++ addl %r13d,%eax ++ xorl %ecx,%r15d ++ paddd %xmm7,%xmm1 ++ rorl $2,%r14d ++ addl %eax,%r8d ++ addl %r15d,%eax ++ paddd %xmm1,%xmm6 ++ movl %r8d,%r13d ++ addl %eax,%r14d ++ movdqa %xmm6,16(%rsp) ++ rorl $14,%r13d ++ movdqa %xmm3,%xmm4 ++ movl 
%r14d,%eax ++ movl %r9d,%r12d ++ movdqa %xmm1,%xmm7 ++ rorl $9,%r14d ++ xorl %r8d,%r13d ++ xorl %r10d,%r12d ++ rorl $5,%r13d ++ xorl %eax,%r14d ++.byte 102,15,58,15,226,4 ++ andl %r8d,%r12d ++ xorl %r8d,%r13d ++.byte 102,15,58,15,248,4 ++ addl 32(%rsp),%r11d ++ movl %eax,%r15d ++ xorl %r10d,%r12d ++ rorl $11,%r14d ++ movdqa %xmm4,%xmm5 ++ xorl %ebx,%r15d ++ addl %r12d,%r11d ++ movdqa %xmm4,%xmm6 ++ rorl $6,%r13d ++ andl %r15d,%edi ++ psrld $3,%xmm4 ++ xorl %eax,%r14d ++ addl %r13d,%r11d ++ xorl %ebx,%edi ++ paddd %xmm7,%xmm2 ++ rorl $2,%r14d ++ addl %r11d,%edx ++ psrld $7,%xmm6 ++ addl %edi,%r11d ++ movl %edx,%r13d ++ pshufd $250,%xmm1,%xmm7 ++ addl %r11d,%r14d ++ rorl $14,%r13d ++ pslld $14,%xmm5 ++ movl %r14d,%r11d ++ movl %r8d,%r12d ++ pxor %xmm6,%xmm4 ++ rorl $9,%r14d ++ xorl %edx,%r13d ++ xorl %r9d,%r12d ++ rorl $5,%r13d ++ psrld $11,%xmm6 ++ xorl %r11d,%r14d ++ pxor %xmm5,%xmm4 ++ andl %edx,%r12d ++ xorl %edx,%r13d ++ pslld $11,%xmm5 ++ addl 36(%rsp),%r10d ++ movl %r11d,%edi ++ pxor %xmm6,%xmm4 ++ xorl %r9d,%r12d ++ rorl $11,%r14d ++ movdqa %xmm7,%xmm6 ++ xorl %eax,%edi ++ addl %r12d,%r10d ++ pxor %xmm5,%xmm4 ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %r11d,%r14d ++ psrld $10,%xmm7 ++ addl %r13d,%r10d ++ xorl %eax,%r15d ++ paddd %xmm4,%xmm2 ++ rorl $2,%r14d ++ addl %r10d,%ecx ++ psrlq $17,%xmm6 ++ addl %r15d,%r10d ++ movl %ecx,%r13d ++ addl %r10d,%r14d ++ pxor %xmm6,%xmm7 ++ rorl $14,%r13d ++ movl %r14d,%r10d ++ movl %edx,%r12d ++ rorl $9,%r14d ++ psrlq $2,%xmm6 ++ xorl %ecx,%r13d ++ xorl %r8d,%r12d ++ pxor %xmm6,%xmm7 ++ rorl $5,%r13d ++ xorl %r10d,%r14d ++ andl %ecx,%r12d ++ pshufd $128,%xmm7,%xmm7 ++ xorl %ecx,%r13d ++ addl 40(%rsp),%r9d ++ movl %r10d,%r15d ++ psrldq $8,%xmm7 ++ xorl %r8d,%r12d ++ rorl $11,%r14d ++ xorl %r11d,%r15d ++ addl %r12d,%r9d ++ rorl $6,%r13d ++ paddd %xmm7,%xmm2 ++ andl %r15d,%edi ++ xorl %r10d,%r14d ++ addl %r13d,%r9d ++ pshufd $80,%xmm2,%xmm7 ++ xorl %r11d,%edi ++ rorl $2,%r14d ++ addl %r9d,%ebx ++ movdqa %xmm7,%xmm6 ++ addl %edi,%r9d ++ movl %ebx,%r13d ++ psrld $10,%xmm7 ++ addl %r9d,%r14d ++ rorl $14,%r13d ++ psrlq $17,%xmm6 ++ movl %r14d,%r9d ++ movl %ecx,%r12d ++ pxor %xmm6,%xmm7 ++ rorl $9,%r14d ++ xorl %ebx,%r13d ++ xorl %edx,%r12d ++ rorl $5,%r13d ++ xorl %r9d,%r14d ++ psrlq $2,%xmm6 ++ andl %ebx,%r12d ++ xorl %ebx,%r13d ++ addl 44(%rsp),%r8d ++ pxor %xmm6,%xmm7 ++ movl %r9d,%edi ++ xorl %edx,%r12d ++ rorl $11,%r14d ++ pshufd $8,%xmm7,%xmm7 ++ xorl %r10d,%edi ++ addl %r12d,%r8d ++ movdqa 64(%rbp),%xmm6 ++ rorl $6,%r13d ++ andl %edi,%r15d ++ pslldq $8,%xmm7 ++ xorl %r9d,%r14d ++ addl %r13d,%r8d ++ xorl %r10d,%r15d ++ paddd %xmm7,%xmm2 ++ rorl $2,%r14d ++ addl %r8d,%eax ++ addl %r15d,%r8d ++ paddd %xmm2,%xmm6 ++ movl %eax,%r13d ++ addl %r8d,%r14d ++ movdqa %xmm6,32(%rsp) ++ rorl $14,%r13d ++ movdqa %xmm0,%xmm4 ++ movl %r14d,%r8d ++ movl %ebx,%r12d ++ movdqa %xmm2,%xmm7 ++ rorl $9,%r14d ++ xorl %eax,%r13d ++ xorl %ecx,%r12d ++ rorl $5,%r13d ++ xorl %r8d,%r14d ++.byte 102,15,58,15,227,4 ++ andl %eax,%r12d ++ xorl %eax,%r13d ++.byte 102,15,58,15,249,4 ++ addl 48(%rsp),%edx ++ movl %r8d,%r15d ++ xorl %ecx,%r12d ++ rorl $11,%r14d ++ movdqa %xmm4,%xmm5 ++ xorl %r9d,%r15d ++ addl %r12d,%edx ++ movdqa %xmm4,%xmm6 ++ rorl $6,%r13d ++ andl %r15d,%edi ++ psrld $3,%xmm4 ++ xorl %r8d,%r14d ++ addl %r13d,%edx ++ xorl %r9d,%edi ++ paddd %xmm7,%xmm3 ++ rorl $2,%r14d ++ addl %edx,%r11d ++ psrld $7,%xmm6 ++ addl %edi,%edx ++ movl %r11d,%r13d ++ pshufd $250,%xmm2,%xmm7 ++ addl %edx,%r14d ++ rorl $14,%r13d ++ pslld $14,%xmm5 ++ movl %r14d,%edx ++ movl %eax,%r12d ++ 
pxor %xmm6,%xmm4 ++ rorl $9,%r14d ++ xorl %r11d,%r13d ++ xorl %ebx,%r12d ++ rorl $5,%r13d ++ psrld $11,%xmm6 ++ xorl %edx,%r14d ++ pxor %xmm5,%xmm4 ++ andl %r11d,%r12d ++ xorl %r11d,%r13d ++ pslld $11,%xmm5 ++ addl 52(%rsp),%ecx ++ movl %edx,%edi ++ pxor %xmm6,%xmm4 ++ xorl %ebx,%r12d ++ rorl $11,%r14d ++ movdqa %xmm7,%xmm6 ++ xorl %r8d,%edi ++ addl %r12d,%ecx ++ pxor %xmm5,%xmm4 ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %edx,%r14d ++ psrld $10,%xmm7 ++ addl %r13d,%ecx ++ xorl %r8d,%r15d ++ paddd %xmm4,%xmm3 ++ rorl $2,%r14d ++ addl %ecx,%r10d ++ psrlq $17,%xmm6 ++ addl %r15d,%ecx ++ movl %r10d,%r13d ++ addl %ecx,%r14d ++ pxor %xmm6,%xmm7 ++ rorl $14,%r13d ++ movl %r14d,%ecx ++ movl %r11d,%r12d ++ rorl $9,%r14d ++ psrlq $2,%xmm6 ++ xorl %r10d,%r13d ++ xorl %eax,%r12d ++ pxor %xmm6,%xmm7 ++ rorl $5,%r13d ++ xorl %ecx,%r14d ++ andl %r10d,%r12d ++ pshufd $128,%xmm7,%xmm7 ++ xorl %r10d,%r13d ++ addl 56(%rsp),%ebx ++ movl %ecx,%r15d ++ psrldq $8,%xmm7 ++ xorl %eax,%r12d ++ rorl $11,%r14d ++ xorl %edx,%r15d ++ addl %r12d,%ebx ++ rorl $6,%r13d ++ paddd %xmm7,%xmm3 ++ andl %r15d,%edi ++ xorl %ecx,%r14d ++ addl %r13d,%ebx ++ pshufd $80,%xmm3,%xmm7 ++ xorl %edx,%edi ++ rorl $2,%r14d ++ addl %ebx,%r9d ++ movdqa %xmm7,%xmm6 ++ addl %edi,%ebx ++ movl %r9d,%r13d ++ psrld $10,%xmm7 ++ addl %ebx,%r14d ++ rorl $14,%r13d ++ psrlq $17,%xmm6 ++ movl %r14d,%ebx ++ movl %r10d,%r12d ++ pxor %xmm6,%xmm7 ++ rorl $9,%r14d ++ xorl %r9d,%r13d ++ xorl %r11d,%r12d ++ rorl $5,%r13d ++ xorl %ebx,%r14d ++ psrlq $2,%xmm6 ++ andl %r9d,%r12d ++ xorl %r9d,%r13d ++ addl 60(%rsp),%eax ++ pxor %xmm6,%xmm7 ++ movl %ebx,%edi ++ xorl %r11d,%r12d ++ rorl $11,%r14d ++ pshufd $8,%xmm7,%xmm7 ++ xorl %ecx,%edi ++ addl %r12d,%eax ++ movdqa 96(%rbp),%xmm6 ++ rorl $6,%r13d ++ andl %edi,%r15d ++ pslldq $8,%xmm7 ++ xorl %ebx,%r14d ++ addl %r13d,%eax ++ xorl %ecx,%r15d ++ paddd %xmm7,%xmm3 ++ rorl $2,%r14d ++ addl %eax,%r8d ++ addl %r15d,%eax ++ paddd %xmm3,%xmm6 ++ movl %r8d,%r13d ++ addl %eax,%r14d ++ movdqa %xmm6,48(%rsp) ++ cmpb $0,131(%rbp) ++ jne L$ssse3_00_47 ++ rorl $14,%r13d ++ movl %r14d,%eax ++ movl %r9d,%r12d ++ rorl $9,%r14d ++ xorl %r8d,%r13d ++ xorl %r10d,%r12d ++ rorl $5,%r13d ++ xorl %eax,%r14d ++ andl %r8d,%r12d ++ xorl %r8d,%r13d ++ addl 0(%rsp),%r11d ++ movl %eax,%r15d ++ xorl %r10d,%r12d ++ rorl $11,%r14d ++ xorl %ebx,%r15d ++ addl %r12d,%r11d ++ rorl $6,%r13d ++ andl %r15d,%edi ++ xorl %eax,%r14d ++ addl %r13d,%r11d ++ xorl %ebx,%edi ++ rorl $2,%r14d ++ addl %r11d,%edx ++ addl %edi,%r11d ++ movl %edx,%r13d ++ addl %r11d,%r14d ++ rorl $14,%r13d ++ movl %r14d,%r11d ++ movl %r8d,%r12d ++ rorl $9,%r14d ++ xorl %edx,%r13d ++ xorl %r9d,%r12d ++ rorl $5,%r13d ++ xorl %r11d,%r14d ++ andl %edx,%r12d ++ xorl %edx,%r13d ++ addl 4(%rsp),%r10d ++ movl %r11d,%edi ++ xorl %r9d,%r12d ++ rorl $11,%r14d ++ xorl %eax,%edi ++ addl %r12d,%r10d ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %r11d,%r14d ++ addl %r13d,%r10d ++ xorl %eax,%r15d ++ rorl $2,%r14d ++ addl %r10d,%ecx ++ addl %r15d,%r10d ++ movl %ecx,%r13d ++ addl %r10d,%r14d ++ rorl $14,%r13d ++ movl %r14d,%r10d ++ movl %edx,%r12d ++ rorl $9,%r14d ++ xorl %ecx,%r13d ++ xorl %r8d,%r12d ++ rorl $5,%r13d ++ xorl %r10d,%r14d ++ andl %ecx,%r12d ++ xorl %ecx,%r13d ++ addl 8(%rsp),%r9d ++ movl %r10d,%r15d ++ xorl %r8d,%r12d ++ rorl $11,%r14d ++ xorl %r11d,%r15d ++ addl %r12d,%r9d ++ rorl $6,%r13d ++ andl %r15d,%edi ++ xorl %r10d,%r14d ++ addl %r13d,%r9d ++ xorl %r11d,%edi ++ rorl $2,%r14d ++ addl %r9d,%ebx ++ addl %edi,%r9d ++ movl %ebx,%r13d ++ addl %r9d,%r14d ++ rorl $14,%r13d ++ movl 
%r14d,%r9d ++ movl %ecx,%r12d ++ rorl $9,%r14d ++ xorl %ebx,%r13d ++ xorl %edx,%r12d ++ rorl $5,%r13d ++ xorl %r9d,%r14d ++ andl %ebx,%r12d ++ xorl %ebx,%r13d ++ addl 12(%rsp),%r8d ++ movl %r9d,%edi ++ xorl %edx,%r12d ++ rorl $11,%r14d ++ xorl %r10d,%edi ++ addl %r12d,%r8d ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %r9d,%r14d ++ addl %r13d,%r8d ++ xorl %r10d,%r15d ++ rorl $2,%r14d ++ addl %r8d,%eax ++ addl %r15d,%r8d ++ movl %eax,%r13d ++ addl %r8d,%r14d ++ rorl $14,%r13d ++ movl %r14d,%r8d ++ movl %ebx,%r12d ++ rorl $9,%r14d ++ xorl %eax,%r13d ++ xorl %ecx,%r12d ++ rorl $5,%r13d ++ xorl %r8d,%r14d ++ andl %eax,%r12d ++ xorl %eax,%r13d ++ addl 16(%rsp),%edx ++ movl %r8d,%r15d ++ xorl %ecx,%r12d ++ rorl $11,%r14d ++ xorl %r9d,%r15d ++ addl %r12d,%edx ++ rorl $6,%r13d ++ andl %r15d,%edi ++ xorl %r8d,%r14d ++ addl %r13d,%edx ++ xorl %r9d,%edi ++ rorl $2,%r14d ++ addl %edx,%r11d ++ addl %edi,%edx ++ movl %r11d,%r13d ++ addl %edx,%r14d ++ rorl $14,%r13d ++ movl %r14d,%edx ++ movl %eax,%r12d ++ rorl $9,%r14d ++ xorl %r11d,%r13d ++ xorl %ebx,%r12d ++ rorl $5,%r13d ++ xorl %edx,%r14d ++ andl %r11d,%r12d ++ xorl %r11d,%r13d ++ addl 20(%rsp),%ecx ++ movl %edx,%edi ++ xorl %ebx,%r12d ++ rorl $11,%r14d ++ xorl %r8d,%edi ++ addl %r12d,%ecx ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %edx,%r14d ++ addl %r13d,%ecx ++ xorl %r8d,%r15d ++ rorl $2,%r14d ++ addl %ecx,%r10d ++ addl %r15d,%ecx ++ movl %r10d,%r13d ++ addl %ecx,%r14d ++ rorl $14,%r13d ++ movl %r14d,%ecx ++ movl %r11d,%r12d ++ rorl $9,%r14d ++ xorl %r10d,%r13d ++ xorl %eax,%r12d ++ rorl $5,%r13d ++ xorl %ecx,%r14d ++ andl %r10d,%r12d ++ xorl %r10d,%r13d ++ addl 24(%rsp),%ebx ++ movl %ecx,%r15d ++ xorl %eax,%r12d ++ rorl $11,%r14d ++ xorl %edx,%r15d ++ addl %r12d,%ebx ++ rorl $6,%r13d ++ andl %r15d,%edi ++ xorl %ecx,%r14d ++ addl %r13d,%ebx ++ xorl %edx,%edi ++ rorl $2,%r14d ++ addl %ebx,%r9d ++ addl %edi,%ebx ++ movl %r9d,%r13d ++ addl %ebx,%r14d ++ rorl $14,%r13d ++ movl %r14d,%ebx ++ movl %r10d,%r12d ++ rorl $9,%r14d ++ xorl %r9d,%r13d ++ xorl %r11d,%r12d ++ rorl $5,%r13d ++ xorl %ebx,%r14d ++ andl %r9d,%r12d ++ xorl %r9d,%r13d ++ addl 28(%rsp),%eax ++ movl %ebx,%edi ++ xorl %r11d,%r12d ++ rorl $11,%r14d ++ xorl %ecx,%edi ++ addl %r12d,%eax ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %ebx,%r14d ++ addl %r13d,%eax ++ xorl %ecx,%r15d ++ rorl $2,%r14d ++ addl %eax,%r8d ++ addl %r15d,%eax ++ movl %r8d,%r13d ++ addl %eax,%r14d ++ rorl $14,%r13d ++ movl %r14d,%eax ++ movl %r9d,%r12d ++ rorl $9,%r14d ++ xorl %r8d,%r13d ++ xorl %r10d,%r12d ++ rorl $5,%r13d ++ xorl %eax,%r14d ++ andl %r8d,%r12d ++ xorl %r8d,%r13d ++ addl 32(%rsp),%r11d ++ movl %eax,%r15d ++ xorl %r10d,%r12d ++ rorl $11,%r14d ++ xorl %ebx,%r15d ++ addl %r12d,%r11d ++ rorl $6,%r13d ++ andl %r15d,%edi ++ xorl %eax,%r14d ++ addl %r13d,%r11d ++ xorl %ebx,%edi ++ rorl $2,%r14d ++ addl %r11d,%edx ++ addl %edi,%r11d ++ movl %edx,%r13d ++ addl %r11d,%r14d ++ rorl $14,%r13d ++ movl %r14d,%r11d ++ movl %r8d,%r12d ++ rorl $9,%r14d ++ xorl %edx,%r13d ++ xorl %r9d,%r12d ++ rorl $5,%r13d ++ xorl %r11d,%r14d ++ andl %edx,%r12d ++ xorl %edx,%r13d ++ addl 36(%rsp),%r10d ++ movl %r11d,%edi ++ xorl %r9d,%r12d ++ rorl $11,%r14d ++ xorl %eax,%edi ++ addl %r12d,%r10d ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %r11d,%r14d ++ addl %r13d,%r10d ++ xorl %eax,%r15d ++ rorl $2,%r14d ++ addl %r10d,%ecx ++ addl %r15d,%r10d ++ movl %ecx,%r13d ++ addl %r10d,%r14d ++ rorl $14,%r13d ++ movl %r14d,%r10d ++ movl %edx,%r12d ++ rorl $9,%r14d ++ xorl %ecx,%r13d ++ xorl %r8d,%r12d ++ rorl $5,%r13d ++ xorl %r10d,%r14d ++ andl 
%ecx,%r12d ++ xorl %ecx,%r13d ++ addl 40(%rsp),%r9d ++ movl %r10d,%r15d ++ xorl %r8d,%r12d ++ rorl $11,%r14d ++ xorl %r11d,%r15d ++ addl %r12d,%r9d ++ rorl $6,%r13d ++ andl %r15d,%edi ++ xorl %r10d,%r14d ++ addl %r13d,%r9d ++ xorl %r11d,%edi ++ rorl $2,%r14d ++ addl %r9d,%ebx ++ addl %edi,%r9d ++ movl %ebx,%r13d ++ addl %r9d,%r14d ++ rorl $14,%r13d ++ movl %r14d,%r9d ++ movl %ecx,%r12d ++ rorl $9,%r14d ++ xorl %ebx,%r13d ++ xorl %edx,%r12d ++ rorl $5,%r13d ++ xorl %r9d,%r14d ++ andl %ebx,%r12d ++ xorl %ebx,%r13d ++ addl 44(%rsp),%r8d ++ movl %r9d,%edi ++ xorl %edx,%r12d ++ rorl $11,%r14d ++ xorl %r10d,%edi ++ addl %r12d,%r8d ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %r9d,%r14d ++ addl %r13d,%r8d ++ xorl %r10d,%r15d ++ rorl $2,%r14d ++ addl %r8d,%eax ++ addl %r15d,%r8d ++ movl %eax,%r13d ++ addl %r8d,%r14d ++ rorl $14,%r13d ++ movl %r14d,%r8d ++ movl %ebx,%r12d ++ rorl $9,%r14d ++ xorl %eax,%r13d ++ xorl %ecx,%r12d ++ rorl $5,%r13d ++ xorl %r8d,%r14d ++ andl %eax,%r12d ++ xorl %eax,%r13d ++ addl 48(%rsp),%edx ++ movl %r8d,%r15d ++ xorl %ecx,%r12d ++ rorl $11,%r14d ++ xorl %r9d,%r15d ++ addl %r12d,%edx ++ rorl $6,%r13d ++ andl %r15d,%edi ++ xorl %r8d,%r14d ++ addl %r13d,%edx ++ xorl %r9d,%edi ++ rorl $2,%r14d ++ addl %edx,%r11d ++ addl %edi,%edx ++ movl %r11d,%r13d ++ addl %edx,%r14d ++ rorl $14,%r13d ++ movl %r14d,%edx ++ movl %eax,%r12d ++ rorl $9,%r14d ++ xorl %r11d,%r13d ++ xorl %ebx,%r12d ++ rorl $5,%r13d ++ xorl %edx,%r14d ++ andl %r11d,%r12d ++ xorl %r11d,%r13d ++ addl 52(%rsp),%ecx ++ movl %edx,%edi ++ xorl %ebx,%r12d ++ rorl $11,%r14d ++ xorl %r8d,%edi ++ addl %r12d,%ecx ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %edx,%r14d ++ addl %r13d,%ecx ++ xorl %r8d,%r15d ++ rorl $2,%r14d ++ addl %ecx,%r10d ++ addl %r15d,%ecx ++ movl %r10d,%r13d ++ addl %ecx,%r14d ++ rorl $14,%r13d ++ movl %r14d,%ecx ++ movl %r11d,%r12d ++ rorl $9,%r14d ++ xorl %r10d,%r13d ++ xorl %eax,%r12d ++ rorl $5,%r13d ++ xorl %ecx,%r14d ++ andl %r10d,%r12d ++ xorl %r10d,%r13d ++ addl 56(%rsp),%ebx ++ movl %ecx,%r15d ++ xorl %eax,%r12d ++ rorl $11,%r14d ++ xorl %edx,%r15d ++ addl %r12d,%ebx ++ rorl $6,%r13d ++ andl %r15d,%edi ++ xorl %ecx,%r14d ++ addl %r13d,%ebx ++ xorl %edx,%edi ++ rorl $2,%r14d ++ addl %ebx,%r9d ++ addl %edi,%ebx ++ movl %r9d,%r13d ++ addl %ebx,%r14d ++ rorl $14,%r13d ++ movl %r14d,%ebx ++ movl %r10d,%r12d ++ rorl $9,%r14d ++ xorl %r9d,%r13d ++ xorl %r11d,%r12d ++ rorl $5,%r13d ++ xorl %ebx,%r14d ++ andl %r9d,%r12d ++ xorl %r9d,%r13d ++ addl 60(%rsp),%eax ++ movl %ebx,%edi ++ xorl %r11d,%r12d ++ rorl $11,%r14d ++ xorl %ecx,%edi ++ addl %r12d,%eax ++ rorl $6,%r13d ++ andl %edi,%r15d ++ xorl %ebx,%r14d ++ addl %r13d,%eax ++ xorl %ecx,%r15d ++ rorl $2,%r14d ++ addl %eax,%r8d ++ addl %r15d,%eax ++ movl %r8d,%r13d ++ addl %eax,%r14d ++ movq 64+0(%rsp),%rdi ++ movl %r14d,%eax ++ ++ addl 0(%rdi),%eax ++ leaq 64(%rsi),%rsi ++ addl 4(%rdi),%ebx ++ addl 8(%rdi),%ecx ++ addl 12(%rdi),%edx ++ addl 16(%rdi),%r8d ++ addl 20(%rdi),%r9d ++ addl 24(%rdi),%r10d ++ addl 28(%rdi),%r11d ++ ++ cmpq 64+16(%rsp),%rsi ++ ++ movl %eax,0(%rdi) ++ movl %ebx,4(%rdi) ++ movl %ecx,8(%rdi) ++ movl %edx,12(%rdi) ++ movl %r8d,16(%rdi) ++ movl %r9d,20(%rdi) ++ movl %r10d,24(%rdi) ++ movl %r11d,28(%rdi) ++ jb L$loop_ssse3 ++ ++ movq 88(%rsp),%rsi ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$epilogue_ssse3: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 6 ++sha256_block_data_order_avx: ++ 
++L$avx_shortcut: ++ movq %rsp,%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ shlq $4,%rdx ++ subq $96,%rsp ++ leaq (%rsi,%rdx,4),%rdx ++ andq $-64,%rsp ++ movq %rdi,64+0(%rsp) ++ movq %rsi,64+8(%rsp) ++ movq %rdx,64+16(%rsp) ++ movq %rax,88(%rsp) ++ ++L$prologue_avx: ++ ++ vzeroupper ++ movl 0(%rdi),%eax ++ movl 4(%rdi),%ebx ++ movl 8(%rdi),%ecx ++ movl 12(%rdi),%edx ++ movl 16(%rdi),%r8d ++ movl 20(%rdi),%r9d ++ movl 24(%rdi),%r10d ++ movl 28(%rdi),%r11d ++ vmovdqa K256+512+32(%rip),%xmm8 ++ vmovdqa K256+512+64(%rip),%xmm9 ++ jmp L$loop_avx ++.p2align 4 ++L$loop_avx: ++ vmovdqa K256+512(%rip),%xmm7 ++ vmovdqu 0(%rsi),%xmm0 ++ vmovdqu 16(%rsi),%xmm1 ++ vmovdqu 32(%rsi),%xmm2 ++ vmovdqu 48(%rsi),%xmm3 ++ vpshufb %xmm7,%xmm0,%xmm0 ++ leaq K256(%rip),%rbp ++ vpshufb %xmm7,%xmm1,%xmm1 ++ vpshufb %xmm7,%xmm2,%xmm2 ++ vpaddd 0(%rbp),%xmm0,%xmm4 ++ vpshufb %xmm7,%xmm3,%xmm3 ++ vpaddd 32(%rbp),%xmm1,%xmm5 ++ vpaddd 64(%rbp),%xmm2,%xmm6 ++ vpaddd 96(%rbp),%xmm3,%xmm7 ++ vmovdqa %xmm4,0(%rsp) ++ movl %eax,%r14d ++ vmovdqa %xmm5,16(%rsp) ++ movl %ebx,%edi ++ vmovdqa %xmm6,32(%rsp) ++ xorl %ecx,%edi ++ vmovdqa %xmm7,48(%rsp) ++ movl %r8d,%r13d ++ jmp L$avx_00_47 ++ ++.p2align 4 ++L$avx_00_47: ++ subq $-128,%rbp ++ vpalignr $4,%xmm0,%xmm1,%xmm4 ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%eax ++ movl %r9d,%r12d ++ vpalignr $4,%xmm2,%xmm3,%xmm7 ++ shrdl $9,%r14d,%r14d ++ xorl %r8d,%r13d ++ xorl %r10d,%r12d ++ vpsrld $7,%xmm4,%xmm6 ++ shrdl $5,%r13d,%r13d ++ xorl %eax,%r14d ++ andl %r8d,%r12d ++ vpaddd %xmm7,%xmm0,%xmm0 ++ xorl %r8d,%r13d ++ addl 0(%rsp),%r11d ++ movl %eax,%r15d ++ vpsrld $3,%xmm4,%xmm7 ++ xorl %r10d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %ebx,%r15d ++ vpslld $14,%xmm4,%xmm5 ++ addl %r12d,%r11d ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ vpxor %xmm6,%xmm7,%xmm4 ++ xorl %eax,%r14d ++ addl %r13d,%r11d ++ xorl %ebx,%edi ++ vpshufd $250,%xmm3,%xmm7 ++ shrdl $2,%r14d,%r14d ++ addl %r11d,%edx ++ addl %edi,%r11d ++ vpsrld $11,%xmm6,%xmm6 ++ movl %edx,%r13d ++ addl %r11d,%r14d ++ shrdl $14,%r13d,%r13d ++ vpxor %xmm5,%xmm4,%xmm4 ++ movl %r14d,%r11d ++ movl %r8d,%r12d ++ shrdl $9,%r14d,%r14d ++ vpslld $11,%xmm5,%xmm5 ++ xorl %edx,%r13d ++ xorl %r9d,%r12d ++ shrdl $5,%r13d,%r13d ++ vpxor %xmm6,%xmm4,%xmm4 ++ xorl %r11d,%r14d ++ andl %edx,%r12d ++ xorl %edx,%r13d ++ vpsrld $10,%xmm7,%xmm6 ++ addl 4(%rsp),%r10d ++ movl %r11d,%edi ++ xorl %r9d,%r12d ++ vpxor %xmm5,%xmm4,%xmm4 ++ shrdl $11,%r14d,%r14d ++ xorl %eax,%edi ++ addl %r12d,%r10d ++ vpsrlq $17,%xmm7,%xmm7 ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %r11d,%r14d ++ vpaddd %xmm4,%xmm0,%xmm0 ++ addl %r13d,%r10d ++ xorl %eax,%r15d ++ shrdl $2,%r14d,%r14d ++ vpxor %xmm7,%xmm6,%xmm6 ++ addl %r10d,%ecx ++ addl %r15d,%r10d ++ movl %ecx,%r13d ++ vpsrlq $2,%xmm7,%xmm7 ++ addl %r10d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r10d ++ vpxor %xmm7,%xmm6,%xmm6 ++ movl %edx,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %ecx,%r13d ++ vpshufb %xmm8,%xmm6,%xmm6 ++ xorl %r8d,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r10d,%r14d ++ vpaddd %xmm6,%xmm0,%xmm0 ++ andl %ecx,%r12d ++ xorl %ecx,%r13d ++ addl 8(%rsp),%r9d ++ vpshufd $80,%xmm0,%xmm7 ++ movl %r10d,%r15d ++ xorl %r8d,%r12d ++ shrdl $11,%r14d,%r14d ++ vpsrld $10,%xmm7,%xmm6 ++ xorl %r11d,%r15d ++ addl %r12d,%r9d ++ shrdl $6,%r13d,%r13d ++ vpsrlq $17,%xmm7,%xmm7 ++ andl %r15d,%edi ++ xorl %r10d,%r14d ++ addl %r13d,%r9d ++ vpxor %xmm7,%xmm6,%xmm6 ++ xorl %r11d,%edi ++ shrdl $2,%r14d,%r14d ++ addl %r9d,%ebx ++ vpsrlq $2,%xmm7,%xmm7 ++ addl %edi,%r9d ++ 
movl %ebx,%r13d ++ addl %r9d,%r14d ++ vpxor %xmm7,%xmm6,%xmm6 ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r9d ++ movl %ecx,%r12d ++ vpshufb %xmm9,%xmm6,%xmm6 ++ shrdl $9,%r14d,%r14d ++ xorl %ebx,%r13d ++ xorl %edx,%r12d ++ vpaddd %xmm6,%xmm0,%xmm0 ++ shrdl $5,%r13d,%r13d ++ xorl %r9d,%r14d ++ andl %ebx,%r12d ++ vpaddd 0(%rbp),%xmm0,%xmm6 ++ xorl %ebx,%r13d ++ addl 12(%rsp),%r8d ++ movl %r9d,%edi ++ xorl %edx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r10d,%edi ++ addl %r12d,%r8d ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %r9d,%r14d ++ addl %r13d,%r8d ++ xorl %r10d,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %r8d,%eax ++ addl %r15d,%r8d ++ movl %eax,%r13d ++ addl %r8d,%r14d ++ vmovdqa %xmm6,0(%rsp) ++ vpalignr $4,%xmm1,%xmm2,%xmm4 ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r8d ++ movl %ebx,%r12d ++ vpalignr $4,%xmm3,%xmm0,%xmm7 ++ shrdl $9,%r14d,%r14d ++ xorl %eax,%r13d ++ xorl %ecx,%r12d ++ vpsrld $7,%xmm4,%xmm6 ++ shrdl $5,%r13d,%r13d ++ xorl %r8d,%r14d ++ andl %eax,%r12d ++ vpaddd %xmm7,%xmm1,%xmm1 ++ xorl %eax,%r13d ++ addl 16(%rsp),%edx ++ movl %r8d,%r15d ++ vpsrld $3,%xmm4,%xmm7 ++ xorl %ecx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r9d,%r15d ++ vpslld $14,%xmm4,%xmm5 ++ addl %r12d,%edx ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ vpxor %xmm6,%xmm7,%xmm4 ++ xorl %r8d,%r14d ++ addl %r13d,%edx ++ xorl %r9d,%edi ++ vpshufd $250,%xmm0,%xmm7 ++ shrdl $2,%r14d,%r14d ++ addl %edx,%r11d ++ addl %edi,%edx ++ vpsrld $11,%xmm6,%xmm6 ++ movl %r11d,%r13d ++ addl %edx,%r14d ++ shrdl $14,%r13d,%r13d ++ vpxor %xmm5,%xmm4,%xmm4 ++ movl %r14d,%edx ++ movl %eax,%r12d ++ shrdl $9,%r14d,%r14d ++ vpslld $11,%xmm5,%xmm5 ++ xorl %r11d,%r13d ++ xorl %ebx,%r12d ++ shrdl $5,%r13d,%r13d ++ vpxor %xmm6,%xmm4,%xmm4 ++ xorl %edx,%r14d ++ andl %r11d,%r12d ++ xorl %r11d,%r13d ++ vpsrld $10,%xmm7,%xmm6 ++ addl 20(%rsp),%ecx ++ movl %edx,%edi ++ xorl %ebx,%r12d ++ vpxor %xmm5,%xmm4,%xmm4 ++ shrdl $11,%r14d,%r14d ++ xorl %r8d,%edi ++ addl %r12d,%ecx ++ vpsrlq $17,%xmm7,%xmm7 ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %edx,%r14d ++ vpaddd %xmm4,%xmm1,%xmm1 ++ addl %r13d,%ecx ++ xorl %r8d,%r15d ++ shrdl $2,%r14d,%r14d ++ vpxor %xmm7,%xmm6,%xmm6 ++ addl %ecx,%r10d ++ addl %r15d,%ecx ++ movl %r10d,%r13d ++ vpsrlq $2,%xmm7,%xmm7 ++ addl %ecx,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%ecx ++ vpxor %xmm7,%xmm6,%xmm6 ++ movl %r11d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r10d,%r13d ++ vpshufb %xmm8,%xmm6,%xmm6 ++ xorl %eax,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %ecx,%r14d ++ vpaddd %xmm6,%xmm1,%xmm1 ++ andl %r10d,%r12d ++ xorl %r10d,%r13d ++ addl 24(%rsp),%ebx ++ vpshufd $80,%xmm1,%xmm7 ++ movl %ecx,%r15d ++ xorl %eax,%r12d ++ shrdl $11,%r14d,%r14d ++ vpsrld $10,%xmm7,%xmm6 ++ xorl %edx,%r15d ++ addl %r12d,%ebx ++ shrdl $6,%r13d,%r13d ++ vpsrlq $17,%xmm7,%xmm7 ++ andl %r15d,%edi ++ xorl %ecx,%r14d ++ addl %r13d,%ebx ++ vpxor %xmm7,%xmm6,%xmm6 ++ xorl %edx,%edi ++ shrdl $2,%r14d,%r14d ++ addl %ebx,%r9d ++ vpsrlq $2,%xmm7,%xmm7 ++ addl %edi,%ebx ++ movl %r9d,%r13d ++ addl %ebx,%r14d ++ vpxor %xmm7,%xmm6,%xmm6 ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%ebx ++ movl %r10d,%r12d ++ vpshufb %xmm9,%xmm6,%xmm6 ++ shrdl $9,%r14d,%r14d ++ xorl %r9d,%r13d ++ xorl %r11d,%r12d ++ vpaddd %xmm6,%xmm1,%xmm1 ++ shrdl $5,%r13d,%r13d ++ xorl %ebx,%r14d ++ andl %r9d,%r12d ++ vpaddd 32(%rbp),%xmm1,%xmm6 ++ xorl %r9d,%r13d ++ addl 28(%rsp),%eax ++ movl %ebx,%edi ++ xorl %r11d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %ecx,%edi ++ addl %r12d,%eax ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %ebx,%r14d ++ addl %r13d,%eax ++ xorl %ecx,%r15d ++ shrdl 
$2,%r14d,%r14d ++ addl %eax,%r8d ++ addl %r15d,%eax ++ movl %r8d,%r13d ++ addl %eax,%r14d ++ vmovdqa %xmm6,16(%rsp) ++ vpalignr $4,%xmm2,%xmm3,%xmm4 ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%eax ++ movl %r9d,%r12d ++ vpalignr $4,%xmm0,%xmm1,%xmm7 ++ shrdl $9,%r14d,%r14d ++ xorl %r8d,%r13d ++ xorl %r10d,%r12d ++ vpsrld $7,%xmm4,%xmm6 ++ shrdl $5,%r13d,%r13d ++ xorl %eax,%r14d ++ andl %r8d,%r12d ++ vpaddd %xmm7,%xmm2,%xmm2 ++ xorl %r8d,%r13d ++ addl 32(%rsp),%r11d ++ movl %eax,%r15d ++ vpsrld $3,%xmm4,%xmm7 ++ xorl %r10d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %ebx,%r15d ++ vpslld $14,%xmm4,%xmm5 ++ addl %r12d,%r11d ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ vpxor %xmm6,%xmm7,%xmm4 ++ xorl %eax,%r14d ++ addl %r13d,%r11d ++ xorl %ebx,%edi ++ vpshufd $250,%xmm1,%xmm7 ++ shrdl $2,%r14d,%r14d ++ addl %r11d,%edx ++ addl %edi,%r11d ++ vpsrld $11,%xmm6,%xmm6 ++ movl %edx,%r13d ++ addl %r11d,%r14d ++ shrdl $14,%r13d,%r13d ++ vpxor %xmm5,%xmm4,%xmm4 ++ movl %r14d,%r11d ++ movl %r8d,%r12d ++ shrdl $9,%r14d,%r14d ++ vpslld $11,%xmm5,%xmm5 ++ xorl %edx,%r13d ++ xorl %r9d,%r12d ++ shrdl $5,%r13d,%r13d ++ vpxor %xmm6,%xmm4,%xmm4 ++ xorl %r11d,%r14d ++ andl %edx,%r12d ++ xorl %edx,%r13d ++ vpsrld $10,%xmm7,%xmm6 ++ addl 36(%rsp),%r10d ++ movl %r11d,%edi ++ xorl %r9d,%r12d ++ vpxor %xmm5,%xmm4,%xmm4 ++ shrdl $11,%r14d,%r14d ++ xorl %eax,%edi ++ addl %r12d,%r10d ++ vpsrlq $17,%xmm7,%xmm7 ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %r11d,%r14d ++ vpaddd %xmm4,%xmm2,%xmm2 ++ addl %r13d,%r10d ++ xorl %eax,%r15d ++ shrdl $2,%r14d,%r14d ++ vpxor %xmm7,%xmm6,%xmm6 ++ addl %r10d,%ecx ++ addl %r15d,%r10d ++ movl %ecx,%r13d ++ vpsrlq $2,%xmm7,%xmm7 ++ addl %r10d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r10d ++ vpxor %xmm7,%xmm6,%xmm6 ++ movl %edx,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %ecx,%r13d ++ vpshufb %xmm8,%xmm6,%xmm6 ++ xorl %r8d,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r10d,%r14d ++ vpaddd %xmm6,%xmm2,%xmm2 ++ andl %ecx,%r12d ++ xorl %ecx,%r13d ++ addl 40(%rsp),%r9d ++ vpshufd $80,%xmm2,%xmm7 ++ movl %r10d,%r15d ++ xorl %r8d,%r12d ++ shrdl $11,%r14d,%r14d ++ vpsrld $10,%xmm7,%xmm6 ++ xorl %r11d,%r15d ++ addl %r12d,%r9d ++ shrdl $6,%r13d,%r13d ++ vpsrlq $17,%xmm7,%xmm7 ++ andl %r15d,%edi ++ xorl %r10d,%r14d ++ addl %r13d,%r9d ++ vpxor %xmm7,%xmm6,%xmm6 ++ xorl %r11d,%edi ++ shrdl $2,%r14d,%r14d ++ addl %r9d,%ebx ++ vpsrlq $2,%xmm7,%xmm7 ++ addl %edi,%r9d ++ movl %ebx,%r13d ++ addl %r9d,%r14d ++ vpxor %xmm7,%xmm6,%xmm6 ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r9d ++ movl %ecx,%r12d ++ vpshufb %xmm9,%xmm6,%xmm6 ++ shrdl $9,%r14d,%r14d ++ xorl %ebx,%r13d ++ xorl %edx,%r12d ++ vpaddd %xmm6,%xmm2,%xmm2 ++ shrdl $5,%r13d,%r13d ++ xorl %r9d,%r14d ++ andl %ebx,%r12d ++ vpaddd 64(%rbp),%xmm2,%xmm6 ++ xorl %ebx,%r13d ++ addl 44(%rsp),%r8d ++ movl %r9d,%edi ++ xorl %edx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r10d,%edi ++ addl %r12d,%r8d ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %r9d,%r14d ++ addl %r13d,%r8d ++ xorl %r10d,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %r8d,%eax ++ addl %r15d,%r8d ++ movl %eax,%r13d ++ addl %r8d,%r14d ++ vmovdqa %xmm6,32(%rsp) ++ vpalignr $4,%xmm3,%xmm0,%xmm4 ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r8d ++ movl %ebx,%r12d ++ vpalignr $4,%xmm1,%xmm2,%xmm7 ++ shrdl $9,%r14d,%r14d ++ xorl %eax,%r13d ++ xorl %ecx,%r12d ++ vpsrld $7,%xmm4,%xmm6 ++ shrdl $5,%r13d,%r13d ++ xorl %r8d,%r14d ++ andl %eax,%r12d ++ vpaddd %xmm7,%xmm3,%xmm3 ++ xorl %eax,%r13d ++ addl 48(%rsp),%edx ++ movl %r8d,%r15d ++ vpsrld $3,%xmm4,%xmm7 ++ xorl %ecx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r9d,%r15d ++ 
vpslld $14,%xmm4,%xmm5 ++ addl %r12d,%edx ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ vpxor %xmm6,%xmm7,%xmm4 ++ xorl %r8d,%r14d ++ addl %r13d,%edx ++ xorl %r9d,%edi ++ vpshufd $250,%xmm2,%xmm7 ++ shrdl $2,%r14d,%r14d ++ addl %edx,%r11d ++ addl %edi,%edx ++ vpsrld $11,%xmm6,%xmm6 ++ movl %r11d,%r13d ++ addl %edx,%r14d ++ shrdl $14,%r13d,%r13d ++ vpxor %xmm5,%xmm4,%xmm4 ++ movl %r14d,%edx ++ movl %eax,%r12d ++ shrdl $9,%r14d,%r14d ++ vpslld $11,%xmm5,%xmm5 ++ xorl %r11d,%r13d ++ xorl %ebx,%r12d ++ shrdl $5,%r13d,%r13d ++ vpxor %xmm6,%xmm4,%xmm4 ++ xorl %edx,%r14d ++ andl %r11d,%r12d ++ xorl %r11d,%r13d ++ vpsrld $10,%xmm7,%xmm6 ++ addl 52(%rsp),%ecx ++ movl %edx,%edi ++ xorl %ebx,%r12d ++ vpxor %xmm5,%xmm4,%xmm4 ++ shrdl $11,%r14d,%r14d ++ xorl %r8d,%edi ++ addl %r12d,%ecx ++ vpsrlq $17,%xmm7,%xmm7 ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %edx,%r14d ++ vpaddd %xmm4,%xmm3,%xmm3 ++ addl %r13d,%ecx ++ xorl %r8d,%r15d ++ shrdl $2,%r14d,%r14d ++ vpxor %xmm7,%xmm6,%xmm6 ++ addl %ecx,%r10d ++ addl %r15d,%ecx ++ movl %r10d,%r13d ++ vpsrlq $2,%xmm7,%xmm7 ++ addl %ecx,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%ecx ++ vpxor %xmm7,%xmm6,%xmm6 ++ movl %r11d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r10d,%r13d ++ vpshufb %xmm8,%xmm6,%xmm6 ++ xorl %eax,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %ecx,%r14d ++ vpaddd %xmm6,%xmm3,%xmm3 ++ andl %r10d,%r12d ++ xorl %r10d,%r13d ++ addl 56(%rsp),%ebx ++ vpshufd $80,%xmm3,%xmm7 ++ movl %ecx,%r15d ++ xorl %eax,%r12d ++ shrdl $11,%r14d,%r14d ++ vpsrld $10,%xmm7,%xmm6 ++ xorl %edx,%r15d ++ addl %r12d,%ebx ++ shrdl $6,%r13d,%r13d ++ vpsrlq $17,%xmm7,%xmm7 ++ andl %r15d,%edi ++ xorl %ecx,%r14d ++ addl %r13d,%ebx ++ vpxor %xmm7,%xmm6,%xmm6 ++ xorl %edx,%edi ++ shrdl $2,%r14d,%r14d ++ addl %ebx,%r9d ++ vpsrlq $2,%xmm7,%xmm7 ++ addl %edi,%ebx ++ movl %r9d,%r13d ++ addl %ebx,%r14d ++ vpxor %xmm7,%xmm6,%xmm6 ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%ebx ++ movl %r10d,%r12d ++ vpshufb %xmm9,%xmm6,%xmm6 ++ shrdl $9,%r14d,%r14d ++ xorl %r9d,%r13d ++ xorl %r11d,%r12d ++ vpaddd %xmm6,%xmm3,%xmm3 ++ shrdl $5,%r13d,%r13d ++ xorl %ebx,%r14d ++ andl %r9d,%r12d ++ vpaddd 96(%rbp),%xmm3,%xmm6 ++ xorl %r9d,%r13d ++ addl 60(%rsp),%eax ++ movl %ebx,%edi ++ xorl %r11d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %ecx,%edi ++ addl %r12d,%eax ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %ebx,%r14d ++ addl %r13d,%eax ++ xorl %ecx,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %eax,%r8d ++ addl %r15d,%eax ++ movl %r8d,%r13d ++ addl %eax,%r14d ++ vmovdqa %xmm6,48(%rsp) ++ cmpb $0,131(%rbp) ++ jne L$avx_00_47 ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%eax ++ movl %r9d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r8d,%r13d ++ xorl %r10d,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %eax,%r14d ++ andl %r8d,%r12d ++ xorl %r8d,%r13d ++ addl 0(%rsp),%r11d ++ movl %eax,%r15d ++ xorl %r10d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %ebx,%r15d ++ addl %r12d,%r11d ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ xorl %eax,%r14d ++ addl %r13d,%r11d ++ xorl %ebx,%edi ++ shrdl $2,%r14d,%r14d ++ addl %r11d,%edx ++ addl %edi,%r11d ++ movl %edx,%r13d ++ addl %r11d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r11d ++ movl %r8d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %edx,%r13d ++ xorl %r9d,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r11d,%r14d ++ andl %edx,%r12d ++ xorl %edx,%r13d ++ addl 4(%rsp),%r10d ++ movl %r11d,%edi ++ xorl %r9d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %eax,%edi ++ addl %r12d,%r10d ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %r11d,%r14d ++ addl %r13d,%r10d ++ xorl %eax,%r15d ++ shrdl $2,%r14d,%r14d ++ addl 
%r10d,%ecx ++ addl %r15d,%r10d ++ movl %ecx,%r13d ++ addl %r10d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r10d ++ movl %edx,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %ecx,%r13d ++ xorl %r8d,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r10d,%r14d ++ andl %ecx,%r12d ++ xorl %ecx,%r13d ++ addl 8(%rsp),%r9d ++ movl %r10d,%r15d ++ xorl %r8d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r11d,%r15d ++ addl %r12d,%r9d ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ xorl %r10d,%r14d ++ addl %r13d,%r9d ++ xorl %r11d,%edi ++ shrdl $2,%r14d,%r14d ++ addl %r9d,%ebx ++ addl %edi,%r9d ++ movl %ebx,%r13d ++ addl %r9d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r9d ++ movl %ecx,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %ebx,%r13d ++ xorl %edx,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r9d,%r14d ++ andl %ebx,%r12d ++ xorl %ebx,%r13d ++ addl 12(%rsp),%r8d ++ movl %r9d,%edi ++ xorl %edx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r10d,%edi ++ addl %r12d,%r8d ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %r9d,%r14d ++ addl %r13d,%r8d ++ xorl %r10d,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %r8d,%eax ++ addl %r15d,%r8d ++ movl %eax,%r13d ++ addl %r8d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r8d ++ movl %ebx,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %eax,%r13d ++ xorl %ecx,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r8d,%r14d ++ andl %eax,%r12d ++ xorl %eax,%r13d ++ addl 16(%rsp),%edx ++ movl %r8d,%r15d ++ xorl %ecx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r9d,%r15d ++ addl %r12d,%edx ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ xorl %r8d,%r14d ++ addl %r13d,%edx ++ xorl %r9d,%edi ++ shrdl $2,%r14d,%r14d ++ addl %edx,%r11d ++ addl %edi,%edx ++ movl %r11d,%r13d ++ addl %edx,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%edx ++ movl %eax,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r11d,%r13d ++ xorl %ebx,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %edx,%r14d ++ andl %r11d,%r12d ++ xorl %r11d,%r13d ++ addl 20(%rsp),%ecx ++ movl %edx,%edi ++ xorl %ebx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r8d,%edi ++ addl %r12d,%ecx ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %edx,%r14d ++ addl %r13d,%ecx ++ xorl %r8d,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %ecx,%r10d ++ addl %r15d,%ecx ++ movl %r10d,%r13d ++ addl %ecx,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%ecx ++ movl %r11d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r10d,%r13d ++ xorl %eax,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %ecx,%r14d ++ andl %r10d,%r12d ++ xorl %r10d,%r13d ++ addl 24(%rsp),%ebx ++ movl %ecx,%r15d ++ xorl %eax,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %edx,%r15d ++ addl %r12d,%ebx ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ xorl %ecx,%r14d ++ addl %r13d,%ebx ++ xorl %edx,%edi ++ shrdl $2,%r14d,%r14d ++ addl %ebx,%r9d ++ addl %edi,%ebx ++ movl %r9d,%r13d ++ addl %ebx,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%ebx ++ movl %r10d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r9d,%r13d ++ xorl %r11d,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %ebx,%r14d ++ andl %r9d,%r12d ++ xorl %r9d,%r13d ++ addl 28(%rsp),%eax ++ movl %ebx,%edi ++ xorl %r11d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %ecx,%edi ++ addl %r12d,%eax ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %ebx,%r14d ++ addl %r13d,%eax ++ xorl %ecx,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %eax,%r8d ++ addl %r15d,%eax ++ movl %r8d,%r13d ++ addl %eax,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%eax ++ movl %r9d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r8d,%r13d ++ xorl %r10d,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %eax,%r14d ++ andl %r8d,%r12d ++ xorl %r8d,%r13d ++ addl 32(%rsp),%r11d ++ movl %eax,%r15d ++ xorl %r10d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl 
%ebx,%r15d ++ addl %r12d,%r11d ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ xorl %eax,%r14d ++ addl %r13d,%r11d ++ xorl %ebx,%edi ++ shrdl $2,%r14d,%r14d ++ addl %r11d,%edx ++ addl %edi,%r11d ++ movl %edx,%r13d ++ addl %r11d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r11d ++ movl %r8d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %edx,%r13d ++ xorl %r9d,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r11d,%r14d ++ andl %edx,%r12d ++ xorl %edx,%r13d ++ addl 36(%rsp),%r10d ++ movl %r11d,%edi ++ xorl %r9d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %eax,%edi ++ addl %r12d,%r10d ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %r11d,%r14d ++ addl %r13d,%r10d ++ xorl %eax,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %r10d,%ecx ++ addl %r15d,%r10d ++ movl %ecx,%r13d ++ addl %r10d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r10d ++ movl %edx,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %ecx,%r13d ++ xorl %r8d,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r10d,%r14d ++ andl %ecx,%r12d ++ xorl %ecx,%r13d ++ addl 40(%rsp),%r9d ++ movl %r10d,%r15d ++ xorl %r8d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r11d,%r15d ++ addl %r12d,%r9d ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ xorl %r10d,%r14d ++ addl %r13d,%r9d ++ xorl %r11d,%edi ++ shrdl $2,%r14d,%r14d ++ addl %r9d,%ebx ++ addl %edi,%r9d ++ movl %ebx,%r13d ++ addl %r9d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r9d ++ movl %ecx,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %ebx,%r13d ++ xorl %edx,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r9d,%r14d ++ andl %ebx,%r12d ++ xorl %ebx,%r13d ++ addl 44(%rsp),%r8d ++ movl %r9d,%edi ++ xorl %edx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r10d,%edi ++ addl %r12d,%r8d ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %r9d,%r14d ++ addl %r13d,%r8d ++ xorl %r10d,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %r8d,%eax ++ addl %r15d,%r8d ++ movl %eax,%r13d ++ addl %r8d,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%r8d ++ movl %ebx,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %eax,%r13d ++ xorl %ecx,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %r8d,%r14d ++ andl %eax,%r12d ++ xorl %eax,%r13d ++ addl 48(%rsp),%edx ++ movl %r8d,%r15d ++ xorl %ecx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r9d,%r15d ++ addl %r12d,%edx ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ xorl %r8d,%r14d ++ addl %r13d,%edx ++ xorl %r9d,%edi ++ shrdl $2,%r14d,%r14d ++ addl %edx,%r11d ++ addl %edi,%edx ++ movl %r11d,%r13d ++ addl %edx,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%edx ++ movl %eax,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r11d,%r13d ++ xorl %ebx,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %edx,%r14d ++ andl %r11d,%r12d ++ xorl %r11d,%r13d ++ addl 52(%rsp),%ecx ++ movl %edx,%edi ++ xorl %ebx,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %r8d,%edi ++ addl %r12d,%ecx ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %edx,%r14d ++ addl %r13d,%ecx ++ xorl %r8d,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %ecx,%r10d ++ addl %r15d,%ecx ++ movl %r10d,%r13d ++ addl %ecx,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%ecx ++ movl %r11d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r10d,%r13d ++ xorl %eax,%r12d ++ shrdl $5,%r13d,%r13d ++ xorl %ecx,%r14d ++ andl %r10d,%r12d ++ xorl %r10d,%r13d ++ addl 56(%rsp),%ebx ++ movl %ecx,%r15d ++ xorl %eax,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %edx,%r15d ++ addl %r12d,%ebx ++ shrdl $6,%r13d,%r13d ++ andl %r15d,%edi ++ xorl %ecx,%r14d ++ addl %r13d,%ebx ++ xorl %edx,%edi ++ shrdl $2,%r14d,%r14d ++ addl %ebx,%r9d ++ addl %edi,%ebx ++ movl %r9d,%r13d ++ addl %ebx,%r14d ++ shrdl $14,%r13d,%r13d ++ movl %r14d,%ebx ++ movl %r10d,%r12d ++ shrdl $9,%r14d,%r14d ++ xorl %r9d,%r13d ++ xorl %r11d,%r12d ++ shrdl 
$5,%r13d,%r13d ++ xorl %ebx,%r14d ++ andl %r9d,%r12d ++ xorl %r9d,%r13d ++ addl 60(%rsp),%eax ++ movl %ebx,%edi ++ xorl %r11d,%r12d ++ shrdl $11,%r14d,%r14d ++ xorl %ecx,%edi ++ addl %r12d,%eax ++ shrdl $6,%r13d,%r13d ++ andl %edi,%r15d ++ xorl %ebx,%r14d ++ addl %r13d,%eax ++ xorl %ecx,%r15d ++ shrdl $2,%r14d,%r14d ++ addl %eax,%r8d ++ addl %r15d,%eax ++ movl %r8d,%r13d ++ addl %eax,%r14d ++ movq 64+0(%rsp),%rdi ++ movl %r14d,%eax ++ ++ addl 0(%rdi),%eax ++ leaq 64(%rsi),%rsi ++ addl 4(%rdi),%ebx ++ addl 8(%rdi),%ecx ++ addl 12(%rdi),%edx ++ addl 16(%rdi),%r8d ++ addl 20(%rdi),%r9d ++ addl 24(%rdi),%r10d ++ addl 28(%rdi),%r11d ++ ++ cmpq 64+16(%rsp),%rsi ++ ++ movl %eax,0(%rdi) ++ movl %ebx,4(%rdi) ++ movl %ecx,8(%rdi) ++ movl %edx,12(%rdi) ++ movl %r8d,16(%rdi) ++ movl %r9d,20(%rdi) ++ movl %r10d,24(%rdi) ++ movl %r11d,28(%rdi) ++ jb L$loop_avx ++ ++ movq 88(%rsp),%rsi ++ ++ vzeroupper ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$epilogue_avx: ++ .byte 0xf3,0xc3 ++ ++ ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/sha512-x86_64.S b/apple-x86_64/crypto/fipsmodule/sha512-x86_64.S +new file mode 100644 +index 0000000..5732f43 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/sha512-x86_64.S +@@ -0,0 +1,2990 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++.globl _sha512_block_data_order ++.private_extern _sha512_block_data_order ++ ++.p2align 4 ++_sha512_block_data_order: ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%r11 ++ movl 0(%r11),%r9d ++ movl 4(%r11),%r10d ++ movl 8(%r11),%r11d ++ andl $1073741824,%r9d ++ andl $268435968,%r10d ++ orl %r9d,%r10d ++ cmpl $1342177792,%r10d ++ je L$avx_shortcut ++ movq %rsp,%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ shlq $4,%rdx ++ subq $128+32,%rsp ++ leaq (%rsi,%rdx,8),%rdx ++ andq $-64,%rsp ++ movq %rdi,128+0(%rsp) ++ movq %rsi,128+8(%rsp) ++ movq %rdx,128+16(%rsp) ++ movq %rax,152(%rsp) ++ ++L$prologue: ++ ++ movq 0(%rdi),%rax ++ movq 8(%rdi),%rbx ++ movq 16(%rdi),%rcx ++ movq 24(%rdi),%rdx ++ movq 32(%rdi),%r8 ++ movq 40(%rdi),%r9 ++ movq 48(%rdi),%r10 ++ movq 56(%rdi),%r11 ++ jmp L$loop ++ ++.p2align 4 ++L$loop: ++ movq %rbx,%rdi ++ leaq K512(%rip),%rbp ++ xorq %rcx,%rdi ++ movq 0(%rsi),%r12 ++ movq %r8,%r13 ++ movq %rax,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %r9,%r15 ++ ++ xorq %r8,%r13 ++ rorq $5,%r14 ++ xorq %r10,%r15 ++ ++ movq %r12,0(%rsp) ++ xorq %rax,%r14 ++ andq %r8,%r15 ++ ++ rorq $4,%r13 ++ addq %r11,%r12 ++ xorq %r10,%r15 ++ ++ rorq $6,%r14 ++ xorq %r8,%r13 ++ addq %r15,%r12 ++ ++ movq %rax,%r15 ++ addq (%rbp),%r12 ++ xorq %rax,%r14 ++ ++ xorq %rbx,%r15 ++ rorq $14,%r13 ++ movq %rbx,%r11 ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%r11 ++ addq %r12,%rdx ++ addq %r12,%r11 ++ ++ leaq 8(%rbp),%rbp ++ addq %r14,%r11 ++ movq 8(%rsi),%r12 ++ movq %rdx,%r13 ++ movq %r11,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %r8,%rdi ++ ++ xorq %rdx,%r13 ++ rorq $5,%r14 ++ xorq %r9,%rdi ++ ++ movq %r12,8(%rsp) ++ xorq %r11,%r14 ++ andq %rdx,%rdi ++ ++ rorq $4,%r13 ++ addq %r10,%r12 ++ 
xorq %r9,%rdi ++ ++ rorq $6,%r14 ++ xorq %rdx,%r13 ++ addq %rdi,%r12 ++ ++ movq %r11,%rdi ++ addq (%rbp),%r12 ++ xorq %r11,%r14 ++ ++ xorq %rax,%rdi ++ rorq $14,%r13 ++ movq %rax,%r10 ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%r10 ++ addq %r12,%rcx ++ addq %r12,%r10 ++ ++ leaq 24(%rbp),%rbp ++ addq %r14,%r10 ++ movq 16(%rsi),%r12 ++ movq %rcx,%r13 ++ movq %r10,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %rdx,%r15 ++ ++ xorq %rcx,%r13 ++ rorq $5,%r14 ++ xorq %r8,%r15 ++ ++ movq %r12,16(%rsp) ++ xorq %r10,%r14 ++ andq %rcx,%r15 ++ ++ rorq $4,%r13 ++ addq %r9,%r12 ++ xorq %r8,%r15 ++ ++ rorq $6,%r14 ++ xorq %rcx,%r13 ++ addq %r15,%r12 ++ ++ movq %r10,%r15 ++ addq (%rbp),%r12 ++ xorq %r10,%r14 ++ ++ xorq %r11,%r15 ++ rorq $14,%r13 ++ movq %r11,%r9 ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%r9 ++ addq %r12,%rbx ++ addq %r12,%r9 ++ ++ leaq 8(%rbp),%rbp ++ addq %r14,%r9 ++ movq 24(%rsi),%r12 ++ movq %rbx,%r13 ++ movq %r9,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %rcx,%rdi ++ ++ xorq %rbx,%r13 ++ rorq $5,%r14 ++ xorq %rdx,%rdi ++ ++ movq %r12,24(%rsp) ++ xorq %r9,%r14 ++ andq %rbx,%rdi ++ ++ rorq $4,%r13 ++ addq %r8,%r12 ++ xorq %rdx,%rdi ++ ++ rorq $6,%r14 ++ xorq %rbx,%r13 ++ addq %rdi,%r12 ++ ++ movq %r9,%rdi ++ addq (%rbp),%r12 ++ xorq %r9,%r14 ++ ++ xorq %r10,%rdi ++ rorq $14,%r13 ++ movq %r10,%r8 ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%r8 ++ addq %r12,%rax ++ addq %r12,%r8 ++ ++ leaq 24(%rbp),%rbp ++ addq %r14,%r8 ++ movq 32(%rsi),%r12 ++ movq %rax,%r13 ++ movq %r8,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %rbx,%r15 ++ ++ xorq %rax,%r13 ++ rorq $5,%r14 ++ xorq %rcx,%r15 ++ ++ movq %r12,32(%rsp) ++ xorq %r8,%r14 ++ andq %rax,%r15 ++ ++ rorq $4,%r13 ++ addq %rdx,%r12 ++ xorq %rcx,%r15 ++ ++ rorq $6,%r14 ++ xorq %rax,%r13 ++ addq %r15,%r12 ++ ++ movq %r8,%r15 ++ addq (%rbp),%r12 ++ xorq %r8,%r14 ++ ++ xorq %r9,%r15 ++ rorq $14,%r13 ++ movq %r9,%rdx ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%rdx ++ addq %r12,%r11 ++ addq %r12,%rdx ++ ++ leaq 8(%rbp),%rbp ++ addq %r14,%rdx ++ movq 40(%rsi),%r12 ++ movq %r11,%r13 ++ movq %rdx,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %rax,%rdi ++ ++ xorq %r11,%r13 ++ rorq $5,%r14 ++ xorq %rbx,%rdi ++ ++ movq %r12,40(%rsp) ++ xorq %rdx,%r14 ++ andq %r11,%rdi ++ ++ rorq $4,%r13 ++ addq %rcx,%r12 ++ xorq %rbx,%rdi ++ ++ rorq $6,%r14 ++ xorq %r11,%r13 ++ addq %rdi,%r12 ++ ++ movq %rdx,%rdi ++ addq (%rbp),%r12 ++ xorq %rdx,%r14 ++ ++ xorq %r8,%rdi ++ rorq $14,%r13 ++ movq %r8,%rcx ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%rcx ++ addq %r12,%r10 ++ addq %r12,%rcx ++ ++ leaq 24(%rbp),%rbp ++ addq %r14,%rcx ++ movq 48(%rsi),%r12 ++ movq %r10,%r13 ++ movq %rcx,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %r11,%r15 ++ ++ xorq %r10,%r13 ++ rorq $5,%r14 ++ xorq %rax,%r15 ++ ++ movq %r12,48(%rsp) ++ xorq %rcx,%r14 ++ andq %r10,%r15 ++ ++ rorq $4,%r13 ++ addq %rbx,%r12 ++ xorq %rax,%r15 ++ ++ rorq $6,%r14 ++ xorq %r10,%r13 ++ addq %r15,%r12 ++ ++ movq %rcx,%r15 ++ addq (%rbp),%r12 ++ xorq %rcx,%r14 ++ ++ xorq %rdx,%r15 ++ rorq $14,%r13 ++ movq %rdx,%rbx ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%rbx ++ addq %r12,%r9 ++ addq %r12,%rbx ++ ++ leaq 8(%rbp),%rbp ++ addq %r14,%rbx ++ movq 56(%rsi),%r12 ++ movq %r9,%r13 ++ movq %rbx,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %r10,%rdi ++ ++ xorq %r9,%r13 ++ rorq $5,%r14 ++ xorq %r11,%rdi ++ ++ movq %r12,56(%rsp) ++ xorq %rbx,%r14 ++ andq 
%r9,%rdi ++ ++ rorq $4,%r13 ++ addq %rax,%r12 ++ xorq %r11,%rdi ++ ++ rorq $6,%r14 ++ xorq %r9,%r13 ++ addq %rdi,%r12 ++ ++ movq %rbx,%rdi ++ addq (%rbp),%r12 ++ xorq %rbx,%r14 ++ ++ xorq %rcx,%rdi ++ rorq $14,%r13 ++ movq %rcx,%rax ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%rax ++ addq %r12,%r8 ++ addq %r12,%rax ++ ++ leaq 24(%rbp),%rbp ++ addq %r14,%rax ++ movq 64(%rsi),%r12 ++ movq %r8,%r13 ++ movq %rax,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %r9,%r15 ++ ++ xorq %r8,%r13 ++ rorq $5,%r14 ++ xorq %r10,%r15 ++ ++ movq %r12,64(%rsp) ++ xorq %rax,%r14 ++ andq %r8,%r15 ++ ++ rorq $4,%r13 ++ addq %r11,%r12 ++ xorq %r10,%r15 ++ ++ rorq $6,%r14 ++ xorq %r8,%r13 ++ addq %r15,%r12 ++ ++ movq %rax,%r15 ++ addq (%rbp),%r12 ++ xorq %rax,%r14 ++ ++ xorq %rbx,%r15 ++ rorq $14,%r13 ++ movq %rbx,%r11 ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%r11 ++ addq %r12,%rdx ++ addq %r12,%r11 ++ ++ leaq 8(%rbp),%rbp ++ addq %r14,%r11 ++ movq 72(%rsi),%r12 ++ movq %rdx,%r13 ++ movq %r11,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %r8,%rdi ++ ++ xorq %rdx,%r13 ++ rorq $5,%r14 ++ xorq %r9,%rdi ++ ++ movq %r12,72(%rsp) ++ xorq %r11,%r14 ++ andq %rdx,%rdi ++ ++ rorq $4,%r13 ++ addq %r10,%r12 ++ xorq %r9,%rdi ++ ++ rorq $6,%r14 ++ xorq %rdx,%r13 ++ addq %rdi,%r12 ++ ++ movq %r11,%rdi ++ addq (%rbp),%r12 ++ xorq %r11,%r14 ++ ++ xorq %rax,%rdi ++ rorq $14,%r13 ++ movq %rax,%r10 ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%r10 ++ addq %r12,%rcx ++ addq %r12,%r10 ++ ++ leaq 24(%rbp),%rbp ++ addq %r14,%r10 ++ movq 80(%rsi),%r12 ++ movq %rcx,%r13 ++ movq %r10,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %rdx,%r15 ++ ++ xorq %rcx,%r13 ++ rorq $5,%r14 ++ xorq %r8,%r15 ++ ++ movq %r12,80(%rsp) ++ xorq %r10,%r14 ++ andq %rcx,%r15 ++ ++ rorq $4,%r13 ++ addq %r9,%r12 ++ xorq %r8,%r15 ++ ++ rorq $6,%r14 ++ xorq %rcx,%r13 ++ addq %r15,%r12 ++ ++ movq %r10,%r15 ++ addq (%rbp),%r12 ++ xorq %r10,%r14 ++ ++ xorq %r11,%r15 ++ rorq $14,%r13 ++ movq %r11,%r9 ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%r9 ++ addq %r12,%rbx ++ addq %r12,%r9 ++ ++ leaq 8(%rbp),%rbp ++ addq %r14,%r9 ++ movq 88(%rsi),%r12 ++ movq %rbx,%r13 ++ movq %r9,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %rcx,%rdi ++ ++ xorq %rbx,%r13 ++ rorq $5,%r14 ++ xorq %rdx,%rdi ++ ++ movq %r12,88(%rsp) ++ xorq %r9,%r14 ++ andq %rbx,%rdi ++ ++ rorq $4,%r13 ++ addq %r8,%r12 ++ xorq %rdx,%rdi ++ ++ rorq $6,%r14 ++ xorq %rbx,%r13 ++ addq %rdi,%r12 ++ ++ movq %r9,%rdi ++ addq (%rbp),%r12 ++ xorq %r9,%r14 ++ ++ xorq %r10,%rdi ++ rorq $14,%r13 ++ movq %r10,%r8 ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%r8 ++ addq %r12,%rax ++ addq %r12,%r8 ++ ++ leaq 24(%rbp),%rbp ++ addq %r14,%r8 ++ movq 96(%rsi),%r12 ++ movq %rax,%r13 ++ movq %r8,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %rbx,%r15 ++ ++ xorq %rax,%r13 ++ rorq $5,%r14 ++ xorq %rcx,%r15 ++ ++ movq %r12,96(%rsp) ++ xorq %r8,%r14 ++ andq %rax,%r15 ++ ++ rorq $4,%r13 ++ addq %rdx,%r12 ++ xorq %rcx,%r15 ++ ++ rorq $6,%r14 ++ xorq %rax,%r13 ++ addq %r15,%r12 ++ ++ movq %r8,%r15 ++ addq (%rbp),%r12 ++ xorq %r8,%r14 ++ ++ xorq %r9,%r15 ++ rorq $14,%r13 ++ movq %r9,%rdx ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%rdx ++ addq %r12,%r11 ++ addq %r12,%rdx ++ ++ leaq 8(%rbp),%rbp ++ addq %r14,%rdx ++ movq 104(%rsi),%r12 ++ movq %r11,%r13 ++ movq %rdx,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %rax,%rdi ++ ++ xorq %r11,%r13 ++ rorq $5,%r14 ++ xorq %rbx,%rdi ++ ++ movq 
%r12,104(%rsp) ++ xorq %rdx,%r14 ++ andq %r11,%rdi ++ ++ rorq $4,%r13 ++ addq %rcx,%r12 ++ xorq %rbx,%rdi ++ ++ rorq $6,%r14 ++ xorq %r11,%r13 ++ addq %rdi,%r12 ++ ++ movq %rdx,%rdi ++ addq (%rbp),%r12 ++ xorq %rdx,%r14 ++ ++ xorq %r8,%rdi ++ rorq $14,%r13 ++ movq %r8,%rcx ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%rcx ++ addq %r12,%r10 ++ addq %r12,%rcx ++ ++ leaq 24(%rbp),%rbp ++ addq %r14,%rcx ++ movq 112(%rsi),%r12 ++ movq %r10,%r13 ++ movq %rcx,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %r11,%r15 ++ ++ xorq %r10,%r13 ++ rorq $5,%r14 ++ xorq %rax,%r15 ++ ++ movq %r12,112(%rsp) ++ xorq %rcx,%r14 ++ andq %r10,%r15 ++ ++ rorq $4,%r13 ++ addq %rbx,%r12 ++ xorq %rax,%r15 ++ ++ rorq $6,%r14 ++ xorq %r10,%r13 ++ addq %r15,%r12 ++ ++ movq %rcx,%r15 ++ addq (%rbp),%r12 ++ xorq %rcx,%r14 ++ ++ xorq %rdx,%r15 ++ rorq $14,%r13 ++ movq %rdx,%rbx ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%rbx ++ addq %r12,%r9 ++ addq %r12,%rbx ++ ++ leaq 8(%rbp),%rbp ++ addq %r14,%rbx ++ movq 120(%rsi),%r12 ++ movq %r9,%r13 ++ movq %rbx,%r14 ++ bswapq %r12 ++ rorq $23,%r13 ++ movq %r10,%rdi ++ ++ xorq %r9,%r13 ++ rorq $5,%r14 ++ xorq %r11,%rdi ++ ++ movq %r12,120(%rsp) ++ xorq %rbx,%r14 ++ andq %r9,%rdi ++ ++ rorq $4,%r13 ++ addq %rax,%r12 ++ xorq %r11,%rdi ++ ++ rorq $6,%r14 ++ xorq %r9,%r13 ++ addq %rdi,%r12 ++ ++ movq %rbx,%rdi ++ addq (%rbp),%r12 ++ xorq %rbx,%r14 ++ ++ xorq %rcx,%rdi ++ rorq $14,%r13 ++ movq %rcx,%rax ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%rax ++ addq %r12,%r8 ++ addq %r12,%rax ++ ++ leaq 24(%rbp),%rbp ++ jmp L$rounds_16_xx ++.p2align 4 ++L$rounds_16_xx: ++ movq 8(%rsp),%r13 ++ movq 112(%rsp),%r15 ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%rax ++ movq %r15,%r14 ++ rorq $42,%r15 ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%r15 ++ shrq $6,%r14 ++ ++ rorq $19,%r15 ++ xorq %r13,%r12 ++ xorq %r14,%r15 ++ addq 72(%rsp),%r12 ++ ++ addq 0(%rsp),%r12 ++ movq %r8,%r13 ++ addq %r15,%r12 ++ movq %rax,%r14 ++ rorq $23,%r13 ++ movq %r9,%r15 ++ ++ xorq %r8,%r13 ++ rorq $5,%r14 ++ xorq %r10,%r15 ++ ++ movq %r12,0(%rsp) ++ xorq %rax,%r14 ++ andq %r8,%r15 ++ ++ rorq $4,%r13 ++ addq %r11,%r12 ++ xorq %r10,%r15 ++ ++ rorq $6,%r14 ++ xorq %r8,%r13 ++ addq %r15,%r12 ++ ++ movq %rax,%r15 ++ addq (%rbp),%r12 ++ xorq %rax,%r14 ++ ++ xorq %rbx,%r15 ++ rorq $14,%r13 ++ movq %rbx,%r11 ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%r11 ++ addq %r12,%rdx ++ addq %r12,%r11 ++ ++ leaq 8(%rbp),%rbp ++ movq 16(%rsp),%r13 ++ movq 120(%rsp),%rdi ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%r11 ++ movq %rdi,%r14 ++ rorq $42,%rdi ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%rdi ++ shrq $6,%r14 ++ ++ rorq $19,%rdi ++ xorq %r13,%r12 ++ xorq %r14,%rdi ++ addq 80(%rsp),%r12 ++ ++ addq 8(%rsp),%r12 ++ movq %rdx,%r13 ++ addq %rdi,%r12 ++ movq %r11,%r14 ++ rorq $23,%r13 ++ movq %r8,%rdi ++ ++ xorq %rdx,%r13 ++ rorq $5,%r14 ++ xorq %r9,%rdi ++ ++ movq %r12,8(%rsp) ++ xorq %r11,%r14 ++ andq %rdx,%rdi ++ ++ rorq $4,%r13 ++ addq %r10,%r12 ++ xorq %r9,%rdi ++ ++ rorq $6,%r14 ++ xorq %rdx,%r13 ++ addq %rdi,%r12 ++ ++ movq %r11,%rdi ++ addq (%rbp),%r12 ++ xorq %r11,%r14 ++ ++ xorq %rax,%rdi ++ rorq $14,%r13 ++ movq %rax,%r10 ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%r10 ++ addq %r12,%rcx ++ addq %r12,%r10 ++ ++ leaq 24(%rbp),%rbp ++ movq 24(%rsp),%r13 ++ movq 0(%rsp),%r15 ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%r10 ++ movq 
%r15,%r14 ++ rorq $42,%r15 ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%r15 ++ shrq $6,%r14 ++ ++ rorq $19,%r15 ++ xorq %r13,%r12 ++ xorq %r14,%r15 ++ addq 88(%rsp),%r12 ++ ++ addq 16(%rsp),%r12 ++ movq %rcx,%r13 ++ addq %r15,%r12 ++ movq %r10,%r14 ++ rorq $23,%r13 ++ movq %rdx,%r15 ++ ++ xorq %rcx,%r13 ++ rorq $5,%r14 ++ xorq %r8,%r15 ++ ++ movq %r12,16(%rsp) ++ xorq %r10,%r14 ++ andq %rcx,%r15 ++ ++ rorq $4,%r13 ++ addq %r9,%r12 ++ xorq %r8,%r15 ++ ++ rorq $6,%r14 ++ xorq %rcx,%r13 ++ addq %r15,%r12 ++ ++ movq %r10,%r15 ++ addq (%rbp),%r12 ++ xorq %r10,%r14 ++ ++ xorq %r11,%r15 ++ rorq $14,%r13 ++ movq %r11,%r9 ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%r9 ++ addq %r12,%rbx ++ addq %r12,%r9 ++ ++ leaq 8(%rbp),%rbp ++ movq 32(%rsp),%r13 ++ movq 8(%rsp),%rdi ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%r9 ++ movq %rdi,%r14 ++ rorq $42,%rdi ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%rdi ++ shrq $6,%r14 ++ ++ rorq $19,%rdi ++ xorq %r13,%r12 ++ xorq %r14,%rdi ++ addq 96(%rsp),%r12 ++ ++ addq 24(%rsp),%r12 ++ movq %rbx,%r13 ++ addq %rdi,%r12 ++ movq %r9,%r14 ++ rorq $23,%r13 ++ movq %rcx,%rdi ++ ++ xorq %rbx,%r13 ++ rorq $5,%r14 ++ xorq %rdx,%rdi ++ ++ movq %r12,24(%rsp) ++ xorq %r9,%r14 ++ andq %rbx,%rdi ++ ++ rorq $4,%r13 ++ addq %r8,%r12 ++ xorq %rdx,%rdi ++ ++ rorq $6,%r14 ++ xorq %rbx,%r13 ++ addq %rdi,%r12 ++ ++ movq %r9,%rdi ++ addq (%rbp),%r12 ++ xorq %r9,%r14 ++ ++ xorq %r10,%rdi ++ rorq $14,%r13 ++ movq %r10,%r8 ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%r8 ++ addq %r12,%rax ++ addq %r12,%r8 ++ ++ leaq 24(%rbp),%rbp ++ movq 40(%rsp),%r13 ++ movq 16(%rsp),%r15 ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%r8 ++ movq %r15,%r14 ++ rorq $42,%r15 ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%r15 ++ shrq $6,%r14 ++ ++ rorq $19,%r15 ++ xorq %r13,%r12 ++ xorq %r14,%r15 ++ addq 104(%rsp),%r12 ++ ++ addq 32(%rsp),%r12 ++ movq %rax,%r13 ++ addq %r15,%r12 ++ movq %r8,%r14 ++ rorq $23,%r13 ++ movq %rbx,%r15 ++ ++ xorq %rax,%r13 ++ rorq $5,%r14 ++ xorq %rcx,%r15 ++ ++ movq %r12,32(%rsp) ++ xorq %r8,%r14 ++ andq %rax,%r15 ++ ++ rorq $4,%r13 ++ addq %rdx,%r12 ++ xorq %rcx,%r15 ++ ++ rorq $6,%r14 ++ xorq %rax,%r13 ++ addq %r15,%r12 ++ ++ movq %r8,%r15 ++ addq (%rbp),%r12 ++ xorq %r8,%r14 ++ ++ xorq %r9,%r15 ++ rorq $14,%r13 ++ movq %r9,%rdx ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%rdx ++ addq %r12,%r11 ++ addq %r12,%rdx ++ ++ leaq 8(%rbp),%rbp ++ movq 48(%rsp),%r13 ++ movq 24(%rsp),%rdi ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%rdx ++ movq %rdi,%r14 ++ rorq $42,%rdi ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%rdi ++ shrq $6,%r14 ++ ++ rorq $19,%rdi ++ xorq %r13,%r12 ++ xorq %r14,%rdi ++ addq 112(%rsp),%r12 ++ ++ addq 40(%rsp),%r12 ++ movq %r11,%r13 ++ addq %rdi,%r12 ++ movq %rdx,%r14 ++ rorq $23,%r13 ++ movq %rax,%rdi ++ ++ xorq %r11,%r13 ++ rorq $5,%r14 ++ xorq %rbx,%rdi ++ ++ movq %r12,40(%rsp) ++ xorq %rdx,%r14 ++ andq %r11,%rdi ++ ++ rorq $4,%r13 ++ addq %rcx,%r12 ++ xorq %rbx,%rdi ++ ++ rorq $6,%r14 ++ xorq %r11,%r13 ++ addq %rdi,%r12 ++ ++ movq %rdx,%rdi ++ addq (%rbp),%r12 ++ xorq %rdx,%r14 ++ ++ xorq %r8,%rdi ++ rorq $14,%r13 ++ movq %r8,%rcx ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%rcx ++ addq %r12,%r10 ++ addq %r12,%rcx ++ ++ leaq 24(%rbp),%rbp ++ movq 56(%rsp),%r13 ++ movq 32(%rsp),%r15 ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%rcx ++ movq %r15,%r14 
++ rorq $42,%r15 ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%r15 ++ shrq $6,%r14 ++ ++ rorq $19,%r15 ++ xorq %r13,%r12 ++ xorq %r14,%r15 ++ addq 120(%rsp),%r12 ++ ++ addq 48(%rsp),%r12 ++ movq %r10,%r13 ++ addq %r15,%r12 ++ movq %rcx,%r14 ++ rorq $23,%r13 ++ movq %r11,%r15 ++ ++ xorq %r10,%r13 ++ rorq $5,%r14 ++ xorq %rax,%r15 ++ ++ movq %r12,48(%rsp) ++ xorq %rcx,%r14 ++ andq %r10,%r15 ++ ++ rorq $4,%r13 ++ addq %rbx,%r12 ++ xorq %rax,%r15 ++ ++ rorq $6,%r14 ++ xorq %r10,%r13 ++ addq %r15,%r12 ++ ++ movq %rcx,%r15 ++ addq (%rbp),%r12 ++ xorq %rcx,%r14 ++ ++ xorq %rdx,%r15 ++ rorq $14,%r13 ++ movq %rdx,%rbx ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%rbx ++ addq %r12,%r9 ++ addq %r12,%rbx ++ ++ leaq 8(%rbp),%rbp ++ movq 64(%rsp),%r13 ++ movq 40(%rsp),%rdi ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%rbx ++ movq %rdi,%r14 ++ rorq $42,%rdi ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%rdi ++ shrq $6,%r14 ++ ++ rorq $19,%rdi ++ xorq %r13,%r12 ++ xorq %r14,%rdi ++ addq 0(%rsp),%r12 ++ ++ addq 56(%rsp),%r12 ++ movq %r9,%r13 ++ addq %rdi,%r12 ++ movq %rbx,%r14 ++ rorq $23,%r13 ++ movq %r10,%rdi ++ ++ xorq %r9,%r13 ++ rorq $5,%r14 ++ xorq %r11,%rdi ++ ++ movq %r12,56(%rsp) ++ xorq %rbx,%r14 ++ andq %r9,%rdi ++ ++ rorq $4,%r13 ++ addq %rax,%r12 ++ xorq %r11,%rdi ++ ++ rorq $6,%r14 ++ xorq %r9,%r13 ++ addq %rdi,%r12 ++ ++ movq %rbx,%rdi ++ addq (%rbp),%r12 ++ xorq %rbx,%r14 ++ ++ xorq %rcx,%rdi ++ rorq $14,%r13 ++ movq %rcx,%rax ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%rax ++ addq %r12,%r8 ++ addq %r12,%rax ++ ++ leaq 24(%rbp),%rbp ++ movq 72(%rsp),%r13 ++ movq 48(%rsp),%r15 ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%rax ++ movq %r15,%r14 ++ rorq $42,%r15 ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%r15 ++ shrq $6,%r14 ++ ++ rorq $19,%r15 ++ xorq %r13,%r12 ++ xorq %r14,%r15 ++ addq 8(%rsp),%r12 ++ ++ addq 64(%rsp),%r12 ++ movq %r8,%r13 ++ addq %r15,%r12 ++ movq %rax,%r14 ++ rorq $23,%r13 ++ movq %r9,%r15 ++ ++ xorq %r8,%r13 ++ rorq $5,%r14 ++ xorq %r10,%r15 ++ ++ movq %r12,64(%rsp) ++ xorq %rax,%r14 ++ andq %r8,%r15 ++ ++ rorq $4,%r13 ++ addq %r11,%r12 ++ xorq %r10,%r15 ++ ++ rorq $6,%r14 ++ xorq %r8,%r13 ++ addq %r15,%r12 ++ ++ movq %rax,%r15 ++ addq (%rbp),%r12 ++ xorq %rax,%r14 ++ ++ xorq %rbx,%r15 ++ rorq $14,%r13 ++ movq %rbx,%r11 ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%r11 ++ addq %r12,%rdx ++ addq %r12,%r11 ++ ++ leaq 8(%rbp),%rbp ++ movq 80(%rsp),%r13 ++ movq 56(%rsp),%rdi ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%r11 ++ movq %rdi,%r14 ++ rorq $42,%rdi ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%rdi ++ shrq $6,%r14 ++ ++ rorq $19,%rdi ++ xorq %r13,%r12 ++ xorq %r14,%rdi ++ addq 16(%rsp),%r12 ++ ++ addq 72(%rsp),%r12 ++ movq %rdx,%r13 ++ addq %rdi,%r12 ++ movq %r11,%r14 ++ rorq $23,%r13 ++ movq %r8,%rdi ++ ++ xorq %rdx,%r13 ++ rorq $5,%r14 ++ xorq %r9,%rdi ++ ++ movq %r12,72(%rsp) ++ xorq %r11,%r14 ++ andq %rdx,%rdi ++ ++ rorq $4,%r13 ++ addq %r10,%r12 ++ xorq %r9,%rdi ++ ++ rorq $6,%r14 ++ xorq %rdx,%r13 ++ addq %rdi,%r12 ++ ++ movq %r11,%rdi ++ addq (%rbp),%r12 ++ xorq %r11,%r14 ++ ++ xorq %rax,%rdi ++ rorq $14,%r13 ++ movq %rax,%r10 ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%r10 ++ addq %r12,%rcx ++ addq %r12,%r10 ++ ++ leaq 24(%rbp),%rbp ++ movq 88(%rsp),%r13 ++ movq 64(%rsp),%r15 ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%r10 ++ movq %r15,%r14 ++ 
rorq $42,%r15 ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%r15 ++ shrq $6,%r14 ++ ++ rorq $19,%r15 ++ xorq %r13,%r12 ++ xorq %r14,%r15 ++ addq 24(%rsp),%r12 ++ ++ addq 80(%rsp),%r12 ++ movq %rcx,%r13 ++ addq %r15,%r12 ++ movq %r10,%r14 ++ rorq $23,%r13 ++ movq %rdx,%r15 ++ ++ xorq %rcx,%r13 ++ rorq $5,%r14 ++ xorq %r8,%r15 ++ ++ movq %r12,80(%rsp) ++ xorq %r10,%r14 ++ andq %rcx,%r15 ++ ++ rorq $4,%r13 ++ addq %r9,%r12 ++ xorq %r8,%r15 ++ ++ rorq $6,%r14 ++ xorq %rcx,%r13 ++ addq %r15,%r12 ++ ++ movq %r10,%r15 ++ addq (%rbp),%r12 ++ xorq %r10,%r14 ++ ++ xorq %r11,%r15 ++ rorq $14,%r13 ++ movq %r11,%r9 ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%r9 ++ addq %r12,%rbx ++ addq %r12,%r9 ++ ++ leaq 8(%rbp),%rbp ++ movq 96(%rsp),%r13 ++ movq 72(%rsp),%rdi ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%r9 ++ movq %rdi,%r14 ++ rorq $42,%rdi ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%rdi ++ shrq $6,%r14 ++ ++ rorq $19,%rdi ++ xorq %r13,%r12 ++ xorq %r14,%rdi ++ addq 32(%rsp),%r12 ++ ++ addq 88(%rsp),%r12 ++ movq %rbx,%r13 ++ addq %rdi,%r12 ++ movq %r9,%r14 ++ rorq $23,%r13 ++ movq %rcx,%rdi ++ ++ xorq %rbx,%r13 ++ rorq $5,%r14 ++ xorq %rdx,%rdi ++ ++ movq %r12,88(%rsp) ++ xorq %r9,%r14 ++ andq %rbx,%rdi ++ ++ rorq $4,%r13 ++ addq %r8,%r12 ++ xorq %rdx,%rdi ++ ++ rorq $6,%r14 ++ xorq %rbx,%r13 ++ addq %rdi,%r12 ++ ++ movq %r9,%rdi ++ addq (%rbp),%r12 ++ xorq %r9,%r14 ++ ++ xorq %r10,%rdi ++ rorq $14,%r13 ++ movq %r10,%r8 ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%r8 ++ addq %r12,%rax ++ addq %r12,%r8 ++ ++ leaq 24(%rbp),%rbp ++ movq 104(%rsp),%r13 ++ movq 80(%rsp),%r15 ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%r8 ++ movq %r15,%r14 ++ rorq $42,%r15 ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%r15 ++ shrq $6,%r14 ++ ++ rorq $19,%r15 ++ xorq %r13,%r12 ++ xorq %r14,%r15 ++ addq 40(%rsp),%r12 ++ ++ addq 96(%rsp),%r12 ++ movq %rax,%r13 ++ addq %r15,%r12 ++ movq %r8,%r14 ++ rorq $23,%r13 ++ movq %rbx,%r15 ++ ++ xorq %rax,%r13 ++ rorq $5,%r14 ++ xorq %rcx,%r15 ++ ++ movq %r12,96(%rsp) ++ xorq %r8,%r14 ++ andq %rax,%r15 ++ ++ rorq $4,%r13 ++ addq %rdx,%r12 ++ xorq %rcx,%r15 ++ ++ rorq $6,%r14 ++ xorq %rax,%r13 ++ addq %r15,%r12 ++ ++ movq %r8,%r15 ++ addq (%rbp),%r12 ++ xorq %r8,%r14 ++ ++ xorq %r9,%r15 ++ rorq $14,%r13 ++ movq %r9,%rdx ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%rdx ++ addq %r12,%r11 ++ addq %r12,%rdx ++ ++ leaq 8(%rbp),%rbp ++ movq 112(%rsp),%r13 ++ movq 88(%rsp),%rdi ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%rdx ++ movq %rdi,%r14 ++ rorq $42,%rdi ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%rdi ++ shrq $6,%r14 ++ ++ rorq $19,%rdi ++ xorq %r13,%r12 ++ xorq %r14,%rdi ++ addq 48(%rsp),%r12 ++ ++ addq 104(%rsp),%r12 ++ movq %r11,%r13 ++ addq %rdi,%r12 ++ movq %rdx,%r14 ++ rorq $23,%r13 ++ movq %rax,%rdi ++ ++ xorq %r11,%r13 ++ rorq $5,%r14 ++ xorq %rbx,%rdi ++ ++ movq %r12,104(%rsp) ++ xorq %rdx,%r14 ++ andq %r11,%rdi ++ ++ rorq $4,%r13 ++ addq %rcx,%r12 ++ xorq %rbx,%rdi ++ ++ rorq $6,%r14 ++ xorq %r11,%r13 ++ addq %rdi,%r12 ++ ++ movq %rdx,%rdi ++ addq (%rbp),%r12 ++ xorq %rdx,%r14 ++ ++ xorq %r8,%rdi ++ rorq $14,%r13 ++ movq %r8,%rcx ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%rcx ++ addq %r12,%r10 ++ addq %r12,%rcx ++ ++ leaq 24(%rbp),%rbp ++ movq 120(%rsp),%r13 ++ movq 96(%rsp),%r15 ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%rcx ++ movq %r15,%r14 ++ rorq 
$42,%r15 ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%r15 ++ shrq $6,%r14 ++ ++ rorq $19,%r15 ++ xorq %r13,%r12 ++ xorq %r14,%r15 ++ addq 56(%rsp),%r12 ++ ++ addq 112(%rsp),%r12 ++ movq %r10,%r13 ++ addq %r15,%r12 ++ movq %rcx,%r14 ++ rorq $23,%r13 ++ movq %r11,%r15 ++ ++ xorq %r10,%r13 ++ rorq $5,%r14 ++ xorq %rax,%r15 ++ ++ movq %r12,112(%rsp) ++ xorq %rcx,%r14 ++ andq %r10,%r15 ++ ++ rorq $4,%r13 ++ addq %rbx,%r12 ++ xorq %rax,%r15 ++ ++ rorq $6,%r14 ++ xorq %r10,%r13 ++ addq %r15,%r12 ++ ++ movq %rcx,%r15 ++ addq (%rbp),%r12 ++ xorq %rcx,%r14 ++ ++ xorq %rdx,%r15 ++ rorq $14,%r13 ++ movq %rdx,%rbx ++ ++ andq %r15,%rdi ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %rdi,%rbx ++ addq %r12,%r9 ++ addq %r12,%rbx ++ ++ leaq 8(%rbp),%rbp ++ movq 0(%rsp),%r13 ++ movq 104(%rsp),%rdi ++ ++ movq %r13,%r12 ++ rorq $7,%r13 ++ addq %r14,%rbx ++ movq %rdi,%r14 ++ rorq $42,%rdi ++ ++ xorq %r12,%r13 ++ shrq $7,%r12 ++ rorq $1,%r13 ++ xorq %r14,%rdi ++ shrq $6,%r14 ++ ++ rorq $19,%rdi ++ xorq %r13,%r12 ++ xorq %r14,%rdi ++ addq 64(%rsp),%r12 ++ ++ addq 120(%rsp),%r12 ++ movq %r9,%r13 ++ addq %rdi,%r12 ++ movq %rbx,%r14 ++ rorq $23,%r13 ++ movq %r10,%rdi ++ ++ xorq %r9,%r13 ++ rorq $5,%r14 ++ xorq %r11,%rdi ++ ++ movq %r12,120(%rsp) ++ xorq %rbx,%r14 ++ andq %r9,%rdi ++ ++ rorq $4,%r13 ++ addq %rax,%r12 ++ xorq %r11,%rdi ++ ++ rorq $6,%r14 ++ xorq %r9,%r13 ++ addq %rdi,%r12 ++ ++ movq %rbx,%rdi ++ addq (%rbp),%r12 ++ xorq %rbx,%r14 ++ ++ xorq %rcx,%rdi ++ rorq $14,%r13 ++ movq %rcx,%rax ++ ++ andq %rdi,%r15 ++ rorq $28,%r14 ++ addq %r13,%r12 ++ ++ xorq %r15,%rax ++ addq %r12,%r8 ++ addq %r12,%rax ++ ++ leaq 24(%rbp),%rbp ++ cmpb $0,7(%rbp) ++ jnz L$rounds_16_xx ++ ++ movq 128+0(%rsp),%rdi ++ addq %r14,%rax ++ leaq 128(%rsi),%rsi ++ ++ addq 0(%rdi),%rax ++ addq 8(%rdi),%rbx ++ addq 16(%rdi),%rcx ++ addq 24(%rdi),%rdx ++ addq 32(%rdi),%r8 ++ addq 40(%rdi),%r9 ++ addq 48(%rdi),%r10 ++ addq 56(%rdi),%r11 ++ ++ cmpq 128+16(%rsp),%rsi ++ ++ movq %rax,0(%rdi) ++ movq %rbx,8(%rdi) ++ movq %rcx,16(%rdi) ++ movq %rdx,24(%rdi) ++ movq %r8,32(%rdi) ++ movq %r9,40(%rdi) ++ movq %r10,48(%rdi) ++ movq %r11,56(%rdi) ++ jb L$loop ++ ++ movq 152(%rsp),%rsi ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++.p2align 6 ++ ++K512: ++.quad 0x428a2f98d728ae22,0x7137449123ef65cd ++.quad 0x428a2f98d728ae22,0x7137449123ef65cd ++.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc ++.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc ++.quad 0x3956c25bf348b538,0x59f111f1b605d019 ++.quad 0x3956c25bf348b538,0x59f111f1b605d019 ++.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 ++.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 ++.quad 0xd807aa98a3030242,0x12835b0145706fbe ++.quad 0xd807aa98a3030242,0x12835b0145706fbe ++.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 ++.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 ++.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 ++.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 ++.quad 0x9bdc06a725c71235,0xc19bf174cf692694 ++.quad 0x9bdc06a725c71235,0xc19bf174cf692694 ++.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 ++.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 ++.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 ++.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 ++.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 ++.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 ++.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 ++.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 ++.quad 
0x983e5152ee66dfab,0xa831c66d2db43210 ++.quad 0x983e5152ee66dfab,0xa831c66d2db43210 ++.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 ++.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 ++.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 ++.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 ++.quad 0x06ca6351e003826f,0x142929670a0e6e70 ++.quad 0x06ca6351e003826f,0x142929670a0e6e70 ++.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 ++.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 ++.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df ++.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df ++.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 ++.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 ++.quad 0x81c2c92e47edaee6,0x92722c851482353b ++.quad 0x81c2c92e47edaee6,0x92722c851482353b ++.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 ++.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 ++.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 ++.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 ++.quad 0xd192e819d6ef5218,0xd69906245565a910 ++.quad 0xd192e819d6ef5218,0xd69906245565a910 ++.quad 0xf40e35855771202a,0x106aa07032bbd1b8 ++.quad 0xf40e35855771202a,0x106aa07032bbd1b8 ++.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 ++.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 ++.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 ++.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 ++.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb ++.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb ++.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 ++.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 ++.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 ++.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 ++.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec ++.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec ++.quad 0x90befffa23631e28,0xa4506cebde82bde9 ++.quad 0x90befffa23631e28,0xa4506cebde82bde9 ++.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b ++.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b ++.quad 0xca273eceea26619c,0xd186b8c721c0c207 ++.quad 0xca273eceea26619c,0xd186b8c721c0c207 ++.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 ++.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 ++.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 ++.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 ++.quad 0x113f9804bef90dae,0x1b710b35131c471b ++.quad 0x113f9804bef90dae,0x1b710b35131c471b ++.quad 0x28db77f523047d84,0x32caab7b40c72493 ++.quad 0x28db77f523047d84,0x32caab7b40c72493 ++.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c ++.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c ++.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a ++.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a ++.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 ++.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 ++ ++.quad 0x0001020304050607,0x08090a0b0c0d0e0f ++.quad 0x0001020304050607,0x08090a0b0c0d0e0f ++.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++ ++.p2align 6 ++sha512_block_data_order_avx: ++ ++L$avx_shortcut: ++ movq %rsp,%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ shlq $4,%rdx ++ subq $160,%rsp ++ leaq (%rsi,%rdx,8),%rdx ++ andq $-64,%rsp ++ movq %rdi,128+0(%rsp) ++ movq %rsi,128+8(%rsp) ++ movq %rdx,128+16(%rsp) ++ movq %rax,152(%rsp) ++ ++L$prologue_avx: ++ ++ vzeroupper ++ movq 0(%rdi),%rax ++ movq 8(%rdi),%rbx ++ movq 16(%rdi),%rcx ++ movq 24(%rdi),%rdx ++ movq 32(%rdi),%r8 ++ movq 40(%rdi),%r9 ++ movq 48(%rdi),%r10 ++ movq 56(%rdi),%r11 ++ jmp L$loop_avx ++.p2align 4 ++L$loop_avx: ++ vmovdqa 
K512+1280(%rip),%xmm11 ++ vmovdqu 0(%rsi),%xmm0 ++ leaq K512+128(%rip),%rbp ++ vmovdqu 16(%rsi),%xmm1 ++ vmovdqu 32(%rsi),%xmm2 ++ vpshufb %xmm11,%xmm0,%xmm0 ++ vmovdqu 48(%rsi),%xmm3 ++ vpshufb %xmm11,%xmm1,%xmm1 ++ vmovdqu 64(%rsi),%xmm4 ++ vpshufb %xmm11,%xmm2,%xmm2 ++ vmovdqu 80(%rsi),%xmm5 ++ vpshufb %xmm11,%xmm3,%xmm3 ++ vmovdqu 96(%rsi),%xmm6 ++ vpshufb %xmm11,%xmm4,%xmm4 ++ vmovdqu 112(%rsi),%xmm7 ++ vpshufb %xmm11,%xmm5,%xmm5 ++ vpaddq -128(%rbp),%xmm0,%xmm8 ++ vpshufb %xmm11,%xmm6,%xmm6 ++ vpaddq -96(%rbp),%xmm1,%xmm9 ++ vpshufb %xmm11,%xmm7,%xmm7 ++ vpaddq -64(%rbp),%xmm2,%xmm10 ++ vpaddq -32(%rbp),%xmm3,%xmm11 ++ vmovdqa %xmm8,0(%rsp) ++ vpaddq 0(%rbp),%xmm4,%xmm8 ++ vmovdqa %xmm9,16(%rsp) ++ vpaddq 32(%rbp),%xmm5,%xmm9 ++ vmovdqa %xmm10,32(%rsp) ++ vpaddq 64(%rbp),%xmm6,%xmm10 ++ vmovdqa %xmm11,48(%rsp) ++ vpaddq 96(%rbp),%xmm7,%xmm11 ++ vmovdqa %xmm8,64(%rsp) ++ movq %rax,%r14 ++ vmovdqa %xmm9,80(%rsp) ++ movq %rbx,%rdi ++ vmovdqa %xmm10,96(%rsp) ++ xorq %rcx,%rdi ++ vmovdqa %xmm11,112(%rsp) ++ movq %r8,%r13 ++ jmp L$avx_00_47 ++ ++.p2align 4 ++L$avx_00_47: ++ addq $256,%rbp ++ vpalignr $8,%xmm0,%xmm1,%xmm8 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rax ++ vpalignr $8,%xmm4,%xmm5,%xmm11 ++ movq %r9,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $1,%xmm8,%xmm10 ++ xorq %r8,%r13 ++ xorq %r10,%r12 ++ vpaddq %xmm11,%xmm0,%xmm0 ++ shrdq $4,%r13,%r13 ++ xorq %rax,%r14 ++ vpsrlq $7,%xmm8,%xmm11 ++ andq %r8,%r12 ++ xorq %r8,%r13 ++ vpsllq $56,%xmm8,%xmm9 ++ addq 0(%rsp),%r11 ++ movq %rax,%r15 ++ vpxor %xmm10,%xmm11,%xmm8 ++ xorq %r10,%r12 ++ shrdq $6,%r14,%r14 ++ vpsrlq $7,%xmm10,%xmm10 ++ xorq %rbx,%r15 ++ addq %r12,%r11 ++ vpxor %xmm9,%xmm8,%xmm8 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ vpsllq $7,%xmm9,%xmm9 ++ xorq %rax,%r14 ++ addq %r13,%r11 ++ vpxor %xmm10,%xmm8,%xmm8 ++ xorq %rbx,%rdi ++ shrdq $28,%r14,%r14 ++ vpsrlq $6,%xmm7,%xmm11 ++ addq %r11,%rdx ++ addq %rdi,%r11 ++ vpxor %xmm9,%xmm8,%xmm8 ++ movq %rdx,%r13 ++ addq %r11,%r14 ++ vpsllq $3,%xmm7,%xmm10 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r11 ++ vpaddq %xmm8,%xmm0,%xmm0 ++ movq %r8,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $19,%xmm7,%xmm9 ++ xorq %rdx,%r13 ++ xorq %r9,%r12 ++ vpxor %xmm10,%xmm11,%xmm11 ++ shrdq $4,%r13,%r13 ++ xorq %r11,%r14 ++ vpsllq $42,%xmm10,%xmm10 ++ andq %rdx,%r12 ++ xorq %rdx,%r13 ++ vpxor %xmm9,%xmm11,%xmm11 ++ addq 8(%rsp),%r10 ++ movq %r11,%rdi ++ vpsrlq $42,%xmm9,%xmm9 ++ xorq %r9,%r12 ++ shrdq $6,%r14,%r14 ++ vpxor %xmm10,%xmm11,%xmm11 ++ xorq %rax,%rdi ++ addq %r12,%r10 ++ vpxor %xmm9,%xmm11,%xmm11 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ vpaddq %xmm11,%xmm0,%xmm0 ++ xorq %r11,%r14 ++ addq %r13,%r10 ++ vpaddq -128(%rbp),%xmm0,%xmm10 ++ xorq %rax,%r15 ++ shrdq $28,%r14,%r14 ++ addq %r10,%rcx ++ addq %r15,%r10 ++ movq %rcx,%r13 ++ addq %r10,%r14 ++ vmovdqa %xmm10,0(%rsp) ++ vpalignr $8,%xmm1,%xmm2,%xmm8 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r10 ++ vpalignr $8,%xmm5,%xmm6,%xmm11 ++ movq %rdx,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $1,%xmm8,%xmm10 ++ xorq %rcx,%r13 ++ xorq %r8,%r12 ++ vpaddq %xmm11,%xmm1,%xmm1 ++ shrdq $4,%r13,%r13 ++ xorq %r10,%r14 ++ vpsrlq $7,%xmm8,%xmm11 ++ andq %rcx,%r12 ++ xorq %rcx,%r13 ++ vpsllq $56,%xmm8,%xmm9 ++ addq 16(%rsp),%r9 ++ movq %r10,%r15 ++ vpxor %xmm10,%xmm11,%xmm8 ++ xorq %r8,%r12 ++ shrdq $6,%r14,%r14 ++ vpsrlq $7,%xmm10,%xmm10 ++ xorq %r11,%r15 ++ addq %r12,%r9 ++ vpxor %xmm9,%xmm8,%xmm8 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ vpsllq $7,%xmm9,%xmm9 ++ xorq %r10,%r14 ++ addq %r13,%r9 ++ vpxor %xmm10,%xmm8,%xmm8 ++ xorq %r11,%rdi ++ shrdq $28,%r14,%r14 ++ vpsrlq 
$6,%xmm0,%xmm11 ++ addq %r9,%rbx ++ addq %rdi,%r9 ++ vpxor %xmm9,%xmm8,%xmm8 ++ movq %rbx,%r13 ++ addq %r9,%r14 ++ vpsllq $3,%xmm0,%xmm10 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r9 ++ vpaddq %xmm8,%xmm1,%xmm1 ++ movq %rcx,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $19,%xmm0,%xmm9 ++ xorq %rbx,%r13 ++ xorq %rdx,%r12 ++ vpxor %xmm10,%xmm11,%xmm11 ++ shrdq $4,%r13,%r13 ++ xorq %r9,%r14 ++ vpsllq $42,%xmm10,%xmm10 ++ andq %rbx,%r12 ++ xorq %rbx,%r13 ++ vpxor %xmm9,%xmm11,%xmm11 ++ addq 24(%rsp),%r8 ++ movq %r9,%rdi ++ vpsrlq $42,%xmm9,%xmm9 ++ xorq %rdx,%r12 ++ shrdq $6,%r14,%r14 ++ vpxor %xmm10,%xmm11,%xmm11 ++ xorq %r10,%rdi ++ addq %r12,%r8 ++ vpxor %xmm9,%xmm11,%xmm11 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ vpaddq %xmm11,%xmm1,%xmm1 ++ xorq %r9,%r14 ++ addq %r13,%r8 ++ vpaddq -96(%rbp),%xmm1,%xmm10 ++ xorq %r10,%r15 ++ shrdq $28,%r14,%r14 ++ addq %r8,%rax ++ addq %r15,%r8 ++ movq %rax,%r13 ++ addq %r8,%r14 ++ vmovdqa %xmm10,16(%rsp) ++ vpalignr $8,%xmm2,%xmm3,%xmm8 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r8 ++ vpalignr $8,%xmm6,%xmm7,%xmm11 ++ movq %rbx,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $1,%xmm8,%xmm10 ++ xorq %rax,%r13 ++ xorq %rcx,%r12 ++ vpaddq %xmm11,%xmm2,%xmm2 ++ shrdq $4,%r13,%r13 ++ xorq %r8,%r14 ++ vpsrlq $7,%xmm8,%xmm11 ++ andq %rax,%r12 ++ xorq %rax,%r13 ++ vpsllq $56,%xmm8,%xmm9 ++ addq 32(%rsp),%rdx ++ movq %r8,%r15 ++ vpxor %xmm10,%xmm11,%xmm8 ++ xorq %rcx,%r12 ++ shrdq $6,%r14,%r14 ++ vpsrlq $7,%xmm10,%xmm10 ++ xorq %r9,%r15 ++ addq %r12,%rdx ++ vpxor %xmm9,%xmm8,%xmm8 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ vpsllq $7,%xmm9,%xmm9 ++ xorq %r8,%r14 ++ addq %r13,%rdx ++ vpxor %xmm10,%xmm8,%xmm8 ++ xorq %r9,%rdi ++ shrdq $28,%r14,%r14 ++ vpsrlq $6,%xmm1,%xmm11 ++ addq %rdx,%r11 ++ addq %rdi,%rdx ++ vpxor %xmm9,%xmm8,%xmm8 ++ movq %r11,%r13 ++ addq %rdx,%r14 ++ vpsllq $3,%xmm1,%xmm10 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rdx ++ vpaddq %xmm8,%xmm2,%xmm2 ++ movq %rax,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $19,%xmm1,%xmm9 ++ xorq %r11,%r13 ++ xorq %rbx,%r12 ++ vpxor %xmm10,%xmm11,%xmm11 ++ shrdq $4,%r13,%r13 ++ xorq %rdx,%r14 ++ vpsllq $42,%xmm10,%xmm10 ++ andq %r11,%r12 ++ xorq %r11,%r13 ++ vpxor %xmm9,%xmm11,%xmm11 ++ addq 40(%rsp),%rcx ++ movq %rdx,%rdi ++ vpsrlq $42,%xmm9,%xmm9 ++ xorq %rbx,%r12 ++ shrdq $6,%r14,%r14 ++ vpxor %xmm10,%xmm11,%xmm11 ++ xorq %r8,%rdi ++ addq %r12,%rcx ++ vpxor %xmm9,%xmm11,%xmm11 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ vpaddq %xmm11,%xmm2,%xmm2 ++ xorq %rdx,%r14 ++ addq %r13,%rcx ++ vpaddq -64(%rbp),%xmm2,%xmm10 ++ xorq %r8,%r15 ++ shrdq $28,%r14,%r14 ++ addq %rcx,%r10 ++ addq %r15,%rcx ++ movq %r10,%r13 ++ addq %rcx,%r14 ++ vmovdqa %xmm10,32(%rsp) ++ vpalignr $8,%xmm3,%xmm4,%xmm8 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rcx ++ vpalignr $8,%xmm7,%xmm0,%xmm11 ++ movq %r11,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $1,%xmm8,%xmm10 ++ xorq %r10,%r13 ++ xorq %rax,%r12 ++ vpaddq %xmm11,%xmm3,%xmm3 ++ shrdq $4,%r13,%r13 ++ xorq %rcx,%r14 ++ vpsrlq $7,%xmm8,%xmm11 ++ andq %r10,%r12 ++ xorq %r10,%r13 ++ vpsllq $56,%xmm8,%xmm9 ++ addq 48(%rsp),%rbx ++ movq %rcx,%r15 ++ vpxor %xmm10,%xmm11,%xmm8 ++ xorq %rax,%r12 ++ shrdq $6,%r14,%r14 ++ vpsrlq $7,%xmm10,%xmm10 ++ xorq %rdx,%r15 ++ addq %r12,%rbx ++ vpxor %xmm9,%xmm8,%xmm8 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ vpsllq $7,%xmm9,%xmm9 ++ xorq %rcx,%r14 ++ addq %r13,%rbx ++ vpxor %xmm10,%xmm8,%xmm8 ++ xorq %rdx,%rdi ++ shrdq $28,%r14,%r14 ++ vpsrlq $6,%xmm2,%xmm11 ++ addq %rbx,%r9 ++ addq %rdi,%rbx ++ vpxor %xmm9,%xmm8,%xmm8 ++ movq %r9,%r13 ++ addq %rbx,%r14 ++ vpsllq $3,%xmm2,%xmm10 ++ shrdq $23,%r13,%r13 ++ movq 
%r14,%rbx ++ vpaddq %xmm8,%xmm3,%xmm3 ++ movq %r10,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $19,%xmm2,%xmm9 ++ xorq %r9,%r13 ++ xorq %r11,%r12 ++ vpxor %xmm10,%xmm11,%xmm11 ++ shrdq $4,%r13,%r13 ++ xorq %rbx,%r14 ++ vpsllq $42,%xmm10,%xmm10 ++ andq %r9,%r12 ++ xorq %r9,%r13 ++ vpxor %xmm9,%xmm11,%xmm11 ++ addq 56(%rsp),%rax ++ movq %rbx,%rdi ++ vpsrlq $42,%xmm9,%xmm9 ++ xorq %r11,%r12 ++ shrdq $6,%r14,%r14 ++ vpxor %xmm10,%xmm11,%xmm11 ++ xorq %rcx,%rdi ++ addq %r12,%rax ++ vpxor %xmm9,%xmm11,%xmm11 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ vpaddq %xmm11,%xmm3,%xmm3 ++ xorq %rbx,%r14 ++ addq %r13,%rax ++ vpaddq -32(%rbp),%xmm3,%xmm10 ++ xorq %rcx,%r15 ++ shrdq $28,%r14,%r14 ++ addq %rax,%r8 ++ addq %r15,%rax ++ movq %r8,%r13 ++ addq %rax,%r14 ++ vmovdqa %xmm10,48(%rsp) ++ vpalignr $8,%xmm4,%xmm5,%xmm8 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rax ++ vpalignr $8,%xmm0,%xmm1,%xmm11 ++ movq %r9,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $1,%xmm8,%xmm10 ++ xorq %r8,%r13 ++ xorq %r10,%r12 ++ vpaddq %xmm11,%xmm4,%xmm4 ++ shrdq $4,%r13,%r13 ++ xorq %rax,%r14 ++ vpsrlq $7,%xmm8,%xmm11 ++ andq %r8,%r12 ++ xorq %r8,%r13 ++ vpsllq $56,%xmm8,%xmm9 ++ addq 64(%rsp),%r11 ++ movq %rax,%r15 ++ vpxor %xmm10,%xmm11,%xmm8 ++ xorq %r10,%r12 ++ shrdq $6,%r14,%r14 ++ vpsrlq $7,%xmm10,%xmm10 ++ xorq %rbx,%r15 ++ addq %r12,%r11 ++ vpxor %xmm9,%xmm8,%xmm8 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ vpsllq $7,%xmm9,%xmm9 ++ xorq %rax,%r14 ++ addq %r13,%r11 ++ vpxor %xmm10,%xmm8,%xmm8 ++ xorq %rbx,%rdi ++ shrdq $28,%r14,%r14 ++ vpsrlq $6,%xmm3,%xmm11 ++ addq %r11,%rdx ++ addq %rdi,%r11 ++ vpxor %xmm9,%xmm8,%xmm8 ++ movq %rdx,%r13 ++ addq %r11,%r14 ++ vpsllq $3,%xmm3,%xmm10 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r11 ++ vpaddq %xmm8,%xmm4,%xmm4 ++ movq %r8,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $19,%xmm3,%xmm9 ++ xorq %rdx,%r13 ++ xorq %r9,%r12 ++ vpxor %xmm10,%xmm11,%xmm11 ++ shrdq $4,%r13,%r13 ++ xorq %r11,%r14 ++ vpsllq $42,%xmm10,%xmm10 ++ andq %rdx,%r12 ++ xorq %rdx,%r13 ++ vpxor %xmm9,%xmm11,%xmm11 ++ addq 72(%rsp),%r10 ++ movq %r11,%rdi ++ vpsrlq $42,%xmm9,%xmm9 ++ xorq %r9,%r12 ++ shrdq $6,%r14,%r14 ++ vpxor %xmm10,%xmm11,%xmm11 ++ xorq %rax,%rdi ++ addq %r12,%r10 ++ vpxor %xmm9,%xmm11,%xmm11 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ vpaddq %xmm11,%xmm4,%xmm4 ++ xorq %r11,%r14 ++ addq %r13,%r10 ++ vpaddq 0(%rbp),%xmm4,%xmm10 ++ xorq %rax,%r15 ++ shrdq $28,%r14,%r14 ++ addq %r10,%rcx ++ addq %r15,%r10 ++ movq %rcx,%r13 ++ addq %r10,%r14 ++ vmovdqa %xmm10,64(%rsp) ++ vpalignr $8,%xmm5,%xmm6,%xmm8 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r10 ++ vpalignr $8,%xmm1,%xmm2,%xmm11 ++ movq %rdx,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $1,%xmm8,%xmm10 ++ xorq %rcx,%r13 ++ xorq %r8,%r12 ++ vpaddq %xmm11,%xmm5,%xmm5 ++ shrdq $4,%r13,%r13 ++ xorq %r10,%r14 ++ vpsrlq $7,%xmm8,%xmm11 ++ andq %rcx,%r12 ++ xorq %rcx,%r13 ++ vpsllq $56,%xmm8,%xmm9 ++ addq 80(%rsp),%r9 ++ movq %r10,%r15 ++ vpxor %xmm10,%xmm11,%xmm8 ++ xorq %r8,%r12 ++ shrdq $6,%r14,%r14 ++ vpsrlq $7,%xmm10,%xmm10 ++ xorq %r11,%r15 ++ addq %r12,%r9 ++ vpxor %xmm9,%xmm8,%xmm8 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ vpsllq $7,%xmm9,%xmm9 ++ xorq %r10,%r14 ++ addq %r13,%r9 ++ vpxor %xmm10,%xmm8,%xmm8 ++ xorq %r11,%rdi ++ shrdq $28,%r14,%r14 ++ vpsrlq $6,%xmm4,%xmm11 ++ addq %r9,%rbx ++ addq %rdi,%r9 ++ vpxor %xmm9,%xmm8,%xmm8 ++ movq %rbx,%r13 ++ addq %r9,%r14 ++ vpsllq $3,%xmm4,%xmm10 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r9 ++ vpaddq %xmm8,%xmm5,%xmm5 ++ movq %rcx,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $19,%xmm4,%xmm9 ++ xorq %rbx,%r13 ++ xorq %rdx,%r12 ++ vpxor %xmm10,%xmm11,%xmm11 ++ 
shrdq $4,%r13,%r13 ++ xorq %r9,%r14 ++ vpsllq $42,%xmm10,%xmm10 ++ andq %rbx,%r12 ++ xorq %rbx,%r13 ++ vpxor %xmm9,%xmm11,%xmm11 ++ addq 88(%rsp),%r8 ++ movq %r9,%rdi ++ vpsrlq $42,%xmm9,%xmm9 ++ xorq %rdx,%r12 ++ shrdq $6,%r14,%r14 ++ vpxor %xmm10,%xmm11,%xmm11 ++ xorq %r10,%rdi ++ addq %r12,%r8 ++ vpxor %xmm9,%xmm11,%xmm11 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ vpaddq %xmm11,%xmm5,%xmm5 ++ xorq %r9,%r14 ++ addq %r13,%r8 ++ vpaddq 32(%rbp),%xmm5,%xmm10 ++ xorq %r10,%r15 ++ shrdq $28,%r14,%r14 ++ addq %r8,%rax ++ addq %r15,%r8 ++ movq %rax,%r13 ++ addq %r8,%r14 ++ vmovdqa %xmm10,80(%rsp) ++ vpalignr $8,%xmm6,%xmm7,%xmm8 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r8 ++ vpalignr $8,%xmm2,%xmm3,%xmm11 ++ movq %rbx,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $1,%xmm8,%xmm10 ++ xorq %rax,%r13 ++ xorq %rcx,%r12 ++ vpaddq %xmm11,%xmm6,%xmm6 ++ shrdq $4,%r13,%r13 ++ xorq %r8,%r14 ++ vpsrlq $7,%xmm8,%xmm11 ++ andq %rax,%r12 ++ xorq %rax,%r13 ++ vpsllq $56,%xmm8,%xmm9 ++ addq 96(%rsp),%rdx ++ movq %r8,%r15 ++ vpxor %xmm10,%xmm11,%xmm8 ++ xorq %rcx,%r12 ++ shrdq $6,%r14,%r14 ++ vpsrlq $7,%xmm10,%xmm10 ++ xorq %r9,%r15 ++ addq %r12,%rdx ++ vpxor %xmm9,%xmm8,%xmm8 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ vpsllq $7,%xmm9,%xmm9 ++ xorq %r8,%r14 ++ addq %r13,%rdx ++ vpxor %xmm10,%xmm8,%xmm8 ++ xorq %r9,%rdi ++ shrdq $28,%r14,%r14 ++ vpsrlq $6,%xmm5,%xmm11 ++ addq %rdx,%r11 ++ addq %rdi,%rdx ++ vpxor %xmm9,%xmm8,%xmm8 ++ movq %r11,%r13 ++ addq %rdx,%r14 ++ vpsllq $3,%xmm5,%xmm10 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rdx ++ vpaddq %xmm8,%xmm6,%xmm6 ++ movq %rax,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $19,%xmm5,%xmm9 ++ xorq %r11,%r13 ++ xorq %rbx,%r12 ++ vpxor %xmm10,%xmm11,%xmm11 ++ shrdq $4,%r13,%r13 ++ xorq %rdx,%r14 ++ vpsllq $42,%xmm10,%xmm10 ++ andq %r11,%r12 ++ xorq %r11,%r13 ++ vpxor %xmm9,%xmm11,%xmm11 ++ addq 104(%rsp),%rcx ++ movq %rdx,%rdi ++ vpsrlq $42,%xmm9,%xmm9 ++ xorq %rbx,%r12 ++ shrdq $6,%r14,%r14 ++ vpxor %xmm10,%xmm11,%xmm11 ++ xorq %r8,%rdi ++ addq %r12,%rcx ++ vpxor %xmm9,%xmm11,%xmm11 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ vpaddq %xmm11,%xmm6,%xmm6 ++ xorq %rdx,%r14 ++ addq %r13,%rcx ++ vpaddq 64(%rbp),%xmm6,%xmm10 ++ xorq %r8,%r15 ++ shrdq $28,%r14,%r14 ++ addq %rcx,%r10 ++ addq %r15,%rcx ++ movq %r10,%r13 ++ addq %rcx,%r14 ++ vmovdqa %xmm10,96(%rsp) ++ vpalignr $8,%xmm7,%xmm0,%xmm8 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rcx ++ vpalignr $8,%xmm3,%xmm4,%xmm11 ++ movq %r11,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $1,%xmm8,%xmm10 ++ xorq %r10,%r13 ++ xorq %rax,%r12 ++ vpaddq %xmm11,%xmm7,%xmm7 ++ shrdq $4,%r13,%r13 ++ xorq %rcx,%r14 ++ vpsrlq $7,%xmm8,%xmm11 ++ andq %r10,%r12 ++ xorq %r10,%r13 ++ vpsllq $56,%xmm8,%xmm9 ++ addq 112(%rsp),%rbx ++ movq %rcx,%r15 ++ vpxor %xmm10,%xmm11,%xmm8 ++ xorq %rax,%r12 ++ shrdq $6,%r14,%r14 ++ vpsrlq $7,%xmm10,%xmm10 ++ xorq %rdx,%r15 ++ addq %r12,%rbx ++ vpxor %xmm9,%xmm8,%xmm8 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ vpsllq $7,%xmm9,%xmm9 ++ xorq %rcx,%r14 ++ addq %r13,%rbx ++ vpxor %xmm10,%xmm8,%xmm8 ++ xorq %rdx,%rdi ++ shrdq $28,%r14,%r14 ++ vpsrlq $6,%xmm6,%xmm11 ++ addq %rbx,%r9 ++ addq %rdi,%rbx ++ vpxor %xmm9,%xmm8,%xmm8 ++ movq %r9,%r13 ++ addq %rbx,%r14 ++ vpsllq $3,%xmm6,%xmm10 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rbx ++ vpaddq %xmm8,%xmm7,%xmm7 ++ movq %r10,%r12 ++ shrdq $5,%r14,%r14 ++ vpsrlq $19,%xmm6,%xmm9 ++ xorq %r9,%r13 ++ xorq %r11,%r12 ++ vpxor %xmm10,%xmm11,%xmm11 ++ shrdq $4,%r13,%r13 ++ xorq %rbx,%r14 ++ vpsllq $42,%xmm10,%xmm10 ++ andq %r9,%r12 ++ xorq %r9,%r13 ++ vpxor %xmm9,%xmm11,%xmm11 ++ addq 120(%rsp),%rax ++ movq %rbx,%rdi 
++ vpsrlq $42,%xmm9,%xmm9 ++ xorq %r11,%r12 ++ shrdq $6,%r14,%r14 ++ vpxor %xmm10,%xmm11,%xmm11 ++ xorq %rcx,%rdi ++ addq %r12,%rax ++ vpxor %xmm9,%xmm11,%xmm11 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ vpaddq %xmm11,%xmm7,%xmm7 ++ xorq %rbx,%r14 ++ addq %r13,%rax ++ vpaddq 96(%rbp),%xmm7,%xmm10 ++ xorq %rcx,%r15 ++ shrdq $28,%r14,%r14 ++ addq %rax,%r8 ++ addq %r15,%rax ++ movq %r8,%r13 ++ addq %rax,%r14 ++ vmovdqa %xmm10,112(%rsp) ++ cmpb $0,135(%rbp) ++ jne L$avx_00_47 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rax ++ movq %r9,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %r8,%r13 ++ xorq %r10,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %rax,%r14 ++ andq %r8,%r12 ++ xorq %r8,%r13 ++ addq 0(%rsp),%r11 ++ movq %rax,%r15 ++ xorq %r10,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %rbx,%r15 ++ addq %r12,%r11 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ xorq %rax,%r14 ++ addq %r13,%r11 ++ xorq %rbx,%rdi ++ shrdq $28,%r14,%r14 ++ addq %r11,%rdx ++ addq %rdi,%r11 ++ movq %rdx,%r13 ++ addq %r11,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r11 ++ movq %r8,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %rdx,%r13 ++ xorq %r9,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %r11,%r14 ++ andq %rdx,%r12 ++ xorq %rdx,%r13 ++ addq 8(%rsp),%r10 ++ movq %r11,%rdi ++ xorq %r9,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %rax,%rdi ++ addq %r12,%r10 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ xorq %r11,%r14 ++ addq %r13,%r10 ++ xorq %rax,%r15 ++ shrdq $28,%r14,%r14 ++ addq %r10,%rcx ++ addq %r15,%r10 ++ movq %rcx,%r13 ++ addq %r10,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r10 ++ movq %rdx,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %rcx,%r13 ++ xorq %r8,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %r10,%r14 ++ andq %rcx,%r12 ++ xorq %rcx,%r13 ++ addq 16(%rsp),%r9 ++ movq %r10,%r15 ++ xorq %r8,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %r11,%r15 ++ addq %r12,%r9 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ xorq %r10,%r14 ++ addq %r13,%r9 ++ xorq %r11,%rdi ++ shrdq $28,%r14,%r14 ++ addq %r9,%rbx ++ addq %rdi,%r9 ++ movq %rbx,%r13 ++ addq %r9,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r9 ++ movq %rcx,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %rbx,%r13 ++ xorq %rdx,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %r9,%r14 ++ andq %rbx,%r12 ++ xorq %rbx,%r13 ++ addq 24(%rsp),%r8 ++ movq %r9,%rdi ++ xorq %rdx,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %r10,%rdi ++ addq %r12,%r8 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ xorq %r9,%r14 ++ addq %r13,%r8 ++ xorq %r10,%r15 ++ shrdq $28,%r14,%r14 ++ addq %r8,%rax ++ addq %r15,%r8 ++ movq %rax,%r13 ++ addq %r8,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r8 ++ movq %rbx,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %rax,%r13 ++ xorq %rcx,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %r8,%r14 ++ andq %rax,%r12 ++ xorq %rax,%r13 ++ addq 32(%rsp),%rdx ++ movq %r8,%r15 ++ xorq %rcx,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %r9,%r15 ++ addq %r12,%rdx ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ xorq %r8,%r14 ++ addq %r13,%rdx ++ xorq %r9,%rdi ++ shrdq $28,%r14,%r14 ++ addq %rdx,%r11 ++ addq %rdi,%rdx ++ movq %r11,%r13 ++ addq %rdx,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rdx ++ movq %rax,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %r11,%r13 ++ xorq %rbx,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %rdx,%r14 ++ andq %r11,%r12 ++ xorq %r11,%r13 ++ addq 40(%rsp),%rcx ++ movq %rdx,%rdi ++ xorq %rbx,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %r8,%rdi ++ addq %r12,%rcx ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ xorq %rdx,%r14 ++ addq %r13,%rcx ++ xorq %r8,%r15 ++ shrdq $28,%r14,%r14 ++ addq %rcx,%r10 ++ addq %r15,%rcx ++ movq %r10,%r13 ++ addq %rcx,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rcx ++ movq %r11,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %r10,%r13 ++ xorq %rax,%r12 
++ shrdq $4,%r13,%r13 ++ xorq %rcx,%r14 ++ andq %r10,%r12 ++ xorq %r10,%r13 ++ addq 48(%rsp),%rbx ++ movq %rcx,%r15 ++ xorq %rax,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %rdx,%r15 ++ addq %r12,%rbx ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ xorq %rcx,%r14 ++ addq %r13,%rbx ++ xorq %rdx,%rdi ++ shrdq $28,%r14,%r14 ++ addq %rbx,%r9 ++ addq %rdi,%rbx ++ movq %r9,%r13 ++ addq %rbx,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rbx ++ movq %r10,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %r9,%r13 ++ xorq %r11,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %rbx,%r14 ++ andq %r9,%r12 ++ xorq %r9,%r13 ++ addq 56(%rsp),%rax ++ movq %rbx,%rdi ++ xorq %r11,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %rcx,%rdi ++ addq %r12,%rax ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ xorq %rbx,%r14 ++ addq %r13,%rax ++ xorq %rcx,%r15 ++ shrdq $28,%r14,%r14 ++ addq %rax,%r8 ++ addq %r15,%rax ++ movq %r8,%r13 ++ addq %rax,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rax ++ movq %r9,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %r8,%r13 ++ xorq %r10,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %rax,%r14 ++ andq %r8,%r12 ++ xorq %r8,%r13 ++ addq 64(%rsp),%r11 ++ movq %rax,%r15 ++ xorq %r10,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %rbx,%r15 ++ addq %r12,%r11 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ xorq %rax,%r14 ++ addq %r13,%r11 ++ xorq %rbx,%rdi ++ shrdq $28,%r14,%r14 ++ addq %r11,%rdx ++ addq %rdi,%r11 ++ movq %rdx,%r13 ++ addq %r11,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r11 ++ movq %r8,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %rdx,%r13 ++ xorq %r9,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %r11,%r14 ++ andq %rdx,%r12 ++ xorq %rdx,%r13 ++ addq 72(%rsp),%r10 ++ movq %r11,%rdi ++ xorq %r9,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %rax,%rdi ++ addq %r12,%r10 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ xorq %r11,%r14 ++ addq %r13,%r10 ++ xorq %rax,%r15 ++ shrdq $28,%r14,%r14 ++ addq %r10,%rcx ++ addq %r15,%r10 ++ movq %rcx,%r13 ++ addq %r10,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r10 ++ movq %rdx,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %rcx,%r13 ++ xorq %r8,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %r10,%r14 ++ andq %rcx,%r12 ++ xorq %rcx,%r13 ++ addq 80(%rsp),%r9 ++ movq %r10,%r15 ++ xorq %r8,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %r11,%r15 ++ addq %r12,%r9 ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ xorq %r10,%r14 ++ addq %r13,%r9 ++ xorq %r11,%rdi ++ shrdq $28,%r14,%r14 ++ addq %r9,%rbx ++ addq %rdi,%r9 ++ movq %rbx,%r13 ++ addq %r9,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r9 ++ movq %rcx,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %rbx,%r13 ++ xorq %rdx,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %r9,%r14 ++ andq %rbx,%r12 ++ xorq %rbx,%r13 ++ addq 88(%rsp),%r8 ++ movq %r9,%rdi ++ xorq %rdx,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %r10,%rdi ++ addq %r12,%r8 ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ xorq %r9,%r14 ++ addq %r13,%r8 ++ xorq %r10,%r15 ++ shrdq $28,%r14,%r14 ++ addq %r8,%rax ++ addq %r15,%r8 ++ movq %rax,%r13 ++ addq %r8,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%r8 ++ movq %rbx,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %rax,%r13 ++ xorq %rcx,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %r8,%r14 ++ andq %rax,%r12 ++ xorq %rax,%r13 ++ addq 96(%rsp),%rdx ++ movq %r8,%r15 ++ xorq %rcx,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %r9,%r15 ++ addq %r12,%rdx ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ xorq %r8,%r14 ++ addq %r13,%rdx ++ xorq %r9,%rdi ++ shrdq $28,%r14,%r14 ++ addq %rdx,%r11 ++ addq %rdi,%rdx ++ movq %r11,%r13 ++ addq %rdx,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rdx ++ movq %rax,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %r11,%r13 ++ xorq %rbx,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %rdx,%r14 ++ andq %r11,%r12 ++ xorq %r11,%r13 ++ addq 104(%rsp),%rcx 
++ movq %rdx,%rdi ++ xorq %rbx,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %r8,%rdi ++ addq %r12,%rcx ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ xorq %rdx,%r14 ++ addq %r13,%rcx ++ xorq %r8,%r15 ++ shrdq $28,%r14,%r14 ++ addq %rcx,%r10 ++ addq %r15,%rcx ++ movq %r10,%r13 ++ addq %rcx,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rcx ++ movq %r11,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %r10,%r13 ++ xorq %rax,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %rcx,%r14 ++ andq %r10,%r12 ++ xorq %r10,%r13 ++ addq 112(%rsp),%rbx ++ movq %rcx,%r15 ++ xorq %rax,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %rdx,%r15 ++ addq %r12,%rbx ++ shrdq $14,%r13,%r13 ++ andq %r15,%rdi ++ xorq %rcx,%r14 ++ addq %r13,%rbx ++ xorq %rdx,%rdi ++ shrdq $28,%r14,%r14 ++ addq %rbx,%r9 ++ addq %rdi,%rbx ++ movq %r9,%r13 ++ addq %rbx,%r14 ++ shrdq $23,%r13,%r13 ++ movq %r14,%rbx ++ movq %r10,%r12 ++ shrdq $5,%r14,%r14 ++ xorq %r9,%r13 ++ xorq %r11,%r12 ++ shrdq $4,%r13,%r13 ++ xorq %rbx,%r14 ++ andq %r9,%r12 ++ xorq %r9,%r13 ++ addq 120(%rsp),%rax ++ movq %rbx,%rdi ++ xorq %r11,%r12 ++ shrdq $6,%r14,%r14 ++ xorq %rcx,%rdi ++ addq %r12,%rax ++ shrdq $14,%r13,%r13 ++ andq %rdi,%r15 ++ xorq %rbx,%r14 ++ addq %r13,%rax ++ xorq %rcx,%r15 ++ shrdq $28,%r14,%r14 ++ addq %rax,%r8 ++ addq %r15,%rax ++ movq %r8,%r13 ++ addq %rax,%r14 ++ movq 128+0(%rsp),%rdi ++ movq %r14,%rax ++ ++ addq 0(%rdi),%rax ++ leaq 128(%rsi),%rsi ++ addq 8(%rdi),%rbx ++ addq 16(%rdi),%rcx ++ addq 24(%rdi),%rdx ++ addq 32(%rdi),%r8 ++ addq 40(%rdi),%r9 ++ addq 48(%rdi),%r10 ++ addq 56(%rdi),%r11 ++ ++ cmpq 128+16(%rsp),%rsi ++ ++ movq %rax,0(%rdi) ++ movq %rbx,8(%rdi) ++ movq %rcx,16(%rdi) ++ movq %rdx,24(%rdi) ++ movq %r8,32(%rdi) ++ movq %r9,40(%rdi) ++ movq %r10,48(%rdi) ++ movq %r11,56(%rdi) ++ jb L$loop_avx ++ ++ movq 152(%rsp),%rsi ++ ++ vzeroupper ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$epilogue_avx: ++ .byte 0xf3,0xc3 ++ ++ ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/vpaes-x86_64.S b/apple-x86_64/crypto/fipsmodule/vpaes-x86_64.S +new file mode 100644 +index 0000000..31cf329 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/vpaes-x86_64.S +@@ -0,0 +1,1130 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++_vpaes_encrypt_core: ++ ++ movq %rdx,%r9 ++ movq $16,%r11 ++ movl 240(%rdx),%eax ++ movdqa %xmm9,%xmm1 ++ movdqa L$k_ipt(%rip),%xmm2 ++ pandn %xmm0,%xmm1 ++ movdqu (%r9),%xmm5 ++ psrld $4,%xmm1 ++ pand %xmm9,%xmm0 ++.byte 102,15,56,0,208 ++ movdqa L$k_ipt+16(%rip),%xmm0 ++.byte 102,15,56,0,193 ++ pxor %xmm5,%xmm2 ++ addq $16,%r9 ++ pxor %xmm2,%xmm0 ++ leaq L$k_mc_backward(%rip),%r10 ++ jmp L$enc_entry ++ ++.p2align 4 ++L$enc_loop: ++ ++ movdqa %xmm13,%xmm4 ++ movdqa %xmm12,%xmm0 ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,195 ++ pxor %xmm5,%xmm4 ++ movdqa %xmm15,%xmm5 ++ pxor %xmm4,%xmm0 ++ movdqa -64(%r11,%r10,1),%xmm1 ++.byte 102,15,56,0,234 ++ movdqa (%r11,%r10,1),%xmm4 ++ movdqa %xmm14,%xmm2 ++.byte 102,15,56,0,211 ++ movdqa %xmm0,%xmm3 ++ pxor %xmm5,%xmm2 ++.byte 102,15,56,0,193 ++ addq $16,%r9 ++ pxor %xmm2,%xmm0 ++.byte 102,15,56,0,220 ++ addq $16,%r11 ++ pxor %xmm0,%xmm3 ++.byte 102,15,56,0,193 ++ andq $0x30,%r11 ++ subq $1,%rax ++ pxor %xmm3,%xmm0 ++ ++L$enc_entry: ++ ++ movdqa %xmm9,%xmm1 ++ movdqa %xmm11,%xmm5 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm9,%xmm0 ++.byte 102,15,56,0,232 ++ movdqa %xmm10,%xmm3 ++ pxor %xmm1,%xmm0 ++.byte 102,15,56,0,217 ++ movdqa %xmm10,%xmm4 ++ pxor %xmm5,%xmm3 ++.byte 102,15,56,0,224 ++ movdqa %xmm10,%xmm2 ++ pxor %xmm5,%xmm4 ++.byte 102,15,56,0,211 ++ movdqa %xmm10,%xmm3 ++ pxor %xmm0,%xmm2 ++.byte 102,15,56,0,220 ++ movdqu (%r9),%xmm5 ++ pxor %xmm1,%xmm3 ++ jnz L$enc_loop ++ ++ ++ movdqa -96(%r10),%xmm4 ++ movdqa -80(%r10),%xmm0 ++.byte 102,15,56,0,226 ++ pxor %xmm5,%xmm4 ++.byte 102,15,56,0,195 ++ movdqa 64(%r11,%r10,1),%xmm1 ++ pxor %xmm4,%xmm0 ++.byte 102,15,56,0,193 ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++_vpaes_encrypt_core_2x: ++ ++ movq %rdx,%r9 ++ movq $16,%r11 ++ movl 240(%rdx),%eax ++ movdqa %xmm9,%xmm1 ++ movdqa %xmm9,%xmm7 ++ movdqa L$k_ipt(%rip),%xmm2 ++ movdqa %xmm2,%xmm8 ++ pandn %xmm0,%xmm1 ++ pandn %xmm6,%xmm7 ++ movdqu (%r9),%xmm5 ++ ++ psrld $4,%xmm1 ++ psrld $4,%xmm7 ++ pand %xmm9,%xmm0 ++ pand %xmm9,%xmm6 ++.byte 102,15,56,0,208 ++.byte 102,68,15,56,0,198 ++ movdqa L$k_ipt+16(%rip),%xmm0 ++ movdqa %xmm0,%xmm6 ++.byte 102,15,56,0,193 ++.byte 102,15,56,0,247 ++ pxor %xmm5,%xmm2 ++ pxor %xmm5,%xmm8 ++ addq $16,%r9 ++ pxor %xmm2,%xmm0 ++ pxor %xmm8,%xmm6 ++ leaq L$k_mc_backward(%rip),%r10 ++ jmp L$enc2x_entry ++ ++.p2align 4 ++L$enc2x_loop: ++ ++ movdqa L$k_sb1(%rip),%xmm4 ++ movdqa L$k_sb1+16(%rip),%xmm0 ++ movdqa %xmm4,%xmm12 ++ movdqa %xmm0,%xmm6 ++.byte 102,15,56,0,226 ++.byte 102,69,15,56,0,224 ++.byte 102,15,56,0,195 ++.byte 102,65,15,56,0,243 ++ pxor %xmm5,%xmm4 ++ pxor %xmm5,%xmm12 ++ movdqa L$k_sb2(%rip),%xmm5 ++ movdqa %xmm5,%xmm13 ++ pxor %xmm4,%xmm0 ++ pxor %xmm12,%xmm6 ++ movdqa -64(%r11,%r10,1),%xmm1 ++ ++.byte 102,15,56,0,234 ++.byte 102,69,15,56,0,232 ++ movdqa (%r11,%r10,1),%xmm4 ++ ++ movdqa L$k_sb2+16(%rip),%xmm2 ++ movdqa %xmm2,%xmm8 ++.byte 102,15,56,0,211 ++.byte 102,69,15,56,0,195 ++ movdqa %xmm0,%xmm3 ++ movdqa %xmm6,%xmm11 ++ pxor %xmm5,%xmm2 ++ pxor %xmm13,%xmm8 ++.byte 102,15,56,0,193 ++.byte 102,15,56,0,241 ++ addq $16,%r9 ++ pxor %xmm2,%xmm0 ++ pxor %xmm8,%xmm6 ++.byte 
102,15,56,0,220 ++.byte 102,68,15,56,0,220 ++ addq $16,%r11 ++ pxor %xmm0,%xmm3 ++ pxor %xmm6,%xmm11 ++.byte 102,15,56,0,193 ++.byte 102,15,56,0,241 ++ andq $0x30,%r11 ++ subq $1,%rax ++ pxor %xmm3,%xmm0 ++ pxor %xmm11,%xmm6 ++ ++L$enc2x_entry: ++ ++ movdqa %xmm9,%xmm1 ++ movdqa %xmm9,%xmm7 ++ movdqa L$k_inv+16(%rip),%xmm5 ++ movdqa %xmm5,%xmm13 ++ pandn %xmm0,%xmm1 ++ pandn %xmm6,%xmm7 ++ psrld $4,%xmm1 ++ psrld $4,%xmm7 ++ pand %xmm9,%xmm0 ++ pand %xmm9,%xmm6 ++.byte 102,15,56,0,232 ++.byte 102,68,15,56,0,238 ++ movdqa %xmm10,%xmm3 ++ movdqa %xmm10,%xmm11 ++ pxor %xmm1,%xmm0 ++ pxor %xmm7,%xmm6 ++.byte 102,15,56,0,217 ++.byte 102,68,15,56,0,223 ++ movdqa %xmm10,%xmm4 ++ movdqa %xmm10,%xmm12 ++ pxor %xmm5,%xmm3 ++ pxor %xmm13,%xmm11 ++.byte 102,15,56,0,224 ++.byte 102,68,15,56,0,230 ++ movdqa %xmm10,%xmm2 ++ movdqa %xmm10,%xmm8 ++ pxor %xmm5,%xmm4 ++ pxor %xmm13,%xmm12 ++.byte 102,15,56,0,211 ++.byte 102,69,15,56,0,195 ++ movdqa %xmm10,%xmm3 ++ movdqa %xmm10,%xmm11 ++ pxor %xmm0,%xmm2 ++ pxor %xmm6,%xmm8 ++.byte 102,15,56,0,220 ++.byte 102,69,15,56,0,220 ++ movdqu (%r9),%xmm5 ++ ++ pxor %xmm1,%xmm3 ++ pxor %xmm7,%xmm11 ++ jnz L$enc2x_loop ++ ++ ++ movdqa -96(%r10),%xmm4 ++ movdqa -80(%r10),%xmm0 ++ movdqa %xmm4,%xmm12 ++ movdqa %xmm0,%xmm6 ++.byte 102,15,56,0,226 ++.byte 102,69,15,56,0,224 ++ pxor %xmm5,%xmm4 ++ pxor %xmm5,%xmm12 ++.byte 102,15,56,0,195 ++.byte 102,65,15,56,0,243 ++ movdqa 64(%r11,%r10,1),%xmm1 ++ ++ pxor %xmm4,%xmm0 ++ pxor %xmm12,%xmm6 ++.byte 102,15,56,0,193 ++.byte 102,15,56,0,241 ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++_vpaes_decrypt_core: ++ ++ movq %rdx,%r9 ++ movl 240(%rdx),%eax ++ movdqa %xmm9,%xmm1 ++ movdqa L$k_dipt(%rip),%xmm2 ++ pandn %xmm0,%xmm1 ++ movq %rax,%r11 ++ psrld $4,%xmm1 ++ movdqu (%r9),%xmm5 ++ shlq $4,%r11 ++ pand %xmm9,%xmm0 ++.byte 102,15,56,0,208 ++ movdqa L$k_dipt+16(%rip),%xmm0 ++ xorq $0x30,%r11 ++ leaq L$k_dsbd(%rip),%r10 ++.byte 102,15,56,0,193 ++ andq $0x30,%r11 ++ pxor %xmm5,%xmm2 ++ movdqa L$k_mc_forward+48(%rip),%xmm5 ++ pxor %xmm2,%xmm0 ++ addq $16,%r9 ++ addq %r10,%r11 ++ jmp L$dec_entry ++ ++.p2align 4 ++L$dec_loop: ++ ++ ++ ++ movdqa -32(%r10),%xmm4 ++ movdqa -16(%r10),%xmm1 ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,203 ++ pxor %xmm4,%xmm0 ++ movdqa 0(%r10),%xmm4 ++ pxor %xmm1,%xmm0 ++ movdqa 16(%r10),%xmm1 ++ ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,197 ++.byte 102,15,56,0,203 ++ pxor %xmm4,%xmm0 ++ movdqa 32(%r10),%xmm4 ++ pxor %xmm1,%xmm0 ++ movdqa 48(%r10),%xmm1 ++ ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,197 ++.byte 102,15,56,0,203 ++ pxor %xmm4,%xmm0 ++ movdqa 64(%r10),%xmm4 ++ pxor %xmm1,%xmm0 ++ movdqa 80(%r10),%xmm1 ++ ++.byte 102,15,56,0,226 ++.byte 102,15,56,0,197 ++.byte 102,15,56,0,203 ++ pxor %xmm4,%xmm0 ++ addq $16,%r9 ++.byte 102,15,58,15,237,12 ++ pxor %xmm1,%xmm0 ++ subq $1,%rax ++ ++L$dec_entry: ++ ++ movdqa %xmm9,%xmm1 ++ pandn %xmm0,%xmm1 ++ movdqa %xmm11,%xmm2 ++ psrld $4,%xmm1 ++ pand %xmm9,%xmm0 ++.byte 102,15,56,0,208 ++ movdqa %xmm10,%xmm3 ++ pxor %xmm1,%xmm0 ++.byte 102,15,56,0,217 ++ movdqa %xmm10,%xmm4 ++ pxor %xmm2,%xmm3 ++.byte 102,15,56,0,224 ++ pxor %xmm2,%xmm4 ++ movdqa %xmm10,%xmm2 ++.byte 102,15,56,0,211 ++ movdqa %xmm10,%xmm3 ++ pxor %xmm0,%xmm2 ++.byte 102,15,56,0,220 ++ movdqu (%r9),%xmm0 ++ pxor %xmm1,%xmm3 ++ jnz L$dec_loop ++ ++ ++ movdqa 96(%r10),%xmm4 ++.byte 102,15,56,0,226 ++ pxor %xmm0,%xmm4 ++ movdqa 112(%r10),%xmm0 ++ movdqa -352(%r11),%xmm2 ++.byte 102,15,56,0,195 ++ pxor %xmm4,%xmm0 ++.byte 102,15,56,0,194 ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ 
++.p2align 4 ++_vpaes_schedule_core: ++ ++ ++ ++ ++ ++ ++ call _vpaes_preheat ++ movdqa L$k_rcon(%rip),%xmm8 ++ movdqu (%rdi),%xmm0 ++ ++ ++ movdqa %xmm0,%xmm3 ++ leaq L$k_ipt(%rip),%r11 ++ call _vpaes_schedule_transform ++ movdqa %xmm0,%xmm7 ++ ++ leaq L$k_sr(%rip),%r10 ++ testq %rcx,%rcx ++ jnz L$schedule_am_decrypting ++ ++ ++ movdqu %xmm0,(%rdx) ++ jmp L$schedule_go ++ ++L$schedule_am_decrypting: ++ ++ movdqa (%r8,%r10,1),%xmm1 ++.byte 102,15,56,0,217 ++ movdqu %xmm3,(%rdx) ++ xorq $0x30,%r8 ++ ++L$schedule_go: ++ cmpl $192,%esi ++ ja L$schedule_256 ++ je L$schedule_192 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++L$schedule_128: ++ movl $10,%esi ++ ++L$oop_schedule_128: ++ call _vpaes_schedule_round ++ decq %rsi ++ jz L$schedule_mangle_last ++ call _vpaes_schedule_mangle ++ jmp L$oop_schedule_128 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++L$schedule_192: ++ movdqu 8(%rdi),%xmm0 ++ call _vpaes_schedule_transform ++ movdqa %xmm0,%xmm6 ++ pxor %xmm4,%xmm4 ++ movhlps %xmm4,%xmm6 ++ movl $4,%esi ++ ++L$oop_schedule_192: ++ call _vpaes_schedule_round ++.byte 102,15,58,15,198,8 ++ call _vpaes_schedule_mangle ++ call _vpaes_schedule_192_smear ++ call _vpaes_schedule_mangle ++ call _vpaes_schedule_round ++ decq %rsi ++ jz L$schedule_mangle_last ++ call _vpaes_schedule_mangle ++ call _vpaes_schedule_192_smear ++ jmp L$oop_schedule_192 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++L$schedule_256: ++ movdqu 16(%rdi),%xmm0 ++ call _vpaes_schedule_transform ++ movl $7,%esi ++ ++L$oop_schedule_256: ++ call _vpaes_schedule_mangle ++ movdqa %xmm0,%xmm6 ++ ++ ++ call _vpaes_schedule_round ++ decq %rsi ++ jz L$schedule_mangle_last ++ call _vpaes_schedule_mangle ++ ++ ++ pshufd $0xFF,%xmm0,%xmm0 ++ movdqa %xmm7,%xmm5 ++ movdqa %xmm6,%xmm7 ++ call _vpaes_schedule_low_round ++ movdqa %xmm5,%xmm7 ++ ++ jmp L$oop_schedule_256 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++L$schedule_mangle_last: ++ ++ leaq L$k_deskew(%rip),%r11 ++ testq %rcx,%rcx ++ jnz L$schedule_mangle_last_dec ++ ++ ++ movdqa (%r8,%r10,1),%xmm1 ++.byte 102,15,56,0,193 ++ leaq L$k_opt(%rip),%r11 ++ addq $32,%rdx ++ ++L$schedule_mangle_last_dec: ++ addq $-16,%rdx ++ pxor L$k_s63(%rip),%xmm0 ++ call _vpaes_schedule_transform ++ movdqu %xmm0,(%rdx) ++ ++ ++ pxor %xmm0,%xmm0 ++ pxor %xmm1,%xmm1 ++ pxor %xmm2,%xmm2 ++ pxor %xmm3,%xmm3 ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ pxor %xmm6,%xmm6 ++ pxor %xmm7,%xmm7 ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++_vpaes_schedule_192_smear: ++ ++ pshufd $0x80,%xmm6,%xmm1 ++ pshufd $0xFE,%xmm7,%xmm0 ++ pxor %xmm1,%xmm6 ++ pxor %xmm1,%xmm1 ++ pxor %xmm0,%xmm6 ++ movdqa %xmm6,%xmm0 ++ movhlps %xmm1,%xmm6 ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++_vpaes_schedule_round: ++ ++ ++ pxor %xmm1,%xmm1 ++.byte 102,65,15,58,15,200,15 ++.byte 102,69,15,58,15,192,15 ++ pxor %xmm1,%xmm7 ++ ++ ++ pshufd $0xFF,%xmm0,%xmm0 ++.byte 102,15,58,15,192,1 ++ ++ ++ ++ ++_vpaes_schedule_low_round: ++ ++ movdqa %xmm7,%xmm1 ++ pslldq $4,%xmm7 ++ pxor %xmm1,%xmm7 ++ movdqa %xmm7,%xmm1 ++ pslldq $8,%xmm7 ++ pxor %xmm1,%xmm7 ++ pxor L$k_s63(%rip),%xmm7 ++ ++ ++ movdqa %xmm9,%xmm1 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm9,%xmm0 ++ movdqa %xmm11,%xmm2 ++.byte 102,15,56,0,208 ++ pxor %xmm1,%xmm0 ++ movdqa %xmm10,%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++ movdqa %xmm10,%xmm4 ++.byte 102,15,56,0,224 ++ pxor %xmm2,%xmm4 ++ movdqa %xmm10,%xmm2 ++.byte 102,15,56,0,211 ++ pxor %xmm0,%xmm2 ++ movdqa 
%xmm10,%xmm3 ++.byte 102,15,56,0,220 ++ pxor %xmm1,%xmm3 ++ movdqa %xmm13,%xmm4 ++.byte 102,15,56,0,226 ++ movdqa %xmm12,%xmm0 ++.byte 102,15,56,0,195 ++ pxor %xmm4,%xmm0 ++ ++ ++ pxor %xmm7,%xmm0 ++ movdqa %xmm0,%xmm7 ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++_vpaes_schedule_transform: ++ ++ movdqa %xmm9,%xmm1 ++ pandn %xmm0,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm9,%xmm0 ++ movdqa (%r11),%xmm2 ++.byte 102,15,56,0,208 ++ movdqa 16(%r11),%xmm0 ++.byte 102,15,56,0,193 ++ pxor %xmm2,%xmm0 ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++_vpaes_schedule_mangle: ++ ++ movdqa %xmm0,%xmm4 ++ movdqa L$k_mc_forward(%rip),%xmm5 ++ testq %rcx,%rcx ++ jnz L$schedule_mangle_dec ++ ++ ++ addq $16,%rdx ++ pxor L$k_s63(%rip),%xmm4 ++.byte 102,15,56,0,229 ++ movdqa %xmm4,%xmm3 ++.byte 102,15,56,0,229 ++ pxor %xmm4,%xmm3 ++.byte 102,15,56,0,229 ++ pxor %xmm4,%xmm3 ++ ++ jmp L$schedule_mangle_both ++.p2align 4 ++L$schedule_mangle_dec: ++ ++ leaq L$k_dksd(%rip),%r11 ++ movdqa %xmm9,%xmm1 ++ pandn %xmm4,%xmm1 ++ psrld $4,%xmm1 ++ pand %xmm9,%xmm4 ++ ++ movdqa 0(%r11),%xmm2 ++.byte 102,15,56,0,212 ++ movdqa 16(%r11),%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++.byte 102,15,56,0,221 ++ ++ movdqa 32(%r11),%xmm2 ++.byte 102,15,56,0,212 ++ pxor %xmm3,%xmm2 ++ movdqa 48(%r11),%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++.byte 102,15,56,0,221 ++ ++ movdqa 64(%r11),%xmm2 ++.byte 102,15,56,0,212 ++ pxor %xmm3,%xmm2 ++ movdqa 80(%r11),%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++.byte 102,15,56,0,221 ++ ++ movdqa 96(%r11),%xmm2 ++.byte 102,15,56,0,212 ++ pxor %xmm3,%xmm2 ++ movdqa 112(%r11),%xmm3 ++.byte 102,15,56,0,217 ++ pxor %xmm2,%xmm3 ++ ++ addq $-16,%rdx ++ ++L$schedule_mangle_both: ++ movdqa (%r8,%r10,1),%xmm1 ++.byte 102,15,56,0,217 ++ addq $-16,%r8 ++ andq $0x30,%r8 ++ movdqu %xmm3,(%rdx) ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++.globl _vpaes_set_encrypt_key ++.private_extern _vpaes_set_encrypt_key ++ ++.p2align 4 ++_vpaes_set_encrypt_key: ++ ++#ifdef BORINGSSL_DISPATCH_TEST ++ ++ movb $1,_BORINGSSL_function_hit+5(%rip) ++#endif ++ ++ movl %esi,%eax ++ shrl $5,%eax ++ addl $5,%eax ++ movl %eax,240(%rdx) ++ ++ movl $0,%ecx ++ movl $0x30,%r8d ++ call _vpaes_schedule_core ++ xorl %eax,%eax ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _vpaes_set_decrypt_key ++.private_extern _vpaes_set_decrypt_key ++ ++.p2align 4 ++_vpaes_set_decrypt_key: ++ ++ movl %esi,%eax ++ shrl $5,%eax ++ addl $5,%eax ++ movl %eax,240(%rdx) ++ shll $4,%eax ++ leaq 16(%rdx,%rax,1),%rdx ++ ++ movl $1,%ecx ++ movl %esi,%r8d ++ shrl $1,%r8d ++ andl $32,%r8d ++ xorl $32,%r8d ++ call _vpaes_schedule_core ++ xorl %eax,%eax ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _vpaes_encrypt ++.private_extern _vpaes_encrypt ++ ++.p2align 4 ++_vpaes_encrypt: ++ ++#ifdef BORINGSSL_DISPATCH_TEST ++ ++ movb $1,_BORINGSSL_function_hit+4(%rip) ++#endif ++ movdqu (%rdi),%xmm0 ++ call _vpaes_preheat ++ call _vpaes_encrypt_core ++ movdqu %xmm0,(%rsi) ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _vpaes_decrypt ++.private_extern _vpaes_decrypt ++ ++.p2align 4 ++_vpaes_decrypt: ++ ++ movdqu (%rdi),%xmm0 ++ call _vpaes_preheat ++ call _vpaes_decrypt_core ++ movdqu %xmm0,(%rsi) ++ .byte 0xf3,0xc3 ++ ++ ++.globl _vpaes_cbc_encrypt ++.private_extern _vpaes_cbc_encrypt ++ ++.p2align 4 ++_vpaes_cbc_encrypt: ++ ++ xchgq %rcx,%rdx ++ subq $16,%rcx ++ jc L$cbc_abort ++ movdqu (%r8),%xmm6 ++ subq %rdi,%rsi ++ call _vpaes_preheat ++ cmpl $0,%r9d ++ je L$cbc_dec_loop ++ jmp L$cbc_enc_loop 
++.p2align 4 ++L$cbc_enc_loop: ++ movdqu (%rdi),%xmm0 ++ pxor %xmm6,%xmm0 ++ call _vpaes_encrypt_core ++ movdqa %xmm0,%xmm6 ++ movdqu %xmm0,(%rsi,%rdi,1) ++ leaq 16(%rdi),%rdi ++ subq $16,%rcx ++ jnc L$cbc_enc_loop ++ jmp L$cbc_done ++.p2align 4 ++L$cbc_dec_loop: ++ movdqu (%rdi),%xmm0 ++ movdqa %xmm0,%xmm7 ++ call _vpaes_decrypt_core ++ pxor %xmm6,%xmm0 ++ movdqa %xmm7,%xmm6 ++ movdqu %xmm0,(%rsi,%rdi,1) ++ leaq 16(%rdi),%rdi ++ subq $16,%rcx ++ jnc L$cbc_dec_loop ++L$cbc_done: ++ movdqu %xmm6,(%r8) ++L$cbc_abort: ++ .byte 0xf3,0xc3 ++ ++ ++.globl _vpaes_ctr32_encrypt_blocks ++.private_extern _vpaes_ctr32_encrypt_blocks ++ ++.p2align 4 ++_vpaes_ctr32_encrypt_blocks: ++ ++ ++ xchgq %rcx,%rdx ++ testq %rcx,%rcx ++ jz L$ctr32_abort ++ movdqu (%r8),%xmm0 ++ movdqa L$ctr_add_one(%rip),%xmm8 ++ subq %rdi,%rsi ++ call _vpaes_preheat ++ movdqa %xmm0,%xmm6 ++ pshufb L$rev_ctr(%rip),%xmm6 ++ ++ testq $1,%rcx ++ jz L$ctr32_prep_loop ++ ++ ++ ++ movdqu (%rdi),%xmm7 ++ call _vpaes_encrypt_core ++ pxor %xmm7,%xmm0 ++ paddd %xmm8,%xmm6 ++ movdqu %xmm0,(%rsi,%rdi,1) ++ subq $1,%rcx ++ leaq 16(%rdi),%rdi ++ jz L$ctr32_done ++ ++L$ctr32_prep_loop: ++ ++ ++ movdqa %xmm6,%xmm14 ++ movdqa %xmm6,%xmm15 ++ paddd %xmm8,%xmm15 ++ ++L$ctr32_loop: ++ movdqa L$rev_ctr(%rip),%xmm1 ++ movdqa %xmm14,%xmm0 ++ movdqa %xmm15,%xmm6 ++.byte 102,15,56,0,193 ++.byte 102,15,56,0,241 ++ call _vpaes_encrypt_core_2x ++ movdqu (%rdi),%xmm1 ++ movdqu 16(%rdi),%xmm2 ++ movdqa L$ctr_add_two(%rip),%xmm3 ++ pxor %xmm1,%xmm0 ++ pxor %xmm2,%xmm6 ++ paddd %xmm3,%xmm14 ++ paddd %xmm3,%xmm15 ++ movdqu %xmm0,(%rsi,%rdi,1) ++ movdqu %xmm6,16(%rsi,%rdi,1) ++ subq $2,%rcx ++ leaq 32(%rdi),%rdi ++ jnz L$ctr32_loop ++ ++L$ctr32_done: ++L$ctr32_abort: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 4 ++_vpaes_preheat: ++ ++ leaq L$k_s0F(%rip),%r10 ++ movdqa -32(%r10),%xmm10 ++ movdqa -16(%r10),%xmm11 ++ movdqa 0(%r10),%xmm9 ++ movdqa 48(%r10),%xmm13 ++ movdqa 64(%r10),%xmm12 ++ movdqa 80(%r10),%xmm15 ++ movdqa 96(%r10),%xmm14 ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++ ++ ++.p2align 6 ++_vpaes_consts: ++L$k_inv: ++.quad 0x0E05060F0D080180, 0x040703090A0B0C02 ++.quad 0x01040A060F0B0780, 0x030D0E0C02050809 ++ ++L$k_s0F: ++.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F ++ ++L$k_ipt: ++.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 ++.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 ++ ++L$k_sb1: ++.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 ++.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF ++L$k_sb2: ++.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD ++.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A ++L$k_sbo: ++.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 ++.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA ++ ++L$k_mc_forward: ++.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 ++.quad 0x080B0A0904070605, 0x000302010C0F0E0D ++.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 ++.quad 0x000302010C0F0E0D, 0x080B0A0904070605 ++ ++L$k_mc_backward: ++.quad 0x0605040702010003, 0x0E0D0C0F0A09080B ++.quad 0x020100030E0D0C0F, 0x0A09080B06050407 ++.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 ++.quad 0x0A09080B06050407, 0x020100030E0D0C0F ++ ++L$k_sr: ++.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 ++.quad 0x030E09040F0A0500, 0x0B06010C07020D08 ++.quad 0x0F060D040B020900, 0x070E050C030A0108 ++.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 ++ ++L$k_rcon: ++.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 ++ ++L$k_s63: ++.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B ++ ++L$k_opt: ++.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 ++.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 ++ ++L$k_deskew: ++.quad 
0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A ++.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 ++ ++ ++ ++ ++ ++L$k_dksd: ++.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 ++.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E ++L$k_dksb: ++.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 ++.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 ++L$k_dkse: ++.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 ++.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 ++L$k_dks9: ++.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC ++.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE ++ ++ ++ ++ ++ ++L$k_dipt: ++.quad 0x0F505B040B545F00, 0x154A411E114E451A ++.quad 0x86E383E660056500, 0x12771772F491F194 ++ ++L$k_dsb9: ++.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 ++.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 ++L$k_dsbd: ++.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 ++.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 ++L$k_dsbb: ++.quad 0xD022649296B44200, 0x602646F6B0F2D404 ++.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B ++L$k_dsbe: ++.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 ++.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 ++L$k_dsbo: ++.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D ++.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C ++ ++ ++L$rev_ctr: ++.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 ++ ++ ++L$ctr_add_one: ++.quad 0x0000000000000000, 0x0000000100000000 ++L$ctr_add_two: ++.quad 0x0000000000000000, 0x0000000200000000 ++ ++.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 ++.p2align 6 ++ ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/x86_64-mont.S b/apple-x86_64/crypto/fipsmodule/x86_64-mont.S +new file mode 100644 +index 0000000..d354b2d +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/x86_64-mont.S +@@ -0,0 +1,1256 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++ ++.globl _bn_mul_mont ++.private_extern _bn_mul_mont ++ ++.p2align 4 ++_bn_mul_mont: ++ ++ movl %r9d,%r9d ++ movq %rsp,%rax ++ ++ testl $3,%r9d ++ jnz L$mul_enter ++ cmpl $8,%r9d ++ jb L$mul_enter ++ leaq _OPENSSL_ia32cap_P(%rip),%r11 ++ movl 8(%r11),%r11d ++ cmpq %rsi,%rdx ++ jne L$mul4x_enter ++ testl $7,%r9d ++ jz L$sqr8x_enter ++ jmp L$mul4x_enter ++ ++.p2align 4 ++L$mul_enter: ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ ++ negq %r9 ++ movq %rsp,%r11 ++ leaq -16(%rsp,%r9,8),%r10 ++ negq %r9 ++ andq $-1024,%r10 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ subq %r10,%r11 ++ andq $-4096,%r11 ++ leaq (%r10,%r11,1),%rsp ++ movq (%rsp),%r11 ++ cmpq %r10,%rsp ++ ja L$mul_page_walk ++ jmp L$mul_page_walk_done ++ ++.p2align 4 ++L$mul_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r11 ++ cmpq %r10,%rsp ++ ja L$mul_page_walk ++L$mul_page_walk_done: ++ ++ movq %rax,8(%rsp,%r9,8) ++ ++L$mul_body: ++ movq %rdx,%r12 ++ movq (%r8),%r8 ++ movq (%r12),%rbx ++ movq (%rsi),%rax ++ ++ xorq %r14,%r14 ++ xorq %r15,%r15 ++ ++ movq %r8,%rbp ++ mulq %rbx ++ movq %rax,%r10 ++ movq (%rcx),%rax ++ ++ imulq %r10,%rbp ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r10 ++ movq 8(%rsi),%rax ++ adcq $0,%rdx ++ movq %rdx,%r13 ++ ++ leaq 1(%r15),%r15 ++ jmp L$1st_enter ++ ++.p2align 4 ++L$1st: ++ addq %rax,%r13 ++ movq (%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r11,%r13 ++ movq %r10,%r11 ++ adcq $0,%rdx ++ movq %r13,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ ++L$1st_enter: ++ mulq %rbx ++ addq %rax,%r11 ++ movq (%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ leaq 1(%r15),%r15 ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ cmpq %r9,%r15 ++ jne L$1st ++ ++ addq %rax,%r13 ++ movq (%rsi),%rax ++ adcq $0,%rdx ++ addq %r11,%r13 ++ adcq $0,%rdx ++ movq %r13,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ movq %r10,%r11 ++ ++ xorq %rdx,%rdx ++ addq %r11,%r13 ++ adcq $0,%rdx ++ movq %r13,-8(%rsp,%r9,8) ++ movq %rdx,(%rsp,%r9,8) ++ ++ leaq 1(%r14),%r14 ++ jmp L$outer ++.p2align 4 ++L$outer: ++ movq (%r12,%r14,8),%rbx ++ xorq %r15,%r15 ++ movq %r8,%rbp ++ movq (%rsp),%r10 ++ mulq %rbx ++ addq %rax,%r10 ++ movq (%rcx),%rax ++ adcq $0,%rdx ++ ++ imulq %r10,%rbp ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r10 ++ movq 8(%rsi),%rax ++ adcq $0,%rdx ++ movq 8(%rsp),%r10 ++ movq %rdx,%r13 ++ ++ leaq 1(%r15),%r15 ++ jmp L$inner_enter ++ ++.p2align 4 ++L$inner: ++ addq %rax,%r13 ++ movq (%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ movq (%rsp,%r15,8),%r10 ++ adcq $0,%rdx ++ movq %r13,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ ++L$inner_enter: ++ mulq %rbx ++ addq %rax,%r11 ++ movq (%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r11,%r10 ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ leaq 1(%r15),%r15 ++ ++ mulq %rbp ++ cmpq %r9,%r15 ++ jne L$inner ++ ++ addq %rax,%r13 ++ movq (%rsi),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ movq (%rsp,%r15,8),%r10 ++ adcq $0,%rdx ++ movq %r13,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ ++ xorq %rdx,%rdx ++ addq %r11,%r13 ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-8(%rsp,%r9,8) ++ movq %rdx,(%rsp,%r9,8) ++ ++ leaq 1(%r14),%r14 ++ cmpq %r9,%r14 ++ jb L$outer ++ ++ xorq %r14,%r14 ++ movq (%rsp),%rax ++ movq %r9,%r15 ++ ++.p2align 4 ++L$sub: sbbq (%rcx,%r14,8),%rax ++ movq %rax,(%rdi,%r14,8) ++ movq 8(%rsp,%r14,8),%rax ++ 
leaq 1(%r14),%r14 ++ decq %r15 ++ jnz L$sub ++ ++ sbbq $0,%rax ++ movq $-1,%rbx ++ xorq %rax,%rbx ++ xorq %r14,%r14 ++ movq %r9,%r15 ++ ++L$copy: ++ movq (%rdi,%r14,8),%rcx ++ movq (%rsp,%r14,8),%rdx ++ andq %rbx,%rcx ++ andq %rax,%rdx ++ movq %r9,(%rsp,%r14,8) ++ orq %rcx,%rdx ++ movq %rdx,(%rdi,%r14,8) ++ leaq 1(%r14),%r14 ++ subq $1,%r15 ++ jnz L$copy ++ ++ movq 8(%rsp,%r9,8),%rsi ++ ++ movq $1,%rax ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$mul_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 4 ++bn_mul4x_mont: ++ ++ movl %r9d,%r9d ++ movq %rsp,%rax ++ ++L$mul4x_enter: ++ andl $0x80100,%r11d ++ cmpl $0x80100,%r11d ++ je L$mulx4x_enter ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ ++ negq %r9 ++ movq %rsp,%r11 ++ leaq -32(%rsp,%r9,8),%r10 ++ negq %r9 ++ andq $-1024,%r10 ++ ++ subq %r10,%r11 ++ andq $-4096,%r11 ++ leaq (%r10,%r11,1),%rsp ++ movq (%rsp),%r11 ++ cmpq %r10,%rsp ++ ja L$mul4x_page_walk ++ jmp L$mul4x_page_walk_done ++ ++L$mul4x_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r11 ++ cmpq %r10,%rsp ++ ja L$mul4x_page_walk ++L$mul4x_page_walk_done: ++ ++ movq %rax,8(%rsp,%r9,8) ++ ++L$mul4x_body: ++ movq %rdi,16(%rsp,%r9,8) ++ movq %rdx,%r12 ++ movq (%r8),%r8 ++ movq (%r12),%rbx ++ movq (%rsi),%rax ++ ++ xorq %r14,%r14 ++ xorq %r15,%r15 ++ ++ movq %r8,%rbp ++ mulq %rbx ++ movq %rax,%r10 ++ movq (%rcx),%rax ++ ++ imulq %r10,%rbp ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r10 ++ movq 8(%rsi),%rax ++ adcq $0,%rdx ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 8(%rcx),%rax ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq 16(%rsi),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ leaq 4(%r15),%r15 ++ adcq $0,%rdx ++ movq %rdi,(%rsp) ++ movq %rdx,%r13 ++ jmp L$1st4x ++.p2align 4 ++L$1st4x: ++ mulq %rbx ++ addq %rax,%r10 ++ movq -16(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq -8(%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-24(%rsp,%r15,8) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq -8(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq (%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq %rdi,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq (%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq 8(%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-8(%rsp,%r15,8) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 8(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ leaq 4(%r15),%r15 ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq -16(%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq %rdi,-32(%rsp,%r15,8) ++ movq %rdx,%r13 ++ cmpq %r9,%r15 ++ jb L$1st4x ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq -16(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq -8(%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-24(%rsp,%r15,8) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq -8(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq (%rsi),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq %rdi,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ ++ 
xorq %rdi,%rdi ++ addq %r10,%r13 ++ adcq $0,%rdi ++ movq %r13,-8(%rsp,%r15,8) ++ movq %rdi,(%rsp,%r15,8) ++ ++ leaq 1(%r14),%r14 ++.p2align 2 ++L$outer4x: ++ movq (%r12,%r14,8),%rbx ++ xorq %r15,%r15 ++ movq (%rsp),%r10 ++ movq %r8,%rbp ++ mulq %rbx ++ addq %rax,%r10 ++ movq (%rcx),%rax ++ adcq $0,%rdx ++ ++ imulq %r10,%rbp ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r10 ++ movq 8(%rsi),%rax ++ adcq $0,%rdx ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 8(%rcx),%rax ++ adcq $0,%rdx ++ addq 8(%rsp),%r11 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq 16(%rsi),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ leaq 4(%r15),%r15 ++ adcq $0,%rdx ++ movq %rdi,(%rsp) ++ movq %rdx,%r13 ++ jmp L$inner4x ++.p2align 4 ++L$inner4x: ++ mulq %rbx ++ addq %rax,%r10 ++ movq -16(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ addq -16(%rsp,%r15,8),%r10 ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq -8(%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-24(%rsp,%r15,8) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq -8(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ addq -8(%rsp,%r15,8),%r11 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq (%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq %rdi,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq (%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ addq (%rsp,%r15,8),%r10 ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq 8(%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-8(%rsp,%r15,8) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 8(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ addq 8(%rsp,%r15,8),%r11 ++ adcq $0,%rdx ++ leaq 4(%r15),%r15 ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq -16(%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq %rdi,-32(%rsp,%r15,8) ++ movq %rdx,%r13 ++ cmpq %r9,%r15 ++ jb L$inner4x ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq -16(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ addq -16(%rsp,%r15,8),%r10 ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq -8(%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-24(%rsp,%r15,8) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq -8(%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ addq -8(%rsp,%r15,8),%r11 ++ adcq $0,%rdx ++ leaq 1(%r14),%r14 ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq (%rsi),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq %rdi,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ ++ xorq %rdi,%rdi ++ addq %r10,%r13 ++ adcq $0,%rdi ++ addq (%rsp,%r9,8),%r13 ++ adcq $0,%rdi ++ movq %r13,-8(%rsp,%r15,8) ++ movq %rdi,(%rsp,%r15,8) ++ ++ cmpq %r9,%r14 ++ jb L$outer4x ++ movq 16(%rsp,%r9,8),%rdi ++ leaq -4(%r9),%r15 ++ movq 0(%rsp),%rax ++ movq 8(%rsp),%rdx ++ shrq $2,%r15 ++ leaq (%rsp),%rsi ++ xorq %r14,%r14 ++ ++ subq 0(%rcx),%rax ++ movq 16(%rsi),%rbx ++ movq 24(%rsi),%rbp ++ sbbq 8(%rcx),%rdx ++ ++L$sub4x: ++ movq %rax,0(%rdi,%r14,8) ++ movq %rdx,8(%rdi,%r14,8) ++ sbbq 16(%rcx,%r14,8),%rbx ++ movq 32(%rsi,%r14,8),%rax ++ movq 40(%rsi,%r14,8),%rdx ++ sbbq 24(%rcx,%r14,8),%rbp ++ movq %rbx,16(%rdi,%r14,8) ++ movq %rbp,24(%rdi,%r14,8) ++ sbbq 32(%rcx,%r14,8),%rax ++ movq 48(%rsi,%r14,8),%rbx ++ movq 56(%rsi,%r14,8),%rbp ++ sbbq 40(%rcx,%r14,8),%rdx ++ leaq 4(%r14),%r14 ++ decq %r15 ++ jnz L$sub4x ++ ++ movq %rax,0(%rdi,%r14,8) ++ movq 32(%rsi,%r14,8),%rax ++ sbbq 
16(%rcx,%r14,8),%rbx ++ movq %rdx,8(%rdi,%r14,8) ++ sbbq 24(%rcx,%r14,8),%rbp ++ movq %rbx,16(%rdi,%r14,8) ++ ++ sbbq $0,%rax ++ movq %rbp,24(%rdi,%r14,8) ++ pxor %xmm0,%xmm0 ++.byte 102,72,15,110,224 ++ pcmpeqd %xmm5,%xmm5 ++ pshufd $0,%xmm4,%xmm4 ++ movq %r9,%r15 ++ pxor %xmm4,%xmm5 ++ shrq $2,%r15 ++ xorl %eax,%eax ++ ++ jmp L$copy4x ++.p2align 4 ++L$copy4x: ++ movdqa (%rsp,%rax,1),%xmm1 ++ movdqu (%rdi,%rax,1),%xmm2 ++ pand %xmm4,%xmm1 ++ pand %xmm5,%xmm2 ++ movdqa 16(%rsp,%rax,1),%xmm3 ++ movdqa %xmm0,(%rsp,%rax,1) ++ por %xmm2,%xmm1 ++ movdqu 16(%rdi,%rax,1),%xmm2 ++ movdqu %xmm1,(%rdi,%rax,1) ++ pand %xmm4,%xmm3 ++ pand %xmm5,%xmm2 ++ movdqa %xmm0,16(%rsp,%rax,1) ++ por %xmm2,%xmm3 ++ movdqu %xmm3,16(%rdi,%rax,1) ++ leaq 32(%rax),%rax ++ decq %r15 ++ jnz L$copy4x ++ movq 8(%rsp,%r9,8),%rsi ++ ++ movq $1,%rax ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$mul4x_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++ ++.p2align 5 ++bn_sqr8x_mont: ++ ++ movq %rsp,%rax ++ ++L$sqr8x_enter: ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$sqr8x_prologue: ++ ++ movl %r9d,%r10d ++ shll $3,%r9d ++ shlq $3+2,%r10 ++ negq %r9 ++ ++ ++ ++ ++ ++ ++ leaq -64(%rsp,%r9,2),%r11 ++ movq %rsp,%rbp ++ movq (%r8),%r8 ++ subq %rsi,%r11 ++ andq $4095,%r11 ++ cmpq %r11,%r10 ++ jb L$sqr8x_sp_alt ++ subq %r11,%rbp ++ leaq -64(%rbp,%r9,2),%rbp ++ jmp L$sqr8x_sp_done ++ ++.p2align 5 ++L$sqr8x_sp_alt: ++ leaq 4096-64(,%r9,2),%r10 ++ leaq -64(%rbp,%r9,2),%rbp ++ subq %r10,%r11 ++ movq $0,%r10 ++ cmovcq %r10,%r11 ++ subq %r11,%rbp ++L$sqr8x_sp_done: ++ andq $-64,%rbp ++ movq %rsp,%r11 ++ subq %rbp,%r11 ++ andq $-4096,%r11 ++ leaq (%r11,%rbp,1),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$sqr8x_page_walk ++ jmp L$sqr8x_page_walk_done ++ ++.p2align 4 ++L$sqr8x_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$sqr8x_page_walk ++L$sqr8x_page_walk_done: ++ ++ movq %r9,%r10 ++ negq %r9 ++ ++ movq %r8,32(%rsp) ++ movq %rax,40(%rsp) ++ ++L$sqr8x_body: ++ ++.byte 102,72,15,110,209 ++ pxor %xmm0,%xmm0 ++.byte 102,72,15,110,207 ++.byte 102,73,15,110,218 ++ leaq _OPENSSL_ia32cap_P(%rip),%rax ++ movl 8(%rax),%eax ++ andl $0x80100,%eax ++ cmpl $0x80100,%eax ++ jne L$sqr8x_nox ++ ++ call _bn_sqrx8x_internal ++ ++ ++ ++ ++ leaq (%r8,%rcx,1),%rbx ++ movq %rcx,%r9 ++ movq %rcx,%rdx ++.byte 102,72,15,126,207 ++ sarq $3+2,%rcx ++ jmp L$sqr8x_sub ++ ++.p2align 5 ++L$sqr8x_nox: ++ call _bn_sqr8x_internal ++ ++ ++ ++ ++ leaq (%rdi,%r9,1),%rbx ++ movq %r9,%rcx ++ movq %r9,%rdx ++.byte 102,72,15,126,207 ++ sarq $3+2,%rcx ++ jmp L$sqr8x_sub ++ ++.p2align 5 ++L$sqr8x_sub: ++ movq 0(%rbx),%r12 ++ movq 8(%rbx),%r13 ++ movq 16(%rbx),%r14 ++ movq 24(%rbx),%r15 ++ leaq 32(%rbx),%rbx ++ sbbq 0(%rbp),%r12 ++ sbbq 8(%rbp),%r13 ++ sbbq 16(%rbp),%r14 ++ sbbq 24(%rbp),%r15 ++ leaq 32(%rbp),%rbp ++ movq %r12,0(%rdi) ++ movq %r13,8(%rdi) ++ movq %r14,16(%rdi) ++ movq %r15,24(%rdi) ++ leaq 32(%rdi),%rdi ++ incq %rcx ++ jnz L$sqr8x_sub ++ ++ sbbq $0,%rax ++ leaq (%rbx,%r9,1),%rbx ++ leaq (%rdi,%r9,1),%rdi ++ ++.byte 102,72,15,110,200 ++ pxor %xmm0,%xmm0 ++ pshufd $0,%xmm1,%xmm1 ++ movq 40(%rsp),%rsi ++ ++ jmp L$sqr8x_cond_copy ++ ++.p2align 5 ++L$sqr8x_cond_copy: ++ movdqa 0(%rbx),%xmm2 ++ movdqa 16(%rbx),%xmm3 ++ leaq 32(%rbx),%rbx ++ movdqu 0(%rdi),%xmm4 ++ movdqu 16(%rdi),%xmm5 ++ leaq 32(%rdi),%rdi ++ movdqa %xmm0,-32(%rbx) ++ movdqa 
%xmm0,-16(%rbx) ++ movdqa %xmm0,-32(%rbx,%rdx,1) ++ movdqa %xmm0,-16(%rbx,%rdx,1) ++ pcmpeqd %xmm1,%xmm0 ++ pand %xmm1,%xmm2 ++ pand %xmm1,%xmm3 ++ pand %xmm0,%xmm4 ++ pand %xmm0,%xmm5 ++ pxor %xmm0,%xmm0 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqu %xmm4,-32(%rdi) ++ movdqu %xmm5,-16(%rdi) ++ addq $32,%r9 ++ jnz L$sqr8x_cond_copy ++ ++ movq $1,%rax ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$sqr8x_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++bn_mulx4x_mont: ++ ++ movq %rsp,%rax ++ ++L$mulx4x_enter: ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$mulx4x_prologue: ++ ++ shll $3,%r9d ++ xorq %r10,%r10 ++ subq %r9,%r10 ++ movq (%r8),%r8 ++ leaq -72(%rsp,%r10,1),%rbp ++ andq $-128,%rbp ++ movq %rsp,%r11 ++ subq %rbp,%r11 ++ andq $-4096,%r11 ++ leaq (%r11,%rbp,1),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$mulx4x_page_walk ++ jmp L$mulx4x_page_walk_done ++ ++.p2align 4 ++L$mulx4x_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$mulx4x_page_walk ++L$mulx4x_page_walk_done: ++ ++ leaq (%rdx,%r9,1),%r10 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ movq %r9,0(%rsp) ++ shrq $5,%r9 ++ movq %r10,16(%rsp) ++ subq $1,%r9 ++ movq %r8,24(%rsp) ++ movq %rdi,32(%rsp) ++ movq %rax,40(%rsp) ++ ++ movq %r9,48(%rsp) ++ jmp L$mulx4x_body ++ ++.p2align 5 ++L$mulx4x_body: ++ leaq 8(%rdx),%rdi ++ movq (%rdx),%rdx ++ leaq 64+32(%rsp),%rbx ++ movq %rdx,%r9 ++ ++ mulxq 0(%rsi),%r8,%rax ++ mulxq 8(%rsi),%r11,%r14 ++ addq %rax,%r11 ++ movq %rdi,8(%rsp) ++ mulxq 16(%rsi),%r12,%r13 ++ adcq %r14,%r12 ++ adcq $0,%r13 ++ ++ movq %r8,%rdi ++ imulq 24(%rsp),%r8 ++ xorq %rbp,%rbp ++ ++ mulxq 24(%rsi),%rax,%r14 ++ movq %r8,%rdx ++ leaq 32(%rsi),%rsi ++ adcxq %rax,%r13 ++ adcxq %rbp,%r14 ++ ++ mulxq 0(%rcx),%rax,%r10 ++ adcxq %rax,%rdi ++ adoxq %r11,%r10 ++ mulxq 8(%rcx),%rax,%r11 ++ adcxq %rax,%r10 ++ adoxq %r12,%r11 ++.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 ++ movq 48(%rsp),%rdi ++ movq %r10,-32(%rbx) ++ adcxq %rax,%r11 ++ adoxq %r13,%r12 ++ mulxq 24(%rcx),%rax,%r15 ++ movq %r9,%rdx ++ movq %r11,-24(%rbx) ++ adcxq %rax,%r12 ++ adoxq %rbp,%r15 ++ leaq 32(%rcx),%rcx ++ movq %r12,-16(%rbx) ++ ++ jmp L$mulx4x_1st ++ ++.p2align 5 ++L$mulx4x_1st: ++ adcxq %rbp,%r15 ++ mulxq 0(%rsi),%r10,%rax ++ adcxq %r14,%r10 ++ mulxq 8(%rsi),%r11,%r14 ++ adcxq %rax,%r11 ++ mulxq 16(%rsi),%r12,%rax ++ adcxq %r14,%r12 ++ mulxq 24(%rsi),%r13,%r14 ++.byte 0x67,0x67 ++ movq %r8,%rdx ++ adcxq %rax,%r13 ++ adcxq %rbp,%r14 ++ leaq 32(%rsi),%rsi ++ leaq 32(%rbx),%rbx ++ ++ adoxq %r15,%r10 ++ mulxq 0(%rcx),%rax,%r15 ++ adcxq %rax,%r10 ++ adoxq %r15,%r11 ++ mulxq 8(%rcx),%rax,%r15 ++ adcxq %rax,%r11 ++ adoxq %r15,%r12 ++ mulxq 16(%rcx),%rax,%r15 ++ movq %r10,-40(%rbx) ++ adcxq %rax,%r12 ++ movq %r11,-32(%rbx) ++ adoxq %r15,%r13 ++ mulxq 24(%rcx),%rax,%r15 ++ movq %r9,%rdx ++ movq %r12,-24(%rbx) ++ adcxq %rax,%r13 ++ adoxq %rbp,%r15 ++ leaq 32(%rcx),%rcx ++ movq %r13,-16(%rbx) ++ ++ decq %rdi ++ jnz L$mulx4x_1st ++ ++ movq 0(%rsp),%rax ++ movq 8(%rsp),%rdi ++ adcq %rbp,%r15 ++ addq %r15,%r14 ++ sbbq %r15,%r15 ++ movq %r14,-8(%rbx) ++ jmp L$mulx4x_outer ++ ++.p2align 5 ++L$mulx4x_outer: ++ movq (%rdi),%rdx ++ leaq 8(%rdi),%rdi ++ subq %rax,%rsi ++ movq %r15,(%rbx) ++ leaq 64+32(%rsp),%rbx ++ subq %rax,%rcx ++ ++ mulxq 0(%rsi),%r8,%r11 ++ xorl %ebp,%ebp ++ movq %rdx,%r9 ++ mulxq 8(%rsi),%r14,%r12 ++ adoxq 
-32(%rbx),%r8 ++ adcxq %r14,%r11 ++ mulxq 16(%rsi),%r15,%r13 ++ adoxq -24(%rbx),%r11 ++ adcxq %r15,%r12 ++ adoxq -16(%rbx),%r12 ++ adcxq %rbp,%r13 ++ adoxq %rbp,%r13 ++ ++ movq %rdi,8(%rsp) ++ movq %r8,%r15 ++ imulq 24(%rsp),%r8 ++ xorl %ebp,%ebp ++ ++ mulxq 24(%rsi),%rax,%r14 ++ movq %r8,%rdx ++ adcxq %rax,%r13 ++ adoxq -8(%rbx),%r13 ++ adcxq %rbp,%r14 ++ leaq 32(%rsi),%rsi ++ adoxq %rbp,%r14 ++ ++ mulxq 0(%rcx),%rax,%r10 ++ adcxq %rax,%r15 ++ adoxq %r11,%r10 ++ mulxq 8(%rcx),%rax,%r11 ++ adcxq %rax,%r10 ++ adoxq %r12,%r11 ++ mulxq 16(%rcx),%rax,%r12 ++ movq %r10,-32(%rbx) ++ adcxq %rax,%r11 ++ adoxq %r13,%r12 ++ mulxq 24(%rcx),%rax,%r15 ++ movq %r9,%rdx ++ movq %r11,-24(%rbx) ++ leaq 32(%rcx),%rcx ++ adcxq %rax,%r12 ++ adoxq %rbp,%r15 ++ movq 48(%rsp),%rdi ++ movq %r12,-16(%rbx) ++ ++ jmp L$mulx4x_inner ++ ++.p2align 5 ++L$mulx4x_inner: ++ mulxq 0(%rsi),%r10,%rax ++ adcxq %rbp,%r15 ++ adoxq %r14,%r10 ++ mulxq 8(%rsi),%r11,%r14 ++ adcxq 0(%rbx),%r10 ++ adoxq %rax,%r11 ++ mulxq 16(%rsi),%r12,%rax ++ adcxq 8(%rbx),%r11 ++ adoxq %r14,%r12 ++ mulxq 24(%rsi),%r13,%r14 ++ movq %r8,%rdx ++ adcxq 16(%rbx),%r12 ++ adoxq %rax,%r13 ++ adcxq 24(%rbx),%r13 ++ adoxq %rbp,%r14 ++ leaq 32(%rsi),%rsi ++ leaq 32(%rbx),%rbx ++ adcxq %rbp,%r14 ++ ++ adoxq %r15,%r10 ++ mulxq 0(%rcx),%rax,%r15 ++ adcxq %rax,%r10 ++ adoxq %r15,%r11 ++ mulxq 8(%rcx),%rax,%r15 ++ adcxq %rax,%r11 ++ adoxq %r15,%r12 ++ mulxq 16(%rcx),%rax,%r15 ++ movq %r10,-40(%rbx) ++ adcxq %rax,%r12 ++ adoxq %r15,%r13 ++ mulxq 24(%rcx),%rax,%r15 ++ movq %r9,%rdx ++ movq %r11,-32(%rbx) ++ movq %r12,-24(%rbx) ++ adcxq %rax,%r13 ++ adoxq %rbp,%r15 ++ leaq 32(%rcx),%rcx ++ movq %r13,-16(%rbx) ++ ++ decq %rdi ++ jnz L$mulx4x_inner ++ ++ movq 0(%rsp),%rax ++ movq 8(%rsp),%rdi ++ adcq %rbp,%r15 ++ subq 0(%rbx),%rbp ++ adcq %r15,%r14 ++ sbbq %r15,%r15 ++ movq %r14,-8(%rbx) ++ ++ cmpq 16(%rsp),%rdi ++ jne L$mulx4x_outer ++ ++ leaq 64(%rsp),%rbx ++ subq %rax,%rcx ++ negq %r15 ++ movq %rax,%rdx ++ shrq $3+2,%rax ++ movq 32(%rsp),%rdi ++ jmp L$mulx4x_sub ++ ++.p2align 5 ++L$mulx4x_sub: ++ movq 0(%rbx),%r11 ++ movq 8(%rbx),%r12 ++ movq 16(%rbx),%r13 ++ movq 24(%rbx),%r14 ++ leaq 32(%rbx),%rbx ++ sbbq 0(%rcx),%r11 ++ sbbq 8(%rcx),%r12 ++ sbbq 16(%rcx),%r13 ++ sbbq 24(%rcx),%r14 ++ leaq 32(%rcx),%rcx ++ movq %r11,0(%rdi) ++ movq %r12,8(%rdi) ++ movq %r13,16(%rdi) ++ movq %r14,24(%rdi) ++ leaq 32(%rdi),%rdi ++ decq %rax ++ jnz L$mulx4x_sub ++ ++ sbbq $0,%r15 ++ leaq 64(%rsp),%rbx ++ subq %rdx,%rdi ++ ++.byte 102,73,15,110,207 ++ pxor %xmm0,%xmm0 ++ pshufd $0,%xmm1,%xmm1 ++ movq 40(%rsp),%rsi ++ ++ jmp L$mulx4x_cond_copy ++ ++.p2align 5 ++L$mulx4x_cond_copy: ++ movdqa 0(%rbx),%xmm2 ++ movdqa 16(%rbx),%xmm3 ++ leaq 32(%rbx),%rbx ++ movdqu 0(%rdi),%xmm4 ++ movdqu 16(%rdi),%xmm5 ++ leaq 32(%rdi),%rdi ++ movdqa %xmm0,-32(%rbx) ++ movdqa %xmm0,-16(%rbx) ++ pcmpeqd %xmm1,%xmm0 ++ pand %xmm1,%xmm2 ++ pand %xmm1,%xmm3 ++ pand %xmm0,%xmm4 ++ pand %xmm0,%xmm5 ++ pxor %xmm0,%xmm0 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqu %xmm4,-32(%rdi) ++ movdqu %xmm5,-16(%rdi) ++ subq $32,%rdx ++ jnz L$mulx4x_cond_copy ++ ++ movq %rdx,(%rbx) ++ ++ movq $1,%rax ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$mulx4x_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++.byte 
77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.p2align 4 ++#endif +diff --git a/apple-x86_64/crypto/fipsmodule/x86_64-mont5.S b/apple-x86_64/crypto/fipsmodule/x86_64-mont5.S +new file mode 100644 +index 0000000..e1fd9c9 +--- /dev/null ++++ b/apple-x86_64/crypto/fipsmodule/x86_64-mont5.S +@@ -0,0 +1,3788 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. ++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++ ++.globl _bn_mul_mont_gather5 ++.private_extern _bn_mul_mont_gather5 ++ ++.p2align 6 ++_bn_mul_mont_gather5: ++ ++ movl %r9d,%r9d ++ movq %rsp,%rax ++ ++ testl $7,%r9d ++ jnz L$mul_enter ++ leaq _OPENSSL_ia32cap_P(%rip),%r11 ++ movl 8(%r11),%r11d ++ jmp L$mul4x_enter ++ ++.p2align 4 ++L$mul_enter: ++ movd 8(%rsp),%xmm5 ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++ ++ negq %r9 ++ movq %rsp,%r11 ++ leaq -280(%rsp,%r9,8),%r10 ++ negq %r9 ++ andq $-1024,%r10 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ subq %r10,%r11 ++ andq $-4096,%r11 ++ leaq (%r10,%r11,1),%rsp ++ movq (%rsp),%r11 ++ cmpq %r10,%rsp ++ ja L$mul_page_walk ++ jmp L$mul_page_walk_done ++ ++L$mul_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r11 ++ cmpq %r10,%rsp ++ ja L$mul_page_walk ++L$mul_page_walk_done: ++ ++ leaq L$inc(%rip),%r10 ++ movq %rax,8(%rsp,%r9,8) ++ ++L$mul_body: ++ ++ leaq 128(%rdx),%r12 ++ movdqa 0(%r10),%xmm0 ++ movdqa 16(%r10),%xmm1 ++ leaq 24-112(%rsp,%r9,8),%r10 ++ andq $-16,%r10 ++ ++ pshufd $0,%xmm5,%xmm5 ++ movdqa %xmm1,%xmm4 ++ movdqa %xmm1,%xmm2 ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++.byte 0x67 ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,112(%r10) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,128(%r10) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,144(%r10) ++ movdqa %xmm4,%xmm2 ++ ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,160(%r10) ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,176(%r10) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,192(%r10) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,208(%r10) ++ movdqa %xmm4,%xmm2 ++ ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,224(%r10) ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,240(%r10) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,256(%r10) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,272(%r10) ++ movdqa %xmm4,%xmm2 ++ ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,288(%r10) ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,304(%r10) ++ ++ paddd %xmm2,%xmm3 ++.byte 0x67 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,320(%r10) ++ ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,336(%r10) ++ pand 64(%r12),%xmm0 ++ ++ pand 80(%r12),%xmm1 ++ pand 96(%r12),%xmm2 ++ movdqa %xmm3,352(%r10) ++ pand 112(%r12),%xmm3 ++ por 
%xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ movdqa -128(%r12),%xmm4 ++ movdqa -112(%r12),%xmm5 ++ movdqa -96(%r12),%xmm2 ++ pand 112(%r10),%xmm4 ++ movdqa -80(%r12),%xmm3 ++ pand 128(%r10),%xmm5 ++ por %xmm4,%xmm0 ++ pand 144(%r10),%xmm2 ++ por %xmm5,%xmm1 ++ pand 160(%r10),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ movdqa -64(%r12),%xmm4 ++ movdqa -48(%r12),%xmm5 ++ movdqa -32(%r12),%xmm2 ++ pand 176(%r10),%xmm4 ++ movdqa -16(%r12),%xmm3 ++ pand 192(%r10),%xmm5 ++ por %xmm4,%xmm0 ++ pand 208(%r10),%xmm2 ++ por %xmm5,%xmm1 ++ pand 224(%r10),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ movdqa 0(%r12),%xmm4 ++ movdqa 16(%r12),%xmm5 ++ movdqa 32(%r12),%xmm2 ++ pand 240(%r10),%xmm4 ++ movdqa 48(%r12),%xmm3 ++ pand 256(%r10),%xmm5 ++ por %xmm4,%xmm0 ++ pand 272(%r10),%xmm2 ++ por %xmm5,%xmm1 ++ pand 288(%r10),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ por %xmm1,%xmm0 ++ pshufd $0x4e,%xmm0,%xmm1 ++ por %xmm1,%xmm0 ++ leaq 256(%r12),%r12 ++.byte 102,72,15,126,195 ++ ++ movq (%r8),%r8 ++ movq (%rsi),%rax ++ ++ xorq %r14,%r14 ++ xorq %r15,%r15 ++ ++ movq %r8,%rbp ++ mulq %rbx ++ movq %rax,%r10 ++ movq (%rcx),%rax ++ ++ imulq %r10,%rbp ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r10 ++ movq 8(%rsi),%rax ++ adcq $0,%rdx ++ movq %rdx,%r13 ++ ++ leaq 1(%r15),%r15 ++ jmp L$1st_enter ++ ++.p2align 4 ++L$1st: ++ addq %rax,%r13 ++ movq (%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r11,%r13 ++ movq %r10,%r11 ++ adcq $0,%rdx ++ movq %r13,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ ++L$1st_enter: ++ mulq %rbx ++ addq %rax,%r11 ++ movq (%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ leaq 1(%r15),%r15 ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ cmpq %r9,%r15 ++ jne L$1st ++ ++ ++ addq %rax,%r13 ++ adcq $0,%rdx ++ addq %r11,%r13 ++ adcq $0,%rdx ++ movq %r13,-16(%rsp,%r9,8) ++ movq %rdx,%r13 ++ movq %r10,%r11 ++ ++ xorq %rdx,%rdx ++ addq %r11,%r13 ++ adcq $0,%rdx ++ movq %r13,-8(%rsp,%r9,8) ++ movq %rdx,(%rsp,%r9,8) ++ ++ leaq 1(%r14),%r14 ++ jmp L$outer ++.p2align 4 ++L$outer: ++ leaq 24+128(%rsp,%r9,8),%rdx ++ andq $-16,%rdx ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ movdqa -128(%r12),%xmm0 ++ movdqa -112(%r12),%xmm1 ++ movdqa -96(%r12),%xmm2 ++ movdqa -80(%r12),%xmm3 ++ pand -128(%rdx),%xmm0 ++ pand -112(%rdx),%xmm1 ++ por %xmm0,%xmm4 ++ pand -96(%rdx),%xmm2 ++ por %xmm1,%xmm5 ++ pand -80(%rdx),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa -64(%r12),%xmm0 ++ movdqa -48(%r12),%xmm1 ++ movdqa -32(%r12),%xmm2 ++ movdqa -16(%r12),%xmm3 ++ pand -64(%rdx),%xmm0 ++ pand -48(%rdx),%xmm1 ++ por %xmm0,%xmm4 ++ pand -32(%rdx),%xmm2 ++ por %xmm1,%xmm5 ++ pand -16(%rdx),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa 0(%r12),%xmm0 ++ movdqa 16(%r12),%xmm1 ++ movdqa 32(%r12),%xmm2 ++ movdqa 48(%r12),%xmm3 ++ pand 0(%rdx),%xmm0 ++ pand 16(%rdx),%xmm1 ++ por %xmm0,%xmm4 ++ pand 32(%rdx),%xmm2 ++ por %xmm1,%xmm5 ++ pand 48(%rdx),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa 64(%r12),%xmm0 ++ movdqa 80(%r12),%xmm1 ++ movdqa 96(%r12),%xmm2 ++ movdqa 112(%r12),%xmm3 ++ pand 64(%rdx),%xmm0 ++ pand 80(%rdx),%xmm1 ++ por %xmm0,%xmm4 ++ pand 96(%rdx),%xmm2 ++ por %xmm1,%xmm5 ++ pand 112(%rdx),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ por %xmm5,%xmm4 ++ pshufd $0x4e,%xmm4,%xmm0 ++ por %xmm4,%xmm0 ++ leaq 256(%r12),%r12 ++ ++ movq (%rsi),%rax ++.byte 102,72,15,126,195 ++ ++ xorq %r15,%r15 ++ movq %r8,%rbp ++ movq (%rsp),%r10 ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq (%rcx),%rax ++ adcq $0,%rdx ++ ++ imulq %r10,%rbp ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r10 ++ movq 8(%rsi),%rax ++ adcq $0,%rdx ++ movq 8(%rsp),%r10 ++ 
movq %rdx,%r13 ++ ++ leaq 1(%r15),%r15 ++ jmp L$inner_enter ++ ++.p2align 4 ++L$inner: ++ addq %rax,%r13 ++ movq (%rsi,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ movq (%rsp,%r15,8),%r10 ++ adcq $0,%rdx ++ movq %r13,-16(%rsp,%r15,8) ++ movq %rdx,%r13 ++ ++L$inner_enter: ++ mulq %rbx ++ addq %rax,%r11 ++ movq (%rcx,%r15,8),%rax ++ adcq $0,%rdx ++ addq %r11,%r10 ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ leaq 1(%r15),%r15 ++ ++ mulq %rbp ++ cmpq %r9,%r15 ++ jne L$inner ++ ++ addq %rax,%r13 ++ adcq $0,%rdx ++ addq %r10,%r13 ++ movq (%rsp,%r9,8),%r10 ++ adcq $0,%rdx ++ movq %r13,-16(%rsp,%r9,8) ++ movq %rdx,%r13 ++ ++ xorq %rdx,%rdx ++ addq %r11,%r13 ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-8(%rsp,%r9,8) ++ movq %rdx,(%rsp,%r9,8) ++ ++ leaq 1(%r14),%r14 ++ cmpq %r9,%r14 ++ jb L$outer ++ ++ xorq %r14,%r14 ++ movq (%rsp),%rax ++ leaq (%rsp),%rsi ++ movq %r9,%r15 ++ jmp L$sub ++.p2align 4 ++L$sub: sbbq (%rcx,%r14,8),%rax ++ movq %rax,(%rdi,%r14,8) ++ movq 8(%rsi,%r14,8),%rax ++ leaq 1(%r14),%r14 ++ decq %r15 ++ jnz L$sub ++ ++ sbbq $0,%rax ++ movq $-1,%rbx ++ xorq %rax,%rbx ++ xorq %r14,%r14 ++ movq %r9,%r15 ++ ++L$copy: ++ movq (%rdi,%r14,8),%rcx ++ movq (%rsp,%r14,8),%rdx ++ andq %rbx,%rcx ++ andq %rax,%rdx ++ movq %r14,(%rsp,%r14,8) ++ orq %rcx,%rdx ++ movq %rdx,(%rdi,%r14,8) ++ leaq 1(%r14),%r14 ++ subq $1,%r15 ++ jnz L$copy ++ ++ movq 8(%rsp,%r9,8),%rsi ++ ++ movq $1,%rax ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$mul_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++bn_mul4x_mont_gather5: ++ ++.byte 0x67 ++ movq %rsp,%rax ++ ++L$mul4x_enter: ++ andl $0x80108,%r11d ++ cmpl $0x80108,%r11d ++ je L$mulx4x_enter ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$mul4x_prologue: ++ ++.byte 0x67 ++ shll $3,%r9d ++ leaq (%r9,%r9,2),%r10 ++ negq %r9 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ leaq -320(%rsp,%r9,2),%r11 ++ movq %rsp,%rbp ++ subq %rdi,%r11 ++ andq $4095,%r11 ++ cmpq %r11,%r10 ++ jb L$mul4xsp_alt ++ subq %r11,%rbp ++ leaq -320(%rbp,%r9,2),%rbp ++ jmp L$mul4xsp_done ++ ++.p2align 5 ++L$mul4xsp_alt: ++ leaq 4096-320(,%r9,2),%r10 ++ leaq -320(%rbp,%r9,2),%rbp ++ subq %r10,%r11 ++ movq $0,%r10 ++ cmovcq %r10,%r11 ++ subq %r11,%rbp ++L$mul4xsp_done: ++ andq $-64,%rbp ++ movq %rsp,%r11 ++ subq %rbp,%r11 ++ andq $-4096,%r11 ++ leaq (%r11,%rbp,1),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$mul4x_page_walk ++ jmp L$mul4x_page_walk_done ++ ++L$mul4x_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$mul4x_page_walk ++L$mul4x_page_walk_done: ++ ++ negq %r9 ++ ++ movq %rax,40(%rsp) ++ ++L$mul4x_body: ++ ++ call mul4x_internal ++ ++ movq 40(%rsp),%rsi ++ ++ movq $1,%rax ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$mul4x_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++mul4x_internal: ++ ++ shlq $5,%r9 ++ movd 8(%rax),%xmm5 ++ leaq L$inc(%rip),%rax ++ leaq 128(%rdx,%r9,1),%r13 ++ shrq $5,%r9 ++ movdqa 0(%rax),%xmm0 ++ movdqa 16(%rax),%xmm1 ++ leaq 88-112(%rsp,%r9,1),%r10 ++ leaq 128(%rdx),%r12 ++ ++ pshufd $0,%xmm5,%xmm5 ++ movdqa %xmm1,%xmm4 ++.byte 0x67,0x67 ++ movdqa %xmm1,%xmm2 ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++.byte 0x67 ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ 
movdqa %xmm0,112(%r10) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,128(%r10) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,144(%r10) ++ movdqa %xmm4,%xmm2 ++ ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,160(%r10) ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,176(%r10) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,192(%r10) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,208(%r10) ++ movdqa %xmm4,%xmm2 ++ ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,224(%r10) ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,240(%r10) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,256(%r10) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,272(%r10) ++ movdqa %xmm4,%xmm2 ++ ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,288(%r10) ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,304(%r10) ++ ++ paddd %xmm2,%xmm3 ++.byte 0x67 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,320(%r10) ++ ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,336(%r10) ++ pand 64(%r12),%xmm0 ++ ++ pand 80(%r12),%xmm1 ++ pand 96(%r12),%xmm2 ++ movdqa %xmm3,352(%r10) ++ pand 112(%r12),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ movdqa -128(%r12),%xmm4 ++ movdqa -112(%r12),%xmm5 ++ movdqa -96(%r12),%xmm2 ++ pand 112(%r10),%xmm4 ++ movdqa -80(%r12),%xmm3 ++ pand 128(%r10),%xmm5 ++ por %xmm4,%xmm0 ++ pand 144(%r10),%xmm2 ++ por %xmm5,%xmm1 ++ pand 160(%r10),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ movdqa -64(%r12),%xmm4 ++ movdqa -48(%r12),%xmm5 ++ movdqa -32(%r12),%xmm2 ++ pand 176(%r10),%xmm4 ++ movdqa -16(%r12),%xmm3 ++ pand 192(%r10),%xmm5 ++ por %xmm4,%xmm0 ++ pand 208(%r10),%xmm2 ++ por %xmm5,%xmm1 ++ pand 224(%r10),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ movdqa 0(%r12),%xmm4 ++ movdqa 16(%r12),%xmm5 ++ movdqa 32(%r12),%xmm2 ++ pand 240(%r10),%xmm4 ++ movdqa 48(%r12),%xmm3 ++ pand 256(%r10),%xmm5 ++ por %xmm4,%xmm0 ++ pand 272(%r10),%xmm2 ++ por %xmm5,%xmm1 ++ pand 288(%r10),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ por %xmm1,%xmm0 ++ pshufd $0x4e,%xmm0,%xmm1 ++ por %xmm1,%xmm0 ++ leaq 256(%r12),%r12 ++.byte 102,72,15,126,195 ++ ++ movq %r13,16+8(%rsp) ++ movq %rdi,56+8(%rsp) ++ ++ movq (%r8),%r8 ++ movq (%rsi),%rax ++ leaq (%rsi,%r9,1),%rsi ++ negq %r9 ++ ++ movq %r8,%rbp ++ mulq %rbx ++ movq %rax,%r10 ++ movq (%rcx),%rax ++ ++ imulq %r10,%rbp ++ leaq 64+8(%rsp),%r14 ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r10 ++ movq 8(%rsi,%r9,1),%rax ++ adcq $0,%rdx ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 8(%rcx),%rax ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq 16(%rsi,%r9,1),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ leaq 32(%r9),%r15 ++ leaq 32(%rcx),%rcx ++ adcq $0,%rdx ++ movq %rdi,(%r14) ++ movq %rdx,%r13 ++ jmp L$1st4x ++ ++.p2align 5 ++L$1st4x: ++ mulq %rbx ++ addq %rax,%r10 ++ movq -16(%rcx),%rax ++ leaq 32(%r14),%r14 ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq -8(%rsi,%r15,1),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-24(%r14) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq -8(%rcx),%rax ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq (%rsi,%r15,1),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq 
%rdi,-16(%r14) ++ movq %rdx,%r13 ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq 0(%rcx),%rax ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq 8(%rsi,%r15,1),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-8(%r14) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 8(%rcx),%rax ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq 16(%rsi,%r15,1),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ leaq 32(%rcx),%rcx ++ adcq $0,%rdx ++ movq %rdi,(%r14) ++ movq %rdx,%r13 ++ ++ addq $32,%r15 ++ jnz L$1st4x ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq -16(%rcx),%rax ++ leaq 32(%r14),%r14 ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq -8(%rsi),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %r13,-24(%r14) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq -8(%rcx),%rax ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq (%rsi,%r9,1),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq %rdi,-16(%r14) ++ movq %rdx,%r13 ++ ++ leaq (%rcx,%r9,1),%rcx ++ ++ xorq %rdi,%rdi ++ addq %r10,%r13 ++ adcq $0,%rdi ++ movq %r13,-8(%r14) ++ ++ jmp L$outer4x ++ ++.p2align 5 ++L$outer4x: ++ leaq 16+128(%r14),%rdx ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ movdqa -128(%r12),%xmm0 ++ movdqa -112(%r12),%xmm1 ++ movdqa -96(%r12),%xmm2 ++ movdqa -80(%r12),%xmm3 ++ pand -128(%rdx),%xmm0 ++ pand -112(%rdx),%xmm1 ++ por %xmm0,%xmm4 ++ pand -96(%rdx),%xmm2 ++ por %xmm1,%xmm5 ++ pand -80(%rdx),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa -64(%r12),%xmm0 ++ movdqa -48(%r12),%xmm1 ++ movdqa -32(%r12),%xmm2 ++ movdqa -16(%r12),%xmm3 ++ pand -64(%rdx),%xmm0 ++ pand -48(%rdx),%xmm1 ++ por %xmm0,%xmm4 ++ pand -32(%rdx),%xmm2 ++ por %xmm1,%xmm5 ++ pand -16(%rdx),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa 0(%r12),%xmm0 ++ movdqa 16(%r12),%xmm1 ++ movdqa 32(%r12),%xmm2 ++ movdqa 48(%r12),%xmm3 ++ pand 0(%rdx),%xmm0 ++ pand 16(%rdx),%xmm1 ++ por %xmm0,%xmm4 ++ pand 32(%rdx),%xmm2 ++ por %xmm1,%xmm5 ++ pand 48(%rdx),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa 64(%r12),%xmm0 ++ movdqa 80(%r12),%xmm1 ++ movdqa 96(%r12),%xmm2 ++ movdqa 112(%r12),%xmm3 ++ pand 64(%rdx),%xmm0 ++ pand 80(%rdx),%xmm1 ++ por %xmm0,%xmm4 ++ pand 96(%rdx),%xmm2 ++ por %xmm1,%xmm5 ++ pand 112(%rdx),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ por %xmm5,%xmm4 ++ pshufd $0x4e,%xmm4,%xmm0 ++ por %xmm4,%xmm0 ++ leaq 256(%r12),%r12 ++.byte 102,72,15,126,195 ++ ++ movq (%r14,%r9,1),%r10 ++ movq %r8,%rbp ++ mulq %rbx ++ addq %rax,%r10 ++ movq (%rcx),%rax ++ adcq $0,%rdx ++ ++ imulq %r10,%rbp ++ movq %rdx,%r11 ++ movq %rdi,(%r14) ++ ++ leaq (%r14,%r9,1),%r14 ++ ++ mulq %rbp ++ addq %rax,%r10 ++ movq 8(%rsi,%r9,1),%rax ++ adcq $0,%rdx ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 8(%rcx),%rax ++ adcq $0,%rdx ++ addq 8(%r14),%r11 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq 16(%rsi,%r9,1),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ leaq 32(%r9),%r15 ++ leaq 32(%rcx),%rcx ++ adcq $0,%rdx ++ movq %rdx,%r13 ++ jmp L$inner4x ++ ++.p2align 5 ++L$inner4x: ++ mulq %rbx ++ addq %rax,%r10 ++ movq -16(%rcx),%rax ++ adcq $0,%rdx ++ addq 16(%r14),%r10 ++ leaq 32(%r14),%r14 ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq -8(%rsi,%r15,1),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %rdi,-32(%r14) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq -8(%rcx),%rax ++ adcq $0,%rdx ++ addq -8(%r14),%r11 ++ adcq $0,%rdx 
++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq (%rsi,%r15,1),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq %r13,-24(%r14) ++ movq %rdx,%r13 ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq 0(%rcx),%rax ++ adcq $0,%rdx ++ addq (%r14),%r10 ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq 8(%rsi,%r15,1),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %rdi,-16(%r14) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 8(%rcx),%rax ++ adcq $0,%rdx ++ addq 8(%r14),%r11 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq 16(%rsi,%r15,1),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ leaq 32(%rcx),%rcx ++ adcq $0,%rdx ++ movq %r13,-8(%r14) ++ movq %rdx,%r13 ++ ++ addq $32,%r15 ++ jnz L$inner4x ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq -16(%rcx),%rax ++ adcq $0,%rdx ++ addq 16(%r14),%r10 ++ leaq 32(%r14),%r14 ++ adcq $0,%rdx ++ movq %rdx,%r11 ++ ++ mulq %rbp ++ addq %rax,%r13 ++ movq -8(%rsi),%rax ++ adcq $0,%rdx ++ addq %r10,%r13 ++ adcq $0,%rdx ++ movq %rdi,-32(%r14) ++ movq %rdx,%rdi ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq %rbp,%rax ++ movq -8(%rcx),%rbp ++ adcq $0,%rdx ++ addq -8(%r14),%r11 ++ adcq $0,%rdx ++ movq %rdx,%r10 ++ ++ mulq %rbp ++ addq %rax,%rdi ++ movq (%rsi,%r9,1),%rax ++ adcq $0,%rdx ++ addq %r11,%rdi ++ adcq $0,%rdx ++ movq %r13,-24(%r14) ++ movq %rdx,%r13 ++ ++ movq %rdi,-16(%r14) ++ leaq (%rcx,%r9,1),%rcx ++ ++ xorq %rdi,%rdi ++ addq %r10,%r13 ++ adcq $0,%rdi ++ addq (%r14),%r13 ++ adcq $0,%rdi ++ movq %r13,-8(%r14) ++ ++ cmpq 16+8(%rsp),%r12 ++ jb L$outer4x ++ xorq %rax,%rax ++ subq %r13,%rbp ++ adcq %r15,%r15 ++ orq %r15,%rdi ++ subq %rdi,%rax ++ leaq (%r14,%r9,1),%rbx ++ movq (%rcx),%r12 ++ leaq (%rcx),%rbp ++ movq %r9,%rcx ++ sarq $3+2,%rcx ++ movq 56+8(%rsp),%rdi ++ decq %r12 ++ xorq %r10,%r10 ++ movq 8(%rbp),%r13 ++ movq 16(%rbp),%r14 ++ movq 24(%rbp),%r15 ++ jmp L$sqr4x_sub_entry ++ ++ ++.globl _bn_power5 ++.private_extern _bn_power5 ++ ++.p2align 5 ++_bn_power5: ++ ++ movq %rsp,%rax ++ ++ leaq _OPENSSL_ia32cap_P(%rip),%r11 ++ movl 8(%r11),%r11d ++ andl $0x80108,%r11d ++ cmpl $0x80108,%r11d ++ je L$powerx5_enter ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$power5_prologue: ++ ++ shll $3,%r9d ++ leal (%r9,%r9,2),%r10d ++ negq %r9 ++ movq (%r8),%r8 ++ ++ ++ ++ ++ ++ ++ ++ ++ leaq -320(%rsp,%r9,2),%r11 ++ movq %rsp,%rbp ++ subq %rdi,%r11 ++ andq $4095,%r11 ++ cmpq %r11,%r10 ++ jb L$pwr_sp_alt ++ subq %r11,%rbp ++ leaq -320(%rbp,%r9,2),%rbp ++ jmp L$pwr_sp_done ++ ++.p2align 5 ++L$pwr_sp_alt: ++ leaq 4096-320(,%r9,2),%r10 ++ leaq -320(%rbp,%r9,2),%rbp ++ subq %r10,%r11 ++ movq $0,%r10 ++ cmovcq %r10,%r11 ++ subq %r11,%rbp ++L$pwr_sp_done: ++ andq $-64,%rbp ++ movq %rsp,%r11 ++ subq %rbp,%r11 ++ andq $-4096,%r11 ++ leaq (%r11,%rbp,1),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$pwr_page_walk ++ jmp L$pwr_page_walk_done ++ ++L$pwr_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$pwr_page_walk ++L$pwr_page_walk_done: ++ ++ movq %r9,%r10 ++ negq %r9 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ movq %r8,32(%rsp) ++ movq %rax,40(%rsp) ++ ++L$power5_body: ++.byte 102,72,15,110,207 ++.byte 102,72,15,110,209 ++.byte 102,73,15,110,218 ++.byte 102,72,15,110,226 ++ ++ call __bn_sqr8x_internal ++ call __bn_post4x_internal ++ call __bn_sqr8x_internal ++ call __bn_post4x_internal ++ call __bn_sqr8x_internal ++ call __bn_post4x_internal ++ call __bn_sqr8x_internal ++ call __bn_post4x_internal ++ call 
__bn_sqr8x_internal ++ call __bn_post4x_internal ++ ++.byte 102,72,15,126,209 ++.byte 102,72,15,126,226 ++ movq %rsi,%rdi ++ movq 40(%rsp),%rax ++ leaq 32(%rsp),%r8 ++ ++ call mul4x_internal ++ ++ movq 40(%rsp),%rsi ++ ++ movq $1,%rax ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$power5_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _bn_sqr8x_internal ++.private_extern _bn_sqr8x_internal ++.private_extern _bn_sqr8x_internal ++ ++.p2align 5 ++_bn_sqr8x_internal: ++__bn_sqr8x_internal: ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ leaq 32(%r10),%rbp ++ leaq (%rsi,%r9,1),%rsi ++ ++ movq %r9,%rcx ++ ++ ++ movq -32(%rsi,%rbp,1),%r14 ++ leaq 48+8(%rsp,%r9,2),%rdi ++ movq -24(%rsi,%rbp,1),%rax ++ leaq -32(%rdi,%rbp,1),%rdi ++ movq -16(%rsi,%rbp,1),%rbx ++ movq %rax,%r15 ++ ++ mulq %r14 ++ movq %rax,%r10 ++ movq %rbx,%rax ++ movq %rdx,%r11 ++ movq %r10,-24(%rdi,%rbp,1) ++ ++ mulq %r14 ++ addq %rax,%r11 ++ movq %rbx,%rax ++ adcq $0,%rdx ++ movq %r11,-16(%rdi,%rbp,1) ++ movq %rdx,%r10 ++ ++ ++ movq -8(%rsi,%rbp,1),%rbx ++ mulq %r15 ++ movq %rax,%r12 ++ movq %rbx,%rax ++ movq %rdx,%r13 ++ ++ leaq (%rbp),%rcx ++ mulq %r14 ++ addq %rax,%r10 ++ movq %rbx,%rax ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ addq %r12,%r10 ++ adcq $0,%r11 ++ movq %r10,-8(%rdi,%rcx,1) ++ jmp L$sqr4x_1st ++ ++.p2align 5 ++L$sqr4x_1st: ++ movq (%rsi,%rcx,1),%rbx ++ mulq %r15 ++ addq %rax,%r13 ++ movq %rbx,%rax ++ movq %rdx,%r12 ++ adcq $0,%r12 ++ ++ mulq %r14 ++ addq %rax,%r11 ++ movq %rbx,%rax ++ movq 8(%rsi,%rcx,1),%rbx ++ movq %rdx,%r10 ++ adcq $0,%r10 ++ addq %r13,%r11 ++ adcq $0,%r10 ++ ++ ++ mulq %r15 ++ addq %rax,%r12 ++ movq %rbx,%rax ++ movq %r11,(%rdi,%rcx,1) ++ movq %rdx,%r13 ++ adcq $0,%r13 ++ ++ mulq %r14 ++ addq %rax,%r10 ++ movq %rbx,%rax ++ movq 16(%rsi,%rcx,1),%rbx ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ addq %r12,%r10 ++ adcq $0,%r11 ++ ++ mulq %r15 ++ addq %rax,%r13 ++ movq %rbx,%rax ++ movq %r10,8(%rdi,%rcx,1) ++ movq %rdx,%r12 ++ adcq $0,%r12 ++ ++ mulq %r14 ++ addq %rax,%r11 ++ movq %rbx,%rax ++ movq 24(%rsi,%rcx,1),%rbx ++ movq %rdx,%r10 ++ adcq $0,%r10 ++ addq %r13,%r11 ++ adcq $0,%r10 ++ ++ ++ mulq %r15 ++ addq %rax,%r12 ++ movq %rbx,%rax ++ movq %r11,16(%rdi,%rcx,1) ++ movq %rdx,%r13 ++ adcq $0,%r13 ++ leaq 32(%rcx),%rcx ++ ++ mulq %r14 ++ addq %rax,%r10 ++ movq %rbx,%rax ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ addq %r12,%r10 ++ adcq $0,%r11 ++ movq %r10,-8(%rdi,%rcx,1) ++ ++ cmpq $0,%rcx ++ jne L$sqr4x_1st ++ ++ mulq %r15 ++ addq %rax,%r13 ++ leaq 16(%rbp),%rbp ++ adcq $0,%rdx ++ addq %r11,%r13 ++ adcq $0,%rdx ++ ++ movq %r13,(%rdi) ++ movq %rdx,%r12 ++ movq %rdx,8(%rdi) ++ jmp L$sqr4x_outer ++ ++.p2align 5 ++L$sqr4x_outer: ++ movq -32(%rsi,%rbp,1),%r14 ++ leaq 48+8(%rsp,%r9,2),%rdi ++ movq -24(%rsi,%rbp,1),%rax ++ leaq -32(%rdi,%rbp,1),%rdi ++ movq -16(%rsi,%rbp,1),%rbx ++ movq %rax,%r15 ++ ++ mulq %r14 ++ movq -24(%rdi,%rbp,1),%r10 ++ addq %rax,%r10 ++ movq %rbx,%rax ++ adcq $0,%rdx ++ movq %r10,-24(%rdi,%rbp,1) ++ movq %rdx,%r11 ++ ++ mulq %r14 ++ addq %rax,%r11 ++ movq %rbx,%rax ++ adcq $0,%rdx ++ addq -16(%rdi,%rbp,1),%r11 ++ movq %rdx,%r10 ++ adcq $0,%r10 ++ movq %r11,-16(%rdi,%rbp,1) ++ ++ xorq %r12,%r12 ++ ++ movq -8(%rsi,%rbp,1),%rbx ++ mulq %r15 ++ addq %rax,%r12 ++ movq %rbx,%rax ++ adcq $0,%rdx 
++ addq -8(%rdi,%rbp,1),%r12 ++ movq %rdx,%r13 ++ adcq $0,%r13 ++ ++ mulq %r14 ++ addq %rax,%r10 ++ movq %rbx,%rax ++ adcq $0,%rdx ++ addq %r12,%r10 ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ movq %r10,-8(%rdi,%rbp,1) ++ ++ leaq (%rbp),%rcx ++ jmp L$sqr4x_inner ++ ++.p2align 5 ++L$sqr4x_inner: ++ movq (%rsi,%rcx,1),%rbx ++ mulq %r15 ++ addq %rax,%r13 ++ movq %rbx,%rax ++ movq %rdx,%r12 ++ adcq $0,%r12 ++ addq (%rdi,%rcx,1),%r13 ++ adcq $0,%r12 ++ ++.byte 0x67 ++ mulq %r14 ++ addq %rax,%r11 ++ movq %rbx,%rax ++ movq 8(%rsi,%rcx,1),%rbx ++ movq %rdx,%r10 ++ adcq $0,%r10 ++ addq %r13,%r11 ++ adcq $0,%r10 ++ ++ mulq %r15 ++ addq %rax,%r12 ++ movq %r11,(%rdi,%rcx,1) ++ movq %rbx,%rax ++ movq %rdx,%r13 ++ adcq $0,%r13 ++ addq 8(%rdi,%rcx,1),%r12 ++ leaq 16(%rcx),%rcx ++ adcq $0,%r13 ++ ++ mulq %r14 ++ addq %rax,%r10 ++ movq %rbx,%rax ++ adcq $0,%rdx ++ addq %r12,%r10 ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ movq %r10,-8(%rdi,%rcx,1) ++ ++ cmpq $0,%rcx ++ jne L$sqr4x_inner ++ ++.byte 0x67 ++ mulq %r15 ++ addq %rax,%r13 ++ adcq $0,%rdx ++ addq %r11,%r13 ++ adcq $0,%rdx ++ ++ movq %r13,(%rdi) ++ movq %rdx,%r12 ++ movq %rdx,8(%rdi) ++ ++ addq $16,%rbp ++ jnz L$sqr4x_outer ++ ++ ++ movq -32(%rsi),%r14 ++ leaq 48+8(%rsp,%r9,2),%rdi ++ movq -24(%rsi),%rax ++ leaq -32(%rdi,%rbp,1),%rdi ++ movq -16(%rsi),%rbx ++ movq %rax,%r15 ++ ++ mulq %r14 ++ addq %rax,%r10 ++ movq %rbx,%rax ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ ++ mulq %r14 ++ addq %rax,%r11 ++ movq %rbx,%rax ++ movq %r10,-24(%rdi) ++ movq %rdx,%r10 ++ adcq $0,%r10 ++ addq %r13,%r11 ++ movq -8(%rsi),%rbx ++ adcq $0,%r10 ++ ++ mulq %r15 ++ addq %rax,%r12 ++ movq %rbx,%rax ++ movq %r11,-16(%rdi) ++ movq %rdx,%r13 ++ adcq $0,%r13 ++ ++ mulq %r14 ++ addq %rax,%r10 ++ movq %rbx,%rax ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ addq %r12,%r10 ++ adcq $0,%r11 ++ movq %r10,-8(%rdi) ++ ++ mulq %r15 ++ addq %rax,%r13 ++ movq -16(%rsi),%rax ++ adcq $0,%rdx ++ addq %r11,%r13 ++ adcq $0,%rdx ++ ++ movq %r13,(%rdi) ++ movq %rdx,%r12 ++ movq %rdx,8(%rdi) ++ ++ mulq %rbx ++ addq $16,%rbp ++ xorq %r14,%r14 ++ subq %r9,%rbp ++ xorq %r15,%r15 ++ ++ addq %r12,%rax ++ adcq $0,%rdx ++ movq %rax,8(%rdi) ++ movq %rdx,16(%rdi) ++ movq %r15,24(%rdi) ++ ++ movq -16(%rsi,%rbp,1),%rax ++ leaq 48+8(%rsp),%rdi ++ xorq %r10,%r10 ++ movq 8(%rdi),%r11 ++ ++ leaq (%r14,%r10,2),%r12 ++ shrq $63,%r10 ++ leaq (%rcx,%r11,2),%r13 ++ shrq $63,%r11 ++ orq %r10,%r13 ++ movq 16(%rdi),%r10 ++ movq %r11,%r14 ++ mulq %rax ++ negq %r15 ++ movq 24(%rdi),%r11 ++ adcq %rax,%r12 ++ movq -8(%rsi,%rbp,1),%rax ++ movq %r12,(%rdi) ++ adcq %rdx,%r13 ++ ++ leaq (%r14,%r10,2),%rbx ++ movq %r13,8(%rdi) ++ sbbq %r15,%r15 ++ shrq $63,%r10 ++ leaq (%rcx,%r11,2),%r8 ++ shrq $63,%r11 ++ orq %r10,%r8 ++ movq 32(%rdi),%r10 ++ movq %r11,%r14 ++ mulq %rax ++ negq %r15 ++ movq 40(%rdi),%r11 ++ adcq %rax,%rbx ++ movq 0(%rsi,%rbp,1),%rax ++ movq %rbx,16(%rdi) ++ adcq %rdx,%r8 ++ leaq 16(%rbp),%rbp ++ movq %r8,24(%rdi) ++ sbbq %r15,%r15 ++ leaq 64(%rdi),%rdi ++ jmp L$sqr4x_shift_n_add ++ ++.p2align 5 ++L$sqr4x_shift_n_add: ++ leaq (%r14,%r10,2),%r12 ++ shrq $63,%r10 ++ leaq (%rcx,%r11,2),%r13 ++ shrq $63,%r11 ++ orq %r10,%r13 ++ movq -16(%rdi),%r10 ++ movq %r11,%r14 ++ mulq %rax ++ negq %r15 ++ movq -8(%rdi),%r11 ++ adcq %rax,%r12 ++ movq -8(%rsi,%rbp,1),%rax ++ movq %r12,-32(%rdi) ++ adcq %rdx,%r13 ++ ++ leaq (%r14,%r10,2),%rbx ++ movq %r13,-24(%rdi) ++ sbbq %r15,%r15 ++ shrq $63,%r10 ++ leaq (%rcx,%r11,2),%r8 ++ shrq $63,%r11 ++ orq %r10,%r8 ++ movq 0(%rdi),%r10 ++ movq %r11,%r14 ++ mulq %rax ++ negq %r15 ++ movq 8(%rdi),%r11 ++ adcq 
%rax,%rbx ++ movq 0(%rsi,%rbp,1),%rax ++ movq %rbx,-16(%rdi) ++ adcq %rdx,%r8 ++ ++ leaq (%r14,%r10,2),%r12 ++ movq %r8,-8(%rdi) ++ sbbq %r15,%r15 ++ shrq $63,%r10 ++ leaq (%rcx,%r11,2),%r13 ++ shrq $63,%r11 ++ orq %r10,%r13 ++ movq 16(%rdi),%r10 ++ movq %r11,%r14 ++ mulq %rax ++ negq %r15 ++ movq 24(%rdi),%r11 ++ adcq %rax,%r12 ++ movq 8(%rsi,%rbp,1),%rax ++ movq %r12,0(%rdi) ++ adcq %rdx,%r13 ++ ++ leaq (%r14,%r10,2),%rbx ++ movq %r13,8(%rdi) ++ sbbq %r15,%r15 ++ shrq $63,%r10 ++ leaq (%rcx,%r11,2),%r8 ++ shrq $63,%r11 ++ orq %r10,%r8 ++ movq 32(%rdi),%r10 ++ movq %r11,%r14 ++ mulq %rax ++ negq %r15 ++ movq 40(%rdi),%r11 ++ adcq %rax,%rbx ++ movq 16(%rsi,%rbp,1),%rax ++ movq %rbx,16(%rdi) ++ adcq %rdx,%r8 ++ movq %r8,24(%rdi) ++ sbbq %r15,%r15 ++ leaq 64(%rdi),%rdi ++ addq $32,%rbp ++ jnz L$sqr4x_shift_n_add ++ ++ leaq (%r14,%r10,2),%r12 ++.byte 0x67 ++ shrq $63,%r10 ++ leaq (%rcx,%r11,2),%r13 ++ shrq $63,%r11 ++ orq %r10,%r13 ++ movq -16(%rdi),%r10 ++ movq %r11,%r14 ++ mulq %rax ++ negq %r15 ++ movq -8(%rdi),%r11 ++ adcq %rax,%r12 ++ movq -8(%rsi),%rax ++ movq %r12,-32(%rdi) ++ adcq %rdx,%r13 ++ ++ leaq (%r14,%r10,2),%rbx ++ movq %r13,-24(%rdi) ++ sbbq %r15,%r15 ++ shrq $63,%r10 ++ leaq (%rcx,%r11,2),%r8 ++ shrq $63,%r11 ++ orq %r10,%r8 ++ mulq %rax ++ negq %r15 ++ adcq %rax,%rbx ++ adcq %rdx,%r8 ++ movq %rbx,-16(%rdi) ++ movq %r8,-8(%rdi) ++.byte 102,72,15,126,213 ++__bn_sqr8x_reduction: ++ xorq %rax,%rax ++ leaq (%r9,%rbp,1),%rcx ++ leaq 48+8(%rsp,%r9,2),%rdx ++ movq %rcx,0+8(%rsp) ++ leaq 48+8(%rsp,%r9,1),%rdi ++ movq %rdx,8+8(%rsp) ++ negq %r9 ++ jmp L$8x_reduction_loop ++ ++.p2align 5 ++L$8x_reduction_loop: ++ leaq (%rdi,%r9,1),%rdi ++.byte 0x66 ++ movq 0(%rdi),%rbx ++ movq 8(%rdi),%r9 ++ movq 16(%rdi),%r10 ++ movq 24(%rdi),%r11 ++ movq 32(%rdi),%r12 ++ movq 40(%rdi),%r13 ++ movq 48(%rdi),%r14 ++ movq 56(%rdi),%r15 ++ movq %rax,(%rdx) ++ leaq 64(%rdi),%rdi ++ ++.byte 0x67 ++ movq %rbx,%r8 ++ imulq 32+8(%rsp),%rbx ++ movq 0(%rbp),%rax ++ movl $8,%ecx ++ jmp L$8x_reduce ++ ++.p2align 5 ++L$8x_reduce: ++ mulq %rbx ++ movq 8(%rbp),%rax ++ negq %r8 ++ movq %rdx,%r8 ++ adcq $0,%r8 ++ ++ mulq %rbx ++ addq %rax,%r9 ++ movq 16(%rbp),%rax ++ adcq $0,%rdx ++ addq %r9,%r8 ++ movq %rbx,48-8+8(%rsp,%rcx,8) ++ movq %rdx,%r9 ++ adcq $0,%r9 ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq 24(%rbp),%rax ++ adcq $0,%rdx ++ addq %r10,%r9 ++ movq 32+8(%rsp),%rsi ++ movq %rdx,%r10 ++ adcq $0,%r10 ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 32(%rbp),%rax ++ adcq $0,%rdx ++ imulq %r8,%rsi ++ addq %r11,%r10 ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ ++ mulq %rbx ++ addq %rax,%r12 ++ movq 40(%rbp),%rax ++ adcq $0,%rdx ++ addq %r12,%r11 ++ movq %rdx,%r12 ++ adcq $0,%r12 ++ ++ mulq %rbx ++ addq %rax,%r13 ++ movq 48(%rbp),%rax ++ adcq $0,%rdx ++ addq %r13,%r12 ++ movq %rdx,%r13 ++ adcq $0,%r13 ++ ++ mulq %rbx ++ addq %rax,%r14 ++ movq 56(%rbp),%rax ++ adcq $0,%rdx ++ addq %r14,%r13 ++ movq %rdx,%r14 ++ adcq $0,%r14 ++ ++ mulq %rbx ++ movq %rsi,%rbx ++ addq %rax,%r15 ++ movq 0(%rbp),%rax ++ adcq $0,%rdx ++ addq %r15,%r14 ++ movq %rdx,%r15 ++ adcq $0,%r15 ++ ++ decl %ecx ++ jnz L$8x_reduce ++ ++ leaq 64(%rbp),%rbp ++ xorq %rax,%rax ++ movq 8+8(%rsp),%rdx ++ cmpq 0+8(%rsp),%rbp ++ jae L$8x_no_tail ++ ++.byte 0x66 ++ addq 0(%rdi),%r8 ++ adcq 8(%rdi),%r9 ++ adcq 16(%rdi),%r10 ++ adcq 24(%rdi),%r11 ++ adcq 32(%rdi),%r12 ++ adcq 40(%rdi),%r13 ++ adcq 48(%rdi),%r14 ++ adcq 56(%rdi),%r15 ++ sbbq %rsi,%rsi ++ ++ movq 48+56+8(%rsp),%rbx ++ movl $8,%ecx ++ movq 0(%rbp),%rax ++ jmp L$8x_tail ++ ++.p2align 5 ++L$8x_tail: ++ mulq %rbx ++ 
addq %rax,%r8 ++ movq 8(%rbp),%rax ++ movq %r8,(%rdi) ++ movq %rdx,%r8 ++ adcq $0,%r8 ++ ++ mulq %rbx ++ addq %rax,%r9 ++ movq 16(%rbp),%rax ++ adcq $0,%rdx ++ addq %r9,%r8 ++ leaq 8(%rdi),%rdi ++ movq %rdx,%r9 ++ adcq $0,%r9 ++ ++ mulq %rbx ++ addq %rax,%r10 ++ movq 24(%rbp),%rax ++ adcq $0,%rdx ++ addq %r10,%r9 ++ movq %rdx,%r10 ++ adcq $0,%r10 ++ ++ mulq %rbx ++ addq %rax,%r11 ++ movq 32(%rbp),%rax ++ adcq $0,%rdx ++ addq %r11,%r10 ++ movq %rdx,%r11 ++ adcq $0,%r11 ++ ++ mulq %rbx ++ addq %rax,%r12 ++ movq 40(%rbp),%rax ++ adcq $0,%rdx ++ addq %r12,%r11 ++ movq %rdx,%r12 ++ adcq $0,%r12 ++ ++ mulq %rbx ++ addq %rax,%r13 ++ movq 48(%rbp),%rax ++ adcq $0,%rdx ++ addq %r13,%r12 ++ movq %rdx,%r13 ++ adcq $0,%r13 ++ ++ mulq %rbx ++ addq %rax,%r14 ++ movq 56(%rbp),%rax ++ adcq $0,%rdx ++ addq %r14,%r13 ++ movq %rdx,%r14 ++ adcq $0,%r14 ++ ++ mulq %rbx ++ movq 48-16+8(%rsp,%rcx,8),%rbx ++ addq %rax,%r15 ++ adcq $0,%rdx ++ addq %r15,%r14 ++ movq 0(%rbp),%rax ++ movq %rdx,%r15 ++ adcq $0,%r15 ++ ++ decl %ecx ++ jnz L$8x_tail ++ ++ leaq 64(%rbp),%rbp ++ movq 8+8(%rsp),%rdx ++ cmpq 0+8(%rsp),%rbp ++ jae L$8x_tail_done ++ ++ movq 48+56+8(%rsp),%rbx ++ negq %rsi ++ movq 0(%rbp),%rax ++ adcq 0(%rdi),%r8 ++ adcq 8(%rdi),%r9 ++ adcq 16(%rdi),%r10 ++ adcq 24(%rdi),%r11 ++ adcq 32(%rdi),%r12 ++ adcq 40(%rdi),%r13 ++ adcq 48(%rdi),%r14 ++ adcq 56(%rdi),%r15 ++ sbbq %rsi,%rsi ++ ++ movl $8,%ecx ++ jmp L$8x_tail ++ ++.p2align 5 ++L$8x_tail_done: ++ xorq %rax,%rax ++ addq (%rdx),%r8 ++ adcq $0,%r9 ++ adcq $0,%r10 ++ adcq $0,%r11 ++ adcq $0,%r12 ++ adcq $0,%r13 ++ adcq $0,%r14 ++ adcq $0,%r15 ++ adcq $0,%rax ++ ++ negq %rsi ++L$8x_no_tail: ++ adcq 0(%rdi),%r8 ++ adcq 8(%rdi),%r9 ++ adcq 16(%rdi),%r10 ++ adcq 24(%rdi),%r11 ++ adcq 32(%rdi),%r12 ++ adcq 40(%rdi),%r13 ++ adcq 48(%rdi),%r14 ++ adcq 56(%rdi),%r15 ++ adcq $0,%rax ++ movq -8(%rbp),%rcx ++ xorq %rsi,%rsi ++ ++.byte 102,72,15,126,213 ++ ++ movq %r8,0(%rdi) ++ movq %r9,8(%rdi) ++.byte 102,73,15,126,217 ++ movq %r10,16(%rdi) ++ movq %r11,24(%rdi) ++ movq %r12,32(%rdi) ++ movq %r13,40(%rdi) ++ movq %r14,48(%rdi) ++ movq %r15,56(%rdi) ++ leaq 64(%rdi),%rdi ++ ++ cmpq %rdx,%rdi ++ jb L$8x_reduction_loop ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++__bn_post4x_internal: ++ ++ movq 0(%rbp),%r12 ++ leaq (%rdi,%r9,1),%rbx ++ movq %r9,%rcx ++.byte 102,72,15,126,207 ++ negq %rax ++.byte 102,72,15,126,206 ++ sarq $3+2,%rcx ++ decq %r12 ++ xorq %r10,%r10 ++ movq 8(%rbp),%r13 ++ movq 16(%rbp),%r14 ++ movq 24(%rbp),%r15 ++ jmp L$sqr4x_sub_entry ++ ++.p2align 4 ++L$sqr4x_sub: ++ movq 0(%rbp),%r12 ++ movq 8(%rbp),%r13 ++ movq 16(%rbp),%r14 ++ movq 24(%rbp),%r15 ++L$sqr4x_sub_entry: ++ leaq 32(%rbp),%rbp ++ notq %r12 ++ notq %r13 ++ notq %r14 ++ notq %r15 ++ andq %rax,%r12 ++ andq %rax,%r13 ++ andq %rax,%r14 ++ andq %rax,%r15 ++ ++ negq %r10 ++ adcq 0(%rbx),%r12 ++ adcq 8(%rbx),%r13 ++ adcq 16(%rbx),%r14 ++ adcq 24(%rbx),%r15 ++ movq %r12,0(%rdi) ++ leaq 32(%rbx),%rbx ++ movq %r13,8(%rdi) ++ sbbq %r10,%r10 ++ movq %r14,16(%rdi) ++ movq %r15,24(%rdi) ++ leaq 32(%rdi),%rdi ++ ++ incq %rcx ++ jnz L$sqr4x_sub ++ ++ movq %r9,%r10 ++ negq %r9 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _bn_from_montgomery ++.private_extern _bn_from_montgomery ++ ++.p2align 5 ++_bn_from_montgomery: ++ ++ testl $7,%r9d ++ jz bn_from_mont8x ++ xorl %eax,%eax ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++bn_from_mont8x: ++ ++.byte 0x67 ++ movq %rsp,%rax ++ ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$from_prologue: ++ ++ shll $3,%r9d ++ leaq 
(%r9,%r9,2),%r10 ++ negq %r9 ++ movq (%r8),%r8 ++ ++ ++ ++ ++ ++ ++ ++ ++ leaq -320(%rsp,%r9,2),%r11 ++ movq %rsp,%rbp ++ subq %rdi,%r11 ++ andq $4095,%r11 ++ cmpq %r11,%r10 ++ jb L$from_sp_alt ++ subq %r11,%rbp ++ leaq -320(%rbp,%r9,2),%rbp ++ jmp L$from_sp_done ++ ++.p2align 5 ++L$from_sp_alt: ++ leaq 4096-320(,%r9,2),%r10 ++ leaq -320(%rbp,%r9,2),%rbp ++ subq %r10,%r11 ++ movq $0,%r10 ++ cmovcq %r10,%r11 ++ subq %r11,%rbp ++L$from_sp_done: ++ andq $-64,%rbp ++ movq %rsp,%r11 ++ subq %rbp,%r11 ++ andq $-4096,%r11 ++ leaq (%r11,%rbp,1),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$from_page_walk ++ jmp L$from_page_walk_done ++ ++L$from_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$from_page_walk ++L$from_page_walk_done: ++ ++ movq %r9,%r10 ++ negq %r9 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ movq %r8,32(%rsp) ++ movq %rax,40(%rsp) ++ ++L$from_body: ++ movq %r9,%r11 ++ leaq 48(%rsp),%rax ++ pxor %xmm0,%xmm0 ++ jmp L$mul_by_1 ++ ++.p2align 5 ++L$mul_by_1: ++ movdqu (%rsi),%xmm1 ++ movdqu 16(%rsi),%xmm2 ++ movdqu 32(%rsi),%xmm3 ++ movdqa %xmm0,(%rax,%r9,1) ++ movdqu 48(%rsi),%xmm4 ++ movdqa %xmm0,16(%rax,%r9,1) ++.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 ++ movdqa %xmm1,(%rax) ++ movdqa %xmm0,32(%rax,%r9,1) ++ movdqa %xmm2,16(%rax) ++ movdqa %xmm0,48(%rax,%r9,1) ++ movdqa %xmm3,32(%rax) ++ movdqa %xmm4,48(%rax) ++ leaq 64(%rax),%rax ++ subq $64,%r11 ++ jnz L$mul_by_1 ++ ++.byte 102,72,15,110,207 ++.byte 102,72,15,110,209 ++.byte 0x67 ++ movq %rcx,%rbp ++.byte 102,73,15,110,218 ++ leaq _OPENSSL_ia32cap_P(%rip),%r11 ++ movl 8(%r11),%r11d ++ andl $0x80108,%r11d ++ cmpl $0x80108,%r11d ++ jne L$from_mont_nox ++ ++ leaq (%rax,%r9,1),%rdi ++ call __bn_sqrx8x_reduction ++ call __bn_postx4x_internal ++ ++ pxor %xmm0,%xmm0 ++ leaq 48(%rsp),%rax ++ jmp L$from_mont_zero ++ ++.p2align 5 ++L$from_mont_nox: ++ call __bn_sqr8x_reduction ++ call __bn_post4x_internal ++ ++ pxor %xmm0,%xmm0 ++ leaq 48(%rsp),%rax ++ jmp L$from_mont_zero ++ ++.p2align 5 ++L$from_mont_zero: ++ movq 40(%rsp),%rsi ++ ++ movdqa %xmm0,0(%rax) ++ movdqa %xmm0,16(%rax) ++ movdqa %xmm0,32(%rax) ++ movdqa %xmm0,48(%rax) ++ leaq 64(%rax),%rax ++ subq $32,%r9 ++ jnz L$from_mont_zero ++ ++ movq $1,%rax ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$from_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.p2align 5 ++bn_mulx4x_mont_gather5: ++ ++ movq %rsp,%rax ++ ++L$mulx4x_enter: ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$mulx4x_prologue: ++ ++ shll $3,%r9d ++ leaq (%r9,%r9,2),%r10 ++ negq %r9 ++ movq (%r8),%r8 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ leaq -320(%rsp,%r9,2),%r11 ++ movq %rsp,%rbp ++ subq %rdi,%r11 ++ andq $4095,%r11 ++ cmpq %r11,%r10 ++ jb L$mulx4xsp_alt ++ subq %r11,%rbp ++ leaq -320(%rbp,%r9,2),%rbp ++ jmp L$mulx4xsp_done ++ ++L$mulx4xsp_alt: ++ leaq 4096-320(,%r9,2),%r10 ++ leaq -320(%rbp,%r9,2),%rbp ++ subq %r10,%r11 ++ movq $0,%r10 ++ cmovcq %r10,%r11 ++ subq %r11,%rbp ++L$mulx4xsp_done: ++ andq $-64,%rbp ++ movq %rsp,%r11 ++ subq %rbp,%r11 ++ andq $-4096,%r11 ++ leaq (%r11,%rbp,1),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$mulx4x_page_walk ++ jmp L$mulx4x_page_walk_done ++ ++L$mulx4x_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$mulx4x_page_walk ++L$mulx4x_page_walk_done: ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ movq %r8,32(%rsp) ++ movq %rax,40(%rsp) ++ 
++L$mulx4x_body: ++ call mulx4x_internal ++ ++ movq 40(%rsp),%rsi ++ ++ movq $1,%rax ++ ++ movq -48(%rsi),%r15 ++ ++ movq -40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$mulx4x_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++.p2align 5 ++mulx4x_internal: ++ ++ movq %r9,8(%rsp) ++ movq %r9,%r10 ++ negq %r9 ++ shlq $5,%r9 ++ negq %r10 ++ leaq 128(%rdx,%r9,1),%r13 ++ shrq $5+5,%r9 ++ movd 8(%rax),%xmm5 ++ subq $1,%r9 ++ leaq L$inc(%rip),%rax ++ movq %r13,16+8(%rsp) ++ movq %r9,24+8(%rsp) ++ movq %rdi,56+8(%rsp) ++ movdqa 0(%rax),%xmm0 ++ movdqa 16(%rax),%xmm1 ++ leaq 88-112(%rsp,%r10,1),%r10 ++ leaq 128(%rdx),%rdi ++ ++ pshufd $0,%xmm5,%xmm5 ++ movdqa %xmm1,%xmm4 ++.byte 0x67 ++ movdqa %xmm1,%xmm2 ++.byte 0x67 ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,112(%r10) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,128(%r10) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,144(%r10) ++ movdqa %xmm4,%xmm2 ++ ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,160(%r10) ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,176(%r10) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,192(%r10) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,208(%r10) ++ movdqa %xmm4,%xmm2 ++ ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,224(%r10) ++ movdqa %xmm4,%xmm3 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,240(%r10) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,256(%r10) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,272(%r10) ++ movdqa %xmm4,%xmm2 ++ ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,288(%r10) ++ movdqa %xmm4,%xmm3 ++.byte 0x67 ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,304(%r10) ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,320(%r10) ++ ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,336(%r10) ++ ++ pand 64(%rdi),%xmm0 ++ pand 80(%rdi),%xmm1 ++ pand 96(%rdi),%xmm2 ++ movdqa %xmm3,352(%r10) ++ pand 112(%rdi),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ movdqa -128(%rdi),%xmm4 ++ movdqa -112(%rdi),%xmm5 ++ movdqa -96(%rdi),%xmm2 ++ pand 112(%r10),%xmm4 ++ movdqa -80(%rdi),%xmm3 ++ pand 128(%r10),%xmm5 ++ por %xmm4,%xmm0 ++ pand 144(%r10),%xmm2 ++ por %xmm5,%xmm1 ++ pand 160(%r10),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ movdqa -64(%rdi),%xmm4 ++ movdqa -48(%rdi),%xmm5 ++ movdqa -32(%rdi),%xmm2 ++ pand 176(%r10),%xmm4 ++ movdqa -16(%rdi),%xmm3 ++ pand 192(%r10),%xmm5 ++ por %xmm4,%xmm0 ++ pand 208(%r10),%xmm2 ++ por %xmm5,%xmm1 ++ pand 224(%r10),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ movdqa 0(%rdi),%xmm4 ++ movdqa 16(%rdi),%xmm5 ++ movdqa 32(%rdi),%xmm2 ++ pand 240(%r10),%xmm4 ++ movdqa 48(%rdi),%xmm3 ++ pand 256(%r10),%xmm5 ++ por %xmm4,%xmm0 ++ pand 272(%r10),%xmm2 ++ por %xmm5,%xmm1 ++ pand 288(%r10),%xmm3 ++ por %xmm2,%xmm0 ++ por %xmm3,%xmm1 ++ pxor %xmm1,%xmm0 ++ pshufd $0x4e,%xmm0,%xmm1 ++ por %xmm1,%xmm0 ++ leaq 256(%rdi),%rdi ++.byte 102,72,15,126,194 ++ leaq 64+32+8(%rsp),%rbx ++ ++ movq %rdx,%r9 ++ mulxq 0(%rsi),%r8,%rax ++ mulxq 8(%rsi),%r11,%r12 ++ addq %rax,%r11 ++ mulxq 16(%rsi),%rax,%r13 ++ adcq %rax,%r12 ++ adcq $0,%r13 ++ mulxq 24(%rsi),%rax,%r14 ++ ++ movq %r8,%r15 ++ imulq 
32+8(%rsp),%r8 ++ xorq %rbp,%rbp ++ movq %r8,%rdx ++ ++ movq %rdi,8+8(%rsp) ++ ++ leaq 32(%rsi),%rsi ++ adcxq %rax,%r13 ++ adcxq %rbp,%r14 ++ ++ mulxq 0(%rcx),%rax,%r10 ++ adcxq %rax,%r15 ++ adoxq %r11,%r10 ++ mulxq 8(%rcx),%rax,%r11 ++ adcxq %rax,%r10 ++ adoxq %r12,%r11 ++ mulxq 16(%rcx),%rax,%r12 ++ movq 24+8(%rsp),%rdi ++ movq %r10,-32(%rbx) ++ adcxq %rax,%r11 ++ adoxq %r13,%r12 ++ mulxq 24(%rcx),%rax,%r15 ++ movq %r9,%rdx ++ movq %r11,-24(%rbx) ++ adcxq %rax,%r12 ++ adoxq %rbp,%r15 ++ leaq 32(%rcx),%rcx ++ movq %r12,-16(%rbx) ++ jmp L$mulx4x_1st ++ ++.p2align 5 ++L$mulx4x_1st: ++ adcxq %rbp,%r15 ++ mulxq 0(%rsi),%r10,%rax ++ adcxq %r14,%r10 ++ mulxq 8(%rsi),%r11,%r14 ++ adcxq %rax,%r11 ++ mulxq 16(%rsi),%r12,%rax ++ adcxq %r14,%r12 ++ mulxq 24(%rsi),%r13,%r14 ++.byte 0x67,0x67 ++ movq %r8,%rdx ++ adcxq %rax,%r13 ++ adcxq %rbp,%r14 ++ leaq 32(%rsi),%rsi ++ leaq 32(%rbx),%rbx ++ ++ adoxq %r15,%r10 ++ mulxq 0(%rcx),%rax,%r15 ++ adcxq %rax,%r10 ++ adoxq %r15,%r11 ++ mulxq 8(%rcx),%rax,%r15 ++ adcxq %rax,%r11 ++ adoxq %r15,%r12 ++ mulxq 16(%rcx),%rax,%r15 ++ movq %r10,-40(%rbx) ++ adcxq %rax,%r12 ++ movq %r11,-32(%rbx) ++ adoxq %r15,%r13 ++ mulxq 24(%rcx),%rax,%r15 ++ movq %r9,%rdx ++ movq %r12,-24(%rbx) ++ adcxq %rax,%r13 ++ adoxq %rbp,%r15 ++ leaq 32(%rcx),%rcx ++ movq %r13,-16(%rbx) ++ ++ decq %rdi ++ jnz L$mulx4x_1st ++ ++ movq 8(%rsp),%rax ++ adcq %rbp,%r15 ++ leaq (%rsi,%rax,1),%rsi ++ addq %r15,%r14 ++ movq 8+8(%rsp),%rdi ++ adcq %rbp,%rbp ++ movq %r14,-8(%rbx) ++ jmp L$mulx4x_outer ++ ++.p2align 5 ++L$mulx4x_outer: ++ leaq 16-256(%rbx),%r10 ++ pxor %xmm4,%xmm4 ++.byte 0x67,0x67 ++ pxor %xmm5,%xmm5 ++ movdqa -128(%rdi),%xmm0 ++ movdqa -112(%rdi),%xmm1 ++ movdqa -96(%rdi),%xmm2 ++ pand 256(%r10),%xmm0 ++ movdqa -80(%rdi),%xmm3 ++ pand 272(%r10),%xmm1 ++ por %xmm0,%xmm4 ++ pand 288(%r10),%xmm2 ++ por %xmm1,%xmm5 ++ pand 304(%r10),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa -64(%rdi),%xmm0 ++ movdqa -48(%rdi),%xmm1 ++ movdqa -32(%rdi),%xmm2 ++ pand 320(%r10),%xmm0 ++ movdqa -16(%rdi),%xmm3 ++ pand 336(%r10),%xmm1 ++ por %xmm0,%xmm4 ++ pand 352(%r10),%xmm2 ++ por %xmm1,%xmm5 ++ pand 368(%r10),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa 0(%rdi),%xmm0 ++ movdqa 16(%rdi),%xmm1 ++ movdqa 32(%rdi),%xmm2 ++ pand 384(%r10),%xmm0 ++ movdqa 48(%rdi),%xmm3 ++ pand 400(%r10),%xmm1 ++ por %xmm0,%xmm4 ++ pand 416(%r10),%xmm2 ++ por %xmm1,%xmm5 ++ pand 432(%r10),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa 64(%rdi),%xmm0 ++ movdqa 80(%rdi),%xmm1 ++ movdqa 96(%rdi),%xmm2 ++ pand 448(%r10),%xmm0 ++ movdqa 112(%rdi),%xmm3 ++ pand 464(%r10),%xmm1 ++ por %xmm0,%xmm4 ++ pand 480(%r10),%xmm2 ++ por %xmm1,%xmm5 ++ pand 496(%r10),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ por %xmm5,%xmm4 ++ pshufd $0x4e,%xmm4,%xmm0 ++ por %xmm4,%xmm0 ++ leaq 256(%rdi),%rdi ++.byte 102,72,15,126,194 ++ ++ movq %rbp,(%rbx) ++ leaq 32(%rbx,%rax,1),%rbx ++ mulxq 0(%rsi),%r8,%r11 ++ xorq %rbp,%rbp ++ movq %rdx,%r9 ++ mulxq 8(%rsi),%r14,%r12 ++ adoxq -32(%rbx),%r8 ++ adcxq %r14,%r11 ++ mulxq 16(%rsi),%r15,%r13 ++ adoxq -24(%rbx),%r11 ++ adcxq %r15,%r12 ++ mulxq 24(%rsi),%rdx,%r14 ++ adoxq -16(%rbx),%r12 ++ adcxq %rdx,%r13 ++ leaq (%rcx,%rax,1),%rcx ++ leaq 32(%rsi),%rsi ++ adoxq -8(%rbx),%r13 ++ adcxq %rbp,%r14 ++ adoxq %rbp,%r14 ++ ++ movq %r8,%r15 ++ imulq 32+8(%rsp),%r8 ++ ++ movq %r8,%rdx ++ xorq %rbp,%rbp ++ movq %rdi,8+8(%rsp) ++ ++ mulxq 0(%rcx),%rax,%r10 ++ adcxq %rax,%r15 ++ adoxq %r11,%r10 ++ mulxq 8(%rcx),%rax,%r11 ++ adcxq %rax,%r10 ++ adoxq %r12,%r11 ++ mulxq 16(%rcx),%rax,%r12 ++ adcxq 
%rax,%r11 ++ adoxq %r13,%r12 ++ mulxq 24(%rcx),%rax,%r15 ++ movq %r9,%rdx ++ movq 24+8(%rsp),%rdi ++ movq %r10,-32(%rbx) ++ adcxq %rax,%r12 ++ movq %r11,-24(%rbx) ++ adoxq %rbp,%r15 ++ movq %r12,-16(%rbx) ++ leaq 32(%rcx),%rcx ++ jmp L$mulx4x_inner ++ ++.p2align 5 ++L$mulx4x_inner: ++ mulxq 0(%rsi),%r10,%rax ++ adcxq %rbp,%r15 ++ adoxq %r14,%r10 ++ mulxq 8(%rsi),%r11,%r14 ++ adcxq 0(%rbx),%r10 ++ adoxq %rax,%r11 ++ mulxq 16(%rsi),%r12,%rax ++ adcxq 8(%rbx),%r11 ++ adoxq %r14,%r12 ++ mulxq 24(%rsi),%r13,%r14 ++ movq %r8,%rdx ++ adcxq 16(%rbx),%r12 ++ adoxq %rax,%r13 ++ adcxq 24(%rbx),%r13 ++ adoxq %rbp,%r14 ++ leaq 32(%rsi),%rsi ++ leaq 32(%rbx),%rbx ++ adcxq %rbp,%r14 ++ ++ adoxq %r15,%r10 ++ mulxq 0(%rcx),%rax,%r15 ++ adcxq %rax,%r10 ++ adoxq %r15,%r11 ++ mulxq 8(%rcx),%rax,%r15 ++ adcxq %rax,%r11 ++ adoxq %r15,%r12 ++ mulxq 16(%rcx),%rax,%r15 ++ movq %r10,-40(%rbx) ++ adcxq %rax,%r12 ++ adoxq %r15,%r13 ++ movq %r11,-32(%rbx) ++ mulxq 24(%rcx),%rax,%r15 ++ movq %r9,%rdx ++ leaq 32(%rcx),%rcx ++ movq %r12,-24(%rbx) ++ adcxq %rax,%r13 ++ adoxq %rbp,%r15 ++ movq %r13,-16(%rbx) ++ ++ decq %rdi ++ jnz L$mulx4x_inner ++ ++ movq 0+8(%rsp),%rax ++ adcq %rbp,%r15 ++ subq 0(%rbx),%rdi ++ movq 8+8(%rsp),%rdi ++ movq 16+8(%rsp),%r10 ++ adcq %r15,%r14 ++ leaq (%rsi,%rax,1),%rsi ++ adcq %rbp,%rbp ++ movq %r14,-8(%rbx) ++ ++ cmpq %r10,%rdi ++ jb L$mulx4x_outer ++ ++ movq -8(%rcx),%r10 ++ movq %rbp,%r8 ++ movq (%rcx,%rax,1),%r12 ++ leaq (%rcx,%rax,1),%rbp ++ movq %rax,%rcx ++ leaq (%rbx,%rax,1),%rdi ++ xorl %eax,%eax ++ xorq %r15,%r15 ++ subq %r14,%r10 ++ adcq %r15,%r15 ++ orq %r15,%r8 ++ sarq $3+2,%rcx ++ subq %r8,%rax ++ movq 56+8(%rsp),%rdx ++ decq %r12 ++ movq 8(%rbp),%r13 ++ xorq %r8,%r8 ++ movq 16(%rbp),%r14 ++ movq 24(%rbp),%r15 ++ jmp L$sqrx4x_sub_entry ++ ++ ++ ++.p2align 5 ++bn_powerx5: ++ ++ movq %rsp,%rax ++ ++L$powerx5_enter: ++ pushq %rbx ++ ++ pushq %rbp ++ ++ pushq %r12 ++ ++ pushq %r13 ++ ++ pushq %r14 ++ ++ pushq %r15 ++ ++L$powerx5_prologue: ++ ++ shll $3,%r9d ++ leaq (%r9,%r9,2),%r10 ++ negq %r9 ++ movq (%r8),%r8 ++ ++ ++ ++ ++ ++ ++ ++ ++ leaq -320(%rsp,%r9,2),%r11 ++ movq %rsp,%rbp ++ subq %rdi,%r11 ++ andq $4095,%r11 ++ cmpq %r11,%r10 ++ jb L$pwrx_sp_alt ++ subq %r11,%rbp ++ leaq -320(%rbp,%r9,2),%rbp ++ jmp L$pwrx_sp_done ++ ++.p2align 5 ++L$pwrx_sp_alt: ++ leaq 4096-320(,%r9,2),%r10 ++ leaq -320(%rbp,%r9,2),%rbp ++ subq %r10,%r11 ++ movq $0,%r10 ++ cmovcq %r10,%r11 ++ subq %r11,%rbp ++L$pwrx_sp_done: ++ andq $-64,%rbp ++ movq %rsp,%r11 ++ subq %rbp,%r11 ++ andq $-4096,%r11 ++ leaq (%r11,%rbp,1),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$pwrx_page_walk ++ jmp L$pwrx_page_walk_done ++ ++L$pwrx_page_walk: ++ leaq -4096(%rsp),%rsp ++ movq (%rsp),%r10 ++ cmpq %rbp,%rsp ++ ja L$pwrx_page_walk ++L$pwrx_page_walk_done: ++ ++ movq %r9,%r10 ++ negq %r9 ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ pxor %xmm0,%xmm0 ++.byte 102,72,15,110,207 ++.byte 102,72,15,110,209 ++.byte 102,73,15,110,218 ++.byte 102,72,15,110,226 ++ movq %r8,32(%rsp) ++ movq %rax,40(%rsp) ++ ++L$powerx5_body: ++ ++ call __bn_sqrx8x_internal ++ call __bn_postx4x_internal ++ call __bn_sqrx8x_internal ++ call __bn_postx4x_internal ++ call __bn_sqrx8x_internal ++ call __bn_postx4x_internal ++ call __bn_sqrx8x_internal ++ call __bn_postx4x_internal ++ call __bn_sqrx8x_internal ++ call __bn_postx4x_internal ++ ++ movq %r10,%r9 ++ movq %rsi,%rdi ++.byte 102,72,15,126,209 ++.byte 102,72,15,126,226 ++ movq 40(%rsp),%rax ++ ++ call mulx4x_internal ++ ++ movq 40(%rsp),%rsi ++ ++ movq $1,%rax ++ ++ movq -48(%rsi),%r15 ++ ++ movq 
-40(%rsi),%r14 ++ ++ movq -32(%rsi),%r13 ++ ++ movq -24(%rsi),%r12 ++ ++ movq -16(%rsi),%rbp ++ ++ movq -8(%rsi),%rbx ++ ++ leaq (%rsi),%rsp ++ ++L$powerx5_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _bn_sqrx8x_internal ++.private_extern _bn_sqrx8x_internal ++.private_extern _bn_sqrx8x_internal ++ ++.p2align 5 ++_bn_sqrx8x_internal: ++__bn_sqrx8x_internal: ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ leaq 48+8(%rsp),%rdi ++ leaq (%rsi,%r9,1),%rbp ++ movq %r9,0+8(%rsp) ++ movq %rbp,8+8(%rsp) ++ jmp L$sqr8x_zero_start ++ ++.p2align 5 ++.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 ++L$sqrx8x_zero: ++.byte 0x3e ++ movdqa %xmm0,0(%rdi) ++ movdqa %xmm0,16(%rdi) ++ movdqa %xmm0,32(%rdi) ++ movdqa %xmm0,48(%rdi) ++L$sqr8x_zero_start: ++ movdqa %xmm0,64(%rdi) ++ movdqa %xmm0,80(%rdi) ++ movdqa %xmm0,96(%rdi) ++ movdqa %xmm0,112(%rdi) ++ leaq 128(%rdi),%rdi ++ subq $64,%r9 ++ jnz L$sqrx8x_zero ++ ++ movq 0(%rsi),%rdx ++ ++ xorq %r10,%r10 ++ xorq %r11,%r11 ++ xorq %r12,%r12 ++ xorq %r13,%r13 ++ xorq %r14,%r14 ++ xorq %r15,%r15 ++ leaq 48+8(%rsp),%rdi ++ xorq %rbp,%rbp ++ jmp L$sqrx8x_outer_loop ++ ++.p2align 5 ++L$sqrx8x_outer_loop: ++ mulxq 8(%rsi),%r8,%rax ++ adcxq %r9,%r8 ++ adoxq %rax,%r10 ++ mulxq 16(%rsi),%r9,%rax ++ adcxq %r10,%r9 ++ adoxq %rax,%r11 ++.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 ++ adcxq %r11,%r10 ++ adoxq %rax,%r12 ++.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 ++ adcxq %r12,%r11 ++ adoxq %rax,%r13 ++ mulxq 40(%rsi),%r12,%rax ++ adcxq %r13,%r12 ++ adoxq %rax,%r14 ++ mulxq 48(%rsi),%r13,%rax ++ adcxq %r14,%r13 ++ adoxq %r15,%rax ++ mulxq 56(%rsi),%r14,%r15 ++ movq 8(%rsi),%rdx ++ adcxq %rax,%r14 ++ adoxq %rbp,%r15 ++ adcq 64(%rdi),%r15 ++ movq %r8,8(%rdi) ++ movq %r9,16(%rdi) ++ sbbq %rcx,%rcx ++ xorq %rbp,%rbp ++ ++ ++ mulxq 16(%rsi),%r8,%rbx ++ mulxq 24(%rsi),%r9,%rax ++ adcxq %r10,%r8 ++ adoxq %rbx,%r9 ++ mulxq 32(%rsi),%r10,%rbx ++ adcxq %r11,%r9 ++ adoxq %rax,%r10 ++.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 ++ adcxq %r12,%r10 ++ adoxq %rbx,%r11 ++.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 ++ adcxq %r13,%r11 ++ adoxq %r14,%r12 ++.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 ++ movq 16(%rsi),%rdx ++ adcxq %rax,%r12 ++ adoxq %rbx,%r13 ++ adcxq %r15,%r13 ++ adoxq %rbp,%r14 ++ adcxq %rbp,%r14 ++ ++ movq %r8,24(%rdi) ++ movq %r9,32(%rdi) ++ ++ mulxq 24(%rsi),%r8,%rbx ++ mulxq 32(%rsi),%r9,%rax ++ adcxq %r10,%r8 ++ adoxq %rbx,%r9 ++ mulxq 40(%rsi),%r10,%rbx ++ adcxq %r11,%r9 ++ adoxq %rax,%r10 ++.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 ++ adcxq %r12,%r10 ++ adoxq %r13,%r11 ++.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 ++.byte 0x3e ++ movq 24(%rsi),%rdx ++ adcxq %rbx,%r11 ++ adoxq %rax,%r12 ++ adcxq %r14,%r12 ++ movq %r8,40(%rdi) ++ movq %r9,48(%rdi) ++ mulxq 32(%rsi),%r8,%rax ++ adoxq %rbp,%r13 ++ adcxq %rbp,%r13 ++ ++ mulxq 40(%rsi),%r9,%rbx ++ adcxq %r10,%r8 ++ adoxq %rax,%r9 ++ mulxq 48(%rsi),%r10,%rax ++ adcxq %r11,%r9 ++ adoxq %r12,%r10 ++ mulxq 56(%rsi),%r11,%r12 ++ movq 32(%rsi),%rdx ++ movq 40(%rsi),%r14 ++ adcxq %rbx,%r10 ++ adoxq %rax,%r11 ++ movq 48(%rsi),%r15 ++ adcxq %r13,%r11 ++ adoxq %rbp,%r12 ++ adcxq %rbp,%r12 ++ ++ movq %r8,56(%rdi) ++ movq %r9,64(%rdi) ++ ++ mulxq %r14,%r9,%rax ++ movq 56(%rsi),%r8 ++ adcxq %r10,%r9 ++ mulxq %r15,%r10,%rbx ++ adoxq %rax,%r10 ++ adcxq %r11,%r10 ++ mulxq %r8,%r11,%rax ++ movq %r14,%rdx ++ adoxq %rbx,%r11 ++ adcxq %r12,%r11 ++ ++ adcxq %rbp,%rax ++ ++ mulxq %r15,%r14,%rbx ++ 
mulxq %r8,%r12,%r13 ++ movq %r15,%rdx ++ leaq 64(%rsi),%rsi ++ adcxq %r14,%r11 ++ adoxq %rbx,%r12 ++ adcxq %rax,%r12 ++ adoxq %rbp,%r13 ++ ++.byte 0x67,0x67 ++ mulxq %r8,%r8,%r14 ++ adcxq %r8,%r13 ++ adcxq %rbp,%r14 ++ ++ cmpq 8+8(%rsp),%rsi ++ je L$sqrx8x_outer_break ++ ++ negq %rcx ++ movq $-8,%rcx ++ movq %rbp,%r15 ++ movq 64(%rdi),%r8 ++ adcxq 72(%rdi),%r9 ++ adcxq 80(%rdi),%r10 ++ adcxq 88(%rdi),%r11 ++ adcq 96(%rdi),%r12 ++ adcq 104(%rdi),%r13 ++ adcq 112(%rdi),%r14 ++ adcq 120(%rdi),%r15 ++ leaq (%rsi),%rbp ++ leaq 128(%rdi),%rdi ++ sbbq %rax,%rax ++ ++ movq -64(%rsi),%rdx ++ movq %rax,16+8(%rsp) ++ movq %rdi,24+8(%rsp) ++ ++ ++ xorl %eax,%eax ++ jmp L$sqrx8x_loop ++ ++.p2align 5 ++L$sqrx8x_loop: ++ movq %r8,%rbx ++ mulxq 0(%rbp),%rax,%r8 ++ adcxq %rax,%rbx ++ adoxq %r9,%r8 ++ ++ mulxq 8(%rbp),%rax,%r9 ++ adcxq %rax,%r8 ++ adoxq %r10,%r9 ++ ++ mulxq 16(%rbp),%rax,%r10 ++ adcxq %rax,%r9 ++ adoxq %r11,%r10 ++ ++ mulxq 24(%rbp),%rax,%r11 ++ adcxq %rax,%r10 ++ adoxq %r12,%r11 ++ ++.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 ++ adcxq %rax,%r11 ++ adoxq %r13,%r12 ++ ++ mulxq 40(%rbp),%rax,%r13 ++ adcxq %rax,%r12 ++ adoxq %r14,%r13 ++ ++ mulxq 48(%rbp),%rax,%r14 ++ movq %rbx,(%rdi,%rcx,8) ++ movl $0,%ebx ++ adcxq %rax,%r13 ++ adoxq %r15,%r14 ++ ++.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 ++ movq 8(%rsi,%rcx,8),%rdx ++ adcxq %rax,%r14 ++ adoxq %rbx,%r15 ++ adcxq %rbx,%r15 ++ ++.byte 0x67 ++ incq %rcx ++ jnz L$sqrx8x_loop ++ ++ leaq 64(%rbp),%rbp ++ movq $-8,%rcx ++ cmpq 8+8(%rsp),%rbp ++ je L$sqrx8x_break ++ ++ subq 16+8(%rsp),%rbx ++.byte 0x66 ++ movq -64(%rsi),%rdx ++ adcxq 0(%rdi),%r8 ++ adcxq 8(%rdi),%r9 ++ adcq 16(%rdi),%r10 ++ adcq 24(%rdi),%r11 ++ adcq 32(%rdi),%r12 ++ adcq 40(%rdi),%r13 ++ adcq 48(%rdi),%r14 ++ adcq 56(%rdi),%r15 ++ leaq 64(%rdi),%rdi ++.byte 0x67 ++ sbbq %rax,%rax ++ xorl %ebx,%ebx ++ movq %rax,16+8(%rsp) ++ jmp L$sqrx8x_loop ++ ++.p2align 5 ++L$sqrx8x_break: ++ xorq %rbp,%rbp ++ subq 16+8(%rsp),%rbx ++ adcxq %rbp,%r8 ++ movq 24+8(%rsp),%rcx ++ adcxq %rbp,%r9 ++ movq 0(%rsi),%rdx ++ adcq $0,%r10 ++ movq %r8,0(%rdi) ++ adcq $0,%r11 ++ adcq $0,%r12 ++ adcq $0,%r13 ++ adcq $0,%r14 ++ adcq $0,%r15 ++ cmpq %rcx,%rdi ++ je L$sqrx8x_outer_loop ++ ++ movq %r9,8(%rdi) ++ movq 8(%rcx),%r9 ++ movq %r10,16(%rdi) ++ movq 16(%rcx),%r10 ++ movq %r11,24(%rdi) ++ movq 24(%rcx),%r11 ++ movq %r12,32(%rdi) ++ movq 32(%rcx),%r12 ++ movq %r13,40(%rdi) ++ movq 40(%rcx),%r13 ++ movq %r14,48(%rdi) ++ movq 48(%rcx),%r14 ++ movq %r15,56(%rdi) ++ movq 56(%rcx),%r15 ++ movq %rcx,%rdi ++ jmp L$sqrx8x_outer_loop ++ ++.p2align 5 ++L$sqrx8x_outer_break: ++ movq %r9,72(%rdi) ++.byte 102,72,15,126,217 ++ movq %r10,80(%rdi) ++ movq %r11,88(%rdi) ++ movq %r12,96(%rdi) ++ movq %r13,104(%rdi) ++ movq %r14,112(%rdi) ++ leaq 48+8(%rsp),%rdi ++ movq (%rsi,%rcx,1),%rdx ++ ++ movq 8(%rdi),%r11 ++ xorq %r10,%r10 ++ movq 0+8(%rsp),%r9 ++ adoxq %r11,%r11 ++ movq 16(%rdi),%r12 ++ movq 24(%rdi),%r13 ++ ++ ++.p2align 5 ++L$sqrx4x_shift_n_add: ++ mulxq %rdx,%rax,%rbx ++ adoxq %r12,%r12 ++ adcxq %r10,%rax ++.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 ++.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 ++ adoxq %r13,%r13 ++ adcxq %r11,%rbx ++ movq 40(%rdi),%r11 ++ movq %rax,0(%rdi) ++ movq %rbx,8(%rdi) ++ ++ mulxq %rdx,%rax,%rbx ++ adoxq %r10,%r10 ++ adcxq %r12,%rax ++ movq 16(%rsi,%rcx,1),%rdx ++ movq 48(%rdi),%r12 ++ adoxq %r11,%r11 ++ adcxq %r13,%rbx ++ movq 56(%rdi),%r13 ++ movq %rax,16(%rdi) ++ movq %rbx,24(%rdi) ++ ++ mulxq %rdx,%rax,%rbx ++ adoxq %r12,%r12 ++ adcxq %r10,%rax ++ movq 
24(%rsi,%rcx,1),%rdx ++ leaq 32(%rcx),%rcx ++ movq 64(%rdi),%r10 ++ adoxq %r13,%r13 ++ adcxq %r11,%rbx ++ movq 72(%rdi),%r11 ++ movq %rax,32(%rdi) ++ movq %rbx,40(%rdi) ++ ++ mulxq %rdx,%rax,%rbx ++ adoxq %r10,%r10 ++ adcxq %r12,%rax ++ jrcxz L$sqrx4x_shift_n_add_break ++.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 ++ adoxq %r11,%r11 ++ adcxq %r13,%rbx ++ movq 80(%rdi),%r12 ++ movq 88(%rdi),%r13 ++ movq %rax,48(%rdi) ++ movq %rbx,56(%rdi) ++ leaq 64(%rdi),%rdi ++ nop ++ jmp L$sqrx4x_shift_n_add ++ ++.p2align 5 ++L$sqrx4x_shift_n_add_break: ++ adcxq %r13,%rbx ++ movq %rax,48(%rdi) ++ movq %rbx,56(%rdi) ++ leaq 64(%rdi),%rdi ++.byte 102,72,15,126,213 ++__bn_sqrx8x_reduction: ++ xorl %eax,%eax ++ movq 32+8(%rsp),%rbx ++ movq 48+8(%rsp),%rdx ++ leaq -64(%rbp,%r9,1),%rcx ++ ++ movq %rcx,0+8(%rsp) ++ movq %rdi,8+8(%rsp) ++ ++ leaq 48+8(%rsp),%rdi ++ jmp L$sqrx8x_reduction_loop ++ ++.p2align 5 ++L$sqrx8x_reduction_loop: ++ movq 8(%rdi),%r9 ++ movq 16(%rdi),%r10 ++ movq 24(%rdi),%r11 ++ movq 32(%rdi),%r12 ++ movq %rdx,%r8 ++ imulq %rbx,%rdx ++ movq 40(%rdi),%r13 ++ movq 48(%rdi),%r14 ++ movq 56(%rdi),%r15 ++ movq %rax,24+8(%rsp) ++ ++ leaq 64(%rdi),%rdi ++ xorq %rsi,%rsi ++ movq $-8,%rcx ++ jmp L$sqrx8x_reduce ++ ++.p2align 5 ++L$sqrx8x_reduce: ++ movq %r8,%rbx ++ mulxq 0(%rbp),%rax,%r8 ++ adcxq %rbx,%rax ++ adoxq %r9,%r8 ++ ++ mulxq 8(%rbp),%rbx,%r9 ++ adcxq %rbx,%r8 ++ adoxq %r10,%r9 ++ ++ mulxq 16(%rbp),%rbx,%r10 ++ adcxq %rbx,%r9 ++ adoxq %r11,%r10 ++ ++ mulxq 24(%rbp),%rbx,%r11 ++ adcxq %rbx,%r10 ++ adoxq %r12,%r11 ++ ++.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 ++ movq %rdx,%rax ++ movq %r8,%rdx ++ adcxq %rbx,%r11 ++ adoxq %r13,%r12 ++ ++ mulxq 32+8(%rsp),%rbx,%rdx ++ movq %rax,%rdx ++ movq %rax,64+48+8(%rsp,%rcx,8) ++ ++ mulxq 40(%rbp),%rax,%r13 ++ adcxq %rax,%r12 ++ adoxq %r14,%r13 ++ ++ mulxq 48(%rbp),%rax,%r14 ++ adcxq %rax,%r13 ++ adoxq %r15,%r14 ++ ++ mulxq 56(%rbp),%rax,%r15 ++ movq %rbx,%rdx ++ adcxq %rax,%r14 ++ adoxq %rsi,%r15 ++ adcxq %rsi,%r15 ++ ++.byte 0x67,0x67,0x67 ++ incq %rcx ++ jnz L$sqrx8x_reduce ++ ++ movq %rsi,%rax ++ cmpq 0+8(%rsp),%rbp ++ jae L$sqrx8x_no_tail ++ ++ movq 48+8(%rsp),%rdx ++ addq 0(%rdi),%r8 ++ leaq 64(%rbp),%rbp ++ movq $-8,%rcx ++ adcxq 8(%rdi),%r9 ++ adcxq 16(%rdi),%r10 ++ adcq 24(%rdi),%r11 ++ adcq 32(%rdi),%r12 ++ adcq 40(%rdi),%r13 ++ adcq 48(%rdi),%r14 ++ adcq 56(%rdi),%r15 ++ leaq 64(%rdi),%rdi ++ sbbq %rax,%rax ++ ++ xorq %rsi,%rsi ++ movq %rax,16+8(%rsp) ++ jmp L$sqrx8x_tail ++ ++.p2align 5 ++L$sqrx8x_tail: ++ movq %r8,%rbx ++ mulxq 0(%rbp),%rax,%r8 ++ adcxq %rax,%rbx ++ adoxq %r9,%r8 ++ ++ mulxq 8(%rbp),%rax,%r9 ++ adcxq %rax,%r8 ++ adoxq %r10,%r9 ++ ++ mulxq 16(%rbp),%rax,%r10 ++ adcxq %rax,%r9 ++ adoxq %r11,%r10 ++ ++ mulxq 24(%rbp),%rax,%r11 ++ adcxq %rax,%r10 ++ adoxq %r12,%r11 ++ ++.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 ++ adcxq %rax,%r11 ++ adoxq %r13,%r12 ++ ++ mulxq 40(%rbp),%rax,%r13 ++ adcxq %rax,%r12 ++ adoxq %r14,%r13 ++ ++ mulxq 48(%rbp),%rax,%r14 ++ adcxq %rax,%r13 ++ adoxq %r15,%r14 ++ ++ mulxq 56(%rbp),%rax,%r15 ++ movq 72+48+8(%rsp,%rcx,8),%rdx ++ adcxq %rax,%r14 ++ adoxq %rsi,%r15 ++ movq %rbx,(%rdi,%rcx,8) ++ movq %r8,%rbx ++ adcxq %rsi,%r15 ++ ++ incq %rcx ++ jnz L$sqrx8x_tail ++ ++ cmpq 0+8(%rsp),%rbp ++ jae L$sqrx8x_tail_done ++ ++ subq 16+8(%rsp),%rsi ++ movq 48+8(%rsp),%rdx ++ leaq 64(%rbp),%rbp ++ adcq 0(%rdi),%r8 ++ adcq 8(%rdi),%r9 ++ adcq 16(%rdi),%r10 ++ adcq 24(%rdi),%r11 ++ adcq 32(%rdi),%r12 ++ adcq 40(%rdi),%r13 ++ adcq 48(%rdi),%r14 ++ adcq 56(%rdi),%r15 ++ leaq 64(%rdi),%rdi ++ sbbq 
%rax,%rax ++ subq $8,%rcx ++ ++ xorq %rsi,%rsi ++ movq %rax,16+8(%rsp) ++ jmp L$sqrx8x_tail ++ ++.p2align 5 ++L$sqrx8x_tail_done: ++ xorq %rax,%rax ++ addq 24+8(%rsp),%r8 ++ adcq $0,%r9 ++ adcq $0,%r10 ++ adcq $0,%r11 ++ adcq $0,%r12 ++ adcq $0,%r13 ++ adcq $0,%r14 ++ adcq $0,%r15 ++ adcq $0,%rax ++ ++ subq 16+8(%rsp),%rsi ++L$sqrx8x_no_tail: ++ adcq 0(%rdi),%r8 ++.byte 102,72,15,126,217 ++ adcq 8(%rdi),%r9 ++ movq 56(%rbp),%rsi ++.byte 102,72,15,126,213 ++ adcq 16(%rdi),%r10 ++ adcq 24(%rdi),%r11 ++ adcq 32(%rdi),%r12 ++ adcq 40(%rdi),%r13 ++ adcq 48(%rdi),%r14 ++ adcq 56(%rdi),%r15 ++ adcq $0,%rax ++ ++ movq 32+8(%rsp),%rbx ++ movq 64(%rdi,%rcx,1),%rdx ++ ++ movq %r8,0(%rdi) ++ leaq 64(%rdi),%r8 ++ movq %r9,8(%rdi) ++ movq %r10,16(%rdi) ++ movq %r11,24(%rdi) ++ movq %r12,32(%rdi) ++ movq %r13,40(%rdi) ++ movq %r14,48(%rdi) ++ movq %r15,56(%rdi) ++ ++ leaq 64(%rdi,%rcx,1),%rdi ++ cmpq 8+8(%rsp),%r8 ++ jb L$sqrx8x_reduction_loop ++ .byte 0xf3,0xc3 ++ ++ ++.p2align 5 ++ ++__bn_postx4x_internal: ++ ++ movq 0(%rbp),%r12 ++ movq %rcx,%r10 ++ movq %rcx,%r9 ++ negq %rax ++ sarq $3+2,%rcx ++ ++.byte 102,72,15,126,202 ++.byte 102,72,15,126,206 ++ decq %r12 ++ movq 8(%rbp),%r13 ++ xorq %r8,%r8 ++ movq 16(%rbp),%r14 ++ movq 24(%rbp),%r15 ++ jmp L$sqrx4x_sub_entry ++ ++.p2align 4 ++L$sqrx4x_sub: ++ movq 0(%rbp),%r12 ++ movq 8(%rbp),%r13 ++ movq 16(%rbp),%r14 ++ movq 24(%rbp),%r15 ++L$sqrx4x_sub_entry: ++ andnq %rax,%r12,%r12 ++ leaq 32(%rbp),%rbp ++ andnq %rax,%r13,%r13 ++ andnq %rax,%r14,%r14 ++ andnq %rax,%r15,%r15 ++ ++ negq %r8 ++ adcq 0(%rdi),%r12 ++ adcq 8(%rdi),%r13 ++ adcq 16(%rdi),%r14 ++ adcq 24(%rdi),%r15 ++ movq %r12,0(%rdx) ++ leaq 32(%rdi),%rdi ++ movq %r13,8(%rdx) ++ sbbq %r8,%r8 ++ movq %r14,16(%rdx) ++ movq %r15,24(%rdx) ++ leaq 32(%rdx),%rdx ++ ++ incq %rcx ++ jnz L$sqrx4x_sub ++ ++ negq %r9 ++ ++ .byte 0xf3,0xc3 ++ ++ ++.globl _bn_scatter5 ++.private_extern _bn_scatter5 ++ ++.p2align 4 ++_bn_scatter5: ++ ++ cmpl $0,%esi ++ jz L$scatter_epilogue ++ leaq (%rdx,%rcx,8),%rdx ++L$scatter: ++ movq (%rdi),%rax ++ leaq 8(%rdi),%rdi ++ movq %rax,(%rdx) ++ leaq 256(%rdx),%rdx ++ subl $1,%esi ++ jnz L$scatter ++L$scatter_epilogue: ++ .byte 0xf3,0xc3 ++ ++ ++ ++.globl _bn_gather5 ++.private_extern _bn_gather5 ++ ++.p2align 5 ++_bn_gather5: ++ ++L$SEH_begin_bn_gather5: ++ ++.byte 0x4c,0x8d,0x14,0x24 ++ ++.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 ++ leaq L$inc(%rip),%rax ++ andq $-16,%rsp ++ ++ movd %ecx,%xmm5 ++ movdqa 0(%rax),%xmm0 ++ movdqa 16(%rax),%xmm1 ++ leaq 128(%rdx),%r11 ++ leaq 128(%rsp),%rax ++ ++ pshufd $0,%xmm5,%xmm5 ++ movdqa %xmm1,%xmm4 ++ movdqa %xmm1,%xmm2 ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm4,%xmm3 ++ ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,-128(%rax) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,-112(%rax) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,-96(%rax) ++ movdqa %xmm4,%xmm2 ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,-80(%rax) ++ movdqa %xmm4,%xmm3 ++ ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,-64(%rax) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,-48(%rax) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,-32(%rax) ++ movdqa %xmm4,%xmm2 ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,-16(%rax) ++ movdqa %xmm4,%xmm3 ++ ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,0(%rax) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 
++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,16(%rax) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,32(%rax) ++ movdqa %xmm4,%xmm2 ++ paddd %xmm0,%xmm1 ++ pcmpeqd %xmm5,%xmm0 ++ movdqa %xmm3,48(%rax) ++ movdqa %xmm4,%xmm3 ++ ++ paddd %xmm1,%xmm2 ++ pcmpeqd %xmm5,%xmm1 ++ movdqa %xmm0,64(%rax) ++ movdqa %xmm4,%xmm0 ++ ++ paddd %xmm2,%xmm3 ++ pcmpeqd %xmm5,%xmm2 ++ movdqa %xmm1,80(%rax) ++ movdqa %xmm4,%xmm1 ++ ++ paddd %xmm3,%xmm0 ++ pcmpeqd %xmm5,%xmm3 ++ movdqa %xmm2,96(%rax) ++ movdqa %xmm4,%xmm2 ++ movdqa %xmm3,112(%rax) ++ jmp L$gather ++ ++.p2align 5 ++L$gather: ++ pxor %xmm4,%xmm4 ++ pxor %xmm5,%xmm5 ++ movdqa -128(%r11),%xmm0 ++ movdqa -112(%r11),%xmm1 ++ movdqa -96(%r11),%xmm2 ++ pand -128(%rax),%xmm0 ++ movdqa -80(%r11),%xmm3 ++ pand -112(%rax),%xmm1 ++ por %xmm0,%xmm4 ++ pand -96(%rax),%xmm2 ++ por %xmm1,%xmm5 ++ pand -80(%rax),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa -64(%r11),%xmm0 ++ movdqa -48(%r11),%xmm1 ++ movdqa -32(%r11),%xmm2 ++ pand -64(%rax),%xmm0 ++ movdqa -16(%r11),%xmm3 ++ pand -48(%rax),%xmm1 ++ por %xmm0,%xmm4 ++ pand -32(%rax),%xmm2 ++ por %xmm1,%xmm5 ++ pand -16(%rax),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa 0(%r11),%xmm0 ++ movdqa 16(%r11),%xmm1 ++ movdqa 32(%r11),%xmm2 ++ pand 0(%rax),%xmm0 ++ movdqa 48(%r11),%xmm3 ++ pand 16(%rax),%xmm1 ++ por %xmm0,%xmm4 ++ pand 32(%rax),%xmm2 ++ por %xmm1,%xmm5 ++ pand 48(%rax),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ movdqa 64(%r11),%xmm0 ++ movdqa 80(%r11),%xmm1 ++ movdqa 96(%r11),%xmm2 ++ pand 64(%rax),%xmm0 ++ movdqa 112(%r11),%xmm3 ++ pand 80(%rax),%xmm1 ++ por %xmm0,%xmm4 ++ pand 96(%rax),%xmm2 ++ por %xmm1,%xmm5 ++ pand 112(%rax),%xmm3 ++ por %xmm2,%xmm4 ++ por %xmm3,%xmm5 ++ por %xmm5,%xmm4 ++ leaq 256(%r11),%r11 ++ pshufd $0x4e,%xmm4,%xmm0 ++ por %xmm4,%xmm0 ++ movq %xmm0,(%rdi) ++ leaq 8(%rdi),%rdi ++ subl $1,%esi ++ jnz L$gather ++ ++ leaq (%r10),%rsp ++ ++ .byte 0xf3,0xc3 ++L$SEH_end_bn_gather5: ++ ++ ++.p2align 6 ++L$inc: ++.long 0,0, 1,1 ++.long 2,2, 2,2 ++.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++#endif +diff --git a/apple-x86_64/crypto/test/trampoline-x86_64.S b/apple-x86_64/crypto/test/trampoline-x86_64.S +new file mode 100644 +index 0000000..5f20aa7 +--- /dev/null ++++ b/apple-x86_64/crypto/test/trampoline-x86_64.S +@@ -0,0 +1,513 @@ ++// This file is generated from a similarly-named Perl script in the BoringSSL ++// source tree. Do not edit by hand. 
++ ++#if defined(__has_feature) ++#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) ++#define OPENSSL_NO_ASM ++#endif ++#endif ++ ++#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) ++#if defined(BORINGSSL_PREFIX) ++#include ++#endif ++.text ++ ++ ++ ++ ++ ++ ++ ++ ++ ++.globl _abi_test_trampoline ++.private_extern _abi_test_trampoline ++.p2align 4 ++_abi_test_trampoline: ++L$abi_test_trampoline_seh_begin: ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ subq $120,%rsp ++ ++L$abi_test_trampoline_seh_prolog_alloc: ++ movq %r8,48(%rsp) ++ movq %rbx,64(%rsp) ++ ++L$abi_test_trampoline_seh_prolog_rbx: ++ movq %rbp,72(%rsp) ++ ++L$abi_test_trampoline_seh_prolog_rbp: ++ movq %r12,80(%rsp) ++ ++L$abi_test_trampoline_seh_prolog_r12: ++ movq %r13,88(%rsp) ++ ++L$abi_test_trampoline_seh_prolog_r13: ++ movq %r14,96(%rsp) ++ ++L$abi_test_trampoline_seh_prolog_r14: ++ movq %r15,104(%rsp) ++ ++L$abi_test_trampoline_seh_prolog_r15: ++L$abi_test_trampoline_seh_prolog_end: ++ movq 0(%rsi),%rbx ++ movq 8(%rsi),%rbp ++ movq 16(%rsi),%r12 ++ movq 24(%rsi),%r13 ++ movq 32(%rsi),%r14 ++ movq 40(%rsi),%r15 ++ ++ movq %rdi,32(%rsp) ++ movq %rsi,40(%rsp) ++ ++ ++ ++ ++ movq %rdx,%r10 ++ movq %rcx,%r11 ++ decq %r11 ++ js L$args_done ++ movq (%r10),%rdi ++ addq $8,%r10 ++ decq %r11 ++ js L$args_done ++ movq (%r10),%rsi ++ addq $8,%r10 ++ decq %r11 ++ js L$args_done ++ movq (%r10),%rdx ++ addq $8,%r10 ++ decq %r11 ++ js L$args_done ++ movq (%r10),%rcx ++ addq $8,%r10 ++ decq %r11 ++ js L$args_done ++ movq (%r10),%r8 ++ addq $8,%r10 ++ decq %r11 ++ js L$args_done ++ movq (%r10),%r9 ++ addq $8,%r10 ++ leaq 0(%rsp),%rax ++L$args_loop: ++ decq %r11 ++ js L$args_done ++ ++ ++ ++ ++ ++ ++ movq %r11,56(%rsp) ++ movq (%r10),%r11 ++ movq %r11,(%rax) ++ movq 56(%rsp),%r11 ++ ++ addq $8,%r10 ++ addq $8,%rax ++ jmp L$args_loop ++ ++L$args_done: ++ movq 32(%rsp),%rax ++ movq 48(%rsp),%r10 ++ testq %r10,%r10 ++ jz L$no_unwind ++ ++ ++ pushfq ++ orq $0x100,0(%rsp) ++ popfq ++ ++ ++ ++ nop ++.globl _abi_test_unwind_start ++.private_extern _abi_test_unwind_start ++_abi_test_unwind_start: ++ ++ call *%rax ++.globl _abi_test_unwind_return ++.private_extern _abi_test_unwind_return ++_abi_test_unwind_return: ++ ++ ++ ++ ++ pushfq ++ andq $-0x101,0(%rsp) ++ popfq ++.globl _abi_test_unwind_stop ++.private_extern _abi_test_unwind_stop ++_abi_test_unwind_stop: ++ ++ jmp L$call_done ++ ++L$no_unwind: ++ call *%rax ++ ++L$call_done: ++ ++ movq 40(%rsp),%rsi ++ movq %rbx,0(%rsi) ++ movq %rbp,8(%rsi) ++ movq %r12,16(%rsi) ++ movq %r13,24(%rsi) ++ movq %r14,32(%rsi) ++ movq %r15,40(%rsi) ++ movq 64(%rsp),%rbx ++ ++ movq 72(%rsp),%rbp ++ ++ movq 80(%rsp),%r12 ++ ++ movq 88(%rsp),%r13 ++ ++ movq 96(%rsp),%r14 ++ ++ movq 104(%rsp),%r15 ++ ++ addq $120,%rsp ++ ++ ++ ++ .byte 0xf3,0xc3 ++ ++L$abi_test_trampoline_seh_end: ++ ++ ++.globl _abi_test_clobber_rax ++.private_extern _abi_test_clobber_rax ++.p2align 4 ++_abi_test_clobber_rax: ++ xorq %rax,%rax ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_rbx ++.private_extern _abi_test_clobber_rbx ++.p2align 4 ++_abi_test_clobber_rbx: ++ xorq %rbx,%rbx ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_rcx ++.private_extern _abi_test_clobber_rcx ++.p2align 4 ++_abi_test_clobber_rcx: ++ xorq %rcx,%rcx ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_rdx ++.private_extern _abi_test_clobber_rdx ++.p2align 4 ++_abi_test_clobber_rdx: ++ xorq %rdx,%rdx ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_rdi ++.private_extern _abi_test_clobber_rdi ++.p2align 4 ++_abi_test_clobber_rdi: ++ xorq %rdi,%rdi ++ 
.byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_rsi ++.private_extern _abi_test_clobber_rsi ++.p2align 4 ++_abi_test_clobber_rsi: ++ xorq %rsi,%rsi ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_rbp ++.private_extern _abi_test_clobber_rbp ++.p2align 4 ++_abi_test_clobber_rbp: ++ xorq %rbp,%rbp ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_r8 ++.private_extern _abi_test_clobber_r8 ++.p2align 4 ++_abi_test_clobber_r8: ++ xorq %r8,%r8 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_r9 ++.private_extern _abi_test_clobber_r9 ++.p2align 4 ++_abi_test_clobber_r9: ++ xorq %r9,%r9 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_r10 ++.private_extern _abi_test_clobber_r10 ++.p2align 4 ++_abi_test_clobber_r10: ++ xorq %r10,%r10 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_r11 ++.private_extern _abi_test_clobber_r11 ++.p2align 4 ++_abi_test_clobber_r11: ++ xorq %r11,%r11 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_r12 ++.private_extern _abi_test_clobber_r12 ++.p2align 4 ++_abi_test_clobber_r12: ++ xorq %r12,%r12 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_r13 ++.private_extern _abi_test_clobber_r13 ++.p2align 4 ++_abi_test_clobber_r13: ++ xorq %r13,%r13 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_r14 ++.private_extern _abi_test_clobber_r14 ++.p2align 4 ++_abi_test_clobber_r14: ++ xorq %r14,%r14 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_r15 ++.private_extern _abi_test_clobber_r15 ++.p2align 4 ++_abi_test_clobber_r15: ++ xorq %r15,%r15 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm0 ++.private_extern _abi_test_clobber_xmm0 ++.p2align 4 ++_abi_test_clobber_xmm0: ++ pxor %xmm0,%xmm0 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm1 ++.private_extern _abi_test_clobber_xmm1 ++.p2align 4 ++_abi_test_clobber_xmm1: ++ pxor %xmm1,%xmm1 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm2 ++.private_extern _abi_test_clobber_xmm2 ++.p2align 4 ++_abi_test_clobber_xmm2: ++ pxor %xmm2,%xmm2 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm3 ++.private_extern _abi_test_clobber_xmm3 ++.p2align 4 ++_abi_test_clobber_xmm3: ++ pxor %xmm3,%xmm3 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm4 ++.private_extern _abi_test_clobber_xmm4 ++.p2align 4 ++_abi_test_clobber_xmm4: ++ pxor %xmm4,%xmm4 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm5 ++.private_extern _abi_test_clobber_xmm5 ++.p2align 4 ++_abi_test_clobber_xmm5: ++ pxor %xmm5,%xmm5 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm6 ++.private_extern _abi_test_clobber_xmm6 ++.p2align 4 ++_abi_test_clobber_xmm6: ++ pxor %xmm6,%xmm6 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm7 ++.private_extern _abi_test_clobber_xmm7 ++.p2align 4 ++_abi_test_clobber_xmm7: ++ pxor %xmm7,%xmm7 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm8 ++.private_extern _abi_test_clobber_xmm8 ++.p2align 4 ++_abi_test_clobber_xmm8: ++ pxor %xmm8,%xmm8 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm9 ++.private_extern _abi_test_clobber_xmm9 ++.p2align 4 ++_abi_test_clobber_xmm9: ++ pxor %xmm9,%xmm9 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm10 ++.private_extern _abi_test_clobber_xmm10 ++.p2align 4 ++_abi_test_clobber_xmm10: ++ pxor %xmm10,%xmm10 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm11 ++.private_extern _abi_test_clobber_xmm11 ++.p2align 4 ++_abi_test_clobber_xmm11: ++ pxor %xmm11,%xmm11 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm12 ++.private_extern _abi_test_clobber_xmm12 ++.p2align 4 ++_abi_test_clobber_xmm12: ++ pxor 
%xmm12,%xmm12 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm13 ++.private_extern _abi_test_clobber_xmm13 ++.p2align 4 ++_abi_test_clobber_xmm13: ++ pxor %xmm13,%xmm13 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm14 ++.private_extern _abi_test_clobber_xmm14 ++.p2align 4 ++_abi_test_clobber_xmm14: ++ pxor %xmm14,%xmm14 ++ .byte 0xf3,0xc3 ++ ++ ++.globl _abi_test_clobber_xmm15 ++.private_extern _abi_test_clobber_xmm15 ++.p2align 4 ++_abi_test_clobber_xmm15: ++ pxor %xmm15,%xmm15 ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++.globl _abi_test_bad_unwind_wrong_register ++.private_extern _abi_test_bad_unwind_wrong_register ++.p2align 4 ++_abi_test_bad_unwind_wrong_register: ++ ++L$abi_test_bad_unwind_wrong_register_seh_begin: ++ pushq %r12 ++ ++L$abi_test_bad_unwind_wrong_register_seh_push_r13: ++ ++ ++ ++ nop ++ popq %r12 ++ ++ .byte 0xf3,0xc3 ++L$abi_test_bad_unwind_wrong_register_seh_end: ++ ++ ++ ++ ++ ++ ++ ++.globl _abi_test_bad_unwind_temporary ++.private_extern _abi_test_bad_unwind_temporary ++.p2align 4 ++_abi_test_bad_unwind_temporary: ++ ++L$abi_test_bad_unwind_temporary_seh_begin: ++ pushq %r12 ++ ++L$abi_test_bad_unwind_temporary_seh_push_r12: ++ ++ movq %r12,%rax ++ incq %rax ++ movq %rax,(%rsp) ++ ++ ++ ++ movq %r12,(%rsp) ++ ++ ++ popq %r12 ++ ++ .byte 0xf3,0xc3 ++L$abi_test_bad_unwind_temporary_seh_end: ++ ++ ++ ++ ++ ++ ++ ++.globl _abi_test_get_and_clear_direction_flag ++.private_extern _abi_test_get_and_clear_direction_flag ++_abi_test_get_and_clear_direction_flag: ++ pushfq ++ popq %rax ++ andq $0x400,%rax ++ shrq $10,%rax ++ cld ++ .byte 0xf3,0xc3 ++ ++ ++ ++ ++ ++.globl _abi_test_set_direction_flag ++.private_extern _abi_test_set_direction_flag ++_abi_test_set_direction_flag: ++ std ++ .byte 0xf3,0xc3 ++ ++#endif +diff --git a/err_data.c b/err_data.c +index 8432081..de52cc0 100644 +--- a/err_data.c ++++ b/err_data.c +@@ -193,50 +193,50 @@ const uint32_t kOpenSSLReasonValues[] = { + 0x283480b9, + 0x283500f7, + 0x28358c94, +- 0x2c323286, ++ 0x2c323284, + 0x2c32932e, +- 0x2c333294, +- 0x2c33b2a6, +- 0x2c3432ba, +- 0x2c34b2cc, +- 0x2c3532e7, +- 0x2c35b2f9, +- 0x2c363329, ++ 0x2c333292, ++ 0x2c33b2a4, ++ 0x2c3432b8, ++ 0x2c34b2ca, ++ 0x2c3532e5, ++ 0x2c35b2f7, ++ 0x2c363327, + 0x2c36833a, +- 0x2c373336, +- 0x2c37b362, +- 0x2c383387, +- 0x2c38b39e, +- 0x2c3933bc, +- 0x2c39b3cc, +- 0x2c3a33de, +- 0x2c3ab3f2, +- 0x2c3b3403, +- 0x2c3bb422, ++ 0x2c373334, ++ 0x2c37b360, ++ 0x2c383385, ++ 0x2c38b39c, ++ 0x2c3933ba, ++ 0x2c39b3ca, ++ 0x2c3a33dc, ++ 0x2c3ab3f0, ++ 0x2c3b3401, ++ 0x2c3bb420, + 0x2c3c1340, + 0x2c3c9356, +- 0x2c3d3467, ++ 0x2c3d3465, + 0x2c3d936f, +- 0x2c3e3491, +- 0x2c3eb49f, +- 0x2c3f34b7, +- 0x2c3fb4cf, +- 0x2c4034f9, ++ 0x2c3e348f, ++ 0x2c3eb49d, ++ 0x2c3f34b5, ++ 0x2c3fb4cd, ++ 0x2c4034f7, + 0x2c409241, +- 0x2c41350a, +- 0x2c41b51d, ++ 0x2c413508, ++ 0x2c41b51b, + 0x2c421207, +- 0x2c42b52e, ++ 0x2c42b52c, + 0x2c43074a, +- 0x2c43b414, +- 0x2c443375, +- 0x2c44b4dc, +- 0x2c45330c, +- 0x2c45b348, +- 0x2c4633ac, +- 0x2c46b436, +- 0x2c47344b, +- 0x2c47b484, ++ 0x2c43b412, ++ 0x2c443373, ++ 0x2c44b4da, ++ 0x2c45330a, ++ 0x2c45b346, ++ 0x2c4633aa, ++ 0x2c46b434, ++ 0x2c473449, ++ 0x2c47b482, + 0x30320000, + 0x30328015, + 0x3033001f, +@@ -433,158 +433,158 @@ const uint32_t kOpenSSLReasonValues[] = { + 0x404ea057, + 0x404f20f1, + 0x404fa167, +- 0x405021be, +- 0x4050a1d2, +- 0x40512205, +- 0x40522215, +- 0x4052a239, +- 0x40532251, +- 0x4053a264, +- 0x40542279, +- 0x4054a29c, +- 0x405522c7, +- 0x4055a304, +- 0x40562329, +- 0x4056a342, +- 0x4057235a, +- 0x4057a36d, +- 
0x40582382, +- 0x4058a3a9, +- 0x405923d8, +- 0x4059a405, +- 0x405a2419, +- 0x405aa429, +- 0x405b2441, +- 0x405ba452, +- 0x405c2465, +- 0x405ca4a4, +- 0x405d24b1, +- 0x405da4d6, +- 0x405e2514, ++ 0x405021d6, ++ 0x4050a1ea, ++ 0x4051221d, ++ 0x4052222d, ++ 0x4052a251, ++ 0x40532269, ++ 0x4053a27c, ++ 0x40542291, ++ 0x4054a2b4, ++ 0x405522df, ++ 0x4055a31c, ++ 0x40562341, ++ 0x4056a35a, ++ 0x40572372, ++ 0x4057a385, ++ 0x4058239a, ++ 0x4058a3c1, ++ 0x405923f0, ++ 0x4059a41d, ++ 0x405a2431, ++ 0x405aa441, ++ 0x405b2459, ++ 0x405ba46a, ++ 0x405c247d, ++ 0x405ca4bc, ++ 0x405d24c9, ++ 0x405da4ee, ++ 0x405e252c, + 0x405e8adb, +- 0x405f254f, +- 0x405fa55c, +- 0x4060256a, +- 0x4060a58c, +- 0x406125ed, +- 0x4061a625, +- 0x4062263c, +- 0x4062a64d, +- 0x4063269a, +- 0x4063a6af, +- 0x406426c6, +- 0x4064a6f2, +- 0x4065270d, +- 0x4065a724, +- 0x4066273c, +- 0x4066a766, +- 0x40672791, +- 0x4067a7d6, +- 0x4068281e, +- 0x4068a83f, +- 0x40692871, +- 0x4069a89f, +- 0x406a28c0, +- 0x406aa8e0, +- 0x406b2a68, +- 0x406baa8b, +- 0x406c2aa1, +- 0x406cadab, +- 0x406d2dda, +- 0x406dae02, +- 0x406e2e30, +- 0x406eae7d, +- 0x406f2ed6, +- 0x406faf0e, +- 0x40702f21, +- 0x4070af3e, ++ 0x405f254d, ++ 0x405fa55a, ++ 0x40602568, ++ 0x4060a58a, ++ 0x406125eb, ++ 0x4061a623, ++ 0x4062263a, ++ 0x4062a64b, ++ 0x40632698, ++ 0x4063a6ad, ++ 0x406426c4, ++ 0x4064a6f0, ++ 0x4065270b, ++ 0x4065a722, ++ 0x4066273a, ++ 0x4066a764, ++ 0x4067278f, ++ 0x4067a7d4, ++ 0x4068281c, ++ 0x4068a83d, ++ 0x4069286f, ++ 0x4069a89d, ++ 0x406a28be, ++ 0x406aa8de, ++ 0x406b2a66, ++ 0x406baa89, ++ 0x406c2a9f, ++ 0x406cada9, ++ 0x406d2dd8, ++ 0x406dae00, ++ 0x406e2e2e, ++ 0x406eae7b, ++ 0x406f2ed4, ++ 0x406faf0c, ++ 0x40702f1f, ++ 0x4070af3c, + 0x4071082a, +- 0x4071af50, +- 0x40722f63, +- 0x4072af99, +- 0x40732fb1, ++ 0x4071af4e, ++ 0x40722f61, ++ 0x4072af97, ++ 0x40732faf, + 0x40739540, +- 0x40742fc5, +- 0x4074afdf, +- 0x40752ff0, +- 0x4075b004, +- 0x40763012, ++ 0x40742fc3, ++ 0x4074afdd, ++ 0x40752fee, ++ 0x4075b002, ++ 0x40763010, + 0x40769304, +- 0x40773037, +- 0x4077b077, +- 0x40783092, +- 0x4078b0cb, +- 0x407930e2, +- 0x4079b0f8, +- 0x407a3124, +- 0x407ab137, +- 0x407b314c, +- 0x407bb15e, +- 0x407c318f, +- 0x407cb198, +- 0x407d285a, +- 0x407da177, +- 0x407e30a7, +- 0x407ea3b9, ++ 0x40773035, ++ 0x4077b075, ++ 0x40783090, ++ 0x4078b0c9, ++ 0x407930e0, ++ 0x4079b0f6, ++ 0x407a3122, ++ 0x407ab135, ++ 0x407b314a, ++ 0x407bb15c, ++ 0x407c318d, ++ 0x407cb196, ++ 0x407d2858, ++ 0x407da18f, ++ 0x407e30a5, ++ 0x407ea3d1, + 0x407f1dcb, + 0x407f9f9e, + 0x40802101, + 0x40809df3, +- 0x40812227, ++ 0x4081223f, + 0x4081a0a5, +- 0x40822e1b, ++ 0x40822e19, + 0x40829b46, +- 0x40832394, +- 0x4083a6d7, ++ 0x408323ac, ++ 0x4083a6d5, + 0x40841e07, +- 0x4084a3f1, +- 0x40852476, +- 0x4085a5b4, +- 0x408624f6, +- 0x4086a191, +- 0x40872e61, +- 0x4087a602, ++ 0x4084a409, ++ 0x4085248e, ++ 0x4085a5b2, ++ 0x4086250e, ++ 0x4086a1a9, ++ 0x40872e5f, ++ 0x4087a600, + 0x40881b84, +- 0x4088a7e9, ++ 0x4088a7e7, + 0x40891bd3, + 0x40899b60, +- 0x408a2ad9, ++ 0x408a2ad7, + 0x408a9958, +- 0x408b3173, +- 0x408baeeb, +- 0x408c2486, ++ 0x408b3171, ++ 0x408baee9, ++ 0x408c249e, + 0x408c9990, + 0x408d1eef, + 0x408d9e39, + 0x408e201f, +- 0x408ea2e4, +- 0x408f27fd, +- 0x408fa5d0, +- 0x409027b2, +- 0x4090a4c8, +- 0x40912ac1, ++ 0x408ea2fc, ++ 0x408f27fb, ++ 0x408fa5ce, ++ 0x409027b0, ++ 0x4090a4e0, ++ 0x40912abf, + 0x409199b6, + 0x40921c20, +- 0x4092ae9c, +- 0x40932f7c, +- 0x4093a1a2, ++ 0x4092ae9a, ++ 0x40932f7a, ++ 0x4093a1ba, + 0x40941e1b, +- 0x4094aaf2, +- 0x4095265e, +- 0x4095b104, +- 
0x40962e48, ++ 0x4094aaf0, ++ 0x4095265c, ++ 0x4095b102, ++ 0x40962e46, + 0x4096a11a, +- 0x409721ed, ++ 0x40972205, + 0x4097a06e, + 0x40981c80, +- 0x4098a672, +- 0x40992eb8, +- 0x4099a311, +- 0x409a22aa, ++ 0x4098a670, ++ 0x40992eb6, ++ 0x4099a329, ++ 0x409a22c2, + 0x409a9974, + 0x409b1e75, + 0x409b9ea0, +- 0x409c3059, ++ 0x409c3057, + 0x409c9ec8, + 0x409d20d6, + 0x409da0bb, +@@ -592,42 +592,42 @@ const uint32_t kOpenSSLReasonValues[] = { + 0x409ea14f, + 0x409f2137, + 0x409f9e68, +- 0x40a02535, ++ 0x40a02177, + 0x40a0a088, +- 0x41f42993, +- 0x41f92a25, +- 0x41fe2918, +- 0x41feabce, +- 0x41ff2cfc, +- 0x420329ac, +- 0x420829ce, +- 0x4208aa0a, +- 0x420928fc, +- 0x4209aa44, +- 0x420a2953, +- 0x420aa933, +- 0x420b2973, +- 0x420ba9ec, +- 0x420c2d18, +- 0x420cab02, +- 0x420d2bb5, +- 0x420dabec, +- 0x42122c1f, +- 0x42172cdf, +- 0x4217ac61, +- 0x421c2c83, +- 0x421f2c3e, +- 0x42212d90, +- 0x42262cc2, +- 0x422b2d6e, +- 0x422bab90, +- 0x422c2d50, +- 0x422cab43, +- 0x422d2b1c, +- 0x422dad2f, +- 0x422e2b6f, +- 0x42302c9e, +- 0x4230ac06, ++ 0x41f42991, ++ 0x41f92a23, ++ 0x41fe2916, ++ 0x41feabcc, ++ 0x41ff2cfa, ++ 0x420329aa, ++ 0x420829cc, ++ 0x4208aa08, ++ 0x420928fa, ++ 0x4209aa42, ++ 0x420a2951, ++ 0x420aa931, ++ 0x420b2971, ++ 0x420ba9ea, ++ 0x420c2d16, ++ 0x420cab00, ++ 0x420d2bb3, ++ 0x420dabea, ++ 0x42122c1d, ++ 0x42172cdd, ++ 0x4217ac5f, ++ 0x421c2c81, ++ 0x421f2c3c, ++ 0x42212d8e, ++ 0x42262cc0, ++ 0x422b2d6c, ++ 0x422bab8e, ++ 0x422c2d4e, ++ 0x422cab41, ++ 0x422d2b1a, ++ 0x422dad2d, ++ 0x422e2b6d, ++ 0x42302c9c, ++ 0x4230ac04, + 0x44320755, + 0x44328764, + 0x44330770, +@@ -682,71 +682,71 @@ const uint32_t kOpenSSLReasonValues[] = { + 0x4c41159d, + 0x4c419420, + 0x4c421589, +- 0x50323540, +- 0x5032b54f, +- 0x5033355a, +- 0x5033b56a, +- 0x50343583, +- 0x5034b59d, +- 0x503535ab, +- 0x5035b5c1, +- 0x503635d3, +- 0x5036b5e9, +- 0x50373602, +- 0x5037b615, +- 0x5038362d, +- 0x5038b63e, +- 0x50393653, +- 0x5039b667, +- 0x503a3687, +- 0x503ab69d, +- 0x503b36b5, +- 0x503bb6c7, +- 0x503c36e3, +- 0x503cb6fa, +- 0x503d3713, +- 0x503db729, +- 0x503e3736, +- 0x503eb74c, +- 0x503f375e, ++ 0x5032353e, ++ 0x5032b54d, ++ 0x50333558, ++ 0x5033b568, ++ 0x50343581, ++ 0x5034b59b, ++ 0x503535a9, ++ 0x5035b5bf, ++ 0x503635d1, ++ 0x5036b5e7, ++ 0x50373600, ++ 0x5037b613, ++ 0x5038362b, ++ 0x5038b63c, ++ 0x50393651, ++ 0x5039b665, ++ 0x503a3685, ++ 0x503ab69b, ++ 0x503b36b3, ++ 0x503bb6c5, ++ 0x503c36e1, ++ 0x503cb6f8, ++ 0x503d3711, ++ 0x503db727, ++ 0x503e3734, ++ 0x503eb74a, ++ 0x503f375c, + 0x503f83a3, +- 0x50403771, +- 0x5040b781, +- 0x5041379b, +- 0x5041b7aa, +- 0x504237c4, +- 0x5042b7e1, +- 0x504337f1, +- 0x5043b801, +- 0x5044381e, ++ 0x5040376f, ++ 0x5040b77f, ++ 0x50413799, ++ 0x5041b7a8, ++ 0x504237c2, ++ 0x5042b7df, ++ 0x504337ef, ++ 0x5043b7ff, ++ 0x5044381c, + 0x50448459, +- 0x50453832, +- 0x5045b850, +- 0x50463863, +- 0x5046b879, +- 0x5047388b, +- 0x5047b8a0, +- 0x504838c6, +- 0x5048b8d4, +- 0x504938e7, +- 0x5049b8fc, +- 0x504a3912, +- 0x504ab922, +- 0x504b3942, +- 0x504bb955, +- 0x504c3978, +- 0x504cb9a6, +- 0x504d39d3, +- 0x504db9f0, +- 0x504e3a0b, +- 0x504eba27, +- 0x504f3a39, +- 0x504fba50, +- 0x50503a5f, ++ 0x50453830, ++ 0x5045b84e, ++ 0x50463861, ++ 0x5046b877, ++ 0x50473889, ++ 0x5047b89e, ++ 0x504838c4, ++ 0x5048b8d2, ++ 0x504938e5, ++ 0x5049b8fa, ++ 0x504a3910, ++ 0x504ab920, ++ 0x504b3940, ++ 0x504bb953, ++ 0x504c3976, ++ 0x504cb9a4, ++ 0x504d39d1, ++ 0x504db9ee, ++ 0x504e3a09, ++ 0x504eba25, ++ 0x504f3a37, ++ 0x504fba4e, ++ 0x50503a5d, + 0x50508719, +- 0x50513a72, +- 0x5051b810, +- 0x505239b8, 
++ 0x50513a70, ++ 0x5051b80e, ++ 0x505239b6, + 0x58320f8d, + 0x68320f4f, + 0x68328ca7, +@@ -790,19 +790,19 @@ const uint32_t kOpenSSLReasonValues[] = { + 0x7c32121d, + 0x80321433, + 0x80328090, +- 0x80333255, ++ 0x80333253, + 0x803380b9, +- 0x80343264, +- 0x8034b1cc, +- 0x803531ea, +- 0x8035b278, +- 0x8036322c, +- 0x8036b1db, +- 0x8037321e, +- 0x8037b1b9, +- 0x8038323f, +- 0x8038b1fb, +- 0x80393210, ++ 0x80343262, ++ 0x8034b1ca, ++ 0x803531e8, ++ 0x8035b276, ++ 0x8036322a, ++ 0x8036b1d9, ++ 0x8037321c, ++ 0x8037b1b7, ++ 0x8038323d, ++ 0x8038b1f9, ++ 0x8039320e, + }; + + const size_t kOpenSSLReasonValuesLen = sizeof(kOpenSSLReasonValues) / sizeof(kOpenSSLReasonValues[0]); +@@ -1226,6 +1226,7 @@ const char kOpenSSLReasonStringData[] = + "INVALID_ECH_CONFIG_LIST\0" + "INVALID_ECH_PUBLIC_NAME\0" + "INVALID_MESSAGE\0" ++ "INVALID_OUTER_EXTENSION\0" + "INVALID_OUTER_RECORD_TYPE\0" + "INVALID_SCT_LIST\0" + "INVALID_SIGNATURE_ALGORITHM\0" +@@ -1269,7 +1270,6 @@ const char kOpenSSLReasonStringData[] = + "OLD_SESSION_CIPHER_NOT_RETURNED\0" + "OLD_SESSION_PRF_HASH_MISMATCH\0" + "OLD_SESSION_VERSION_NOT_RETURNED\0" +- "OUTER_EXTENSION_NOT_FOUND\0" + "PARSE_TLSEXT\0" + "PATH_TOO_LONG\0" + "PEER_DID_NOT_RETURN_A_CERTIFICATE\0" +diff --git a/eureka.mk b/eureka.mk +index ec431f4..93d4437 100644 +--- a/eureka.mk ++++ b/eureka.mk +@@ -73,6 +73,7 @@ crypto_sources := \ + src/crypto/cipher_extra/e_aesctrhmac.c\ + src/crypto/cipher_extra/e_aesgcmsiv.c\ + src/crypto/cipher_extra/e_chacha20poly1305.c\ ++ src/crypto/cipher_extra/e_des.c\ + src/crypto/cipher_extra/e_null.c\ + src/crypto/cipher_extra/e_rc2.c\ + src/crypto/cipher_extra/e_rc4.c\ +@@ -91,6 +92,7 @@ crypto_sources := \ + src/crypto/crypto.c\ + src/crypto/curve25519/curve25519.c\ + src/crypto/curve25519/spake25519.c\ ++ src/crypto/des/des.c\ + src/crypto/dh_extra/dh_asn1.c\ + src/crypto/dh_extra/params.c\ + src/crypto/digest_extra/digest_extra.c\ +diff --git a/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S b/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S +index a09764a..6ce216f 100644 +--- a/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S ++++ b/linux-x86_64/crypto/fipsmodule/sha256-x86_64.S +@@ -25,6 +25,8 @@ sha256_block_data_order: + movl 0(%r11),%r9d + movl 4(%r11),%r10d + movl 8(%r11),%r11d ++ testl $536870912,%r11d ++ jnz .Lshaext_shortcut + andl $1073741824,%r9d + andl $268435968,%r10d + orl %r9d,%r10d +@@ -1781,6 +1783,215 @@ K256: + .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + .long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + .byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 ++.type sha256_block_data_order_shaext,@function ++.align 64 ++sha256_block_data_order_shaext: ++.cfi_startproc ++.Lshaext_shortcut: ++ leaq K256+128(%rip),%rcx ++ movdqu (%rdi),%xmm1 ++ movdqu 16(%rdi),%xmm2 ++ movdqa 512-128(%rcx),%xmm7 ++ ++ pshufd $0x1b,%xmm1,%xmm0 ++ pshufd $0xb1,%xmm1,%xmm1 ++ pshufd $0x1b,%xmm2,%xmm2 ++ movdqa %xmm7,%xmm8 ++.byte 102,15,58,15,202,8 ++ punpcklqdq %xmm0,%xmm2 ++ jmp .Loop_shaext ++ ++.align 16 ++.Loop_shaext: ++ movdqu (%rsi),%xmm3 ++ movdqu 16(%rsi),%xmm4 ++ movdqu 32(%rsi),%xmm5 ++.byte 102,15,56,0,223 ++ movdqu 48(%rsi),%xmm6 ++ ++ movdqa 0-128(%rcx),%xmm0 ++ paddd %xmm3,%xmm0 ++.byte 102,15,56,0,231 ++ movdqa %xmm2,%xmm10 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ nop ++ movdqa %xmm1,%xmm9 ++.byte 15,56,203,202 ++ ++ movdqa 
32-128(%rcx),%xmm0 ++ paddd %xmm4,%xmm0 ++.byte 102,15,56,0,239 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ leaq 64(%rsi),%rsi ++.byte 15,56,204,220 ++.byte 15,56,203,202 ++ ++ movdqa 64-128(%rcx),%xmm0 ++ paddd %xmm5,%xmm0 ++.byte 102,15,56,0,247 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm6,%xmm7 ++.byte 102,15,58,15,253,4 ++ nop ++ paddd %xmm7,%xmm3 ++.byte 15,56,204,229 ++.byte 15,56,203,202 ++ ++ movdqa 96-128(%rcx),%xmm0 ++ paddd %xmm6,%xmm0 ++.byte 15,56,205,222 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm3,%xmm7 ++.byte 102,15,58,15,254,4 ++ nop ++ paddd %xmm7,%xmm4 ++.byte 15,56,204,238 ++.byte 15,56,203,202 ++ movdqa 128-128(%rcx),%xmm0 ++ paddd %xmm3,%xmm0 ++.byte 15,56,205,227 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm4,%xmm7 ++.byte 102,15,58,15,251,4 ++ nop ++ paddd %xmm7,%xmm5 ++.byte 15,56,204,243 ++.byte 15,56,203,202 ++ movdqa 160-128(%rcx),%xmm0 ++ paddd %xmm4,%xmm0 ++.byte 15,56,205,236 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm5,%xmm7 ++.byte 102,15,58,15,252,4 ++ nop ++ paddd %xmm7,%xmm6 ++.byte 15,56,204,220 ++.byte 15,56,203,202 ++ movdqa 192-128(%rcx),%xmm0 ++ paddd %xmm5,%xmm0 ++.byte 15,56,205,245 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm6,%xmm7 ++.byte 102,15,58,15,253,4 ++ nop ++ paddd %xmm7,%xmm3 ++.byte 15,56,204,229 ++.byte 15,56,203,202 ++ movdqa 224-128(%rcx),%xmm0 ++ paddd %xmm6,%xmm0 ++.byte 15,56,205,222 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm3,%xmm7 ++.byte 102,15,58,15,254,4 ++ nop ++ paddd %xmm7,%xmm4 ++.byte 15,56,204,238 ++.byte 15,56,203,202 ++ movdqa 256-128(%rcx),%xmm0 ++ paddd %xmm3,%xmm0 ++.byte 15,56,205,227 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm4,%xmm7 ++.byte 102,15,58,15,251,4 ++ nop ++ paddd %xmm7,%xmm5 ++.byte 15,56,204,243 ++.byte 15,56,203,202 ++ movdqa 288-128(%rcx),%xmm0 ++ paddd %xmm4,%xmm0 ++.byte 15,56,205,236 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm5,%xmm7 ++.byte 102,15,58,15,252,4 ++ nop ++ paddd %xmm7,%xmm6 ++.byte 15,56,204,220 ++.byte 15,56,203,202 ++ movdqa 320-128(%rcx),%xmm0 ++ paddd %xmm5,%xmm0 ++.byte 15,56,205,245 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm6,%xmm7 ++.byte 102,15,58,15,253,4 ++ nop ++ paddd %xmm7,%xmm3 ++.byte 15,56,204,229 ++.byte 15,56,203,202 ++ movdqa 352-128(%rcx),%xmm0 ++ paddd %xmm6,%xmm0 ++.byte 15,56,205,222 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm3,%xmm7 ++.byte 102,15,58,15,254,4 ++ nop ++ paddd %xmm7,%xmm4 ++.byte 15,56,204,238 ++.byte 15,56,203,202 ++ movdqa 384-128(%rcx),%xmm0 ++ paddd %xmm3,%xmm0 ++.byte 15,56,205,227 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm4,%xmm7 ++.byte 102,15,58,15,251,4 ++ nop ++ paddd %xmm7,%xmm5 ++.byte 15,56,204,243 ++.byte 15,56,203,202 ++ movdqa 416-128(%rcx),%xmm0 ++ paddd %xmm4,%xmm0 ++.byte 15,56,205,236 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ movdqa %xmm5,%xmm7 ++.byte 102,15,58,15,252,4 ++.byte 15,56,203,202 ++ paddd %xmm7,%xmm6 ++ ++ movdqa 448-128(%rcx),%xmm0 ++ paddd %xmm5,%xmm0 ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++.byte 15,56,205,245 ++ movdqa %xmm8,%xmm7 ++.byte 15,56,203,202 ++ ++ movdqa 480-128(%rcx),%xmm0 ++ paddd %xmm6,%xmm0 ++ nop ++.byte 15,56,203,209 ++ pshufd $0x0e,%xmm0,%xmm0 ++ decq %rdx ++ nop ++.byte 15,56,203,202 ++ ++ paddd %xmm10,%xmm2 ++ paddd %xmm9,%xmm1 ++ jnz .Loop_shaext ++ ++ pshufd $0xb1,%xmm2,%xmm2 ++ pshufd $0x1b,%xmm1,%xmm7 ++ pshufd 
$0xb1,%xmm1,%xmm1 ++ punpckhqdq %xmm2,%xmm1 ++.byte 102,15,58,15,215,8 ++ ++ movdqu %xmm1,(%rdi) ++ movdqu %xmm2,16(%rdi) ++ .byte 0xf3,0xc3 ++.cfi_endproc ++.size sha256_block_data_order_shaext,.-sha256_block_data_order_shaext + .type sha256_block_data_order_ssse3,@function + .align 64 + sha256_block_data_order_ssse3: +diff --git a/mac-x86/crypto/chacha/chacha-x86.S b/mac-x86/crypto/chacha/chacha-x86.S +deleted file mode 100644 +index ef535b2..0000000 +--- a/mac-x86/crypto/chacha/chacha-x86.S ++++ /dev/null +@@ -1,974 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _ChaCha20_ctr32 +-.private_extern _ChaCha20_ctr32 +-.align 4 +-_ChaCha20_ctr32: +-L_ChaCha20_ctr32_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- xorl %eax,%eax +- cmpl 28(%esp),%eax +- je L000no_data +- call Lpic_point +-Lpic_point: +- popl %eax +- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp +- testl $16777216,(%ebp) +- jz L001x86 +- testl $512,4(%ebp) +- jz L001x86 +- jmp Lssse3_shortcut +-L001x86: +- movl 32(%esp),%esi +- movl 36(%esp),%edi +- subl $132,%esp +- movl (%esi),%eax +- movl 4(%esi),%ebx +- movl 8(%esi),%ecx +- movl 12(%esi),%edx +- movl %eax,80(%esp) +- movl %ebx,84(%esp) +- movl %ecx,88(%esp) +- movl %edx,92(%esp) +- movl 16(%esi),%eax +- movl 20(%esi),%ebx +- movl 24(%esi),%ecx +- movl 28(%esi),%edx +- movl %eax,96(%esp) +- movl %ebx,100(%esp) +- movl %ecx,104(%esp) +- movl %edx,108(%esp) +- movl (%edi),%eax +- movl 4(%edi),%ebx +- movl 8(%edi),%ecx +- movl 12(%edi),%edx +- subl $1,%eax +- movl %eax,112(%esp) +- movl %ebx,116(%esp) +- movl %ecx,120(%esp) +- movl %edx,124(%esp) +- jmp L002entry +-.align 4,0x90 +-L003outer_loop: +- movl %ebx,156(%esp) +- movl %eax,152(%esp) +- movl %ecx,160(%esp) +-L002entry: +- movl $1634760805,%eax +- movl $857760878,4(%esp) +- movl $2036477234,8(%esp) +- movl $1797285236,12(%esp) +- movl 84(%esp),%ebx +- movl 88(%esp),%ebp +- movl 104(%esp),%ecx +- movl 108(%esp),%esi +- movl 116(%esp),%edx +- movl 120(%esp),%edi +- movl %ebx,20(%esp) +- movl %ebp,24(%esp) +- movl %ecx,40(%esp) +- movl %esi,44(%esp) +- movl %edx,52(%esp) +- movl %edi,56(%esp) +- movl 92(%esp),%ebx +- movl 124(%esp),%edi +- movl 112(%esp),%edx +- movl 80(%esp),%ebp +- movl 96(%esp),%ecx +- movl 100(%esp),%esi +- addl $1,%edx +- movl %ebx,28(%esp) +- movl %edi,60(%esp) +- movl %edx,112(%esp) +- movl $10,%ebx +- jmp L004loop +-.align 4,0x90 +-L004loop: +- addl %ebp,%eax +- movl %ebx,128(%esp) +- movl %ebp,%ebx +- xorl %eax,%edx +- roll $16,%edx +- addl %edx,%ecx +- xorl %ecx,%ebx +- movl 52(%esp),%edi +- roll $12,%ebx +- movl 20(%esp),%ebp +- addl %ebx,%eax +- xorl %eax,%edx +- movl %eax,(%esp) +- roll $8,%edx +- movl 4(%esp),%eax +- addl %edx,%ecx +- movl %edx,48(%esp) +- xorl %ecx,%ebx +- addl %ebp,%eax +- roll $7,%ebx +- xorl %eax,%edi +- movl %ecx,32(%esp) +- roll $16,%edi +- movl %ebx,16(%esp) +- addl %edi,%esi +- movl 40(%esp),%ecx +- xorl %esi,%ebp +- movl 56(%esp),%edx +- roll $12,%ebp +- movl 24(%esp),%ebx +- addl %ebp,%eax +- xorl %eax,%edi +- movl %eax,4(%esp) +- roll $8,%edi +- movl 8(%esp),%eax +- addl %edi,%esi +- movl %edi,52(%esp) +- xorl %esi,%ebp +- addl %ebx,%eax +- roll $7,%ebp +- xorl %eax,%edx +- movl %esi,36(%esp) +- roll $16,%edx +- movl %ebp,20(%esp) +- addl %edx,%ecx +- movl 44(%esp),%esi +- xorl %ecx,%ebx +- movl 60(%esp),%edi +- roll $12,%ebx +- movl 28(%esp),%ebp +- addl 
%ebx,%eax +- xorl %eax,%edx +- movl %eax,8(%esp) +- roll $8,%edx +- movl 12(%esp),%eax +- addl %edx,%ecx +- movl %edx,56(%esp) +- xorl %ecx,%ebx +- addl %ebp,%eax +- roll $7,%ebx +- xorl %eax,%edi +- roll $16,%edi +- movl %ebx,24(%esp) +- addl %edi,%esi +- xorl %esi,%ebp +- roll $12,%ebp +- movl 20(%esp),%ebx +- addl %ebp,%eax +- xorl %eax,%edi +- movl %eax,12(%esp) +- roll $8,%edi +- movl (%esp),%eax +- addl %edi,%esi +- movl %edi,%edx +- xorl %esi,%ebp +- addl %ebx,%eax +- roll $7,%ebp +- xorl %eax,%edx +- roll $16,%edx +- movl %ebp,28(%esp) +- addl %edx,%ecx +- xorl %ecx,%ebx +- movl 48(%esp),%edi +- roll $12,%ebx +- movl 24(%esp),%ebp +- addl %ebx,%eax +- xorl %eax,%edx +- movl %eax,(%esp) +- roll $8,%edx +- movl 4(%esp),%eax +- addl %edx,%ecx +- movl %edx,60(%esp) +- xorl %ecx,%ebx +- addl %ebp,%eax +- roll $7,%ebx +- xorl %eax,%edi +- movl %ecx,40(%esp) +- roll $16,%edi +- movl %ebx,20(%esp) +- addl %edi,%esi +- movl 32(%esp),%ecx +- xorl %esi,%ebp +- movl 52(%esp),%edx +- roll $12,%ebp +- movl 28(%esp),%ebx +- addl %ebp,%eax +- xorl %eax,%edi +- movl %eax,4(%esp) +- roll $8,%edi +- movl 8(%esp),%eax +- addl %edi,%esi +- movl %edi,48(%esp) +- xorl %esi,%ebp +- addl %ebx,%eax +- roll $7,%ebp +- xorl %eax,%edx +- movl %esi,44(%esp) +- roll $16,%edx +- movl %ebp,24(%esp) +- addl %edx,%ecx +- movl 36(%esp),%esi +- xorl %ecx,%ebx +- movl 56(%esp),%edi +- roll $12,%ebx +- movl 16(%esp),%ebp +- addl %ebx,%eax +- xorl %eax,%edx +- movl %eax,8(%esp) +- roll $8,%edx +- movl 12(%esp),%eax +- addl %edx,%ecx +- movl %edx,52(%esp) +- xorl %ecx,%ebx +- addl %ebp,%eax +- roll $7,%ebx +- xorl %eax,%edi +- roll $16,%edi +- movl %ebx,28(%esp) +- addl %edi,%esi +- xorl %esi,%ebp +- movl 48(%esp),%edx +- roll $12,%ebp +- movl 128(%esp),%ebx +- addl %ebp,%eax +- xorl %eax,%edi +- movl %eax,12(%esp) +- roll $8,%edi +- movl (%esp),%eax +- addl %edi,%esi +- movl %edi,56(%esp) +- xorl %esi,%ebp +- roll $7,%ebp +- decl %ebx +- jnz L004loop +- movl 160(%esp),%ebx +- addl $1634760805,%eax +- addl 80(%esp),%ebp +- addl 96(%esp),%ecx +- addl 100(%esp),%esi +- cmpl $64,%ebx +- jb L005tail +- movl 156(%esp),%ebx +- addl 112(%esp),%edx +- addl 120(%esp),%edi +- xorl (%ebx),%eax +- xorl 16(%ebx),%ebp +- movl %eax,(%esp) +- movl 152(%esp),%eax +- xorl 32(%ebx),%ecx +- xorl 36(%ebx),%esi +- xorl 48(%ebx),%edx +- xorl 56(%ebx),%edi +- movl %ebp,16(%eax) +- movl %ecx,32(%eax) +- movl %esi,36(%eax) +- movl %edx,48(%eax) +- movl %edi,56(%eax) +- movl 4(%esp),%ebp +- movl 8(%esp),%ecx +- movl 12(%esp),%esi +- movl 20(%esp),%edx +- movl 24(%esp),%edi +- addl $857760878,%ebp +- addl $2036477234,%ecx +- addl $1797285236,%esi +- addl 84(%esp),%edx +- addl 88(%esp),%edi +- xorl 4(%ebx),%ebp +- xorl 8(%ebx),%ecx +- xorl 12(%ebx),%esi +- xorl 20(%ebx),%edx +- xorl 24(%ebx),%edi +- movl %ebp,4(%eax) +- movl %ecx,8(%eax) +- movl %esi,12(%eax) +- movl %edx,20(%eax) +- movl %edi,24(%eax) +- movl 28(%esp),%ebp +- movl 40(%esp),%ecx +- movl 44(%esp),%esi +- movl 52(%esp),%edx +- movl 60(%esp),%edi +- addl 92(%esp),%ebp +- addl 104(%esp),%ecx +- addl 108(%esp),%esi +- addl 116(%esp),%edx +- addl 124(%esp),%edi +- xorl 28(%ebx),%ebp +- xorl 40(%ebx),%ecx +- xorl 44(%ebx),%esi +- xorl 52(%ebx),%edx +- xorl 60(%ebx),%edi +- leal 64(%ebx),%ebx +- movl %ebp,28(%eax) +- movl (%esp),%ebp +- movl %ecx,40(%eax) +- movl 160(%esp),%ecx +- movl %esi,44(%eax) +- movl %edx,52(%eax) +- movl %edi,60(%eax) +- movl %ebp,(%eax) +- leal 64(%eax),%eax +- subl $64,%ecx +- jnz L003outer_loop +- jmp L006done +-L005tail: +- addl 112(%esp),%edx +- addl 
120(%esp),%edi +- movl %eax,(%esp) +- movl %ebp,16(%esp) +- movl %ecx,32(%esp) +- movl %esi,36(%esp) +- movl %edx,48(%esp) +- movl %edi,56(%esp) +- movl 4(%esp),%ebp +- movl 8(%esp),%ecx +- movl 12(%esp),%esi +- movl 20(%esp),%edx +- movl 24(%esp),%edi +- addl $857760878,%ebp +- addl $2036477234,%ecx +- addl $1797285236,%esi +- addl 84(%esp),%edx +- addl 88(%esp),%edi +- movl %ebp,4(%esp) +- movl %ecx,8(%esp) +- movl %esi,12(%esp) +- movl %edx,20(%esp) +- movl %edi,24(%esp) +- movl 28(%esp),%ebp +- movl 40(%esp),%ecx +- movl 44(%esp),%esi +- movl 52(%esp),%edx +- movl 60(%esp),%edi +- addl 92(%esp),%ebp +- addl 104(%esp),%ecx +- addl 108(%esp),%esi +- addl 116(%esp),%edx +- addl 124(%esp),%edi +- movl %ebp,28(%esp) +- movl 156(%esp),%ebp +- movl %ecx,40(%esp) +- movl 152(%esp),%ecx +- movl %esi,44(%esp) +- xorl %esi,%esi +- movl %edx,52(%esp) +- movl %edi,60(%esp) +- xorl %eax,%eax +- xorl %edx,%edx +-L007tail_loop: +- movb (%esi,%ebp,1),%al +- movb (%esp,%esi,1),%dl +- leal 1(%esi),%esi +- xorb %dl,%al +- movb %al,-1(%ecx,%esi,1) +- decl %ebx +- jnz L007tail_loop +-L006done: +- addl $132,%esp +-L000no_data: +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _ChaCha20_ssse3 +-.private_extern _ChaCha20_ssse3 +-.align 4 +-_ChaCha20_ssse3: +-L_ChaCha20_ssse3_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +-Lssse3_shortcut: +- movl 20(%esp),%edi +- movl 24(%esp),%esi +- movl 28(%esp),%ecx +- movl 32(%esp),%edx +- movl 36(%esp),%ebx +- movl %esp,%ebp +- subl $524,%esp +- andl $-64,%esp +- movl %ebp,512(%esp) +- leal Lssse3_data-Lpic_point(%eax),%eax +- movdqu (%ebx),%xmm3 +- cmpl $256,%ecx +- jb L0081x +- movl %edx,516(%esp) +- movl %ebx,520(%esp) +- subl $256,%ecx +- leal 384(%esp),%ebp +- movdqu (%edx),%xmm7 +- pshufd $0,%xmm3,%xmm0 +- pshufd $85,%xmm3,%xmm1 +- pshufd $170,%xmm3,%xmm2 +- pshufd $255,%xmm3,%xmm3 +- paddd 48(%eax),%xmm0 +- pshufd $0,%xmm7,%xmm4 +- pshufd $85,%xmm7,%xmm5 +- psubd 64(%eax),%xmm0 +- pshufd $170,%xmm7,%xmm6 +- pshufd $255,%xmm7,%xmm7 +- movdqa %xmm0,64(%ebp) +- movdqa %xmm1,80(%ebp) +- movdqa %xmm2,96(%ebp) +- movdqa %xmm3,112(%ebp) +- movdqu 16(%edx),%xmm3 +- movdqa %xmm4,-64(%ebp) +- movdqa %xmm5,-48(%ebp) +- movdqa %xmm6,-32(%ebp) +- movdqa %xmm7,-16(%ebp) +- movdqa 32(%eax),%xmm7 +- leal 128(%esp),%ebx +- pshufd $0,%xmm3,%xmm0 +- pshufd $85,%xmm3,%xmm1 +- pshufd $170,%xmm3,%xmm2 +- pshufd $255,%xmm3,%xmm3 +- pshufd $0,%xmm7,%xmm4 +- pshufd $85,%xmm7,%xmm5 +- pshufd $170,%xmm7,%xmm6 +- pshufd $255,%xmm7,%xmm7 +- movdqa %xmm0,(%ebp) +- movdqa %xmm1,16(%ebp) +- movdqa %xmm2,32(%ebp) +- movdqa %xmm3,48(%ebp) +- movdqa %xmm4,-128(%ebp) +- movdqa %xmm5,-112(%ebp) +- movdqa %xmm6,-96(%ebp) +- movdqa %xmm7,-80(%ebp) +- leal 128(%esi),%esi +- leal 128(%edi),%edi +- jmp L009outer_loop +-.align 4,0x90 +-L009outer_loop: +- movdqa -112(%ebp),%xmm1 +- movdqa -96(%ebp),%xmm2 +- movdqa -80(%ebp),%xmm3 +- movdqa -48(%ebp),%xmm5 +- movdqa -32(%ebp),%xmm6 +- movdqa -16(%ebp),%xmm7 +- movdqa %xmm1,-112(%ebx) +- movdqa %xmm2,-96(%ebx) +- movdqa %xmm3,-80(%ebx) +- movdqa %xmm5,-48(%ebx) +- movdqa %xmm6,-32(%ebx) +- movdqa %xmm7,-16(%ebx) +- movdqa 32(%ebp),%xmm2 +- movdqa 48(%ebp),%xmm3 +- movdqa 64(%ebp),%xmm4 +- movdqa 80(%ebp),%xmm5 +- movdqa 96(%ebp),%xmm6 +- movdqa 112(%ebp),%xmm7 +- paddd 64(%eax),%xmm4 +- movdqa %xmm2,32(%ebx) +- movdqa %xmm3,48(%ebx) +- movdqa %xmm4,64(%ebx) +- movdqa %xmm5,80(%ebx) +- movdqa %xmm6,96(%ebx) +- movdqa %xmm7,112(%ebx) +- movdqa %xmm4,64(%ebp) +- movdqa -128(%ebp),%xmm0 +- movdqa %xmm4,%xmm6 +- movdqa 
-64(%ebp),%xmm3 +- movdqa (%ebp),%xmm4 +- movdqa 16(%ebp),%xmm5 +- movl $10,%edx +- nop +-.align 4,0x90 +-L010loop: +- paddd %xmm3,%xmm0 +- movdqa %xmm3,%xmm2 +- pxor %xmm0,%xmm6 +- pshufb (%eax),%xmm6 +- paddd %xmm6,%xmm4 +- pxor %xmm4,%xmm2 +- movdqa -48(%ebx),%xmm3 +- movdqa %xmm2,%xmm1 +- pslld $12,%xmm2 +- psrld $20,%xmm1 +- por %xmm1,%xmm2 +- movdqa -112(%ebx),%xmm1 +- paddd %xmm2,%xmm0 +- movdqa 80(%ebx),%xmm7 +- pxor %xmm0,%xmm6 +- movdqa %xmm0,-128(%ebx) +- pshufb 16(%eax),%xmm6 +- paddd %xmm6,%xmm4 +- movdqa %xmm6,64(%ebx) +- pxor %xmm4,%xmm2 +- paddd %xmm3,%xmm1 +- movdqa %xmm2,%xmm0 +- pslld $7,%xmm2 +- psrld $25,%xmm0 +- pxor %xmm1,%xmm7 +- por %xmm0,%xmm2 +- movdqa %xmm4,(%ebx) +- pshufb (%eax),%xmm7 +- movdqa %xmm2,-64(%ebx) +- paddd %xmm7,%xmm5 +- movdqa 32(%ebx),%xmm4 +- pxor %xmm5,%xmm3 +- movdqa -32(%ebx),%xmm2 +- movdqa %xmm3,%xmm0 +- pslld $12,%xmm3 +- psrld $20,%xmm0 +- por %xmm0,%xmm3 +- movdqa -96(%ebx),%xmm0 +- paddd %xmm3,%xmm1 +- movdqa 96(%ebx),%xmm6 +- pxor %xmm1,%xmm7 +- movdqa %xmm1,-112(%ebx) +- pshufb 16(%eax),%xmm7 +- paddd %xmm7,%xmm5 +- movdqa %xmm7,80(%ebx) +- pxor %xmm5,%xmm3 +- paddd %xmm2,%xmm0 +- movdqa %xmm3,%xmm1 +- pslld $7,%xmm3 +- psrld $25,%xmm1 +- pxor %xmm0,%xmm6 +- por %xmm1,%xmm3 +- movdqa %xmm5,16(%ebx) +- pshufb (%eax),%xmm6 +- movdqa %xmm3,-48(%ebx) +- paddd %xmm6,%xmm4 +- movdqa 48(%ebx),%xmm5 +- pxor %xmm4,%xmm2 +- movdqa -16(%ebx),%xmm3 +- movdqa %xmm2,%xmm1 +- pslld $12,%xmm2 +- psrld $20,%xmm1 +- por %xmm1,%xmm2 +- movdqa -80(%ebx),%xmm1 +- paddd %xmm2,%xmm0 +- movdqa 112(%ebx),%xmm7 +- pxor %xmm0,%xmm6 +- movdqa %xmm0,-96(%ebx) +- pshufb 16(%eax),%xmm6 +- paddd %xmm6,%xmm4 +- movdqa %xmm6,96(%ebx) +- pxor %xmm4,%xmm2 +- paddd %xmm3,%xmm1 +- movdqa %xmm2,%xmm0 +- pslld $7,%xmm2 +- psrld $25,%xmm0 +- pxor %xmm1,%xmm7 +- por %xmm0,%xmm2 +- pshufb (%eax),%xmm7 +- movdqa %xmm2,-32(%ebx) +- paddd %xmm7,%xmm5 +- pxor %xmm5,%xmm3 +- movdqa -48(%ebx),%xmm2 +- movdqa %xmm3,%xmm0 +- pslld $12,%xmm3 +- psrld $20,%xmm0 +- por %xmm0,%xmm3 +- movdqa -128(%ebx),%xmm0 +- paddd %xmm3,%xmm1 +- pxor %xmm1,%xmm7 +- movdqa %xmm1,-80(%ebx) +- pshufb 16(%eax),%xmm7 +- paddd %xmm7,%xmm5 +- movdqa %xmm7,%xmm6 +- pxor %xmm5,%xmm3 +- paddd %xmm2,%xmm0 +- movdqa %xmm3,%xmm1 +- pslld $7,%xmm3 +- psrld $25,%xmm1 +- pxor %xmm0,%xmm6 +- por %xmm1,%xmm3 +- pshufb (%eax),%xmm6 +- movdqa %xmm3,-16(%ebx) +- paddd %xmm6,%xmm4 +- pxor %xmm4,%xmm2 +- movdqa -32(%ebx),%xmm3 +- movdqa %xmm2,%xmm1 +- pslld $12,%xmm2 +- psrld $20,%xmm1 +- por %xmm1,%xmm2 +- movdqa -112(%ebx),%xmm1 +- paddd %xmm2,%xmm0 +- movdqa 64(%ebx),%xmm7 +- pxor %xmm0,%xmm6 +- movdqa %xmm0,-128(%ebx) +- pshufb 16(%eax),%xmm6 +- paddd %xmm6,%xmm4 +- movdqa %xmm6,112(%ebx) +- pxor %xmm4,%xmm2 +- paddd %xmm3,%xmm1 +- movdqa %xmm2,%xmm0 +- pslld $7,%xmm2 +- psrld $25,%xmm0 +- pxor %xmm1,%xmm7 +- por %xmm0,%xmm2 +- movdqa %xmm4,32(%ebx) +- pshufb (%eax),%xmm7 +- movdqa %xmm2,-48(%ebx) +- paddd %xmm7,%xmm5 +- movdqa (%ebx),%xmm4 +- pxor %xmm5,%xmm3 +- movdqa -16(%ebx),%xmm2 +- movdqa %xmm3,%xmm0 +- pslld $12,%xmm3 +- psrld $20,%xmm0 +- por %xmm0,%xmm3 +- movdqa -96(%ebx),%xmm0 +- paddd %xmm3,%xmm1 +- movdqa 80(%ebx),%xmm6 +- pxor %xmm1,%xmm7 +- movdqa %xmm1,-112(%ebx) +- pshufb 16(%eax),%xmm7 +- paddd %xmm7,%xmm5 +- movdqa %xmm7,64(%ebx) +- pxor %xmm5,%xmm3 +- paddd %xmm2,%xmm0 +- movdqa %xmm3,%xmm1 +- pslld $7,%xmm3 +- psrld $25,%xmm1 +- pxor %xmm0,%xmm6 +- por %xmm1,%xmm3 +- movdqa %xmm5,48(%ebx) +- pshufb (%eax),%xmm6 +- movdqa %xmm3,-32(%ebx) +- paddd %xmm6,%xmm4 +- movdqa 16(%ebx),%xmm5 +- pxor 
%xmm4,%xmm2 +- movdqa -64(%ebx),%xmm3 +- movdqa %xmm2,%xmm1 +- pslld $12,%xmm2 +- psrld $20,%xmm1 +- por %xmm1,%xmm2 +- movdqa -80(%ebx),%xmm1 +- paddd %xmm2,%xmm0 +- movdqa 96(%ebx),%xmm7 +- pxor %xmm0,%xmm6 +- movdqa %xmm0,-96(%ebx) +- pshufb 16(%eax),%xmm6 +- paddd %xmm6,%xmm4 +- movdqa %xmm6,80(%ebx) +- pxor %xmm4,%xmm2 +- paddd %xmm3,%xmm1 +- movdqa %xmm2,%xmm0 +- pslld $7,%xmm2 +- psrld $25,%xmm0 +- pxor %xmm1,%xmm7 +- por %xmm0,%xmm2 +- pshufb (%eax),%xmm7 +- movdqa %xmm2,-16(%ebx) +- paddd %xmm7,%xmm5 +- pxor %xmm5,%xmm3 +- movdqa %xmm3,%xmm0 +- pslld $12,%xmm3 +- psrld $20,%xmm0 +- por %xmm0,%xmm3 +- movdqa -128(%ebx),%xmm0 +- paddd %xmm3,%xmm1 +- movdqa 64(%ebx),%xmm6 +- pxor %xmm1,%xmm7 +- movdqa %xmm1,-80(%ebx) +- pshufb 16(%eax),%xmm7 +- paddd %xmm7,%xmm5 +- movdqa %xmm7,96(%ebx) +- pxor %xmm5,%xmm3 +- movdqa %xmm3,%xmm1 +- pslld $7,%xmm3 +- psrld $25,%xmm1 +- por %xmm1,%xmm3 +- decl %edx +- jnz L010loop +- movdqa %xmm3,-64(%ebx) +- movdqa %xmm4,(%ebx) +- movdqa %xmm5,16(%ebx) +- movdqa %xmm6,64(%ebx) +- movdqa %xmm7,96(%ebx) +- movdqa -112(%ebx),%xmm1 +- movdqa -96(%ebx),%xmm2 +- movdqa -80(%ebx),%xmm3 +- paddd -128(%ebp),%xmm0 +- paddd -112(%ebp),%xmm1 +- paddd -96(%ebp),%xmm2 +- paddd -80(%ebp),%xmm3 +- movdqa %xmm0,%xmm6 +- punpckldq %xmm1,%xmm0 +- movdqa %xmm2,%xmm7 +- punpckldq %xmm3,%xmm2 +- punpckhdq %xmm1,%xmm6 +- punpckhdq %xmm3,%xmm7 +- movdqa %xmm0,%xmm1 +- punpcklqdq %xmm2,%xmm0 +- movdqa %xmm6,%xmm3 +- punpcklqdq %xmm7,%xmm6 +- punpckhqdq %xmm2,%xmm1 +- punpckhqdq %xmm7,%xmm3 +- movdqu -128(%esi),%xmm4 +- movdqu -64(%esi),%xmm5 +- movdqu (%esi),%xmm2 +- movdqu 64(%esi),%xmm7 +- leal 16(%esi),%esi +- pxor %xmm0,%xmm4 +- movdqa -64(%ebx),%xmm0 +- pxor %xmm1,%xmm5 +- movdqa -48(%ebx),%xmm1 +- pxor %xmm2,%xmm6 +- movdqa -32(%ebx),%xmm2 +- pxor %xmm3,%xmm7 +- movdqa -16(%ebx),%xmm3 +- movdqu %xmm4,-128(%edi) +- movdqu %xmm5,-64(%edi) +- movdqu %xmm6,(%edi) +- movdqu %xmm7,64(%edi) +- leal 16(%edi),%edi +- paddd -64(%ebp),%xmm0 +- paddd -48(%ebp),%xmm1 +- paddd -32(%ebp),%xmm2 +- paddd -16(%ebp),%xmm3 +- movdqa %xmm0,%xmm6 +- punpckldq %xmm1,%xmm0 +- movdqa %xmm2,%xmm7 +- punpckldq %xmm3,%xmm2 +- punpckhdq %xmm1,%xmm6 +- punpckhdq %xmm3,%xmm7 +- movdqa %xmm0,%xmm1 +- punpcklqdq %xmm2,%xmm0 +- movdqa %xmm6,%xmm3 +- punpcklqdq %xmm7,%xmm6 +- punpckhqdq %xmm2,%xmm1 +- punpckhqdq %xmm7,%xmm3 +- movdqu -128(%esi),%xmm4 +- movdqu -64(%esi),%xmm5 +- movdqu (%esi),%xmm2 +- movdqu 64(%esi),%xmm7 +- leal 16(%esi),%esi +- pxor %xmm0,%xmm4 +- movdqa (%ebx),%xmm0 +- pxor %xmm1,%xmm5 +- movdqa 16(%ebx),%xmm1 +- pxor %xmm2,%xmm6 +- movdqa 32(%ebx),%xmm2 +- pxor %xmm3,%xmm7 +- movdqa 48(%ebx),%xmm3 +- movdqu %xmm4,-128(%edi) +- movdqu %xmm5,-64(%edi) +- movdqu %xmm6,(%edi) +- movdqu %xmm7,64(%edi) +- leal 16(%edi),%edi +- paddd (%ebp),%xmm0 +- paddd 16(%ebp),%xmm1 +- paddd 32(%ebp),%xmm2 +- paddd 48(%ebp),%xmm3 +- movdqa %xmm0,%xmm6 +- punpckldq %xmm1,%xmm0 +- movdqa %xmm2,%xmm7 +- punpckldq %xmm3,%xmm2 +- punpckhdq %xmm1,%xmm6 +- punpckhdq %xmm3,%xmm7 +- movdqa %xmm0,%xmm1 +- punpcklqdq %xmm2,%xmm0 +- movdqa %xmm6,%xmm3 +- punpcklqdq %xmm7,%xmm6 +- punpckhqdq %xmm2,%xmm1 +- punpckhqdq %xmm7,%xmm3 +- movdqu -128(%esi),%xmm4 +- movdqu -64(%esi),%xmm5 +- movdqu (%esi),%xmm2 +- movdqu 64(%esi),%xmm7 +- leal 16(%esi),%esi +- pxor %xmm0,%xmm4 +- movdqa 64(%ebx),%xmm0 +- pxor %xmm1,%xmm5 +- movdqa 80(%ebx),%xmm1 +- pxor %xmm2,%xmm6 +- movdqa 96(%ebx),%xmm2 +- pxor %xmm3,%xmm7 +- movdqa 112(%ebx),%xmm3 +- movdqu %xmm4,-128(%edi) +- movdqu %xmm5,-64(%edi) +- movdqu %xmm6,(%edi) +- movdqu 
%xmm7,64(%edi) +- leal 16(%edi),%edi +- paddd 64(%ebp),%xmm0 +- paddd 80(%ebp),%xmm1 +- paddd 96(%ebp),%xmm2 +- paddd 112(%ebp),%xmm3 +- movdqa %xmm0,%xmm6 +- punpckldq %xmm1,%xmm0 +- movdqa %xmm2,%xmm7 +- punpckldq %xmm3,%xmm2 +- punpckhdq %xmm1,%xmm6 +- punpckhdq %xmm3,%xmm7 +- movdqa %xmm0,%xmm1 +- punpcklqdq %xmm2,%xmm0 +- movdqa %xmm6,%xmm3 +- punpcklqdq %xmm7,%xmm6 +- punpckhqdq %xmm2,%xmm1 +- punpckhqdq %xmm7,%xmm3 +- movdqu -128(%esi),%xmm4 +- movdqu -64(%esi),%xmm5 +- movdqu (%esi),%xmm2 +- movdqu 64(%esi),%xmm7 +- leal 208(%esi),%esi +- pxor %xmm0,%xmm4 +- pxor %xmm1,%xmm5 +- pxor %xmm2,%xmm6 +- pxor %xmm3,%xmm7 +- movdqu %xmm4,-128(%edi) +- movdqu %xmm5,-64(%edi) +- movdqu %xmm6,(%edi) +- movdqu %xmm7,64(%edi) +- leal 208(%edi),%edi +- subl $256,%ecx +- jnc L009outer_loop +- addl $256,%ecx +- jz L011done +- movl 520(%esp),%ebx +- leal -128(%esi),%esi +- movl 516(%esp),%edx +- leal -128(%edi),%edi +- movd 64(%ebp),%xmm2 +- movdqu (%ebx),%xmm3 +- paddd 96(%eax),%xmm2 +- pand 112(%eax),%xmm3 +- por %xmm2,%xmm3 +-L0081x: +- movdqa 32(%eax),%xmm0 +- movdqu (%edx),%xmm1 +- movdqu 16(%edx),%xmm2 +- movdqa (%eax),%xmm6 +- movdqa 16(%eax),%xmm7 +- movl %ebp,48(%esp) +- movdqa %xmm0,(%esp) +- movdqa %xmm1,16(%esp) +- movdqa %xmm2,32(%esp) +- movdqa %xmm3,48(%esp) +- movl $10,%edx +- jmp L012loop1x +-.align 4,0x90 +-L013outer1x: +- movdqa 80(%eax),%xmm3 +- movdqa (%esp),%xmm0 +- movdqa 16(%esp),%xmm1 +- movdqa 32(%esp),%xmm2 +- paddd 48(%esp),%xmm3 +- movl $10,%edx +- movdqa %xmm3,48(%esp) +- jmp L012loop1x +-.align 4,0x90 +-L012loop1x: +- paddd %xmm1,%xmm0 +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,222 +- paddd %xmm3,%xmm2 +- pxor %xmm2,%xmm1 +- movdqa %xmm1,%xmm4 +- psrld $20,%xmm1 +- pslld $12,%xmm4 +- por %xmm4,%xmm1 +- paddd %xmm1,%xmm0 +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,223 +- paddd %xmm3,%xmm2 +- pxor %xmm2,%xmm1 +- movdqa %xmm1,%xmm4 +- psrld $25,%xmm1 +- pslld $7,%xmm4 +- por %xmm4,%xmm1 +- pshufd $78,%xmm2,%xmm2 +- pshufd $57,%xmm1,%xmm1 +- pshufd $147,%xmm3,%xmm3 +- nop +- paddd %xmm1,%xmm0 +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,222 +- paddd %xmm3,%xmm2 +- pxor %xmm2,%xmm1 +- movdqa %xmm1,%xmm4 +- psrld $20,%xmm1 +- pslld $12,%xmm4 +- por %xmm4,%xmm1 +- paddd %xmm1,%xmm0 +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,223 +- paddd %xmm3,%xmm2 +- pxor %xmm2,%xmm1 +- movdqa %xmm1,%xmm4 +- psrld $25,%xmm1 +- pslld $7,%xmm4 +- por %xmm4,%xmm1 +- pshufd $78,%xmm2,%xmm2 +- pshufd $147,%xmm1,%xmm1 +- pshufd $57,%xmm3,%xmm3 +- decl %edx +- jnz L012loop1x +- paddd (%esp),%xmm0 +- paddd 16(%esp),%xmm1 +- paddd 32(%esp),%xmm2 +- paddd 48(%esp),%xmm3 +- cmpl $64,%ecx +- jb L014tail +- movdqu (%esi),%xmm4 +- movdqu 16(%esi),%xmm5 +- pxor %xmm4,%xmm0 +- movdqu 32(%esi),%xmm4 +- pxor %xmm5,%xmm1 +- movdqu 48(%esi),%xmm5 +- pxor %xmm4,%xmm2 +- pxor %xmm5,%xmm3 +- leal 64(%esi),%esi +- movdqu %xmm0,(%edi) +- movdqu %xmm1,16(%edi) +- movdqu %xmm2,32(%edi) +- movdqu %xmm3,48(%edi) +- leal 64(%edi),%edi +- subl $64,%ecx +- jnz L013outer1x +- jmp L011done +-L014tail: +- movdqa %xmm0,(%esp) +- movdqa %xmm1,16(%esp) +- movdqa %xmm2,32(%esp) +- movdqa %xmm3,48(%esp) +- xorl %eax,%eax +- xorl %edx,%edx +- xorl %ebp,%ebp +-L015tail_loop: +- movb (%esp,%ebp,1),%al +- movb (%esi,%ebp,1),%dl +- leal 1(%ebp),%ebp +- xorb %dl,%al +- movb %al,-1(%edi,%ebp,1) +- decl %ecx +- jnz L015tail_loop +-L011done: +- movl 512(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 6,0x90 +-Lssse3_data: +-.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 +-.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 +-.long 
1634760805,857760878,2036477234,1797285236 +-.long 0,1,2,3 +-.long 4,4,4,4 +-.long 1,0,0,0 +-.long 4,0,0,0 +-.long 0,-1,-1,-1 +-.align 6,0x90 +-.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54 +-.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32 +-.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111 +-.byte 114,103,62,0 +-.section __IMPORT,__pointers,non_lazy_symbol_pointers +-L_OPENSSL_ia32cap_P$non_lazy_ptr: +-.indirect_symbol _OPENSSL_ia32cap_P +-.long 0 +-#endif +diff --git a/mac-x86/crypto/fipsmodule/aesni-x86.S b/mac-x86/crypto/fipsmodule/aesni-x86.S +deleted file mode 100644 +index 00f6003..0000000 +--- a/mac-x86/crypto/fipsmodule/aesni-x86.S ++++ /dev/null +@@ -1,2476 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-#ifdef BORINGSSL_DISPATCH_TEST +-#endif +-.globl _aes_hw_encrypt +-.private_extern _aes_hw_encrypt +-.align 4 +-_aes_hw_encrypt: +-L_aes_hw_encrypt_begin: +-#ifdef BORINGSSL_DISPATCH_TEST +- pushl %ebx +- pushl %edx +- call L000pic +-L000pic: +- popl %ebx +- leal _BORINGSSL_function_hit+1-L000pic(%ebx),%ebx +- movl $1,%edx +- movb %dl,(%ebx) +- popl %edx +- popl %ebx +-#endif +- movl 4(%esp),%eax +- movl 12(%esp),%edx +- movups (%eax),%xmm2 +- movl 240(%edx),%ecx +- movl 8(%esp),%eax +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L001enc1_loop_1: +-.byte 102,15,56,220,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L001enc1_loop_1 +-.byte 102,15,56,221,209 +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- movups %xmm2,(%eax) +- pxor %xmm2,%xmm2 +- ret +-.globl _aes_hw_decrypt +-.private_extern _aes_hw_decrypt +-.align 4 +-_aes_hw_decrypt: +-L_aes_hw_decrypt_begin: +- movl 4(%esp),%eax +- movl 12(%esp),%edx +- movups (%eax),%xmm2 +- movl 240(%edx),%ecx +- movl 8(%esp),%eax +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L002dec1_loop_2: +-.byte 102,15,56,222,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L002dec1_loop_2 +-.byte 102,15,56,223,209 +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- movups %xmm2,(%eax) +- pxor %xmm2,%xmm2 +- ret +-.private_extern __aesni_encrypt2 +-.align 4 +-__aesni_encrypt2: +- movups (%edx),%xmm0 +- shll $4,%ecx +- movups 16(%edx),%xmm1 +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- movups 32(%edx),%xmm0 +- leal 32(%edx,%ecx,1),%edx +- negl %ecx +- addl $16,%ecx +-L003enc2_loop: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L003enc2_loop +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +- ret +-.private_extern __aesni_decrypt2 +-.align 4 +-__aesni_decrypt2: +- movups (%edx),%xmm0 +- shll $4,%ecx +- movups 16(%edx),%xmm1 +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- movups 32(%edx),%xmm0 +- leal 32(%edx,%ecx,1),%edx +- negl %ecx +- addl $16,%ecx +-L004dec2_loop: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L004dec2_loop +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,223,208 +-.byte 102,15,56,223,216 +- ret +-.private_extern __aesni_encrypt3 +-.align 4 +-__aesni_encrypt3: +- 
movups (%edx),%xmm0 +- shll $4,%ecx +- movups 16(%edx),%xmm1 +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +- movups 32(%edx),%xmm0 +- leal 32(%edx,%ecx,1),%edx +- negl %ecx +- addl $16,%ecx +-L005enc3_loop: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +-.byte 102,15,56,220,224 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L005enc3_loop +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +-.byte 102,15,56,221,224 +- ret +-.private_extern __aesni_decrypt3 +-.align 4 +-__aesni_decrypt3: +- movups (%edx),%xmm0 +- shll $4,%ecx +- movups 16(%edx),%xmm1 +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +- movups 32(%edx),%xmm0 +- leal 32(%edx,%ecx,1),%edx +- negl %ecx +- addl $16,%ecx +-L006dec3_loop: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L006dec3_loop +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,223,208 +-.byte 102,15,56,223,216 +-.byte 102,15,56,223,224 +- ret +-.private_extern __aesni_encrypt4 +-.align 4 +-__aesni_encrypt4: +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- shll $4,%ecx +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +- pxor %xmm0,%xmm5 +- movups 32(%edx),%xmm0 +- leal 32(%edx,%ecx,1),%edx +- negl %ecx +-.byte 15,31,64,0 +- addl $16,%ecx +-L007enc4_loop: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L007enc4_loop +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +-.byte 102,15,56,221,224 +-.byte 102,15,56,221,232 +- ret +-.private_extern __aesni_decrypt4 +-.align 4 +-__aesni_decrypt4: +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- shll $4,%ecx +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +- pxor %xmm0,%xmm5 +- movups 32(%edx),%xmm0 +- leal 32(%edx,%ecx,1),%edx +- negl %ecx +-.byte 15,31,64,0 +- addl $16,%ecx +-L008dec4_loop: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L008dec4_loop +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,223,208 +-.byte 102,15,56,223,216 +-.byte 102,15,56,223,224 +-.byte 102,15,56,223,232 +- ret +-.private_extern __aesni_encrypt6 +-.align 4 +-__aesni_encrypt6: +- movups (%edx),%xmm0 +- shll $4,%ecx +- movups 16(%edx),%xmm1 +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +-.byte 102,15,56,220,209 +- pxor %xmm0,%xmm5 +- pxor %xmm0,%xmm6 +-.byte 102,15,56,220,217 +- leal 32(%edx,%ecx,1),%edx +- negl %ecx +-.byte 102,15,56,220,225 +- pxor %xmm0,%xmm7 +- movups (%edx,%ecx,1),%xmm0 +- addl $16,%ecx +- jmp L009_aesni_encrypt6_inner +-.align 4,0x90 +-L010enc6_loop: 
+-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-L009_aesni_encrypt6_inner: +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-L_aesni_encrypt6_enter: +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L010enc6_loop +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +-.byte 102,15,56,221,224 +-.byte 102,15,56,221,232 +-.byte 102,15,56,221,240 +-.byte 102,15,56,221,248 +- ret +-.private_extern __aesni_decrypt6 +-.align 4 +-__aesni_decrypt6: +- movups (%edx),%xmm0 +- shll $4,%ecx +- movups 16(%edx),%xmm1 +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +-.byte 102,15,56,222,209 +- pxor %xmm0,%xmm5 +- pxor %xmm0,%xmm6 +-.byte 102,15,56,222,217 +- leal 32(%edx,%ecx,1),%edx +- negl %ecx +-.byte 102,15,56,222,225 +- pxor %xmm0,%xmm7 +- movups (%edx,%ecx,1),%xmm0 +- addl $16,%ecx +- jmp L011_aesni_decrypt6_inner +-.align 4,0x90 +-L012dec6_loop: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-L011_aesni_decrypt6_inner: +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-L_aesni_decrypt6_enter: +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +-.byte 102,15,56,222,240 +-.byte 102,15,56,222,248 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L012dec6_loop +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,15,56,223,208 +-.byte 102,15,56,223,216 +-.byte 102,15,56,223,224 +-.byte 102,15,56,223,232 +-.byte 102,15,56,223,240 +-.byte 102,15,56,223,248 +- ret +-.globl _aes_hw_ecb_encrypt +-.private_extern _aes_hw_ecb_encrypt +-.align 4 +-_aes_hw_ecb_encrypt: +-L_aes_hw_ecb_encrypt_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%eax +- movl 32(%esp),%edx +- movl 36(%esp),%ebx +- andl $-16,%eax +- jz L013ecb_ret +- movl 240(%edx),%ecx +- testl %ebx,%ebx +- jz L014ecb_decrypt +- movl %edx,%ebp +- movl %ecx,%ebx +- cmpl $96,%eax +- jb L015ecb_enc_tail +- movdqu (%esi),%xmm2 +- movdqu 16(%esi),%xmm3 +- movdqu 32(%esi),%xmm4 +- movdqu 48(%esi),%xmm5 +- movdqu 64(%esi),%xmm6 +- movdqu 80(%esi),%xmm7 +- leal 96(%esi),%esi +- subl $96,%eax +- jmp L016ecb_enc_loop6_enter +-.align 4,0x90 +-L017ecb_enc_loop6: +- movups %xmm2,(%edi) +- movdqu (%esi),%xmm2 +- movups %xmm3,16(%edi) +- movdqu 16(%esi),%xmm3 +- movups %xmm4,32(%edi) +- movdqu 32(%esi),%xmm4 +- movups %xmm5,48(%edi) +- movdqu 48(%esi),%xmm5 +- movups %xmm6,64(%edi) +- movdqu 64(%esi),%xmm6 +- movups %xmm7,80(%edi) +- leal 96(%edi),%edi +- movdqu 80(%esi),%xmm7 +- leal 96(%esi),%esi +-L016ecb_enc_loop6_enter: +- call __aesni_encrypt6 +- movl %ebp,%edx +- movl %ebx,%ecx +- subl $96,%eax +- jnc L017ecb_enc_loop6 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- movups %xmm6,64(%edi) +- movups %xmm7,80(%edi) +- leal 96(%edi),%edi +- addl $96,%eax +- jz L013ecb_ret +-L015ecb_enc_tail: +- movups (%esi),%xmm2 +- cmpl $32,%eax +- jb L018ecb_enc_one +- movups 
16(%esi),%xmm3 +- je L019ecb_enc_two +- movups 32(%esi),%xmm4 +- cmpl $64,%eax +- jb L020ecb_enc_three +- movups 48(%esi),%xmm5 +- je L021ecb_enc_four +- movups 64(%esi),%xmm6 +- xorps %xmm7,%xmm7 +- call __aesni_encrypt6 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- movups %xmm6,64(%edi) +- jmp L013ecb_ret +-.align 4,0x90 +-L018ecb_enc_one: +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L022enc1_loop_3: +-.byte 102,15,56,220,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L022enc1_loop_3 +-.byte 102,15,56,221,209 +- movups %xmm2,(%edi) +- jmp L013ecb_ret +-.align 4,0x90 +-L019ecb_enc_two: +- call __aesni_encrypt2 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- jmp L013ecb_ret +-.align 4,0x90 +-L020ecb_enc_three: +- call __aesni_encrypt3 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- jmp L013ecb_ret +-.align 4,0x90 +-L021ecb_enc_four: +- call __aesni_encrypt4 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- jmp L013ecb_ret +-.align 4,0x90 +-L014ecb_decrypt: +- movl %edx,%ebp +- movl %ecx,%ebx +- cmpl $96,%eax +- jb L023ecb_dec_tail +- movdqu (%esi),%xmm2 +- movdqu 16(%esi),%xmm3 +- movdqu 32(%esi),%xmm4 +- movdqu 48(%esi),%xmm5 +- movdqu 64(%esi),%xmm6 +- movdqu 80(%esi),%xmm7 +- leal 96(%esi),%esi +- subl $96,%eax +- jmp L024ecb_dec_loop6_enter +-.align 4,0x90 +-L025ecb_dec_loop6: +- movups %xmm2,(%edi) +- movdqu (%esi),%xmm2 +- movups %xmm3,16(%edi) +- movdqu 16(%esi),%xmm3 +- movups %xmm4,32(%edi) +- movdqu 32(%esi),%xmm4 +- movups %xmm5,48(%edi) +- movdqu 48(%esi),%xmm5 +- movups %xmm6,64(%edi) +- movdqu 64(%esi),%xmm6 +- movups %xmm7,80(%edi) +- leal 96(%edi),%edi +- movdqu 80(%esi),%xmm7 +- leal 96(%esi),%esi +-L024ecb_dec_loop6_enter: +- call __aesni_decrypt6 +- movl %ebp,%edx +- movl %ebx,%ecx +- subl $96,%eax +- jnc L025ecb_dec_loop6 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- movups %xmm6,64(%edi) +- movups %xmm7,80(%edi) +- leal 96(%edi),%edi +- addl $96,%eax +- jz L013ecb_ret +-L023ecb_dec_tail: +- movups (%esi),%xmm2 +- cmpl $32,%eax +- jb L026ecb_dec_one +- movups 16(%esi),%xmm3 +- je L027ecb_dec_two +- movups 32(%esi),%xmm4 +- cmpl $64,%eax +- jb L028ecb_dec_three +- movups 48(%esi),%xmm5 +- je L029ecb_dec_four +- movups 64(%esi),%xmm6 +- xorps %xmm7,%xmm7 +- call __aesni_decrypt6 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- movups %xmm6,64(%edi) +- jmp L013ecb_ret +-.align 4,0x90 +-L026ecb_dec_one: +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L030dec1_loop_4: +-.byte 102,15,56,222,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L030dec1_loop_4 +-.byte 102,15,56,223,209 +- movups %xmm2,(%edi) +- jmp L013ecb_ret +-.align 4,0x90 +-L027ecb_dec_two: +- call __aesni_decrypt2 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- jmp L013ecb_ret +-.align 4,0x90 +-L028ecb_dec_three: +- call __aesni_decrypt3 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- jmp L013ecb_ret +-.align 4,0x90 +-L029ecb_dec_four: +- call __aesni_decrypt4 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +-L013ecb_ret: +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 
+- pxor %xmm7,%xmm7 +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _aes_hw_ccm64_encrypt_blocks +-.private_extern _aes_hw_ccm64_encrypt_blocks +-.align 4 +-_aes_hw_ccm64_encrypt_blocks: +-L_aes_hw_ccm64_encrypt_blocks_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%eax +- movl 32(%esp),%edx +- movl 36(%esp),%ebx +- movl 40(%esp),%ecx +- movl %esp,%ebp +- subl $60,%esp +- andl $-16,%esp +- movl %ebp,48(%esp) +- movdqu (%ebx),%xmm7 +- movdqu (%ecx),%xmm3 +- movl 240(%edx),%ecx +- movl $202182159,(%esp) +- movl $134810123,4(%esp) +- movl $67438087,8(%esp) +- movl $66051,12(%esp) +- movl $1,%ebx +- xorl %ebp,%ebp +- movl %ebx,16(%esp) +- movl %ebp,20(%esp) +- movl %ebp,24(%esp) +- movl %ebp,28(%esp) +- shll $4,%ecx +- movl $16,%ebx +- leal (%edx),%ebp +- movdqa (%esp),%xmm5 +- movdqa %xmm7,%xmm2 +- leal 32(%edx,%ecx,1),%edx +- subl %ecx,%ebx +-.byte 102,15,56,0,253 +-L031ccm64_enc_outer: +- movups (%ebp),%xmm0 +- movl %ebx,%ecx +- movups (%esi),%xmm6 +- xorps %xmm0,%xmm2 +- movups 16(%ebp),%xmm1 +- xorps %xmm6,%xmm0 +- xorps %xmm0,%xmm3 +- movups 32(%ebp),%xmm0 +-L032ccm64_enc2_loop: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L032ccm64_enc2_loop +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +- paddq 16(%esp),%xmm7 +- decl %eax +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +- leal 16(%esi),%esi +- xorps %xmm2,%xmm6 +- movdqa %xmm7,%xmm2 +- movups %xmm6,(%edi) +-.byte 102,15,56,0,213 +- leal 16(%edi),%edi +- jnz L031ccm64_enc_outer +- movl 48(%esp),%esp +- movl 40(%esp),%edi +- movups %xmm3,(%edi) +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- pxor %xmm7,%xmm7 +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _aes_hw_ccm64_decrypt_blocks +-.private_extern _aes_hw_ccm64_decrypt_blocks +-.align 4 +-_aes_hw_ccm64_decrypt_blocks: +-L_aes_hw_ccm64_decrypt_blocks_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%eax +- movl 32(%esp),%edx +- movl 36(%esp),%ebx +- movl 40(%esp),%ecx +- movl %esp,%ebp +- subl $60,%esp +- andl $-16,%esp +- movl %ebp,48(%esp) +- movdqu (%ebx),%xmm7 +- movdqu (%ecx),%xmm3 +- movl 240(%edx),%ecx +- movl $202182159,(%esp) +- movl $134810123,4(%esp) +- movl $67438087,8(%esp) +- movl $66051,12(%esp) +- movl $1,%ebx +- xorl %ebp,%ebp +- movl %ebx,16(%esp) +- movl %ebp,20(%esp) +- movl %ebp,24(%esp) +- movl %ebp,28(%esp) +- movdqa (%esp),%xmm5 +- movdqa %xmm7,%xmm2 +- movl %edx,%ebp +- movl %ecx,%ebx +-.byte 102,15,56,0,253 +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L033enc1_loop_5: +-.byte 102,15,56,220,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L033enc1_loop_5 +-.byte 102,15,56,221,209 +- shll $4,%ebx +- movl $16,%ecx +- movups (%esi),%xmm6 +- paddq 16(%esp),%xmm7 +- leal 16(%esi),%esi +- subl %ebx,%ecx +- leal 32(%ebp,%ebx,1),%edx +- movl %ecx,%ebx +- jmp L034ccm64_dec_outer +-.align 4,0x90 +-L034ccm64_dec_outer: +- xorps %xmm2,%xmm6 +- movdqa %xmm7,%xmm2 +- movups %xmm6,(%edi) +- leal 16(%edi),%edi +-.byte 102,15,56,0,213 +- subl $1,%eax +- jz L035ccm64_dec_break +- movups (%ebp),%xmm0 +- movl %ebx,%ecx +- movups 16(%ebp),%xmm1 +- xorps %xmm0,%xmm6 +- xorps 
%xmm0,%xmm2 +- xorps %xmm6,%xmm3 +- movups 32(%ebp),%xmm0 +-L036ccm64_dec2_loop: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +- movups (%edx,%ecx,1),%xmm1 +- addl $32,%ecx +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +- movups -16(%edx,%ecx,1),%xmm0 +- jnz L036ccm64_dec2_loop +- movups (%esi),%xmm6 +- paddq 16(%esp),%xmm7 +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +- leal 16(%esi),%esi +- jmp L034ccm64_dec_outer +-.align 4,0x90 +-L035ccm64_dec_break: +- movl 240(%ebp),%ecx +- movl %ebp,%edx +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- xorps %xmm0,%xmm6 +- leal 32(%edx),%edx +- xorps %xmm6,%xmm3 +-L037enc1_loop_6: +-.byte 102,15,56,220,217 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L037enc1_loop_6 +-.byte 102,15,56,221,217 +- movl 48(%esp),%esp +- movl 40(%esp),%edi +- movups %xmm3,(%edi) +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- pxor %xmm7,%xmm7 +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _aes_hw_ctr32_encrypt_blocks +-.private_extern _aes_hw_ctr32_encrypt_blocks +-.align 4 +-_aes_hw_ctr32_encrypt_blocks: +-L_aes_hw_ctr32_encrypt_blocks_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +-#ifdef BORINGSSL_DISPATCH_TEST +- pushl %ebx +- pushl %edx +- call L038pic +-L038pic: +- popl %ebx +- leal _BORINGSSL_function_hit+0-L038pic(%ebx),%ebx +- movl $1,%edx +- movb %dl,(%ebx) +- popl %edx +- popl %ebx +-#endif +- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%eax +- movl 32(%esp),%edx +- movl 36(%esp),%ebx +- movl %esp,%ebp +- subl $88,%esp +- andl $-16,%esp +- movl %ebp,80(%esp) +- cmpl $1,%eax +- je L039ctr32_one_shortcut +- movdqu (%ebx),%xmm7 +- movl $202182159,(%esp) +- movl $134810123,4(%esp) +- movl $67438087,8(%esp) +- movl $66051,12(%esp) +- movl $6,%ecx +- xorl %ebp,%ebp +- movl %ecx,16(%esp) +- movl %ecx,20(%esp) +- movl %ecx,24(%esp) +- movl %ebp,28(%esp) +-.byte 102,15,58,22,251,3 +-.byte 102,15,58,34,253,3 +- movl 240(%edx),%ecx +- bswap %ebx +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- movdqa (%esp),%xmm2 +-.byte 102,15,58,34,195,0 +- leal 3(%ebx),%ebp +-.byte 102,15,58,34,205,0 +- incl %ebx +-.byte 102,15,58,34,195,1 +- incl %ebp +-.byte 102,15,58,34,205,1 +- incl %ebx +-.byte 102,15,58,34,195,2 +- incl %ebp +-.byte 102,15,58,34,205,2 +- movdqa %xmm0,48(%esp) +-.byte 102,15,56,0,194 +- movdqu (%edx),%xmm6 +- movdqa %xmm1,64(%esp) +-.byte 102,15,56,0,202 +- pshufd $192,%xmm0,%xmm2 +- pshufd $128,%xmm0,%xmm3 +- cmpl $6,%eax +- jb L040ctr32_tail +- pxor %xmm6,%xmm7 +- shll $4,%ecx +- movl $16,%ebx +- movdqa %xmm7,32(%esp) +- movl %edx,%ebp +- subl %ecx,%ebx +- leal 32(%edx,%ecx,1),%edx +- subl $6,%eax +- jmp L041ctr32_loop6 +-.align 4,0x90 +-L041ctr32_loop6: +- pshufd $64,%xmm0,%xmm4 +- movdqa 32(%esp),%xmm0 +- pshufd $192,%xmm1,%xmm5 +- pxor %xmm0,%xmm2 +- pshufd $128,%xmm1,%xmm6 +- pxor %xmm0,%xmm3 +- pshufd $64,%xmm1,%xmm7 +- movups 16(%ebp),%xmm1 +- pxor %xmm0,%xmm4 +- pxor %xmm0,%xmm5 +-.byte 102,15,56,220,209 +- pxor %xmm0,%xmm6 +- pxor %xmm0,%xmm7 +-.byte 102,15,56,220,217 +- movups 32(%ebp),%xmm0 +- movl %ebx,%ecx +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +- call L_aesni_encrypt6_enter +- movups (%esi),%xmm1 +- movups 16(%esi),%xmm0 +- xorps %xmm1,%xmm2 +- movups 32(%esi),%xmm1 +- xorps %xmm0,%xmm3 +- movups %xmm2,(%edi) +- movdqa 16(%esp),%xmm0 +- xorps %xmm1,%xmm4 
+- movdqa 64(%esp),%xmm1 +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- paddd %xmm0,%xmm1 +- paddd 48(%esp),%xmm0 +- movdqa (%esp),%xmm2 +- movups 48(%esi),%xmm3 +- movups 64(%esi),%xmm4 +- xorps %xmm3,%xmm5 +- movups 80(%esi),%xmm3 +- leal 96(%esi),%esi +- movdqa %xmm0,48(%esp) +-.byte 102,15,56,0,194 +- xorps %xmm4,%xmm6 +- movups %xmm5,48(%edi) +- xorps %xmm3,%xmm7 +- movdqa %xmm1,64(%esp) +-.byte 102,15,56,0,202 +- movups %xmm6,64(%edi) +- pshufd $192,%xmm0,%xmm2 +- movups %xmm7,80(%edi) +- leal 96(%edi),%edi +- pshufd $128,%xmm0,%xmm3 +- subl $6,%eax +- jnc L041ctr32_loop6 +- addl $6,%eax +- jz L042ctr32_ret +- movdqu (%ebp),%xmm7 +- movl %ebp,%edx +- pxor 32(%esp),%xmm7 +- movl 240(%ebp),%ecx +-L040ctr32_tail: +- por %xmm7,%xmm2 +- cmpl $2,%eax +- jb L043ctr32_one +- pshufd $64,%xmm0,%xmm4 +- por %xmm7,%xmm3 +- je L044ctr32_two +- pshufd $192,%xmm1,%xmm5 +- por %xmm7,%xmm4 +- cmpl $4,%eax +- jb L045ctr32_three +- pshufd $128,%xmm1,%xmm6 +- por %xmm7,%xmm5 +- je L046ctr32_four +- por %xmm7,%xmm6 +- call __aesni_encrypt6 +- movups (%esi),%xmm1 +- movups 16(%esi),%xmm0 +- xorps %xmm1,%xmm2 +- movups 32(%esi),%xmm1 +- xorps %xmm0,%xmm3 +- movups 48(%esi),%xmm0 +- xorps %xmm1,%xmm4 +- movups 64(%esi),%xmm1 +- xorps %xmm0,%xmm5 +- movups %xmm2,(%edi) +- xorps %xmm1,%xmm6 +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- movups %xmm6,64(%edi) +- jmp L042ctr32_ret +-.align 4,0x90 +-L039ctr32_one_shortcut: +- movups (%ebx),%xmm2 +- movl 240(%edx),%ecx +-L043ctr32_one: +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L047enc1_loop_7: +-.byte 102,15,56,220,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L047enc1_loop_7 +-.byte 102,15,56,221,209 +- movups (%esi),%xmm6 +- xorps %xmm2,%xmm6 +- movups %xmm6,(%edi) +- jmp L042ctr32_ret +-.align 4,0x90 +-L044ctr32_two: +- call __aesni_encrypt2 +- movups (%esi),%xmm5 +- movups 16(%esi),%xmm6 +- xorps %xmm5,%xmm2 +- xorps %xmm6,%xmm3 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- jmp L042ctr32_ret +-.align 4,0x90 +-L045ctr32_three: +- call __aesni_encrypt3 +- movups (%esi),%xmm5 +- movups 16(%esi),%xmm6 +- xorps %xmm5,%xmm2 +- movups 32(%esi),%xmm7 +- xorps %xmm6,%xmm3 +- movups %xmm2,(%edi) +- xorps %xmm7,%xmm4 +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- jmp L042ctr32_ret +-.align 4,0x90 +-L046ctr32_four: +- call __aesni_encrypt4 +- movups (%esi),%xmm6 +- movups 16(%esi),%xmm7 +- movups 32(%esi),%xmm1 +- xorps %xmm6,%xmm2 +- movups 48(%esi),%xmm0 +- xorps %xmm7,%xmm3 +- movups %xmm2,(%edi) +- xorps %xmm1,%xmm4 +- movups %xmm3,16(%edi) +- xorps %xmm0,%xmm5 +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +-L042ctr32_ret: +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- movdqa %xmm0,32(%esp) +- pxor %xmm5,%xmm5 +- movdqa %xmm0,48(%esp) +- pxor %xmm6,%xmm6 +- movdqa %xmm0,64(%esp) +- pxor %xmm7,%xmm7 +- movl 80(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _aes_hw_xts_encrypt +-.private_extern _aes_hw_xts_encrypt +-.align 4 +-_aes_hw_xts_encrypt: +-L_aes_hw_xts_encrypt_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 36(%esp),%edx +- movl 40(%esp),%esi +- movl 240(%edx),%ecx +- movups (%esi),%xmm2 +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L048enc1_loop_8: +-.byte 102,15,56,220,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L048enc1_loop_8 +-.byte 102,15,56,221,209 
+- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%eax +- movl 32(%esp),%edx +- movl %esp,%ebp +- subl $120,%esp +- movl 240(%edx),%ecx +- andl $-16,%esp +- movl $135,96(%esp) +- movl $0,100(%esp) +- movl $1,104(%esp) +- movl $0,108(%esp) +- movl %eax,112(%esp) +- movl %ebp,116(%esp) +- movdqa %xmm2,%xmm1 +- pxor %xmm0,%xmm0 +- movdqa 96(%esp),%xmm3 +- pcmpgtd %xmm1,%xmm0 +- andl $-16,%eax +- movl %edx,%ebp +- movl %ecx,%ebx +- subl $96,%eax +- jc L049xts_enc_short +- shll $4,%ecx +- movl $16,%ebx +- subl %ecx,%ebx +- leal 32(%edx,%ecx,1),%edx +- jmp L050xts_enc_loop6 +-.align 4,0x90 +-L050xts_enc_loop6: +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,16(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,32(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,48(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- pshufd $19,%xmm0,%xmm7 +- movdqa %xmm1,64(%esp) +- paddq %xmm1,%xmm1 +- movups (%ebp),%xmm0 +- pand %xmm3,%xmm7 +- movups (%esi),%xmm2 +- pxor %xmm1,%xmm7 +- movl %ebx,%ecx +- movdqu 16(%esi),%xmm3 +- xorps %xmm0,%xmm2 +- movdqu 32(%esi),%xmm4 +- pxor %xmm0,%xmm3 +- movdqu 48(%esi),%xmm5 +- pxor %xmm0,%xmm4 +- movdqu 64(%esi),%xmm6 +- pxor %xmm0,%xmm5 +- movdqu 80(%esi),%xmm1 +- pxor %xmm0,%xmm6 +- leal 96(%esi),%esi +- pxor (%esp),%xmm2 +- movdqa %xmm7,80(%esp) +- pxor %xmm1,%xmm7 +- movups 16(%ebp),%xmm1 +- pxor 16(%esp),%xmm3 +- pxor 32(%esp),%xmm4 +-.byte 102,15,56,220,209 +- pxor 48(%esp),%xmm5 +- pxor 64(%esp),%xmm6 +-.byte 102,15,56,220,217 +- pxor %xmm0,%xmm7 +- movups 32(%ebp),%xmm0 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +- call L_aesni_encrypt6_enter +- movdqa 80(%esp),%xmm1 +- pxor %xmm0,%xmm0 +- xorps (%esp),%xmm2 +- pcmpgtd %xmm1,%xmm0 +- xorps 16(%esp),%xmm3 +- movups %xmm2,(%edi) +- xorps 32(%esp),%xmm4 +- movups %xmm3,16(%edi) +- xorps 48(%esp),%xmm5 +- movups %xmm4,32(%edi) +- xorps 64(%esp),%xmm6 +- movups %xmm5,48(%edi) +- xorps %xmm1,%xmm7 +- movups %xmm6,64(%edi) +- pshufd $19,%xmm0,%xmm2 +- movups %xmm7,80(%edi) +- leal 96(%edi),%edi +- movdqa 96(%esp),%xmm3 +- pxor %xmm0,%xmm0 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- subl $96,%eax +- jnc L050xts_enc_loop6 +- movl 240(%ebp),%ecx +- movl %ebp,%edx +- movl %ecx,%ebx +-L049xts_enc_short: +- addl $96,%eax +- jz L051xts_enc_done6x +- movdqa %xmm1,%xmm5 +- cmpl $32,%eax +- jb L052xts_enc_one +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- je L053xts_enc_two +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,%xmm6 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- cmpl $64,%eax +- jb L054xts_enc_three +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,%xmm7 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- movdqa %xmm5,(%esp) +- movdqa %xmm6,16(%esp) +- je L055xts_enc_four +- movdqa %xmm7,32(%esp) +- pshufd $19,%xmm0,%xmm7 +- movdqa %xmm1,48(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm7 +- pxor %xmm1,%xmm7 +- movdqu 
(%esi),%xmm2 +- movdqu 16(%esi),%xmm3 +- movdqu 32(%esi),%xmm4 +- pxor (%esp),%xmm2 +- movdqu 48(%esi),%xmm5 +- pxor 16(%esp),%xmm3 +- movdqu 64(%esi),%xmm6 +- pxor 32(%esp),%xmm4 +- leal 80(%esi),%esi +- pxor 48(%esp),%xmm5 +- movdqa %xmm7,64(%esp) +- pxor %xmm7,%xmm6 +- call __aesni_encrypt6 +- movaps 64(%esp),%xmm1 +- xorps (%esp),%xmm2 +- xorps 16(%esp),%xmm3 +- xorps 32(%esp),%xmm4 +- movups %xmm2,(%edi) +- xorps 48(%esp),%xmm5 +- movups %xmm3,16(%edi) +- xorps %xmm1,%xmm6 +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- movups %xmm6,64(%edi) +- leal 80(%edi),%edi +- jmp L056xts_enc_done +-.align 4,0x90 +-L052xts_enc_one: +- movups (%esi),%xmm2 +- leal 16(%esi),%esi +- xorps %xmm5,%xmm2 +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L057enc1_loop_9: +-.byte 102,15,56,220,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L057enc1_loop_9 +-.byte 102,15,56,221,209 +- xorps %xmm5,%xmm2 +- movups %xmm2,(%edi) +- leal 16(%edi),%edi +- movdqa %xmm5,%xmm1 +- jmp L056xts_enc_done +-.align 4,0x90 +-L053xts_enc_two: +- movaps %xmm1,%xmm6 +- movups (%esi),%xmm2 +- movups 16(%esi),%xmm3 +- leal 32(%esi),%esi +- xorps %xmm5,%xmm2 +- xorps %xmm6,%xmm3 +- call __aesni_encrypt2 +- xorps %xmm5,%xmm2 +- xorps %xmm6,%xmm3 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- leal 32(%edi),%edi +- movdqa %xmm6,%xmm1 +- jmp L056xts_enc_done +-.align 4,0x90 +-L054xts_enc_three: +- movaps %xmm1,%xmm7 +- movups (%esi),%xmm2 +- movups 16(%esi),%xmm3 +- movups 32(%esi),%xmm4 +- leal 48(%esi),%esi +- xorps %xmm5,%xmm2 +- xorps %xmm6,%xmm3 +- xorps %xmm7,%xmm4 +- call __aesni_encrypt3 +- xorps %xmm5,%xmm2 +- xorps %xmm6,%xmm3 +- xorps %xmm7,%xmm4 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- leal 48(%edi),%edi +- movdqa %xmm7,%xmm1 +- jmp L056xts_enc_done +-.align 4,0x90 +-L055xts_enc_four: +- movaps %xmm1,%xmm6 +- movups (%esi),%xmm2 +- movups 16(%esi),%xmm3 +- movups 32(%esi),%xmm4 +- xorps (%esp),%xmm2 +- movups 48(%esi),%xmm5 +- leal 64(%esi),%esi +- xorps 16(%esp),%xmm3 +- xorps %xmm7,%xmm4 +- xorps %xmm6,%xmm5 +- call __aesni_encrypt4 +- xorps (%esp),%xmm2 +- xorps 16(%esp),%xmm3 +- xorps %xmm7,%xmm4 +- movups %xmm2,(%edi) +- xorps %xmm6,%xmm5 +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- leal 64(%edi),%edi +- movdqa %xmm6,%xmm1 +- jmp L056xts_enc_done +-.align 4,0x90 +-L051xts_enc_done6x: +- movl 112(%esp),%eax +- andl $15,%eax +- jz L058xts_enc_ret +- movdqa %xmm1,%xmm5 +- movl %eax,112(%esp) +- jmp L059xts_enc_steal +-.align 4,0x90 +-L056xts_enc_done: +- movl 112(%esp),%eax +- pxor %xmm0,%xmm0 +- andl $15,%eax +- jz L058xts_enc_ret +- pcmpgtd %xmm1,%xmm0 +- movl %eax,112(%esp) +- pshufd $19,%xmm0,%xmm5 +- paddq %xmm1,%xmm1 +- pand 96(%esp),%xmm5 +- pxor %xmm1,%xmm5 +-L059xts_enc_steal: +- movzbl (%esi),%ecx +- movzbl -16(%edi),%edx +- leal 1(%esi),%esi +- movb %cl,-16(%edi) +- movb %dl,(%edi) +- leal 1(%edi),%edi +- subl $1,%eax +- jnz L059xts_enc_steal +- subl 112(%esp),%edi +- movl %ebp,%edx +- movl %ebx,%ecx +- movups -16(%edi),%xmm2 +- xorps %xmm5,%xmm2 +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L060enc1_loop_10: +-.byte 102,15,56,220,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L060enc1_loop_10 +-.byte 102,15,56,221,209 +- xorps %xmm5,%xmm2 +- movups %xmm2,-16(%edi) +-L058xts_enc_ret: +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- movdqa %xmm0,(%esp) +- pxor %xmm3,%xmm3 +- movdqa 
%xmm0,16(%esp) +- pxor %xmm4,%xmm4 +- movdqa %xmm0,32(%esp) +- pxor %xmm5,%xmm5 +- movdqa %xmm0,48(%esp) +- pxor %xmm6,%xmm6 +- movdqa %xmm0,64(%esp) +- pxor %xmm7,%xmm7 +- movdqa %xmm0,80(%esp) +- movl 116(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _aes_hw_xts_decrypt +-.private_extern _aes_hw_xts_decrypt +-.align 4 +-_aes_hw_xts_decrypt: +-L_aes_hw_xts_decrypt_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 36(%esp),%edx +- movl 40(%esp),%esi +- movl 240(%edx),%ecx +- movups (%esi),%xmm2 +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L061enc1_loop_11: +-.byte 102,15,56,220,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L061enc1_loop_11 +-.byte 102,15,56,221,209 +- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%eax +- movl 32(%esp),%edx +- movl %esp,%ebp +- subl $120,%esp +- andl $-16,%esp +- xorl %ebx,%ebx +- testl $15,%eax +- setnz %bl +- shll $4,%ebx +- subl %ebx,%eax +- movl $135,96(%esp) +- movl $0,100(%esp) +- movl $1,104(%esp) +- movl $0,108(%esp) +- movl %eax,112(%esp) +- movl %ebp,116(%esp) +- movl 240(%edx),%ecx +- movl %edx,%ebp +- movl %ecx,%ebx +- movdqa %xmm2,%xmm1 +- pxor %xmm0,%xmm0 +- movdqa 96(%esp),%xmm3 +- pcmpgtd %xmm1,%xmm0 +- andl $-16,%eax +- subl $96,%eax +- jc L062xts_dec_short +- shll $4,%ecx +- movl $16,%ebx +- subl %ecx,%ebx +- leal 32(%edx,%ecx,1),%edx +- jmp L063xts_dec_loop6 +-.align 4,0x90 +-L063xts_dec_loop6: +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,16(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,32(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,48(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- pshufd $19,%xmm0,%xmm7 +- movdqa %xmm1,64(%esp) +- paddq %xmm1,%xmm1 +- movups (%ebp),%xmm0 +- pand %xmm3,%xmm7 +- movups (%esi),%xmm2 +- pxor %xmm1,%xmm7 +- movl %ebx,%ecx +- movdqu 16(%esi),%xmm3 +- xorps %xmm0,%xmm2 +- movdqu 32(%esi),%xmm4 +- pxor %xmm0,%xmm3 +- movdqu 48(%esi),%xmm5 +- pxor %xmm0,%xmm4 +- movdqu 64(%esi),%xmm6 +- pxor %xmm0,%xmm5 +- movdqu 80(%esi),%xmm1 +- pxor %xmm0,%xmm6 +- leal 96(%esi),%esi +- pxor (%esp),%xmm2 +- movdqa %xmm7,80(%esp) +- pxor %xmm1,%xmm7 +- movups 16(%ebp),%xmm1 +- pxor 16(%esp),%xmm3 +- pxor 32(%esp),%xmm4 +-.byte 102,15,56,222,209 +- pxor 48(%esp),%xmm5 +- pxor 64(%esp),%xmm6 +-.byte 102,15,56,222,217 +- pxor %xmm0,%xmm7 +- movups 32(%ebp),%xmm0 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +- call L_aesni_decrypt6_enter +- movdqa 80(%esp),%xmm1 +- pxor %xmm0,%xmm0 +- xorps (%esp),%xmm2 +- pcmpgtd %xmm1,%xmm0 +- xorps 16(%esp),%xmm3 +- movups %xmm2,(%edi) +- xorps 32(%esp),%xmm4 +- movups %xmm3,16(%edi) +- xorps 48(%esp),%xmm5 +- movups %xmm4,32(%edi) +- xorps 64(%esp),%xmm6 +- movups %xmm5,48(%edi) +- xorps %xmm1,%xmm7 +- movups %xmm6,64(%edi) +- pshufd $19,%xmm0,%xmm2 +- movups %xmm7,80(%edi) +- leal 96(%edi),%edi +- movdqa 96(%esp),%xmm3 +- pxor %xmm0,%xmm0 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- subl $96,%eax +- jnc L063xts_dec_loop6 +- movl 
240(%ebp),%ecx +- movl %ebp,%edx +- movl %ecx,%ebx +-L062xts_dec_short: +- addl $96,%eax +- jz L064xts_dec_done6x +- movdqa %xmm1,%xmm5 +- cmpl $32,%eax +- jb L065xts_dec_one +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- je L066xts_dec_two +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,%xmm6 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- cmpl $64,%eax +- jb L067xts_dec_three +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa %xmm1,%xmm7 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +- movdqa %xmm5,(%esp) +- movdqa %xmm6,16(%esp) +- je L068xts_dec_four +- movdqa %xmm7,32(%esp) +- pshufd $19,%xmm0,%xmm7 +- movdqa %xmm1,48(%esp) +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm7 +- pxor %xmm1,%xmm7 +- movdqu (%esi),%xmm2 +- movdqu 16(%esi),%xmm3 +- movdqu 32(%esi),%xmm4 +- pxor (%esp),%xmm2 +- movdqu 48(%esi),%xmm5 +- pxor 16(%esp),%xmm3 +- movdqu 64(%esi),%xmm6 +- pxor 32(%esp),%xmm4 +- leal 80(%esi),%esi +- pxor 48(%esp),%xmm5 +- movdqa %xmm7,64(%esp) +- pxor %xmm7,%xmm6 +- call __aesni_decrypt6 +- movaps 64(%esp),%xmm1 +- xorps (%esp),%xmm2 +- xorps 16(%esp),%xmm3 +- xorps 32(%esp),%xmm4 +- movups %xmm2,(%edi) +- xorps 48(%esp),%xmm5 +- movups %xmm3,16(%edi) +- xorps %xmm1,%xmm6 +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- movups %xmm6,64(%edi) +- leal 80(%edi),%edi +- jmp L069xts_dec_done +-.align 4,0x90 +-L065xts_dec_one: +- movups (%esi),%xmm2 +- leal 16(%esi),%esi +- xorps %xmm5,%xmm2 +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L070dec1_loop_12: +-.byte 102,15,56,222,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L070dec1_loop_12 +-.byte 102,15,56,223,209 +- xorps %xmm5,%xmm2 +- movups %xmm2,(%edi) +- leal 16(%edi),%edi +- movdqa %xmm5,%xmm1 +- jmp L069xts_dec_done +-.align 4,0x90 +-L066xts_dec_two: +- movaps %xmm1,%xmm6 +- movups (%esi),%xmm2 +- movups 16(%esi),%xmm3 +- leal 32(%esi),%esi +- xorps %xmm5,%xmm2 +- xorps %xmm6,%xmm3 +- call __aesni_decrypt2 +- xorps %xmm5,%xmm2 +- xorps %xmm6,%xmm3 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- leal 32(%edi),%edi +- movdqa %xmm6,%xmm1 +- jmp L069xts_dec_done +-.align 4,0x90 +-L067xts_dec_three: +- movaps %xmm1,%xmm7 +- movups (%esi),%xmm2 +- movups 16(%esi),%xmm3 +- movups 32(%esi),%xmm4 +- leal 48(%esi),%esi +- xorps %xmm5,%xmm2 +- xorps %xmm6,%xmm3 +- xorps %xmm7,%xmm4 +- call __aesni_decrypt3 +- xorps %xmm5,%xmm2 +- xorps %xmm6,%xmm3 +- xorps %xmm7,%xmm4 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- leal 48(%edi),%edi +- movdqa %xmm7,%xmm1 +- jmp L069xts_dec_done +-.align 4,0x90 +-L068xts_dec_four: +- movaps %xmm1,%xmm6 +- movups (%esi),%xmm2 +- movups 16(%esi),%xmm3 +- movups 32(%esi),%xmm4 +- xorps (%esp),%xmm2 +- movups 48(%esi),%xmm5 +- leal 64(%esi),%esi +- xorps 16(%esp),%xmm3 +- xorps %xmm7,%xmm4 +- xorps %xmm6,%xmm5 +- call __aesni_decrypt4 +- xorps (%esp),%xmm2 +- xorps 16(%esp),%xmm3 +- xorps %xmm7,%xmm4 +- movups %xmm2,(%edi) +- xorps %xmm6,%xmm5 +- movups %xmm3,16(%edi) +- movups %xmm4,32(%edi) +- movups %xmm5,48(%edi) +- leal 64(%edi),%edi +- movdqa %xmm6,%xmm1 +- jmp L069xts_dec_done +-.align 4,0x90 +-L064xts_dec_done6x: +- movl 112(%esp),%eax +- andl $15,%eax +- jz L071xts_dec_ret +- movl %eax,112(%esp) +- jmp L072xts_dec_only_one_more +-.align 4,0x90 +-L069xts_dec_done: +- movl 112(%esp),%eax +- pxor %xmm0,%xmm0 +- andl $15,%eax +- jz 
L071xts_dec_ret +- pcmpgtd %xmm1,%xmm0 +- movl %eax,112(%esp) +- pshufd $19,%xmm0,%xmm2 +- pxor %xmm0,%xmm0 +- movdqa 96(%esp),%xmm3 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm2 +- pcmpgtd %xmm1,%xmm0 +- pxor %xmm2,%xmm1 +-L072xts_dec_only_one_more: +- pshufd $19,%xmm0,%xmm5 +- movdqa %xmm1,%xmm6 +- paddq %xmm1,%xmm1 +- pand %xmm3,%xmm5 +- pxor %xmm1,%xmm5 +- movl %ebp,%edx +- movl %ebx,%ecx +- movups (%esi),%xmm2 +- xorps %xmm5,%xmm2 +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L073dec1_loop_13: +-.byte 102,15,56,222,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L073dec1_loop_13 +-.byte 102,15,56,223,209 +- xorps %xmm5,%xmm2 +- movups %xmm2,(%edi) +-L074xts_dec_steal: +- movzbl 16(%esi),%ecx +- movzbl (%edi),%edx +- leal 1(%esi),%esi +- movb %cl,(%edi) +- movb %dl,16(%edi) +- leal 1(%edi),%edi +- subl $1,%eax +- jnz L074xts_dec_steal +- subl 112(%esp),%edi +- movl %ebp,%edx +- movl %ebx,%ecx +- movups (%edi),%xmm2 +- xorps %xmm6,%xmm2 +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L075dec1_loop_14: +-.byte 102,15,56,222,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L075dec1_loop_14 +-.byte 102,15,56,223,209 +- xorps %xmm6,%xmm2 +- movups %xmm2,(%edi) +-L071xts_dec_ret: +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- movdqa %xmm0,(%esp) +- pxor %xmm3,%xmm3 +- movdqa %xmm0,16(%esp) +- pxor %xmm4,%xmm4 +- movdqa %xmm0,32(%esp) +- pxor %xmm5,%xmm5 +- movdqa %xmm0,48(%esp) +- pxor %xmm6,%xmm6 +- movdqa %xmm0,64(%esp) +- pxor %xmm7,%xmm7 +- movdqa %xmm0,80(%esp) +- movl 116(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _aes_hw_cbc_encrypt +-.private_extern _aes_hw_cbc_encrypt +-.align 4 +-_aes_hw_cbc_encrypt: +-L_aes_hw_cbc_encrypt_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%esi +- movl %esp,%ebx +- movl 24(%esp),%edi +- subl $24,%ebx +- movl 28(%esp),%eax +- andl $-16,%ebx +- movl 32(%esp),%edx +- movl 36(%esp),%ebp +- testl %eax,%eax +- jz L076cbc_abort +- cmpl $0,40(%esp) +- xchgl %esp,%ebx +- movups (%ebp),%xmm7 +- movl 240(%edx),%ecx +- movl %edx,%ebp +- movl %ebx,16(%esp) +- movl %ecx,%ebx +- je L077cbc_decrypt +- movaps %xmm7,%xmm2 +- cmpl $16,%eax +- jb L078cbc_enc_tail +- subl $16,%eax +- jmp L079cbc_enc_loop +-.align 4,0x90 +-L079cbc_enc_loop: +- movups (%esi),%xmm7 +- leal 16(%esi),%esi +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- xorps %xmm0,%xmm7 +- leal 32(%edx),%edx +- xorps %xmm7,%xmm2 +-L080enc1_loop_15: +-.byte 102,15,56,220,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L080enc1_loop_15 +-.byte 102,15,56,221,209 +- movl %ebx,%ecx +- movl %ebp,%edx +- movups %xmm2,(%edi) +- leal 16(%edi),%edi +- subl $16,%eax +- jnc L079cbc_enc_loop +- addl $16,%eax +- jnz L078cbc_enc_tail +- movaps %xmm2,%xmm7 +- pxor %xmm2,%xmm2 +- jmp L081cbc_ret +-L078cbc_enc_tail: +- movl %eax,%ecx +-.long 2767451785 +- movl $16,%ecx +- subl %eax,%ecx +- xorl %eax,%eax +-.long 2868115081 +- leal -16(%edi),%edi +- movl %ebx,%ecx +- movl %edi,%esi +- movl %ebp,%edx +- jmp L079cbc_enc_loop +-.align 4,0x90 +-L077cbc_decrypt: +- cmpl $80,%eax +- jbe L082cbc_dec_tail +- movaps %xmm7,(%esp) +- subl $80,%eax +- jmp L083cbc_dec_loop6_enter +-.align 4,0x90 +-L084cbc_dec_loop6: +- movaps %xmm0,(%esp) +- movups %xmm7,(%edi) +- leal 16(%edi),%edi +-L083cbc_dec_loop6_enter: +- movdqu (%esi),%xmm2 +- movdqu 16(%esi),%xmm3 +- movdqu 32(%esi),%xmm4 +- movdqu 48(%esi),%xmm5 +- 
movdqu 64(%esi),%xmm6 +- movdqu 80(%esi),%xmm7 +- call __aesni_decrypt6 +- movups (%esi),%xmm1 +- movups 16(%esi),%xmm0 +- xorps (%esp),%xmm2 +- xorps %xmm1,%xmm3 +- movups 32(%esi),%xmm1 +- xorps %xmm0,%xmm4 +- movups 48(%esi),%xmm0 +- xorps %xmm1,%xmm5 +- movups 64(%esi),%xmm1 +- xorps %xmm0,%xmm6 +- movups 80(%esi),%xmm0 +- xorps %xmm1,%xmm7 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- leal 96(%esi),%esi +- movups %xmm4,32(%edi) +- movl %ebx,%ecx +- movups %xmm5,48(%edi) +- movl %ebp,%edx +- movups %xmm6,64(%edi) +- leal 80(%edi),%edi +- subl $96,%eax +- ja L084cbc_dec_loop6 +- movaps %xmm7,%xmm2 +- movaps %xmm0,%xmm7 +- addl $80,%eax +- jle L085cbc_dec_clear_tail_collected +- movups %xmm2,(%edi) +- leal 16(%edi),%edi +-L082cbc_dec_tail: +- movups (%esi),%xmm2 +- movaps %xmm2,%xmm6 +- cmpl $16,%eax +- jbe L086cbc_dec_one +- movups 16(%esi),%xmm3 +- movaps %xmm3,%xmm5 +- cmpl $32,%eax +- jbe L087cbc_dec_two +- movups 32(%esi),%xmm4 +- cmpl $48,%eax +- jbe L088cbc_dec_three +- movups 48(%esi),%xmm5 +- cmpl $64,%eax +- jbe L089cbc_dec_four +- movups 64(%esi),%xmm6 +- movaps %xmm7,(%esp) +- movups (%esi),%xmm2 +- xorps %xmm7,%xmm7 +- call __aesni_decrypt6 +- movups (%esi),%xmm1 +- movups 16(%esi),%xmm0 +- xorps (%esp),%xmm2 +- xorps %xmm1,%xmm3 +- movups 32(%esi),%xmm1 +- xorps %xmm0,%xmm4 +- movups 48(%esi),%xmm0 +- xorps %xmm1,%xmm5 +- movups 64(%esi),%xmm7 +- xorps %xmm0,%xmm6 +- movups %xmm2,(%edi) +- movups %xmm3,16(%edi) +- pxor %xmm3,%xmm3 +- movups %xmm4,32(%edi) +- pxor %xmm4,%xmm4 +- movups %xmm5,48(%edi) +- pxor %xmm5,%xmm5 +- leal 64(%edi),%edi +- movaps %xmm6,%xmm2 +- pxor %xmm6,%xmm6 +- subl $80,%eax +- jmp L090cbc_dec_tail_collected +-.align 4,0x90 +-L086cbc_dec_one: +- movups (%edx),%xmm0 +- movups 16(%edx),%xmm1 +- leal 32(%edx),%edx +- xorps %xmm0,%xmm2 +-L091dec1_loop_16: +-.byte 102,15,56,222,209 +- decl %ecx +- movups (%edx),%xmm1 +- leal 16(%edx),%edx +- jnz L091dec1_loop_16 +-.byte 102,15,56,223,209 +- xorps %xmm7,%xmm2 +- movaps %xmm6,%xmm7 +- subl $16,%eax +- jmp L090cbc_dec_tail_collected +-.align 4,0x90 +-L087cbc_dec_two: +- call __aesni_decrypt2 +- xorps %xmm7,%xmm2 +- xorps %xmm6,%xmm3 +- movups %xmm2,(%edi) +- movaps %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- leal 16(%edi),%edi +- movaps %xmm5,%xmm7 +- subl $32,%eax +- jmp L090cbc_dec_tail_collected +-.align 4,0x90 +-L088cbc_dec_three: +- call __aesni_decrypt3 +- xorps %xmm7,%xmm2 +- xorps %xmm6,%xmm3 +- xorps %xmm5,%xmm4 +- movups %xmm2,(%edi) +- movaps %xmm4,%xmm2 +- pxor %xmm4,%xmm4 +- movups %xmm3,16(%edi) +- pxor %xmm3,%xmm3 +- leal 32(%edi),%edi +- movups 32(%esi),%xmm7 +- subl $48,%eax +- jmp L090cbc_dec_tail_collected +-.align 4,0x90 +-L089cbc_dec_four: +- call __aesni_decrypt4 +- movups 16(%esi),%xmm1 +- movups 32(%esi),%xmm0 +- xorps %xmm7,%xmm2 +- movups 48(%esi),%xmm7 +- xorps %xmm6,%xmm3 +- movups %xmm2,(%edi) +- xorps %xmm1,%xmm4 +- movups %xmm3,16(%edi) +- pxor %xmm3,%xmm3 +- xorps %xmm0,%xmm5 +- movups %xmm4,32(%edi) +- pxor %xmm4,%xmm4 +- leal 48(%edi),%edi +- movaps %xmm5,%xmm2 +- pxor %xmm5,%xmm5 +- subl $64,%eax +- jmp L090cbc_dec_tail_collected +-.align 4,0x90 +-L085cbc_dec_clear_tail_collected: +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +-L090cbc_dec_tail_collected: +- andl $15,%eax +- jnz L092cbc_dec_tail_partial +- movups %xmm2,(%edi) +- pxor %xmm0,%xmm0 +- jmp L081cbc_ret +-.align 4,0x90 +-L092cbc_dec_tail_partial: +- movaps %xmm2,(%esp) +- pxor %xmm0,%xmm0 +- movl $16,%ecx +- movl %esp,%esi +- subl %eax,%ecx +-.long 2767451785 +- movdqa %xmm2,(%esp) 
+-L081cbc_ret: +- movl 16(%esp),%esp +- movl 36(%esp),%ebp +- pxor %xmm2,%xmm2 +- pxor %xmm1,%xmm1 +- movups %xmm7,(%ebp) +- pxor %xmm7,%xmm7 +-L076cbc_abort: +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.private_extern __aesni_set_encrypt_key +-.align 4 +-__aesni_set_encrypt_key: +- pushl %ebp +- pushl %ebx +- testl %eax,%eax +- jz L093bad_pointer +- testl %edx,%edx +- jz L093bad_pointer +- call L094pic +-L094pic: +- popl %ebx +- leal Lkey_const-L094pic(%ebx),%ebx +- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-Lkey_const(%ebx),%ebp +- movups (%eax),%xmm0 +- xorps %xmm4,%xmm4 +- movl 4(%ebp),%ebp +- leal 16(%edx),%edx +- andl $268437504,%ebp +- cmpl $256,%ecx +- je L09514rounds +- cmpl $192,%ecx +- je L09612rounds +- cmpl $128,%ecx +- jne L097bad_keybits +-.align 4,0x90 +-L09810rounds: +- cmpl $268435456,%ebp +- je L09910rounds_alt +- movl $9,%ecx +- movups %xmm0,-16(%edx) +-.byte 102,15,58,223,200,1 +- call L100key_128_cold +-.byte 102,15,58,223,200,2 +- call L101key_128 +-.byte 102,15,58,223,200,4 +- call L101key_128 +-.byte 102,15,58,223,200,8 +- call L101key_128 +-.byte 102,15,58,223,200,16 +- call L101key_128 +-.byte 102,15,58,223,200,32 +- call L101key_128 +-.byte 102,15,58,223,200,64 +- call L101key_128 +-.byte 102,15,58,223,200,128 +- call L101key_128 +-.byte 102,15,58,223,200,27 +- call L101key_128 +-.byte 102,15,58,223,200,54 +- call L101key_128 +- movups %xmm0,(%edx) +- movl %ecx,80(%edx) +- jmp L102good_key +-.align 4,0x90 +-L101key_128: +- movups %xmm0,(%edx) +- leal 16(%edx),%edx +-L100key_128_cold: +- shufps $16,%xmm0,%xmm4 +- xorps %xmm4,%xmm0 +- shufps $140,%xmm0,%xmm4 +- xorps %xmm4,%xmm0 +- shufps $255,%xmm1,%xmm1 +- xorps %xmm1,%xmm0 +- ret +-.align 4,0x90 +-L09910rounds_alt: +- movdqa (%ebx),%xmm5 +- movl $8,%ecx +- movdqa 32(%ebx),%xmm4 +- movdqa %xmm0,%xmm2 +- movdqu %xmm0,-16(%edx) +-L103loop_key128: +-.byte 102,15,56,0,197 +-.byte 102,15,56,221,196 +- pslld $1,%xmm4 +- leal 16(%edx),%edx +- movdqa %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm3,%xmm2 +- pxor %xmm2,%xmm0 +- movdqu %xmm0,-16(%edx) +- movdqa %xmm0,%xmm2 +- decl %ecx +- jnz L103loop_key128 +- movdqa 48(%ebx),%xmm4 +-.byte 102,15,56,0,197 +-.byte 102,15,56,221,196 +- pslld $1,%xmm4 +- movdqa %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm3,%xmm2 +- pxor %xmm2,%xmm0 +- movdqu %xmm0,(%edx) +- movdqa %xmm0,%xmm2 +-.byte 102,15,56,0,197 +-.byte 102,15,56,221,196 +- movdqa %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm3,%xmm2 +- pxor %xmm2,%xmm0 +- movdqu %xmm0,16(%edx) +- movl $9,%ecx +- movl %ecx,96(%edx) +- jmp L102good_key +-.align 4,0x90 +-L09612rounds: +- movq 16(%eax),%xmm2 +- cmpl $268435456,%ebp +- je L10412rounds_alt +- movl $11,%ecx +- movups %xmm0,-16(%edx) +-.byte 102,15,58,223,202,1 +- call L105key_192a_cold +-.byte 102,15,58,223,202,2 +- call L106key_192b +-.byte 102,15,58,223,202,4 +- call L107key_192a +-.byte 102,15,58,223,202,8 +- call L106key_192b +-.byte 102,15,58,223,202,16 +- call L107key_192a +-.byte 102,15,58,223,202,32 +- call L106key_192b +-.byte 102,15,58,223,202,64 +- call L107key_192a +-.byte 102,15,58,223,202,128 +- call L106key_192b +- movups %xmm0,(%edx) +- movl %ecx,48(%edx) +- jmp L102good_key +-.align 4,0x90 +-L107key_192a: +- movups %xmm0,(%edx) +- leal 16(%edx),%edx +-.align 4,0x90 +-L105key_192a_cold: +- movaps %xmm2,%xmm5 
+-L108key_192b_warm: +- shufps $16,%xmm0,%xmm4 +- movdqa %xmm2,%xmm3 +- xorps %xmm4,%xmm0 +- shufps $140,%xmm0,%xmm4 +- pslldq $4,%xmm3 +- xorps %xmm4,%xmm0 +- pshufd $85,%xmm1,%xmm1 +- pxor %xmm3,%xmm2 +- pxor %xmm1,%xmm0 +- pshufd $255,%xmm0,%xmm3 +- pxor %xmm3,%xmm2 +- ret +-.align 4,0x90 +-L106key_192b: +- movaps %xmm0,%xmm3 +- shufps $68,%xmm0,%xmm5 +- movups %xmm5,(%edx) +- shufps $78,%xmm2,%xmm3 +- movups %xmm3,16(%edx) +- leal 32(%edx),%edx +- jmp L108key_192b_warm +-.align 4,0x90 +-L10412rounds_alt: +- movdqa 16(%ebx),%xmm5 +- movdqa 32(%ebx),%xmm4 +- movl $8,%ecx +- movdqu %xmm0,-16(%edx) +-L109loop_key192: +- movq %xmm2,(%edx) +- movdqa %xmm2,%xmm1 +-.byte 102,15,56,0,213 +-.byte 102,15,56,221,212 +- pslld $1,%xmm4 +- leal 24(%edx),%edx +- movdqa %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm3,%xmm0 +- pshufd $255,%xmm0,%xmm3 +- pxor %xmm1,%xmm3 +- pslldq $4,%xmm1 +- pxor %xmm1,%xmm3 +- pxor %xmm2,%xmm0 +- pxor %xmm3,%xmm2 +- movdqu %xmm0,-16(%edx) +- decl %ecx +- jnz L109loop_key192 +- movl $11,%ecx +- movl %ecx,32(%edx) +- jmp L102good_key +-.align 4,0x90 +-L09514rounds: +- movups 16(%eax),%xmm2 +- leal 16(%edx),%edx +- cmpl $268435456,%ebp +- je L11014rounds_alt +- movl $13,%ecx +- movups %xmm0,-32(%edx) +- movups %xmm2,-16(%edx) +-.byte 102,15,58,223,202,1 +- call L111key_256a_cold +-.byte 102,15,58,223,200,1 +- call L112key_256b +-.byte 102,15,58,223,202,2 +- call L113key_256a +-.byte 102,15,58,223,200,2 +- call L112key_256b +-.byte 102,15,58,223,202,4 +- call L113key_256a +-.byte 102,15,58,223,200,4 +- call L112key_256b +-.byte 102,15,58,223,202,8 +- call L113key_256a +-.byte 102,15,58,223,200,8 +- call L112key_256b +-.byte 102,15,58,223,202,16 +- call L113key_256a +-.byte 102,15,58,223,200,16 +- call L112key_256b +-.byte 102,15,58,223,202,32 +- call L113key_256a +-.byte 102,15,58,223,200,32 +- call L112key_256b +-.byte 102,15,58,223,202,64 +- call L113key_256a +- movups %xmm0,(%edx) +- movl %ecx,16(%edx) +- xorl %eax,%eax +- jmp L102good_key +-.align 4,0x90 +-L113key_256a: +- movups %xmm2,(%edx) +- leal 16(%edx),%edx +-L111key_256a_cold: +- shufps $16,%xmm0,%xmm4 +- xorps %xmm4,%xmm0 +- shufps $140,%xmm0,%xmm4 +- xorps %xmm4,%xmm0 +- shufps $255,%xmm1,%xmm1 +- xorps %xmm1,%xmm0 +- ret +-.align 4,0x90 +-L112key_256b: +- movups %xmm0,(%edx) +- leal 16(%edx),%edx +- shufps $16,%xmm2,%xmm4 +- xorps %xmm4,%xmm2 +- shufps $140,%xmm2,%xmm4 +- xorps %xmm4,%xmm2 +- shufps $170,%xmm1,%xmm1 +- xorps %xmm1,%xmm2 +- ret +-.align 4,0x90 +-L11014rounds_alt: +- movdqa (%ebx),%xmm5 +- movdqa 32(%ebx),%xmm4 +- movl $7,%ecx +- movdqu %xmm0,-32(%edx) +- movdqa %xmm2,%xmm1 +- movdqu %xmm2,-16(%edx) +-L114loop_key256: +-.byte 102,15,56,0,213 +-.byte 102,15,56,221,212 +- movdqa %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm3,%xmm0 +- pslld $1,%xmm4 +- pxor %xmm2,%xmm0 +- movdqu %xmm0,(%edx) +- decl %ecx +- jz L115done_key256 +- pshufd $255,%xmm0,%xmm2 +- pxor %xmm3,%xmm3 +-.byte 102,15,56,221,211 +- movdqa %xmm1,%xmm3 +- pslldq $4,%xmm1 +- pxor %xmm1,%xmm3 +- pslldq $4,%xmm1 +- pxor %xmm1,%xmm3 +- pslldq $4,%xmm1 +- pxor %xmm3,%xmm1 +- pxor %xmm1,%xmm2 +- movdqu %xmm2,16(%edx) +- leal 32(%edx),%edx +- movdqa %xmm2,%xmm1 +- jmp L114loop_key256 +-L115done_key256: +- movl $13,%ecx +- movl %ecx,16(%edx) +-L102good_key: +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- xorl 
%eax,%eax +- popl %ebx +- popl %ebp +- ret +-.align 2,0x90 +-L093bad_pointer: +- movl $-1,%eax +- popl %ebx +- popl %ebp +- ret +-.align 2,0x90 +-L097bad_keybits: +- pxor %xmm0,%xmm0 +- movl $-2,%eax +- popl %ebx +- popl %ebp +- ret +-.globl _aes_hw_set_encrypt_key +-.private_extern _aes_hw_set_encrypt_key +-.align 4 +-_aes_hw_set_encrypt_key: +-L_aes_hw_set_encrypt_key_begin: +-#ifdef BORINGSSL_DISPATCH_TEST +- pushl %ebx +- pushl %edx +- call L116pic +-L116pic: +- popl %ebx +- leal _BORINGSSL_function_hit+3-L116pic(%ebx),%ebx +- movl $1,%edx +- movb %dl,(%ebx) +- popl %edx +- popl %ebx +-#endif +- movl 4(%esp),%eax +- movl 8(%esp),%ecx +- movl 12(%esp),%edx +- call __aesni_set_encrypt_key +- ret +-.globl _aes_hw_set_decrypt_key +-.private_extern _aes_hw_set_decrypt_key +-.align 4 +-_aes_hw_set_decrypt_key: +-L_aes_hw_set_decrypt_key_begin: +- movl 4(%esp),%eax +- movl 8(%esp),%ecx +- movl 12(%esp),%edx +- call __aesni_set_encrypt_key +- movl 12(%esp),%edx +- shll $4,%ecx +- testl %eax,%eax +- jnz L117dec_key_ret +- leal 16(%edx,%ecx,1),%eax +- movups (%edx),%xmm0 +- movups (%eax),%xmm1 +- movups %xmm0,(%eax) +- movups %xmm1,(%edx) +- leal 16(%edx),%edx +- leal -16(%eax),%eax +-L118dec_key_inverse: +- movups (%edx),%xmm0 +- movups (%eax),%xmm1 +-.byte 102,15,56,219,192 +-.byte 102,15,56,219,201 +- leal 16(%edx),%edx +- leal -16(%eax),%eax +- movups %xmm0,16(%eax) +- movups %xmm1,-16(%edx) +- cmpl %edx,%eax +- ja L118dec_key_inverse +- movups (%edx),%xmm0 +-.byte 102,15,56,219,192 +- movups %xmm0,(%edx) +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- xorl %eax,%eax +-L117dec_key_ret: +- ret +-.align 6,0x90 +-Lkey_const: +-.long 202313229,202313229,202313229,202313229 +-.long 67569157,67569157,67569157,67569157 +-.long 1,1,1,1 +-.long 27,27,27,27 +-.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69 +-.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83 +-.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115 +-.byte 115,108,46,111,114,103,62,0 +-.section __IMPORT,__pointers,non_lazy_symbol_pointers +-L_OPENSSL_ia32cap_P$non_lazy_ptr: +-.indirect_symbol _OPENSSL_ia32cap_P +-.long 0 +-#endif +diff --git a/mac-x86/crypto/fipsmodule/bn-586.S b/mac-x86/crypto/fipsmodule/bn-586.S +deleted file mode 100644 +index ede2e76..0000000 +--- a/mac-x86/crypto/fipsmodule/bn-586.S ++++ /dev/null +@@ -1,988 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _bn_mul_add_words +-.private_extern _bn_mul_add_words +-.align 4 +-_bn_mul_add_words: +-L_bn_mul_add_words_begin: +- call L000PIC_me_up +-L000PIC_me_up: +- popl %eax +- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000PIC_me_up(%eax),%eax +- btl $26,(%eax) +- jnc L001maw_non_sse2 +- movl 4(%esp),%eax +- movl 8(%esp),%edx +- movl 12(%esp),%ecx +- movd 16(%esp),%mm0 +- pxor %mm1,%mm1 +- jmp L002maw_sse2_entry +-.align 4,0x90 +-L003maw_sse2_unrolled: +- movd (%eax),%mm3 +- paddq %mm3,%mm1 +- movd (%edx),%mm2 +- pmuludq %mm0,%mm2 +- movd 4(%edx),%mm4 +- pmuludq %mm0,%mm4 +- movd 8(%edx),%mm6 +- pmuludq %mm0,%mm6 +- movd 12(%edx),%mm7 +- pmuludq %mm0,%mm7 +- paddq %mm2,%mm1 +- movd 4(%eax),%mm3 +- paddq %mm4,%mm3 +- movd 8(%eax),%mm5 +- paddq %mm6,%mm5 +- movd 12(%eax),%mm4 +- paddq %mm4,%mm7 +- movd %mm1,(%eax) +- movd 16(%edx),%mm2 +- pmuludq %mm0,%mm2 +- psrlq $32,%mm1 +- movd 20(%edx),%mm4 +- pmuludq %mm0,%mm4 +- paddq %mm3,%mm1 +- movd 24(%edx),%mm6 +- pmuludq %mm0,%mm6 +- movd %mm1,4(%eax) +- psrlq $32,%mm1 +- movd 28(%edx),%mm3 +- addl $32,%edx +- pmuludq %mm0,%mm3 +- paddq %mm5,%mm1 +- movd 16(%eax),%mm5 +- paddq %mm5,%mm2 +- movd %mm1,8(%eax) +- psrlq $32,%mm1 +- paddq %mm7,%mm1 +- movd 20(%eax),%mm5 +- paddq %mm5,%mm4 +- movd %mm1,12(%eax) +- psrlq $32,%mm1 +- paddq %mm2,%mm1 +- movd 24(%eax),%mm5 +- paddq %mm5,%mm6 +- movd %mm1,16(%eax) +- psrlq $32,%mm1 +- paddq %mm4,%mm1 +- movd 28(%eax),%mm5 +- paddq %mm5,%mm3 +- movd %mm1,20(%eax) +- psrlq $32,%mm1 +- paddq %mm6,%mm1 +- movd %mm1,24(%eax) +- psrlq $32,%mm1 +- paddq %mm3,%mm1 +- movd %mm1,28(%eax) +- leal 32(%eax),%eax +- psrlq $32,%mm1 +- subl $8,%ecx +- jz L004maw_sse2_exit +-L002maw_sse2_entry: +- testl $4294967288,%ecx +- jnz L003maw_sse2_unrolled +-.align 2,0x90 +-L005maw_sse2_loop: +- movd (%edx),%mm2 +- movd (%eax),%mm3 +- pmuludq %mm0,%mm2 +- leal 4(%edx),%edx +- paddq %mm3,%mm1 +- paddq %mm2,%mm1 +- movd %mm1,(%eax) +- subl $1,%ecx +- psrlq $32,%mm1 +- leal 4(%eax),%eax +- jnz L005maw_sse2_loop +-L004maw_sse2_exit: +- movd %mm1,%eax +- emms +- ret +-.align 4,0x90 +-L001maw_non_sse2: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- +- xorl %esi,%esi +- movl 20(%esp),%edi +- movl 28(%esp),%ecx +- movl 24(%esp),%ebx +- andl $4294967288,%ecx +- movl 32(%esp),%ebp +- pushl %ecx +- jz L006maw_finish +-.align 4,0x90 +-L007maw_loop: +- # Round 0 +- movl (%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl (%edi),%eax +- adcl $0,%edx +- movl %eax,(%edi) +- movl %edx,%esi +- # Round 4 +- movl 4(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 4(%edi),%eax +- adcl $0,%edx +- movl %eax,4(%edi) +- movl %edx,%esi +- # Round 8 +- movl 8(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 8(%edi),%eax +- adcl $0,%edx +- movl %eax,8(%edi) +- movl %edx,%esi +- # Round 12 +- movl 12(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 12(%edi),%eax +- adcl $0,%edx +- movl %eax,12(%edi) +- movl %edx,%esi +- # Round 16 +- movl 16(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 16(%edi),%eax +- adcl $0,%edx +- movl %eax,16(%edi) +- movl %edx,%esi +- # Round 20 +- movl 20(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 20(%edi),%eax +- adcl $0,%edx +- movl %eax,20(%edi) +- movl %edx,%esi +- # Round 24 +- movl 24(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 24(%edi),%eax +- adcl $0,%edx +- movl %eax,24(%edi) +- movl %edx,%esi +- # 
Round 28 +- movl 28(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 28(%edi),%eax +- adcl $0,%edx +- movl %eax,28(%edi) +- movl %edx,%esi +- +- subl $8,%ecx +- leal 32(%ebx),%ebx +- leal 32(%edi),%edi +- jnz L007maw_loop +-L006maw_finish: +- movl 32(%esp),%ecx +- andl $7,%ecx +- jnz L008maw_finish2 +- jmp L009maw_end +-L008maw_finish2: +- # Tail Round 0 +- movl (%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl (%edi),%eax +- adcl $0,%edx +- decl %ecx +- movl %eax,(%edi) +- movl %edx,%esi +- jz L009maw_end +- # Tail Round 1 +- movl 4(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 4(%edi),%eax +- adcl $0,%edx +- decl %ecx +- movl %eax,4(%edi) +- movl %edx,%esi +- jz L009maw_end +- # Tail Round 2 +- movl 8(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 8(%edi),%eax +- adcl $0,%edx +- decl %ecx +- movl %eax,8(%edi) +- movl %edx,%esi +- jz L009maw_end +- # Tail Round 3 +- movl 12(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 12(%edi),%eax +- adcl $0,%edx +- decl %ecx +- movl %eax,12(%edi) +- movl %edx,%esi +- jz L009maw_end +- # Tail Round 4 +- movl 16(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 16(%edi),%eax +- adcl $0,%edx +- decl %ecx +- movl %eax,16(%edi) +- movl %edx,%esi +- jz L009maw_end +- # Tail Round 5 +- movl 20(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 20(%edi),%eax +- adcl $0,%edx +- decl %ecx +- movl %eax,20(%edi) +- movl %edx,%esi +- jz L009maw_end +- # Tail Round 6 +- movl 24(%ebx),%eax +- mull %ebp +- addl %esi,%eax +- adcl $0,%edx +- addl 24(%edi),%eax +- adcl $0,%edx +- movl %eax,24(%edi) +- movl %edx,%esi +-L009maw_end: +- movl %esi,%eax +- popl %ecx +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _bn_mul_words +-.private_extern _bn_mul_words +-.align 4 +-_bn_mul_words: +-L_bn_mul_words_begin: +- call L010PIC_me_up +-L010PIC_me_up: +- popl %eax +- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L010PIC_me_up(%eax),%eax +- btl $26,(%eax) +- jnc L011mw_non_sse2 +- movl 4(%esp),%eax +- movl 8(%esp),%edx +- movl 12(%esp),%ecx +- movd 16(%esp),%mm0 +- pxor %mm1,%mm1 +-.align 4,0x90 +-L012mw_sse2_loop: +- movd (%edx),%mm2 +- pmuludq %mm0,%mm2 +- leal 4(%edx),%edx +- paddq %mm2,%mm1 +- movd %mm1,(%eax) +- subl $1,%ecx +- psrlq $32,%mm1 +- leal 4(%eax),%eax +- jnz L012mw_sse2_loop +- movd %mm1,%eax +- emms +- ret +-.align 4,0x90 +-L011mw_non_sse2: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- +- xorl %esi,%esi +- movl 20(%esp),%edi +- movl 24(%esp),%ebx +- movl 28(%esp),%ebp +- movl 32(%esp),%ecx +- andl $4294967288,%ebp +- jz L013mw_finish +-L014mw_loop: +- # Round 0 +- movl (%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,(%edi) +- movl %edx,%esi +- # Round 4 +- movl 4(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,4(%edi) +- movl %edx,%esi +- # Round 8 +- movl 8(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,8(%edi) +- movl %edx,%esi +- # Round 12 +- movl 12(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,12(%edi) +- movl %edx,%esi +- # Round 16 +- movl 16(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,16(%edi) +- movl %edx,%esi +- # Round 20 +- movl 20(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,20(%edi) +- movl %edx,%esi +- # Round 24 +- movl 24(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,24(%edi) +- movl %edx,%esi +- # Round 28 +- movl 28(%ebx),%eax 
+- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,28(%edi) +- movl %edx,%esi +- +- addl $32,%ebx +- addl $32,%edi +- subl $8,%ebp +- jz L013mw_finish +- jmp L014mw_loop +-L013mw_finish: +- movl 28(%esp),%ebp +- andl $7,%ebp +- jnz L015mw_finish2 +- jmp L016mw_end +-L015mw_finish2: +- # Tail Round 0 +- movl (%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,(%edi) +- movl %edx,%esi +- decl %ebp +- jz L016mw_end +- # Tail Round 1 +- movl 4(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,4(%edi) +- movl %edx,%esi +- decl %ebp +- jz L016mw_end +- # Tail Round 2 +- movl 8(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,8(%edi) +- movl %edx,%esi +- decl %ebp +- jz L016mw_end +- # Tail Round 3 +- movl 12(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,12(%edi) +- movl %edx,%esi +- decl %ebp +- jz L016mw_end +- # Tail Round 4 +- movl 16(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,16(%edi) +- movl %edx,%esi +- decl %ebp +- jz L016mw_end +- # Tail Round 5 +- movl 20(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,20(%edi) +- movl %edx,%esi +- decl %ebp +- jz L016mw_end +- # Tail Round 6 +- movl 24(%ebx),%eax +- mull %ecx +- addl %esi,%eax +- adcl $0,%edx +- movl %eax,24(%edi) +- movl %edx,%esi +-L016mw_end: +- movl %esi,%eax +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _bn_sqr_words +-.private_extern _bn_sqr_words +-.align 4 +-_bn_sqr_words: +-L_bn_sqr_words_begin: +- call L017PIC_me_up +-L017PIC_me_up: +- popl %eax +- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L017PIC_me_up(%eax),%eax +- btl $26,(%eax) +- jnc L018sqr_non_sse2 +- movl 4(%esp),%eax +- movl 8(%esp),%edx +- movl 12(%esp),%ecx +-.align 4,0x90 +-L019sqr_sse2_loop: +- movd (%edx),%mm0 +- pmuludq %mm0,%mm0 +- leal 4(%edx),%edx +- movq %mm0,(%eax) +- subl $1,%ecx +- leal 8(%eax),%eax +- jnz L019sqr_sse2_loop +- emms +- ret +-.align 4,0x90 +-L018sqr_non_sse2: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- +- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%ebx +- andl $4294967288,%ebx +- jz L020sw_finish +-L021sw_loop: +- # Round 0 +- movl (%edi),%eax +- mull %eax +- movl %eax,(%esi) +- movl %edx,4(%esi) +- # Round 4 +- movl 4(%edi),%eax +- mull %eax +- movl %eax,8(%esi) +- movl %edx,12(%esi) +- # Round 8 +- movl 8(%edi),%eax +- mull %eax +- movl %eax,16(%esi) +- movl %edx,20(%esi) +- # Round 12 +- movl 12(%edi),%eax +- mull %eax +- movl %eax,24(%esi) +- movl %edx,28(%esi) +- # Round 16 +- movl 16(%edi),%eax +- mull %eax +- movl %eax,32(%esi) +- movl %edx,36(%esi) +- # Round 20 +- movl 20(%edi),%eax +- mull %eax +- movl %eax,40(%esi) +- movl %edx,44(%esi) +- # Round 24 +- movl 24(%edi),%eax +- mull %eax +- movl %eax,48(%esi) +- movl %edx,52(%esi) +- # Round 28 +- movl 28(%edi),%eax +- mull %eax +- movl %eax,56(%esi) +- movl %edx,60(%esi) +- +- addl $32,%edi +- addl $64,%esi +- subl $8,%ebx +- jnz L021sw_loop +-L020sw_finish: +- movl 28(%esp),%ebx +- andl $7,%ebx +- jz L022sw_end +- # Tail Round 0 +- movl (%edi),%eax +- mull %eax +- movl %eax,(%esi) +- decl %ebx +- movl %edx,4(%esi) +- jz L022sw_end +- # Tail Round 1 +- movl 4(%edi),%eax +- mull %eax +- movl %eax,8(%esi) +- decl %ebx +- movl %edx,12(%esi) +- jz L022sw_end +- # Tail Round 2 +- movl 8(%edi),%eax +- mull %eax +- movl %eax,16(%esi) +- decl %ebx +- movl %edx,20(%esi) +- jz L022sw_end +- # Tail Round 3 +- movl 12(%edi),%eax +- mull %eax +- movl %eax,24(%esi) +- decl %ebx +- movl %edx,28(%esi) +- 
jz L022sw_end +- # Tail Round 4 +- movl 16(%edi),%eax +- mull %eax +- movl %eax,32(%esi) +- decl %ebx +- movl %edx,36(%esi) +- jz L022sw_end +- # Tail Round 5 +- movl 20(%edi),%eax +- mull %eax +- movl %eax,40(%esi) +- decl %ebx +- movl %edx,44(%esi) +- jz L022sw_end +- # Tail Round 6 +- movl 24(%edi),%eax +- mull %eax +- movl %eax,48(%esi) +- movl %edx,52(%esi) +-L022sw_end: +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _bn_div_words +-.private_extern _bn_div_words +-.align 4 +-_bn_div_words: +-L_bn_div_words_begin: +- movl 4(%esp),%edx +- movl 8(%esp),%eax +- movl 12(%esp),%ecx +- divl %ecx +- ret +-.globl _bn_add_words +-.private_extern _bn_add_words +-.align 4 +-_bn_add_words: +-L_bn_add_words_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- +- movl 20(%esp),%ebx +- movl 24(%esp),%esi +- movl 28(%esp),%edi +- movl 32(%esp),%ebp +- xorl %eax,%eax +- andl $4294967288,%ebp +- jz L023aw_finish +-L024aw_loop: +- # Round 0 +- movl (%esi),%ecx +- movl (%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- movl %ecx,(%ebx) +- # Round 1 +- movl 4(%esi),%ecx +- movl 4(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- movl %ecx,4(%ebx) +- # Round 2 +- movl 8(%esi),%ecx +- movl 8(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- movl %ecx,8(%ebx) +- # Round 3 +- movl 12(%esi),%ecx +- movl 12(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- movl %ecx,12(%ebx) +- # Round 4 +- movl 16(%esi),%ecx +- movl 16(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- movl %ecx,16(%ebx) +- # Round 5 +- movl 20(%esi),%ecx +- movl 20(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- movl %ecx,20(%ebx) +- # Round 6 +- movl 24(%esi),%ecx +- movl 24(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- movl %ecx,24(%ebx) +- # Round 7 +- movl 28(%esi),%ecx +- movl 28(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- movl %ecx,28(%ebx) +- +- addl $32,%esi +- addl $32,%edi +- addl $32,%ebx +- subl $8,%ebp +- jnz L024aw_loop +-L023aw_finish: +- movl 32(%esp),%ebp +- andl $7,%ebp +- jz L025aw_end +- # Tail Round 0 +- movl (%esi),%ecx +- movl (%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,(%ebx) +- jz L025aw_end +- # Tail Round 1 +- movl 4(%esi),%ecx +- movl 4(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,4(%ebx) +- jz L025aw_end +- # Tail Round 2 +- movl 8(%esi),%ecx +- movl 8(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,8(%ebx) +- jz L025aw_end +- # Tail Round 3 +- movl 12(%esi),%ecx +- movl 12(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,12(%ebx) +- jz L025aw_end +- # Tail Round 4 +- movl 16(%esi),%ecx +- movl 16(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,16(%ebx) +- jz L025aw_end +- # Tail Round 5 +- movl 20(%esi),%ecx +- movl 20(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- 
adcl $0,%eax +- decl %ebp +- movl %ecx,20(%ebx) +- jz L025aw_end +- # Tail Round 6 +- movl 24(%esi),%ecx +- movl 24(%edi),%edx +- addl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- addl %edx,%ecx +- adcl $0,%eax +- movl %ecx,24(%ebx) +-L025aw_end: +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _bn_sub_words +-.private_extern _bn_sub_words +-.align 4 +-_bn_sub_words: +-L_bn_sub_words_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- +- movl 20(%esp),%ebx +- movl 24(%esp),%esi +- movl 28(%esp),%edi +- movl 32(%esp),%ebp +- xorl %eax,%eax +- andl $4294967288,%ebp +- jz L026aw_finish +-L027aw_loop: +- # Round 0 +- movl (%esi),%ecx +- movl (%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- movl %ecx,(%ebx) +- # Round 1 +- movl 4(%esi),%ecx +- movl 4(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- movl %ecx,4(%ebx) +- # Round 2 +- movl 8(%esi),%ecx +- movl 8(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- movl %ecx,8(%ebx) +- # Round 3 +- movl 12(%esi),%ecx +- movl 12(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- movl %ecx,12(%ebx) +- # Round 4 +- movl 16(%esi),%ecx +- movl 16(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- movl %ecx,16(%ebx) +- # Round 5 +- movl 20(%esi),%ecx +- movl 20(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- movl %ecx,20(%ebx) +- # Round 6 +- movl 24(%esi),%ecx +- movl 24(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- movl %ecx,24(%ebx) +- # Round 7 +- movl 28(%esi),%ecx +- movl 28(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- movl %ecx,28(%ebx) +- +- addl $32,%esi +- addl $32,%edi +- addl $32,%ebx +- subl $8,%ebp +- jnz L027aw_loop +-L026aw_finish: +- movl 32(%esp),%ebp +- andl $7,%ebp +- jz L028aw_end +- # Tail Round 0 +- movl (%esi),%ecx +- movl (%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,(%ebx) +- jz L028aw_end +- # Tail Round 1 +- movl 4(%esi),%ecx +- movl 4(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,4(%ebx) +- jz L028aw_end +- # Tail Round 2 +- movl 8(%esi),%ecx +- movl 8(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,8(%ebx) +- jz L028aw_end +- # Tail Round 3 +- movl 12(%esi),%ecx +- movl 12(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,12(%ebx) +- jz L028aw_end +- # Tail Round 4 +- movl 16(%esi),%ecx +- movl 16(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,16(%ebx) +- jz L028aw_end +- # Tail Round 5 +- movl 20(%esi),%ecx +- movl 20(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- decl %ebp +- movl %ecx,20(%ebx) +- jz L028aw_end +- # Tail Round 6 +- movl 24(%esi),%ecx +- movl 24(%edi),%edx +- subl %eax,%ecx +- movl $0,%eax +- adcl %eax,%eax +- subl %edx,%ecx +- adcl $0,%eax +- movl %ecx,24(%ebx) +-L028aw_end: +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.section 
__IMPORT,__pointers,non_lazy_symbol_pointers +-L_OPENSSL_ia32cap_P$non_lazy_ptr: +-.indirect_symbol _OPENSSL_ia32cap_P +-.long 0 +-#endif +diff --git a/mac-x86/crypto/fipsmodule/co-586.S b/mac-x86/crypto/fipsmodule/co-586.S +deleted file mode 100644 +index 015dffa..0000000 +--- a/mac-x86/crypto/fipsmodule/co-586.S ++++ /dev/null +@@ -1,1257 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _bn_mul_comba8 +-.private_extern _bn_mul_comba8 +-.align 4 +-_bn_mul_comba8: +-L_bn_mul_comba8_begin: +- pushl %esi +- movl 12(%esp),%esi +- pushl %edi +- movl 20(%esp),%edi +- pushl %ebp +- pushl %ebx +- xorl %ebx,%ebx +- movl (%esi),%eax +- xorl %ecx,%ecx +- movl (%edi),%edx +- # ################## Calculate word 0 +- xorl %ebp,%ebp +- # mul a[0]*b[0] +- mull %edx +- addl %eax,%ebx +- movl 20(%esp),%eax +- adcl %edx,%ecx +- movl (%edi),%edx +- adcl $0,%ebp +- movl %ebx,(%eax) +- movl 4(%esi),%eax +- # saved r[0] +- # ################## Calculate word 1 +- xorl %ebx,%ebx +- # mul a[1]*b[0] +- mull %edx +- addl %eax,%ecx +- movl (%esi),%eax +- adcl %edx,%ebp +- movl 4(%edi),%edx +- adcl $0,%ebx +- # mul a[0]*b[1] +- mull %edx +- addl %eax,%ecx +- movl 20(%esp),%eax +- adcl %edx,%ebp +- movl (%edi),%edx +- adcl $0,%ebx +- movl %ecx,4(%eax) +- movl 8(%esi),%eax +- # saved r[1] +- # ################## Calculate word 2 +- xorl %ecx,%ecx +- # mul a[2]*b[0] +- mull %edx +- addl %eax,%ebp +- movl 4(%esi),%eax +- adcl %edx,%ebx +- movl 4(%edi),%edx +- adcl $0,%ecx +- # mul a[1]*b[1] +- mull %edx +- addl %eax,%ebp +- movl (%esi),%eax +- adcl %edx,%ebx +- movl 8(%edi),%edx +- adcl $0,%ecx +- # mul a[0]*b[2] +- mull %edx +- addl %eax,%ebp +- movl 20(%esp),%eax +- adcl %edx,%ebx +- movl (%edi),%edx +- adcl $0,%ecx +- movl %ebp,8(%eax) +- movl 12(%esi),%eax +- # saved r[2] +- # ################## Calculate word 3 +- xorl %ebp,%ebp +- # mul a[3]*b[0] +- mull %edx +- addl %eax,%ebx +- movl 8(%esi),%eax +- adcl %edx,%ecx +- movl 4(%edi),%edx +- adcl $0,%ebp +- # mul a[2]*b[1] +- mull %edx +- addl %eax,%ebx +- movl 4(%esi),%eax +- adcl %edx,%ecx +- movl 8(%edi),%edx +- adcl $0,%ebp +- # mul a[1]*b[2] +- mull %edx +- addl %eax,%ebx +- movl (%esi),%eax +- adcl %edx,%ecx +- movl 12(%edi),%edx +- adcl $0,%ebp +- # mul a[0]*b[3] +- mull %edx +- addl %eax,%ebx +- movl 20(%esp),%eax +- adcl %edx,%ecx +- movl (%edi),%edx +- adcl $0,%ebp +- movl %ebx,12(%eax) +- movl 16(%esi),%eax +- # saved r[3] +- # ################## Calculate word 4 +- xorl %ebx,%ebx +- # mul a[4]*b[0] +- mull %edx +- addl %eax,%ecx +- movl 12(%esi),%eax +- adcl %edx,%ebp +- movl 4(%edi),%edx +- adcl $0,%ebx +- # mul a[3]*b[1] +- mull %edx +- addl %eax,%ecx +- movl 8(%esi),%eax +- adcl %edx,%ebp +- movl 8(%edi),%edx +- adcl $0,%ebx +- # mul a[2]*b[2] +- mull %edx +- addl %eax,%ecx +- movl 4(%esi),%eax +- adcl %edx,%ebp +- movl 12(%edi),%edx +- adcl $0,%ebx +- # mul a[1]*b[3] +- mull %edx +- addl %eax,%ecx +- movl (%esi),%eax +- adcl %edx,%ebp +- movl 16(%edi),%edx +- adcl $0,%ebx +- # mul a[0]*b[4] +- mull %edx +- addl %eax,%ecx +- movl 20(%esp),%eax +- adcl %edx,%ebp +- movl (%edi),%edx +- adcl $0,%ebx +- movl %ecx,16(%eax) +- movl 20(%esi),%eax +- # saved r[4] +- # ################## Calculate word 5 +- xorl %ecx,%ecx +- # mul a[5]*b[0] +- mull %edx +- addl %eax,%ebp +- movl 16(%esi),%eax +- adcl %edx,%ebx +- movl 4(%edi),%edx +- adcl $0,%ecx +- # mul a[4]*b[1] +- mull %edx +- addl 
%eax,%ebp +- movl 12(%esi),%eax +- adcl %edx,%ebx +- movl 8(%edi),%edx +- adcl $0,%ecx +- # mul a[3]*b[2] +- mull %edx +- addl %eax,%ebp +- movl 8(%esi),%eax +- adcl %edx,%ebx +- movl 12(%edi),%edx +- adcl $0,%ecx +- # mul a[2]*b[3] +- mull %edx +- addl %eax,%ebp +- movl 4(%esi),%eax +- adcl %edx,%ebx +- movl 16(%edi),%edx +- adcl $0,%ecx +- # mul a[1]*b[4] +- mull %edx +- addl %eax,%ebp +- movl (%esi),%eax +- adcl %edx,%ebx +- movl 20(%edi),%edx +- adcl $0,%ecx +- # mul a[0]*b[5] +- mull %edx +- addl %eax,%ebp +- movl 20(%esp),%eax +- adcl %edx,%ebx +- movl (%edi),%edx +- adcl $0,%ecx +- movl %ebp,20(%eax) +- movl 24(%esi),%eax +- # saved r[5] +- # ################## Calculate word 6 +- xorl %ebp,%ebp +- # mul a[6]*b[0] +- mull %edx +- addl %eax,%ebx +- movl 20(%esi),%eax +- adcl %edx,%ecx +- movl 4(%edi),%edx +- adcl $0,%ebp +- # mul a[5]*b[1] +- mull %edx +- addl %eax,%ebx +- movl 16(%esi),%eax +- adcl %edx,%ecx +- movl 8(%edi),%edx +- adcl $0,%ebp +- # mul a[4]*b[2] +- mull %edx +- addl %eax,%ebx +- movl 12(%esi),%eax +- adcl %edx,%ecx +- movl 12(%edi),%edx +- adcl $0,%ebp +- # mul a[3]*b[3] +- mull %edx +- addl %eax,%ebx +- movl 8(%esi),%eax +- adcl %edx,%ecx +- movl 16(%edi),%edx +- adcl $0,%ebp +- # mul a[2]*b[4] +- mull %edx +- addl %eax,%ebx +- movl 4(%esi),%eax +- adcl %edx,%ecx +- movl 20(%edi),%edx +- adcl $0,%ebp +- # mul a[1]*b[5] +- mull %edx +- addl %eax,%ebx +- movl (%esi),%eax +- adcl %edx,%ecx +- movl 24(%edi),%edx +- adcl $0,%ebp +- # mul a[0]*b[6] +- mull %edx +- addl %eax,%ebx +- movl 20(%esp),%eax +- adcl %edx,%ecx +- movl (%edi),%edx +- adcl $0,%ebp +- movl %ebx,24(%eax) +- movl 28(%esi),%eax +- # saved r[6] +- # ################## Calculate word 7 +- xorl %ebx,%ebx +- # mul a[7]*b[0] +- mull %edx +- addl %eax,%ecx +- movl 24(%esi),%eax +- adcl %edx,%ebp +- movl 4(%edi),%edx +- adcl $0,%ebx +- # mul a[6]*b[1] +- mull %edx +- addl %eax,%ecx +- movl 20(%esi),%eax +- adcl %edx,%ebp +- movl 8(%edi),%edx +- adcl $0,%ebx +- # mul a[5]*b[2] +- mull %edx +- addl %eax,%ecx +- movl 16(%esi),%eax +- adcl %edx,%ebp +- movl 12(%edi),%edx +- adcl $0,%ebx +- # mul a[4]*b[3] +- mull %edx +- addl %eax,%ecx +- movl 12(%esi),%eax +- adcl %edx,%ebp +- movl 16(%edi),%edx +- adcl $0,%ebx +- # mul a[3]*b[4] +- mull %edx +- addl %eax,%ecx +- movl 8(%esi),%eax +- adcl %edx,%ebp +- movl 20(%edi),%edx +- adcl $0,%ebx +- # mul a[2]*b[5] +- mull %edx +- addl %eax,%ecx +- movl 4(%esi),%eax +- adcl %edx,%ebp +- movl 24(%edi),%edx +- adcl $0,%ebx +- # mul a[1]*b[6] +- mull %edx +- addl %eax,%ecx +- movl (%esi),%eax +- adcl %edx,%ebp +- movl 28(%edi),%edx +- adcl $0,%ebx +- # mul a[0]*b[7] +- mull %edx +- addl %eax,%ecx +- movl 20(%esp),%eax +- adcl %edx,%ebp +- movl 4(%edi),%edx +- adcl $0,%ebx +- movl %ecx,28(%eax) +- movl 28(%esi),%eax +- # saved r[7] +- # ################## Calculate word 8 +- xorl %ecx,%ecx +- # mul a[7]*b[1] +- mull %edx +- addl %eax,%ebp +- movl 24(%esi),%eax +- adcl %edx,%ebx +- movl 8(%edi),%edx +- adcl $0,%ecx +- # mul a[6]*b[2] +- mull %edx +- addl %eax,%ebp +- movl 20(%esi),%eax +- adcl %edx,%ebx +- movl 12(%edi),%edx +- adcl $0,%ecx +- # mul a[5]*b[3] +- mull %edx +- addl %eax,%ebp +- movl 16(%esi),%eax +- adcl %edx,%ebx +- movl 16(%edi),%edx +- adcl $0,%ecx +- # mul a[4]*b[4] +- mull %edx +- addl %eax,%ebp +- movl 12(%esi),%eax +- adcl %edx,%ebx +- movl 20(%edi),%edx +- adcl $0,%ecx +- # mul a[3]*b[5] +- mull %edx +- addl %eax,%ebp +- movl 8(%esi),%eax +- adcl %edx,%ebx +- movl 24(%edi),%edx +- adcl $0,%ecx +- # mul a[2]*b[6] +- mull %edx +- addl %eax,%ebp +- movl 
4(%esi),%eax +- adcl %edx,%ebx +- movl 28(%edi),%edx +- adcl $0,%ecx +- # mul a[1]*b[7] +- mull %edx +- addl %eax,%ebp +- movl 20(%esp),%eax +- adcl %edx,%ebx +- movl 8(%edi),%edx +- adcl $0,%ecx +- movl %ebp,32(%eax) +- movl 28(%esi),%eax +- # saved r[8] +- # ################## Calculate word 9 +- xorl %ebp,%ebp +- # mul a[7]*b[2] +- mull %edx +- addl %eax,%ebx +- movl 24(%esi),%eax +- adcl %edx,%ecx +- movl 12(%edi),%edx +- adcl $0,%ebp +- # mul a[6]*b[3] +- mull %edx +- addl %eax,%ebx +- movl 20(%esi),%eax +- adcl %edx,%ecx +- movl 16(%edi),%edx +- adcl $0,%ebp +- # mul a[5]*b[4] +- mull %edx +- addl %eax,%ebx +- movl 16(%esi),%eax +- adcl %edx,%ecx +- movl 20(%edi),%edx +- adcl $0,%ebp +- # mul a[4]*b[5] +- mull %edx +- addl %eax,%ebx +- movl 12(%esi),%eax +- adcl %edx,%ecx +- movl 24(%edi),%edx +- adcl $0,%ebp +- # mul a[3]*b[6] +- mull %edx +- addl %eax,%ebx +- movl 8(%esi),%eax +- adcl %edx,%ecx +- movl 28(%edi),%edx +- adcl $0,%ebp +- # mul a[2]*b[7] +- mull %edx +- addl %eax,%ebx +- movl 20(%esp),%eax +- adcl %edx,%ecx +- movl 12(%edi),%edx +- adcl $0,%ebp +- movl %ebx,36(%eax) +- movl 28(%esi),%eax +- # saved r[9] +- # ################## Calculate word 10 +- xorl %ebx,%ebx +- # mul a[7]*b[3] +- mull %edx +- addl %eax,%ecx +- movl 24(%esi),%eax +- adcl %edx,%ebp +- movl 16(%edi),%edx +- adcl $0,%ebx +- # mul a[6]*b[4] +- mull %edx +- addl %eax,%ecx +- movl 20(%esi),%eax +- adcl %edx,%ebp +- movl 20(%edi),%edx +- adcl $0,%ebx +- # mul a[5]*b[5] +- mull %edx +- addl %eax,%ecx +- movl 16(%esi),%eax +- adcl %edx,%ebp +- movl 24(%edi),%edx +- adcl $0,%ebx +- # mul a[4]*b[6] +- mull %edx +- addl %eax,%ecx +- movl 12(%esi),%eax +- adcl %edx,%ebp +- movl 28(%edi),%edx +- adcl $0,%ebx +- # mul a[3]*b[7] +- mull %edx +- addl %eax,%ecx +- movl 20(%esp),%eax +- adcl %edx,%ebp +- movl 16(%edi),%edx +- adcl $0,%ebx +- movl %ecx,40(%eax) +- movl 28(%esi),%eax +- # saved r[10] +- # ################## Calculate word 11 +- xorl %ecx,%ecx +- # mul a[7]*b[4] +- mull %edx +- addl %eax,%ebp +- movl 24(%esi),%eax +- adcl %edx,%ebx +- movl 20(%edi),%edx +- adcl $0,%ecx +- # mul a[6]*b[5] +- mull %edx +- addl %eax,%ebp +- movl 20(%esi),%eax +- adcl %edx,%ebx +- movl 24(%edi),%edx +- adcl $0,%ecx +- # mul a[5]*b[6] +- mull %edx +- addl %eax,%ebp +- movl 16(%esi),%eax +- adcl %edx,%ebx +- movl 28(%edi),%edx +- adcl $0,%ecx +- # mul a[4]*b[7] +- mull %edx +- addl %eax,%ebp +- movl 20(%esp),%eax +- adcl %edx,%ebx +- movl 20(%edi),%edx +- adcl $0,%ecx +- movl %ebp,44(%eax) +- movl 28(%esi),%eax +- # saved r[11] +- # ################## Calculate word 12 +- xorl %ebp,%ebp +- # mul a[7]*b[5] +- mull %edx +- addl %eax,%ebx +- movl 24(%esi),%eax +- adcl %edx,%ecx +- movl 24(%edi),%edx +- adcl $0,%ebp +- # mul a[6]*b[6] +- mull %edx +- addl %eax,%ebx +- movl 20(%esi),%eax +- adcl %edx,%ecx +- movl 28(%edi),%edx +- adcl $0,%ebp +- # mul a[5]*b[7] +- mull %edx +- addl %eax,%ebx +- movl 20(%esp),%eax +- adcl %edx,%ecx +- movl 24(%edi),%edx +- adcl $0,%ebp +- movl %ebx,48(%eax) +- movl 28(%esi),%eax +- # saved r[12] +- # ################## Calculate word 13 +- xorl %ebx,%ebx +- # mul a[7]*b[6] +- mull %edx +- addl %eax,%ecx +- movl 24(%esi),%eax +- adcl %edx,%ebp +- movl 28(%edi),%edx +- adcl $0,%ebx +- # mul a[6]*b[7] +- mull %edx +- addl %eax,%ecx +- movl 20(%esp),%eax +- adcl %edx,%ebp +- movl 28(%edi),%edx +- adcl $0,%ebx +- movl %ecx,52(%eax) +- movl 28(%esi),%eax +- # saved r[13] +- # ################## Calculate word 14 +- xorl %ecx,%ecx +- # mul a[7]*b[7] +- mull %edx +- addl %eax,%ebp +- movl 20(%esp),%eax +- 
adcl %edx,%ebx +- adcl $0,%ecx +- movl %ebp,56(%eax) +- # saved r[14] +- # save r[15] +- movl %ebx,60(%eax) +- popl %ebx +- popl %ebp +- popl %edi +- popl %esi +- ret +-.globl _bn_mul_comba4 +-.private_extern _bn_mul_comba4 +-.align 4 +-_bn_mul_comba4: +-L_bn_mul_comba4_begin: +- pushl %esi +- movl 12(%esp),%esi +- pushl %edi +- movl 20(%esp),%edi +- pushl %ebp +- pushl %ebx +- xorl %ebx,%ebx +- movl (%esi),%eax +- xorl %ecx,%ecx +- movl (%edi),%edx +- # ################## Calculate word 0 +- xorl %ebp,%ebp +- # mul a[0]*b[0] +- mull %edx +- addl %eax,%ebx +- movl 20(%esp),%eax +- adcl %edx,%ecx +- movl (%edi),%edx +- adcl $0,%ebp +- movl %ebx,(%eax) +- movl 4(%esi),%eax +- # saved r[0] +- # ################## Calculate word 1 +- xorl %ebx,%ebx +- # mul a[1]*b[0] +- mull %edx +- addl %eax,%ecx +- movl (%esi),%eax +- adcl %edx,%ebp +- movl 4(%edi),%edx +- adcl $0,%ebx +- # mul a[0]*b[1] +- mull %edx +- addl %eax,%ecx +- movl 20(%esp),%eax +- adcl %edx,%ebp +- movl (%edi),%edx +- adcl $0,%ebx +- movl %ecx,4(%eax) +- movl 8(%esi),%eax +- # saved r[1] +- # ################## Calculate word 2 +- xorl %ecx,%ecx +- # mul a[2]*b[0] +- mull %edx +- addl %eax,%ebp +- movl 4(%esi),%eax +- adcl %edx,%ebx +- movl 4(%edi),%edx +- adcl $0,%ecx +- # mul a[1]*b[1] +- mull %edx +- addl %eax,%ebp +- movl (%esi),%eax +- adcl %edx,%ebx +- movl 8(%edi),%edx +- adcl $0,%ecx +- # mul a[0]*b[2] +- mull %edx +- addl %eax,%ebp +- movl 20(%esp),%eax +- adcl %edx,%ebx +- movl (%edi),%edx +- adcl $0,%ecx +- movl %ebp,8(%eax) +- movl 12(%esi),%eax +- # saved r[2] +- # ################## Calculate word 3 +- xorl %ebp,%ebp +- # mul a[3]*b[0] +- mull %edx +- addl %eax,%ebx +- movl 8(%esi),%eax +- adcl %edx,%ecx +- movl 4(%edi),%edx +- adcl $0,%ebp +- # mul a[2]*b[1] +- mull %edx +- addl %eax,%ebx +- movl 4(%esi),%eax +- adcl %edx,%ecx +- movl 8(%edi),%edx +- adcl $0,%ebp +- # mul a[1]*b[2] +- mull %edx +- addl %eax,%ebx +- movl (%esi),%eax +- adcl %edx,%ecx +- movl 12(%edi),%edx +- adcl $0,%ebp +- # mul a[0]*b[3] +- mull %edx +- addl %eax,%ebx +- movl 20(%esp),%eax +- adcl %edx,%ecx +- movl 4(%edi),%edx +- adcl $0,%ebp +- movl %ebx,12(%eax) +- movl 12(%esi),%eax +- # saved r[3] +- # ################## Calculate word 4 +- xorl %ebx,%ebx +- # mul a[3]*b[1] +- mull %edx +- addl %eax,%ecx +- movl 8(%esi),%eax +- adcl %edx,%ebp +- movl 8(%edi),%edx +- adcl $0,%ebx +- # mul a[2]*b[2] +- mull %edx +- addl %eax,%ecx +- movl 4(%esi),%eax +- adcl %edx,%ebp +- movl 12(%edi),%edx +- adcl $0,%ebx +- # mul a[1]*b[3] +- mull %edx +- addl %eax,%ecx +- movl 20(%esp),%eax +- adcl %edx,%ebp +- movl 8(%edi),%edx +- adcl $0,%ebx +- movl %ecx,16(%eax) +- movl 12(%esi),%eax +- # saved r[4] +- # ################## Calculate word 5 +- xorl %ecx,%ecx +- # mul a[3]*b[2] +- mull %edx +- addl %eax,%ebp +- movl 8(%esi),%eax +- adcl %edx,%ebx +- movl 12(%edi),%edx +- adcl $0,%ecx +- # mul a[2]*b[3] +- mull %edx +- addl %eax,%ebp +- movl 20(%esp),%eax +- adcl %edx,%ebx +- movl 12(%edi),%edx +- adcl $0,%ecx +- movl %ebp,20(%eax) +- movl 12(%esi),%eax +- # saved r[5] +- # ################## Calculate word 6 +- xorl %ebp,%ebp +- # mul a[3]*b[3] +- mull %edx +- addl %eax,%ebx +- movl 20(%esp),%eax +- adcl %edx,%ecx +- adcl $0,%ebp +- movl %ebx,24(%eax) +- # saved r[6] +- # save r[7] +- movl %ecx,28(%eax) +- popl %ebx +- popl %ebp +- popl %edi +- popl %esi +- ret +-.globl _bn_sqr_comba8 +-.private_extern _bn_sqr_comba8 +-.align 4 +-_bn_sqr_comba8: +-L_bn_sqr_comba8_begin: +- pushl %esi +- pushl %edi +- pushl %ebp +- pushl %ebx +- movl 20(%esp),%edi +- movl 
24(%esp),%esi +- xorl %ebx,%ebx +- xorl %ecx,%ecx +- movl (%esi),%eax +- # ############### Calculate word 0 +- xorl %ebp,%ebp +- # sqr a[0]*a[0] +- mull %eax +- addl %eax,%ebx +- adcl %edx,%ecx +- movl (%esi),%edx +- adcl $0,%ebp +- movl %ebx,(%edi) +- movl 4(%esi),%eax +- # saved r[0] +- # ############### Calculate word 1 +- xorl %ebx,%ebx +- # sqr a[1]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 8(%esi),%eax +- adcl $0,%ebx +- movl %ecx,4(%edi) +- movl (%esi),%edx +- # saved r[1] +- # ############### Calculate word 2 +- xorl %ecx,%ecx +- # sqr a[2]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 4(%esi),%eax +- adcl $0,%ecx +- # sqr a[1]*a[1] +- mull %eax +- addl %eax,%ebp +- adcl %edx,%ebx +- movl (%esi),%edx +- adcl $0,%ecx +- movl %ebp,8(%edi) +- movl 12(%esi),%eax +- # saved r[2] +- # ############### Calculate word 3 +- xorl %ebp,%ebp +- # sqr a[3]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 8(%esi),%eax +- adcl $0,%ebp +- movl 4(%esi),%edx +- # sqr a[2]*a[1] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 16(%esi),%eax +- adcl $0,%ebp +- movl %ebx,12(%edi) +- movl (%esi),%edx +- # saved r[3] +- # ############### Calculate word 4 +- xorl %ebx,%ebx +- # sqr a[4]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 12(%esi),%eax +- adcl $0,%ebx +- movl 4(%esi),%edx +- # sqr a[3]*a[1] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 8(%esi),%eax +- adcl $0,%ebx +- # sqr a[2]*a[2] +- mull %eax +- addl %eax,%ecx +- adcl %edx,%ebp +- movl (%esi),%edx +- adcl $0,%ebx +- movl %ecx,16(%edi) +- movl 20(%esi),%eax +- # saved r[4] +- # ############### Calculate word 5 +- xorl %ecx,%ecx +- # sqr a[5]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 16(%esi),%eax +- adcl $0,%ecx +- movl 4(%esi),%edx +- # sqr a[4]*a[1] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 12(%esi),%eax +- adcl $0,%ecx +- movl 8(%esi),%edx +- # sqr a[3]*a[2] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 24(%esi),%eax +- adcl $0,%ecx +- movl %ebp,20(%edi) +- movl (%esi),%edx +- # saved r[5] +- # ############### Calculate word 6 +- xorl %ebp,%ebp +- # sqr a[6]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 20(%esi),%eax +- adcl $0,%ebp +- movl 4(%esi),%edx +- # sqr a[5]*a[1] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 16(%esi),%eax +- adcl $0,%ebp +- movl 8(%esi),%edx +- # sqr a[4]*a[2] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 12(%esi),%eax +- adcl $0,%ebp +- # sqr a[3]*a[3] +- mull %eax +- addl %eax,%ebx +- adcl %edx,%ecx +- movl (%esi),%edx +- adcl $0,%ebp +- movl %ebx,24(%edi) +- movl 28(%esi),%eax +- # saved r[6] +- # ############### Calculate word 7 +- xorl %ebx,%ebx +- # sqr a[7]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 24(%esi),%eax +- adcl $0,%ebx +- movl 4(%esi),%edx +- # sqr 
a[6]*a[1] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 20(%esi),%eax +- adcl $0,%ebx +- movl 8(%esi),%edx +- # sqr a[5]*a[2] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 16(%esi),%eax +- adcl $0,%ebx +- movl 12(%esi),%edx +- # sqr a[4]*a[3] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 28(%esi),%eax +- adcl $0,%ebx +- movl %ecx,28(%edi) +- movl 4(%esi),%edx +- # saved r[7] +- # ############### Calculate word 8 +- xorl %ecx,%ecx +- # sqr a[7]*a[1] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 24(%esi),%eax +- adcl $0,%ecx +- movl 8(%esi),%edx +- # sqr a[6]*a[2] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 20(%esi),%eax +- adcl $0,%ecx +- movl 12(%esi),%edx +- # sqr a[5]*a[3] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 16(%esi),%eax +- adcl $0,%ecx +- # sqr a[4]*a[4] +- mull %eax +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 8(%esi),%edx +- adcl $0,%ecx +- movl %ebp,32(%edi) +- movl 28(%esi),%eax +- # saved r[8] +- # ############### Calculate word 9 +- xorl %ebp,%ebp +- # sqr a[7]*a[2] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 24(%esi),%eax +- adcl $0,%ebp +- movl 12(%esi),%edx +- # sqr a[6]*a[3] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 20(%esi),%eax +- adcl $0,%ebp +- movl 16(%esi),%edx +- # sqr a[5]*a[4] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 28(%esi),%eax +- adcl $0,%ebp +- movl %ebx,36(%edi) +- movl 12(%esi),%edx +- # saved r[9] +- # ############### Calculate word 10 +- xorl %ebx,%ebx +- # sqr a[7]*a[3] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 24(%esi),%eax +- adcl $0,%ebx +- movl 16(%esi),%edx +- # sqr a[6]*a[4] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 20(%esi),%eax +- adcl $0,%ebx +- # sqr a[5]*a[5] +- mull %eax +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 16(%esi),%edx +- adcl $0,%ebx +- movl %ecx,40(%edi) +- movl 28(%esi),%eax +- # saved r[10] +- # ############### Calculate word 11 +- xorl %ecx,%ecx +- # sqr a[7]*a[4] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 24(%esi),%eax +- adcl $0,%ecx +- movl 20(%esi),%edx +- # sqr a[6]*a[5] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 28(%esi),%eax +- adcl $0,%ecx +- movl %ebp,44(%edi) +- movl 20(%esi),%edx +- # saved r[11] +- # ############### Calculate word 12 +- xorl %ebp,%ebp +- # sqr a[7]*a[5] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 24(%esi),%eax +- adcl $0,%ebp +- # sqr a[6]*a[6] +- mull %eax +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 24(%esi),%edx +- adcl $0,%ebp +- movl %ebx,48(%edi) +- movl 28(%esi),%eax +- # saved r[12] +- # ############### Calculate word 13 +- xorl %ebx,%ebx +- # sqr a[7]*a[6] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 28(%esi),%eax +- adcl $0,%ebx 
+- movl %ecx,52(%edi) +- # saved r[13] +- # ############### Calculate word 14 +- xorl %ecx,%ecx +- # sqr a[7]*a[7] +- mull %eax +- addl %eax,%ebp +- adcl %edx,%ebx +- adcl $0,%ecx +- movl %ebp,56(%edi) +- # saved r[14] +- movl %ebx,60(%edi) +- popl %ebx +- popl %ebp +- popl %edi +- popl %esi +- ret +-.globl _bn_sqr_comba4 +-.private_extern _bn_sqr_comba4 +-.align 4 +-_bn_sqr_comba4: +-L_bn_sqr_comba4_begin: +- pushl %esi +- pushl %edi +- pushl %ebp +- pushl %ebx +- movl 20(%esp),%edi +- movl 24(%esp),%esi +- xorl %ebx,%ebx +- xorl %ecx,%ecx +- movl (%esi),%eax +- # ############### Calculate word 0 +- xorl %ebp,%ebp +- # sqr a[0]*a[0] +- mull %eax +- addl %eax,%ebx +- adcl %edx,%ecx +- movl (%esi),%edx +- adcl $0,%ebp +- movl %ebx,(%edi) +- movl 4(%esi),%eax +- # saved r[0] +- # ############### Calculate word 1 +- xorl %ebx,%ebx +- # sqr a[1]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 8(%esi),%eax +- adcl $0,%ebx +- movl %ecx,4(%edi) +- movl (%esi),%edx +- # saved r[1] +- # ############### Calculate word 2 +- xorl %ecx,%ecx +- # sqr a[2]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 4(%esi),%eax +- adcl $0,%ecx +- # sqr a[1]*a[1] +- mull %eax +- addl %eax,%ebp +- adcl %edx,%ebx +- movl (%esi),%edx +- adcl $0,%ecx +- movl %ebp,8(%edi) +- movl 12(%esi),%eax +- # saved r[2] +- # ############### Calculate word 3 +- xorl %ebp,%ebp +- # sqr a[3]*a[0] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 8(%esi),%eax +- adcl $0,%ebp +- movl 4(%esi),%edx +- # sqr a[2]*a[1] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebp +- addl %eax,%ebx +- adcl %edx,%ecx +- movl 12(%esi),%eax +- adcl $0,%ebp +- movl %ebx,12(%edi) +- movl 4(%esi),%edx +- # saved r[3] +- # ############### Calculate word 4 +- xorl %ebx,%ebx +- # sqr a[3]*a[1] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ebx +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 8(%esi),%eax +- adcl $0,%ebx +- # sqr a[2]*a[2] +- mull %eax +- addl %eax,%ecx +- adcl %edx,%ebp +- movl 8(%esi),%edx +- adcl $0,%ebx +- movl %ecx,16(%edi) +- movl 12(%esi),%eax +- # saved r[4] +- # ############### Calculate word 5 +- xorl %ecx,%ecx +- # sqr a[3]*a[2] +- mull %edx +- addl %eax,%eax +- adcl %edx,%edx +- adcl $0,%ecx +- addl %eax,%ebp +- adcl %edx,%ebx +- movl 12(%esi),%eax +- adcl $0,%ecx +- movl %ebp,20(%edi) +- # saved r[5] +- # ############### Calculate word 6 +- xorl %ebp,%ebp +- # sqr a[3]*a[3] +- mull %eax +- addl %eax,%ebx +- adcl %edx,%ecx +- adcl $0,%ebp +- movl %ebx,24(%edi) +- # saved r[6] +- movl %ecx,28(%edi) +- popl %ebx +- popl %ebp +- popl %edi +- popl %esi +- ret +-#endif +diff --git a/mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S b/mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S +deleted file mode 100644 +index 8656679..0000000 +--- a/mac-x86/crypto/fipsmodule/ghash-ssse3-x86.S ++++ /dev/null +@@ -1,289 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _gcm_gmult_ssse3 +-.private_extern _gcm_gmult_ssse3 +-.align 4 +-_gcm_gmult_ssse3: +-L_gcm_gmult_ssse3_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%edi +- movl 24(%esp),%esi +- movdqu (%edi),%xmm0 +- call L000pic_point +-L000pic_point: +- popl %eax +- movdqa Lreverse_bytes-L000pic_point(%eax),%xmm7 +- movdqa Llow4_mask-L000pic_point(%eax),%xmm2 +-.byte 102,15,56,0,199 +- movdqa %xmm2,%xmm1 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm2,%xmm0 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- movl $5,%eax +-L001loop_row_1: +- movdqa (%esi),%xmm4 +- leal 16(%esi),%esi +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- pxor %xmm5,%xmm2 +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- subl $1,%eax +- jnz L001loop_row_1 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movl $5,%eax +-L002loop_row_2: +- movdqa (%esi),%xmm4 +- leal 16(%esi),%esi +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- pxor %xmm5,%xmm2 +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- subl $1,%eax +- jnz L002loop_row_2 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movl $6,%eax +-L003loop_row_3: +- movdqa (%esi),%xmm4 +- leal 16(%esi),%esi +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- pxor %xmm5,%xmm2 +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- subl $1,%eax +- jnz L003loop_row_3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +-.byte 102,15,56,0,215 +- movdqu %xmm2,(%edi) +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _gcm_ghash_ssse3 +-.private_extern _gcm_ghash_ssse3 +-.align 4 +-_gcm_ghash_ssse3: +-L_gcm_ghash_ssse3_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%edi +- movl 24(%esp),%esi +- movl 28(%esp),%edx +- movl 32(%esp),%ecx +- movdqu (%edi),%xmm0 +- call L004pic_point +-L004pic_point: +- popl %ebx +- movdqa Lreverse_bytes-L004pic_point(%ebx),%xmm7 +- andl $-16,%ecx +-.byte 102,15,56,0,199 +- pxor %xmm3,%xmm3 +-L005loop_ghash: +- movdqa Llow4_mask-L004pic_point(%ebx),%xmm2 +- movdqu (%edx),%xmm1 +-.byte 102,15,56,0,207 +- pxor %xmm1,%xmm0 +- movdqa %xmm2,%xmm1 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm2,%xmm0 +- pxor %xmm2,%xmm2 +- movl $5,%eax +-L006loop_row_4: +- movdqa (%esi),%xmm4 +- leal 16(%esi),%esi +- movdqa %xmm2,%xmm6 +-.byte 
102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- pxor %xmm5,%xmm2 +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- subl $1,%eax +- jnz L006loop_row_4 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movl $5,%eax +-L007loop_row_5: +- movdqa (%esi),%xmm4 +- leal 16(%esi),%esi +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- pxor %xmm5,%xmm2 +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- subl $1,%eax +- jnz L007loop_row_5 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movl $6,%eax +-L008loop_row_6: +- movdqa (%esi),%xmm4 +- leal 16(%esi),%esi +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- pxor %xmm5,%xmm2 +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- subl $1,%eax +- jnz L008loop_row_6 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movdqa %xmm2,%xmm0 +- leal -256(%esi),%esi +- leal 16(%edx),%edx +- subl $16,%ecx +- jnz L005loop_ghash +-.byte 102,15,56,0,199 +- movdqu %xmm0,(%edi) +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 4,0x90 +-Lreverse_bytes: +-.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +-.align 4,0x90 +-Llow4_mask: +-.long 252645135,252645135,252645135,252645135 +-#endif +diff --git a/mac-x86/crypto/fipsmodule/ghash-x86.S b/mac-x86/crypto/fipsmodule/ghash-x86.S +deleted file mode 100644 +index c1e0d53..0000000 +--- a/mac-x86/crypto/fipsmodule/ghash-x86.S ++++ /dev/null +@@ -1,323 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _gcm_init_clmul +-.private_extern _gcm_init_clmul +-.align 4 +-_gcm_init_clmul: +-L_gcm_init_clmul_begin: +- movl 4(%esp),%edx +- movl 8(%esp),%eax +- call L000pic +-L000pic: +- popl %ecx +- leal Lbswap-L000pic(%ecx),%ecx +- movdqu (%eax),%xmm2 +- pshufd $78,%xmm2,%xmm2 +- pshufd $255,%xmm2,%xmm4 +- movdqa %xmm2,%xmm3 +- psllq $1,%xmm2 +- pxor %xmm5,%xmm5 +- psrlq $63,%xmm3 +- pcmpgtd %xmm4,%xmm5 +- pslldq $8,%xmm3 +- por %xmm3,%xmm2 +- pand 16(%ecx),%xmm5 +- pxor %xmm5,%xmm2 +- movdqa %xmm2,%xmm0 +- movdqa %xmm0,%xmm1 +- pshufd $78,%xmm0,%xmm3 +- pshufd $78,%xmm2,%xmm4 +- pxor %xmm0,%xmm3 +- pxor %xmm2,%xmm4 +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,220,0 +- xorps %xmm0,%xmm3 +- xorps %xmm1,%xmm3 +- movdqa %xmm3,%xmm4 +- psrldq $8,%xmm3 +- pslldq $8,%xmm4 +- pxor %xmm3,%xmm1 +- pxor %xmm4,%xmm0 +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +- pshufd $78,%xmm2,%xmm3 +- pshufd $78,%xmm0,%xmm4 +- pxor %xmm2,%xmm3 +- movdqu %xmm2,(%edx) +- pxor %xmm0,%xmm4 +- movdqu %xmm0,16(%edx) +-.byte 102,15,58,15,227,8 +- movdqu %xmm4,32(%edx) +- ret +-.globl _gcm_gmult_clmul +-.private_extern _gcm_gmult_clmul +-.align 4 +-_gcm_gmult_clmul: +-L_gcm_gmult_clmul_begin: +- movl 4(%esp),%eax +- movl 8(%esp),%edx +- call L001pic +-L001pic: +- popl %ecx +- leal Lbswap-L001pic(%ecx),%ecx +- movdqu (%eax),%xmm0 +- movdqa (%ecx),%xmm5 +- movups (%edx),%xmm2 +-.byte 102,15,56,0,197 +- movups 32(%edx),%xmm4 +- movdqa %xmm0,%xmm1 +- pshufd $78,%xmm0,%xmm3 +- pxor %xmm0,%xmm3 +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,220,0 +- xorps %xmm0,%xmm3 +- xorps %xmm1,%xmm3 +- movdqa %xmm3,%xmm4 +- psrldq $8,%xmm3 +- pslldq $8,%xmm4 +- pxor %xmm3,%xmm1 +- pxor %xmm4,%xmm0 +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +-.byte 102,15,56,0,197 +- movdqu %xmm0,(%eax) +- ret +-.globl _gcm_ghash_clmul +-.private_extern _gcm_ghash_clmul +-.align 4 +-_gcm_ghash_clmul: +-L_gcm_ghash_clmul_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%eax +- movl 24(%esp),%edx +- movl 28(%esp),%esi +- movl 32(%esp),%ebx +- call L002pic +-L002pic: +- popl %ecx +- leal Lbswap-L002pic(%ecx),%ecx +- movdqu (%eax),%xmm0 +- movdqa (%ecx),%xmm5 +- movdqu (%edx),%xmm2 +-.byte 102,15,56,0,197 +- subl $16,%ebx +- jz L003odd_tail +- movdqu (%esi),%xmm3 +- movdqu 16(%esi),%xmm6 +-.byte 102,15,56,0,221 +-.byte 102,15,56,0,245 +- movdqu 32(%edx),%xmm5 +- pxor %xmm3,%xmm0 +- pshufd $78,%xmm6,%xmm3 +- movdqa %xmm6,%xmm7 +- pxor %xmm6,%xmm3 +- leal 32(%esi),%esi +-.byte 102,15,58,68,242,0 +-.byte 102,15,58,68,250,17 +-.byte 102,15,58,68,221,0 +- movups 16(%edx),%xmm2 +- nop +- subl $32,%ebx +- jbe L004even_tail +- jmp L005mod_loop +-.align 5,0x90 +-L005mod_loop: +- pshufd $78,%xmm0,%xmm4 +- 
movdqa %xmm0,%xmm1 +- pxor %xmm0,%xmm4 +- nop +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,229,16 +- movups (%edx),%xmm2 +- xorps %xmm6,%xmm0 +- movdqa (%ecx),%xmm5 +- xorps %xmm7,%xmm1 +- movdqu (%esi),%xmm7 +- pxor %xmm0,%xmm3 +- movdqu 16(%esi),%xmm6 +- pxor %xmm1,%xmm3 +-.byte 102,15,56,0,253 +- pxor %xmm3,%xmm4 +- movdqa %xmm4,%xmm3 +- psrldq $8,%xmm4 +- pslldq $8,%xmm3 +- pxor %xmm4,%xmm1 +- pxor %xmm3,%xmm0 +-.byte 102,15,56,0,245 +- pxor %xmm7,%xmm1 +- movdqa %xmm6,%xmm7 +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +-.byte 102,15,58,68,242,0 +- movups 32(%edx),%xmm5 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- pshufd $78,%xmm7,%xmm3 +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm7,%xmm3 +- pxor %xmm4,%xmm1 +-.byte 102,15,58,68,250,17 +- movups 16(%edx),%xmm2 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +-.byte 102,15,58,68,221,0 +- leal 32(%esi),%esi +- subl $32,%ebx +- ja L005mod_loop +-L004even_tail: +- pshufd $78,%xmm0,%xmm4 +- movdqa %xmm0,%xmm1 +- pxor %xmm0,%xmm4 +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,229,16 +- movdqa (%ecx),%xmm5 +- xorps %xmm6,%xmm0 +- xorps %xmm7,%xmm1 +- pxor %xmm0,%xmm3 +- pxor %xmm1,%xmm3 +- pxor %xmm3,%xmm4 +- movdqa %xmm4,%xmm3 +- psrldq $8,%xmm4 +- pslldq $8,%xmm3 +- pxor %xmm4,%xmm1 +- pxor %xmm3,%xmm0 +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +- testl %ebx,%ebx +- jnz L006done +- movups (%edx),%xmm2 +-L003odd_tail: +- movdqu (%esi),%xmm3 +-.byte 102,15,56,0,221 +- pxor %xmm3,%xmm0 +- movdqa %xmm0,%xmm1 +- pshufd $78,%xmm0,%xmm3 +- pshufd $78,%xmm2,%xmm4 +- pxor %xmm0,%xmm3 +- pxor %xmm2,%xmm4 +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,220,0 +- xorps %xmm0,%xmm3 +- xorps %xmm1,%xmm3 +- movdqa %xmm3,%xmm4 +- psrldq $8,%xmm3 +- pslldq $8,%xmm4 +- pxor %xmm3,%xmm1 +- pxor %xmm4,%xmm0 +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +-L006done: +-.byte 102,15,56,0,197 +- movdqu %xmm0,(%eax) +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 6,0x90 +-Lbswap: +-.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +-.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194 +-.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67 +-.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112 +-.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62 +-.byte 0 +-#endif +diff --git a/mac-x86/crypto/fipsmodule/md5-586.S b/mac-x86/crypto/fipsmodule/md5-586.S +deleted file mode 100644 +index f4c4b50..0000000 +--- a/mac-x86/crypto/fipsmodule/md5-586.S ++++ /dev/null +@@ -1,685 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source 
tree. Do not edit by hand. +- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _md5_block_asm_data_order +-.private_extern _md5_block_asm_data_order +-.align 4 +-_md5_block_asm_data_order: +-L_md5_block_asm_data_order_begin: +- pushl %esi +- pushl %edi +- movl 12(%esp),%edi +- movl 16(%esp),%esi +- movl 20(%esp),%ecx +- pushl %ebp +- shll $6,%ecx +- pushl %ebx +- addl %esi,%ecx +- subl $64,%ecx +- movl (%edi),%eax +- pushl %ecx +- movl 4(%edi),%ebx +- movl 8(%edi),%ecx +- movl 12(%edi),%edx +-L000start: +- +- # R0 section +- movl %ecx,%edi +- movl (%esi),%ebp +- # R0 0 +- xorl %edx,%edi +- andl %ebx,%edi +- leal 3614090360(%eax,%ebp,1),%eax +- xorl %edx,%edi +- addl %edi,%eax +- movl %ebx,%edi +- roll $7,%eax +- movl 4(%esi),%ebp +- addl %ebx,%eax +- # R0 1 +- xorl %ecx,%edi +- andl %eax,%edi +- leal 3905402710(%edx,%ebp,1),%edx +- xorl %ecx,%edi +- addl %edi,%edx +- movl %eax,%edi +- roll $12,%edx +- movl 8(%esi),%ebp +- addl %eax,%edx +- # R0 2 +- xorl %ebx,%edi +- andl %edx,%edi +- leal 606105819(%ecx,%ebp,1),%ecx +- xorl %ebx,%edi +- addl %edi,%ecx +- movl %edx,%edi +- roll $17,%ecx +- movl 12(%esi),%ebp +- addl %edx,%ecx +- # R0 3 +- xorl %eax,%edi +- andl %ecx,%edi +- leal 3250441966(%ebx,%ebp,1),%ebx +- xorl %eax,%edi +- addl %edi,%ebx +- movl %ecx,%edi +- roll $22,%ebx +- movl 16(%esi),%ebp +- addl %ecx,%ebx +- # R0 4 +- xorl %edx,%edi +- andl %ebx,%edi +- leal 4118548399(%eax,%ebp,1),%eax +- xorl %edx,%edi +- addl %edi,%eax +- movl %ebx,%edi +- roll $7,%eax +- movl 20(%esi),%ebp +- addl %ebx,%eax +- # R0 5 +- xorl %ecx,%edi +- andl %eax,%edi +- leal 1200080426(%edx,%ebp,1),%edx +- xorl %ecx,%edi +- addl %edi,%edx +- movl %eax,%edi +- roll $12,%edx +- movl 24(%esi),%ebp +- addl %eax,%edx +- # R0 6 +- xorl %ebx,%edi +- andl %edx,%edi +- leal 2821735955(%ecx,%ebp,1),%ecx +- xorl %ebx,%edi +- addl %edi,%ecx +- movl %edx,%edi +- roll $17,%ecx +- movl 28(%esi),%ebp +- addl %edx,%ecx +- # R0 7 +- xorl %eax,%edi +- andl %ecx,%edi +- leal 4249261313(%ebx,%ebp,1),%ebx +- xorl %eax,%edi +- addl %edi,%ebx +- movl %ecx,%edi +- roll $22,%ebx +- movl 32(%esi),%ebp +- addl %ecx,%ebx +- # R0 8 +- xorl %edx,%edi +- andl %ebx,%edi +- leal 1770035416(%eax,%ebp,1),%eax +- xorl %edx,%edi +- addl %edi,%eax +- movl %ebx,%edi +- roll $7,%eax +- movl 36(%esi),%ebp +- addl %ebx,%eax +- # R0 9 +- xorl %ecx,%edi +- andl %eax,%edi +- leal 2336552879(%edx,%ebp,1),%edx +- xorl %ecx,%edi +- addl %edi,%edx +- movl %eax,%edi +- roll $12,%edx +- movl 40(%esi),%ebp +- addl %eax,%edx +- # R0 10 +- xorl %ebx,%edi +- andl %edx,%edi +- leal 4294925233(%ecx,%ebp,1),%ecx +- xorl %ebx,%edi +- addl %edi,%ecx +- movl %edx,%edi +- roll $17,%ecx +- movl 44(%esi),%ebp +- addl %edx,%ecx +- # R0 11 +- xorl %eax,%edi +- andl %ecx,%edi +- leal 2304563134(%ebx,%ebp,1),%ebx +- xorl %eax,%edi +- addl %edi,%ebx +- movl %ecx,%edi +- roll $22,%ebx +- movl 48(%esi),%ebp +- addl %ecx,%ebx +- # R0 12 +- xorl %edx,%edi +- andl %ebx,%edi +- leal 1804603682(%eax,%ebp,1),%eax +- xorl %edx,%edi +- addl %edi,%eax +- movl %ebx,%edi +- roll $7,%eax +- movl 52(%esi),%ebp +- addl %ebx,%eax +- # R0 13 +- xorl %ecx,%edi +- andl %eax,%edi +- leal 4254626195(%edx,%ebp,1),%edx +- xorl %ecx,%edi +- addl %edi,%edx +- movl %eax,%edi +- roll $12,%edx +- movl 56(%esi),%ebp +- addl %eax,%edx +- # R0 14 +- xorl %ebx,%edi +- andl %edx,%edi +- leal 2792965006(%ecx,%ebp,1),%ecx +- xorl %ebx,%edi +- addl %edi,%ecx +- movl %edx,%edi +- roll $17,%ecx +- movl 60(%esi),%ebp +- addl %edx,%ecx +- # R0 15 +- xorl %eax,%edi +- andl 
%ecx,%edi +- leal 1236535329(%ebx,%ebp,1),%ebx +- xorl %eax,%edi +- addl %edi,%ebx +- movl %ecx,%edi +- roll $22,%ebx +- movl 4(%esi),%ebp +- addl %ecx,%ebx +- +- # R1 section +- # R1 16 +- leal 4129170786(%eax,%ebp,1),%eax +- xorl %ebx,%edi +- andl %edx,%edi +- movl 24(%esi),%ebp +- xorl %ecx,%edi +- addl %edi,%eax +- movl %ebx,%edi +- roll $5,%eax +- addl %ebx,%eax +- # R1 17 +- leal 3225465664(%edx,%ebp,1),%edx +- xorl %eax,%edi +- andl %ecx,%edi +- movl 44(%esi),%ebp +- xorl %ebx,%edi +- addl %edi,%edx +- movl %eax,%edi +- roll $9,%edx +- addl %eax,%edx +- # R1 18 +- leal 643717713(%ecx,%ebp,1),%ecx +- xorl %edx,%edi +- andl %ebx,%edi +- movl (%esi),%ebp +- xorl %eax,%edi +- addl %edi,%ecx +- movl %edx,%edi +- roll $14,%ecx +- addl %edx,%ecx +- # R1 19 +- leal 3921069994(%ebx,%ebp,1),%ebx +- xorl %ecx,%edi +- andl %eax,%edi +- movl 20(%esi),%ebp +- xorl %edx,%edi +- addl %edi,%ebx +- movl %ecx,%edi +- roll $20,%ebx +- addl %ecx,%ebx +- # R1 20 +- leal 3593408605(%eax,%ebp,1),%eax +- xorl %ebx,%edi +- andl %edx,%edi +- movl 40(%esi),%ebp +- xorl %ecx,%edi +- addl %edi,%eax +- movl %ebx,%edi +- roll $5,%eax +- addl %ebx,%eax +- # R1 21 +- leal 38016083(%edx,%ebp,1),%edx +- xorl %eax,%edi +- andl %ecx,%edi +- movl 60(%esi),%ebp +- xorl %ebx,%edi +- addl %edi,%edx +- movl %eax,%edi +- roll $9,%edx +- addl %eax,%edx +- # R1 22 +- leal 3634488961(%ecx,%ebp,1),%ecx +- xorl %edx,%edi +- andl %ebx,%edi +- movl 16(%esi),%ebp +- xorl %eax,%edi +- addl %edi,%ecx +- movl %edx,%edi +- roll $14,%ecx +- addl %edx,%ecx +- # R1 23 +- leal 3889429448(%ebx,%ebp,1),%ebx +- xorl %ecx,%edi +- andl %eax,%edi +- movl 36(%esi),%ebp +- xorl %edx,%edi +- addl %edi,%ebx +- movl %ecx,%edi +- roll $20,%ebx +- addl %ecx,%ebx +- # R1 24 +- leal 568446438(%eax,%ebp,1),%eax +- xorl %ebx,%edi +- andl %edx,%edi +- movl 56(%esi),%ebp +- xorl %ecx,%edi +- addl %edi,%eax +- movl %ebx,%edi +- roll $5,%eax +- addl %ebx,%eax +- # R1 25 +- leal 3275163606(%edx,%ebp,1),%edx +- xorl %eax,%edi +- andl %ecx,%edi +- movl 12(%esi),%ebp +- xorl %ebx,%edi +- addl %edi,%edx +- movl %eax,%edi +- roll $9,%edx +- addl %eax,%edx +- # R1 26 +- leal 4107603335(%ecx,%ebp,1),%ecx +- xorl %edx,%edi +- andl %ebx,%edi +- movl 32(%esi),%ebp +- xorl %eax,%edi +- addl %edi,%ecx +- movl %edx,%edi +- roll $14,%ecx +- addl %edx,%ecx +- # R1 27 +- leal 1163531501(%ebx,%ebp,1),%ebx +- xorl %ecx,%edi +- andl %eax,%edi +- movl 52(%esi),%ebp +- xorl %edx,%edi +- addl %edi,%ebx +- movl %ecx,%edi +- roll $20,%ebx +- addl %ecx,%ebx +- # R1 28 +- leal 2850285829(%eax,%ebp,1),%eax +- xorl %ebx,%edi +- andl %edx,%edi +- movl 8(%esi),%ebp +- xorl %ecx,%edi +- addl %edi,%eax +- movl %ebx,%edi +- roll $5,%eax +- addl %ebx,%eax +- # R1 29 +- leal 4243563512(%edx,%ebp,1),%edx +- xorl %eax,%edi +- andl %ecx,%edi +- movl 28(%esi),%ebp +- xorl %ebx,%edi +- addl %edi,%edx +- movl %eax,%edi +- roll $9,%edx +- addl %eax,%edx +- # R1 30 +- leal 1735328473(%ecx,%ebp,1),%ecx +- xorl %edx,%edi +- andl %ebx,%edi +- movl 48(%esi),%ebp +- xorl %eax,%edi +- addl %edi,%ecx +- movl %edx,%edi +- roll $14,%ecx +- addl %edx,%ecx +- # R1 31 +- leal 2368359562(%ebx,%ebp,1),%ebx +- xorl %ecx,%edi +- andl %eax,%edi +- movl 20(%esi),%ebp +- xorl %edx,%edi +- addl %edi,%ebx +- movl %ecx,%edi +- roll $20,%ebx +- addl %ecx,%ebx +- +- # R2 section +- # R2 32 +- xorl %edx,%edi +- xorl %ebx,%edi +- leal 4294588738(%eax,%ebp,1),%eax +- addl %edi,%eax +- roll $4,%eax +- movl 32(%esi),%ebp +- movl %ebx,%edi +- # R2 33 +- leal 2272392833(%edx,%ebp,1),%edx +- addl %ebx,%eax +- xorl %ecx,%edi +- xorl 
%eax,%edi +- movl 44(%esi),%ebp +- addl %edi,%edx +- movl %eax,%edi +- roll $11,%edx +- addl %eax,%edx +- # R2 34 +- xorl %ebx,%edi +- xorl %edx,%edi +- leal 1839030562(%ecx,%ebp,1),%ecx +- addl %edi,%ecx +- roll $16,%ecx +- movl 56(%esi),%ebp +- movl %edx,%edi +- # R2 35 +- leal 4259657740(%ebx,%ebp,1),%ebx +- addl %edx,%ecx +- xorl %eax,%edi +- xorl %ecx,%edi +- movl 4(%esi),%ebp +- addl %edi,%ebx +- movl %ecx,%edi +- roll $23,%ebx +- addl %ecx,%ebx +- # R2 36 +- xorl %edx,%edi +- xorl %ebx,%edi +- leal 2763975236(%eax,%ebp,1),%eax +- addl %edi,%eax +- roll $4,%eax +- movl 16(%esi),%ebp +- movl %ebx,%edi +- # R2 37 +- leal 1272893353(%edx,%ebp,1),%edx +- addl %ebx,%eax +- xorl %ecx,%edi +- xorl %eax,%edi +- movl 28(%esi),%ebp +- addl %edi,%edx +- movl %eax,%edi +- roll $11,%edx +- addl %eax,%edx +- # R2 38 +- xorl %ebx,%edi +- xorl %edx,%edi +- leal 4139469664(%ecx,%ebp,1),%ecx +- addl %edi,%ecx +- roll $16,%ecx +- movl 40(%esi),%ebp +- movl %edx,%edi +- # R2 39 +- leal 3200236656(%ebx,%ebp,1),%ebx +- addl %edx,%ecx +- xorl %eax,%edi +- xorl %ecx,%edi +- movl 52(%esi),%ebp +- addl %edi,%ebx +- movl %ecx,%edi +- roll $23,%ebx +- addl %ecx,%ebx +- # R2 40 +- xorl %edx,%edi +- xorl %ebx,%edi +- leal 681279174(%eax,%ebp,1),%eax +- addl %edi,%eax +- roll $4,%eax +- movl (%esi),%ebp +- movl %ebx,%edi +- # R2 41 +- leal 3936430074(%edx,%ebp,1),%edx +- addl %ebx,%eax +- xorl %ecx,%edi +- xorl %eax,%edi +- movl 12(%esi),%ebp +- addl %edi,%edx +- movl %eax,%edi +- roll $11,%edx +- addl %eax,%edx +- # R2 42 +- xorl %ebx,%edi +- xorl %edx,%edi +- leal 3572445317(%ecx,%ebp,1),%ecx +- addl %edi,%ecx +- roll $16,%ecx +- movl 24(%esi),%ebp +- movl %edx,%edi +- # R2 43 +- leal 76029189(%ebx,%ebp,1),%ebx +- addl %edx,%ecx +- xorl %eax,%edi +- xorl %ecx,%edi +- movl 36(%esi),%ebp +- addl %edi,%ebx +- movl %ecx,%edi +- roll $23,%ebx +- addl %ecx,%ebx +- # R2 44 +- xorl %edx,%edi +- xorl %ebx,%edi +- leal 3654602809(%eax,%ebp,1),%eax +- addl %edi,%eax +- roll $4,%eax +- movl 48(%esi),%ebp +- movl %ebx,%edi +- # R2 45 +- leal 3873151461(%edx,%ebp,1),%edx +- addl %ebx,%eax +- xorl %ecx,%edi +- xorl %eax,%edi +- movl 60(%esi),%ebp +- addl %edi,%edx +- movl %eax,%edi +- roll $11,%edx +- addl %eax,%edx +- # R2 46 +- xorl %ebx,%edi +- xorl %edx,%edi +- leal 530742520(%ecx,%ebp,1),%ecx +- addl %edi,%ecx +- roll $16,%ecx +- movl 8(%esi),%ebp +- movl %edx,%edi +- # R2 47 +- leal 3299628645(%ebx,%ebp,1),%ebx +- addl %edx,%ecx +- xorl %eax,%edi +- xorl %ecx,%edi +- movl (%esi),%ebp +- addl %edi,%ebx +- movl $-1,%edi +- roll $23,%ebx +- addl %ecx,%ebx +- +- # R3 section +- # R3 48 +- xorl %edx,%edi +- orl %ebx,%edi +- leal 4096336452(%eax,%ebp,1),%eax +- xorl %ecx,%edi +- movl 28(%esi),%ebp +- addl %edi,%eax +- movl $-1,%edi +- roll $6,%eax +- xorl %ecx,%edi +- addl %ebx,%eax +- # R3 49 +- orl %eax,%edi +- leal 1126891415(%edx,%ebp,1),%edx +- xorl %ebx,%edi +- movl 56(%esi),%ebp +- addl %edi,%edx +- movl $-1,%edi +- roll $10,%edx +- xorl %ebx,%edi +- addl %eax,%edx +- # R3 50 +- orl %edx,%edi +- leal 2878612391(%ecx,%ebp,1),%ecx +- xorl %eax,%edi +- movl 20(%esi),%ebp +- addl %edi,%ecx +- movl $-1,%edi +- roll $15,%ecx +- xorl %eax,%edi +- addl %edx,%ecx +- # R3 51 +- orl %ecx,%edi +- leal 4237533241(%ebx,%ebp,1),%ebx +- xorl %edx,%edi +- movl 48(%esi),%ebp +- addl %edi,%ebx +- movl $-1,%edi +- roll $21,%ebx +- xorl %edx,%edi +- addl %ecx,%ebx +- # R3 52 +- orl %ebx,%edi +- leal 1700485571(%eax,%ebp,1),%eax +- xorl %ecx,%edi +- movl 12(%esi),%ebp +- addl %edi,%eax +- movl $-1,%edi +- roll $6,%eax +- xorl %ecx,%edi +- 
addl %ebx,%eax +- # R3 53 +- orl %eax,%edi +- leal 2399980690(%edx,%ebp,1),%edx +- xorl %ebx,%edi +- movl 40(%esi),%ebp +- addl %edi,%edx +- movl $-1,%edi +- roll $10,%edx +- xorl %ebx,%edi +- addl %eax,%edx +- # R3 54 +- orl %edx,%edi +- leal 4293915773(%ecx,%ebp,1),%ecx +- xorl %eax,%edi +- movl 4(%esi),%ebp +- addl %edi,%ecx +- movl $-1,%edi +- roll $15,%ecx +- xorl %eax,%edi +- addl %edx,%ecx +- # R3 55 +- orl %ecx,%edi +- leal 2240044497(%ebx,%ebp,1),%ebx +- xorl %edx,%edi +- movl 32(%esi),%ebp +- addl %edi,%ebx +- movl $-1,%edi +- roll $21,%ebx +- xorl %edx,%edi +- addl %ecx,%ebx +- # R3 56 +- orl %ebx,%edi +- leal 1873313359(%eax,%ebp,1),%eax +- xorl %ecx,%edi +- movl 60(%esi),%ebp +- addl %edi,%eax +- movl $-1,%edi +- roll $6,%eax +- xorl %ecx,%edi +- addl %ebx,%eax +- # R3 57 +- orl %eax,%edi +- leal 4264355552(%edx,%ebp,1),%edx +- xorl %ebx,%edi +- movl 24(%esi),%ebp +- addl %edi,%edx +- movl $-1,%edi +- roll $10,%edx +- xorl %ebx,%edi +- addl %eax,%edx +- # R3 58 +- orl %edx,%edi +- leal 2734768916(%ecx,%ebp,1),%ecx +- xorl %eax,%edi +- movl 52(%esi),%ebp +- addl %edi,%ecx +- movl $-1,%edi +- roll $15,%ecx +- xorl %eax,%edi +- addl %edx,%ecx +- # R3 59 +- orl %ecx,%edi +- leal 1309151649(%ebx,%ebp,1),%ebx +- xorl %edx,%edi +- movl 16(%esi),%ebp +- addl %edi,%ebx +- movl $-1,%edi +- roll $21,%ebx +- xorl %edx,%edi +- addl %ecx,%ebx +- # R3 60 +- orl %ebx,%edi +- leal 4149444226(%eax,%ebp,1),%eax +- xorl %ecx,%edi +- movl 44(%esi),%ebp +- addl %edi,%eax +- movl $-1,%edi +- roll $6,%eax +- xorl %ecx,%edi +- addl %ebx,%eax +- # R3 61 +- orl %eax,%edi +- leal 3174756917(%edx,%ebp,1),%edx +- xorl %ebx,%edi +- movl 8(%esi),%ebp +- addl %edi,%edx +- movl $-1,%edi +- roll $10,%edx +- xorl %ebx,%edi +- addl %eax,%edx +- # R3 62 +- orl %edx,%edi +- leal 718787259(%ecx,%ebp,1),%ecx +- xorl %eax,%edi +- movl 36(%esi),%ebp +- addl %edi,%ecx +- movl $-1,%edi +- roll $15,%ecx +- xorl %eax,%edi +- addl %edx,%ecx +- # R3 63 +- orl %ecx,%edi +- leal 3951481745(%ebx,%ebp,1),%ebx +- xorl %edx,%edi +- movl 24(%esp),%ebp +- addl %edi,%ebx +- addl $64,%esi +- roll $21,%ebx +- movl (%ebp),%edi +- addl %ecx,%ebx +- addl %edi,%eax +- movl 4(%ebp),%edi +- addl %edi,%ebx +- movl 8(%ebp),%edi +- addl %edi,%ecx +- movl 12(%ebp),%edi +- addl %edi,%edx +- movl %eax,(%ebp) +- movl %ebx,4(%ebp) +- movl (%esp),%edi +- movl %ecx,8(%ebp) +- movl %edx,12(%ebp) +- cmpl %esi,%edi +- jae L000start +- popl %eax +- popl %ebx +- popl %ebp +- popl %edi +- popl %esi +- ret +-#endif +diff --git a/mac-x86/crypto/fipsmodule/sha1-586.S b/mac-x86/crypto/fipsmodule/sha1-586.S +deleted file mode 100644 +index 3213a62..0000000 +--- a/mac-x86/crypto/fipsmodule/sha1-586.S ++++ /dev/null +@@ -1,3805 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _sha1_block_data_order +-.private_extern _sha1_block_data_order +-.align 4 +-_sha1_block_data_order: +-L_sha1_block_data_order_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- call L000pic_point +-L000pic_point: +- popl %ebp +- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L000pic_point(%ebp),%esi +- leal LK_XX_XX-L000pic_point(%ebp),%ebp +- movl (%esi),%eax +- movl 4(%esi),%edx +- testl $512,%edx +- jz L001x86 +- movl 8(%esi),%ecx +- testl $16777216,%eax +- jz L001x86 +- andl $268435456,%edx +- andl $1073741824,%eax +- orl %edx,%eax +- cmpl $1342177280,%eax +- je Lavx_shortcut +- jmp Lssse3_shortcut +-.align 4,0x90 +-L001x86: +- movl 20(%esp),%ebp +- movl 24(%esp),%esi +- movl 28(%esp),%eax +- subl $76,%esp +- shll $6,%eax +- addl %esi,%eax +- movl %eax,104(%esp) +- movl 16(%ebp),%edi +- jmp L002loop +-.align 4,0x90 +-L002loop: +- movl (%esi),%eax +- movl 4(%esi),%ebx +- movl 8(%esi),%ecx +- movl 12(%esi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- movl %eax,(%esp) +- movl %ebx,4(%esp) +- movl %ecx,8(%esp) +- movl %edx,12(%esp) +- movl 16(%esi),%eax +- movl 20(%esi),%ebx +- movl 24(%esi),%ecx +- movl 28(%esi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- movl %eax,16(%esp) +- movl %ebx,20(%esp) +- movl %ecx,24(%esp) +- movl %edx,28(%esp) +- movl 32(%esi),%eax +- movl 36(%esi),%ebx +- movl 40(%esi),%ecx +- movl 44(%esi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- movl %eax,32(%esp) +- movl %ebx,36(%esp) +- movl %ecx,40(%esp) +- movl %edx,44(%esp) +- movl 48(%esi),%eax +- movl 52(%esi),%ebx +- movl 56(%esi),%ecx +- movl 60(%esi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- movl %eax,48(%esp) +- movl %ebx,52(%esp) +- movl %ecx,56(%esp) +- movl %edx,60(%esp) +- movl %esi,100(%esp) +- movl (%ebp),%eax +- movl 4(%ebp),%ebx +- movl 8(%ebp),%ecx +- movl 12(%ebp),%edx +- # 00_15 0 +- movl %ecx,%esi +- movl %eax,%ebp +- roll $5,%ebp +- xorl %edx,%esi +- addl %edi,%ebp +- movl (%esp),%edi +- andl %ebx,%esi +- rorl $2,%ebx +- xorl %edx,%esi +- leal 1518500249(%ebp,%edi,1),%ebp +- addl %esi,%ebp +- # 00_15 1 +- movl %ebx,%edi +- movl %ebp,%esi +- roll $5,%ebp +- xorl %ecx,%edi +- addl %edx,%ebp +- movl 4(%esp),%edx +- andl %eax,%edi +- rorl $2,%eax +- xorl %ecx,%edi +- leal 1518500249(%ebp,%edx,1),%ebp +- addl %edi,%ebp +- # 00_15 2 +- movl %eax,%edx +- movl %ebp,%edi +- roll $5,%ebp +- xorl %ebx,%edx +- addl %ecx,%ebp +- movl 8(%esp),%ecx +- andl %esi,%edx +- rorl $2,%esi +- xorl %ebx,%edx +- leal 1518500249(%ebp,%ecx,1),%ebp +- addl %edx,%ebp +- # 00_15 3 +- movl %esi,%ecx +- movl %ebp,%edx +- roll $5,%ebp +- xorl %eax,%ecx +- addl %ebx,%ebp +- movl 12(%esp),%ebx +- andl %edi,%ecx +- rorl $2,%edi +- xorl %eax,%ecx +- leal 1518500249(%ebp,%ebx,1),%ebp +- addl %ecx,%ebp +- # 00_15 4 +- movl %edi,%ebx +- movl %ebp,%ecx +- roll $5,%ebp +- xorl %esi,%ebx +- addl %eax,%ebp +- movl 16(%esp),%eax +- andl %edx,%ebx +- rorl $2,%edx +- xorl %esi,%ebx +- leal 1518500249(%ebp,%eax,1),%ebp +- addl %ebx,%ebp +- # 00_15 5 +- movl %edx,%eax +- movl %ebp,%ebx +- roll $5,%ebp +- xorl %edi,%eax +- addl %esi,%ebp +- movl 20(%esp),%esi +- andl %ecx,%eax +- rorl $2,%ecx +- xorl %edi,%eax +- leal 1518500249(%ebp,%esi,1),%ebp +- addl %eax,%ebp +- # 00_15 6 +- movl %ecx,%esi +- movl %ebp,%eax +- roll $5,%ebp +- xorl %edx,%esi +- addl %edi,%ebp +- movl 24(%esp),%edi +- andl %ebx,%esi +- rorl $2,%ebx +- xorl %edx,%esi +- leal 
1518500249(%ebp,%edi,1),%ebp +- addl %esi,%ebp +- # 00_15 7 +- movl %ebx,%edi +- movl %ebp,%esi +- roll $5,%ebp +- xorl %ecx,%edi +- addl %edx,%ebp +- movl 28(%esp),%edx +- andl %eax,%edi +- rorl $2,%eax +- xorl %ecx,%edi +- leal 1518500249(%ebp,%edx,1),%ebp +- addl %edi,%ebp +- # 00_15 8 +- movl %eax,%edx +- movl %ebp,%edi +- roll $5,%ebp +- xorl %ebx,%edx +- addl %ecx,%ebp +- movl 32(%esp),%ecx +- andl %esi,%edx +- rorl $2,%esi +- xorl %ebx,%edx +- leal 1518500249(%ebp,%ecx,1),%ebp +- addl %edx,%ebp +- # 00_15 9 +- movl %esi,%ecx +- movl %ebp,%edx +- roll $5,%ebp +- xorl %eax,%ecx +- addl %ebx,%ebp +- movl 36(%esp),%ebx +- andl %edi,%ecx +- rorl $2,%edi +- xorl %eax,%ecx +- leal 1518500249(%ebp,%ebx,1),%ebp +- addl %ecx,%ebp +- # 00_15 10 +- movl %edi,%ebx +- movl %ebp,%ecx +- roll $5,%ebp +- xorl %esi,%ebx +- addl %eax,%ebp +- movl 40(%esp),%eax +- andl %edx,%ebx +- rorl $2,%edx +- xorl %esi,%ebx +- leal 1518500249(%ebp,%eax,1),%ebp +- addl %ebx,%ebp +- # 00_15 11 +- movl %edx,%eax +- movl %ebp,%ebx +- roll $5,%ebp +- xorl %edi,%eax +- addl %esi,%ebp +- movl 44(%esp),%esi +- andl %ecx,%eax +- rorl $2,%ecx +- xorl %edi,%eax +- leal 1518500249(%ebp,%esi,1),%ebp +- addl %eax,%ebp +- # 00_15 12 +- movl %ecx,%esi +- movl %ebp,%eax +- roll $5,%ebp +- xorl %edx,%esi +- addl %edi,%ebp +- movl 48(%esp),%edi +- andl %ebx,%esi +- rorl $2,%ebx +- xorl %edx,%esi +- leal 1518500249(%ebp,%edi,1),%ebp +- addl %esi,%ebp +- # 00_15 13 +- movl %ebx,%edi +- movl %ebp,%esi +- roll $5,%ebp +- xorl %ecx,%edi +- addl %edx,%ebp +- movl 52(%esp),%edx +- andl %eax,%edi +- rorl $2,%eax +- xorl %ecx,%edi +- leal 1518500249(%ebp,%edx,1),%ebp +- addl %edi,%ebp +- # 00_15 14 +- movl %eax,%edx +- movl %ebp,%edi +- roll $5,%ebp +- xorl %ebx,%edx +- addl %ecx,%ebp +- movl 56(%esp),%ecx +- andl %esi,%edx +- rorl $2,%esi +- xorl %ebx,%edx +- leal 1518500249(%ebp,%ecx,1),%ebp +- addl %edx,%ebp +- # 00_15 15 +- movl %esi,%ecx +- movl %ebp,%edx +- roll $5,%ebp +- xorl %eax,%ecx +- addl %ebx,%ebp +- movl 60(%esp),%ebx +- andl %edi,%ecx +- rorl $2,%edi +- xorl %eax,%ecx +- leal 1518500249(%ebp,%ebx,1),%ebp +- movl (%esp),%ebx +- addl %ebp,%ecx +- # 16_19 16 +- movl %edi,%ebp +- xorl 8(%esp),%ebx +- xorl %esi,%ebp +- xorl 32(%esp),%ebx +- andl %edx,%ebp +- xorl 52(%esp),%ebx +- roll $1,%ebx +- xorl %esi,%ebp +- addl %ebp,%eax +- movl %ecx,%ebp +- rorl $2,%edx +- movl %ebx,(%esp) +- roll $5,%ebp +- leal 1518500249(%ebx,%eax,1),%ebx +- movl 4(%esp),%eax +- addl %ebp,%ebx +- # 16_19 17 +- movl %edx,%ebp +- xorl 12(%esp),%eax +- xorl %edi,%ebp +- xorl 36(%esp),%eax +- andl %ecx,%ebp +- xorl 56(%esp),%eax +- roll $1,%eax +- xorl %edi,%ebp +- addl %ebp,%esi +- movl %ebx,%ebp +- rorl $2,%ecx +- movl %eax,4(%esp) +- roll $5,%ebp +- leal 1518500249(%eax,%esi,1),%eax +- movl 8(%esp),%esi +- addl %ebp,%eax +- # 16_19 18 +- movl %ecx,%ebp +- xorl 16(%esp),%esi +- xorl %edx,%ebp +- xorl 40(%esp),%esi +- andl %ebx,%ebp +- xorl 60(%esp),%esi +- roll $1,%esi +- xorl %edx,%ebp +- addl %ebp,%edi +- movl %eax,%ebp +- rorl $2,%ebx +- movl %esi,8(%esp) +- roll $5,%ebp +- leal 1518500249(%esi,%edi,1),%esi +- movl 12(%esp),%edi +- addl %ebp,%esi +- # 16_19 19 +- movl %ebx,%ebp +- xorl 20(%esp),%edi +- xorl %ecx,%ebp +- xorl 44(%esp),%edi +- andl %eax,%ebp +- xorl (%esp),%edi +- roll $1,%edi +- xorl %ecx,%ebp +- addl %ebp,%edx +- movl %esi,%ebp +- rorl $2,%eax +- movl %edi,12(%esp) +- roll $5,%ebp +- leal 1518500249(%edi,%edx,1),%edi +- movl 16(%esp),%edx +- addl %ebp,%edi +- # 20_39 20 +- movl %esi,%ebp +- xorl 24(%esp),%edx +- xorl %eax,%ebp +- xorl 
48(%esp),%edx +- xorl %ebx,%ebp +- xorl 4(%esp),%edx +- roll $1,%edx +- addl %ebp,%ecx +- rorl $2,%esi +- movl %edi,%ebp +- roll $5,%ebp +- movl %edx,16(%esp) +- leal 1859775393(%edx,%ecx,1),%edx +- movl 20(%esp),%ecx +- addl %ebp,%edx +- # 20_39 21 +- movl %edi,%ebp +- xorl 28(%esp),%ecx +- xorl %esi,%ebp +- xorl 52(%esp),%ecx +- xorl %eax,%ebp +- xorl 8(%esp),%ecx +- roll $1,%ecx +- addl %ebp,%ebx +- rorl $2,%edi +- movl %edx,%ebp +- roll $5,%ebp +- movl %ecx,20(%esp) +- leal 1859775393(%ecx,%ebx,1),%ecx +- movl 24(%esp),%ebx +- addl %ebp,%ecx +- # 20_39 22 +- movl %edx,%ebp +- xorl 32(%esp),%ebx +- xorl %edi,%ebp +- xorl 56(%esp),%ebx +- xorl %esi,%ebp +- xorl 12(%esp),%ebx +- roll $1,%ebx +- addl %ebp,%eax +- rorl $2,%edx +- movl %ecx,%ebp +- roll $5,%ebp +- movl %ebx,24(%esp) +- leal 1859775393(%ebx,%eax,1),%ebx +- movl 28(%esp),%eax +- addl %ebp,%ebx +- # 20_39 23 +- movl %ecx,%ebp +- xorl 36(%esp),%eax +- xorl %edx,%ebp +- xorl 60(%esp),%eax +- xorl %edi,%ebp +- xorl 16(%esp),%eax +- roll $1,%eax +- addl %ebp,%esi +- rorl $2,%ecx +- movl %ebx,%ebp +- roll $5,%ebp +- movl %eax,28(%esp) +- leal 1859775393(%eax,%esi,1),%eax +- movl 32(%esp),%esi +- addl %ebp,%eax +- # 20_39 24 +- movl %ebx,%ebp +- xorl 40(%esp),%esi +- xorl %ecx,%ebp +- xorl (%esp),%esi +- xorl %edx,%ebp +- xorl 20(%esp),%esi +- roll $1,%esi +- addl %ebp,%edi +- rorl $2,%ebx +- movl %eax,%ebp +- roll $5,%ebp +- movl %esi,32(%esp) +- leal 1859775393(%esi,%edi,1),%esi +- movl 36(%esp),%edi +- addl %ebp,%esi +- # 20_39 25 +- movl %eax,%ebp +- xorl 44(%esp),%edi +- xorl %ebx,%ebp +- xorl 4(%esp),%edi +- xorl %ecx,%ebp +- xorl 24(%esp),%edi +- roll $1,%edi +- addl %ebp,%edx +- rorl $2,%eax +- movl %esi,%ebp +- roll $5,%ebp +- movl %edi,36(%esp) +- leal 1859775393(%edi,%edx,1),%edi +- movl 40(%esp),%edx +- addl %ebp,%edi +- # 20_39 26 +- movl %esi,%ebp +- xorl 48(%esp),%edx +- xorl %eax,%ebp +- xorl 8(%esp),%edx +- xorl %ebx,%ebp +- xorl 28(%esp),%edx +- roll $1,%edx +- addl %ebp,%ecx +- rorl $2,%esi +- movl %edi,%ebp +- roll $5,%ebp +- movl %edx,40(%esp) +- leal 1859775393(%edx,%ecx,1),%edx +- movl 44(%esp),%ecx +- addl %ebp,%edx +- # 20_39 27 +- movl %edi,%ebp +- xorl 52(%esp),%ecx +- xorl %esi,%ebp +- xorl 12(%esp),%ecx +- xorl %eax,%ebp +- xorl 32(%esp),%ecx +- roll $1,%ecx +- addl %ebp,%ebx +- rorl $2,%edi +- movl %edx,%ebp +- roll $5,%ebp +- movl %ecx,44(%esp) +- leal 1859775393(%ecx,%ebx,1),%ecx +- movl 48(%esp),%ebx +- addl %ebp,%ecx +- # 20_39 28 +- movl %edx,%ebp +- xorl 56(%esp),%ebx +- xorl %edi,%ebp +- xorl 16(%esp),%ebx +- xorl %esi,%ebp +- xorl 36(%esp),%ebx +- roll $1,%ebx +- addl %ebp,%eax +- rorl $2,%edx +- movl %ecx,%ebp +- roll $5,%ebp +- movl %ebx,48(%esp) +- leal 1859775393(%ebx,%eax,1),%ebx +- movl 52(%esp),%eax +- addl %ebp,%ebx +- # 20_39 29 +- movl %ecx,%ebp +- xorl 60(%esp),%eax +- xorl %edx,%ebp +- xorl 20(%esp),%eax +- xorl %edi,%ebp +- xorl 40(%esp),%eax +- roll $1,%eax +- addl %ebp,%esi +- rorl $2,%ecx +- movl %ebx,%ebp +- roll $5,%ebp +- movl %eax,52(%esp) +- leal 1859775393(%eax,%esi,1),%eax +- movl 56(%esp),%esi +- addl %ebp,%eax +- # 20_39 30 +- movl %ebx,%ebp +- xorl (%esp),%esi +- xorl %ecx,%ebp +- xorl 24(%esp),%esi +- xorl %edx,%ebp +- xorl 44(%esp),%esi +- roll $1,%esi +- addl %ebp,%edi +- rorl $2,%ebx +- movl %eax,%ebp +- roll $5,%ebp +- movl %esi,56(%esp) +- leal 1859775393(%esi,%edi,1),%esi +- movl 60(%esp),%edi +- addl %ebp,%esi +- # 20_39 31 +- movl %eax,%ebp +- xorl 4(%esp),%edi +- xorl %ebx,%ebp +- xorl 28(%esp),%edi +- xorl %ecx,%ebp +- xorl 48(%esp),%edi +- roll $1,%edi +- 
addl %ebp,%edx +- rorl $2,%eax +- movl %esi,%ebp +- roll $5,%ebp +- movl %edi,60(%esp) +- leal 1859775393(%edi,%edx,1),%edi +- movl (%esp),%edx +- addl %ebp,%edi +- # 20_39 32 +- movl %esi,%ebp +- xorl 8(%esp),%edx +- xorl %eax,%ebp +- xorl 32(%esp),%edx +- xorl %ebx,%ebp +- xorl 52(%esp),%edx +- roll $1,%edx +- addl %ebp,%ecx +- rorl $2,%esi +- movl %edi,%ebp +- roll $5,%ebp +- movl %edx,(%esp) +- leal 1859775393(%edx,%ecx,1),%edx +- movl 4(%esp),%ecx +- addl %ebp,%edx +- # 20_39 33 +- movl %edi,%ebp +- xorl 12(%esp),%ecx +- xorl %esi,%ebp +- xorl 36(%esp),%ecx +- xorl %eax,%ebp +- xorl 56(%esp),%ecx +- roll $1,%ecx +- addl %ebp,%ebx +- rorl $2,%edi +- movl %edx,%ebp +- roll $5,%ebp +- movl %ecx,4(%esp) +- leal 1859775393(%ecx,%ebx,1),%ecx +- movl 8(%esp),%ebx +- addl %ebp,%ecx +- # 20_39 34 +- movl %edx,%ebp +- xorl 16(%esp),%ebx +- xorl %edi,%ebp +- xorl 40(%esp),%ebx +- xorl %esi,%ebp +- xorl 60(%esp),%ebx +- roll $1,%ebx +- addl %ebp,%eax +- rorl $2,%edx +- movl %ecx,%ebp +- roll $5,%ebp +- movl %ebx,8(%esp) +- leal 1859775393(%ebx,%eax,1),%ebx +- movl 12(%esp),%eax +- addl %ebp,%ebx +- # 20_39 35 +- movl %ecx,%ebp +- xorl 20(%esp),%eax +- xorl %edx,%ebp +- xorl 44(%esp),%eax +- xorl %edi,%ebp +- xorl (%esp),%eax +- roll $1,%eax +- addl %ebp,%esi +- rorl $2,%ecx +- movl %ebx,%ebp +- roll $5,%ebp +- movl %eax,12(%esp) +- leal 1859775393(%eax,%esi,1),%eax +- movl 16(%esp),%esi +- addl %ebp,%eax +- # 20_39 36 +- movl %ebx,%ebp +- xorl 24(%esp),%esi +- xorl %ecx,%ebp +- xorl 48(%esp),%esi +- xorl %edx,%ebp +- xorl 4(%esp),%esi +- roll $1,%esi +- addl %ebp,%edi +- rorl $2,%ebx +- movl %eax,%ebp +- roll $5,%ebp +- movl %esi,16(%esp) +- leal 1859775393(%esi,%edi,1),%esi +- movl 20(%esp),%edi +- addl %ebp,%esi +- # 20_39 37 +- movl %eax,%ebp +- xorl 28(%esp),%edi +- xorl %ebx,%ebp +- xorl 52(%esp),%edi +- xorl %ecx,%ebp +- xorl 8(%esp),%edi +- roll $1,%edi +- addl %ebp,%edx +- rorl $2,%eax +- movl %esi,%ebp +- roll $5,%ebp +- movl %edi,20(%esp) +- leal 1859775393(%edi,%edx,1),%edi +- movl 24(%esp),%edx +- addl %ebp,%edi +- # 20_39 38 +- movl %esi,%ebp +- xorl 32(%esp),%edx +- xorl %eax,%ebp +- xorl 56(%esp),%edx +- xorl %ebx,%ebp +- xorl 12(%esp),%edx +- roll $1,%edx +- addl %ebp,%ecx +- rorl $2,%esi +- movl %edi,%ebp +- roll $5,%ebp +- movl %edx,24(%esp) +- leal 1859775393(%edx,%ecx,1),%edx +- movl 28(%esp),%ecx +- addl %ebp,%edx +- # 20_39 39 +- movl %edi,%ebp +- xorl 36(%esp),%ecx +- xorl %esi,%ebp +- xorl 60(%esp),%ecx +- xorl %eax,%ebp +- xorl 16(%esp),%ecx +- roll $1,%ecx +- addl %ebp,%ebx +- rorl $2,%edi +- movl %edx,%ebp +- roll $5,%ebp +- movl %ecx,28(%esp) +- leal 1859775393(%ecx,%ebx,1),%ecx +- movl 32(%esp),%ebx +- addl %ebp,%ecx +- # 40_59 40 +- movl %edi,%ebp +- xorl 40(%esp),%ebx +- xorl %esi,%ebp +- xorl (%esp),%ebx +- andl %edx,%ebp +- xorl 20(%esp),%ebx +- roll $1,%ebx +- addl %eax,%ebp +- rorl $2,%edx +- movl %ecx,%eax +- roll $5,%eax +- movl %ebx,32(%esp) +- leal 2400959708(%ebx,%ebp,1),%ebx +- movl %edi,%ebp +- addl %eax,%ebx +- andl %esi,%ebp +- movl 36(%esp),%eax +- addl %ebp,%ebx +- # 40_59 41 +- movl %edx,%ebp +- xorl 44(%esp),%eax +- xorl %edi,%ebp +- xorl 4(%esp),%eax +- andl %ecx,%ebp +- xorl 24(%esp),%eax +- roll $1,%eax +- addl %esi,%ebp +- rorl $2,%ecx +- movl %ebx,%esi +- roll $5,%esi +- movl %eax,36(%esp) +- leal 2400959708(%eax,%ebp,1),%eax +- movl %edx,%ebp +- addl %esi,%eax +- andl %edi,%ebp +- movl 40(%esp),%esi +- addl %ebp,%eax +- # 40_59 42 +- movl %ecx,%ebp +- xorl 48(%esp),%esi +- xorl %edx,%ebp +- xorl 8(%esp),%esi +- andl %ebx,%ebp +- xorl 
28(%esp),%esi +- roll $1,%esi +- addl %edi,%ebp +- rorl $2,%ebx +- movl %eax,%edi +- roll $5,%edi +- movl %esi,40(%esp) +- leal 2400959708(%esi,%ebp,1),%esi +- movl %ecx,%ebp +- addl %edi,%esi +- andl %edx,%ebp +- movl 44(%esp),%edi +- addl %ebp,%esi +- # 40_59 43 +- movl %ebx,%ebp +- xorl 52(%esp),%edi +- xorl %ecx,%ebp +- xorl 12(%esp),%edi +- andl %eax,%ebp +- xorl 32(%esp),%edi +- roll $1,%edi +- addl %edx,%ebp +- rorl $2,%eax +- movl %esi,%edx +- roll $5,%edx +- movl %edi,44(%esp) +- leal 2400959708(%edi,%ebp,1),%edi +- movl %ebx,%ebp +- addl %edx,%edi +- andl %ecx,%ebp +- movl 48(%esp),%edx +- addl %ebp,%edi +- # 40_59 44 +- movl %eax,%ebp +- xorl 56(%esp),%edx +- xorl %ebx,%ebp +- xorl 16(%esp),%edx +- andl %esi,%ebp +- xorl 36(%esp),%edx +- roll $1,%edx +- addl %ecx,%ebp +- rorl $2,%esi +- movl %edi,%ecx +- roll $5,%ecx +- movl %edx,48(%esp) +- leal 2400959708(%edx,%ebp,1),%edx +- movl %eax,%ebp +- addl %ecx,%edx +- andl %ebx,%ebp +- movl 52(%esp),%ecx +- addl %ebp,%edx +- # 40_59 45 +- movl %esi,%ebp +- xorl 60(%esp),%ecx +- xorl %eax,%ebp +- xorl 20(%esp),%ecx +- andl %edi,%ebp +- xorl 40(%esp),%ecx +- roll $1,%ecx +- addl %ebx,%ebp +- rorl $2,%edi +- movl %edx,%ebx +- roll $5,%ebx +- movl %ecx,52(%esp) +- leal 2400959708(%ecx,%ebp,1),%ecx +- movl %esi,%ebp +- addl %ebx,%ecx +- andl %eax,%ebp +- movl 56(%esp),%ebx +- addl %ebp,%ecx +- # 40_59 46 +- movl %edi,%ebp +- xorl (%esp),%ebx +- xorl %esi,%ebp +- xorl 24(%esp),%ebx +- andl %edx,%ebp +- xorl 44(%esp),%ebx +- roll $1,%ebx +- addl %eax,%ebp +- rorl $2,%edx +- movl %ecx,%eax +- roll $5,%eax +- movl %ebx,56(%esp) +- leal 2400959708(%ebx,%ebp,1),%ebx +- movl %edi,%ebp +- addl %eax,%ebx +- andl %esi,%ebp +- movl 60(%esp),%eax +- addl %ebp,%ebx +- # 40_59 47 +- movl %edx,%ebp +- xorl 4(%esp),%eax +- xorl %edi,%ebp +- xorl 28(%esp),%eax +- andl %ecx,%ebp +- xorl 48(%esp),%eax +- roll $1,%eax +- addl %esi,%ebp +- rorl $2,%ecx +- movl %ebx,%esi +- roll $5,%esi +- movl %eax,60(%esp) +- leal 2400959708(%eax,%ebp,1),%eax +- movl %edx,%ebp +- addl %esi,%eax +- andl %edi,%ebp +- movl (%esp),%esi +- addl %ebp,%eax +- # 40_59 48 +- movl %ecx,%ebp +- xorl 8(%esp),%esi +- xorl %edx,%ebp +- xorl 32(%esp),%esi +- andl %ebx,%ebp +- xorl 52(%esp),%esi +- roll $1,%esi +- addl %edi,%ebp +- rorl $2,%ebx +- movl %eax,%edi +- roll $5,%edi +- movl %esi,(%esp) +- leal 2400959708(%esi,%ebp,1),%esi +- movl %ecx,%ebp +- addl %edi,%esi +- andl %edx,%ebp +- movl 4(%esp),%edi +- addl %ebp,%esi +- # 40_59 49 +- movl %ebx,%ebp +- xorl 12(%esp),%edi +- xorl %ecx,%ebp +- xorl 36(%esp),%edi +- andl %eax,%ebp +- xorl 56(%esp),%edi +- roll $1,%edi +- addl %edx,%ebp +- rorl $2,%eax +- movl %esi,%edx +- roll $5,%edx +- movl %edi,4(%esp) +- leal 2400959708(%edi,%ebp,1),%edi +- movl %ebx,%ebp +- addl %edx,%edi +- andl %ecx,%ebp +- movl 8(%esp),%edx +- addl %ebp,%edi +- # 40_59 50 +- movl %eax,%ebp +- xorl 16(%esp),%edx +- xorl %ebx,%ebp +- xorl 40(%esp),%edx +- andl %esi,%ebp +- xorl 60(%esp),%edx +- roll $1,%edx +- addl %ecx,%ebp +- rorl $2,%esi +- movl %edi,%ecx +- roll $5,%ecx +- movl %edx,8(%esp) +- leal 2400959708(%edx,%ebp,1),%edx +- movl %eax,%ebp +- addl %ecx,%edx +- andl %ebx,%ebp +- movl 12(%esp),%ecx +- addl %ebp,%edx +- # 40_59 51 +- movl %esi,%ebp +- xorl 20(%esp),%ecx +- xorl %eax,%ebp +- xorl 44(%esp),%ecx +- andl %edi,%ebp +- xorl (%esp),%ecx +- roll $1,%ecx +- addl %ebx,%ebp +- rorl $2,%edi +- movl %edx,%ebx +- roll $5,%ebx +- movl %ecx,12(%esp) +- leal 2400959708(%ecx,%ebp,1),%ecx +- movl %esi,%ebp +- addl %ebx,%ecx +- andl %eax,%ebp +- movl 
16(%esp),%ebx +- addl %ebp,%ecx +- # 40_59 52 +- movl %edi,%ebp +- xorl 24(%esp),%ebx +- xorl %esi,%ebp +- xorl 48(%esp),%ebx +- andl %edx,%ebp +- xorl 4(%esp),%ebx +- roll $1,%ebx +- addl %eax,%ebp +- rorl $2,%edx +- movl %ecx,%eax +- roll $5,%eax +- movl %ebx,16(%esp) +- leal 2400959708(%ebx,%ebp,1),%ebx +- movl %edi,%ebp +- addl %eax,%ebx +- andl %esi,%ebp +- movl 20(%esp),%eax +- addl %ebp,%ebx +- # 40_59 53 +- movl %edx,%ebp +- xorl 28(%esp),%eax +- xorl %edi,%ebp +- xorl 52(%esp),%eax +- andl %ecx,%ebp +- xorl 8(%esp),%eax +- roll $1,%eax +- addl %esi,%ebp +- rorl $2,%ecx +- movl %ebx,%esi +- roll $5,%esi +- movl %eax,20(%esp) +- leal 2400959708(%eax,%ebp,1),%eax +- movl %edx,%ebp +- addl %esi,%eax +- andl %edi,%ebp +- movl 24(%esp),%esi +- addl %ebp,%eax +- # 40_59 54 +- movl %ecx,%ebp +- xorl 32(%esp),%esi +- xorl %edx,%ebp +- xorl 56(%esp),%esi +- andl %ebx,%ebp +- xorl 12(%esp),%esi +- roll $1,%esi +- addl %edi,%ebp +- rorl $2,%ebx +- movl %eax,%edi +- roll $5,%edi +- movl %esi,24(%esp) +- leal 2400959708(%esi,%ebp,1),%esi +- movl %ecx,%ebp +- addl %edi,%esi +- andl %edx,%ebp +- movl 28(%esp),%edi +- addl %ebp,%esi +- # 40_59 55 +- movl %ebx,%ebp +- xorl 36(%esp),%edi +- xorl %ecx,%ebp +- xorl 60(%esp),%edi +- andl %eax,%ebp +- xorl 16(%esp),%edi +- roll $1,%edi +- addl %edx,%ebp +- rorl $2,%eax +- movl %esi,%edx +- roll $5,%edx +- movl %edi,28(%esp) +- leal 2400959708(%edi,%ebp,1),%edi +- movl %ebx,%ebp +- addl %edx,%edi +- andl %ecx,%ebp +- movl 32(%esp),%edx +- addl %ebp,%edi +- # 40_59 56 +- movl %eax,%ebp +- xorl 40(%esp),%edx +- xorl %ebx,%ebp +- xorl (%esp),%edx +- andl %esi,%ebp +- xorl 20(%esp),%edx +- roll $1,%edx +- addl %ecx,%ebp +- rorl $2,%esi +- movl %edi,%ecx +- roll $5,%ecx +- movl %edx,32(%esp) +- leal 2400959708(%edx,%ebp,1),%edx +- movl %eax,%ebp +- addl %ecx,%edx +- andl %ebx,%ebp +- movl 36(%esp),%ecx +- addl %ebp,%edx +- # 40_59 57 +- movl %esi,%ebp +- xorl 44(%esp),%ecx +- xorl %eax,%ebp +- xorl 4(%esp),%ecx +- andl %edi,%ebp +- xorl 24(%esp),%ecx +- roll $1,%ecx +- addl %ebx,%ebp +- rorl $2,%edi +- movl %edx,%ebx +- roll $5,%ebx +- movl %ecx,36(%esp) +- leal 2400959708(%ecx,%ebp,1),%ecx +- movl %esi,%ebp +- addl %ebx,%ecx +- andl %eax,%ebp +- movl 40(%esp),%ebx +- addl %ebp,%ecx +- # 40_59 58 +- movl %edi,%ebp +- xorl 48(%esp),%ebx +- xorl %esi,%ebp +- xorl 8(%esp),%ebx +- andl %edx,%ebp +- xorl 28(%esp),%ebx +- roll $1,%ebx +- addl %eax,%ebp +- rorl $2,%edx +- movl %ecx,%eax +- roll $5,%eax +- movl %ebx,40(%esp) +- leal 2400959708(%ebx,%ebp,1),%ebx +- movl %edi,%ebp +- addl %eax,%ebx +- andl %esi,%ebp +- movl 44(%esp),%eax +- addl %ebp,%ebx +- # 40_59 59 +- movl %edx,%ebp +- xorl 52(%esp),%eax +- xorl %edi,%ebp +- xorl 12(%esp),%eax +- andl %ecx,%ebp +- xorl 32(%esp),%eax +- roll $1,%eax +- addl %esi,%ebp +- rorl $2,%ecx +- movl %ebx,%esi +- roll $5,%esi +- movl %eax,44(%esp) +- leal 2400959708(%eax,%ebp,1),%eax +- movl %edx,%ebp +- addl %esi,%eax +- andl %edi,%ebp +- movl 48(%esp),%esi +- addl %ebp,%eax +- # 20_39 60 +- movl %ebx,%ebp +- xorl 56(%esp),%esi +- xorl %ecx,%ebp +- xorl 16(%esp),%esi +- xorl %edx,%ebp +- xorl 36(%esp),%esi +- roll $1,%esi +- addl %ebp,%edi +- rorl $2,%ebx +- movl %eax,%ebp +- roll $5,%ebp +- movl %esi,48(%esp) +- leal 3395469782(%esi,%edi,1),%esi +- movl 52(%esp),%edi +- addl %ebp,%esi +- # 20_39 61 +- movl %eax,%ebp +- xorl 60(%esp),%edi +- xorl %ebx,%ebp +- xorl 20(%esp),%edi +- xorl %ecx,%ebp +- xorl 40(%esp),%edi +- roll $1,%edi +- addl %ebp,%edx +- rorl $2,%eax +- movl %esi,%ebp +- roll $5,%ebp +- movl %edi,52(%esp) +- 
leal 3395469782(%edi,%edx,1),%edi +- movl 56(%esp),%edx +- addl %ebp,%edi +- # 20_39 62 +- movl %esi,%ebp +- xorl (%esp),%edx +- xorl %eax,%ebp +- xorl 24(%esp),%edx +- xorl %ebx,%ebp +- xorl 44(%esp),%edx +- roll $1,%edx +- addl %ebp,%ecx +- rorl $2,%esi +- movl %edi,%ebp +- roll $5,%ebp +- movl %edx,56(%esp) +- leal 3395469782(%edx,%ecx,1),%edx +- movl 60(%esp),%ecx +- addl %ebp,%edx +- # 20_39 63 +- movl %edi,%ebp +- xorl 4(%esp),%ecx +- xorl %esi,%ebp +- xorl 28(%esp),%ecx +- xorl %eax,%ebp +- xorl 48(%esp),%ecx +- roll $1,%ecx +- addl %ebp,%ebx +- rorl $2,%edi +- movl %edx,%ebp +- roll $5,%ebp +- movl %ecx,60(%esp) +- leal 3395469782(%ecx,%ebx,1),%ecx +- movl (%esp),%ebx +- addl %ebp,%ecx +- # 20_39 64 +- movl %edx,%ebp +- xorl 8(%esp),%ebx +- xorl %edi,%ebp +- xorl 32(%esp),%ebx +- xorl %esi,%ebp +- xorl 52(%esp),%ebx +- roll $1,%ebx +- addl %ebp,%eax +- rorl $2,%edx +- movl %ecx,%ebp +- roll $5,%ebp +- movl %ebx,(%esp) +- leal 3395469782(%ebx,%eax,1),%ebx +- movl 4(%esp),%eax +- addl %ebp,%ebx +- # 20_39 65 +- movl %ecx,%ebp +- xorl 12(%esp),%eax +- xorl %edx,%ebp +- xorl 36(%esp),%eax +- xorl %edi,%ebp +- xorl 56(%esp),%eax +- roll $1,%eax +- addl %ebp,%esi +- rorl $2,%ecx +- movl %ebx,%ebp +- roll $5,%ebp +- movl %eax,4(%esp) +- leal 3395469782(%eax,%esi,1),%eax +- movl 8(%esp),%esi +- addl %ebp,%eax +- # 20_39 66 +- movl %ebx,%ebp +- xorl 16(%esp),%esi +- xorl %ecx,%ebp +- xorl 40(%esp),%esi +- xorl %edx,%ebp +- xorl 60(%esp),%esi +- roll $1,%esi +- addl %ebp,%edi +- rorl $2,%ebx +- movl %eax,%ebp +- roll $5,%ebp +- movl %esi,8(%esp) +- leal 3395469782(%esi,%edi,1),%esi +- movl 12(%esp),%edi +- addl %ebp,%esi +- # 20_39 67 +- movl %eax,%ebp +- xorl 20(%esp),%edi +- xorl %ebx,%ebp +- xorl 44(%esp),%edi +- xorl %ecx,%ebp +- xorl (%esp),%edi +- roll $1,%edi +- addl %ebp,%edx +- rorl $2,%eax +- movl %esi,%ebp +- roll $5,%ebp +- movl %edi,12(%esp) +- leal 3395469782(%edi,%edx,1),%edi +- movl 16(%esp),%edx +- addl %ebp,%edi +- # 20_39 68 +- movl %esi,%ebp +- xorl 24(%esp),%edx +- xorl %eax,%ebp +- xorl 48(%esp),%edx +- xorl %ebx,%ebp +- xorl 4(%esp),%edx +- roll $1,%edx +- addl %ebp,%ecx +- rorl $2,%esi +- movl %edi,%ebp +- roll $5,%ebp +- movl %edx,16(%esp) +- leal 3395469782(%edx,%ecx,1),%edx +- movl 20(%esp),%ecx +- addl %ebp,%edx +- # 20_39 69 +- movl %edi,%ebp +- xorl 28(%esp),%ecx +- xorl %esi,%ebp +- xorl 52(%esp),%ecx +- xorl %eax,%ebp +- xorl 8(%esp),%ecx +- roll $1,%ecx +- addl %ebp,%ebx +- rorl $2,%edi +- movl %edx,%ebp +- roll $5,%ebp +- movl %ecx,20(%esp) +- leal 3395469782(%ecx,%ebx,1),%ecx +- movl 24(%esp),%ebx +- addl %ebp,%ecx +- # 20_39 70 +- movl %edx,%ebp +- xorl 32(%esp),%ebx +- xorl %edi,%ebp +- xorl 56(%esp),%ebx +- xorl %esi,%ebp +- xorl 12(%esp),%ebx +- roll $1,%ebx +- addl %ebp,%eax +- rorl $2,%edx +- movl %ecx,%ebp +- roll $5,%ebp +- movl %ebx,24(%esp) +- leal 3395469782(%ebx,%eax,1),%ebx +- movl 28(%esp),%eax +- addl %ebp,%ebx +- # 20_39 71 +- movl %ecx,%ebp +- xorl 36(%esp),%eax +- xorl %edx,%ebp +- xorl 60(%esp),%eax +- xorl %edi,%ebp +- xorl 16(%esp),%eax +- roll $1,%eax +- addl %ebp,%esi +- rorl $2,%ecx +- movl %ebx,%ebp +- roll $5,%ebp +- movl %eax,28(%esp) +- leal 3395469782(%eax,%esi,1),%eax +- movl 32(%esp),%esi +- addl %ebp,%eax +- # 20_39 72 +- movl %ebx,%ebp +- xorl 40(%esp),%esi +- xorl %ecx,%ebp +- xorl (%esp),%esi +- xorl %edx,%ebp +- xorl 20(%esp),%esi +- roll $1,%esi +- addl %ebp,%edi +- rorl $2,%ebx +- movl %eax,%ebp +- roll $5,%ebp +- movl %esi,32(%esp) +- leal 3395469782(%esi,%edi,1),%esi +- movl 36(%esp),%edi +- addl %ebp,%esi +- # 20_39 
73 +- movl %eax,%ebp +- xorl 44(%esp),%edi +- xorl %ebx,%ebp +- xorl 4(%esp),%edi +- xorl %ecx,%ebp +- xorl 24(%esp),%edi +- roll $1,%edi +- addl %ebp,%edx +- rorl $2,%eax +- movl %esi,%ebp +- roll $5,%ebp +- movl %edi,36(%esp) +- leal 3395469782(%edi,%edx,1),%edi +- movl 40(%esp),%edx +- addl %ebp,%edi +- # 20_39 74 +- movl %esi,%ebp +- xorl 48(%esp),%edx +- xorl %eax,%ebp +- xorl 8(%esp),%edx +- xorl %ebx,%ebp +- xorl 28(%esp),%edx +- roll $1,%edx +- addl %ebp,%ecx +- rorl $2,%esi +- movl %edi,%ebp +- roll $5,%ebp +- movl %edx,40(%esp) +- leal 3395469782(%edx,%ecx,1),%edx +- movl 44(%esp),%ecx +- addl %ebp,%edx +- # 20_39 75 +- movl %edi,%ebp +- xorl 52(%esp),%ecx +- xorl %esi,%ebp +- xorl 12(%esp),%ecx +- xorl %eax,%ebp +- xorl 32(%esp),%ecx +- roll $1,%ecx +- addl %ebp,%ebx +- rorl $2,%edi +- movl %edx,%ebp +- roll $5,%ebp +- movl %ecx,44(%esp) +- leal 3395469782(%ecx,%ebx,1),%ecx +- movl 48(%esp),%ebx +- addl %ebp,%ecx +- # 20_39 76 +- movl %edx,%ebp +- xorl 56(%esp),%ebx +- xorl %edi,%ebp +- xorl 16(%esp),%ebx +- xorl %esi,%ebp +- xorl 36(%esp),%ebx +- roll $1,%ebx +- addl %ebp,%eax +- rorl $2,%edx +- movl %ecx,%ebp +- roll $5,%ebp +- movl %ebx,48(%esp) +- leal 3395469782(%ebx,%eax,1),%ebx +- movl 52(%esp),%eax +- addl %ebp,%ebx +- # 20_39 77 +- movl %ecx,%ebp +- xorl 60(%esp),%eax +- xorl %edx,%ebp +- xorl 20(%esp),%eax +- xorl %edi,%ebp +- xorl 40(%esp),%eax +- roll $1,%eax +- addl %ebp,%esi +- rorl $2,%ecx +- movl %ebx,%ebp +- roll $5,%ebp +- leal 3395469782(%eax,%esi,1),%eax +- movl 56(%esp),%esi +- addl %ebp,%eax +- # 20_39 78 +- movl %ebx,%ebp +- xorl (%esp),%esi +- xorl %ecx,%ebp +- xorl 24(%esp),%esi +- xorl %edx,%ebp +- xorl 44(%esp),%esi +- roll $1,%esi +- addl %ebp,%edi +- rorl $2,%ebx +- movl %eax,%ebp +- roll $5,%ebp +- leal 3395469782(%esi,%edi,1),%esi +- movl 60(%esp),%edi +- addl %ebp,%esi +- # 20_39 79 +- movl %eax,%ebp +- xorl 4(%esp),%edi +- xorl %ebx,%ebp +- xorl 28(%esp),%edi +- xorl %ecx,%ebp +- xorl 48(%esp),%edi +- roll $1,%edi +- addl %ebp,%edx +- rorl $2,%eax +- movl %esi,%ebp +- roll $5,%ebp +- leal 3395469782(%edi,%edx,1),%edi +- addl %ebp,%edi +- movl 96(%esp),%ebp +- movl 100(%esp),%edx +- addl (%ebp),%edi +- addl 4(%ebp),%esi +- addl 8(%ebp),%eax +- addl 12(%ebp),%ebx +- addl 16(%ebp),%ecx +- movl %edi,(%ebp) +- addl $64,%edx +- movl %esi,4(%ebp) +- cmpl 104(%esp),%edx +- movl %eax,8(%ebp) +- movl %ecx,%edi +- movl %ebx,12(%ebp) +- movl %edx,%esi +- movl %ecx,16(%ebp) +- jb L002loop +- addl $76,%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.private_extern __sha1_block_data_order_ssse3 +-.align 4 +-__sha1_block_data_order_ssse3: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- call L003pic_point +-L003pic_point: +- popl %ebp +- leal LK_XX_XX-L003pic_point(%ebp),%ebp +-Lssse3_shortcut: +- movdqa (%ebp),%xmm7 +- movdqa 16(%ebp),%xmm0 +- movdqa 32(%ebp),%xmm1 +- movdqa 48(%ebp),%xmm2 +- movdqa 64(%ebp),%xmm6 +- movl 20(%esp),%edi +- movl 24(%esp),%ebp +- movl 28(%esp),%edx +- movl %esp,%esi +- subl $208,%esp +- andl $-64,%esp +- movdqa %xmm0,112(%esp) +- movdqa %xmm1,128(%esp) +- movdqa %xmm2,144(%esp) +- shll $6,%edx +- movdqa %xmm7,160(%esp) +- addl %ebp,%edx +- movdqa %xmm6,176(%esp) +- addl $64,%ebp +- movl %edi,192(%esp) +- movl %ebp,196(%esp) +- movl %edx,200(%esp) +- movl %esi,204(%esp) +- movl (%edi),%eax +- movl 4(%edi),%ebx +- movl 8(%edi),%ecx +- movl 12(%edi),%edx +- movl 16(%edi),%edi +- movl %ebx,%esi +- movdqu -64(%ebp),%xmm0 +- movdqu -48(%ebp),%xmm1 +- movdqu -32(%ebp),%xmm2 +- movdqu -16(%ebp),%xmm3 +-.byte 
102,15,56,0,198 +-.byte 102,15,56,0,206 +-.byte 102,15,56,0,214 +- movdqa %xmm7,96(%esp) +-.byte 102,15,56,0,222 +- paddd %xmm7,%xmm0 +- paddd %xmm7,%xmm1 +- paddd %xmm7,%xmm2 +- movdqa %xmm0,(%esp) +- psubd %xmm7,%xmm0 +- movdqa %xmm1,16(%esp) +- psubd %xmm7,%xmm1 +- movdqa %xmm2,32(%esp) +- movl %ecx,%ebp +- psubd %xmm7,%xmm2 +- xorl %edx,%ebp +- pshufd $238,%xmm0,%xmm4 +- andl %ebp,%esi +- jmp L004loop +-.align 4,0x90 +-L004loop: +- rorl $2,%ebx +- xorl %edx,%esi +- movl %eax,%ebp +- punpcklqdq %xmm1,%xmm4 +- movdqa %xmm3,%xmm6 +- addl (%esp),%edi +- xorl %ecx,%ebx +- paddd %xmm3,%xmm7 +- movdqa %xmm0,64(%esp) +- roll $5,%eax +- addl %esi,%edi +- psrldq $4,%xmm6 +- andl %ebx,%ebp +- xorl %ecx,%ebx +- pxor %xmm0,%xmm4 +- addl %eax,%edi +- rorl $7,%eax +- pxor %xmm2,%xmm6 +- xorl %ecx,%ebp +- movl %edi,%esi +- addl 4(%esp),%edx +- pxor %xmm6,%xmm4 +- xorl %ebx,%eax +- roll $5,%edi +- movdqa %xmm7,48(%esp) +- addl %ebp,%edx +- andl %eax,%esi +- movdqa %xmm4,%xmm0 +- xorl %ebx,%eax +- addl %edi,%edx +- rorl $7,%edi +- movdqa %xmm4,%xmm6 +- xorl %ebx,%esi +- pslldq $12,%xmm0 +- paddd %xmm4,%xmm4 +- movl %edx,%ebp +- addl 8(%esp),%ecx +- psrld $31,%xmm6 +- xorl %eax,%edi +- roll $5,%edx +- movdqa %xmm0,%xmm7 +- addl %esi,%ecx +- andl %edi,%ebp +- xorl %eax,%edi +- psrld $30,%xmm0 +- addl %edx,%ecx +- rorl $7,%edx +- por %xmm6,%xmm4 +- xorl %eax,%ebp +- movl %ecx,%esi +- addl 12(%esp),%ebx +- pslld $2,%xmm7 +- xorl %edi,%edx +- roll $5,%ecx +- pxor %xmm0,%xmm4 +- movdqa 96(%esp),%xmm0 +- addl %ebp,%ebx +- andl %edx,%esi +- pxor %xmm7,%xmm4 +- pshufd $238,%xmm1,%xmm5 +- xorl %edi,%edx +- addl %ecx,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- movl %ebx,%ebp +- punpcklqdq %xmm2,%xmm5 +- movdqa %xmm4,%xmm7 +- addl 16(%esp),%eax +- xorl %edx,%ecx +- paddd %xmm4,%xmm0 +- movdqa %xmm1,80(%esp) +- roll $5,%ebx +- addl %esi,%eax +- psrldq $4,%xmm7 +- andl %ecx,%ebp +- xorl %edx,%ecx +- pxor %xmm1,%xmm5 +- addl %ebx,%eax +- rorl $7,%ebx +- pxor %xmm3,%xmm7 +- xorl %edx,%ebp +- movl %eax,%esi +- addl 20(%esp),%edi +- pxor %xmm7,%xmm5 +- xorl %ecx,%ebx +- roll $5,%eax +- movdqa %xmm0,(%esp) +- addl %ebp,%edi +- andl %ebx,%esi +- movdqa %xmm5,%xmm1 +- xorl %ecx,%ebx +- addl %eax,%edi +- rorl $7,%eax +- movdqa %xmm5,%xmm7 +- xorl %ecx,%esi +- pslldq $12,%xmm1 +- paddd %xmm5,%xmm5 +- movl %edi,%ebp +- addl 24(%esp),%edx +- psrld $31,%xmm7 +- xorl %ebx,%eax +- roll $5,%edi +- movdqa %xmm1,%xmm0 +- addl %esi,%edx +- andl %eax,%ebp +- xorl %ebx,%eax +- psrld $30,%xmm1 +- addl %edi,%edx +- rorl $7,%edi +- por %xmm7,%xmm5 +- xorl %ebx,%ebp +- movl %edx,%esi +- addl 28(%esp),%ecx +- pslld $2,%xmm0 +- xorl %eax,%edi +- roll $5,%edx +- pxor %xmm1,%xmm5 +- movdqa 112(%esp),%xmm1 +- addl %ebp,%ecx +- andl %edi,%esi +- pxor %xmm0,%xmm5 +- pshufd $238,%xmm2,%xmm6 +- xorl %eax,%edi +- addl %edx,%ecx +- rorl $7,%edx +- xorl %eax,%esi +- movl %ecx,%ebp +- punpcklqdq %xmm3,%xmm6 +- movdqa %xmm5,%xmm0 +- addl 32(%esp),%ebx +- xorl %edi,%edx +- paddd %xmm5,%xmm1 +- movdqa %xmm2,96(%esp) +- roll $5,%ecx +- addl %esi,%ebx +- psrldq $4,%xmm0 +- andl %edx,%ebp +- xorl %edi,%edx +- pxor %xmm2,%xmm6 +- addl %ecx,%ebx +- rorl $7,%ecx +- pxor %xmm4,%xmm0 +- xorl %edi,%ebp +- movl %ebx,%esi +- addl 36(%esp),%eax +- pxor %xmm0,%xmm6 +- xorl %edx,%ecx +- roll $5,%ebx +- movdqa %xmm1,16(%esp) +- addl %ebp,%eax +- andl %ecx,%esi +- movdqa %xmm6,%xmm2 +- xorl %edx,%ecx +- addl %ebx,%eax +- rorl $7,%ebx +- movdqa %xmm6,%xmm0 +- xorl %edx,%esi +- pslldq $12,%xmm2 +- paddd %xmm6,%xmm6 +- movl %eax,%ebp +- addl 40(%esp),%edi +- psrld $31,%xmm0 +- 
xorl %ecx,%ebx +- roll $5,%eax +- movdqa %xmm2,%xmm1 +- addl %esi,%edi +- andl %ebx,%ebp +- xorl %ecx,%ebx +- psrld $30,%xmm2 +- addl %eax,%edi +- rorl $7,%eax +- por %xmm0,%xmm6 +- xorl %ecx,%ebp +- movdqa 64(%esp),%xmm0 +- movl %edi,%esi +- addl 44(%esp),%edx +- pslld $2,%xmm1 +- xorl %ebx,%eax +- roll $5,%edi +- pxor %xmm2,%xmm6 +- movdqa 112(%esp),%xmm2 +- addl %ebp,%edx +- andl %eax,%esi +- pxor %xmm1,%xmm6 +- pshufd $238,%xmm3,%xmm7 +- xorl %ebx,%eax +- addl %edi,%edx +- rorl $7,%edi +- xorl %ebx,%esi +- movl %edx,%ebp +- punpcklqdq %xmm4,%xmm7 +- movdqa %xmm6,%xmm1 +- addl 48(%esp),%ecx +- xorl %eax,%edi +- paddd %xmm6,%xmm2 +- movdqa %xmm3,64(%esp) +- roll $5,%edx +- addl %esi,%ecx +- psrldq $4,%xmm1 +- andl %edi,%ebp +- xorl %eax,%edi +- pxor %xmm3,%xmm7 +- addl %edx,%ecx +- rorl $7,%edx +- pxor %xmm5,%xmm1 +- xorl %eax,%ebp +- movl %ecx,%esi +- addl 52(%esp),%ebx +- pxor %xmm1,%xmm7 +- xorl %edi,%edx +- roll $5,%ecx +- movdqa %xmm2,32(%esp) +- addl %ebp,%ebx +- andl %edx,%esi +- movdqa %xmm7,%xmm3 +- xorl %edi,%edx +- addl %ecx,%ebx +- rorl $7,%ecx +- movdqa %xmm7,%xmm1 +- xorl %edi,%esi +- pslldq $12,%xmm3 +- paddd %xmm7,%xmm7 +- movl %ebx,%ebp +- addl 56(%esp),%eax +- psrld $31,%xmm1 +- xorl %edx,%ecx +- roll $5,%ebx +- movdqa %xmm3,%xmm2 +- addl %esi,%eax +- andl %ecx,%ebp +- xorl %edx,%ecx +- psrld $30,%xmm3 +- addl %ebx,%eax +- rorl $7,%ebx +- por %xmm1,%xmm7 +- xorl %edx,%ebp +- movdqa 80(%esp),%xmm1 +- movl %eax,%esi +- addl 60(%esp),%edi +- pslld $2,%xmm2 +- xorl %ecx,%ebx +- roll $5,%eax +- pxor %xmm3,%xmm7 +- movdqa 112(%esp),%xmm3 +- addl %ebp,%edi +- andl %ebx,%esi +- pxor %xmm2,%xmm7 +- pshufd $238,%xmm6,%xmm2 +- xorl %ecx,%ebx +- addl %eax,%edi +- rorl $7,%eax +- pxor %xmm4,%xmm0 +- punpcklqdq %xmm7,%xmm2 +- xorl %ecx,%esi +- movl %edi,%ebp +- addl (%esp),%edx +- pxor %xmm1,%xmm0 +- movdqa %xmm4,80(%esp) +- xorl %ebx,%eax +- roll $5,%edi +- movdqa %xmm3,%xmm4 +- addl %esi,%edx +- paddd %xmm7,%xmm3 +- andl %eax,%ebp +- pxor %xmm2,%xmm0 +- xorl %ebx,%eax +- addl %edi,%edx +- rorl $7,%edi +- xorl %ebx,%ebp +- movdqa %xmm0,%xmm2 +- movdqa %xmm3,48(%esp) +- movl %edx,%esi +- addl 4(%esp),%ecx +- xorl %eax,%edi +- roll $5,%edx +- pslld $2,%xmm0 +- addl %ebp,%ecx +- andl %edi,%esi +- psrld $30,%xmm2 +- xorl %eax,%edi +- addl %edx,%ecx +- rorl $7,%edx +- xorl %eax,%esi +- movl %ecx,%ebp +- addl 8(%esp),%ebx +- xorl %edi,%edx +- roll $5,%ecx +- por %xmm2,%xmm0 +- addl %esi,%ebx +- andl %edx,%ebp +- movdqa 96(%esp),%xmm2 +- xorl %edi,%edx +- addl %ecx,%ebx +- addl 12(%esp),%eax +- xorl %edi,%ebp +- movl %ebx,%esi +- pshufd $238,%xmm7,%xmm3 +- roll $5,%ebx +- addl %ebp,%eax +- xorl %edx,%esi +- rorl $7,%ecx +- addl %ebx,%eax +- addl 16(%esp),%edi +- pxor %xmm5,%xmm1 +- punpcklqdq %xmm0,%xmm3 +- xorl %ecx,%esi +- movl %eax,%ebp +- roll $5,%eax +- pxor %xmm2,%xmm1 +- movdqa %xmm5,96(%esp) +- addl %esi,%edi +- xorl %ecx,%ebp +- movdqa %xmm4,%xmm5 +- rorl $7,%ebx +- paddd %xmm0,%xmm4 +- addl %eax,%edi +- pxor %xmm3,%xmm1 +- addl 20(%esp),%edx +- xorl %ebx,%ebp +- movl %edi,%esi +- roll $5,%edi +- movdqa %xmm1,%xmm3 +- movdqa %xmm4,(%esp) +- addl %ebp,%edx +- xorl %ebx,%esi +- rorl $7,%eax +- addl %edi,%edx +- pslld $2,%xmm1 +- addl 24(%esp),%ecx +- xorl %eax,%esi +- psrld $30,%xmm3 +- movl %edx,%ebp +- roll $5,%edx +- addl %esi,%ecx +- xorl %eax,%ebp +- rorl $7,%edi +- addl %edx,%ecx +- por %xmm3,%xmm1 +- addl 28(%esp),%ebx +- xorl %edi,%ebp +- movdqa 64(%esp),%xmm3 +- movl %ecx,%esi +- roll $5,%ecx +- addl %ebp,%ebx +- xorl %edi,%esi +- rorl $7,%edx +- pshufd $238,%xmm0,%xmm4 +- 
addl %ecx,%ebx +- addl 32(%esp),%eax +- pxor %xmm6,%xmm2 +- punpcklqdq %xmm1,%xmm4 +- xorl %edx,%esi +- movl %ebx,%ebp +- roll $5,%ebx +- pxor %xmm3,%xmm2 +- movdqa %xmm6,64(%esp) +- addl %esi,%eax +- xorl %edx,%ebp +- movdqa 128(%esp),%xmm6 +- rorl $7,%ecx +- paddd %xmm1,%xmm5 +- addl %ebx,%eax +- pxor %xmm4,%xmm2 +- addl 36(%esp),%edi +- xorl %ecx,%ebp +- movl %eax,%esi +- roll $5,%eax +- movdqa %xmm2,%xmm4 +- movdqa %xmm5,16(%esp) +- addl %ebp,%edi +- xorl %ecx,%esi +- rorl $7,%ebx +- addl %eax,%edi +- pslld $2,%xmm2 +- addl 40(%esp),%edx +- xorl %ebx,%esi +- psrld $30,%xmm4 +- movl %edi,%ebp +- roll $5,%edi +- addl %esi,%edx +- xorl %ebx,%ebp +- rorl $7,%eax +- addl %edi,%edx +- por %xmm4,%xmm2 +- addl 44(%esp),%ecx +- xorl %eax,%ebp +- movdqa 80(%esp),%xmm4 +- movl %edx,%esi +- roll $5,%edx +- addl %ebp,%ecx +- xorl %eax,%esi +- rorl $7,%edi +- pshufd $238,%xmm1,%xmm5 +- addl %edx,%ecx +- addl 48(%esp),%ebx +- pxor %xmm7,%xmm3 +- punpcklqdq %xmm2,%xmm5 +- xorl %edi,%esi +- movl %ecx,%ebp +- roll $5,%ecx +- pxor %xmm4,%xmm3 +- movdqa %xmm7,80(%esp) +- addl %esi,%ebx +- xorl %edi,%ebp +- movdqa %xmm6,%xmm7 +- rorl $7,%edx +- paddd %xmm2,%xmm6 +- addl %ecx,%ebx +- pxor %xmm5,%xmm3 +- addl 52(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- roll $5,%ebx +- movdqa %xmm3,%xmm5 +- movdqa %xmm6,32(%esp) +- addl %ebp,%eax +- xorl %edx,%esi +- rorl $7,%ecx +- addl %ebx,%eax +- pslld $2,%xmm3 +- addl 56(%esp),%edi +- xorl %ecx,%esi +- psrld $30,%xmm5 +- movl %eax,%ebp +- roll $5,%eax +- addl %esi,%edi +- xorl %ecx,%ebp +- rorl $7,%ebx +- addl %eax,%edi +- por %xmm5,%xmm3 +- addl 60(%esp),%edx +- xorl %ebx,%ebp +- movdqa 96(%esp),%xmm5 +- movl %edi,%esi +- roll $5,%edi +- addl %ebp,%edx +- xorl %ebx,%esi +- rorl $7,%eax +- pshufd $238,%xmm2,%xmm6 +- addl %edi,%edx +- addl (%esp),%ecx +- pxor %xmm0,%xmm4 +- punpcklqdq %xmm3,%xmm6 +- xorl %eax,%esi +- movl %edx,%ebp +- roll $5,%edx +- pxor %xmm5,%xmm4 +- movdqa %xmm0,96(%esp) +- addl %esi,%ecx +- xorl %eax,%ebp +- movdqa %xmm7,%xmm0 +- rorl $7,%edi +- paddd %xmm3,%xmm7 +- addl %edx,%ecx +- pxor %xmm6,%xmm4 +- addl 4(%esp),%ebx +- xorl %edi,%ebp +- movl %ecx,%esi +- roll $5,%ecx +- movdqa %xmm4,%xmm6 +- movdqa %xmm7,48(%esp) +- addl %ebp,%ebx +- xorl %edi,%esi +- rorl $7,%edx +- addl %ecx,%ebx +- pslld $2,%xmm4 +- addl 8(%esp),%eax +- xorl %edx,%esi +- psrld $30,%xmm6 +- movl %ebx,%ebp +- roll $5,%ebx +- addl %esi,%eax +- xorl %edx,%ebp +- rorl $7,%ecx +- addl %ebx,%eax +- por %xmm6,%xmm4 +- addl 12(%esp),%edi +- xorl %ecx,%ebp +- movdqa 64(%esp),%xmm6 +- movl %eax,%esi +- roll $5,%eax +- addl %ebp,%edi +- xorl %ecx,%esi +- rorl $7,%ebx +- pshufd $238,%xmm3,%xmm7 +- addl %eax,%edi +- addl 16(%esp),%edx +- pxor %xmm1,%xmm5 +- punpcklqdq %xmm4,%xmm7 +- xorl %ebx,%esi +- movl %edi,%ebp +- roll $5,%edi +- pxor %xmm6,%xmm5 +- movdqa %xmm1,64(%esp) +- addl %esi,%edx +- xorl %ebx,%ebp +- movdqa %xmm0,%xmm1 +- rorl $7,%eax +- paddd %xmm4,%xmm0 +- addl %edi,%edx +- pxor %xmm7,%xmm5 +- addl 20(%esp),%ecx +- xorl %eax,%ebp +- movl %edx,%esi +- roll $5,%edx +- movdqa %xmm5,%xmm7 +- movdqa %xmm0,(%esp) +- addl %ebp,%ecx +- xorl %eax,%esi +- rorl $7,%edi +- addl %edx,%ecx +- pslld $2,%xmm5 +- addl 24(%esp),%ebx +- xorl %edi,%esi +- psrld $30,%xmm7 +- movl %ecx,%ebp +- roll $5,%ecx +- addl %esi,%ebx +- xorl %edi,%ebp +- rorl $7,%edx +- addl %ecx,%ebx +- por %xmm7,%xmm5 +- addl 28(%esp),%eax +- movdqa 80(%esp),%xmm7 +- rorl $7,%ecx +- movl %ebx,%esi +- xorl %edx,%ebp +- roll $5,%ebx +- pshufd $238,%xmm4,%xmm0 +- addl %ebp,%eax +- xorl %ecx,%esi +- xorl %edx,%ecx 
+- addl %ebx,%eax +- addl 32(%esp),%edi +- pxor %xmm2,%xmm6 +- punpcklqdq %xmm5,%xmm0 +- andl %ecx,%esi +- xorl %edx,%ecx +- rorl $7,%ebx +- pxor %xmm7,%xmm6 +- movdqa %xmm2,80(%esp) +- movl %eax,%ebp +- xorl %ecx,%esi +- roll $5,%eax +- movdqa %xmm1,%xmm2 +- addl %esi,%edi +- paddd %xmm5,%xmm1 +- xorl %ebx,%ebp +- pxor %xmm0,%xmm6 +- xorl %ecx,%ebx +- addl %eax,%edi +- addl 36(%esp),%edx +- andl %ebx,%ebp +- movdqa %xmm6,%xmm0 +- movdqa %xmm1,16(%esp) +- xorl %ecx,%ebx +- rorl $7,%eax +- movl %edi,%esi +- xorl %ebx,%ebp +- roll $5,%edi +- pslld $2,%xmm6 +- addl %ebp,%edx +- xorl %eax,%esi +- psrld $30,%xmm0 +- xorl %ebx,%eax +- addl %edi,%edx +- addl 40(%esp),%ecx +- andl %eax,%esi +- xorl %ebx,%eax +- rorl $7,%edi +- por %xmm0,%xmm6 +- movl %edx,%ebp +- xorl %eax,%esi +- movdqa 96(%esp),%xmm0 +- roll $5,%edx +- addl %esi,%ecx +- xorl %edi,%ebp +- xorl %eax,%edi +- addl %edx,%ecx +- pshufd $238,%xmm5,%xmm1 +- addl 44(%esp),%ebx +- andl %edi,%ebp +- xorl %eax,%edi +- rorl $7,%edx +- movl %ecx,%esi +- xorl %edi,%ebp +- roll $5,%ecx +- addl %ebp,%ebx +- xorl %edx,%esi +- xorl %edi,%edx +- addl %ecx,%ebx +- addl 48(%esp),%eax +- pxor %xmm3,%xmm7 +- punpcklqdq %xmm6,%xmm1 +- andl %edx,%esi +- xorl %edi,%edx +- rorl $7,%ecx +- pxor %xmm0,%xmm7 +- movdqa %xmm3,96(%esp) +- movl %ebx,%ebp +- xorl %edx,%esi +- roll $5,%ebx +- movdqa 144(%esp),%xmm3 +- addl %esi,%eax +- paddd %xmm6,%xmm2 +- xorl %ecx,%ebp +- pxor %xmm1,%xmm7 +- xorl %edx,%ecx +- addl %ebx,%eax +- addl 52(%esp),%edi +- andl %ecx,%ebp +- movdqa %xmm7,%xmm1 +- movdqa %xmm2,32(%esp) +- xorl %edx,%ecx +- rorl $7,%ebx +- movl %eax,%esi +- xorl %ecx,%ebp +- roll $5,%eax +- pslld $2,%xmm7 +- addl %ebp,%edi +- xorl %ebx,%esi +- psrld $30,%xmm1 +- xorl %ecx,%ebx +- addl %eax,%edi +- addl 56(%esp),%edx +- andl %ebx,%esi +- xorl %ecx,%ebx +- rorl $7,%eax +- por %xmm1,%xmm7 +- movl %edi,%ebp +- xorl %ebx,%esi +- movdqa 64(%esp),%xmm1 +- roll $5,%edi +- addl %esi,%edx +- xorl %eax,%ebp +- xorl %ebx,%eax +- addl %edi,%edx +- pshufd $238,%xmm6,%xmm2 +- addl 60(%esp),%ecx +- andl %eax,%ebp +- xorl %ebx,%eax +- rorl $7,%edi +- movl %edx,%esi +- xorl %eax,%ebp +- roll $5,%edx +- addl %ebp,%ecx +- xorl %edi,%esi +- xorl %eax,%edi +- addl %edx,%ecx +- addl (%esp),%ebx +- pxor %xmm4,%xmm0 +- punpcklqdq %xmm7,%xmm2 +- andl %edi,%esi +- xorl %eax,%edi +- rorl $7,%edx +- pxor %xmm1,%xmm0 +- movdqa %xmm4,64(%esp) +- movl %ecx,%ebp +- xorl %edi,%esi +- roll $5,%ecx +- movdqa %xmm3,%xmm4 +- addl %esi,%ebx +- paddd %xmm7,%xmm3 +- xorl %edx,%ebp +- pxor %xmm2,%xmm0 +- xorl %edi,%edx +- addl %ecx,%ebx +- addl 4(%esp),%eax +- andl %edx,%ebp +- movdqa %xmm0,%xmm2 +- movdqa %xmm3,48(%esp) +- xorl %edi,%edx +- rorl $7,%ecx +- movl %ebx,%esi +- xorl %edx,%ebp +- roll $5,%ebx +- pslld $2,%xmm0 +- addl %ebp,%eax +- xorl %ecx,%esi +- psrld $30,%xmm2 +- xorl %edx,%ecx +- addl %ebx,%eax +- addl 8(%esp),%edi +- andl %ecx,%esi +- xorl %edx,%ecx +- rorl $7,%ebx +- por %xmm2,%xmm0 +- movl %eax,%ebp +- xorl %ecx,%esi +- movdqa 80(%esp),%xmm2 +- roll $5,%eax +- addl %esi,%edi +- xorl %ebx,%ebp +- xorl %ecx,%ebx +- addl %eax,%edi +- pshufd $238,%xmm7,%xmm3 +- addl 12(%esp),%edx +- andl %ebx,%ebp +- xorl %ecx,%ebx +- rorl $7,%eax +- movl %edi,%esi +- xorl %ebx,%ebp +- roll $5,%edi +- addl %ebp,%edx +- xorl %eax,%esi +- xorl %ebx,%eax +- addl %edi,%edx +- addl 16(%esp),%ecx +- pxor %xmm5,%xmm1 +- punpcklqdq %xmm0,%xmm3 +- andl %eax,%esi +- xorl %ebx,%eax +- rorl $7,%edi +- pxor %xmm2,%xmm1 +- movdqa %xmm5,80(%esp) +- movl %edx,%ebp +- xorl %eax,%esi +- roll $5,%edx +- movdqa 
%xmm4,%xmm5 +- addl %esi,%ecx +- paddd %xmm0,%xmm4 +- xorl %edi,%ebp +- pxor %xmm3,%xmm1 +- xorl %eax,%edi +- addl %edx,%ecx +- addl 20(%esp),%ebx +- andl %edi,%ebp +- movdqa %xmm1,%xmm3 +- movdqa %xmm4,(%esp) +- xorl %eax,%edi +- rorl $7,%edx +- movl %ecx,%esi +- xorl %edi,%ebp +- roll $5,%ecx +- pslld $2,%xmm1 +- addl %ebp,%ebx +- xorl %edx,%esi +- psrld $30,%xmm3 +- xorl %edi,%edx +- addl %ecx,%ebx +- addl 24(%esp),%eax +- andl %edx,%esi +- xorl %edi,%edx +- rorl $7,%ecx +- por %xmm3,%xmm1 +- movl %ebx,%ebp +- xorl %edx,%esi +- movdqa 96(%esp),%xmm3 +- roll $5,%ebx +- addl %esi,%eax +- xorl %ecx,%ebp +- xorl %edx,%ecx +- addl %ebx,%eax +- pshufd $238,%xmm0,%xmm4 +- addl 28(%esp),%edi +- andl %ecx,%ebp +- xorl %edx,%ecx +- rorl $7,%ebx +- movl %eax,%esi +- xorl %ecx,%ebp +- roll $5,%eax +- addl %ebp,%edi +- xorl %ebx,%esi +- xorl %ecx,%ebx +- addl %eax,%edi +- addl 32(%esp),%edx +- pxor %xmm6,%xmm2 +- punpcklqdq %xmm1,%xmm4 +- andl %ebx,%esi +- xorl %ecx,%ebx +- rorl $7,%eax +- pxor %xmm3,%xmm2 +- movdqa %xmm6,96(%esp) +- movl %edi,%ebp +- xorl %ebx,%esi +- roll $5,%edi +- movdqa %xmm5,%xmm6 +- addl %esi,%edx +- paddd %xmm1,%xmm5 +- xorl %eax,%ebp +- pxor %xmm4,%xmm2 +- xorl %ebx,%eax +- addl %edi,%edx +- addl 36(%esp),%ecx +- andl %eax,%ebp +- movdqa %xmm2,%xmm4 +- movdqa %xmm5,16(%esp) +- xorl %ebx,%eax +- rorl $7,%edi +- movl %edx,%esi +- xorl %eax,%ebp +- roll $5,%edx +- pslld $2,%xmm2 +- addl %ebp,%ecx +- xorl %edi,%esi +- psrld $30,%xmm4 +- xorl %eax,%edi +- addl %edx,%ecx +- addl 40(%esp),%ebx +- andl %edi,%esi +- xorl %eax,%edi +- rorl $7,%edx +- por %xmm4,%xmm2 +- movl %ecx,%ebp +- xorl %edi,%esi +- movdqa 64(%esp),%xmm4 +- roll $5,%ecx +- addl %esi,%ebx +- xorl %edx,%ebp +- xorl %edi,%edx +- addl %ecx,%ebx +- pshufd $238,%xmm1,%xmm5 +- addl 44(%esp),%eax +- andl %edx,%ebp +- xorl %edi,%edx +- rorl $7,%ecx +- movl %ebx,%esi +- xorl %edx,%ebp +- roll $5,%ebx +- addl %ebp,%eax +- xorl %edx,%esi +- addl %ebx,%eax +- addl 48(%esp),%edi +- pxor %xmm7,%xmm3 +- punpcklqdq %xmm2,%xmm5 +- xorl %ecx,%esi +- movl %eax,%ebp +- roll $5,%eax +- pxor %xmm4,%xmm3 +- movdqa %xmm7,64(%esp) +- addl %esi,%edi +- xorl %ecx,%ebp +- movdqa %xmm6,%xmm7 +- rorl $7,%ebx +- paddd %xmm2,%xmm6 +- addl %eax,%edi +- pxor %xmm5,%xmm3 +- addl 52(%esp),%edx +- xorl %ebx,%ebp +- movl %edi,%esi +- roll $5,%edi +- movdqa %xmm3,%xmm5 +- movdqa %xmm6,32(%esp) +- addl %ebp,%edx +- xorl %ebx,%esi +- rorl $7,%eax +- addl %edi,%edx +- pslld $2,%xmm3 +- addl 56(%esp),%ecx +- xorl %eax,%esi +- psrld $30,%xmm5 +- movl %edx,%ebp +- roll $5,%edx +- addl %esi,%ecx +- xorl %eax,%ebp +- rorl $7,%edi +- addl %edx,%ecx +- por %xmm5,%xmm3 +- addl 60(%esp),%ebx +- xorl %edi,%ebp +- movl %ecx,%esi +- roll $5,%ecx +- addl %ebp,%ebx +- xorl %edi,%esi +- rorl $7,%edx +- addl %ecx,%ebx +- addl (%esp),%eax +- xorl %edx,%esi +- movl %ebx,%ebp +- roll $5,%ebx +- addl %esi,%eax +- xorl %edx,%ebp +- rorl $7,%ecx +- paddd %xmm3,%xmm7 +- addl %ebx,%eax +- addl 4(%esp),%edi +- xorl %ecx,%ebp +- movl %eax,%esi +- movdqa %xmm7,48(%esp) +- roll $5,%eax +- addl %ebp,%edi +- xorl %ecx,%esi +- rorl $7,%ebx +- addl %eax,%edi +- addl 8(%esp),%edx +- xorl %ebx,%esi +- movl %edi,%ebp +- roll $5,%edi +- addl %esi,%edx +- xorl %ebx,%ebp +- rorl $7,%eax +- addl %edi,%edx +- addl 12(%esp),%ecx +- xorl %eax,%ebp +- movl %edx,%esi +- roll $5,%edx +- addl %ebp,%ecx +- xorl %eax,%esi +- rorl $7,%edi +- addl %edx,%ecx +- movl 196(%esp),%ebp +- cmpl 200(%esp),%ebp +- je L005done +- movdqa 160(%esp),%xmm7 +- movdqa 176(%esp),%xmm6 +- movdqu (%ebp),%xmm0 +- movdqu 
16(%ebp),%xmm1 +- movdqu 32(%ebp),%xmm2 +- movdqu 48(%ebp),%xmm3 +- addl $64,%ebp +-.byte 102,15,56,0,198 +- movl %ebp,196(%esp) +- movdqa %xmm7,96(%esp) +- addl 16(%esp),%ebx +- xorl %edi,%esi +- movl %ecx,%ebp +- roll $5,%ecx +- addl %esi,%ebx +- xorl %edi,%ebp +- rorl $7,%edx +-.byte 102,15,56,0,206 +- addl %ecx,%ebx +- addl 20(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- paddd %xmm7,%xmm0 +- roll $5,%ebx +- addl %ebp,%eax +- xorl %edx,%esi +- rorl $7,%ecx +- movdqa %xmm0,(%esp) +- addl %ebx,%eax +- addl 24(%esp),%edi +- xorl %ecx,%esi +- movl %eax,%ebp +- psubd %xmm7,%xmm0 +- roll $5,%eax +- addl %esi,%edi +- xorl %ecx,%ebp +- rorl $7,%ebx +- addl %eax,%edi +- addl 28(%esp),%edx +- xorl %ebx,%ebp +- movl %edi,%esi +- roll $5,%edi +- addl %ebp,%edx +- xorl %ebx,%esi +- rorl $7,%eax +- addl %edi,%edx +- addl 32(%esp),%ecx +- xorl %eax,%esi +- movl %edx,%ebp +- roll $5,%edx +- addl %esi,%ecx +- xorl %eax,%ebp +- rorl $7,%edi +-.byte 102,15,56,0,214 +- addl %edx,%ecx +- addl 36(%esp),%ebx +- xorl %edi,%ebp +- movl %ecx,%esi +- paddd %xmm7,%xmm1 +- roll $5,%ecx +- addl %ebp,%ebx +- xorl %edi,%esi +- rorl $7,%edx +- movdqa %xmm1,16(%esp) +- addl %ecx,%ebx +- addl 40(%esp),%eax +- xorl %edx,%esi +- movl %ebx,%ebp +- psubd %xmm7,%xmm1 +- roll $5,%ebx +- addl %esi,%eax +- xorl %edx,%ebp +- rorl $7,%ecx +- addl %ebx,%eax +- addl 44(%esp),%edi +- xorl %ecx,%ebp +- movl %eax,%esi +- roll $5,%eax +- addl %ebp,%edi +- xorl %ecx,%esi +- rorl $7,%ebx +- addl %eax,%edi +- addl 48(%esp),%edx +- xorl %ebx,%esi +- movl %edi,%ebp +- roll $5,%edi +- addl %esi,%edx +- xorl %ebx,%ebp +- rorl $7,%eax +-.byte 102,15,56,0,222 +- addl %edi,%edx +- addl 52(%esp),%ecx +- xorl %eax,%ebp +- movl %edx,%esi +- paddd %xmm7,%xmm2 +- roll $5,%edx +- addl %ebp,%ecx +- xorl %eax,%esi +- rorl $7,%edi +- movdqa %xmm2,32(%esp) +- addl %edx,%ecx +- addl 56(%esp),%ebx +- xorl %edi,%esi +- movl %ecx,%ebp +- psubd %xmm7,%xmm2 +- roll $5,%ecx +- addl %esi,%ebx +- xorl %edi,%ebp +- rorl $7,%edx +- addl %ecx,%ebx +- addl 60(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- roll $5,%ebx +- addl %ebp,%eax +- rorl $7,%ecx +- addl %ebx,%eax +- movl 192(%esp),%ebp +- addl (%ebp),%eax +- addl 4(%ebp),%esi +- addl 8(%ebp),%ecx +- movl %eax,(%ebp) +- addl 12(%ebp),%edx +- movl %esi,4(%ebp) +- addl 16(%ebp),%edi +- movl %ecx,8(%ebp) +- movl %ecx,%ebx +- movl %edx,12(%ebp) +- xorl %edx,%ebx +- movl %edi,16(%ebp) +- movl %esi,%ebp +- pshufd $238,%xmm0,%xmm4 +- andl %ebx,%esi +- movl %ebp,%ebx +- jmp L004loop +-.align 4,0x90 +-L005done: +- addl 16(%esp),%ebx +- xorl %edi,%esi +- movl %ecx,%ebp +- roll $5,%ecx +- addl %esi,%ebx +- xorl %edi,%ebp +- rorl $7,%edx +- addl %ecx,%ebx +- addl 20(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- roll $5,%ebx +- addl %ebp,%eax +- xorl %edx,%esi +- rorl $7,%ecx +- addl %ebx,%eax +- addl 24(%esp),%edi +- xorl %ecx,%esi +- movl %eax,%ebp +- roll $5,%eax +- addl %esi,%edi +- xorl %ecx,%ebp +- rorl $7,%ebx +- addl %eax,%edi +- addl 28(%esp),%edx +- xorl %ebx,%ebp +- movl %edi,%esi +- roll $5,%edi +- addl %ebp,%edx +- xorl %ebx,%esi +- rorl $7,%eax +- addl %edi,%edx +- addl 32(%esp),%ecx +- xorl %eax,%esi +- movl %edx,%ebp +- roll $5,%edx +- addl %esi,%ecx +- xorl %eax,%ebp +- rorl $7,%edi +- addl %edx,%ecx +- addl 36(%esp),%ebx +- xorl %edi,%ebp +- movl %ecx,%esi +- roll $5,%ecx +- addl %ebp,%ebx +- xorl %edi,%esi +- rorl $7,%edx +- addl %ecx,%ebx +- addl 40(%esp),%eax +- xorl %edx,%esi +- movl %ebx,%ebp +- roll $5,%ebx +- addl %esi,%eax +- xorl %edx,%ebp +- rorl $7,%ecx +- addl %ebx,%eax +- addl 
44(%esp),%edi +- xorl %ecx,%ebp +- movl %eax,%esi +- roll $5,%eax +- addl %ebp,%edi +- xorl %ecx,%esi +- rorl $7,%ebx +- addl %eax,%edi +- addl 48(%esp),%edx +- xorl %ebx,%esi +- movl %edi,%ebp +- roll $5,%edi +- addl %esi,%edx +- xorl %ebx,%ebp +- rorl $7,%eax +- addl %edi,%edx +- addl 52(%esp),%ecx +- xorl %eax,%ebp +- movl %edx,%esi +- roll $5,%edx +- addl %ebp,%ecx +- xorl %eax,%esi +- rorl $7,%edi +- addl %edx,%ecx +- addl 56(%esp),%ebx +- xorl %edi,%esi +- movl %ecx,%ebp +- roll $5,%ecx +- addl %esi,%ebx +- xorl %edi,%ebp +- rorl $7,%edx +- addl %ecx,%ebx +- addl 60(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- roll $5,%ebx +- addl %ebp,%eax +- rorl $7,%ecx +- addl %ebx,%eax +- movl 192(%esp),%ebp +- addl (%ebp),%eax +- movl 204(%esp),%esp +- addl 4(%ebp),%esi +- addl 8(%ebp),%ecx +- movl %eax,(%ebp) +- addl 12(%ebp),%edx +- movl %esi,4(%ebp) +- addl 16(%ebp),%edi +- movl %ecx,8(%ebp) +- movl %edx,12(%ebp) +- movl %edi,16(%ebp) +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.private_extern __sha1_block_data_order_avx +-.align 4 +-__sha1_block_data_order_avx: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- call L006pic_point +-L006pic_point: +- popl %ebp +- leal LK_XX_XX-L006pic_point(%ebp),%ebp +-Lavx_shortcut: +- vzeroall +- vmovdqa (%ebp),%xmm7 +- vmovdqa 16(%ebp),%xmm0 +- vmovdqa 32(%ebp),%xmm1 +- vmovdqa 48(%ebp),%xmm2 +- vmovdqa 64(%ebp),%xmm6 +- movl 20(%esp),%edi +- movl 24(%esp),%ebp +- movl 28(%esp),%edx +- movl %esp,%esi +- subl $208,%esp +- andl $-64,%esp +- vmovdqa %xmm0,112(%esp) +- vmovdqa %xmm1,128(%esp) +- vmovdqa %xmm2,144(%esp) +- shll $6,%edx +- vmovdqa %xmm7,160(%esp) +- addl %ebp,%edx +- vmovdqa %xmm6,176(%esp) +- addl $64,%ebp +- movl %edi,192(%esp) +- movl %ebp,196(%esp) +- movl %edx,200(%esp) +- movl %esi,204(%esp) +- movl (%edi),%eax +- movl 4(%edi),%ebx +- movl 8(%edi),%ecx +- movl 12(%edi),%edx +- movl 16(%edi),%edi +- movl %ebx,%esi +- vmovdqu -64(%ebp),%xmm0 +- vmovdqu -48(%ebp),%xmm1 +- vmovdqu -32(%ebp),%xmm2 +- vmovdqu -16(%ebp),%xmm3 +- vpshufb %xmm6,%xmm0,%xmm0 +- vpshufb %xmm6,%xmm1,%xmm1 +- vpshufb %xmm6,%xmm2,%xmm2 +- vmovdqa %xmm7,96(%esp) +- vpshufb %xmm6,%xmm3,%xmm3 +- vpaddd %xmm7,%xmm0,%xmm4 +- vpaddd %xmm7,%xmm1,%xmm5 +- vpaddd %xmm7,%xmm2,%xmm6 +- vmovdqa %xmm4,(%esp) +- movl %ecx,%ebp +- vmovdqa %xmm5,16(%esp) +- xorl %edx,%ebp +- vmovdqa %xmm6,32(%esp) +- andl %ebp,%esi +- jmp L007loop +-.align 4,0x90 +-L007loop: +- shrdl $2,%ebx,%ebx +- xorl %edx,%esi +- vpalignr $8,%xmm0,%xmm1,%xmm4 +- movl %eax,%ebp +- addl (%esp),%edi +- vpaddd %xmm3,%xmm7,%xmm7 +- vmovdqa %xmm0,64(%esp) +- xorl %ecx,%ebx +- shldl $5,%eax,%eax +- vpsrldq $4,%xmm3,%xmm6 +- addl %esi,%edi +- andl %ebx,%ebp +- vpxor %xmm0,%xmm4,%xmm4 +- xorl %ecx,%ebx +- addl %eax,%edi +- vpxor %xmm2,%xmm6,%xmm6 +- shrdl $7,%eax,%eax +- xorl %ecx,%ebp +- vmovdqa %xmm7,48(%esp) +- movl %edi,%esi +- addl 4(%esp),%edx +- vpxor %xmm6,%xmm4,%xmm4 +- xorl %ebx,%eax +- shldl $5,%edi,%edi +- addl %ebp,%edx +- andl %eax,%esi +- vpsrld $31,%xmm4,%xmm6 +- xorl %ebx,%eax +- addl %edi,%edx +- shrdl $7,%edi,%edi +- xorl %ebx,%esi +- vpslldq $12,%xmm4,%xmm0 +- vpaddd %xmm4,%xmm4,%xmm4 +- movl %edx,%ebp +- addl 8(%esp),%ecx +- xorl %eax,%edi +- shldl $5,%edx,%edx +- vpsrld $30,%xmm0,%xmm7 +- vpor %xmm6,%xmm4,%xmm4 +- addl %esi,%ecx +- andl %edi,%ebp +- xorl %eax,%edi +- addl %edx,%ecx +- vpslld $2,%xmm0,%xmm0 +- shrdl $7,%edx,%edx +- xorl %eax,%ebp +- vpxor %xmm7,%xmm4,%xmm4 +- movl %ecx,%esi +- addl 12(%esp),%ebx +- xorl %edi,%edx +- shldl $5,%ecx,%ecx +- vpxor 
%xmm0,%xmm4,%xmm4 +- addl %ebp,%ebx +- andl %edx,%esi +- vmovdqa 96(%esp),%xmm0 +- xorl %edi,%edx +- addl %ecx,%ebx +- shrdl $7,%ecx,%ecx +- xorl %edi,%esi +- vpalignr $8,%xmm1,%xmm2,%xmm5 +- movl %ebx,%ebp +- addl 16(%esp),%eax +- vpaddd %xmm4,%xmm0,%xmm0 +- vmovdqa %xmm1,80(%esp) +- xorl %edx,%ecx +- shldl $5,%ebx,%ebx +- vpsrldq $4,%xmm4,%xmm7 +- addl %esi,%eax +- andl %ecx,%ebp +- vpxor %xmm1,%xmm5,%xmm5 +- xorl %edx,%ecx +- addl %ebx,%eax +- vpxor %xmm3,%xmm7,%xmm7 +- shrdl $7,%ebx,%ebx +- xorl %edx,%ebp +- vmovdqa %xmm0,(%esp) +- movl %eax,%esi +- addl 20(%esp),%edi +- vpxor %xmm7,%xmm5,%xmm5 +- xorl %ecx,%ebx +- shldl $5,%eax,%eax +- addl %ebp,%edi +- andl %ebx,%esi +- vpsrld $31,%xmm5,%xmm7 +- xorl %ecx,%ebx +- addl %eax,%edi +- shrdl $7,%eax,%eax +- xorl %ecx,%esi +- vpslldq $12,%xmm5,%xmm1 +- vpaddd %xmm5,%xmm5,%xmm5 +- movl %edi,%ebp +- addl 24(%esp),%edx +- xorl %ebx,%eax +- shldl $5,%edi,%edi +- vpsrld $30,%xmm1,%xmm0 +- vpor %xmm7,%xmm5,%xmm5 +- addl %esi,%edx +- andl %eax,%ebp +- xorl %ebx,%eax +- addl %edi,%edx +- vpslld $2,%xmm1,%xmm1 +- shrdl $7,%edi,%edi +- xorl %ebx,%ebp +- vpxor %xmm0,%xmm5,%xmm5 +- movl %edx,%esi +- addl 28(%esp),%ecx +- xorl %eax,%edi +- shldl $5,%edx,%edx +- vpxor %xmm1,%xmm5,%xmm5 +- addl %ebp,%ecx +- andl %edi,%esi +- vmovdqa 112(%esp),%xmm1 +- xorl %eax,%edi +- addl %edx,%ecx +- shrdl $7,%edx,%edx +- xorl %eax,%esi +- vpalignr $8,%xmm2,%xmm3,%xmm6 +- movl %ecx,%ebp +- addl 32(%esp),%ebx +- vpaddd %xmm5,%xmm1,%xmm1 +- vmovdqa %xmm2,96(%esp) +- xorl %edi,%edx +- shldl $5,%ecx,%ecx +- vpsrldq $4,%xmm5,%xmm0 +- addl %esi,%ebx +- andl %edx,%ebp +- vpxor %xmm2,%xmm6,%xmm6 +- xorl %edi,%edx +- addl %ecx,%ebx +- vpxor %xmm4,%xmm0,%xmm0 +- shrdl $7,%ecx,%ecx +- xorl %edi,%ebp +- vmovdqa %xmm1,16(%esp) +- movl %ebx,%esi +- addl 36(%esp),%eax +- vpxor %xmm0,%xmm6,%xmm6 +- xorl %edx,%ecx +- shldl $5,%ebx,%ebx +- addl %ebp,%eax +- andl %ecx,%esi +- vpsrld $31,%xmm6,%xmm0 +- xorl %edx,%ecx +- addl %ebx,%eax +- shrdl $7,%ebx,%ebx +- xorl %edx,%esi +- vpslldq $12,%xmm6,%xmm2 +- vpaddd %xmm6,%xmm6,%xmm6 +- movl %eax,%ebp +- addl 40(%esp),%edi +- xorl %ecx,%ebx +- shldl $5,%eax,%eax +- vpsrld $30,%xmm2,%xmm1 +- vpor %xmm0,%xmm6,%xmm6 +- addl %esi,%edi +- andl %ebx,%ebp +- xorl %ecx,%ebx +- addl %eax,%edi +- vpslld $2,%xmm2,%xmm2 +- vmovdqa 64(%esp),%xmm0 +- shrdl $7,%eax,%eax +- xorl %ecx,%ebp +- vpxor %xmm1,%xmm6,%xmm6 +- movl %edi,%esi +- addl 44(%esp),%edx +- xorl %ebx,%eax +- shldl $5,%edi,%edi +- vpxor %xmm2,%xmm6,%xmm6 +- addl %ebp,%edx +- andl %eax,%esi +- vmovdqa 112(%esp),%xmm2 +- xorl %ebx,%eax +- addl %edi,%edx +- shrdl $7,%edi,%edi +- xorl %ebx,%esi +- vpalignr $8,%xmm3,%xmm4,%xmm7 +- movl %edx,%ebp +- addl 48(%esp),%ecx +- vpaddd %xmm6,%xmm2,%xmm2 +- vmovdqa %xmm3,64(%esp) +- xorl %eax,%edi +- shldl $5,%edx,%edx +- vpsrldq $4,%xmm6,%xmm1 +- addl %esi,%ecx +- andl %edi,%ebp +- vpxor %xmm3,%xmm7,%xmm7 +- xorl %eax,%edi +- addl %edx,%ecx +- vpxor %xmm5,%xmm1,%xmm1 +- shrdl $7,%edx,%edx +- xorl %eax,%ebp +- vmovdqa %xmm2,32(%esp) +- movl %ecx,%esi +- addl 52(%esp),%ebx +- vpxor %xmm1,%xmm7,%xmm7 +- xorl %edi,%edx +- shldl $5,%ecx,%ecx +- addl %ebp,%ebx +- andl %edx,%esi +- vpsrld $31,%xmm7,%xmm1 +- xorl %edi,%edx +- addl %ecx,%ebx +- shrdl $7,%ecx,%ecx +- xorl %edi,%esi +- vpslldq $12,%xmm7,%xmm3 +- vpaddd %xmm7,%xmm7,%xmm7 +- movl %ebx,%ebp +- addl 56(%esp),%eax +- xorl %edx,%ecx +- shldl $5,%ebx,%ebx +- vpsrld $30,%xmm3,%xmm2 +- vpor %xmm1,%xmm7,%xmm7 +- addl %esi,%eax +- andl %ecx,%ebp +- xorl %edx,%ecx +- addl %ebx,%eax +- vpslld $2,%xmm3,%xmm3 +- 
vmovdqa 80(%esp),%xmm1 +- shrdl $7,%ebx,%ebx +- xorl %edx,%ebp +- vpxor %xmm2,%xmm7,%xmm7 +- movl %eax,%esi +- addl 60(%esp),%edi +- xorl %ecx,%ebx +- shldl $5,%eax,%eax +- vpxor %xmm3,%xmm7,%xmm7 +- addl %ebp,%edi +- andl %ebx,%esi +- vmovdqa 112(%esp),%xmm3 +- xorl %ecx,%ebx +- addl %eax,%edi +- vpalignr $8,%xmm6,%xmm7,%xmm2 +- vpxor %xmm4,%xmm0,%xmm0 +- shrdl $7,%eax,%eax +- xorl %ecx,%esi +- movl %edi,%ebp +- addl (%esp),%edx +- vpxor %xmm1,%xmm0,%xmm0 +- vmovdqa %xmm4,80(%esp) +- xorl %ebx,%eax +- shldl $5,%edi,%edi +- vmovdqa %xmm3,%xmm4 +- vpaddd %xmm7,%xmm3,%xmm3 +- addl %esi,%edx +- andl %eax,%ebp +- vpxor %xmm2,%xmm0,%xmm0 +- xorl %ebx,%eax +- addl %edi,%edx +- shrdl $7,%edi,%edi +- xorl %ebx,%ebp +- vpsrld $30,%xmm0,%xmm2 +- vmovdqa %xmm3,48(%esp) +- movl %edx,%esi +- addl 4(%esp),%ecx +- xorl %eax,%edi +- shldl $5,%edx,%edx +- vpslld $2,%xmm0,%xmm0 +- addl %ebp,%ecx +- andl %edi,%esi +- xorl %eax,%edi +- addl %edx,%ecx +- shrdl $7,%edx,%edx +- xorl %eax,%esi +- movl %ecx,%ebp +- addl 8(%esp),%ebx +- vpor %xmm2,%xmm0,%xmm0 +- xorl %edi,%edx +- shldl $5,%ecx,%ecx +- vmovdqa 96(%esp),%xmm2 +- addl %esi,%ebx +- andl %edx,%ebp +- xorl %edi,%edx +- addl %ecx,%ebx +- addl 12(%esp),%eax +- xorl %edi,%ebp +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %ebp,%eax +- xorl %edx,%esi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vpalignr $8,%xmm7,%xmm0,%xmm3 +- vpxor %xmm5,%xmm1,%xmm1 +- addl 16(%esp),%edi +- xorl %ecx,%esi +- movl %eax,%ebp +- shldl $5,%eax,%eax +- vpxor %xmm2,%xmm1,%xmm1 +- vmovdqa %xmm5,96(%esp) +- addl %esi,%edi +- xorl %ecx,%ebp +- vmovdqa %xmm4,%xmm5 +- vpaddd %xmm0,%xmm4,%xmm4 +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- vpxor %xmm3,%xmm1,%xmm1 +- addl 20(%esp),%edx +- xorl %ebx,%ebp +- movl %edi,%esi +- shldl $5,%edi,%edi +- vpsrld $30,%xmm1,%xmm3 +- vmovdqa %xmm4,(%esp) +- addl %ebp,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %edi,%edx +- vpslld $2,%xmm1,%xmm1 +- addl 24(%esp),%ecx +- xorl %eax,%esi +- movl %edx,%ebp +- shldl $5,%edx,%edx +- addl %esi,%ecx +- xorl %eax,%ebp +- shrdl $7,%edi,%edi +- addl %edx,%ecx +- vpor %xmm3,%xmm1,%xmm1 +- addl 28(%esp),%ebx +- xorl %edi,%ebp +- vmovdqa 64(%esp),%xmm3 +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- addl %ebp,%ebx +- xorl %edi,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- vpalignr $8,%xmm0,%xmm1,%xmm4 +- vpxor %xmm6,%xmm2,%xmm2 +- addl 32(%esp),%eax +- xorl %edx,%esi +- movl %ebx,%ebp +- shldl $5,%ebx,%ebx +- vpxor %xmm3,%xmm2,%xmm2 +- vmovdqa %xmm6,64(%esp) +- addl %esi,%eax +- xorl %edx,%ebp +- vmovdqa 128(%esp),%xmm6 +- vpaddd %xmm1,%xmm5,%xmm5 +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vpxor %xmm4,%xmm2,%xmm2 +- addl 36(%esp),%edi +- xorl %ecx,%ebp +- movl %eax,%esi +- shldl $5,%eax,%eax +- vpsrld $30,%xmm2,%xmm4 +- vmovdqa %xmm5,16(%esp) +- addl %ebp,%edi +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- vpslld $2,%xmm2,%xmm2 +- addl 40(%esp),%edx +- xorl %ebx,%esi +- movl %edi,%ebp +- shldl $5,%edi,%edi +- addl %esi,%edx +- xorl %ebx,%ebp +- shrdl $7,%eax,%eax +- addl %edi,%edx +- vpor %xmm4,%xmm2,%xmm2 +- addl 44(%esp),%ecx +- xorl %eax,%ebp +- vmovdqa 80(%esp),%xmm4 +- movl %edx,%esi +- shldl $5,%edx,%edx +- addl %ebp,%ecx +- xorl %eax,%esi +- shrdl $7,%edi,%edi +- addl %edx,%ecx +- vpalignr $8,%xmm1,%xmm2,%xmm5 +- vpxor %xmm7,%xmm3,%xmm3 +- addl 48(%esp),%ebx +- xorl %edi,%esi +- movl %ecx,%ebp +- shldl $5,%ecx,%ecx +- vpxor %xmm4,%xmm3,%xmm3 +- vmovdqa %xmm7,80(%esp) +- addl %esi,%ebx +- xorl %edi,%ebp +- vmovdqa %xmm6,%xmm7 +- vpaddd %xmm2,%xmm6,%xmm6 +- shrdl $7,%edx,%edx +- addl %ecx,%ebx 
+- vpxor %xmm5,%xmm3,%xmm3 +- addl 52(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- vpsrld $30,%xmm3,%xmm5 +- vmovdqa %xmm6,32(%esp) +- addl %ebp,%eax +- xorl %edx,%esi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vpslld $2,%xmm3,%xmm3 +- addl 56(%esp),%edi +- xorl %ecx,%esi +- movl %eax,%ebp +- shldl $5,%eax,%eax +- addl %esi,%edi +- xorl %ecx,%ebp +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- vpor %xmm5,%xmm3,%xmm3 +- addl 60(%esp),%edx +- xorl %ebx,%ebp +- vmovdqa 96(%esp),%xmm5 +- movl %edi,%esi +- shldl $5,%edi,%edi +- addl %ebp,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %edi,%edx +- vpalignr $8,%xmm2,%xmm3,%xmm6 +- vpxor %xmm0,%xmm4,%xmm4 +- addl (%esp),%ecx +- xorl %eax,%esi +- movl %edx,%ebp +- shldl $5,%edx,%edx +- vpxor %xmm5,%xmm4,%xmm4 +- vmovdqa %xmm0,96(%esp) +- addl %esi,%ecx +- xorl %eax,%ebp +- vmovdqa %xmm7,%xmm0 +- vpaddd %xmm3,%xmm7,%xmm7 +- shrdl $7,%edi,%edi +- addl %edx,%ecx +- vpxor %xmm6,%xmm4,%xmm4 +- addl 4(%esp),%ebx +- xorl %edi,%ebp +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- vpsrld $30,%xmm4,%xmm6 +- vmovdqa %xmm7,48(%esp) +- addl %ebp,%ebx +- xorl %edi,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- vpslld $2,%xmm4,%xmm4 +- addl 8(%esp),%eax +- xorl %edx,%esi +- movl %ebx,%ebp +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- xorl %edx,%ebp +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vpor %xmm6,%xmm4,%xmm4 +- addl 12(%esp),%edi +- xorl %ecx,%ebp +- vmovdqa 64(%esp),%xmm6 +- movl %eax,%esi +- shldl $5,%eax,%eax +- addl %ebp,%edi +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- vpalignr $8,%xmm3,%xmm4,%xmm7 +- vpxor %xmm1,%xmm5,%xmm5 +- addl 16(%esp),%edx +- xorl %ebx,%esi +- movl %edi,%ebp +- shldl $5,%edi,%edi +- vpxor %xmm6,%xmm5,%xmm5 +- vmovdqa %xmm1,64(%esp) +- addl %esi,%edx +- xorl %ebx,%ebp +- vmovdqa %xmm0,%xmm1 +- vpaddd %xmm4,%xmm0,%xmm0 +- shrdl $7,%eax,%eax +- addl %edi,%edx +- vpxor %xmm7,%xmm5,%xmm5 +- addl 20(%esp),%ecx +- xorl %eax,%ebp +- movl %edx,%esi +- shldl $5,%edx,%edx +- vpsrld $30,%xmm5,%xmm7 +- vmovdqa %xmm0,(%esp) +- addl %ebp,%ecx +- xorl %eax,%esi +- shrdl $7,%edi,%edi +- addl %edx,%ecx +- vpslld $2,%xmm5,%xmm5 +- addl 24(%esp),%ebx +- xorl %edi,%esi +- movl %ecx,%ebp +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %edi,%ebp +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- vpor %xmm7,%xmm5,%xmm5 +- addl 28(%esp),%eax +- vmovdqa 80(%esp),%xmm7 +- shrdl $7,%ecx,%ecx +- movl %ebx,%esi +- xorl %edx,%ebp +- shldl $5,%ebx,%ebx +- addl %ebp,%eax +- xorl %ecx,%esi +- xorl %edx,%ecx +- addl %ebx,%eax +- vpalignr $8,%xmm4,%xmm5,%xmm0 +- vpxor %xmm2,%xmm6,%xmm6 +- addl 32(%esp),%edi +- andl %ecx,%esi +- xorl %edx,%ecx +- shrdl $7,%ebx,%ebx +- vpxor %xmm7,%xmm6,%xmm6 +- vmovdqa %xmm2,80(%esp) +- movl %eax,%ebp +- xorl %ecx,%esi +- vmovdqa %xmm1,%xmm2 +- vpaddd %xmm5,%xmm1,%xmm1 +- shldl $5,%eax,%eax +- addl %esi,%edi +- vpxor %xmm0,%xmm6,%xmm6 +- xorl %ebx,%ebp +- xorl %ecx,%ebx +- addl %eax,%edi +- addl 36(%esp),%edx +- vpsrld $30,%xmm6,%xmm0 +- vmovdqa %xmm1,16(%esp) +- andl %ebx,%ebp +- xorl %ecx,%ebx +- shrdl $7,%eax,%eax +- movl %edi,%esi +- vpslld $2,%xmm6,%xmm6 +- xorl %ebx,%ebp +- shldl $5,%edi,%edi +- addl %ebp,%edx +- xorl %eax,%esi +- xorl %ebx,%eax +- addl %edi,%edx +- addl 40(%esp),%ecx +- andl %eax,%esi +- vpor %xmm0,%xmm6,%xmm6 +- xorl %ebx,%eax +- shrdl $7,%edi,%edi +- vmovdqa 96(%esp),%xmm0 +- movl %edx,%ebp +- xorl %eax,%esi +- shldl $5,%edx,%edx +- addl %esi,%ecx +- xorl %edi,%ebp +- xorl %eax,%edi +- addl %edx,%ecx +- addl 44(%esp),%ebx +- andl %edi,%ebp +- xorl %eax,%edi +- shrdl $7,%edx,%edx 
+- movl %ecx,%esi +- xorl %edi,%ebp +- shldl $5,%ecx,%ecx +- addl %ebp,%ebx +- xorl %edx,%esi +- xorl %edi,%edx +- addl %ecx,%ebx +- vpalignr $8,%xmm5,%xmm6,%xmm1 +- vpxor %xmm3,%xmm7,%xmm7 +- addl 48(%esp),%eax +- andl %edx,%esi +- xorl %edi,%edx +- shrdl $7,%ecx,%ecx +- vpxor %xmm0,%xmm7,%xmm7 +- vmovdqa %xmm3,96(%esp) +- movl %ebx,%ebp +- xorl %edx,%esi +- vmovdqa 144(%esp),%xmm3 +- vpaddd %xmm6,%xmm2,%xmm2 +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- vpxor %xmm1,%xmm7,%xmm7 +- xorl %ecx,%ebp +- xorl %edx,%ecx +- addl %ebx,%eax +- addl 52(%esp),%edi +- vpsrld $30,%xmm7,%xmm1 +- vmovdqa %xmm2,32(%esp) +- andl %ecx,%ebp +- xorl %edx,%ecx +- shrdl $7,%ebx,%ebx +- movl %eax,%esi +- vpslld $2,%xmm7,%xmm7 +- xorl %ecx,%ebp +- shldl $5,%eax,%eax +- addl %ebp,%edi +- xorl %ebx,%esi +- xorl %ecx,%ebx +- addl %eax,%edi +- addl 56(%esp),%edx +- andl %ebx,%esi +- vpor %xmm1,%xmm7,%xmm7 +- xorl %ecx,%ebx +- shrdl $7,%eax,%eax +- vmovdqa 64(%esp),%xmm1 +- movl %edi,%ebp +- xorl %ebx,%esi +- shldl $5,%edi,%edi +- addl %esi,%edx +- xorl %eax,%ebp +- xorl %ebx,%eax +- addl %edi,%edx +- addl 60(%esp),%ecx +- andl %eax,%ebp +- xorl %ebx,%eax +- shrdl $7,%edi,%edi +- movl %edx,%esi +- xorl %eax,%ebp +- shldl $5,%edx,%edx +- addl %ebp,%ecx +- xorl %edi,%esi +- xorl %eax,%edi +- addl %edx,%ecx +- vpalignr $8,%xmm6,%xmm7,%xmm2 +- vpxor %xmm4,%xmm0,%xmm0 +- addl (%esp),%ebx +- andl %edi,%esi +- xorl %eax,%edi +- shrdl $7,%edx,%edx +- vpxor %xmm1,%xmm0,%xmm0 +- vmovdqa %xmm4,64(%esp) +- movl %ecx,%ebp +- xorl %edi,%esi +- vmovdqa %xmm3,%xmm4 +- vpaddd %xmm7,%xmm3,%xmm3 +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- vpxor %xmm2,%xmm0,%xmm0 +- xorl %edx,%ebp +- xorl %edi,%edx +- addl %ecx,%ebx +- addl 4(%esp),%eax +- vpsrld $30,%xmm0,%xmm2 +- vmovdqa %xmm3,48(%esp) +- andl %edx,%ebp +- xorl %edi,%edx +- shrdl $7,%ecx,%ecx +- movl %ebx,%esi +- vpslld $2,%xmm0,%xmm0 +- xorl %edx,%ebp +- shldl $5,%ebx,%ebx +- addl %ebp,%eax +- xorl %ecx,%esi +- xorl %edx,%ecx +- addl %ebx,%eax +- addl 8(%esp),%edi +- andl %ecx,%esi +- vpor %xmm2,%xmm0,%xmm0 +- xorl %edx,%ecx +- shrdl $7,%ebx,%ebx +- vmovdqa 80(%esp),%xmm2 +- movl %eax,%ebp +- xorl %ecx,%esi +- shldl $5,%eax,%eax +- addl %esi,%edi +- xorl %ebx,%ebp +- xorl %ecx,%ebx +- addl %eax,%edi +- addl 12(%esp),%edx +- andl %ebx,%ebp +- xorl %ecx,%ebx +- shrdl $7,%eax,%eax +- movl %edi,%esi +- xorl %ebx,%ebp +- shldl $5,%edi,%edi +- addl %ebp,%edx +- xorl %eax,%esi +- xorl %ebx,%eax +- addl %edi,%edx +- vpalignr $8,%xmm7,%xmm0,%xmm3 +- vpxor %xmm5,%xmm1,%xmm1 +- addl 16(%esp),%ecx +- andl %eax,%esi +- xorl %ebx,%eax +- shrdl $7,%edi,%edi +- vpxor %xmm2,%xmm1,%xmm1 +- vmovdqa %xmm5,80(%esp) +- movl %edx,%ebp +- xorl %eax,%esi +- vmovdqa %xmm4,%xmm5 +- vpaddd %xmm0,%xmm4,%xmm4 +- shldl $5,%edx,%edx +- addl %esi,%ecx +- vpxor %xmm3,%xmm1,%xmm1 +- xorl %edi,%ebp +- xorl %eax,%edi +- addl %edx,%ecx +- addl 20(%esp),%ebx +- vpsrld $30,%xmm1,%xmm3 +- vmovdqa %xmm4,(%esp) +- andl %edi,%ebp +- xorl %eax,%edi +- shrdl $7,%edx,%edx +- movl %ecx,%esi +- vpslld $2,%xmm1,%xmm1 +- xorl %edi,%ebp +- shldl $5,%ecx,%ecx +- addl %ebp,%ebx +- xorl %edx,%esi +- xorl %edi,%edx +- addl %ecx,%ebx +- addl 24(%esp),%eax +- andl %edx,%esi +- vpor %xmm3,%xmm1,%xmm1 +- xorl %edi,%edx +- shrdl $7,%ecx,%ecx +- vmovdqa 96(%esp),%xmm3 +- movl %ebx,%ebp +- xorl %edx,%esi +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- xorl %ecx,%ebp +- xorl %edx,%ecx +- addl %ebx,%eax +- addl 28(%esp),%edi +- andl %ecx,%ebp +- xorl %edx,%ecx +- shrdl $7,%ebx,%ebx +- movl %eax,%esi +- xorl %ecx,%ebp +- shldl $5,%eax,%eax +- addl %ebp,%edi 
+- xorl %ebx,%esi +- xorl %ecx,%ebx +- addl %eax,%edi +- vpalignr $8,%xmm0,%xmm1,%xmm4 +- vpxor %xmm6,%xmm2,%xmm2 +- addl 32(%esp),%edx +- andl %ebx,%esi +- xorl %ecx,%ebx +- shrdl $7,%eax,%eax +- vpxor %xmm3,%xmm2,%xmm2 +- vmovdqa %xmm6,96(%esp) +- movl %edi,%ebp +- xorl %ebx,%esi +- vmovdqa %xmm5,%xmm6 +- vpaddd %xmm1,%xmm5,%xmm5 +- shldl $5,%edi,%edi +- addl %esi,%edx +- vpxor %xmm4,%xmm2,%xmm2 +- xorl %eax,%ebp +- xorl %ebx,%eax +- addl %edi,%edx +- addl 36(%esp),%ecx +- vpsrld $30,%xmm2,%xmm4 +- vmovdqa %xmm5,16(%esp) +- andl %eax,%ebp +- xorl %ebx,%eax +- shrdl $7,%edi,%edi +- movl %edx,%esi +- vpslld $2,%xmm2,%xmm2 +- xorl %eax,%ebp +- shldl $5,%edx,%edx +- addl %ebp,%ecx +- xorl %edi,%esi +- xorl %eax,%edi +- addl %edx,%ecx +- addl 40(%esp),%ebx +- andl %edi,%esi +- vpor %xmm4,%xmm2,%xmm2 +- xorl %eax,%edi +- shrdl $7,%edx,%edx +- vmovdqa 64(%esp),%xmm4 +- movl %ecx,%ebp +- xorl %edi,%esi +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %edx,%ebp +- xorl %edi,%edx +- addl %ecx,%ebx +- addl 44(%esp),%eax +- andl %edx,%ebp +- xorl %edi,%edx +- shrdl $7,%ecx,%ecx +- movl %ebx,%esi +- xorl %edx,%ebp +- shldl $5,%ebx,%ebx +- addl %ebp,%eax +- xorl %edx,%esi +- addl %ebx,%eax +- vpalignr $8,%xmm1,%xmm2,%xmm5 +- vpxor %xmm7,%xmm3,%xmm3 +- addl 48(%esp),%edi +- xorl %ecx,%esi +- movl %eax,%ebp +- shldl $5,%eax,%eax +- vpxor %xmm4,%xmm3,%xmm3 +- vmovdqa %xmm7,64(%esp) +- addl %esi,%edi +- xorl %ecx,%ebp +- vmovdqa %xmm6,%xmm7 +- vpaddd %xmm2,%xmm6,%xmm6 +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- vpxor %xmm5,%xmm3,%xmm3 +- addl 52(%esp),%edx +- xorl %ebx,%ebp +- movl %edi,%esi +- shldl $5,%edi,%edi +- vpsrld $30,%xmm3,%xmm5 +- vmovdqa %xmm6,32(%esp) +- addl %ebp,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %edi,%edx +- vpslld $2,%xmm3,%xmm3 +- addl 56(%esp),%ecx +- xorl %eax,%esi +- movl %edx,%ebp +- shldl $5,%edx,%edx +- addl %esi,%ecx +- xorl %eax,%ebp +- shrdl $7,%edi,%edi +- addl %edx,%ecx +- vpor %xmm5,%xmm3,%xmm3 +- addl 60(%esp),%ebx +- xorl %edi,%ebp +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- addl %ebp,%ebx +- xorl %edi,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl (%esp),%eax +- vpaddd %xmm3,%xmm7,%xmm7 +- xorl %edx,%esi +- movl %ebx,%ebp +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- vmovdqa %xmm7,48(%esp) +- xorl %edx,%ebp +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 4(%esp),%edi +- xorl %ecx,%ebp +- movl %eax,%esi +- shldl $5,%eax,%eax +- addl %ebp,%edi +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- addl 8(%esp),%edx +- xorl %ebx,%esi +- movl %edi,%ebp +- shldl $5,%edi,%edi +- addl %esi,%edx +- xorl %ebx,%ebp +- shrdl $7,%eax,%eax +- addl %edi,%edx +- addl 12(%esp),%ecx +- xorl %eax,%ebp +- movl %edx,%esi +- shldl $5,%edx,%edx +- addl %ebp,%ecx +- xorl %eax,%esi +- shrdl $7,%edi,%edi +- addl %edx,%ecx +- movl 196(%esp),%ebp +- cmpl 200(%esp),%ebp +- je L008done +- vmovdqa 160(%esp),%xmm7 +- vmovdqa 176(%esp),%xmm6 +- vmovdqu (%ebp),%xmm0 +- vmovdqu 16(%ebp),%xmm1 +- vmovdqu 32(%ebp),%xmm2 +- vmovdqu 48(%ebp),%xmm3 +- addl $64,%ebp +- vpshufb %xmm6,%xmm0,%xmm0 +- movl %ebp,196(%esp) +- vmovdqa %xmm7,96(%esp) +- addl 16(%esp),%ebx +- xorl %edi,%esi +- vpshufb %xmm6,%xmm1,%xmm1 +- movl %ecx,%ebp +- shldl $5,%ecx,%ecx +- vpaddd %xmm7,%xmm0,%xmm4 +- addl %esi,%ebx +- xorl %edi,%ebp +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- vmovdqa %xmm4,(%esp) +- addl 20(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %ebp,%eax +- xorl %edx,%esi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 24(%esp),%edi +- xorl %ecx,%esi +- movl %eax,%ebp +- 
shldl $5,%eax,%eax +- addl %esi,%edi +- xorl %ecx,%ebp +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- addl 28(%esp),%edx +- xorl %ebx,%ebp +- movl %edi,%esi +- shldl $5,%edi,%edi +- addl %ebp,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %edi,%edx +- addl 32(%esp),%ecx +- xorl %eax,%esi +- vpshufb %xmm6,%xmm2,%xmm2 +- movl %edx,%ebp +- shldl $5,%edx,%edx +- vpaddd %xmm7,%xmm1,%xmm5 +- addl %esi,%ecx +- xorl %eax,%ebp +- shrdl $7,%edi,%edi +- addl %edx,%ecx +- vmovdqa %xmm5,16(%esp) +- addl 36(%esp),%ebx +- xorl %edi,%ebp +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- addl %ebp,%ebx +- xorl %edi,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 40(%esp),%eax +- xorl %edx,%esi +- movl %ebx,%ebp +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- xorl %edx,%ebp +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 44(%esp),%edi +- xorl %ecx,%ebp +- movl %eax,%esi +- shldl $5,%eax,%eax +- addl %ebp,%edi +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- addl 48(%esp),%edx +- xorl %ebx,%esi +- vpshufb %xmm6,%xmm3,%xmm3 +- movl %edi,%ebp +- shldl $5,%edi,%edi +- vpaddd %xmm7,%xmm2,%xmm6 +- addl %esi,%edx +- xorl %ebx,%ebp +- shrdl $7,%eax,%eax +- addl %edi,%edx +- vmovdqa %xmm6,32(%esp) +- addl 52(%esp),%ecx +- xorl %eax,%ebp +- movl %edx,%esi +- shldl $5,%edx,%edx +- addl %ebp,%ecx +- xorl %eax,%esi +- shrdl $7,%edi,%edi +- addl %edx,%ecx +- addl 56(%esp),%ebx +- xorl %edi,%esi +- movl %ecx,%ebp +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %edi,%ebp +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 60(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %ebp,%eax +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- movl 192(%esp),%ebp +- addl (%ebp),%eax +- addl 4(%ebp),%esi +- addl 8(%ebp),%ecx +- movl %eax,(%ebp) +- addl 12(%ebp),%edx +- movl %esi,4(%ebp) +- addl 16(%ebp),%edi +- movl %ecx,%ebx +- movl %ecx,8(%ebp) +- xorl %edx,%ebx +- movl %edx,12(%ebp) +- movl %edi,16(%ebp) +- movl %esi,%ebp +- andl %ebx,%esi +- movl %ebp,%ebx +- jmp L007loop +-.align 4,0x90 +-L008done: +- addl 16(%esp),%ebx +- xorl %edi,%esi +- movl %ecx,%ebp +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %edi,%ebp +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 20(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %ebp,%eax +- xorl %edx,%esi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 24(%esp),%edi +- xorl %ecx,%esi +- movl %eax,%ebp +- shldl $5,%eax,%eax +- addl %esi,%edi +- xorl %ecx,%ebp +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- addl 28(%esp),%edx +- xorl %ebx,%ebp +- movl %edi,%esi +- shldl $5,%edi,%edi +- addl %ebp,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %edi,%edx +- addl 32(%esp),%ecx +- xorl %eax,%esi +- movl %edx,%ebp +- shldl $5,%edx,%edx +- addl %esi,%ecx +- xorl %eax,%ebp +- shrdl $7,%edi,%edi +- addl %edx,%ecx +- addl 36(%esp),%ebx +- xorl %edi,%ebp +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- addl %ebp,%ebx +- xorl %edi,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 40(%esp),%eax +- xorl %edx,%esi +- movl %ebx,%ebp +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- xorl %edx,%ebp +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 44(%esp),%edi +- xorl %ecx,%ebp +- movl %eax,%esi +- shldl $5,%eax,%eax +- addl %ebp,%edi +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%edi +- addl 48(%esp),%edx +- xorl %ebx,%esi +- movl %edi,%ebp +- shldl $5,%edi,%edi +- addl %esi,%edx +- xorl %ebx,%ebp +- shrdl $7,%eax,%eax +- addl %edi,%edx +- addl 52(%esp),%ecx +- xorl %eax,%ebp +- movl %edx,%esi +- shldl $5,%edx,%edx +- addl %ebp,%ecx +- xorl %eax,%esi +- shrdl 
$7,%edi,%edi +- addl %edx,%ecx +- addl 56(%esp),%ebx +- xorl %edi,%esi +- movl %ecx,%ebp +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %edi,%ebp +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 60(%esp),%eax +- xorl %edx,%ebp +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %ebp,%eax +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vzeroall +- movl 192(%esp),%ebp +- addl (%ebp),%eax +- movl 204(%esp),%esp +- addl 4(%ebp),%esi +- addl 8(%ebp),%ecx +- movl %eax,(%ebp) +- addl 12(%ebp),%edx +- movl %esi,4(%ebp) +- addl 16(%ebp),%edi +- movl %ecx,8(%ebp) +- movl %edx,12(%ebp) +- movl %edi,16(%ebp) +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 6,0x90 +-LK_XX_XX: +-.long 1518500249,1518500249,1518500249,1518500249 +-.long 1859775393,1859775393,1859775393,1859775393 +-.long 2400959708,2400959708,2400959708,2400959708 +-.long 3395469782,3395469782,3395469782,3395469782 +-.long 66051,67438087,134810123,202182159 +-.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +-.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115 +-.byte 102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82 +-.byte 89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112 +-.byte 114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +-.section __IMPORT,__pointers,non_lazy_symbol_pointers +-L_OPENSSL_ia32cap_P$non_lazy_ptr: +-.indirect_symbol _OPENSSL_ia32cap_P +-.long 0 +-#endif +diff --git a/mac-x86/crypto/fipsmodule/sha256-586.S b/mac-x86/crypto/fipsmodule/sha256-586.S +deleted file mode 100644 +index c81cb9a..0000000 +--- a/mac-x86/crypto/fipsmodule/sha256-586.S ++++ /dev/null +@@ -1,5568 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _sha256_block_data_order +-.private_extern _sha256_block_data_order +-.align 4 +-_sha256_block_data_order: +-L_sha256_block_data_order_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%eax +- movl %esp,%ebx +- call L000pic_point +-L000pic_point: +- popl %ebp +- leal L001K256-L000pic_point(%ebp),%ebp +- subl $16,%esp +- andl $-64,%esp +- shll $6,%eax +- addl %edi,%eax +- movl %esi,(%esp) +- movl %edi,4(%esp) +- movl %eax,8(%esp) +- movl %ebx,12(%esp) +- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K256(%ebp),%edx +- movl (%edx),%ecx +- movl 4(%edx),%ebx +- testl $1048576,%ecx +- jnz L002loop +- movl 8(%edx),%edx +- testl $16777216,%ecx +- jz L003no_xmm +- andl $1073741824,%ecx +- andl $268435968,%ebx +- orl %ebx,%ecx +- andl $1342177280,%ecx +- cmpl $1342177280,%ecx +- je L004AVX +- testl $512,%ebx +- jnz L005SSSE3 +-L003no_xmm: +- subl %edi,%eax +- cmpl $256,%eax +- jae L006unrolled +- jmp L002loop +-.align 4,0x90 +-L002loop: +- movl (%edi),%eax +- movl 4(%edi),%ebx +- movl 8(%edi),%ecx +- bswap %eax +- movl 12(%edi),%edx +- bswap %ebx +- pushl %eax +- bswap %ecx +- pushl %ebx +- bswap %edx +- pushl %ecx +- pushl %edx +- movl 16(%edi),%eax +- movl 20(%edi),%ebx +- movl 24(%edi),%ecx +- bswap %eax +- movl 28(%edi),%edx +- bswap %ebx +- pushl %eax +- bswap %ecx +- pushl %ebx +- bswap %edx +- pushl %ecx +- pushl %edx +- movl 32(%edi),%eax +- movl 36(%edi),%ebx +- movl 40(%edi),%ecx +- bswap %eax +- movl 44(%edi),%edx +- bswap %ebx +- pushl %eax +- bswap %ecx +- pushl %ebx +- bswap %edx +- pushl %ecx +- pushl %edx +- movl 48(%edi),%eax +- movl 52(%edi),%ebx +- movl 56(%edi),%ecx +- bswap %eax +- movl 60(%edi),%edx +- bswap %ebx +- pushl 
%eax +- bswap %ecx +- pushl %ebx +- bswap %edx +- pushl %ecx +- pushl %edx +- addl $64,%edi +- leal -36(%esp),%esp +- movl %edi,104(%esp) +- movl (%esi),%eax +- movl 4(%esi),%ebx +- movl 8(%esi),%ecx +- movl 12(%esi),%edi +- movl %ebx,8(%esp) +- xorl %ecx,%ebx +- movl %ecx,12(%esp) +- movl %edi,16(%esp) +- movl %ebx,(%esp) +- movl 16(%esi),%edx +- movl 20(%esi),%ebx +- movl 24(%esi),%ecx +- movl 28(%esi),%edi +- movl %ebx,24(%esp) +- movl %ecx,28(%esp) +- movl %edi,32(%esp) +-.align 4,0x90 +-L00700_15: +- movl %edx,%ecx +- movl 24(%esp),%esi +- rorl $14,%ecx +- movl 28(%esp),%edi +- xorl %edx,%ecx +- xorl %edi,%esi +- movl 96(%esp),%ebx +- rorl $5,%ecx +- andl %edx,%esi +- movl %edx,20(%esp) +- xorl %ecx,%edx +- addl 32(%esp),%ebx +- xorl %edi,%esi +- rorl $6,%edx +- movl %eax,%ecx +- addl %esi,%ebx +- rorl $9,%ecx +- addl %edx,%ebx +- movl 8(%esp),%edi +- xorl %eax,%ecx +- movl %eax,4(%esp) +- leal -4(%esp),%esp +- rorl $11,%ecx +- movl (%ebp),%esi +- xorl %eax,%ecx +- movl 20(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %esi,%ebx +- movl %eax,(%esp) +- addl %ebx,%edx +- andl 4(%esp),%eax +- addl %ecx,%ebx +- xorl %edi,%eax +- addl $4,%ebp +- addl %ebx,%eax +- cmpl $3248222580,%esi +- jne L00700_15 +- movl 156(%esp),%ecx +- jmp L00816_63 +-.align 4,0x90 +-L00816_63: +- movl %ecx,%ebx +- movl 104(%esp),%esi +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 160(%esp),%ebx +- shrl $10,%edi +- addl 124(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 24(%esp),%esi +- rorl $14,%ecx +- addl %edi,%ebx +- movl 28(%esp),%edi +- xorl %edx,%ecx +- xorl %edi,%esi +- movl %ebx,96(%esp) +- rorl $5,%ecx +- andl %edx,%esi +- movl %edx,20(%esp) +- xorl %ecx,%edx +- addl 32(%esp),%ebx +- xorl %edi,%esi +- rorl $6,%edx +- movl %eax,%ecx +- addl %esi,%ebx +- rorl $9,%ecx +- addl %edx,%ebx +- movl 8(%esp),%edi +- xorl %eax,%ecx +- movl %eax,4(%esp) +- leal -4(%esp),%esp +- rorl $11,%ecx +- movl (%ebp),%esi +- xorl %eax,%ecx +- movl 20(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %esi,%ebx +- movl %eax,(%esp) +- addl %ebx,%edx +- andl 4(%esp),%eax +- addl %ecx,%ebx +- xorl %edi,%eax +- movl 156(%esp),%ecx +- addl $4,%ebp +- addl %ebx,%eax +- cmpl $3329325298,%esi +- jne L00816_63 +- movl 356(%esp),%esi +- movl 8(%esp),%ebx +- movl 16(%esp),%ecx +- addl (%esi),%eax +- addl 4(%esi),%ebx +- addl 8(%esi),%edi +- addl 12(%esi),%ecx +- movl %eax,(%esi) +- movl %ebx,4(%esi) +- movl %edi,8(%esi) +- movl %ecx,12(%esi) +- movl 24(%esp),%eax +- movl 28(%esp),%ebx +- movl 32(%esp),%ecx +- movl 360(%esp),%edi +- addl 16(%esi),%edx +- addl 20(%esi),%eax +- addl 24(%esi),%ebx +- addl 28(%esi),%ecx +- movl %edx,16(%esi) +- movl %eax,20(%esi) +- movl %ebx,24(%esi) +- movl %ecx,28(%esi) +- leal 356(%esp),%esp +- subl $256,%ebp +- cmpl 8(%esp),%edi +- jb L002loop +- movl 12(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 6,0x90 +-L001K256: +-.long 
1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298 +-.long 66051,67438087,134810123,202182159 +-.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97 +-.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +-.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +-.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +-.byte 62,0 +-.align 4,0x90 +-L006unrolled: +- leal -96(%esp),%esp +- movl (%esi),%eax +- movl 4(%esi),%ebp +- movl 8(%esi),%ecx +- movl 12(%esi),%ebx +- movl %ebp,4(%esp) +- xorl %ecx,%ebp +- movl %ecx,8(%esp) +- movl %ebx,12(%esp) +- movl 16(%esi),%edx +- movl 20(%esi),%ebx +- movl 24(%esi),%ecx +- movl 28(%esi),%esi +- movl %ebx,20(%esp) +- movl %ecx,24(%esp) +- movl %esi,28(%esp) +- jmp L009grand_loop +-.align 4,0x90 +-L009grand_loop: +- movl (%edi),%ebx +- movl 4(%edi),%ecx +- bswap %ebx +- movl 8(%edi),%esi +- bswap %ecx +- movl %ebx,32(%esp) +- bswap %esi +- movl %ecx,36(%esp) +- movl %esi,40(%esp) +- movl 12(%edi),%ebx +- movl 16(%edi),%ecx +- bswap %ebx +- movl 20(%edi),%esi +- bswap %ecx +- movl %ebx,44(%esp) +- bswap %esi +- movl %ecx,48(%esp) +- movl %esi,52(%esp) +- movl 24(%edi),%ebx +- movl 28(%edi),%ecx +- bswap %ebx +- movl 32(%edi),%esi +- bswap %ecx +- movl %ebx,56(%esp) +- bswap %esi +- movl %ecx,60(%esp) +- movl %esi,64(%esp) +- movl 36(%edi),%ebx +- movl 40(%edi),%ecx +- bswap %ebx +- movl 44(%edi),%esi +- bswap %ecx +- movl %ebx,68(%esp) +- bswap %esi +- movl %ecx,72(%esp) +- movl %esi,76(%esp) +- movl 48(%edi),%ebx +- movl 52(%edi),%ecx +- bswap %ebx +- movl 56(%edi),%esi +- bswap %ecx +- movl %ebx,80(%esp) +- bswap %esi +- movl %ecx,84(%esp) +- movl %esi,88(%esp) +- movl 60(%edi),%ebx +- addl $64,%edi +- bswap %ebx +- movl %edi,100(%esp) +- movl %ebx,92(%esp) +- movl %edx,%ecx +- movl 20(%esp),%esi +- rorl $14,%edx +- movl 24(%esp),%edi +- xorl %ecx,%edx +- movl 32(%esp),%ebx +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- addl 28(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 4(%esp),%edi +- xorl %eax,%ecx +- movl %eax,(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 1116352408(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- rorl $2,%ecx +- addl %edx,%ebp +- addl 12(%esp),%edx +- addl %ecx,%ebp +- movl %edx,%esi +- movl 16(%esp),%ecx +- rorl $14,%edx +- movl 20(%esp),%edi +- xorl %esi,%edx +- movl 36(%esp),%ebx +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,12(%esp) +- xorl %esi,%edx +- addl 24(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl (%esp),%edi +- xorl %ebp,%esi +- movl %ebp,28(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 1899447441(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- rorl $2,%esi +- addl %edx,%eax +- addl 
8(%esp),%edx +- addl %esi,%eax +- movl %edx,%ecx +- movl 12(%esp),%esi +- rorl $14,%edx +- movl 16(%esp),%edi +- xorl %ecx,%edx +- movl 40(%esp),%ebx +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- addl 20(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 28(%esp),%edi +- xorl %eax,%ecx +- movl %eax,24(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 3049323471(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- rorl $2,%ecx +- addl %edx,%ebp +- addl 4(%esp),%edx +- addl %ecx,%ebp +- movl %edx,%esi +- movl 8(%esp),%ecx +- rorl $14,%edx +- movl 12(%esp),%edi +- xorl %esi,%edx +- movl 44(%esp),%ebx +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,4(%esp) +- xorl %esi,%edx +- addl 16(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 24(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,20(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 3921009573(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- rorl $2,%esi +- addl %edx,%eax +- addl (%esp),%edx +- addl %esi,%eax +- movl %edx,%ecx +- movl 4(%esp),%esi +- rorl $14,%edx +- movl 8(%esp),%edi +- xorl %ecx,%edx +- movl 48(%esp),%ebx +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- addl 12(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 20(%esp),%edi +- xorl %eax,%ecx +- movl %eax,16(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 961987163(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- rorl $2,%ecx +- addl %edx,%ebp +- addl 28(%esp),%edx +- addl %ecx,%ebp +- movl %edx,%esi +- movl (%esp),%ecx +- rorl $14,%edx +- movl 4(%esp),%edi +- xorl %esi,%edx +- movl 52(%esp),%ebx +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,28(%esp) +- xorl %esi,%edx +- addl 8(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 16(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,12(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 1508970993(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- rorl $2,%esi +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %esi,%eax +- movl %edx,%ecx +- movl 28(%esp),%esi +- rorl $14,%edx +- movl (%esp),%edi +- xorl %ecx,%edx +- movl 56(%esp),%ebx +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- addl 4(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 12(%esp),%edi +- xorl %eax,%ecx +- movl %eax,8(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 2453635748(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- rorl $2,%ecx +- addl %edx,%ebp +- addl 20(%esp),%edx +- addl %ecx,%ebp +- movl %edx,%esi +- movl 24(%esp),%ecx +- rorl $14,%edx +- movl 28(%esp),%edi +- xorl %esi,%edx +- movl 60(%esp),%ebx +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,20(%esp) +- xorl %esi,%edx +- addl (%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 8(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,4(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 2870763221(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- rorl $2,%esi +- addl 
%edx,%eax +- addl 16(%esp),%edx +- addl %esi,%eax +- movl %edx,%ecx +- movl 20(%esp),%esi +- rorl $14,%edx +- movl 24(%esp),%edi +- xorl %ecx,%edx +- movl 64(%esp),%ebx +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- addl 28(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 4(%esp),%edi +- xorl %eax,%ecx +- movl %eax,(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 3624381080(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- rorl $2,%ecx +- addl %edx,%ebp +- addl 12(%esp),%edx +- addl %ecx,%ebp +- movl %edx,%esi +- movl 16(%esp),%ecx +- rorl $14,%edx +- movl 20(%esp),%edi +- xorl %esi,%edx +- movl 68(%esp),%ebx +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,12(%esp) +- xorl %esi,%edx +- addl 24(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl (%esp),%edi +- xorl %ebp,%esi +- movl %ebp,28(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 310598401(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- rorl $2,%esi +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %esi,%eax +- movl %edx,%ecx +- movl 12(%esp),%esi +- rorl $14,%edx +- movl 16(%esp),%edi +- xorl %ecx,%edx +- movl 72(%esp),%ebx +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- addl 20(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 28(%esp),%edi +- xorl %eax,%ecx +- movl %eax,24(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 607225278(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- rorl $2,%ecx +- addl %edx,%ebp +- addl 4(%esp),%edx +- addl %ecx,%ebp +- movl %edx,%esi +- movl 8(%esp),%ecx +- rorl $14,%edx +- movl 12(%esp),%edi +- xorl %esi,%edx +- movl 76(%esp),%ebx +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,4(%esp) +- xorl %esi,%edx +- addl 16(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 24(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,20(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 1426881987(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- rorl $2,%esi +- addl %edx,%eax +- addl (%esp),%edx +- addl %esi,%eax +- movl %edx,%ecx +- movl 4(%esp),%esi +- rorl $14,%edx +- movl 8(%esp),%edi +- xorl %ecx,%edx +- movl 80(%esp),%ebx +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- addl 12(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 20(%esp),%edi +- xorl %eax,%ecx +- movl %eax,16(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 1925078388(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- rorl $2,%ecx +- addl %edx,%ebp +- addl 28(%esp),%edx +- addl %ecx,%ebp +- movl %edx,%esi +- movl (%esp),%ecx +- rorl $14,%edx +- movl 4(%esp),%edi +- xorl %esi,%edx +- movl 84(%esp),%ebx +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,28(%esp) +- xorl %esi,%edx +- addl 8(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 16(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,12(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 2162078206(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- 
rorl $2,%esi +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %esi,%eax +- movl %edx,%ecx +- movl 28(%esp),%esi +- rorl $14,%edx +- movl (%esp),%edi +- xorl %ecx,%edx +- movl 88(%esp),%ebx +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- addl 4(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 12(%esp),%edi +- xorl %eax,%ecx +- movl %eax,8(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 2614888103(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- rorl $2,%ecx +- addl %edx,%ebp +- addl 20(%esp),%edx +- addl %ecx,%ebp +- movl %edx,%esi +- movl 24(%esp),%ecx +- rorl $14,%edx +- movl 28(%esp),%edi +- xorl %esi,%edx +- movl 92(%esp),%ebx +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,20(%esp) +- xorl %esi,%edx +- addl (%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 8(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,4(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 3248222580(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 36(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %esi,%eax +- movl 88(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 32(%esp),%ebx +- shrl $10,%edi +- addl 68(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 20(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 24(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,32(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- addl 28(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 4(%esp),%edi +- xorl %eax,%ecx +- movl %eax,(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 3835390401(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 40(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 12(%esp),%edx +- addl %ecx,%ebp +- movl 92(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 36(%esp),%ebx +- shrl $10,%edi +- addl 72(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 16(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 20(%esp),%edi +- xorl %esi,%edx +- movl %ebx,36(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,12(%esp) +- xorl %esi,%edx +- addl 24(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl (%esp),%edi +- xorl %ebp,%esi +- movl %ebp,28(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 4022224774(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 44(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %esi,%eax +- movl 32(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 40(%esp),%ebx +- shrl $10,%edi +- addl 76(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 12(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 16(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,40(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl 
%ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- addl 20(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 28(%esp),%edi +- xorl %eax,%ecx +- movl %eax,24(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 264347078(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 48(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 4(%esp),%edx +- addl %ecx,%ebp +- movl 36(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 44(%esp),%ebx +- shrl $10,%edi +- addl 80(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 8(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 12(%esp),%edi +- xorl %esi,%edx +- movl %ebx,44(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,4(%esp) +- xorl %esi,%edx +- addl 16(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 24(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,20(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 604807628(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 52(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl (%esp),%edx +- addl %esi,%eax +- movl 40(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 48(%esp),%ebx +- shrl $10,%edi +- addl 84(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 4(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 8(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,48(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- addl 12(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 20(%esp),%edi +- xorl %eax,%ecx +- movl %eax,16(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 770255983(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 56(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 28(%esp),%edx +- addl %ecx,%ebp +- movl 44(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 52(%esp),%ebx +- shrl $10,%edi +- addl 88(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl (%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 4(%esp),%edi +- xorl %esi,%edx +- movl %ebx,52(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,28(%esp) +- xorl %esi,%edx +- addl 8(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 16(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,12(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 1249150122(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 60(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %esi,%eax +- movl 48(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 56(%esp),%ebx +- shrl $10,%edi +- addl 92(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 28(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl (%esp),%edi +- xorl 
%ecx,%edx +- movl %ebx,56(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- addl 4(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 12(%esp),%edi +- xorl %eax,%ecx +- movl %eax,8(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 1555081692(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 64(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 20(%esp),%edx +- addl %ecx,%ebp +- movl 52(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 60(%esp),%ebx +- shrl $10,%edi +- addl 32(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 24(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 28(%esp),%edi +- xorl %esi,%edx +- movl %ebx,60(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,20(%esp) +- xorl %esi,%edx +- addl (%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 8(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,4(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 1996064986(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 68(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %esi,%eax +- movl 56(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 64(%esp),%ebx +- shrl $10,%edi +- addl 36(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 20(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 24(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,64(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- addl 28(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 4(%esp),%edi +- xorl %eax,%ecx +- movl %eax,(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 2554220882(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 72(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 12(%esp),%edx +- addl %ecx,%ebp +- movl 60(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 68(%esp),%ebx +- shrl $10,%edi +- addl 40(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 16(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 20(%esp),%edi +- xorl %esi,%edx +- movl %ebx,68(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,12(%esp) +- xorl %esi,%edx +- addl 24(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl (%esp),%edi +- xorl %ebp,%esi +- movl %ebp,28(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 2821834349(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 76(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %esi,%eax +- movl 64(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 72(%esp),%ebx +- shrl $10,%edi +- addl 44(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- 
movl 12(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 16(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,72(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- addl 20(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 28(%esp),%edi +- xorl %eax,%ecx +- movl %eax,24(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 2952996808(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 80(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 4(%esp),%edx +- addl %ecx,%ebp +- movl 68(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 76(%esp),%ebx +- shrl $10,%edi +- addl 48(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 8(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 12(%esp),%edi +- xorl %esi,%edx +- movl %ebx,76(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,4(%esp) +- xorl %esi,%edx +- addl 16(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 24(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,20(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 3210313671(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 84(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl (%esp),%edx +- addl %esi,%eax +- movl 72(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 80(%esp),%ebx +- shrl $10,%edi +- addl 52(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 4(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 8(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,80(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- addl 12(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 20(%esp),%edi +- xorl %eax,%ecx +- movl %eax,16(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 3336571891(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 88(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 28(%esp),%edx +- addl %ecx,%ebp +- movl 76(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 84(%esp),%ebx +- shrl $10,%edi +- addl 56(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl (%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 4(%esp),%edi +- xorl %esi,%edx +- movl %ebx,84(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,28(%esp) +- xorl %esi,%edx +- addl 8(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 16(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,12(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 3584528711(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 92(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %esi,%eax +- movl 80(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 
88(%esp),%ebx +- shrl $10,%edi +- addl 60(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 28(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl (%esp),%edi +- xorl %ecx,%edx +- movl %ebx,88(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- addl 4(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 12(%esp),%edi +- xorl %eax,%ecx +- movl %eax,8(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 113926993(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 32(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 20(%esp),%edx +- addl %ecx,%ebp +- movl 84(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 92(%esp),%ebx +- shrl $10,%edi +- addl 64(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 24(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 28(%esp),%edi +- xorl %esi,%edx +- movl %ebx,92(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,20(%esp) +- xorl %esi,%edx +- addl (%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 8(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,4(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 338241895(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 36(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %esi,%eax +- movl 88(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 32(%esp),%ebx +- shrl $10,%edi +- addl 68(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 20(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 24(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,32(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- addl 28(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 4(%esp),%edi +- xorl %eax,%ecx +- movl %eax,(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 666307205(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 40(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 12(%esp),%edx +- addl %ecx,%ebp +- movl 92(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 36(%esp),%ebx +- shrl $10,%edi +- addl 72(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 16(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 20(%esp),%edi +- xorl %esi,%edx +- movl %ebx,36(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,12(%esp) +- xorl %esi,%edx +- addl 24(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl (%esp),%edi +- xorl %ebp,%esi +- movl %ebp,28(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 773529912(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 44(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %esi,%eax +- movl 32(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx 
+- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 40(%esp),%ebx +- shrl $10,%edi +- addl 76(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 12(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 16(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,40(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- addl 20(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 28(%esp),%edi +- xorl %eax,%ecx +- movl %eax,24(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 1294757372(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 48(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 4(%esp),%edx +- addl %ecx,%ebp +- movl 36(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 44(%esp),%ebx +- shrl $10,%edi +- addl 80(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 8(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 12(%esp),%edi +- xorl %esi,%edx +- movl %ebx,44(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,4(%esp) +- xorl %esi,%edx +- addl 16(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 24(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,20(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 1396182291(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 52(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl (%esp),%edx +- addl %esi,%eax +- movl 40(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 48(%esp),%ebx +- shrl $10,%edi +- addl 84(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 4(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 8(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,48(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- addl 12(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 20(%esp),%edi +- xorl %eax,%ecx +- movl %eax,16(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 1695183700(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 56(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 28(%esp),%edx +- addl %ecx,%ebp +- movl 44(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 52(%esp),%ebx +- shrl $10,%edi +- addl 88(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl (%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 4(%esp),%edi +- xorl %esi,%edx +- movl %ebx,52(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,28(%esp) +- xorl %esi,%edx +- addl 8(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 16(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,12(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 1986661051(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 60(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %esi,%eax +- movl 48(%esp),%esi +- movl %ecx,%ebx +- 
rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 56(%esp),%ebx +- shrl $10,%edi +- addl 92(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 28(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl (%esp),%edi +- xorl %ecx,%edx +- movl %ebx,56(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- addl 4(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 12(%esp),%edi +- xorl %eax,%ecx +- movl %eax,8(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 2177026350(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 64(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 20(%esp),%edx +- addl %ecx,%ebp +- movl 52(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 60(%esp),%ebx +- shrl $10,%edi +- addl 32(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 24(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 28(%esp),%edi +- xorl %esi,%edx +- movl %ebx,60(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,20(%esp) +- xorl %esi,%edx +- addl (%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 8(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,4(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 2456956037(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 68(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %esi,%eax +- movl 56(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 64(%esp),%ebx +- shrl $10,%edi +- addl 36(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 20(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 24(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,64(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- addl 28(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 4(%esp),%edi +- xorl %eax,%ecx +- movl %eax,(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 2730485921(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 72(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 12(%esp),%edx +- addl %ecx,%ebp +- movl 60(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 68(%esp),%ebx +- shrl $10,%edi +- addl 40(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 16(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 20(%esp),%edi +- xorl %esi,%edx +- movl %ebx,68(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,12(%esp) +- xorl %esi,%edx +- addl 24(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl (%esp),%edi +- xorl %ebp,%esi +- movl %ebp,28(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 2820302411(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 76(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- 
addl 8(%esp),%edx +- addl %esi,%eax +- movl 64(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 72(%esp),%ebx +- shrl $10,%edi +- addl 44(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 12(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 16(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,72(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- addl 20(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 28(%esp),%edi +- xorl %eax,%ecx +- movl %eax,24(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 3259730800(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 80(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 4(%esp),%edx +- addl %ecx,%ebp +- movl 68(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 76(%esp),%ebx +- shrl $10,%edi +- addl 48(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 8(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 12(%esp),%edi +- xorl %esi,%edx +- movl %ebx,76(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,4(%esp) +- xorl %esi,%edx +- addl 16(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 24(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,20(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 3345764771(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 84(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl (%esp),%edx +- addl %esi,%eax +- movl 72(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 80(%esp),%ebx +- shrl $10,%edi +- addl 52(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 4(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 8(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,80(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- addl 12(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 20(%esp),%edi +- xorl %eax,%ecx +- movl %eax,16(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 3516065817(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 88(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 28(%esp),%edx +- addl %ecx,%ebp +- movl 76(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 84(%esp),%ebx +- shrl $10,%edi +- addl 56(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl (%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 4(%esp),%edi +- xorl %esi,%edx +- movl %ebx,84(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,28(%esp) +- xorl %esi,%edx +- addl 8(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 16(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,12(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 3600352804(%ebx,%edx,1),%edx +- xorl 
%ecx,%esi +- xorl %edi,%eax +- movl 92(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %esi,%eax +- movl 80(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 88(%esp),%ebx +- shrl $10,%edi +- addl 60(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 28(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl (%esp),%edi +- xorl %ecx,%edx +- movl %ebx,88(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- addl 4(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 12(%esp),%edi +- xorl %eax,%ecx +- movl %eax,8(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 4094571909(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 32(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 20(%esp),%edx +- addl %ecx,%ebp +- movl 84(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 92(%esp),%ebx +- shrl $10,%edi +- addl 64(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 24(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 28(%esp),%edi +- xorl %esi,%edx +- movl %ebx,92(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,20(%esp) +- xorl %esi,%edx +- addl (%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 8(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,4(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 275423344(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 36(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %esi,%eax +- movl 88(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 32(%esp),%ebx +- shrl $10,%edi +- addl 68(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 20(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 24(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,32(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- addl 28(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 4(%esp),%edi +- xorl %eax,%ecx +- movl %eax,(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 430227734(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 40(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 12(%esp),%edx +- addl %ecx,%ebp +- movl 92(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 36(%esp),%ebx +- shrl $10,%edi +- addl 72(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 16(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 20(%esp),%edi +- xorl %esi,%edx +- movl %ebx,36(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,12(%esp) +- xorl %esi,%edx +- addl 24(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl (%esp),%edi +- xorl %ebp,%esi +- movl %ebp,28(%esp) +- xorl %edi,%ebp 
+- rorl $11,%esi +- andl %ebp,%eax +- leal 506948616(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 44(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %esi,%eax +- movl 32(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 40(%esp),%ebx +- shrl $10,%edi +- addl 76(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 12(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 16(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,40(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- addl 20(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 28(%esp),%edi +- xorl %eax,%ecx +- movl %eax,24(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 659060556(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 48(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 4(%esp),%edx +- addl %ecx,%ebp +- movl 36(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 44(%esp),%ebx +- shrl $10,%edi +- addl 80(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 8(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 12(%esp),%edi +- xorl %esi,%edx +- movl %ebx,44(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,4(%esp) +- xorl %esi,%edx +- addl 16(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 24(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,20(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 883997877(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 52(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl (%esp),%edx +- addl %esi,%eax +- movl 40(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 48(%esp),%ebx +- shrl $10,%edi +- addl 84(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 4(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 8(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,48(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- addl 12(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 20(%esp),%edi +- xorl %eax,%ecx +- movl %eax,16(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 958139571(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 56(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 28(%esp),%edx +- addl %ecx,%ebp +- movl 44(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 52(%esp),%ebx +- shrl $10,%edi +- addl 88(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl (%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 4(%esp),%edi +- xorl %esi,%edx +- movl %ebx,52(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,28(%esp) +- xorl %esi,%edx +- addl 8(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- 
movl 16(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,12(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 1322822218(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 60(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %esi,%eax +- movl 48(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 56(%esp),%ebx +- shrl $10,%edi +- addl 92(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 28(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl (%esp),%edi +- xorl %ecx,%edx +- movl %ebx,56(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- addl 4(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 12(%esp),%edi +- xorl %eax,%ecx +- movl %eax,8(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 1537002063(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 64(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 20(%esp),%edx +- addl %ecx,%ebp +- movl 52(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 60(%esp),%ebx +- shrl $10,%edi +- addl 32(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 24(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 28(%esp),%edi +- xorl %esi,%edx +- movl %ebx,60(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,20(%esp) +- xorl %esi,%edx +- addl (%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 8(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,4(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 1747873779(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 68(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %esi,%eax +- movl 56(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 64(%esp),%ebx +- shrl $10,%edi +- addl 36(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 20(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 24(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,64(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- addl 28(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 4(%esp),%edi +- xorl %eax,%ecx +- movl %eax,(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 1955562222(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 72(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 12(%esp),%edx +- addl %ecx,%ebp +- movl 60(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 68(%esp),%ebx +- shrl $10,%edi +- addl 40(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 16(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 20(%esp),%edi +- xorl %esi,%edx +- movl %ebx,68(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,12(%esp) +- xorl %esi,%edx +- addl 24(%esp),%ebx +- xorl %ecx,%edi +- 
rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl (%esp),%edi +- xorl %ebp,%esi +- movl %ebp,28(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 2024104815(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 76(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %esi,%eax +- movl 64(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 72(%esp),%ebx +- shrl $10,%edi +- addl 44(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 12(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 16(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,72(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- addl 20(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 28(%esp),%edi +- xorl %eax,%ecx +- movl %eax,24(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 2227730452(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 80(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 4(%esp),%edx +- addl %ecx,%ebp +- movl 68(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 76(%esp),%ebx +- shrl $10,%edi +- addl 48(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 8(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 12(%esp),%edi +- xorl %esi,%edx +- movl %ebx,76(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,4(%esp) +- xorl %esi,%edx +- addl 16(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 24(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,20(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 2361852424(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 84(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl (%esp),%edx +- addl %esi,%eax +- movl 72(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 80(%esp),%ebx +- shrl $10,%edi +- addl 52(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 4(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl 8(%esp),%edi +- xorl %ecx,%edx +- movl %ebx,80(%esp) +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- addl 12(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 20(%esp),%edi +- xorl %eax,%ecx +- movl %eax,16(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 2428436474(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 88(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 28(%esp),%edx +- addl %ecx,%ebp +- movl 76(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 84(%esp),%ebx +- shrl $10,%edi +- addl 56(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl (%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 4(%esp),%edi +- xorl %esi,%edx +- movl %ebx,84(%esp) +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- 
movl %esi,28(%esp) +- xorl %esi,%edx +- addl 8(%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 16(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,12(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 2756734187(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- movl 92(%esp),%ecx +- rorl $2,%esi +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %esi,%eax +- movl 80(%esp),%esi +- movl %ecx,%ebx +- rorl $11,%ecx +- movl %esi,%edi +- rorl $2,%esi +- xorl %ebx,%ecx +- shrl $3,%ebx +- rorl $7,%ecx +- xorl %edi,%esi +- xorl %ecx,%ebx +- rorl $17,%esi +- addl 88(%esp),%ebx +- shrl $10,%edi +- addl 60(%esp),%ebx +- movl %edx,%ecx +- xorl %esi,%edi +- movl 28(%esp),%esi +- rorl $14,%edx +- addl %edi,%ebx +- movl (%esp),%edi +- xorl %ecx,%edx +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- addl 4(%esp),%ebx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%ebx +- rorl $9,%ecx +- movl %eax,%esi +- movl 12(%esp),%edi +- xorl %eax,%ecx +- movl %eax,8(%esp) +- xorl %edi,%eax +- rorl $11,%ecx +- andl %eax,%ebp +- leal 3204031479(%ebx,%edx,1),%edx +- xorl %esi,%ecx +- xorl %edi,%ebp +- movl 32(%esp),%esi +- rorl $2,%ecx +- addl %edx,%ebp +- addl 20(%esp),%edx +- addl %ecx,%ebp +- movl 84(%esp),%ecx +- movl %esi,%ebx +- rorl $11,%esi +- movl %ecx,%edi +- rorl $2,%ecx +- xorl %ebx,%esi +- shrl $3,%ebx +- rorl $7,%esi +- xorl %edi,%ecx +- xorl %esi,%ebx +- rorl $17,%ecx +- addl 92(%esp),%ebx +- shrl $10,%edi +- addl 64(%esp),%ebx +- movl %edx,%esi +- xorl %ecx,%edi +- movl 24(%esp),%ecx +- rorl $14,%edx +- addl %edi,%ebx +- movl 28(%esp),%edi +- xorl %esi,%edx +- xorl %edi,%ecx +- rorl $5,%edx +- andl %esi,%ecx +- movl %esi,20(%esp) +- xorl %esi,%edx +- addl (%esp),%ebx +- xorl %ecx,%edi +- rorl $6,%edx +- movl %ebp,%esi +- addl %edi,%ebx +- rorl $9,%esi +- movl %ebp,%ecx +- movl 8(%esp),%edi +- xorl %ebp,%esi +- movl %ebp,4(%esp) +- xorl %edi,%ebp +- rorl $11,%esi +- andl %ebp,%eax +- leal 3329325298(%ebx,%edx,1),%edx +- xorl %ecx,%esi +- xorl %edi,%eax +- rorl $2,%esi +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %esi,%eax +- movl 96(%esp),%esi +- xorl %edi,%ebp +- movl 12(%esp),%ecx +- addl (%esi),%eax +- addl 4(%esi),%ebp +- addl 8(%esi),%edi +- addl 12(%esi),%ecx +- movl %eax,(%esi) +- movl %ebp,4(%esi) +- movl %edi,8(%esi) +- movl %ecx,12(%esi) +- movl %ebp,4(%esp) +- xorl %edi,%ebp +- movl %edi,8(%esp) +- movl %ecx,12(%esp) +- movl 20(%esp),%edi +- movl 24(%esp),%ebx +- movl 28(%esp),%ecx +- addl 16(%esi),%edx +- addl 20(%esi),%edi +- addl 24(%esi),%ebx +- addl 28(%esi),%ecx +- movl %edx,16(%esi) +- movl %edi,20(%esi) +- movl %ebx,24(%esi) +- movl %ecx,28(%esi) +- movl %edi,20(%esp) +- movl 100(%esp),%edi +- movl %ebx,24(%esp) +- movl %ecx,28(%esp) +- cmpl 104(%esp),%edi +- jb L009grand_loop +- movl 108(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 5,0x90 +-L005SSSE3: +- leal -96(%esp),%esp +- movl (%esi),%eax +- movl 4(%esi),%ebx +- movl 8(%esi),%ecx +- movl 12(%esi),%edi +- movl %ebx,4(%esp) +- xorl %ecx,%ebx +- movl %ecx,8(%esp) +- movl %edi,12(%esp) +- movl 16(%esi),%edx +- movl 20(%esi),%edi +- movl 24(%esi),%ecx +- movl 28(%esi),%esi +- movl %edi,20(%esp) +- movl 100(%esp),%edi +- movl %ecx,24(%esp) +- movl %esi,28(%esp) +- movdqa 256(%ebp),%xmm7 +- jmp L010grand_ssse3 +-.align 4,0x90 +-L010grand_ssse3: +- movdqu (%edi),%xmm0 +- movdqu 16(%edi),%xmm1 +- movdqu 32(%edi),%xmm2 +- movdqu 48(%edi),%xmm3 +- addl 
$64,%edi +-.byte 102,15,56,0,199 +- movl %edi,100(%esp) +-.byte 102,15,56,0,207 +- movdqa (%ebp),%xmm4 +-.byte 102,15,56,0,215 +- movdqa 16(%ebp),%xmm5 +- paddd %xmm0,%xmm4 +-.byte 102,15,56,0,223 +- movdqa 32(%ebp),%xmm6 +- paddd %xmm1,%xmm5 +- movdqa 48(%ebp),%xmm7 +- movdqa %xmm4,32(%esp) +- paddd %xmm2,%xmm6 +- movdqa %xmm5,48(%esp) +- paddd %xmm3,%xmm7 +- movdqa %xmm6,64(%esp) +- movdqa %xmm7,80(%esp) +- jmp L011ssse3_00_47 +-.align 4,0x90 +-L011ssse3_00_47: +- addl $64,%ebp +- movl %edx,%ecx +- movdqa %xmm1,%xmm4 +- rorl $14,%edx +- movl 20(%esp),%esi +- movdqa %xmm3,%xmm7 +- xorl %ecx,%edx +- movl 24(%esp),%edi +-.byte 102,15,58,15,224,4 +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +-.byte 102,15,58,15,250,4 +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- movdqa %xmm4,%xmm5 +- rorl $6,%edx +- movl %eax,%ecx +- movdqa %xmm4,%xmm6 +- addl %edi,%edx +- movl 4(%esp),%edi +- psrld $3,%xmm4 +- movl %eax,%esi +- rorl $9,%ecx +- paddd %xmm7,%xmm0 +- movl %eax,(%esp) +- xorl %eax,%ecx +- psrld $7,%xmm6 +- xorl %edi,%eax +- addl 28(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- pshufd $250,%xmm3,%xmm7 +- xorl %esi,%ecx +- addl 32(%esp),%edx +- pslld $14,%xmm5 +- xorl %edi,%ebx +- rorl $2,%ecx +- pxor %xmm6,%xmm4 +- addl %edx,%ebx +- addl 12(%esp),%edx +- psrld $11,%xmm6 +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- pxor %xmm5,%xmm4 +- movl 16(%esp),%esi +- xorl %ecx,%edx +- pslld $11,%xmm5 +- movl 20(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- pxor %xmm6,%xmm4 +- andl %ecx,%esi +- movl %ecx,12(%esp) +- movdqa %xmm7,%xmm6 +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- pxor %xmm5,%xmm4 +- movl %ebx,%ecx +- addl %edi,%edx +- psrld $10,%xmm7 +- movl (%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- paddd %xmm4,%xmm0 +- movl %ebx,28(%esp) +- xorl %ebx,%ecx +- psrlq $17,%xmm6 +- xorl %edi,%ebx +- addl 24(%esp),%edx +- rorl $11,%ecx +- pxor %xmm6,%xmm7 +- andl %ebx,%eax +- xorl %esi,%ecx +- psrlq $2,%xmm6 +- addl 36(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- pxor %xmm6,%xmm7 +- addl %edx,%eax +- addl 8(%esp),%edx +- pshufd $128,%xmm7,%xmm7 +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 12(%esp),%esi +- xorl %ecx,%edx +- movl 16(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- psrldq $8,%xmm7 +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- paddd %xmm7,%xmm0 +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 28(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,24(%esp) +- pshufd $80,%xmm0,%xmm7 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 20(%esp),%edx +- movdqa %xmm7,%xmm6 +- rorl $11,%ecx +- psrld $10,%xmm7 +- andl %eax,%ebx +- psrlq $17,%xmm6 +- xorl %esi,%ecx +- addl 40(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- pxor %xmm6,%xmm7 +- addl %edx,%ebx +- addl 4(%esp),%edx +- psrlq $2,%xmm6 +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- pxor %xmm6,%xmm7 +- movl 8(%esp),%esi +- xorl %ecx,%edx +- movl 12(%esp),%edi +- pshufd $8,%xmm7,%xmm7 +- xorl %edi,%esi +- rorl $5,%edx +- movdqa (%ebp),%xmm6 +- andl %ecx,%esi +- movl %ecx,4(%esp) +- pslldq $8,%xmm7 +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 24(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- paddd %xmm7,%xmm0 +- movl %ebx,20(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 16(%esp),%edx +- paddd %xmm0,%xmm6 +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 44(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl (%esp),%edx +- addl %ecx,%eax +- movdqa 
%xmm6,32(%esp) +- movl %edx,%ecx +- movdqa %xmm2,%xmm4 +- rorl $14,%edx +- movl 4(%esp),%esi +- movdqa %xmm0,%xmm7 +- xorl %ecx,%edx +- movl 8(%esp),%edi +-.byte 102,15,58,15,225,4 +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +-.byte 102,15,58,15,251,4 +- movl %ecx,(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- movdqa %xmm4,%xmm5 +- rorl $6,%edx +- movl %eax,%ecx +- movdqa %xmm4,%xmm6 +- addl %edi,%edx +- movl 20(%esp),%edi +- psrld $3,%xmm4 +- movl %eax,%esi +- rorl $9,%ecx +- paddd %xmm7,%xmm1 +- movl %eax,16(%esp) +- xorl %eax,%ecx +- psrld $7,%xmm6 +- xorl %edi,%eax +- addl 12(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- pshufd $250,%xmm0,%xmm7 +- xorl %esi,%ecx +- addl 48(%esp),%edx +- pslld $14,%xmm5 +- xorl %edi,%ebx +- rorl $2,%ecx +- pxor %xmm6,%xmm4 +- addl %edx,%ebx +- addl 28(%esp),%edx +- psrld $11,%xmm6 +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- pxor %xmm5,%xmm4 +- movl (%esp),%esi +- xorl %ecx,%edx +- pslld $11,%xmm5 +- movl 4(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- pxor %xmm6,%xmm4 +- andl %ecx,%esi +- movl %ecx,28(%esp) +- movdqa %xmm7,%xmm6 +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- pxor %xmm5,%xmm4 +- movl %ebx,%ecx +- addl %edi,%edx +- psrld $10,%xmm7 +- movl 16(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- paddd %xmm4,%xmm1 +- movl %ebx,12(%esp) +- xorl %ebx,%ecx +- psrlq $17,%xmm6 +- xorl %edi,%ebx +- addl 8(%esp),%edx +- rorl $11,%ecx +- pxor %xmm6,%xmm7 +- andl %ebx,%eax +- xorl %esi,%ecx +- psrlq $2,%xmm6 +- addl 52(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- pxor %xmm6,%xmm7 +- addl %edx,%eax +- addl 24(%esp),%edx +- pshufd $128,%xmm7,%xmm7 +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 28(%esp),%esi +- xorl %ecx,%edx +- movl (%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- psrldq $8,%xmm7 +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- paddd %xmm7,%xmm1 +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 12(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,8(%esp) +- pshufd $80,%xmm1,%xmm7 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 4(%esp),%edx +- movdqa %xmm7,%xmm6 +- rorl $11,%ecx +- psrld $10,%xmm7 +- andl %eax,%ebx +- psrlq $17,%xmm6 +- xorl %esi,%ecx +- addl 56(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- pxor %xmm6,%xmm7 +- addl %edx,%ebx +- addl 20(%esp),%edx +- psrlq $2,%xmm6 +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- pxor %xmm6,%xmm7 +- movl 24(%esp),%esi +- xorl %ecx,%edx +- movl 28(%esp),%edi +- pshufd $8,%xmm7,%xmm7 +- xorl %edi,%esi +- rorl $5,%edx +- movdqa 16(%ebp),%xmm6 +- andl %ecx,%esi +- movl %ecx,20(%esp) +- pslldq $8,%xmm7 +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 8(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- paddd %xmm7,%xmm1 +- movl %ebx,4(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl (%esp),%edx +- paddd %xmm1,%xmm6 +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 60(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %ecx,%eax +- movdqa %xmm6,48(%esp) +- movl %edx,%ecx +- movdqa %xmm3,%xmm4 +- rorl $14,%edx +- movl 20(%esp),%esi +- movdqa %xmm1,%xmm7 +- xorl %ecx,%edx +- movl 24(%esp),%edi +-.byte 102,15,58,15,226,4 +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +-.byte 102,15,58,15,248,4 +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- movdqa %xmm4,%xmm5 +- rorl $6,%edx +- movl %eax,%ecx +- movdqa %xmm4,%xmm6 +- addl %edi,%edx +- movl 4(%esp),%edi +- psrld $3,%xmm4 +- movl %eax,%esi 
+- rorl $9,%ecx +- paddd %xmm7,%xmm2 +- movl %eax,(%esp) +- xorl %eax,%ecx +- psrld $7,%xmm6 +- xorl %edi,%eax +- addl 28(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- pshufd $250,%xmm1,%xmm7 +- xorl %esi,%ecx +- addl 64(%esp),%edx +- pslld $14,%xmm5 +- xorl %edi,%ebx +- rorl $2,%ecx +- pxor %xmm6,%xmm4 +- addl %edx,%ebx +- addl 12(%esp),%edx +- psrld $11,%xmm6 +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- pxor %xmm5,%xmm4 +- movl 16(%esp),%esi +- xorl %ecx,%edx +- pslld $11,%xmm5 +- movl 20(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- pxor %xmm6,%xmm4 +- andl %ecx,%esi +- movl %ecx,12(%esp) +- movdqa %xmm7,%xmm6 +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- pxor %xmm5,%xmm4 +- movl %ebx,%ecx +- addl %edi,%edx +- psrld $10,%xmm7 +- movl (%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- paddd %xmm4,%xmm2 +- movl %ebx,28(%esp) +- xorl %ebx,%ecx +- psrlq $17,%xmm6 +- xorl %edi,%ebx +- addl 24(%esp),%edx +- rorl $11,%ecx +- pxor %xmm6,%xmm7 +- andl %ebx,%eax +- xorl %esi,%ecx +- psrlq $2,%xmm6 +- addl 68(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- pxor %xmm6,%xmm7 +- addl %edx,%eax +- addl 8(%esp),%edx +- pshufd $128,%xmm7,%xmm7 +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 12(%esp),%esi +- xorl %ecx,%edx +- movl 16(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- psrldq $8,%xmm7 +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- paddd %xmm7,%xmm2 +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 28(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,24(%esp) +- pshufd $80,%xmm2,%xmm7 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 20(%esp),%edx +- movdqa %xmm7,%xmm6 +- rorl $11,%ecx +- psrld $10,%xmm7 +- andl %eax,%ebx +- psrlq $17,%xmm6 +- xorl %esi,%ecx +- addl 72(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- pxor %xmm6,%xmm7 +- addl %edx,%ebx +- addl 4(%esp),%edx +- psrlq $2,%xmm6 +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- pxor %xmm6,%xmm7 +- movl 8(%esp),%esi +- xorl %ecx,%edx +- movl 12(%esp),%edi +- pshufd $8,%xmm7,%xmm7 +- xorl %edi,%esi +- rorl $5,%edx +- movdqa 32(%ebp),%xmm6 +- andl %ecx,%esi +- movl %ecx,4(%esp) +- pslldq $8,%xmm7 +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 24(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- paddd %xmm7,%xmm2 +- movl %ebx,20(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 16(%esp),%edx +- paddd %xmm2,%xmm6 +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 76(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl (%esp),%edx +- addl %ecx,%eax +- movdqa %xmm6,64(%esp) +- movl %edx,%ecx +- movdqa %xmm0,%xmm4 +- rorl $14,%edx +- movl 4(%esp),%esi +- movdqa %xmm2,%xmm7 +- xorl %ecx,%edx +- movl 8(%esp),%edi +-.byte 102,15,58,15,227,4 +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +-.byte 102,15,58,15,249,4 +- movl %ecx,(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- movdqa %xmm4,%xmm5 +- rorl $6,%edx +- movl %eax,%ecx +- movdqa %xmm4,%xmm6 +- addl %edi,%edx +- movl 20(%esp),%edi +- psrld $3,%xmm4 +- movl %eax,%esi +- rorl $9,%ecx +- paddd %xmm7,%xmm3 +- movl %eax,16(%esp) +- xorl %eax,%ecx +- psrld $7,%xmm6 +- xorl %edi,%eax +- addl 12(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- pshufd $250,%xmm2,%xmm7 +- xorl %esi,%ecx +- addl 80(%esp),%edx +- pslld $14,%xmm5 +- xorl %edi,%ebx +- rorl $2,%ecx +- pxor %xmm6,%xmm4 +- addl %edx,%ebx +- addl 28(%esp),%edx +- psrld $11,%xmm6 +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- pxor %xmm5,%xmm4 +- movl (%esp),%esi +- xorl 
%ecx,%edx +- pslld $11,%xmm5 +- movl 4(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- pxor %xmm6,%xmm4 +- andl %ecx,%esi +- movl %ecx,28(%esp) +- movdqa %xmm7,%xmm6 +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- pxor %xmm5,%xmm4 +- movl %ebx,%ecx +- addl %edi,%edx +- psrld $10,%xmm7 +- movl 16(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- paddd %xmm4,%xmm3 +- movl %ebx,12(%esp) +- xorl %ebx,%ecx +- psrlq $17,%xmm6 +- xorl %edi,%ebx +- addl 8(%esp),%edx +- rorl $11,%ecx +- pxor %xmm6,%xmm7 +- andl %ebx,%eax +- xorl %esi,%ecx +- psrlq $2,%xmm6 +- addl 84(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- pxor %xmm6,%xmm7 +- addl %edx,%eax +- addl 24(%esp),%edx +- pshufd $128,%xmm7,%xmm7 +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 28(%esp),%esi +- xorl %ecx,%edx +- movl (%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- psrldq $8,%xmm7 +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- paddd %xmm7,%xmm3 +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 12(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,8(%esp) +- pshufd $80,%xmm3,%xmm7 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 4(%esp),%edx +- movdqa %xmm7,%xmm6 +- rorl $11,%ecx +- psrld $10,%xmm7 +- andl %eax,%ebx +- psrlq $17,%xmm6 +- xorl %esi,%ecx +- addl 88(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- pxor %xmm6,%xmm7 +- addl %edx,%ebx +- addl 20(%esp),%edx +- psrlq $2,%xmm6 +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- pxor %xmm6,%xmm7 +- movl 24(%esp),%esi +- xorl %ecx,%edx +- movl 28(%esp),%edi +- pshufd $8,%xmm7,%xmm7 +- xorl %edi,%esi +- rorl $5,%edx +- movdqa 48(%ebp),%xmm6 +- andl %ecx,%esi +- movl %ecx,20(%esp) +- pslldq $8,%xmm7 +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 8(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- paddd %xmm7,%xmm3 +- movl %ebx,4(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl (%esp),%edx +- paddd %xmm3,%xmm6 +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 92(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %ecx,%eax +- movdqa %xmm6,80(%esp) +- cmpl $66051,64(%ebp) +- jne L011ssse3_00_47 +- movl %edx,%ecx +- rorl $14,%edx +- movl 20(%esp),%esi +- xorl %ecx,%edx +- movl 24(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 4(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 28(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 32(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- addl %edx,%ebx +- addl 12(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- movl 16(%esp),%esi +- xorl %ecx,%edx +- movl 20(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,12(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl (%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- movl %ebx,28(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 24(%esp),%edx +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 36(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 12(%esp),%esi +- xorl %ecx,%edx +- movl 16(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl 
%eax,%ecx +- addl %edi,%edx +- movl 28(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,24(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 20(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 40(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- addl %edx,%ebx +- addl 4(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- movl 8(%esp),%esi +- xorl %ecx,%edx +- movl 12(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,4(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 24(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- movl %ebx,20(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 16(%esp),%edx +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 44(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl (%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 4(%esp),%esi +- xorl %ecx,%edx +- movl 8(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 20(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,16(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 12(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 48(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- addl %edx,%ebx +- addl 28(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- movl (%esp),%esi +- xorl %ecx,%edx +- movl 4(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,28(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 16(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- movl %ebx,12(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 8(%esp),%edx +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 52(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 28(%esp),%esi +- xorl %ecx,%edx +- movl (%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 12(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,8(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 4(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 56(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- addl %edx,%ebx +- addl 20(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- movl 24(%esp),%esi +- xorl %ecx,%edx +- movl 28(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,20(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 8(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- movl %ebx,4(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl (%esp),%edx +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 60(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 20(%esp),%esi +- xorl %ecx,%edx +- movl 24(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 4(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 28(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- xorl 
%esi,%ecx +- addl 64(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- addl %edx,%ebx +- addl 12(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- movl 16(%esp),%esi +- xorl %ecx,%edx +- movl 20(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,12(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl (%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- movl %ebx,28(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 24(%esp),%edx +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 68(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 12(%esp),%esi +- xorl %ecx,%edx +- movl 16(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 28(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,24(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 20(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 72(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- addl %edx,%ebx +- addl 4(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- movl 8(%esp),%esi +- xorl %ecx,%edx +- movl 12(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,4(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 24(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- movl %ebx,20(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 16(%esp),%edx +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 76(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl (%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 4(%esp),%esi +- xorl %ecx,%edx +- movl 8(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 20(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,16(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 12(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 80(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- addl %edx,%ebx +- addl 28(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- movl (%esp),%esi +- xorl %ecx,%edx +- movl 4(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,28(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 16(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- movl %ebx,12(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 8(%esp),%edx +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 84(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- rorl $14,%edx +- movl 28(%esp),%esi +- xorl %ecx,%edx +- movl (%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 12(%esp),%edi +- movl %eax,%esi +- rorl $9,%ecx +- movl %eax,8(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 4(%esp),%edx +- rorl $11,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 88(%esp),%edx +- xorl %edi,%ebx +- rorl $2,%ecx +- addl %edx,%ebx +- addl 20(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- rorl $14,%edx +- movl 24(%esp),%esi +- xorl %ecx,%edx +- 
movl 28(%esp),%edi +- xorl %edi,%esi +- rorl $5,%edx +- andl %ecx,%esi +- movl %ecx,20(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- rorl $6,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 8(%esp),%edi +- movl %ebx,%esi +- rorl $9,%ecx +- movl %ebx,4(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl (%esp),%edx +- rorl $11,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 92(%esp),%edx +- xorl %edi,%eax +- rorl $2,%ecx +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %ecx,%eax +- movl 96(%esp),%esi +- xorl %edi,%ebx +- movl 12(%esp),%ecx +- addl (%esi),%eax +- addl 4(%esi),%ebx +- addl 8(%esi),%edi +- addl 12(%esi),%ecx +- movl %eax,(%esi) +- movl %ebx,4(%esi) +- movl %edi,8(%esi) +- movl %ecx,12(%esi) +- movl %ebx,4(%esp) +- xorl %edi,%ebx +- movl %edi,8(%esp) +- movl %ecx,12(%esp) +- movl 20(%esp),%edi +- movl 24(%esp),%ecx +- addl 16(%esi),%edx +- addl 20(%esi),%edi +- addl 24(%esi),%ecx +- movl %edx,16(%esi) +- movl %edi,20(%esi) +- movl %edi,20(%esp) +- movl 28(%esp),%edi +- movl %ecx,24(%esi) +- addl 28(%esi),%edi +- movl %ecx,24(%esp) +- movl %edi,28(%esi) +- movl %edi,28(%esp) +- movl 100(%esp),%edi +- movdqa 64(%ebp),%xmm7 +- subl $192,%ebp +- cmpl 104(%esp),%edi +- jb L010grand_ssse3 +- movl 108(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 5,0x90 +-L004AVX: +- leal -96(%esp),%esp +- vzeroall +- movl (%esi),%eax +- movl 4(%esi),%ebx +- movl 8(%esi),%ecx +- movl 12(%esi),%edi +- movl %ebx,4(%esp) +- xorl %ecx,%ebx +- movl %ecx,8(%esp) +- movl %edi,12(%esp) +- movl 16(%esi),%edx +- movl 20(%esi),%edi +- movl 24(%esi),%ecx +- movl 28(%esi),%esi +- movl %edi,20(%esp) +- movl 100(%esp),%edi +- movl %ecx,24(%esp) +- movl %esi,28(%esp) +- vmovdqa 256(%ebp),%xmm7 +- jmp L012grand_avx +-.align 5,0x90 +-L012grand_avx: +- vmovdqu (%edi),%xmm0 +- vmovdqu 16(%edi),%xmm1 +- vmovdqu 32(%edi),%xmm2 +- vmovdqu 48(%edi),%xmm3 +- addl $64,%edi +- vpshufb %xmm7,%xmm0,%xmm0 +- movl %edi,100(%esp) +- vpshufb %xmm7,%xmm1,%xmm1 +- vpshufb %xmm7,%xmm2,%xmm2 +- vpaddd (%ebp),%xmm0,%xmm4 +- vpshufb %xmm7,%xmm3,%xmm3 +- vpaddd 16(%ebp),%xmm1,%xmm5 +- vpaddd 32(%ebp),%xmm2,%xmm6 +- vpaddd 48(%ebp),%xmm3,%xmm7 +- vmovdqa %xmm4,32(%esp) +- vmovdqa %xmm5,48(%esp) +- vmovdqa %xmm6,64(%esp) +- vmovdqa %xmm7,80(%esp) +- jmp L013avx_00_47 +-.align 4,0x90 +-L013avx_00_47: +- addl $64,%ebp +- vpalignr $4,%xmm0,%xmm1,%xmm4 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 20(%esp),%esi +- vpalignr $4,%xmm2,%xmm3,%xmm7 +- xorl %ecx,%edx +- movl 24(%esp),%edi +- xorl %edi,%esi +- vpsrld $7,%xmm4,%xmm6 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- vpaddd %xmm7,%xmm0,%xmm0 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrld $3,%xmm4,%xmm7 +- movl %eax,%ecx +- addl %edi,%edx +- movl 4(%esp),%edi +- vpslld $14,%xmm4,%xmm5 +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,(%esp) +- vpxor %xmm6,%xmm7,%xmm4 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 28(%esp),%edx +- vpshufd $250,%xmm3,%xmm7 +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- vpsrld $11,%xmm6,%xmm6 +- addl 32(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- vpxor %xmm5,%xmm4,%xmm4 +- addl %edx,%ebx +- addl 12(%esp),%edx +- addl %ecx,%ebx +- vpslld $11,%xmm5,%xmm5 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 16(%esp),%esi +- vpxor %xmm6,%xmm4,%xmm4 +- xorl %ecx,%edx +- movl 20(%esp),%edi +- xorl %edi,%esi +- vpsrld $10,%xmm7,%xmm6 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,12(%esp) +- vpxor %xmm5,%xmm4,%xmm4 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx 
+- vpsrlq $17,%xmm7,%xmm5 +- movl %ebx,%ecx +- addl %edi,%edx +- movl (%esp),%edi +- vpaddd %xmm4,%xmm0,%xmm0 +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,28(%esp) +- vpxor %xmm5,%xmm6,%xmm6 +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 24(%esp),%edx +- vpsrlq $19,%xmm7,%xmm7 +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- vpxor %xmm7,%xmm6,%xmm6 +- addl 36(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- vpshufd $132,%xmm6,%xmm7 +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %ecx,%eax +- vpsrldq $8,%xmm7,%xmm7 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 12(%esp),%esi +- vpaddd %xmm7,%xmm0,%xmm0 +- xorl %ecx,%edx +- movl 16(%esp),%edi +- xorl %edi,%esi +- vpshufd $80,%xmm0,%xmm7 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- vpsrld $10,%xmm7,%xmm6 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrlq $17,%xmm7,%xmm5 +- movl %eax,%ecx +- addl %edi,%edx +- movl 28(%esp),%edi +- vpxor %xmm5,%xmm6,%xmm6 +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,24(%esp) +- vpsrlq $19,%xmm7,%xmm7 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 20(%esp),%edx +- vpxor %xmm7,%xmm6,%xmm6 +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- vpshufd $232,%xmm6,%xmm7 +- addl 40(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- vpslldq $8,%xmm7,%xmm7 +- addl %edx,%ebx +- addl 4(%esp),%edx +- addl %ecx,%ebx +- vpaddd %xmm7,%xmm0,%xmm0 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 8(%esp),%esi +- vpaddd (%ebp),%xmm0,%xmm6 +- xorl %ecx,%edx +- movl 12(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,4(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 24(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,20(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 16(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 44(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl (%esp),%edx +- addl %ecx,%eax +- vmovdqa %xmm6,32(%esp) +- vpalignr $4,%xmm1,%xmm2,%xmm4 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 4(%esp),%esi +- vpalignr $4,%xmm3,%xmm0,%xmm7 +- xorl %ecx,%edx +- movl 8(%esp),%edi +- xorl %edi,%esi +- vpsrld $7,%xmm4,%xmm6 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- vpaddd %xmm7,%xmm1,%xmm1 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrld $3,%xmm4,%xmm7 +- movl %eax,%ecx +- addl %edi,%edx +- movl 20(%esp),%edi +- vpslld $14,%xmm4,%xmm5 +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,16(%esp) +- vpxor %xmm6,%xmm7,%xmm4 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 12(%esp),%edx +- vpshufd $250,%xmm0,%xmm7 +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- vpsrld $11,%xmm6,%xmm6 +- addl 48(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- vpxor %xmm5,%xmm4,%xmm4 +- addl %edx,%ebx +- addl 28(%esp),%edx +- addl %ecx,%ebx +- vpslld $11,%xmm5,%xmm5 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl (%esp),%esi +- vpxor %xmm6,%xmm4,%xmm4 +- xorl %ecx,%edx +- movl 4(%esp),%edi +- xorl %edi,%esi +- vpsrld $10,%xmm7,%xmm6 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,28(%esp) +- vpxor %xmm5,%xmm4,%xmm4 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrlq $17,%xmm7,%xmm5 +- movl %ebx,%ecx +- addl %edi,%edx +- movl 16(%esp),%edi +- vpaddd %xmm4,%xmm1,%xmm1 +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,12(%esp) +- vpxor %xmm5,%xmm6,%xmm6 +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 8(%esp),%edx +- vpsrlq 
$19,%xmm7,%xmm7 +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- vpxor %xmm7,%xmm6,%xmm6 +- addl 52(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- vpshufd $132,%xmm6,%xmm7 +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %ecx,%eax +- vpsrldq $8,%xmm7,%xmm7 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 28(%esp),%esi +- vpaddd %xmm7,%xmm1,%xmm1 +- xorl %ecx,%edx +- movl (%esp),%edi +- xorl %edi,%esi +- vpshufd $80,%xmm1,%xmm7 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- vpsrld $10,%xmm7,%xmm6 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrlq $17,%xmm7,%xmm5 +- movl %eax,%ecx +- addl %edi,%edx +- movl 12(%esp),%edi +- vpxor %xmm5,%xmm6,%xmm6 +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,8(%esp) +- vpsrlq $19,%xmm7,%xmm7 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 4(%esp),%edx +- vpxor %xmm7,%xmm6,%xmm6 +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- vpshufd $232,%xmm6,%xmm7 +- addl 56(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- vpslldq $8,%xmm7,%xmm7 +- addl %edx,%ebx +- addl 20(%esp),%edx +- addl %ecx,%ebx +- vpaddd %xmm7,%xmm1,%xmm1 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 24(%esp),%esi +- vpaddd 16(%ebp),%xmm1,%xmm6 +- xorl %ecx,%edx +- movl 28(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,20(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 8(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,4(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl (%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 60(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %ecx,%eax +- vmovdqa %xmm6,48(%esp) +- vpalignr $4,%xmm2,%xmm3,%xmm4 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 20(%esp),%esi +- vpalignr $4,%xmm0,%xmm1,%xmm7 +- xorl %ecx,%edx +- movl 24(%esp),%edi +- xorl %edi,%esi +- vpsrld $7,%xmm4,%xmm6 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- vpaddd %xmm7,%xmm2,%xmm2 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrld $3,%xmm4,%xmm7 +- movl %eax,%ecx +- addl %edi,%edx +- movl 4(%esp),%edi +- vpslld $14,%xmm4,%xmm5 +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,(%esp) +- vpxor %xmm6,%xmm7,%xmm4 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 28(%esp),%edx +- vpshufd $250,%xmm1,%xmm7 +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- vpsrld $11,%xmm6,%xmm6 +- addl 64(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- vpxor %xmm5,%xmm4,%xmm4 +- addl %edx,%ebx +- addl 12(%esp),%edx +- addl %ecx,%ebx +- vpslld $11,%xmm5,%xmm5 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 16(%esp),%esi +- vpxor %xmm6,%xmm4,%xmm4 +- xorl %ecx,%edx +- movl 20(%esp),%edi +- xorl %edi,%esi +- vpsrld $10,%xmm7,%xmm6 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,12(%esp) +- vpxor %xmm5,%xmm4,%xmm4 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrlq $17,%xmm7,%xmm5 +- movl %ebx,%ecx +- addl %edi,%edx +- movl (%esp),%edi +- vpaddd %xmm4,%xmm2,%xmm2 +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,28(%esp) +- vpxor %xmm5,%xmm6,%xmm6 +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 24(%esp),%edx +- vpsrlq $19,%xmm7,%xmm7 +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- vpxor %xmm7,%xmm6,%xmm6 +- addl 68(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- vpshufd $132,%xmm6,%xmm7 +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %ecx,%eax +- vpsrldq $8,%xmm7,%xmm7 +- 
movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 12(%esp),%esi +- vpaddd %xmm7,%xmm2,%xmm2 +- xorl %ecx,%edx +- movl 16(%esp),%edi +- xorl %edi,%esi +- vpshufd $80,%xmm2,%xmm7 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- vpsrld $10,%xmm7,%xmm6 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrlq $17,%xmm7,%xmm5 +- movl %eax,%ecx +- addl %edi,%edx +- movl 28(%esp),%edi +- vpxor %xmm5,%xmm6,%xmm6 +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,24(%esp) +- vpsrlq $19,%xmm7,%xmm7 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 20(%esp),%edx +- vpxor %xmm7,%xmm6,%xmm6 +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- vpshufd $232,%xmm6,%xmm7 +- addl 72(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- vpslldq $8,%xmm7,%xmm7 +- addl %edx,%ebx +- addl 4(%esp),%edx +- addl %ecx,%ebx +- vpaddd %xmm7,%xmm2,%xmm2 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 8(%esp),%esi +- vpaddd 32(%ebp),%xmm2,%xmm6 +- xorl %ecx,%edx +- movl 12(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,4(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 24(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,20(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 16(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 76(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl (%esp),%edx +- addl %ecx,%eax +- vmovdqa %xmm6,64(%esp) +- vpalignr $4,%xmm3,%xmm0,%xmm4 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 4(%esp),%esi +- vpalignr $4,%xmm1,%xmm2,%xmm7 +- xorl %ecx,%edx +- movl 8(%esp),%edi +- xorl %edi,%esi +- vpsrld $7,%xmm4,%xmm6 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- vpaddd %xmm7,%xmm3,%xmm3 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrld $3,%xmm4,%xmm7 +- movl %eax,%ecx +- addl %edi,%edx +- movl 20(%esp),%edi +- vpslld $14,%xmm4,%xmm5 +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,16(%esp) +- vpxor %xmm6,%xmm7,%xmm4 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 12(%esp),%edx +- vpshufd $250,%xmm2,%xmm7 +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- vpsrld $11,%xmm6,%xmm6 +- addl 80(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- vpxor %xmm5,%xmm4,%xmm4 +- addl %edx,%ebx +- addl 28(%esp),%edx +- addl %ecx,%ebx +- vpslld $11,%xmm5,%xmm5 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl (%esp),%esi +- vpxor %xmm6,%xmm4,%xmm4 +- xorl %ecx,%edx +- movl 4(%esp),%edi +- xorl %edi,%esi +- vpsrld $10,%xmm7,%xmm6 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,28(%esp) +- vpxor %xmm5,%xmm4,%xmm4 +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrlq $17,%xmm7,%xmm5 +- movl %ebx,%ecx +- addl %edi,%edx +- movl 16(%esp),%edi +- vpaddd %xmm4,%xmm3,%xmm3 +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,12(%esp) +- vpxor %xmm5,%xmm6,%xmm6 +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 8(%esp),%edx +- vpsrlq $19,%xmm7,%xmm7 +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- vpxor %xmm7,%xmm6,%xmm6 +- addl 84(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- vpshufd $132,%xmm6,%xmm7 +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %ecx,%eax +- vpsrldq $8,%xmm7,%xmm7 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 28(%esp),%esi +- vpaddd %xmm7,%xmm3,%xmm3 +- xorl %ecx,%edx +- movl (%esp),%edi +- xorl %edi,%esi +- vpshufd $80,%xmm3,%xmm7 +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- vpsrld $10,%xmm7,%xmm6 +- xorl %ecx,%edx 
+- xorl %esi,%edi +- shrdl $6,%edx,%edx +- vpsrlq $17,%xmm7,%xmm5 +- movl %eax,%ecx +- addl %edi,%edx +- movl 12(%esp),%edi +- vpxor %xmm5,%xmm6,%xmm6 +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,8(%esp) +- vpsrlq $19,%xmm7,%xmm7 +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 4(%esp),%edx +- vpxor %xmm7,%xmm6,%xmm6 +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- vpshufd $232,%xmm6,%xmm7 +- addl 88(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- vpslldq $8,%xmm7,%xmm7 +- addl %edx,%ebx +- addl 20(%esp),%edx +- addl %ecx,%ebx +- vpaddd %xmm7,%xmm3,%xmm3 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 24(%esp),%esi +- vpaddd 48(%ebp),%xmm3,%xmm6 +- xorl %ecx,%edx +- movl 28(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,20(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 8(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,4(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl (%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 92(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %ecx,%eax +- vmovdqa %xmm6,80(%esp) +- cmpl $66051,64(%ebp) +- jne L013avx_00_47 +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 20(%esp),%esi +- xorl %ecx,%edx +- movl 24(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 4(%esp),%edi +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 28(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 32(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- addl %edx,%ebx +- addl 12(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 16(%esp),%esi +- xorl %ecx,%edx +- movl 20(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,12(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl (%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,28(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 24(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 36(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 12(%esp),%esi +- xorl %ecx,%edx +- movl 16(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 28(%esp),%edi +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,24(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 20(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 40(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- addl %edx,%ebx +- addl 4(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 8(%esp),%esi +- xorl %ecx,%edx +- movl 12(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,4(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 24(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,20(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 16(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 
44(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl (%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 4(%esp),%esi +- xorl %ecx,%edx +- movl 8(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 20(%esp),%edi +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,16(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 12(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 48(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- addl %edx,%ebx +- addl 28(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl (%esp),%esi +- xorl %ecx,%edx +- movl 4(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,28(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 16(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,12(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 8(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 52(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 28(%esp),%esi +- xorl %ecx,%edx +- movl (%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 12(%esp),%edi +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,8(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 4(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 56(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- addl %edx,%ebx +- addl 20(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 24(%esp),%esi +- xorl %ecx,%edx +- movl 28(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,20(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 8(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,4(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl (%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 60(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 20(%esp),%esi +- xorl %ecx,%edx +- movl 24(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,16(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 4(%esp),%edi +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 28(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 64(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- addl %edx,%ebx +- addl 12(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 16(%esp),%esi +- xorl %ecx,%edx +- movl 20(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,12(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl (%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,28(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 24(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl 
%esi,%ecx +- addl 68(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl 8(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 12(%esp),%esi +- xorl %ecx,%edx +- movl 16(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,8(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 28(%esp),%edi +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,24(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 20(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 72(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- addl %edx,%ebx +- addl 4(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 8(%esp),%esi +- xorl %ecx,%edx +- movl 12(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,4(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 24(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,20(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 16(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 76(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl (%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 4(%esp),%esi +- xorl %ecx,%edx +- movl 8(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 20(%esp),%edi +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,16(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 12(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 80(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- addl %edx,%ebx +- addl 28(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl (%esp),%esi +- xorl %ecx,%edx +- movl 4(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,28(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 16(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,12(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl 8(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %ebx,%eax +- xorl %esi,%ecx +- addl 84(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl 24(%esp),%edx +- addl %ecx,%eax +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 28(%esp),%esi +- xorl %ecx,%edx +- movl (%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,24(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %eax,%ecx +- addl %edi,%edx +- movl 12(%esp),%edi +- movl %eax,%esi +- shrdl $9,%ecx,%ecx +- movl %eax,8(%esp) +- xorl %eax,%ecx +- xorl %edi,%eax +- addl 4(%esp),%edx +- shrdl $11,%ecx,%ecx +- andl %eax,%ebx +- xorl %esi,%ecx +- addl 88(%esp),%edx +- xorl %edi,%ebx +- shrdl $2,%ecx,%ecx +- addl %edx,%ebx +- addl 20(%esp),%edx +- addl %ecx,%ebx +- movl %edx,%ecx +- shrdl $14,%edx,%edx +- movl 24(%esp),%esi +- xorl %ecx,%edx +- movl 28(%esp),%edi +- xorl %edi,%esi +- shrdl $5,%edx,%edx +- andl %ecx,%esi +- movl %ecx,20(%esp) +- xorl %ecx,%edx +- xorl %esi,%edi +- shrdl $6,%edx,%edx +- movl %ebx,%ecx +- addl %edi,%edx +- movl 8(%esp),%edi +- movl %ebx,%esi +- shrdl $9,%ecx,%ecx +- movl %ebx,4(%esp) +- xorl %ebx,%ecx +- xorl %edi,%ebx +- addl (%esp),%edx +- shrdl $11,%ecx,%ecx +- andl 
%ebx,%eax +- xorl %esi,%ecx +- addl 92(%esp),%edx +- xorl %edi,%eax +- shrdl $2,%ecx,%ecx +- addl %edx,%eax +- addl 16(%esp),%edx +- addl %ecx,%eax +- movl 96(%esp),%esi +- xorl %edi,%ebx +- movl 12(%esp),%ecx +- addl (%esi),%eax +- addl 4(%esi),%ebx +- addl 8(%esi),%edi +- addl 12(%esi),%ecx +- movl %eax,(%esi) +- movl %ebx,4(%esi) +- movl %edi,8(%esi) +- movl %ecx,12(%esi) +- movl %ebx,4(%esp) +- xorl %edi,%ebx +- movl %edi,8(%esp) +- movl %ecx,12(%esp) +- movl 20(%esp),%edi +- movl 24(%esp),%ecx +- addl 16(%esi),%edx +- addl 20(%esi),%edi +- addl 24(%esi),%ecx +- movl %edx,16(%esi) +- movl %edi,20(%esi) +- movl %edi,20(%esp) +- movl 28(%esp),%edi +- movl %ecx,24(%esi) +- addl 28(%esi),%edi +- movl %ecx,24(%esp) +- movl %edi,28(%esi) +- movl %edi,28(%esp) +- movl 100(%esp),%edi +- vmovdqa 64(%ebp),%xmm7 +- subl $192,%ebp +- cmpl 104(%esp),%edi +- jb L012grand_avx +- movl 108(%esp),%esp +- vzeroall +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.section __IMPORT,__pointers,non_lazy_symbol_pointers +-L_OPENSSL_ia32cap_P$non_lazy_ptr: +-.indirect_symbol _OPENSSL_ia32cap_P +-.long 0 +-#endif +diff --git a/mac-x86/crypto/fipsmodule/sha512-586.S b/mac-x86/crypto/fipsmodule/sha512-586.S +deleted file mode 100644 +index 8c33cf5..0000000 +--- a/mac-x86/crypto/fipsmodule/sha512-586.S ++++ /dev/null +@@ -1,2838 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _sha512_block_data_order +-.private_extern _sha512_block_data_order +-.align 4 +-_sha512_block_data_order: +-L_sha512_block_data_order_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%eax +- movl %esp,%ebx +- call L000pic_point +-L000pic_point: +- popl %ebp +- leal L001K512-L000pic_point(%ebp),%ebp +- subl $16,%esp +- andl $-64,%esp +- shll $7,%eax +- addl %edi,%eax +- movl %esi,(%esp) +- movl %edi,4(%esp) +- movl %eax,8(%esp) +- movl %ebx,12(%esp) +- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L001K512(%ebp),%edx +- movl (%edx),%ecx +- testl $67108864,%ecx +- jz L002loop_x86 +- movl 4(%edx),%edx +- movq (%esi),%mm0 +- andl $16777216,%ecx +- movq 8(%esi),%mm1 +- andl $512,%edx +- movq 16(%esi),%mm2 +- orl %edx,%ecx +- movq 24(%esi),%mm3 +- movq 32(%esi),%mm4 +- movq 40(%esi),%mm5 +- movq 48(%esi),%mm6 +- movq 56(%esi),%mm7 +- cmpl $16777728,%ecx +- je L003SSSE3 +- subl $80,%esp +- jmp L004loop_sse2 +-.align 4,0x90 +-L004loop_sse2: +- movq %mm1,8(%esp) +- movq %mm2,16(%esp) +- movq %mm3,24(%esp) +- movq %mm5,40(%esp) +- movq %mm6,48(%esp) +- pxor %mm1,%mm2 +- movq %mm7,56(%esp) +- movq %mm0,%mm3 +- movl (%edi),%eax +- movl 4(%edi),%ebx +- addl $8,%edi +- movl $15,%edx +- bswap %eax +- bswap %ebx +- jmp L00500_14_sse2 +-.align 4,0x90 +-L00500_14_sse2: +- movd %eax,%mm1 +- movl (%edi),%eax +- movd %ebx,%mm7 +- movl 4(%edi),%ebx +- addl $8,%edi +- bswap %eax +- bswap %ebx +- punpckldq %mm1,%mm7 +- movq %mm4,%mm1 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,32(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- movq %mm3,%mm0 +- movq %mm7,72(%esp) +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 56(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- paddq (%ebp),%mm7 +- pxor %mm4,%mm3 +- movq 24(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 
+- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 8(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- subl $8,%esp +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 40(%esp),%mm5 +- paddq %mm2,%mm3 +- movq %mm0,%mm2 +- addl $8,%ebp +- paddq %mm6,%mm3 +- movq 48(%esp),%mm6 +- decl %edx +- jnz L00500_14_sse2 +- movd %eax,%mm1 +- movd %ebx,%mm7 +- punpckldq %mm1,%mm7 +- movq %mm4,%mm1 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,32(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- movq %mm3,%mm0 +- movq %mm7,72(%esp) +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 56(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- paddq (%ebp),%mm7 +- pxor %mm4,%mm3 +- movq 24(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 8(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- subl $8,%esp +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 192(%esp),%mm7 +- paddq %mm2,%mm3 +- movq %mm0,%mm2 +- addl $8,%ebp +- paddq %mm6,%mm3 +- pxor %mm0,%mm0 +- movl $32,%edx +- jmp L00616_79_sse2 +-.align 4,0x90 +-L00616_79_sse2: +- movq 88(%esp),%mm5 +- movq %mm7,%mm1 +- psrlq $1,%mm7 +- movq %mm5,%mm6 +- psrlq $6,%mm5 +- psllq $56,%mm1 +- paddq %mm3,%mm0 +- movq %mm7,%mm3 +- psrlq $6,%mm7 +- pxor %mm1,%mm3 +- psllq $7,%mm1 +- pxor %mm7,%mm3 +- psrlq $1,%mm7 +- pxor %mm1,%mm3 +- movq %mm5,%mm1 +- psrlq $13,%mm5 +- pxor %mm3,%mm7 +- psllq $3,%mm6 +- pxor %mm5,%mm1 +- paddq 200(%esp),%mm7 +- pxor %mm6,%mm1 +- psrlq $42,%mm5 +- paddq 128(%esp),%mm7 +- pxor %mm5,%mm1 +- psllq $42,%mm6 +- movq 40(%esp),%mm5 +- pxor %mm6,%mm1 +- movq 48(%esp),%mm6 +- paddq %mm1,%mm7 +- movq %mm4,%mm1 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,32(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- movq %mm7,72(%esp) +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 56(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- paddq (%ebp),%mm7 +- pxor %mm4,%mm3 +- movq 24(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 8(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- subl $8,%esp +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 192(%esp),%mm7 +- paddq %mm6,%mm2 +- addl $8,%ebp +- movq 88(%esp),%mm5 +- movq %mm7,%mm1 +- psrlq $1,%mm7 +- movq %mm5,%mm6 +- psrlq $6,%mm5 +- psllq $56,%mm1 +- paddq %mm3,%mm2 +- movq %mm7,%mm3 +- psrlq $6,%mm7 +- pxor %mm1,%mm3 +- psllq $7,%mm1 +- pxor %mm7,%mm3 +- psrlq $1,%mm7 +- pxor %mm1,%mm3 +- movq %mm5,%mm1 +- psrlq $13,%mm5 +- pxor %mm3,%mm7 +- psllq $3,%mm6 +- pxor %mm5,%mm1 +- paddq 200(%esp),%mm7 +- pxor %mm6,%mm1 +- psrlq $42,%mm5 +- paddq 128(%esp),%mm7 +- pxor %mm5,%mm1 +- psllq $42,%mm6 +- movq 40(%esp),%mm5 +- pxor %mm6,%mm1 +- movq 48(%esp),%mm6 +- paddq %mm1,%mm7 +- movq %mm4,%mm1 +- pxor %mm6,%mm5 +- psrlq 
$14,%mm1 +- movq %mm4,32(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- movq %mm7,72(%esp) +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 56(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- paddq (%ebp),%mm7 +- pxor %mm4,%mm3 +- movq 24(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 8(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- subl $8,%esp +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 192(%esp),%mm7 +- paddq %mm6,%mm0 +- addl $8,%ebp +- decl %edx +- jnz L00616_79_sse2 +- paddq %mm3,%mm0 +- movq 8(%esp),%mm1 +- movq 24(%esp),%mm3 +- movq 40(%esp),%mm5 +- movq 48(%esp),%mm6 +- movq 56(%esp),%mm7 +- pxor %mm1,%mm2 +- paddq (%esi),%mm0 +- paddq 8(%esi),%mm1 +- paddq 16(%esi),%mm2 +- paddq 24(%esi),%mm3 +- paddq 32(%esi),%mm4 +- paddq 40(%esi),%mm5 +- paddq 48(%esi),%mm6 +- paddq 56(%esi),%mm7 +- movl $640,%eax +- movq %mm0,(%esi) +- movq %mm1,8(%esi) +- movq %mm2,16(%esi) +- movq %mm3,24(%esi) +- movq %mm4,32(%esi) +- movq %mm5,40(%esi) +- movq %mm6,48(%esi) +- movq %mm7,56(%esi) +- leal (%esp,%eax,1),%esp +- subl %eax,%ebp +- cmpl 88(%esp),%edi +- jb L004loop_sse2 +- movl 92(%esp),%esp +- emms +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 5,0x90 +-L003SSSE3: +- leal -64(%esp),%edx +- subl $256,%esp +- movdqa 640(%ebp),%xmm1 +- movdqu (%edi),%xmm0 +-.byte 102,15,56,0,193 +- movdqa (%ebp),%xmm3 +- movdqa %xmm1,%xmm2 +- movdqu 16(%edi),%xmm1 +- paddq %xmm0,%xmm3 +-.byte 102,15,56,0,202 +- movdqa %xmm3,-128(%edx) +- movdqa 16(%ebp),%xmm4 +- movdqa %xmm2,%xmm3 +- movdqu 32(%edi),%xmm2 +- paddq %xmm1,%xmm4 +-.byte 102,15,56,0,211 +- movdqa %xmm4,-112(%edx) +- movdqa 32(%ebp),%xmm5 +- movdqa %xmm3,%xmm4 +- movdqu 48(%edi),%xmm3 +- paddq %xmm2,%xmm5 +-.byte 102,15,56,0,220 +- movdqa %xmm5,-96(%edx) +- movdqa 48(%ebp),%xmm6 +- movdqa %xmm4,%xmm5 +- movdqu 64(%edi),%xmm4 +- paddq %xmm3,%xmm6 +-.byte 102,15,56,0,229 +- movdqa %xmm6,-80(%edx) +- movdqa 64(%ebp),%xmm7 +- movdqa %xmm5,%xmm6 +- movdqu 80(%edi),%xmm5 +- paddq %xmm4,%xmm7 +-.byte 102,15,56,0,238 +- movdqa %xmm7,-64(%edx) +- movdqa %xmm0,(%edx) +- movdqa 80(%ebp),%xmm0 +- movdqa %xmm6,%xmm7 +- movdqu 96(%edi),%xmm6 +- paddq %xmm5,%xmm0 +-.byte 102,15,56,0,247 +- movdqa %xmm0,-48(%edx) +- movdqa %xmm1,16(%edx) +- movdqa 96(%ebp),%xmm1 +- movdqa %xmm7,%xmm0 +- movdqu 112(%edi),%xmm7 +- paddq %xmm6,%xmm1 +-.byte 102,15,56,0,248 +- movdqa %xmm1,-32(%edx) +- movdqa %xmm2,32(%edx) +- movdqa 112(%ebp),%xmm2 +- movdqa (%edx),%xmm0 +- paddq %xmm7,%xmm2 +- movdqa %xmm2,-16(%edx) +- nop +-.align 5,0x90 +-L007loop_ssse3: +- movdqa 16(%edx),%xmm2 +- movdqa %xmm3,48(%edx) +- leal 128(%ebp),%ebp +- movq %mm1,8(%esp) +- movl %edi,%ebx +- movq %mm2,16(%esp) +- leal 128(%edi),%edi +- movq %mm3,24(%esp) +- cmpl %eax,%edi +- movq %mm5,40(%esp) +- cmovbl %edi,%ebx +- movq %mm6,48(%esp) +- movl $4,%ecx +- pxor %mm1,%mm2 +- movq %mm7,56(%esp) +- pxor %mm3,%mm3 +- jmp L00800_47_ssse3 +-.align 5,0x90 +-L00800_47_ssse3: +- movdqa %xmm5,%xmm3 +- movdqa %xmm2,%xmm1 +-.byte 102,15,58,15,208,8 +- movdqa %xmm4,(%edx) +-.byte 102,15,58,15,220,8 +- movdqa %xmm2,%xmm4 +- psrlq $7,%xmm2 +- paddq %xmm3,%xmm0 +- movdqa %xmm4,%xmm3 +- psrlq $1,%xmm4 +- psllq $56,%xmm3 +- pxor 
%xmm4,%xmm2 +- psrlq $7,%xmm4 +- pxor %xmm3,%xmm2 +- psllq $7,%xmm3 +- pxor %xmm4,%xmm2 +- movdqa %xmm7,%xmm4 +- pxor %xmm3,%xmm2 +- movdqa %xmm7,%xmm3 +- psrlq $6,%xmm4 +- paddq %xmm2,%xmm0 +- movdqa %xmm7,%xmm2 +- psrlq $19,%xmm3 +- psllq $3,%xmm2 +- pxor %xmm3,%xmm4 +- psrlq $42,%xmm3 +- pxor %xmm2,%xmm4 +- psllq $42,%xmm2 +- pxor %xmm3,%xmm4 +- movdqa 32(%edx),%xmm3 +- pxor %xmm2,%xmm4 +- movdqa (%ebp),%xmm2 +- movq %mm4,%mm1 +- paddq %xmm4,%xmm0 +- movq -128(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,32(%esp) +- paddq %xmm0,%xmm2 +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 56(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 24(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 8(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 32(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 40(%esp),%mm6 +- movq %mm4,%mm1 +- movq -120(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,24(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,56(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 48(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 16(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq (%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 24(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 32(%esp),%mm6 +- movdqa %xmm2,-128(%edx) +- movdqa %xmm6,%xmm4 +- movdqa %xmm3,%xmm2 +-.byte 102,15,58,15,217,8 +- movdqa %xmm5,16(%edx) +-.byte 102,15,58,15,229,8 +- movdqa %xmm3,%xmm5 +- psrlq $7,%xmm3 +- paddq %xmm4,%xmm1 +- movdqa %xmm5,%xmm4 +- psrlq $1,%xmm5 +- psllq $56,%xmm4 +- pxor %xmm5,%xmm3 +- psrlq $7,%xmm5 +- pxor %xmm4,%xmm3 +- psllq $7,%xmm4 +- pxor %xmm5,%xmm3 +- movdqa %xmm0,%xmm5 +- pxor %xmm4,%xmm3 +- movdqa %xmm0,%xmm4 +- psrlq $6,%xmm5 +- paddq %xmm3,%xmm1 +- movdqa %xmm0,%xmm3 +- psrlq $19,%xmm4 +- psllq $3,%xmm3 +- pxor %xmm4,%xmm5 +- psrlq $42,%xmm4 +- pxor %xmm3,%xmm5 +- psllq $42,%xmm3 +- pxor %xmm4,%xmm5 +- movdqa 48(%edx),%xmm4 +- pxor %xmm3,%xmm5 +- movdqa 16(%ebp),%xmm3 +- movq %mm4,%mm1 +- paddq %xmm5,%xmm1 +- movq -112(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,16(%esp) +- paddq %xmm1,%xmm3 +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,48(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 40(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 8(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 56(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 
+- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 16(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 24(%esp),%mm6 +- movq %mm4,%mm1 +- movq -104(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,8(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,40(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 32(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq (%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 48(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 8(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 16(%esp),%mm6 +- movdqa %xmm3,-112(%edx) +- movdqa %xmm7,%xmm5 +- movdqa %xmm4,%xmm3 +-.byte 102,15,58,15,226,8 +- movdqa %xmm6,32(%edx) +-.byte 102,15,58,15,238,8 +- movdqa %xmm4,%xmm6 +- psrlq $7,%xmm4 +- paddq %xmm5,%xmm2 +- movdqa %xmm6,%xmm5 +- psrlq $1,%xmm6 +- psllq $56,%xmm5 +- pxor %xmm6,%xmm4 +- psrlq $7,%xmm6 +- pxor %xmm5,%xmm4 +- psllq $7,%xmm5 +- pxor %xmm6,%xmm4 +- movdqa %xmm1,%xmm6 +- pxor %xmm5,%xmm4 +- movdqa %xmm1,%xmm5 +- psrlq $6,%xmm6 +- paddq %xmm4,%xmm2 +- movdqa %xmm1,%xmm4 +- psrlq $19,%xmm5 +- psllq $3,%xmm4 +- pxor %xmm5,%xmm6 +- psrlq $42,%xmm5 +- pxor %xmm4,%xmm6 +- psllq $42,%xmm4 +- pxor %xmm5,%xmm6 +- movdqa (%edx),%xmm5 +- pxor %xmm4,%xmm6 +- movdqa 32(%ebp),%xmm4 +- movq %mm4,%mm1 +- paddq %xmm6,%xmm2 +- movq -96(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,(%esp) +- paddq %xmm2,%xmm4 +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,32(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 24(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 56(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 40(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq (%esp),%mm5 +- paddq %mm6,%mm2 +- movq 8(%esp),%mm6 +- movq %mm4,%mm1 +- movq -88(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,56(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,24(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 16(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 48(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 32(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 56(%esp),%mm5 +- paddq %mm6,%mm0 +- movq (%esp),%mm6 +- movdqa 
%xmm4,-96(%edx) +- movdqa %xmm0,%xmm6 +- movdqa %xmm5,%xmm4 +-.byte 102,15,58,15,235,8 +- movdqa %xmm7,48(%edx) +-.byte 102,15,58,15,247,8 +- movdqa %xmm5,%xmm7 +- psrlq $7,%xmm5 +- paddq %xmm6,%xmm3 +- movdqa %xmm7,%xmm6 +- psrlq $1,%xmm7 +- psllq $56,%xmm6 +- pxor %xmm7,%xmm5 +- psrlq $7,%xmm7 +- pxor %xmm6,%xmm5 +- psllq $7,%xmm6 +- pxor %xmm7,%xmm5 +- movdqa %xmm2,%xmm7 +- pxor %xmm6,%xmm5 +- movdqa %xmm2,%xmm6 +- psrlq $6,%xmm7 +- paddq %xmm5,%xmm3 +- movdqa %xmm2,%xmm5 +- psrlq $19,%xmm6 +- psllq $3,%xmm5 +- pxor %xmm6,%xmm7 +- psrlq $42,%xmm6 +- pxor %xmm5,%xmm7 +- psllq $42,%xmm5 +- pxor %xmm6,%xmm7 +- movdqa 16(%edx),%xmm6 +- pxor %xmm5,%xmm7 +- movdqa 48(%ebp),%xmm5 +- movq %mm4,%mm1 +- paddq %xmm7,%xmm3 +- movq -80(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,48(%esp) +- paddq %xmm3,%xmm5 +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,16(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 8(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 40(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 24(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 48(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 56(%esp),%mm6 +- movq %mm4,%mm1 +- movq -72(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,40(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,8(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq (%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 32(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 16(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 40(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 48(%esp),%mm6 +- movdqa %xmm5,-80(%edx) +- movdqa %xmm1,%xmm7 +- movdqa %xmm6,%xmm5 +-.byte 102,15,58,15,244,8 +- movdqa %xmm0,(%edx) +-.byte 102,15,58,15,248,8 +- movdqa %xmm6,%xmm0 +- psrlq $7,%xmm6 +- paddq %xmm7,%xmm4 +- movdqa %xmm0,%xmm7 +- psrlq $1,%xmm0 +- psllq $56,%xmm7 +- pxor %xmm0,%xmm6 +- psrlq $7,%xmm0 +- pxor %xmm7,%xmm6 +- psllq $7,%xmm7 +- pxor %xmm0,%xmm6 +- movdqa %xmm3,%xmm0 +- pxor %xmm7,%xmm6 +- movdqa %xmm3,%xmm7 +- psrlq $6,%xmm0 +- paddq %xmm6,%xmm4 +- movdqa %xmm3,%xmm6 +- psrlq $19,%xmm7 +- psllq $3,%xmm6 +- pxor %xmm7,%xmm0 +- psrlq $42,%xmm7 +- pxor %xmm6,%xmm0 +- psllq $42,%xmm6 +- pxor %xmm7,%xmm0 +- movdqa 32(%edx),%xmm7 +- pxor %xmm6,%xmm0 +- movdqa 64(%ebp),%xmm6 +- movq %mm4,%mm1 +- paddq %xmm0,%xmm4 +- movq -64(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,32(%esp) +- paddq %xmm4,%xmm6 +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 56(%esp),%mm7 +- pxor 
%mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 24(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 8(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 32(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 40(%esp),%mm6 +- movq %mm4,%mm1 +- movq -56(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,24(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,56(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 48(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 16(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq (%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 24(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 32(%esp),%mm6 +- movdqa %xmm6,-64(%edx) +- movdqa %xmm2,%xmm0 +- movdqa %xmm7,%xmm6 +-.byte 102,15,58,15,253,8 +- movdqa %xmm1,16(%edx) +-.byte 102,15,58,15,193,8 +- movdqa %xmm7,%xmm1 +- psrlq $7,%xmm7 +- paddq %xmm0,%xmm5 +- movdqa %xmm1,%xmm0 +- psrlq $1,%xmm1 +- psllq $56,%xmm0 +- pxor %xmm1,%xmm7 +- psrlq $7,%xmm1 +- pxor %xmm0,%xmm7 +- psllq $7,%xmm0 +- pxor %xmm1,%xmm7 +- movdqa %xmm4,%xmm1 +- pxor %xmm0,%xmm7 +- movdqa %xmm4,%xmm0 +- psrlq $6,%xmm1 +- paddq %xmm7,%xmm5 +- movdqa %xmm4,%xmm7 +- psrlq $19,%xmm0 +- psllq $3,%xmm7 +- pxor %xmm0,%xmm1 +- psrlq $42,%xmm0 +- pxor %xmm7,%xmm1 +- psllq $42,%xmm7 +- pxor %xmm0,%xmm1 +- movdqa 48(%edx),%xmm0 +- pxor %xmm7,%xmm1 +- movdqa 80(%ebp),%xmm7 +- movq %mm4,%mm1 +- paddq %xmm1,%xmm5 +- movq -48(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,16(%esp) +- paddq %xmm5,%xmm7 +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,48(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 40(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 8(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 56(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 16(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 24(%esp),%mm6 +- movq %mm4,%mm1 +- movq -40(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,8(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,40(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 32(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq (%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 48(%esp),%mm1 +- psrlq $6,%mm5 +- pxor 
%mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 8(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 16(%esp),%mm6 +- movdqa %xmm7,-48(%edx) +- movdqa %xmm3,%xmm1 +- movdqa %xmm0,%xmm7 +-.byte 102,15,58,15,198,8 +- movdqa %xmm2,32(%edx) +-.byte 102,15,58,15,202,8 +- movdqa %xmm0,%xmm2 +- psrlq $7,%xmm0 +- paddq %xmm1,%xmm6 +- movdqa %xmm2,%xmm1 +- psrlq $1,%xmm2 +- psllq $56,%xmm1 +- pxor %xmm2,%xmm0 +- psrlq $7,%xmm2 +- pxor %xmm1,%xmm0 +- psllq $7,%xmm1 +- pxor %xmm2,%xmm0 +- movdqa %xmm5,%xmm2 +- pxor %xmm1,%xmm0 +- movdqa %xmm5,%xmm1 +- psrlq $6,%xmm2 +- paddq %xmm0,%xmm6 +- movdqa %xmm5,%xmm0 +- psrlq $19,%xmm1 +- psllq $3,%xmm0 +- pxor %xmm1,%xmm2 +- psrlq $42,%xmm1 +- pxor %xmm0,%xmm2 +- psllq $42,%xmm0 +- pxor %xmm1,%xmm2 +- movdqa (%edx),%xmm1 +- pxor %xmm0,%xmm2 +- movdqa 96(%ebp),%xmm0 +- movq %mm4,%mm1 +- paddq %xmm2,%xmm6 +- movq -32(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,(%esp) +- paddq %xmm6,%xmm0 +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,32(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 24(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 56(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 40(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq (%esp),%mm5 +- paddq %mm6,%mm2 +- movq 8(%esp),%mm6 +- movq %mm4,%mm1 +- movq -24(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,56(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,24(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 16(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 48(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 32(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 56(%esp),%mm5 +- paddq %mm6,%mm0 +- movq (%esp),%mm6 +- movdqa %xmm0,-32(%edx) +- movdqa %xmm4,%xmm2 +- movdqa %xmm1,%xmm0 +-.byte 102,15,58,15,207,8 +- movdqa %xmm3,48(%edx) +-.byte 102,15,58,15,211,8 +- movdqa %xmm1,%xmm3 +- psrlq $7,%xmm1 +- paddq %xmm2,%xmm7 +- movdqa %xmm3,%xmm2 +- psrlq $1,%xmm3 +- psllq $56,%xmm2 +- pxor %xmm3,%xmm1 +- psrlq $7,%xmm3 +- pxor %xmm2,%xmm1 +- psllq $7,%xmm2 +- pxor %xmm3,%xmm1 +- movdqa %xmm6,%xmm3 +- pxor %xmm2,%xmm1 +- movdqa %xmm6,%xmm2 +- psrlq $6,%xmm3 +- paddq %xmm1,%xmm7 +- movdqa %xmm6,%xmm1 +- psrlq $19,%xmm2 +- psllq $3,%xmm1 +- pxor %xmm2,%xmm3 +- psrlq $42,%xmm2 +- pxor %xmm1,%xmm3 +- psllq $42,%xmm1 +- pxor %xmm2,%xmm3 +- movdqa 16(%edx),%xmm2 +- pxor %xmm1,%xmm3 +- movdqa 112(%ebp),%xmm1 +- movq %mm4,%mm1 +- paddq %xmm3,%xmm7 +- movq -16(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,48(%esp) +- paddq %xmm7,%xmm1 +- pand %mm4,%mm5 
+- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,16(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 8(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 40(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 24(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 48(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 56(%esp),%mm6 +- movq %mm4,%mm1 +- movq -8(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,40(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,8(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq (%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 32(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 16(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 40(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 48(%esp),%mm6 +- movdqa %xmm1,-16(%edx) +- leal 128(%ebp),%ebp +- decl %ecx +- jnz L00800_47_ssse3 +- movdqa (%ebp),%xmm1 +- leal -640(%ebp),%ebp +- movdqu (%ebx),%xmm0 +-.byte 102,15,56,0,193 +- movdqa (%ebp),%xmm3 +- movdqa %xmm1,%xmm2 +- movdqu 16(%ebx),%xmm1 +- paddq %xmm0,%xmm3 +-.byte 102,15,56,0,202 +- movq %mm4,%mm1 +- movq -128(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,32(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 56(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 24(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 8(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 32(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 40(%esp),%mm6 +- movq %mm4,%mm1 +- movq -120(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,24(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,56(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 48(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 16(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq (%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- 
movq 24(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 32(%esp),%mm6 +- movdqa %xmm3,-128(%edx) +- movdqa 16(%ebp),%xmm4 +- movdqa %xmm2,%xmm3 +- movdqu 32(%ebx),%xmm2 +- paddq %xmm1,%xmm4 +-.byte 102,15,56,0,211 +- movq %mm4,%mm1 +- movq -112(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,16(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,48(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 40(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 8(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 56(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 16(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 24(%esp),%mm6 +- movq %mm4,%mm1 +- movq -104(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,8(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,40(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 32(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq (%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 48(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 8(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 16(%esp),%mm6 +- movdqa %xmm4,-112(%edx) +- movdqa 32(%ebp),%xmm5 +- movdqa %xmm3,%xmm4 +- movdqu 48(%ebx),%xmm3 +- paddq %xmm2,%xmm5 +-.byte 102,15,56,0,220 +- movq %mm4,%mm1 +- movq -96(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,32(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 24(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 56(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 40(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq (%esp),%mm5 +- paddq %mm6,%mm2 +- movq 8(%esp),%mm6 +- movq %mm4,%mm1 +- movq -88(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,56(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,24(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 16(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 48(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 32(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq 
$5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 56(%esp),%mm5 +- paddq %mm6,%mm0 +- movq (%esp),%mm6 +- movdqa %xmm5,-96(%edx) +- movdqa 48(%ebp),%xmm6 +- movdqa %xmm4,%xmm5 +- movdqu 64(%ebx),%xmm4 +- paddq %xmm3,%xmm6 +-.byte 102,15,56,0,229 +- movq %mm4,%mm1 +- movq -80(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,48(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,16(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 8(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 40(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 24(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 48(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 56(%esp),%mm6 +- movq %mm4,%mm1 +- movq -72(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,40(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,8(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq (%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 32(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 16(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 40(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 48(%esp),%mm6 +- movdqa %xmm6,-80(%edx) +- movdqa 64(%ebp),%xmm7 +- movdqa %xmm5,%xmm6 +- movdqu 80(%ebx),%xmm5 +- paddq %xmm4,%xmm7 +-.byte 102,15,56,0,238 +- movq %mm4,%mm1 +- movq -64(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,32(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 56(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 24(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 8(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 32(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 40(%esp),%mm6 +- movq %mm4,%mm1 +- movq -56(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,24(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,56(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 48(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 16(%esp),%mm4 +- paddq %mm7,%mm3 +- movq 
%mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq (%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 24(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 32(%esp),%mm6 +- movdqa %xmm7,-64(%edx) +- movdqa %xmm0,(%edx) +- movdqa 80(%ebp),%xmm0 +- movdqa %xmm6,%xmm7 +- movdqu 96(%ebx),%xmm6 +- paddq %xmm5,%xmm0 +-.byte 102,15,56,0,247 +- movq %mm4,%mm1 +- movq -48(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,16(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,48(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 40(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 8(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 56(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 16(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 24(%esp),%mm6 +- movq %mm4,%mm1 +- movq -40(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,8(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,40(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 32(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq (%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 48(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 8(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 16(%esp),%mm6 +- movdqa %xmm0,-48(%edx) +- movdqa %xmm1,16(%edx) +- movdqa 96(%ebp),%xmm1 +- movdqa %xmm7,%xmm0 +- movdqu 112(%ebx),%xmm7 +- paddq %xmm6,%xmm1 +-.byte 102,15,56,0,248 +- movq %mm4,%mm1 +- movq -32(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,32(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 24(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 56(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 40(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq (%esp),%mm5 +- paddq %mm6,%mm2 +- movq 8(%esp),%mm6 +- movq %mm4,%mm1 +- movq -24(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,56(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor 
%mm1,%mm3 +- movq %mm2,24(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 16(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 48(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 32(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 56(%esp),%mm5 +- paddq %mm6,%mm0 +- movq (%esp),%mm6 +- movdqa %xmm1,-32(%edx) +- movdqa %xmm2,32(%edx) +- movdqa 112(%ebp),%xmm2 +- movdqa (%edx),%xmm0 +- paddq %xmm7,%xmm2 +- movq %mm4,%mm1 +- movq -16(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,48(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm0 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm0,16(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq 8(%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 40(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm0,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm0,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 24(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm0,%mm2 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- pxor %mm7,%mm6 +- movq 48(%esp),%mm5 +- paddq %mm6,%mm2 +- movq 56(%esp),%mm6 +- movq %mm4,%mm1 +- movq -8(%edx),%mm7 +- pxor %mm6,%mm5 +- psrlq $14,%mm1 +- movq %mm4,40(%esp) +- pand %mm4,%mm5 +- psllq $23,%mm4 +- paddq %mm3,%mm2 +- movq %mm1,%mm3 +- psrlq $4,%mm1 +- pxor %mm6,%mm5 +- pxor %mm4,%mm3 +- psllq $23,%mm4 +- pxor %mm1,%mm3 +- movq %mm2,8(%esp) +- paddq %mm5,%mm7 +- pxor %mm4,%mm3 +- psrlq $23,%mm1 +- paddq (%esp),%mm7 +- pxor %mm1,%mm3 +- psllq $4,%mm4 +- pxor %mm4,%mm3 +- movq 32(%esp),%mm4 +- paddq %mm7,%mm3 +- movq %mm2,%mm5 +- psrlq $28,%mm5 +- paddq %mm3,%mm4 +- movq %mm2,%mm6 +- movq %mm5,%mm7 +- psllq $25,%mm6 +- movq 16(%esp),%mm1 +- psrlq $6,%mm5 +- pxor %mm6,%mm7 +- psllq $5,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm2 +- psrlq $5,%mm5 +- pxor %mm6,%mm7 +- pand %mm2,%mm0 +- psllq $6,%mm6 +- pxor %mm5,%mm7 +- pxor %mm1,%mm0 +- pxor %mm7,%mm6 +- movq 40(%esp),%mm5 +- paddq %mm6,%mm0 +- movq 48(%esp),%mm6 +- movdqa %xmm2,-16(%edx) +- movq 8(%esp),%mm1 +- paddq %mm3,%mm0 +- movq 24(%esp),%mm3 +- movq 56(%esp),%mm7 +- pxor %mm1,%mm2 +- paddq (%esi),%mm0 +- paddq 8(%esi),%mm1 +- paddq 16(%esi),%mm2 +- paddq 24(%esi),%mm3 +- paddq 32(%esi),%mm4 +- paddq 40(%esi),%mm5 +- paddq 48(%esi),%mm6 +- paddq 56(%esi),%mm7 +- movq %mm0,(%esi) +- movq %mm1,8(%esi) +- movq %mm2,16(%esi) +- movq %mm3,24(%esi) +- movq %mm4,32(%esi) +- movq %mm5,40(%esi) +- movq %mm6,48(%esi) +- movq %mm7,56(%esi) +- cmpl %eax,%edi +- jb L007loop_ssse3 +- movl 76(%edx),%esp +- emms +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 4,0x90 +-L002loop_x86: +- movl (%edi),%eax +- movl 4(%edi),%ebx +- movl 8(%edi),%ecx +- movl 12(%edi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- pushl %eax +- pushl %ebx +- pushl %ecx +- pushl %edx +- movl 16(%edi),%eax +- movl 20(%edi),%ebx +- movl 24(%edi),%ecx +- movl 28(%edi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- pushl %eax +- pushl %ebx +- pushl %ecx +- pushl %edx +- movl 32(%edi),%eax +- movl 36(%edi),%ebx +- movl 40(%edi),%ecx +- 
movl 44(%edi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- pushl %eax +- pushl %ebx +- pushl %ecx +- pushl %edx +- movl 48(%edi),%eax +- movl 52(%edi),%ebx +- movl 56(%edi),%ecx +- movl 60(%edi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- pushl %eax +- pushl %ebx +- pushl %ecx +- pushl %edx +- movl 64(%edi),%eax +- movl 68(%edi),%ebx +- movl 72(%edi),%ecx +- movl 76(%edi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- pushl %eax +- pushl %ebx +- pushl %ecx +- pushl %edx +- movl 80(%edi),%eax +- movl 84(%edi),%ebx +- movl 88(%edi),%ecx +- movl 92(%edi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- pushl %eax +- pushl %ebx +- pushl %ecx +- pushl %edx +- movl 96(%edi),%eax +- movl 100(%edi),%ebx +- movl 104(%edi),%ecx +- movl 108(%edi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- pushl %eax +- pushl %ebx +- pushl %ecx +- pushl %edx +- movl 112(%edi),%eax +- movl 116(%edi),%ebx +- movl 120(%edi),%ecx +- movl 124(%edi),%edx +- bswap %eax +- bswap %ebx +- bswap %ecx +- bswap %edx +- pushl %eax +- pushl %ebx +- pushl %ecx +- pushl %edx +- addl $128,%edi +- subl $72,%esp +- movl %edi,204(%esp) +- leal 8(%esp),%edi +- movl $16,%ecx +-.long 2784229001 +-.align 4,0x90 +-L00900_15_x86: +- movl 40(%esp),%ecx +- movl 44(%esp),%edx +- movl %ecx,%esi +- shrl $9,%ecx +- movl %edx,%edi +- shrl $9,%edx +- movl %ecx,%ebx +- shll $14,%esi +- movl %edx,%eax +- shll $14,%edi +- xorl %esi,%ebx +- shrl $5,%ecx +- xorl %edi,%eax +- shrl $5,%edx +- xorl %ecx,%eax +- shll $4,%esi +- xorl %edx,%ebx +- shll $4,%edi +- xorl %esi,%ebx +- shrl $4,%ecx +- xorl %edi,%eax +- shrl $4,%edx +- xorl %ecx,%eax +- shll $5,%esi +- xorl %edx,%ebx +- shll $5,%edi +- xorl %esi,%eax +- xorl %edi,%ebx +- movl 48(%esp),%ecx +- movl 52(%esp),%edx +- movl 56(%esp),%esi +- movl 60(%esp),%edi +- addl 64(%esp),%eax +- adcl 68(%esp),%ebx +- xorl %esi,%ecx +- xorl %edi,%edx +- andl 40(%esp),%ecx +- andl 44(%esp),%edx +- addl 192(%esp),%eax +- adcl 196(%esp),%ebx +- xorl %esi,%ecx +- xorl %edi,%edx +- movl (%ebp),%esi +- movl 4(%ebp),%edi +- addl %ecx,%eax +- adcl %edx,%ebx +- movl 32(%esp),%ecx +- movl 36(%esp),%edx +- addl %esi,%eax +- adcl %edi,%ebx +- movl %eax,(%esp) +- movl %ebx,4(%esp) +- addl %ecx,%eax +- adcl %edx,%ebx +- movl 8(%esp),%ecx +- movl 12(%esp),%edx +- movl %eax,32(%esp) +- movl %ebx,36(%esp) +- movl %ecx,%esi +- shrl $2,%ecx +- movl %edx,%edi +- shrl $2,%edx +- movl %ecx,%ebx +- shll $4,%esi +- movl %edx,%eax +- shll $4,%edi +- xorl %esi,%ebx +- shrl $5,%ecx +- xorl %edi,%eax +- shrl $5,%edx +- xorl %ecx,%ebx +- shll $21,%esi +- xorl %edx,%eax +- shll $21,%edi +- xorl %esi,%eax +- shrl $21,%ecx +- xorl %edi,%ebx +- shrl $21,%edx +- xorl %ecx,%eax +- shll $5,%esi +- xorl %edx,%ebx +- shll $5,%edi +- xorl %esi,%eax +- xorl %edi,%ebx +- movl 8(%esp),%ecx +- movl 12(%esp),%edx +- movl 16(%esp),%esi +- movl 20(%esp),%edi +- addl (%esp),%eax +- adcl 4(%esp),%ebx +- orl %esi,%ecx +- orl %edi,%edx +- andl 24(%esp),%ecx +- andl 28(%esp),%edx +- andl 8(%esp),%esi +- andl 12(%esp),%edi +- orl %esi,%ecx +- orl %edi,%edx +- addl %ecx,%eax +- adcl %edx,%ebx +- movl %eax,(%esp) +- movl %ebx,4(%esp) +- movb (%ebp),%dl +- subl $8,%esp +- leal 8(%ebp),%ebp +- cmpb $148,%dl +- jne L00900_15_x86 +-.align 4,0x90 +-L01016_79_x86: +- movl 312(%esp),%ecx +- movl 316(%esp),%edx +- movl %ecx,%esi +- shrl $1,%ecx +- movl %edx,%edi +- shrl $1,%edx +- movl %ecx,%eax +- shll $24,%esi +- movl %edx,%ebx +- shll $24,%edi +- xorl %esi,%ebx +- shrl $6,%ecx +- xorl 
%edi,%eax +- shrl $6,%edx +- xorl %ecx,%eax +- shll $7,%esi +- xorl %edx,%ebx +- shll $1,%edi +- xorl %esi,%ebx +- shrl $1,%ecx +- xorl %edi,%eax +- shrl $1,%edx +- xorl %ecx,%eax +- shll $6,%edi +- xorl %edx,%ebx +- xorl %edi,%eax +- movl %eax,(%esp) +- movl %ebx,4(%esp) +- movl 208(%esp),%ecx +- movl 212(%esp),%edx +- movl %ecx,%esi +- shrl $6,%ecx +- movl %edx,%edi +- shrl $6,%edx +- movl %ecx,%eax +- shll $3,%esi +- movl %edx,%ebx +- shll $3,%edi +- xorl %esi,%eax +- shrl $13,%ecx +- xorl %edi,%ebx +- shrl $13,%edx +- xorl %ecx,%eax +- shll $10,%esi +- xorl %edx,%ebx +- shll $10,%edi +- xorl %esi,%ebx +- shrl $10,%ecx +- xorl %edi,%eax +- shrl $10,%edx +- xorl %ecx,%ebx +- shll $13,%edi +- xorl %edx,%eax +- xorl %edi,%eax +- movl 320(%esp),%ecx +- movl 324(%esp),%edx +- addl (%esp),%eax +- adcl 4(%esp),%ebx +- movl 248(%esp),%esi +- movl 252(%esp),%edi +- addl %ecx,%eax +- adcl %edx,%ebx +- addl %esi,%eax +- adcl %edi,%ebx +- movl %eax,192(%esp) +- movl %ebx,196(%esp) +- movl 40(%esp),%ecx +- movl 44(%esp),%edx +- movl %ecx,%esi +- shrl $9,%ecx +- movl %edx,%edi +- shrl $9,%edx +- movl %ecx,%ebx +- shll $14,%esi +- movl %edx,%eax +- shll $14,%edi +- xorl %esi,%ebx +- shrl $5,%ecx +- xorl %edi,%eax +- shrl $5,%edx +- xorl %ecx,%eax +- shll $4,%esi +- xorl %edx,%ebx +- shll $4,%edi +- xorl %esi,%ebx +- shrl $4,%ecx +- xorl %edi,%eax +- shrl $4,%edx +- xorl %ecx,%eax +- shll $5,%esi +- xorl %edx,%ebx +- shll $5,%edi +- xorl %esi,%eax +- xorl %edi,%ebx +- movl 48(%esp),%ecx +- movl 52(%esp),%edx +- movl 56(%esp),%esi +- movl 60(%esp),%edi +- addl 64(%esp),%eax +- adcl 68(%esp),%ebx +- xorl %esi,%ecx +- xorl %edi,%edx +- andl 40(%esp),%ecx +- andl 44(%esp),%edx +- addl 192(%esp),%eax +- adcl 196(%esp),%ebx +- xorl %esi,%ecx +- xorl %edi,%edx +- movl (%ebp),%esi +- movl 4(%ebp),%edi +- addl %ecx,%eax +- adcl %edx,%ebx +- movl 32(%esp),%ecx +- movl 36(%esp),%edx +- addl %esi,%eax +- adcl %edi,%ebx +- movl %eax,(%esp) +- movl %ebx,4(%esp) +- addl %ecx,%eax +- adcl %edx,%ebx +- movl 8(%esp),%ecx +- movl 12(%esp),%edx +- movl %eax,32(%esp) +- movl %ebx,36(%esp) +- movl %ecx,%esi +- shrl $2,%ecx +- movl %edx,%edi +- shrl $2,%edx +- movl %ecx,%ebx +- shll $4,%esi +- movl %edx,%eax +- shll $4,%edi +- xorl %esi,%ebx +- shrl $5,%ecx +- xorl %edi,%eax +- shrl $5,%edx +- xorl %ecx,%ebx +- shll $21,%esi +- xorl %edx,%eax +- shll $21,%edi +- xorl %esi,%eax +- shrl $21,%ecx +- xorl %edi,%ebx +- shrl $21,%edx +- xorl %ecx,%eax +- shll $5,%esi +- xorl %edx,%ebx +- shll $5,%edi +- xorl %esi,%eax +- xorl %edi,%ebx +- movl 8(%esp),%ecx +- movl 12(%esp),%edx +- movl 16(%esp),%esi +- movl 20(%esp),%edi +- addl (%esp),%eax +- adcl 4(%esp),%ebx +- orl %esi,%ecx +- orl %edi,%edx +- andl 24(%esp),%ecx +- andl 28(%esp),%edx +- andl 8(%esp),%esi +- andl 12(%esp),%edi +- orl %esi,%ecx +- orl %edi,%edx +- addl %ecx,%eax +- adcl %edx,%ebx +- movl %eax,(%esp) +- movl %ebx,4(%esp) +- movb (%ebp),%dl +- subl $8,%esp +- leal 8(%ebp),%ebp +- cmpb $23,%dl +- jne L01016_79_x86 +- movl 840(%esp),%esi +- movl 844(%esp),%edi +- movl (%esi),%eax +- movl 4(%esi),%ebx +- movl 8(%esi),%ecx +- movl 12(%esi),%edx +- addl 8(%esp),%eax +- adcl 12(%esp),%ebx +- movl %eax,(%esi) +- movl %ebx,4(%esi) +- addl 16(%esp),%ecx +- adcl 20(%esp),%edx +- movl %ecx,8(%esi) +- movl %edx,12(%esi) +- movl 16(%esi),%eax +- movl 20(%esi),%ebx +- movl 24(%esi),%ecx +- movl 28(%esi),%edx +- addl 24(%esp),%eax +- adcl 28(%esp),%ebx +- movl %eax,16(%esi) +- movl %ebx,20(%esi) +- addl 32(%esp),%ecx +- adcl 36(%esp),%edx +- movl %ecx,24(%esi) +- movl 
%edx,28(%esi) +- movl 32(%esi),%eax +- movl 36(%esi),%ebx +- movl 40(%esi),%ecx +- movl 44(%esi),%edx +- addl 40(%esp),%eax +- adcl 44(%esp),%ebx +- movl %eax,32(%esi) +- movl %ebx,36(%esi) +- addl 48(%esp),%ecx +- adcl 52(%esp),%edx +- movl %ecx,40(%esi) +- movl %edx,44(%esi) +- movl 48(%esi),%eax +- movl 52(%esi),%ebx +- movl 56(%esi),%ecx +- movl 60(%esi),%edx +- addl 56(%esp),%eax +- adcl 60(%esp),%ebx +- movl %eax,48(%esi) +- movl %ebx,52(%esi) +- addl 64(%esp),%ecx +- adcl 68(%esp),%edx +- movl %ecx,56(%esi) +- movl %edx,60(%esi) +- addl $840,%esp +- subl $640,%ebp +- cmpl 8(%esp),%edi +- jb L002loop_x86 +- movl 12(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.align 6,0x90 +-L001K512: +-.long 3609767458,1116352408 +-.long 602891725,1899447441 +-.long 3964484399,3049323471 +-.long 2173295548,3921009573 +-.long 4081628472,961987163 +-.long 3053834265,1508970993 +-.long 2937671579,2453635748 +-.long 3664609560,2870763221 +-.long 2734883394,3624381080 +-.long 1164996542,310598401 +-.long 1323610764,607225278 +-.long 3590304994,1426881987 +-.long 4068182383,1925078388 +-.long 991336113,2162078206 +-.long 633803317,2614888103 +-.long 3479774868,3248222580 +-.long 2666613458,3835390401 +-.long 944711139,4022224774 +-.long 2341262773,264347078 +-.long 2007800933,604807628 +-.long 1495990901,770255983 +-.long 1856431235,1249150122 +-.long 3175218132,1555081692 +-.long 2198950837,1996064986 +-.long 3999719339,2554220882 +-.long 766784016,2821834349 +-.long 2566594879,2952996808 +-.long 3203337956,3210313671 +-.long 1034457026,3336571891 +-.long 2466948901,3584528711 +-.long 3758326383,113926993 +-.long 168717936,338241895 +-.long 1188179964,666307205 +-.long 1546045734,773529912 +-.long 1522805485,1294757372 +-.long 2643833823,1396182291 +-.long 2343527390,1695183700 +-.long 1014477480,1986661051 +-.long 1206759142,2177026350 +-.long 344077627,2456956037 +-.long 1290863460,2730485921 +-.long 3158454273,2820302411 +-.long 3505952657,3259730800 +-.long 106217008,3345764771 +-.long 3606008344,3516065817 +-.long 1432725776,3600352804 +-.long 1467031594,4094571909 +-.long 851169720,275423344 +-.long 3100823752,430227734 +-.long 1363258195,506948616 +-.long 3750685593,659060556 +-.long 3785050280,883997877 +-.long 3318307427,958139571 +-.long 3812723403,1322822218 +-.long 2003034995,1537002063 +-.long 3602036899,1747873779 +-.long 1575990012,1955562222 +-.long 1125592928,2024104815 +-.long 2716904306,2227730452 +-.long 442776044,2361852424 +-.long 593698344,2428436474 +-.long 3733110249,2756734187 +-.long 2999351573,3204031479 +-.long 3815920427,3329325298 +-.long 3928383900,3391569614 +-.long 566280711,3515267271 +-.long 3454069534,3940187606 +-.long 4000239992,4118630271 +-.long 1914138554,116418474 +-.long 2731055270,174292421 +-.long 3203993006,289380356 +-.long 320620315,460393269 +-.long 587496836,685471733 +-.long 1086792851,852142971 +-.long 365543100,1017036298 +-.long 2618297676,1126000580 +-.long 3409855158,1288033470 +-.long 4234509866,1501505948 +-.long 987167468,1607167915 +-.long 1246189591,1816402316 +-.long 67438087,66051 +-.long 202182159,134810123 +-.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 +-.byte 110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 +-.byte 67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 +-.byte 112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 +-.byte 62,0 +-.section __IMPORT,__pointers,non_lazy_symbol_pointers +-L_OPENSSL_ia32cap_P$non_lazy_ptr: +-.indirect_symbol _OPENSSL_ia32cap_P +-.long 0 
+-#endif +diff --git a/mac-x86/crypto/fipsmodule/vpaes-x86.S b/mac-x86/crypto/fipsmodule/vpaes-x86.S +deleted file mode 100644 +index 00c0190..0000000 +--- a/mac-x86/crypto/fipsmodule/vpaes-x86.S ++++ /dev/null +@@ -1,681 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-#ifdef BORINGSSL_DISPATCH_TEST +-#endif +-.align 6,0x90 +-L_vpaes_consts: +-.long 218628480,235210255,168496130,67568393 +-.long 252381056,17041926,33884169,51187212 +-.long 252645135,252645135,252645135,252645135 +-.long 1512730624,3266504856,1377990664,3401244816 +-.long 830229760,1275146365,2969422977,3447763452 +-.long 3411033600,2979783055,338359620,2782886510 +-.long 4209124096,907596821,221174255,1006095553 +-.long 191964160,3799684038,3164090317,1589111125 +-.long 182528256,1777043520,2877432650,3265356744 +-.long 1874708224,3503451415,3305285752,363511674 +-.long 1606117888,3487855781,1093350906,2384367825 +-.long 197121,67569157,134941193,202313229 +-.long 67569157,134941193,202313229,197121 +-.long 134941193,202313229,197121,67569157 +-.long 202313229,197121,67569157,134941193 +-.long 33619971,100992007,168364043,235736079 +-.long 235736079,33619971,100992007,168364043 +-.long 168364043,235736079,33619971,100992007 +-.long 100992007,168364043,235736079,33619971 +-.long 50462976,117835012,185207048,252579084 +-.long 252314880,51251460,117574920,184942860 +-.long 184682752,252054788,50987272,118359308 +-.long 118099200,185467140,251790600,50727180 +-.long 2946363062,528716217,1300004225,1881839624 +-.long 1532713819,1532713819,1532713819,1532713819 +-.long 3602276352,4288629033,3737020424,4153884961 +-.long 1354558464,32357713,2958822624,3775749553 +-.long 1201988352,132424512,1572796698,503232858 +-.long 2213177600,1597421020,4103937655,675398315 +-.long 2749646592,4273543773,1511898873,121693092 +-.long 3040248576,1103263732,2871565598,1608280554 +-.long 2236667136,2588920351,482954393,64377734 +-.long 3069987328,291237287,2117370568,3650299247 +-.long 533321216,3573750986,2572112006,1401264716 +-.long 1339849704,2721158661,548607111,3445553514 +-.long 2128193280,3054596040,2183486460,1257083700 +-.long 655635200,1165381986,3923443150,2344132524 +-.long 190078720,256924420,290342170,357187870 +-.long 1610966272,2263057382,4103205268,309794674 +-.long 2592527872,2233205587,1335446729,3402964816 +-.long 3973531904,3225098121,3002836325,1918774430 +-.long 3870401024,2102906079,2284471353,4117666579 +-.long 617007872,1021508343,366931923,691083277 +-.long 2528395776,3491914898,2968704004,1613121270 +-.long 3445188352,3247741094,844474987,4093578302 +-.long 651481088,1190302358,1689581232,574775300 +-.long 4289380608,206939853,2555985458,2489840491 +-.long 2130264064,327674451,3566485037,3349835193 +-.long 2470714624,316102159,3636825756,3393945945 +-.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105 +-.byte 111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83 +-.byte 83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117 +-.byte 114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105 +-.byte 118,101,114,115,105,116,121,41,0 +-.align 6,0x90 +-.private_extern __vpaes_preheat +-.align 4 +-__vpaes_preheat: +- addl (%esp),%ebp +- movdqa -48(%ebp),%xmm7 +- movdqa -16(%ebp),%xmm6 +- ret +-.private_extern __vpaes_encrypt_core +-.align 4 +-__vpaes_encrypt_core: +- movl $16,%ecx +- movl 240(%edx),%eax +- movdqa %xmm6,%xmm1 +- movdqa (%ebp),%xmm2 +- 
pandn %xmm0,%xmm1 +- pand %xmm6,%xmm0 +- movdqu (%edx),%xmm5 +-.byte 102,15,56,0,208 +- movdqa 16(%ebp),%xmm0 +- pxor %xmm5,%xmm2 +- psrld $4,%xmm1 +- addl $16,%edx +-.byte 102,15,56,0,193 +- leal 192(%ebp),%ebx +- pxor %xmm2,%xmm0 +- jmp L000enc_entry +-.align 4,0x90 +-L001enc_loop: +- movdqa 32(%ebp),%xmm4 +- movdqa 48(%ebp),%xmm0 +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,195 +- pxor %xmm5,%xmm4 +- movdqa 64(%ebp),%xmm5 +- pxor %xmm4,%xmm0 +- movdqa -64(%ebx,%ecx,1),%xmm1 +-.byte 102,15,56,0,234 +- movdqa 80(%ebp),%xmm2 +- movdqa (%ebx,%ecx,1),%xmm4 +-.byte 102,15,56,0,211 +- movdqa %xmm0,%xmm3 +- pxor %xmm5,%xmm2 +-.byte 102,15,56,0,193 +- addl $16,%edx +- pxor %xmm2,%xmm0 +-.byte 102,15,56,0,220 +- addl $16,%ecx +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,193 +- andl $48,%ecx +- subl $1,%eax +- pxor %xmm3,%xmm0 +-L000enc_entry: +- movdqa %xmm6,%xmm1 +- movdqa -32(%ebp),%xmm5 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm6,%xmm0 +-.byte 102,15,56,0,232 +- movdqa %xmm7,%xmm3 +- pxor %xmm1,%xmm0 +-.byte 102,15,56,0,217 +- movdqa %xmm7,%xmm4 +- pxor %xmm5,%xmm3 +-.byte 102,15,56,0,224 +- movdqa %xmm7,%xmm2 +- pxor %xmm5,%xmm4 +-.byte 102,15,56,0,211 +- movdqa %xmm7,%xmm3 +- pxor %xmm0,%xmm2 +-.byte 102,15,56,0,220 +- movdqu (%edx),%xmm5 +- pxor %xmm1,%xmm3 +- jnz L001enc_loop +- movdqa 96(%ebp),%xmm4 +- movdqa 112(%ebp),%xmm0 +-.byte 102,15,56,0,226 +- pxor %xmm5,%xmm4 +-.byte 102,15,56,0,195 +- movdqa 64(%ebx,%ecx,1),%xmm1 +- pxor %xmm4,%xmm0 +-.byte 102,15,56,0,193 +- ret +-.private_extern __vpaes_decrypt_core +-.align 4 +-__vpaes_decrypt_core: +- leal 608(%ebp),%ebx +- movl 240(%edx),%eax +- movdqa %xmm6,%xmm1 +- movdqa -64(%ebx),%xmm2 +- pandn %xmm0,%xmm1 +- movl %eax,%ecx +- psrld $4,%xmm1 +- movdqu (%edx),%xmm5 +- shll $4,%ecx +- pand %xmm6,%xmm0 +-.byte 102,15,56,0,208 +- movdqa -48(%ebx),%xmm0 +- xorl $48,%ecx +-.byte 102,15,56,0,193 +- andl $48,%ecx +- pxor %xmm5,%xmm2 +- movdqa 176(%ebp),%xmm5 +- pxor %xmm2,%xmm0 +- addl $16,%edx +- leal -352(%ebx,%ecx,1),%ecx +- jmp L002dec_entry +-.align 4,0x90 +-L003dec_loop: +- movdqa -32(%ebx),%xmm4 +- movdqa -16(%ebx),%xmm1 +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,203 +- pxor %xmm4,%xmm0 +- movdqa (%ebx),%xmm4 +- pxor %xmm1,%xmm0 +- movdqa 16(%ebx),%xmm1 +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,197 +-.byte 102,15,56,0,203 +- pxor %xmm4,%xmm0 +- movdqa 32(%ebx),%xmm4 +- pxor %xmm1,%xmm0 +- movdqa 48(%ebx),%xmm1 +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,197 +-.byte 102,15,56,0,203 +- pxor %xmm4,%xmm0 +- movdqa 64(%ebx),%xmm4 +- pxor %xmm1,%xmm0 +- movdqa 80(%ebx),%xmm1 +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,197 +-.byte 102,15,56,0,203 +- pxor %xmm4,%xmm0 +- addl $16,%edx +-.byte 102,15,58,15,237,12 +- pxor %xmm1,%xmm0 +- subl $1,%eax +-L002dec_entry: +- movdqa %xmm6,%xmm1 +- movdqa -32(%ebp),%xmm2 +- pandn %xmm0,%xmm1 +- pand %xmm6,%xmm0 +- psrld $4,%xmm1 +-.byte 102,15,56,0,208 +- movdqa %xmm7,%xmm3 +- pxor %xmm1,%xmm0 +-.byte 102,15,56,0,217 +- movdqa %xmm7,%xmm4 +- pxor %xmm2,%xmm3 +-.byte 102,15,56,0,224 +- pxor %xmm2,%xmm4 +- movdqa %xmm7,%xmm2 +-.byte 102,15,56,0,211 +- movdqa %xmm7,%xmm3 +- pxor %xmm0,%xmm2 +-.byte 102,15,56,0,220 +- movdqu (%edx),%xmm0 +- pxor %xmm1,%xmm3 +- jnz L003dec_loop +- movdqa 96(%ebx),%xmm4 +-.byte 102,15,56,0,226 +- pxor %xmm0,%xmm4 +- movdqa 112(%ebx),%xmm0 +- movdqa (%ecx),%xmm2 +-.byte 102,15,56,0,195 +- pxor %xmm4,%xmm0 +-.byte 102,15,56,0,194 +- ret +-.private_extern __vpaes_schedule_core +-.align 4 +-__vpaes_schedule_core: +- addl (%esp),%ebp +- movdqu (%esi),%xmm0 +- movdqa 
320(%ebp),%xmm2 +- movdqa %xmm0,%xmm3 +- leal (%ebp),%ebx +- movdqa %xmm2,4(%esp) +- call __vpaes_schedule_transform +- movdqa %xmm0,%xmm7 +- testl %edi,%edi +- jnz L004schedule_am_decrypting +- movdqu %xmm0,(%edx) +- jmp L005schedule_go +-L004schedule_am_decrypting: +- movdqa 256(%ebp,%ecx,1),%xmm1 +-.byte 102,15,56,0,217 +- movdqu %xmm3,(%edx) +- xorl $48,%ecx +-L005schedule_go: +- cmpl $192,%eax +- ja L006schedule_256 +- je L007schedule_192 +-L008schedule_128: +- movl $10,%eax +-L009loop_schedule_128: +- call __vpaes_schedule_round +- decl %eax +- jz L010schedule_mangle_last +- call __vpaes_schedule_mangle +- jmp L009loop_schedule_128 +-.align 4,0x90 +-L007schedule_192: +- movdqu 8(%esi),%xmm0 +- call __vpaes_schedule_transform +- movdqa %xmm0,%xmm6 +- pxor %xmm4,%xmm4 +- movhlps %xmm4,%xmm6 +- movl $4,%eax +-L011loop_schedule_192: +- call __vpaes_schedule_round +-.byte 102,15,58,15,198,8 +- call __vpaes_schedule_mangle +- call __vpaes_schedule_192_smear +- call __vpaes_schedule_mangle +- call __vpaes_schedule_round +- decl %eax +- jz L010schedule_mangle_last +- call __vpaes_schedule_mangle +- call __vpaes_schedule_192_smear +- jmp L011loop_schedule_192 +-.align 4,0x90 +-L006schedule_256: +- movdqu 16(%esi),%xmm0 +- call __vpaes_schedule_transform +- movl $7,%eax +-L012loop_schedule_256: +- call __vpaes_schedule_mangle +- movdqa %xmm0,%xmm6 +- call __vpaes_schedule_round +- decl %eax +- jz L010schedule_mangle_last +- call __vpaes_schedule_mangle +- pshufd $255,%xmm0,%xmm0 +- movdqa %xmm7,20(%esp) +- movdqa %xmm6,%xmm7 +- call L_vpaes_schedule_low_round +- movdqa 20(%esp),%xmm7 +- jmp L012loop_schedule_256 +-.align 4,0x90 +-L010schedule_mangle_last: +- leal 384(%ebp),%ebx +- testl %edi,%edi +- jnz L013schedule_mangle_last_dec +- movdqa 256(%ebp,%ecx,1),%xmm1 +-.byte 102,15,56,0,193 +- leal 352(%ebp),%ebx +- addl $32,%edx +-L013schedule_mangle_last_dec: +- addl $-16,%edx +- pxor 336(%ebp),%xmm0 +- call __vpaes_schedule_transform +- movdqu %xmm0,(%edx) +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- pxor %xmm7,%xmm7 +- ret +-.private_extern __vpaes_schedule_192_smear +-.align 4 +-__vpaes_schedule_192_smear: +- pshufd $128,%xmm6,%xmm1 +- pshufd $254,%xmm7,%xmm0 +- pxor %xmm1,%xmm6 +- pxor %xmm1,%xmm1 +- pxor %xmm0,%xmm6 +- movdqa %xmm6,%xmm0 +- movhlps %xmm1,%xmm6 +- ret +-.private_extern __vpaes_schedule_round +-.align 4 +-__vpaes_schedule_round: +- movdqa 8(%esp),%xmm2 +- pxor %xmm1,%xmm1 +-.byte 102,15,58,15,202,15 +-.byte 102,15,58,15,210,15 +- pxor %xmm1,%xmm7 +- pshufd $255,%xmm0,%xmm0 +-.byte 102,15,58,15,192,1 +- movdqa %xmm2,8(%esp) +-L_vpaes_schedule_low_round: +- movdqa %xmm7,%xmm1 +- pslldq $4,%xmm7 +- pxor %xmm1,%xmm7 +- movdqa %xmm7,%xmm1 +- pslldq $8,%xmm7 +- pxor %xmm1,%xmm7 +- pxor 336(%ebp),%xmm7 +- movdqa -16(%ebp),%xmm4 +- movdqa -48(%ebp),%xmm5 +- movdqa %xmm4,%xmm1 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm4,%xmm0 +- movdqa -32(%ebp),%xmm2 +-.byte 102,15,56,0,208 +- pxor %xmm1,%xmm0 +- movdqa %xmm5,%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +- movdqa %xmm5,%xmm4 +-.byte 102,15,56,0,224 +- pxor %xmm2,%xmm4 +- movdqa %xmm5,%xmm2 +-.byte 102,15,56,0,211 +- pxor %xmm0,%xmm2 +- movdqa %xmm5,%xmm3 +-.byte 102,15,56,0,220 +- pxor %xmm1,%xmm3 +- movdqa 32(%ebp),%xmm4 +-.byte 102,15,56,0,226 +- movdqa 48(%ebp),%xmm0 +-.byte 102,15,56,0,195 +- pxor %xmm4,%xmm0 +- pxor %xmm7,%xmm0 +- movdqa %xmm0,%xmm7 +- ret +-.private_extern __vpaes_schedule_transform +-.align 4 
+-__vpaes_schedule_transform: +- movdqa -16(%ebp),%xmm2 +- movdqa %xmm2,%xmm1 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm2,%xmm0 +- movdqa (%ebx),%xmm2 +-.byte 102,15,56,0,208 +- movdqa 16(%ebx),%xmm0 +-.byte 102,15,56,0,193 +- pxor %xmm2,%xmm0 +- ret +-.private_extern __vpaes_schedule_mangle +-.align 4 +-__vpaes_schedule_mangle: +- movdqa %xmm0,%xmm4 +- movdqa 128(%ebp),%xmm5 +- testl %edi,%edi +- jnz L014schedule_mangle_dec +- addl $16,%edx +- pxor 336(%ebp),%xmm4 +-.byte 102,15,56,0,229 +- movdqa %xmm4,%xmm3 +-.byte 102,15,56,0,229 +- pxor %xmm4,%xmm3 +-.byte 102,15,56,0,229 +- pxor %xmm4,%xmm3 +- jmp L015schedule_mangle_both +-.align 4,0x90 +-L014schedule_mangle_dec: +- movdqa -16(%ebp),%xmm2 +- leal 416(%ebp),%esi +- movdqa %xmm2,%xmm1 +- pandn %xmm4,%xmm1 +- psrld $4,%xmm1 +- pand %xmm2,%xmm4 +- movdqa (%esi),%xmm2 +-.byte 102,15,56,0,212 +- movdqa 16(%esi),%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +-.byte 102,15,56,0,221 +- movdqa 32(%esi),%xmm2 +-.byte 102,15,56,0,212 +- pxor %xmm3,%xmm2 +- movdqa 48(%esi),%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +-.byte 102,15,56,0,221 +- movdqa 64(%esi),%xmm2 +-.byte 102,15,56,0,212 +- pxor %xmm3,%xmm2 +- movdqa 80(%esi),%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +-.byte 102,15,56,0,221 +- movdqa 96(%esi),%xmm2 +-.byte 102,15,56,0,212 +- pxor %xmm3,%xmm2 +- movdqa 112(%esi),%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +- addl $-16,%edx +-L015schedule_mangle_both: +- movdqa 256(%ebp,%ecx,1),%xmm1 +-.byte 102,15,56,0,217 +- addl $-16,%ecx +- andl $48,%ecx +- movdqu %xmm3,(%edx) +- ret +-.globl _vpaes_set_encrypt_key +-.private_extern _vpaes_set_encrypt_key +-.align 4 +-_vpaes_set_encrypt_key: +-L_vpaes_set_encrypt_key_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +-#ifdef BORINGSSL_DISPATCH_TEST +- pushl %ebx +- pushl %edx +- call L016pic +-L016pic: +- popl %ebx +- leal _BORINGSSL_function_hit+5-L016pic(%ebx),%ebx +- movl $1,%edx +- movb %dl,(%ebx) +- popl %edx +- popl %ebx +-#endif +- movl 20(%esp),%esi +- leal -56(%esp),%ebx +- movl 24(%esp),%eax +- andl $-16,%ebx +- movl 28(%esp),%edx +- xchgl %esp,%ebx +- movl %ebx,48(%esp) +- movl %eax,%ebx +- shrl $5,%ebx +- addl $5,%ebx +- movl %ebx,240(%edx) +- movl $48,%ecx +- movl $0,%edi +- leal L_vpaes_consts+0x30-L017pic_point,%ebp +- call __vpaes_schedule_core +-L017pic_point: +- movl 48(%esp),%esp +- xorl %eax,%eax +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _vpaes_set_decrypt_key +-.private_extern _vpaes_set_decrypt_key +-.align 4 +-_vpaes_set_decrypt_key: +-L_vpaes_set_decrypt_key_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%esi +- leal -56(%esp),%ebx +- movl 24(%esp),%eax +- andl $-16,%ebx +- movl 28(%esp),%edx +- xchgl %esp,%ebx +- movl %ebx,48(%esp) +- movl %eax,%ebx +- shrl $5,%ebx +- addl $5,%ebx +- movl %ebx,240(%edx) +- shll $4,%ebx +- leal 16(%edx,%ebx,1),%edx +- movl $1,%edi +- movl %eax,%ecx +- shrl $1,%ecx +- andl $32,%ecx +- xorl $32,%ecx +- leal L_vpaes_consts+0x30-L018pic_point,%ebp +- call __vpaes_schedule_core +-L018pic_point: +- movl 48(%esp),%esp +- xorl %eax,%eax +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _vpaes_encrypt +-.private_extern _vpaes_encrypt +-.align 4 +-_vpaes_encrypt: +-L_vpaes_encrypt_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +-#ifdef BORINGSSL_DISPATCH_TEST +- pushl %ebx +- pushl %edx +- call L019pic +-L019pic: +- popl %ebx +- leal _BORINGSSL_function_hit+4-L019pic(%ebx),%ebx +- movl $1,%edx +- movb 
%dl,(%ebx) +- popl %edx +- popl %ebx +-#endif +- leal L_vpaes_consts+0x30-L020pic_point,%ebp +- call __vpaes_preheat +-L020pic_point: +- movl 20(%esp),%esi +- leal -56(%esp),%ebx +- movl 24(%esp),%edi +- andl $-16,%ebx +- movl 28(%esp),%edx +- xchgl %esp,%ebx +- movl %ebx,48(%esp) +- movdqu (%esi),%xmm0 +- call __vpaes_encrypt_core +- movdqu %xmm0,(%edi) +- movl 48(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _vpaes_decrypt +-.private_extern _vpaes_decrypt +-.align 4 +-_vpaes_decrypt: +-L_vpaes_decrypt_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- leal L_vpaes_consts+0x30-L021pic_point,%ebp +- call __vpaes_preheat +-L021pic_point: +- movl 20(%esp),%esi +- leal -56(%esp),%ebx +- movl 24(%esp),%edi +- andl $-16,%ebx +- movl 28(%esp),%edx +- xchgl %esp,%ebx +- movl %ebx,48(%esp) +- movdqu (%esi),%xmm0 +- call __vpaes_decrypt_core +- movdqu %xmm0,(%edi) +- movl 48(%esp),%esp +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _vpaes_cbc_encrypt +-.private_extern _vpaes_cbc_encrypt +-.align 4 +-_vpaes_cbc_encrypt: +-L_vpaes_cbc_encrypt_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 20(%esp),%esi +- movl 24(%esp),%edi +- movl 28(%esp),%eax +- movl 32(%esp),%edx +- subl $16,%eax +- jc L022cbc_abort +- leal -56(%esp),%ebx +- movl 36(%esp),%ebp +- andl $-16,%ebx +- movl 40(%esp),%ecx +- xchgl %esp,%ebx +- movdqu (%ebp),%xmm1 +- subl %esi,%edi +- movl %ebx,48(%esp) +- movl %edi,(%esp) +- movl %edx,4(%esp) +- movl %ebp,8(%esp) +- movl %eax,%edi +- leal L_vpaes_consts+0x30-L023pic_point,%ebp +- call __vpaes_preheat +-L023pic_point: +- cmpl $0,%ecx +- je L024cbc_dec_loop +- jmp L025cbc_enc_loop +-.align 4,0x90 +-L025cbc_enc_loop: +- movdqu (%esi),%xmm0 +- pxor %xmm1,%xmm0 +- call __vpaes_encrypt_core +- movl (%esp),%ebx +- movl 4(%esp),%edx +- movdqa %xmm0,%xmm1 +- movdqu %xmm0,(%ebx,%esi,1) +- leal 16(%esi),%esi +- subl $16,%edi +- jnc L025cbc_enc_loop +- jmp L026cbc_done +-.align 4,0x90 +-L024cbc_dec_loop: +- movdqu (%esi),%xmm0 +- movdqa %xmm1,16(%esp) +- movdqa %xmm0,32(%esp) +- call __vpaes_decrypt_core +- movl (%esp),%ebx +- movl 4(%esp),%edx +- pxor 16(%esp),%xmm0 +- movdqa 32(%esp),%xmm1 +- movdqu %xmm0,(%ebx,%esi,1) +- leal 16(%esi),%esi +- subl $16,%edi +- jnc L024cbc_dec_loop +-L026cbc_done: +- movl 8(%esp),%ebx +- movl 48(%esp),%esp +- movdqu %xmm1,(%ebx) +-L022cbc_abort: +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-#endif +diff --git a/mac-x86/crypto/fipsmodule/x86-mont.S b/mac-x86/crypto/fipsmodule/x86-mont.S +deleted file mode 100644 +index 7850a37..0000000 +--- a/mac-x86/crypto/fipsmodule/x86-mont.S ++++ /dev/null +@@ -1,485 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _bn_mul_mont +-.private_extern _bn_mul_mont +-.align 4 +-_bn_mul_mont: +-L_bn_mul_mont_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- xorl %eax,%eax +- movl 40(%esp),%edi +- cmpl $4,%edi +- jl L000just_leave +- leal 20(%esp),%esi +- leal 24(%esp),%edx +- addl $2,%edi +- negl %edi +- leal -32(%esp,%edi,4),%ebp +- negl %edi +- movl %ebp,%eax +- subl %edx,%eax +- andl $2047,%eax +- subl %eax,%ebp +- xorl %ebp,%edx +- andl $2048,%edx +- xorl $2048,%edx +- subl %edx,%ebp +- andl $-64,%ebp +- movl %esp,%eax +- subl %ebp,%eax +- andl $-4096,%eax +- movl %esp,%edx +- leal (%ebp,%eax,1),%esp +- movl (%esp),%eax +- cmpl %ebp,%esp +- ja L001page_walk +- jmp L002page_walk_done +-.align 4,0x90 +-L001page_walk: +- leal -4096(%esp),%esp +- movl (%esp),%eax +- cmpl %ebp,%esp +- ja L001page_walk +-L002page_walk_done: +- movl (%esi),%eax +- movl 4(%esi),%ebx +- movl 8(%esi),%ecx +- movl 12(%esi),%ebp +- movl 16(%esi),%esi +- movl (%esi),%esi +- movl %eax,4(%esp) +- movl %ebx,8(%esp) +- movl %ecx,12(%esp) +- movl %ebp,16(%esp) +- movl %esi,20(%esp) +- leal -3(%edi),%ebx +- movl %edx,24(%esp) +- call L003PIC_me_up +-L003PIC_me_up: +- popl %eax +- movl L_OPENSSL_ia32cap_P$non_lazy_ptr-L003PIC_me_up(%eax),%eax +- btl $26,(%eax) +- jnc L004non_sse2 +- movl $-1,%eax +- movd %eax,%mm7 +- movl 8(%esp),%esi +- movl 12(%esp),%edi +- movl 16(%esp),%ebp +- xorl %edx,%edx +- xorl %ecx,%ecx +- movd (%edi),%mm4 +- movd (%esi),%mm5 +- movd (%ebp),%mm3 +- pmuludq %mm4,%mm5 +- movq %mm5,%mm2 +- movq %mm5,%mm0 +- pand %mm7,%mm0 +- pmuludq 20(%esp),%mm5 +- pmuludq %mm5,%mm3 +- paddq %mm0,%mm3 +- movd 4(%ebp),%mm1 +- movd 4(%esi),%mm0 +- psrlq $32,%mm2 +- psrlq $32,%mm3 +- incl %ecx +-.align 4,0x90 +-L0051st: +- pmuludq %mm4,%mm0 +- pmuludq %mm5,%mm1 +- paddq %mm0,%mm2 +- paddq %mm1,%mm3 +- movq %mm2,%mm0 +- pand %mm7,%mm0 +- movd 4(%ebp,%ecx,4),%mm1 +- paddq %mm0,%mm3 +- movd 4(%esi,%ecx,4),%mm0 +- psrlq $32,%mm2 +- movd %mm3,28(%esp,%ecx,4) +- psrlq $32,%mm3 +- leal 1(%ecx),%ecx +- cmpl %ebx,%ecx +- jl L0051st +- pmuludq %mm4,%mm0 +- pmuludq %mm5,%mm1 +- paddq %mm0,%mm2 +- paddq %mm1,%mm3 +- movq %mm2,%mm0 +- pand %mm7,%mm0 +- paddq %mm0,%mm3 +- movd %mm3,28(%esp,%ecx,4) +- psrlq $32,%mm2 +- psrlq $32,%mm3 +- paddq %mm2,%mm3 +- movq %mm3,32(%esp,%ebx,4) +- incl %edx +-L006outer: +- xorl %ecx,%ecx +- movd (%edi,%edx,4),%mm4 +- movd (%esi),%mm5 +- movd 32(%esp),%mm6 +- movd (%ebp),%mm3 +- pmuludq %mm4,%mm5 +- paddq %mm6,%mm5 +- movq %mm5,%mm0 +- movq %mm5,%mm2 +- pand %mm7,%mm0 +- pmuludq 20(%esp),%mm5 +- pmuludq %mm5,%mm3 +- paddq %mm0,%mm3 +- movd 36(%esp),%mm6 +- movd 4(%ebp),%mm1 +- movd 4(%esi),%mm0 +- psrlq $32,%mm2 +- psrlq $32,%mm3 +- paddq %mm6,%mm2 +- incl %ecx +- decl %ebx +-L007inner: +- pmuludq %mm4,%mm0 +- pmuludq %mm5,%mm1 +- paddq %mm0,%mm2 +- paddq %mm1,%mm3 +- movq %mm2,%mm0 +- movd 36(%esp,%ecx,4),%mm6 +- pand %mm7,%mm0 +- movd 4(%ebp,%ecx,4),%mm1 +- paddq %mm0,%mm3 +- movd 4(%esi,%ecx,4),%mm0 +- psrlq $32,%mm2 +- movd %mm3,28(%esp,%ecx,4) +- psrlq $32,%mm3 +- paddq %mm6,%mm2 +- decl %ebx +- leal 1(%ecx),%ecx +- jnz L007inner +- movl %ecx,%ebx +- pmuludq %mm4,%mm0 +- pmuludq %mm5,%mm1 +- paddq %mm0,%mm2 +- paddq %mm1,%mm3 +- movq %mm2,%mm0 +- pand %mm7,%mm0 +- paddq %mm0,%mm3 +- movd %mm3,28(%esp,%ecx,4) +- psrlq $32,%mm2 +- psrlq $32,%mm3 +- movd 36(%esp,%ebx,4),%mm6 +- paddq %mm2,%mm3 +- paddq %mm6,%mm3 +- movq %mm3,32(%esp,%ebx,4) +- leal 1(%edx),%edx +- cmpl %ebx,%edx +- jle L006outer +- emms 
+- jmp L008common_tail +-.align 4,0x90 +-L004non_sse2: +- movl 8(%esp),%esi +- leal 1(%ebx),%ebp +- movl 12(%esp),%edi +- xorl %ecx,%ecx +- movl %esi,%edx +- andl $1,%ebp +- subl %edi,%edx +- leal 4(%edi,%ebx,4),%eax +- orl %edx,%ebp +- movl (%edi),%edi +- jz L009bn_sqr_mont +- movl %eax,28(%esp) +- movl (%esi),%eax +- xorl %edx,%edx +-.align 4,0x90 +-L010mull: +- movl %edx,%ebp +- mull %edi +- addl %eax,%ebp +- leal 1(%ecx),%ecx +- adcl $0,%edx +- movl (%esi,%ecx,4),%eax +- cmpl %ebx,%ecx +- movl %ebp,28(%esp,%ecx,4) +- jl L010mull +- movl %edx,%ebp +- mull %edi +- movl 20(%esp),%edi +- addl %ebp,%eax +- movl 16(%esp),%esi +- adcl $0,%edx +- imull 32(%esp),%edi +- movl %eax,32(%esp,%ebx,4) +- xorl %ecx,%ecx +- movl %edx,36(%esp,%ebx,4) +- movl %ecx,40(%esp,%ebx,4) +- movl (%esi),%eax +- mull %edi +- addl 32(%esp),%eax +- movl 4(%esi),%eax +- adcl $0,%edx +- incl %ecx +- jmp L0112ndmadd +-.align 4,0x90 +-L0121stmadd: +- movl %edx,%ebp +- mull %edi +- addl 32(%esp,%ecx,4),%ebp +- leal 1(%ecx),%ecx +- adcl $0,%edx +- addl %eax,%ebp +- movl (%esi,%ecx,4),%eax +- adcl $0,%edx +- cmpl %ebx,%ecx +- movl %ebp,28(%esp,%ecx,4) +- jl L0121stmadd +- movl %edx,%ebp +- mull %edi +- addl 32(%esp,%ebx,4),%eax +- movl 20(%esp),%edi +- adcl $0,%edx +- movl 16(%esp),%esi +- addl %eax,%ebp +- adcl $0,%edx +- imull 32(%esp),%edi +- xorl %ecx,%ecx +- addl 36(%esp,%ebx,4),%edx +- movl %ebp,32(%esp,%ebx,4) +- adcl $0,%ecx +- movl (%esi),%eax +- movl %edx,36(%esp,%ebx,4) +- movl %ecx,40(%esp,%ebx,4) +- mull %edi +- addl 32(%esp),%eax +- movl 4(%esi),%eax +- adcl $0,%edx +- movl $1,%ecx +-.align 4,0x90 +-L0112ndmadd: +- movl %edx,%ebp +- mull %edi +- addl 32(%esp,%ecx,4),%ebp +- leal 1(%ecx),%ecx +- adcl $0,%edx +- addl %eax,%ebp +- movl (%esi,%ecx,4),%eax +- adcl $0,%edx +- cmpl %ebx,%ecx +- movl %ebp,24(%esp,%ecx,4) +- jl L0112ndmadd +- movl %edx,%ebp +- mull %edi +- addl 32(%esp,%ebx,4),%ebp +- adcl $0,%edx +- addl %eax,%ebp +- adcl $0,%edx +- movl %ebp,28(%esp,%ebx,4) +- xorl %eax,%eax +- movl 12(%esp),%ecx +- addl 36(%esp,%ebx,4),%edx +- adcl 40(%esp,%ebx,4),%eax +- leal 4(%ecx),%ecx +- movl %edx,32(%esp,%ebx,4) +- cmpl 28(%esp),%ecx +- movl %eax,36(%esp,%ebx,4) +- je L008common_tail +- movl (%ecx),%edi +- movl 8(%esp),%esi +- movl %ecx,12(%esp) +- xorl %ecx,%ecx +- xorl %edx,%edx +- movl (%esi),%eax +- jmp L0121stmadd +-.align 4,0x90 +-L009bn_sqr_mont: +- movl %ebx,(%esp) +- movl %ecx,12(%esp) +- movl %edi,%eax +- mull %edi +- movl %eax,32(%esp) +- movl %edx,%ebx +- shrl $1,%edx +- andl $1,%ebx +- incl %ecx +-.align 4,0x90 +-L013sqr: +- movl (%esi,%ecx,4),%eax +- movl %edx,%ebp +- mull %edi +- addl %ebp,%eax +- leal 1(%ecx),%ecx +- adcl $0,%edx +- leal (%ebx,%eax,2),%ebp +- shrl $31,%eax +- cmpl (%esp),%ecx +- movl %eax,%ebx +- movl %ebp,28(%esp,%ecx,4) +- jl L013sqr +- movl (%esi,%ecx,4),%eax +- movl %edx,%ebp +- mull %edi +- addl %ebp,%eax +- movl 20(%esp),%edi +- adcl $0,%edx +- movl 16(%esp),%esi +- leal (%ebx,%eax,2),%ebp +- imull 32(%esp),%edi +- shrl $31,%eax +- movl %ebp,32(%esp,%ecx,4) +- leal (%eax,%edx,2),%ebp +- movl (%esi),%eax +- shrl $31,%edx +- movl %ebp,36(%esp,%ecx,4) +- movl %edx,40(%esp,%ecx,4) +- mull %edi +- addl 32(%esp),%eax +- movl %ecx,%ebx +- adcl $0,%edx +- movl 4(%esi),%eax +- movl $1,%ecx +-.align 4,0x90 +-L0143rdmadd: +- movl %edx,%ebp +- mull %edi +- addl 32(%esp,%ecx,4),%ebp +- adcl $0,%edx +- addl %eax,%ebp +- movl 4(%esi,%ecx,4),%eax +- adcl $0,%edx +- movl %ebp,28(%esp,%ecx,4) +- movl %edx,%ebp +- mull %edi +- addl 36(%esp,%ecx,4),%ebp +- leal 2(%ecx),%ecx +- adcl $0,%edx 
+- addl %eax,%ebp +- movl (%esi,%ecx,4),%eax +- adcl $0,%edx +- cmpl %ebx,%ecx +- movl %ebp,24(%esp,%ecx,4) +- jl L0143rdmadd +- movl %edx,%ebp +- mull %edi +- addl 32(%esp,%ebx,4),%ebp +- adcl $0,%edx +- addl %eax,%ebp +- adcl $0,%edx +- movl %ebp,28(%esp,%ebx,4) +- movl 12(%esp),%ecx +- xorl %eax,%eax +- movl 8(%esp),%esi +- addl 36(%esp,%ebx,4),%edx +- adcl 40(%esp,%ebx,4),%eax +- movl %edx,32(%esp,%ebx,4) +- cmpl %ebx,%ecx +- movl %eax,36(%esp,%ebx,4) +- je L008common_tail +- movl 4(%esi,%ecx,4),%edi +- leal 1(%ecx),%ecx +- movl %edi,%eax +- movl %ecx,12(%esp) +- mull %edi +- addl 32(%esp,%ecx,4),%eax +- adcl $0,%edx +- movl %eax,32(%esp,%ecx,4) +- xorl %ebp,%ebp +- cmpl %ebx,%ecx +- leal 1(%ecx),%ecx +- je L015sqrlast +- movl %edx,%ebx +- shrl $1,%edx +- andl $1,%ebx +-.align 4,0x90 +-L016sqradd: +- movl (%esi,%ecx,4),%eax +- movl %edx,%ebp +- mull %edi +- addl %ebp,%eax +- leal (%eax,%eax,1),%ebp +- adcl $0,%edx +- shrl $31,%eax +- addl 32(%esp,%ecx,4),%ebp +- leal 1(%ecx),%ecx +- adcl $0,%eax +- addl %ebx,%ebp +- adcl $0,%eax +- cmpl (%esp),%ecx +- movl %ebp,28(%esp,%ecx,4) +- movl %eax,%ebx +- jle L016sqradd +- movl %edx,%ebp +- addl %edx,%edx +- shrl $31,%ebp +- addl %ebx,%edx +- adcl $0,%ebp +-L015sqrlast: +- movl 20(%esp),%edi +- movl 16(%esp),%esi +- imull 32(%esp),%edi +- addl 32(%esp,%ecx,4),%edx +- movl (%esi),%eax +- adcl $0,%ebp +- movl %edx,32(%esp,%ecx,4) +- movl %ebp,36(%esp,%ecx,4) +- mull %edi +- addl 32(%esp),%eax +- leal -1(%ecx),%ebx +- adcl $0,%edx +- movl $1,%ecx +- movl 4(%esi),%eax +- jmp L0143rdmadd +-.align 4,0x90 +-L008common_tail: +- movl 16(%esp),%ebp +- movl 4(%esp),%edi +- leal 32(%esp),%esi +- movl (%esi),%eax +- movl %ebx,%ecx +- xorl %edx,%edx +-.align 4,0x90 +-L017sub: +- sbbl (%ebp,%edx,4),%eax +- movl %eax,(%edi,%edx,4) +- decl %ecx +- movl 4(%esi,%edx,4),%eax +- leal 1(%edx),%edx +- jge L017sub +- sbbl $0,%eax +- movl $-1,%edx +- xorl %eax,%edx +- jmp L018copy +-.align 4,0x90 +-L018copy: +- movl 32(%esp,%ebx,4),%esi +- movl (%edi,%ebx,4),%ebp +- movl %ecx,32(%esp,%ebx,4) +- andl %eax,%esi +- andl %edx,%ebp +- orl %esi,%ebp +- movl %ebp,(%edi,%ebx,4) +- decl %ebx +- jge L018copy +- movl 24(%esp),%esp +- movl $1,%eax +-L000just_leave: +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105 +-.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56 +-.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 +-.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 +-.byte 111,114,103,62,0 +-.section __IMPORT,__pointers,non_lazy_symbol_pointers +-L_OPENSSL_ia32cap_P$non_lazy_ptr: +-.indirect_symbol _OPENSSL_ia32cap_P +-.long 0 +-#endif +diff --git a/mac-x86/crypto/test/trampoline-x86.S b/mac-x86/crypto/test/trampoline-x86.S +deleted file mode 100644 +index fd40b95..0000000 +--- a/mac-x86/crypto/test/trampoline-x86.S ++++ /dev/null +@@ -1,169 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__i386__) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.globl _abi_test_trampoline +-.private_extern _abi_test_trampoline +-.align 4 +-_abi_test_trampoline: +-L_abi_test_trampoline_begin: +- pushl %ebp +- pushl %ebx +- pushl %esi +- pushl %edi +- movl 24(%esp),%ecx +- movl (%ecx),%esi +- movl 4(%ecx),%edi +- movl 8(%ecx),%ebx +- movl 12(%ecx),%ebp +- subl $44,%esp +- movl 72(%esp),%eax +- xorl %ecx,%ecx +-L000loop: +- cmpl 76(%esp),%ecx +- jae L001loop_done +- movl (%eax,%ecx,4),%edx +- movl %edx,(%esp,%ecx,4) +- addl $1,%ecx +- jmp L000loop +-L001loop_done: +- call *64(%esp) +- addl $44,%esp +- movl 24(%esp),%ecx +- movl %esi,(%ecx) +- movl %edi,4(%ecx) +- movl %ebx,8(%ecx) +- movl %ebp,12(%ecx) +- popl %edi +- popl %esi +- popl %ebx +- popl %ebp +- ret +-.globl _abi_test_get_and_clear_direction_flag +-.private_extern _abi_test_get_and_clear_direction_flag +-.align 4 +-_abi_test_get_and_clear_direction_flag: +-L_abi_test_get_and_clear_direction_flag_begin: +- pushfl +- popl %eax +- andl $1024,%eax +- shrl $10,%eax +- cld +- ret +-.globl _abi_test_set_direction_flag +-.private_extern _abi_test_set_direction_flag +-.align 4 +-_abi_test_set_direction_flag: +-L_abi_test_set_direction_flag_begin: +- std +- ret +-.globl _abi_test_clobber_eax +-.private_extern _abi_test_clobber_eax +-.align 4 +-_abi_test_clobber_eax: +-L_abi_test_clobber_eax_begin: +- xorl %eax,%eax +- ret +-.globl _abi_test_clobber_ebx +-.private_extern _abi_test_clobber_ebx +-.align 4 +-_abi_test_clobber_ebx: +-L_abi_test_clobber_ebx_begin: +- xorl %ebx,%ebx +- ret +-.globl _abi_test_clobber_ecx +-.private_extern _abi_test_clobber_ecx +-.align 4 +-_abi_test_clobber_ecx: +-L_abi_test_clobber_ecx_begin: +- xorl %ecx,%ecx +- ret +-.globl _abi_test_clobber_edx +-.private_extern _abi_test_clobber_edx +-.align 4 +-_abi_test_clobber_edx: +-L_abi_test_clobber_edx_begin: +- xorl %edx,%edx +- ret +-.globl _abi_test_clobber_edi +-.private_extern _abi_test_clobber_edi +-.align 4 +-_abi_test_clobber_edi: +-L_abi_test_clobber_edi_begin: +- xorl %edi,%edi +- ret +-.globl _abi_test_clobber_esi +-.private_extern _abi_test_clobber_esi +-.align 4 +-_abi_test_clobber_esi: +-L_abi_test_clobber_esi_begin: +- xorl %esi,%esi +- ret +-.globl _abi_test_clobber_ebp +-.private_extern _abi_test_clobber_ebp +-.align 4 +-_abi_test_clobber_ebp: +-L_abi_test_clobber_ebp_begin: +- xorl %ebp,%ebp +- ret +-.globl _abi_test_clobber_xmm0 +-.private_extern _abi_test_clobber_xmm0 +-.align 4 +-_abi_test_clobber_xmm0: +-L_abi_test_clobber_xmm0_begin: +- pxor %xmm0,%xmm0 +- ret +-.globl _abi_test_clobber_xmm1 +-.private_extern _abi_test_clobber_xmm1 +-.align 4 +-_abi_test_clobber_xmm1: +-L_abi_test_clobber_xmm1_begin: +- pxor %xmm1,%xmm1 +- ret +-.globl _abi_test_clobber_xmm2 +-.private_extern _abi_test_clobber_xmm2 +-.align 4 +-_abi_test_clobber_xmm2: +-L_abi_test_clobber_xmm2_begin: +- pxor %xmm2,%xmm2 +- ret +-.globl _abi_test_clobber_xmm3 +-.private_extern _abi_test_clobber_xmm3 +-.align 4 +-_abi_test_clobber_xmm3: +-L_abi_test_clobber_xmm3_begin: +- pxor %xmm3,%xmm3 +- ret +-.globl _abi_test_clobber_xmm4 +-.private_extern _abi_test_clobber_xmm4 +-.align 4 +-_abi_test_clobber_xmm4: +-L_abi_test_clobber_xmm4_begin: +- pxor %xmm4,%xmm4 +- ret +-.globl _abi_test_clobber_xmm5 +-.private_extern _abi_test_clobber_xmm5 +-.align 4 +-_abi_test_clobber_xmm5: +-L_abi_test_clobber_xmm5_begin: +- pxor %xmm5,%xmm5 +- ret +-.globl _abi_test_clobber_xmm6 +-.private_extern _abi_test_clobber_xmm6 +-.align 4 +-_abi_test_clobber_xmm6: 
+-L_abi_test_clobber_xmm6_begin: +- pxor %xmm6,%xmm6 +- ret +-.globl _abi_test_clobber_xmm7 +-.private_extern _abi_test_clobber_xmm7 +-.align 4 +-_abi_test_clobber_xmm7: +-L_abi_test_clobber_xmm7_begin: +- pxor %xmm7,%xmm7 +- ret +-#endif +diff --git a/mac-x86_64/crypto/chacha/chacha-x86_64.S b/mac-x86_64/crypto/chacha/chacha-x86_64.S +deleted file mode 100644 +index 782ddf4..0000000 +--- a/mac-x86_64/crypto/chacha/chacha-x86_64.S ++++ /dev/null +@@ -1,1625 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +- +-.p2align 6 +-L$zero: +-.long 0,0,0,0 +-L$one: +-.long 1,0,0,0 +-L$inc: +-.long 0,1,2,3 +-L$four: +-.long 4,4,4,4 +-L$incy: +-.long 0,2,4,6,1,3,5,7 +-L$eight: +-.long 8,8,8,8,8,8,8,8 +-L$rot16: +-.byte 0x2,0x3,0x0,0x1, 0x6,0x7,0x4,0x5, 0xa,0xb,0x8,0x9, 0xe,0xf,0xc,0xd +-L$rot24: +-.byte 0x3,0x0,0x1,0x2, 0x7,0x4,0x5,0x6, 0xb,0x8,0x9,0xa, 0xf,0xc,0xd,0xe +-L$sigma: +-.byte 101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107,0 +-.p2align 6 +-L$zeroz: +-.long 0,0,0,0, 1,0,0,0, 2,0,0,0, 3,0,0,0 +-L$fourz: +-.long 4,0,0,0, 4,0,0,0, 4,0,0,0, 4,0,0,0 +-L$incz: +-.long 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 +-L$sixteen: +-.long 16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16 +-.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +-.globl _ChaCha20_ctr32 +-.private_extern _ChaCha20_ctr32 +- +-.p2align 6 +-_ChaCha20_ctr32: +- +- cmpq $0,%rdx +- je L$no_data +- movq _OPENSSL_ia32cap_P+4(%rip),%r10 +- testl $512,%r10d +- jnz L$ChaCha20_ssse3 +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- subq $64+24,%rsp +- +-L$ctr32_body: +- +- +- movdqu (%rcx),%xmm1 +- movdqu 16(%rcx),%xmm2 +- movdqu (%r8),%xmm3 +- movdqa L$one(%rip),%xmm4 +- +- +- movdqa %xmm1,16(%rsp) +- movdqa %xmm2,32(%rsp) +- movdqa %xmm3,48(%rsp) +- movq %rdx,%rbp +- jmp L$oop_outer +- +-.p2align 5 +-L$oop_outer: +- movl $0x61707865,%eax +- movl $0x3320646e,%ebx +- movl $0x79622d32,%ecx +- movl $0x6b206574,%edx +- movl 16(%rsp),%r8d +- movl 20(%rsp),%r9d +- movl 24(%rsp),%r10d +- movl 28(%rsp),%r11d +- movd %xmm3,%r12d +- movl 52(%rsp),%r13d +- movl 56(%rsp),%r14d +- movl 60(%rsp),%r15d +- +- movq %rbp,64+0(%rsp) +- movl $10,%ebp +- movq %rsi,64+8(%rsp) +-.byte 102,72,15,126,214 +- movq %rdi,64+16(%rsp) +- movq %rsi,%rdi +- shrq $32,%rdi +- jmp L$oop +- +-.p2align 5 +-L$oop: +- addl %r8d,%eax +- xorl %eax,%r12d +- roll $16,%r12d +- addl %r9d,%ebx +- xorl %ebx,%r13d +- roll $16,%r13d +- addl %r12d,%esi +- xorl %esi,%r8d +- roll $12,%r8d +- addl %r13d,%edi +- xorl %edi,%r9d +- roll $12,%r9d +- addl %r8d,%eax +- xorl %eax,%r12d +- roll $8,%r12d +- addl %r9d,%ebx +- xorl %ebx,%r13d +- roll $8,%r13d +- addl %r12d,%esi +- xorl %esi,%r8d +- roll $7,%r8d +- addl %r13d,%edi +- xorl %edi,%r9d +- roll $7,%r9d +- movl %esi,32(%rsp) +- movl %edi,36(%rsp) +- movl 40(%rsp),%esi +- movl 44(%rsp),%edi +- addl %r10d,%ecx +- xorl %ecx,%r14d +- roll $16,%r14d +- addl %r11d,%edx +- xorl %edx,%r15d +- roll $16,%r15d +- addl %r14d,%esi +- xorl %esi,%r10d +- roll $12,%r10d +- addl %r15d,%edi +- xorl %edi,%r11d +- roll $12,%r11d 
+- addl %r10d,%ecx +- xorl %ecx,%r14d +- roll $8,%r14d +- addl %r11d,%edx +- xorl %edx,%r15d +- roll $8,%r15d +- addl %r14d,%esi +- xorl %esi,%r10d +- roll $7,%r10d +- addl %r15d,%edi +- xorl %edi,%r11d +- roll $7,%r11d +- addl %r9d,%eax +- xorl %eax,%r15d +- roll $16,%r15d +- addl %r10d,%ebx +- xorl %ebx,%r12d +- roll $16,%r12d +- addl %r15d,%esi +- xorl %esi,%r9d +- roll $12,%r9d +- addl %r12d,%edi +- xorl %edi,%r10d +- roll $12,%r10d +- addl %r9d,%eax +- xorl %eax,%r15d +- roll $8,%r15d +- addl %r10d,%ebx +- xorl %ebx,%r12d +- roll $8,%r12d +- addl %r15d,%esi +- xorl %esi,%r9d +- roll $7,%r9d +- addl %r12d,%edi +- xorl %edi,%r10d +- roll $7,%r10d +- movl %esi,40(%rsp) +- movl %edi,44(%rsp) +- movl 32(%rsp),%esi +- movl 36(%rsp),%edi +- addl %r11d,%ecx +- xorl %ecx,%r13d +- roll $16,%r13d +- addl %r8d,%edx +- xorl %edx,%r14d +- roll $16,%r14d +- addl %r13d,%esi +- xorl %esi,%r11d +- roll $12,%r11d +- addl %r14d,%edi +- xorl %edi,%r8d +- roll $12,%r8d +- addl %r11d,%ecx +- xorl %ecx,%r13d +- roll $8,%r13d +- addl %r8d,%edx +- xorl %edx,%r14d +- roll $8,%r14d +- addl %r13d,%esi +- xorl %esi,%r11d +- roll $7,%r11d +- addl %r14d,%edi +- xorl %edi,%r8d +- roll $7,%r8d +- decl %ebp +- jnz L$oop +- movl %edi,36(%rsp) +- movl %esi,32(%rsp) +- movq 64(%rsp),%rbp +- movdqa %xmm2,%xmm1 +- movq 64+8(%rsp),%rsi +- paddd %xmm4,%xmm3 +- movq 64+16(%rsp),%rdi +- +- addl $0x61707865,%eax +- addl $0x3320646e,%ebx +- addl $0x79622d32,%ecx +- addl $0x6b206574,%edx +- addl 16(%rsp),%r8d +- addl 20(%rsp),%r9d +- addl 24(%rsp),%r10d +- addl 28(%rsp),%r11d +- addl 48(%rsp),%r12d +- addl 52(%rsp),%r13d +- addl 56(%rsp),%r14d +- addl 60(%rsp),%r15d +- paddd 32(%rsp),%xmm1 +- +- cmpq $64,%rbp +- jb L$tail +- +- xorl 0(%rsi),%eax +- xorl 4(%rsi),%ebx +- xorl 8(%rsi),%ecx +- xorl 12(%rsi),%edx +- xorl 16(%rsi),%r8d +- xorl 20(%rsi),%r9d +- xorl 24(%rsi),%r10d +- xorl 28(%rsi),%r11d +- movdqu 32(%rsi),%xmm0 +- xorl 48(%rsi),%r12d +- xorl 52(%rsi),%r13d +- xorl 56(%rsi),%r14d +- xorl 60(%rsi),%r15d +- leaq 64(%rsi),%rsi +- pxor %xmm1,%xmm0 +- +- movdqa %xmm2,32(%rsp) +- movd %xmm3,48(%rsp) +- +- movl %eax,0(%rdi) +- movl %ebx,4(%rdi) +- movl %ecx,8(%rdi) +- movl %edx,12(%rdi) +- movl %r8d,16(%rdi) +- movl %r9d,20(%rdi) +- movl %r10d,24(%rdi) +- movl %r11d,28(%rdi) +- movdqu %xmm0,32(%rdi) +- movl %r12d,48(%rdi) +- movl %r13d,52(%rdi) +- movl %r14d,56(%rdi) +- movl %r15d,60(%rdi) +- leaq 64(%rdi),%rdi +- +- subq $64,%rbp +- jnz L$oop_outer +- +- jmp L$done +- +-.p2align 4 +-L$tail: +- movl %eax,0(%rsp) +- movl %ebx,4(%rsp) +- xorq %rbx,%rbx +- movl %ecx,8(%rsp) +- movl %edx,12(%rsp) +- movl %r8d,16(%rsp) +- movl %r9d,20(%rsp) +- movl %r10d,24(%rsp) +- movl %r11d,28(%rsp) +- movdqa %xmm1,32(%rsp) +- movl %r12d,48(%rsp) +- movl %r13d,52(%rsp) +- movl %r14d,56(%rsp) +- movl %r15d,60(%rsp) +- +-L$oop_tail: +- movzbl (%rsi,%rbx,1),%eax +- movzbl (%rsp,%rbx,1),%edx +- leaq 1(%rbx),%rbx +- xorl %edx,%eax +- movb %al,-1(%rdi,%rbx,1) +- decq %rbp +- jnz L$oop_tail +- +-L$done: +- leaq 64+24+48(%rsp),%rsi +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$no_data: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-ChaCha20_ssse3: +-L$ChaCha20_ssse3: +- +- movq %rsp,%r9 +- +- cmpq $128,%rdx +- ja L$ChaCha20_4x +- +-L$do_sse3_after_all: +- subq $64+8,%rsp +- movdqa L$sigma(%rip),%xmm0 +- movdqu (%rcx),%xmm1 +- movdqu 16(%rcx),%xmm2 +- movdqu (%r8),%xmm3 +- movdqa L$rot16(%rip),%xmm6 +- movdqa 
L$rot24(%rip),%xmm7 +- +- movdqa %xmm0,0(%rsp) +- movdqa %xmm1,16(%rsp) +- movdqa %xmm2,32(%rsp) +- movdqa %xmm3,48(%rsp) +- movq $10,%r8 +- jmp L$oop_ssse3 +- +-.p2align 5 +-L$oop_outer_ssse3: +- movdqa L$one(%rip),%xmm3 +- movdqa 0(%rsp),%xmm0 +- movdqa 16(%rsp),%xmm1 +- movdqa 32(%rsp),%xmm2 +- paddd 48(%rsp),%xmm3 +- movq $10,%r8 +- movdqa %xmm3,48(%rsp) +- jmp L$oop_ssse3 +- +-.p2align 5 +-L$oop_ssse3: +- paddd %xmm1,%xmm0 +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,222 +- paddd %xmm3,%xmm2 +- pxor %xmm2,%xmm1 +- movdqa %xmm1,%xmm4 +- psrld $20,%xmm1 +- pslld $12,%xmm4 +- por %xmm4,%xmm1 +- paddd %xmm1,%xmm0 +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,223 +- paddd %xmm3,%xmm2 +- pxor %xmm2,%xmm1 +- movdqa %xmm1,%xmm4 +- psrld $25,%xmm1 +- pslld $7,%xmm4 +- por %xmm4,%xmm1 +- pshufd $78,%xmm2,%xmm2 +- pshufd $57,%xmm1,%xmm1 +- pshufd $147,%xmm3,%xmm3 +- nop +- paddd %xmm1,%xmm0 +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,222 +- paddd %xmm3,%xmm2 +- pxor %xmm2,%xmm1 +- movdqa %xmm1,%xmm4 +- psrld $20,%xmm1 +- pslld $12,%xmm4 +- por %xmm4,%xmm1 +- paddd %xmm1,%xmm0 +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,223 +- paddd %xmm3,%xmm2 +- pxor %xmm2,%xmm1 +- movdqa %xmm1,%xmm4 +- psrld $25,%xmm1 +- pslld $7,%xmm4 +- por %xmm4,%xmm1 +- pshufd $78,%xmm2,%xmm2 +- pshufd $147,%xmm1,%xmm1 +- pshufd $57,%xmm3,%xmm3 +- decq %r8 +- jnz L$oop_ssse3 +- paddd 0(%rsp),%xmm0 +- paddd 16(%rsp),%xmm1 +- paddd 32(%rsp),%xmm2 +- paddd 48(%rsp),%xmm3 +- +- cmpq $64,%rdx +- jb L$tail_ssse3 +- +- movdqu 0(%rsi),%xmm4 +- movdqu 16(%rsi),%xmm5 +- pxor %xmm4,%xmm0 +- movdqu 32(%rsi),%xmm4 +- pxor %xmm5,%xmm1 +- movdqu 48(%rsi),%xmm5 +- leaq 64(%rsi),%rsi +- pxor %xmm4,%xmm2 +- pxor %xmm5,%xmm3 +- +- movdqu %xmm0,0(%rdi) +- movdqu %xmm1,16(%rdi) +- movdqu %xmm2,32(%rdi) +- movdqu %xmm3,48(%rdi) +- leaq 64(%rdi),%rdi +- +- subq $64,%rdx +- jnz L$oop_outer_ssse3 +- +- jmp L$done_ssse3 +- +-.p2align 4 +-L$tail_ssse3: +- movdqa %xmm0,0(%rsp) +- movdqa %xmm1,16(%rsp) +- movdqa %xmm2,32(%rsp) +- movdqa %xmm3,48(%rsp) +- xorq %r8,%r8 +- +-L$oop_tail_ssse3: +- movzbl (%rsi,%r8,1),%eax +- movzbl (%rsp,%r8,1),%ecx +- leaq 1(%r8),%r8 +- xorl %ecx,%eax +- movb %al,-1(%rdi,%r8,1) +- decq %rdx +- jnz L$oop_tail_ssse3 +- +-L$done_ssse3: +- leaq (%r9),%rsp +- +-L$ssse3_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-ChaCha20_4x: +-L$ChaCha20_4x: +- +- movq %rsp,%r9 +- +- movq %r10,%r11 +- shrq $32,%r10 +- testq $32,%r10 +- jnz L$ChaCha20_8x +- cmpq $192,%rdx +- ja L$proceed4x +- +- andq $71303168,%r11 +- cmpq $4194304,%r11 +- je L$do_sse3_after_all +- +-L$proceed4x: +- subq $0x140+8,%rsp +- movdqa L$sigma(%rip),%xmm11 +- movdqu (%rcx),%xmm15 +- movdqu 16(%rcx),%xmm7 +- movdqu (%r8),%xmm3 +- leaq 256(%rsp),%rcx +- leaq L$rot16(%rip),%r10 +- leaq L$rot24(%rip),%r11 +- +- pshufd $0x00,%xmm11,%xmm8 +- pshufd $0x55,%xmm11,%xmm9 +- movdqa %xmm8,64(%rsp) +- pshufd $0xaa,%xmm11,%xmm10 +- movdqa %xmm9,80(%rsp) +- pshufd $0xff,%xmm11,%xmm11 +- movdqa %xmm10,96(%rsp) +- movdqa %xmm11,112(%rsp) +- +- pshufd $0x00,%xmm15,%xmm12 +- pshufd $0x55,%xmm15,%xmm13 +- movdqa %xmm12,128-256(%rcx) +- pshufd $0xaa,%xmm15,%xmm14 +- movdqa %xmm13,144-256(%rcx) +- pshufd $0xff,%xmm15,%xmm15 +- movdqa %xmm14,160-256(%rcx) +- movdqa %xmm15,176-256(%rcx) +- +- pshufd $0x00,%xmm7,%xmm4 +- pshufd $0x55,%xmm7,%xmm5 +- movdqa %xmm4,192-256(%rcx) +- pshufd $0xaa,%xmm7,%xmm6 +- movdqa %xmm5,208-256(%rcx) +- pshufd $0xff,%xmm7,%xmm7 +- movdqa %xmm6,224-256(%rcx) +- movdqa %xmm7,240-256(%rcx) +- +- pshufd $0x00,%xmm3,%xmm0 +- pshufd $0x55,%xmm3,%xmm1 +- paddd L$inc(%rip),%xmm0 
+- pshufd $0xaa,%xmm3,%xmm2 +- movdqa %xmm1,272-256(%rcx) +- pshufd $0xff,%xmm3,%xmm3 +- movdqa %xmm2,288-256(%rcx) +- movdqa %xmm3,304-256(%rcx) +- +- jmp L$oop_enter4x +- +-.p2align 5 +-L$oop_outer4x: +- movdqa 64(%rsp),%xmm8 +- movdqa 80(%rsp),%xmm9 +- movdqa 96(%rsp),%xmm10 +- movdqa 112(%rsp),%xmm11 +- movdqa 128-256(%rcx),%xmm12 +- movdqa 144-256(%rcx),%xmm13 +- movdqa 160-256(%rcx),%xmm14 +- movdqa 176-256(%rcx),%xmm15 +- movdqa 192-256(%rcx),%xmm4 +- movdqa 208-256(%rcx),%xmm5 +- movdqa 224-256(%rcx),%xmm6 +- movdqa 240-256(%rcx),%xmm7 +- movdqa 256-256(%rcx),%xmm0 +- movdqa 272-256(%rcx),%xmm1 +- movdqa 288-256(%rcx),%xmm2 +- movdqa 304-256(%rcx),%xmm3 +- paddd L$four(%rip),%xmm0 +- +-L$oop_enter4x: +- movdqa %xmm6,32(%rsp) +- movdqa %xmm7,48(%rsp) +- movdqa (%r10),%xmm7 +- movl $10,%eax +- movdqa %xmm0,256-256(%rcx) +- jmp L$oop4x +- +-.p2align 5 +-L$oop4x: +- paddd %xmm12,%xmm8 +- paddd %xmm13,%xmm9 +- pxor %xmm8,%xmm0 +- pxor %xmm9,%xmm1 +-.byte 102,15,56,0,199 +-.byte 102,15,56,0,207 +- paddd %xmm0,%xmm4 +- paddd %xmm1,%xmm5 +- pxor %xmm4,%xmm12 +- pxor %xmm5,%xmm13 +- movdqa %xmm12,%xmm6 +- pslld $12,%xmm12 +- psrld $20,%xmm6 +- movdqa %xmm13,%xmm7 +- pslld $12,%xmm13 +- por %xmm6,%xmm12 +- psrld $20,%xmm7 +- movdqa (%r11),%xmm6 +- por %xmm7,%xmm13 +- paddd %xmm12,%xmm8 +- paddd %xmm13,%xmm9 +- pxor %xmm8,%xmm0 +- pxor %xmm9,%xmm1 +-.byte 102,15,56,0,198 +-.byte 102,15,56,0,206 +- paddd %xmm0,%xmm4 +- paddd %xmm1,%xmm5 +- pxor %xmm4,%xmm12 +- pxor %xmm5,%xmm13 +- movdqa %xmm12,%xmm7 +- pslld $7,%xmm12 +- psrld $25,%xmm7 +- movdqa %xmm13,%xmm6 +- pslld $7,%xmm13 +- por %xmm7,%xmm12 +- psrld $25,%xmm6 +- movdqa (%r10),%xmm7 +- por %xmm6,%xmm13 +- movdqa %xmm4,0(%rsp) +- movdqa %xmm5,16(%rsp) +- movdqa 32(%rsp),%xmm4 +- movdqa 48(%rsp),%xmm5 +- paddd %xmm14,%xmm10 +- paddd %xmm15,%xmm11 +- pxor %xmm10,%xmm2 +- pxor %xmm11,%xmm3 +-.byte 102,15,56,0,215 +-.byte 102,15,56,0,223 +- paddd %xmm2,%xmm4 +- paddd %xmm3,%xmm5 +- pxor %xmm4,%xmm14 +- pxor %xmm5,%xmm15 +- movdqa %xmm14,%xmm6 +- pslld $12,%xmm14 +- psrld $20,%xmm6 +- movdqa %xmm15,%xmm7 +- pslld $12,%xmm15 +- por %xmm6,%xmm14 +- psrld $20,%xmm7 +- movdqa (%r11),%xmm6 +- por %xmm7,%xmm15 +- paddd %xmm14,%xmm10 +- paddd %xmm15,%xmm11 +- pxor %xmm10,%xmm2 +- pxor %xmm11,%xmm3 +-.byte 102,15,56,0,214 +-.byte 102,15,56,0,222 +- paddd %xmm2,%xmm4 +- paddd %xmm3,%xmm5 +- pxor %xmm4,%xmm14 +- pxor %xmm5,%xmm15 +- movdqa %xmm14,%xmm7 +- pslld $7,%xmm14 +- psrld $25,%xmm7 +- movdqa %xmm15,%xmm6 +- pslld $7,%xmm15 +- por %xmm7,%xmm14 +- psrld $25,%xmm6 +- movdqa (%r10),%xmm7 +- por %xmm6,%xmm15 +- paddd %xmm13,%xmm8 +- paddd %xmm14,%xmm9 +- pxor %xmm8,%xmm3 +- pxor %xmm9,%xmm0 +-.byte 102,15,56,0,223 +-.byte 102,15,56,0,199 +- paddd %xmm3,%xmm4 +- paddd %xmm0,%xmm5 +- pxor %xmm4,%xmm13 +- pxor %xmm5,%xmm14 +- movdqa %xmm13,%xmm6 +- pslld $12,%xmm13 +- psrld $20,%xmm6 +- movdqa %xmm14,%xmm7 +- pslld $12,%xmm14 +- por %xmm6,%xmm13 +- psrld $20,%xmm7 +- movdqa (%r11),%xmm6 +- por %xmm7,%xmm14 +- paddd %xmm13,%xmm8 +- paddd %xmm14,%xmm9 +- pxor %xmm8,%xmm3 +- pxor %xmm9,%xmm0 +-.byte 102,15,56,0,222 +-.byte 102,15,56,0,198 +- paddd %xmm3,%xmm4 +- paddd %xmm0,%xmm5 +- pxor %xmm4,%xmm13 +- pxor %xmm5,%xmm14 +- movdqa %xmm13,%xmm7 +- pslld $7,%xmm13 +- psrld $25,%xmm7 +- movdqa %xmm14,%xmm6 +- pslld $7,%xmm14 +- por %xmm7,%xmm13 +- psrld $25,%xmm6 +- movdqa (%r10),%xmm7 +- por %xmm6,%xmm14 +- movdqa %xmm4,32(%rsp) +- movdqa %xmm5,48(%rsp) +- movdqa 0(%rsp),%xmm4 +- movdqa 16(%rsp),%xmm5 +- paddd %xmm15,%xmm10 +- paddd %xmm12,%xmm11 +- pxor 
%xmm10,%xmm1 +- pxor %xmm11,%xmm2 +-.byte 102,15,56,0,207 +-.byte 102,15,56,0,215 +- paddd %xmm1,%xmm4 +- paddd %xmm2,%xmm5 +- pxor %xmm4,%xmm15 +- pxor %xmm5,%xmm12 +- movdqa %xmm15,%xmm6 +- pslld $12,%xmm15 +- psrld $20,%xmm6 +- movdqa %xmm12,%xmm7 +- pslld $12,%xmm12 +- por %xmm6,%xmm15 +- psrld $20,%xmm7 +- movdqa (%r11),%xmm6 +- por %xmm7,%xmm12 +- paddd %xmm15,%xmm10 +- paddd %xmm12,%xmm11 +- pxor %xmm10,%xmm1 +- pxor %xmm11,%xmm2 +-.byte 102,15,56,0,206 +-.byte 102,15,56,0,214 +- paddd %xmm1,%xmm4 +- paddd %xmm2,%xmm5 +- pxor %xmm4,%xmm15 +- pxor %xmm5,%xmm12 +- movdqa %xmm15,%xmm7 +- pslld $7,%xmm15 +- psrld $25,%xmm7 +- movdqa %xmm12,%xmm6 +- pslld $7,%xmm12 +- por %xmm7,%xmm15 +- psrld $25,%xmm6 +- movdqa (%r10),%xmm7 +- por %xmm6,%xmm12 +- decl %eax +- jnz L$oop4x +- +- paddd 64(%rsp),%xmm8 +- paddd 80(%rsp),%xmm9 +- paddd 96(%rsp),%xmm10 +- paddd 112(%rsp),%xmm11 +- +- movdqa %xmm8,%xmm6 +- punpckldq %xmm9,%xmm8 +- movdqa %xmm10,%xmm7 +- punpckldq %xmm11,%xmm10 +- punpckhdq %xmm9,%xmm6 +- punpckhdq %xmm11,%xmm7 +- movdqa %xmm8,%xmm9 +- punpcklqdq %xmm10,%xmm8 +- movdqa %xmm6,%xmm11 +- punpcklqdq %xmm7,%xmm6 +- punpckhqdq %xmm10,%xmm9 +- punpckhqdq %xmm7,%xmm11 +- paddd 128-256(%rcx),%xmm12 +- paddd 144-256(%rcx),%xmm13 +- paddd 160-256(%rcx),%xmm14 +- paddd 176-256(%rcx),%xmm15 +- +- movdqa %xmm8,0(%rsp) +- movdqa %xmm9,16(%rsp) +- movdqa 32(%rsp),%xmm8 +- movdqa 48(%rsp),%xmm9 +- +- movdqa %xmm12,%xmm10 +- punpckldq %xmm13,%xmm12 +- movdqa %xmm14,%xmm7 +- punpckldq %xmm15,%xmm14 +- punpckhdq %xmm13,%xmm10 +- punpckhdq %xmm15,%xmm7 +- movdqa %xmm12,%xmm13 +- punpcklqdq %xmm14,%xmm12 +- movdqa %xmm10,%xmm15 +- punpcklqdq %xmm7,%xmm10 +- punpckhqdq %xmm14,%xmm13 +- punpckhqdq %xmm7,%xmm15 +- paddd 192-256(%rcx),%xmm4 +- paddd 208-256(%rcx),%xmm5 +- paddd 224-256(%rcx),%xmm8 +- paddd 240-256(%rcx),%xmm9 +- +- movdqa %xmm6,32(%rsp) +- movdqa %xmm11,48(%rsp) +- +- movdqa %xmm4,%xmm14 +- punpckldq %xmm5,%xmm4 +- movdqa %xmm8,%xmm7 +- punpckldq %xmm9,%xmm8 +- punpckhdq %xmm5,%xmm14 +- punpckhdq %xmm9,%xmm7 +- movdqa %xmm4,%xmm5 +- punpcklqdq %xmm8,%xmm4 +- movdqa %xmm14,%xmm9 +- punpcklqdq %xmm7,%xmm14 +- punpckhqdq %xmm8,%xmm5 +- punpckhqdq %xmm7,%xmm9 +- paddd 256-256(%rcx),%xmm0 +- paddd 272-256(%rcx),%xmm1 +- paddd 288-256(%rcx),%xmm2 +- paddd 304-256(%rcx),%xmm3 +- +- movdqa %xmm0,%xmm8 +- punpckldq %xmm1,%xmm0 +- movdqa %xmm2,%xmm7 +- punpckldq %xmm3,%xmm2 +- punpckhdq %xmm1,%xmm8 +- punpckhdq %xmm3,%xmm7 +- movdqa %xmm0,%xmm1 +- punpcklqdq %xmm2,%xmm0 +- movdqa %xmm8,%xmm3 +- punpcklqdq %xmm7,%xmm8 +- punpckhqdq %xmm2,%xmm1 +- punpckhqdq %xmm7,%xmm3 +- cmpq $256,%rdx +- jb L$tail4x +- +- movdqu 0(%rsi),%xmm6 +- movdqu 16(%rsi),%xmm11 +- movdqu 32(%rsi),%xmm2 +- movdqu 48(%rsi),%xmm7 +- pxor 0(%rsp),%xmm6 +- pxor %xmm12,%xmm11 +- pxor %xmm4,%xmm2 +- pxor %xmm0,%xmm7 +- +- movdqu %xmm6,0(%rdi) +- movdqu 64(%rsi),%xmm6 +- movdqu %xmm11,16(%rdi) +- movdqu 80(%rsi),%xmm11 +- movdqu %xmm2,32(%rdi) +- movdqu 96(%rsi),%xmm2 +- movdqu %xmm7,48(%rdi) +- movdqu 112(%rsi),%xmm7 +- leaq 128(%rsi),%rsi +- pxor 16(%rsp),%xmm6 +- pxor %xmm13,%xmm11 +- pxor %xmm5,%xmm2 +- pxor %xmm1,%xmm7 +- +- movdqu %xmm6,64(%rdi) +- movdqu 0(%rsi),%xmm6 +- movdqu %xmm11,80(%rdi) +- movdqu 16(%rsi),%xmm11 +- movdqu %xmm2,96(%rdi) +- movdqu 32(%rsi),%xmm2 +- movdqu %xmm7,112(%rdi) +- leaq 128(%rdi),%rdi +- movdqu 48(%rsi),%xmm7 +- pxor 32(%rsp),%xmm6 +- pxor %xmm10,%xmm11 +- pxor %xmm14,%xmm2 +- pxor %xmm8,%xmm7 +- +- movdqu %xmm6,0(%rdi) +- movdqu 64(%rsi),%xmm6 +- movdqu %xmm11,16(%rdi) +- movdqu 
80(%rsi),%xmm11 +- movdqu %xmm2,32(%rdi) +- movdqu 96(%rsi),%xmm2 +- movdqu %xmm7,48(%rdi) +- movdqu 112(%rsi),%xmm7 +- leaq 128(%rsi),%rsi +- pxor 48(%rsp),%xmm6 +- pxor %xmm15,%xmm11 +- pxor %xmm9,%xmm2 +- pxor %xmm3,%xmm7 +- movdqu %xmm6,64(%rdi) +- movdqu %xmm11,80(%rdi) +- movdqu %xmm2,96(%rdi) +- movdqu %xmm7,112(%rdi) +- leaq 128(%rdi),%rdi +- +- subq $256,%rdx +- jnz L$oop_outer4x +- +- jmp L$done4x +- +-L$tail4x: +- cmpq $192,%rdx +- jae L$192_or_more4x +- cmpq $128,%rdx +- jae L$128_or_more4x +- cmpq $64,%rdx +- jae L$64_or_more4x +- +- +- xorq %r10,%r10 +- +- movdqa %xmm12,16(%rsp) +- movdqa %xmm4,32(%rsp) +- movdqa %xmm0,48(%rsp) +- jmp L$oop_tail4x +- +-.p2align 5 +-L$64_or_more4x: +- movdqu 0(%rsi),%xmm6 +- movdqu 16(%rsi),%xmm11 +- movdqu 32(%rsi),%xmm2 +- movdqu 48(%rsi),%xmm7 +- pxor 0(%rsp),%xmm6 +- pxor %xmm12,%xmm11 +- pxor %xmm4,%xmm2 +- pxor %xmm0,%xmm7 +- movdqu %xmm6,0(%rdi) +- movdqu %xmm11,16(%rdi) +- movdqu %xmm2,32(%rdi) +- movdqu %xmm7,48(%rdi) +- je L$done4x +- +- movdqa 16(%rsp),%xmm6 +- leaq 64(%rsi),%rsi +- xorq %r10,%r10 +- movdqa %xmm6,0(%rsp) +- movdqa %xmm13,16(%rsp) +- leaq 64(%rdi),%rdi +- movdqa %xmm5,32(%rsp) +- subq $64,%rdx +- movdqa %xmm1,48(%rsp) +- jmp L$oop_tail4x +- +-.p2align 5 +-L$128_or_more4x: +- movdqu 0(%rsi),%xmm6 +- movdqu 16(%rsi),%xmm11 +- movdqu 32(%rsi),%xmm2 +- movdqu 48(%rsi),%xmm7 +- pxor 0(%rsp),%xmm6 +- pxor %xmm12,%xmm11 +- pxor %xmm4,%xmm2 +- pxor %xmm0,%xmm7 +- +- movdqu %xmm6,0(%rdi) +- movdqu 64(%rsi),%xmm6 +- movdqu %xmm11,16(%rdi) +- movdqu 80(%rsi),%xmm11 +- movdqu %xmm2,32(%rdi) +- movdqu 96(%rsi),%xmm2 +- movdqu %xmm7,48(%rdi) +- movdqu 112(%rsi),%xmm7 +- pxor 16(%rsp),%xmm6 +- pxor %xmm13,%xmm11 +- pxor %xmm5,%xmm2 +- pxor %xmm1,%xmm7 +- movdqu %xmm6,64(%rdi) +- movdqu %xmm11,80(%rdi) +- movdqu %xmm2,96(%rdi) +- movdqu %xmm7,112(%rdi) +- je L$done4x +- +- movdqa 32(%rsp),%xmm6 +- leaq 128(%rsi),%rsi +- xorq %r10,%r10 +- movdqa %xmm6,0(%rsp) +- movdqa %xmm10,16(%rsp) +- leaq 128(%rdi),%rdi +- movdqa %xmm14,32(%rsp) +- subq $128,%rdx +- movdqa %xmm8,48(%rsp) +- jmp L$oop_tail4x +- +-.p2align 5 +-L$192_or_more4x: +- movdqu 0(%rsi),%xmm6 +- movdqu 16(%rsi),%xmm11 +- movdqu 32(%rsi),%xmm2 +- movdqu 48(%rsi),%xmm7 +- pxor 0(%rsp),%xmm6 +- pxor %xmm12,%xmm11 +- pxor %xmm4,%xmm2 +- pxor %xmm0,%xmm7 +- +- movdqu %xmm6,0(%rdi) +- movdqu 64(%rsi),%xmm6 +- movdqu %xmm11,16(%rdi) +- movdqu 80(%rsi),%xmm11 +- movdqu %xmm2,32(%rdi) +- movdqu 96(%rsi),%xmm2 +- movdqu %xmm7,48(%rdi) +- movdqu 112(%rsi),%xmm7 +- leaq 128(%rsi),%rsi +- pxor 16(%rsp),%xmm6 +- pxor %xmm13,%xmm11 +- pxor %xmm5,%xmm2 +- pxor %xmm1,%xmm7 +- +- movdqu %xmm6,64(%rdi) +- movdqu 0(%rsi),%xmm6 +- movdqu %xmm11,80(%rdi) +- movdqu 16(%rsi),%xmm11 +- movdqu %xmm2,96(%rdi) +- movdqu 32(%rsi),%xmm2 +- movdqu %xmm7,112(%rdi) +- leaq 128(%rdi),%rdi +- movdqu 48(%rsi),%xmm7 +- pxor 32(%rsp),%xmm6 +- pxor %xmm10,%xmm11 +- pxor %xmm14,%xmm2 +- pxor %xmm8,%xmm7 +- movdqu %xmm6,0(%rdi) +- movdqu %xmm11,16(%rdi) +- movdqu %xmm2,32(%rdi) +- movdqu %xmm7,48(%rdi) +- je L$done4x +- +- movdqa 48(%rsp),%xmm6 +- leaq 64(%rsi),%rsi +- xorq %r10,%r10 +- movdqa %xmm6,0(%rsp) +- movdqa %xmm15,16(%rsp) +- leaq 64(%rdi),%rdi +- movdqa %xmm9,32(%rsp) +- subq $192,%rdx +- movdqa %xmm3,48(%rsp) +- +-L$oop_tail4x: +- movzbl (%rsi,%r10,1),%eax +- movzbl (%rsp,%r10,1),%ecx +- leaq 1(%r10),%r10 +- xorl %ecx,%eax +- movb %al,-1(%rdi,%r10,1) +- decq %rdx +- jnz L$oop_tail4x +- +-L$done4x: +- leaq (%r9),%rsp +- +-L$4x_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-ChaCha20_8x: 
+-L$ChaCha20_8x: +- +- movq %rsp,%r9 +- +- subq $0x280+8,%rsp +- andq $-32,%rsp +- vzeroupper +- +- +- +- +- +- +- +- +- +- +- vbroadcasti128 L$sigma(%rip),%ymm11 +- vbroadcasti128 (%rcx),%ymm3 +- vbroadcasti128 16(%rcx),%ymm15 +- vbroadcasti128 (%r8),%ymm7 +- leaq 256(%rsp),%rcx +- leaq 512(%rsp),%rax +- leaq L$rot16(%rip),%r10 +- leaq L$rot24(%rip),%r11 +- +- vpshufd $0x00,%ymm11,%ymm8 +- vpshufd $0x55,%ymm11,%ymm9 +- vmovdqa %ymm8,128-256(%rcx) +- vpshufd $0xaa,%ymm11,%ymm10 +- vmovdqa %ymm9,160-256(%rcx) +- vpshufd $0xff,%ymm11,%ymm11 +- vmovdqa %ymm10,192-256(%rcx) +- vmovdqa %ymm11,224-256(%rcx) +- +- vpshufd $0x00,%ymm3,%ymm0 +- vpshufd $0x55,%ymm3,%ymm1 +- vmovdqa %ymm0,256-256(%rcx) +- vpshufd $0xaa,%ymm3,%ymm2 +- vmovdqa %ymm1,288-256(%rcx) +- vpshufd $0xff,%ymm3,%ymm3 +- vmovdqa %ymm2,320-256(%rcx) +- vmovdqa %ymm3,352-256(%rcx) +- +- vpshufd $0x00,%ymm15,%ymm12 +- vpshufd $0x55,%ymm15,%ymm13 +- vmovdqa %ymm12,384-512(%rax) +- vpshufd $0xaa,%ymm15,%ymm14 +- vmovdqa %ymm13,416-512(%rax) +- vpshufd $0xff,%ymm15,%ymm15 +- vmovdqa %ymm14,448-512(%rax) +- vmovdqa %ymm15,480-512(%rax) +- +- vpshufd $0x00,%ymm7,%ymm4 +- vpshufd $0x55,%ymm7,%ymm5 +- vpaddd L$incy(%rip),%ymm4,%ymm4 +- vpshufd $0xaa,%ymm7,%ymm6 +- vmovdqa %ymm5,544-512(%rax) +- vpshufd $0xff,%ymm7,%ymm7 +- vmovdqa %ymm6,576-512(%rax) +- vmovdqa %ymm7,608-512(%rax) +- +- jmp L$oop_enter8x +- +-.p2align 5 +-L$oop_outer8x: +- vmovdqa 128-256(%rcx),%ymm8 +- vmovdqa 160-256(%rcx),%ymm9 +- vmovdqa 192-256(%rcx),%ymm10 +- vmovdqa 224-256(%rcx),%ymm11 +- vmovdqa 256-256(%rcx),%ymm0 +- vmovdqa 288-256(%rcx),%ymm1 +- vmovdqa 320-256(%rcx),%ymm2 +- vmovdqa 352-256(%rcx),%ymm3 +- vmovdqa 384-512(%rax),%ymm12 +- vmovdqa 416-512(%rax),%ymm13 +- vmovdqa 448-512(%rax),%ymm14 +- vmovdqa 480-512(%rax),%ymm15 +- vmovdqa 512-512(%rax),%ymm4 +- vmovdqa 544-512(%rax),%ymm5 +- vmovdqa 576-512(%rax),%ymm6 +- vmovdqa 608-512(%rax),%ymm7 +- vpaddd L$eight(%rip),%ymm4,%ymm4 +- +-L$oop_enter8x: +- vmovdqa %ymm14,64(%rsp) +- vmovdqa %ymm15,96(%rsp) +- vbroadcasti128 (%r10),%ymm15 +- vmovdqa %ymm4,512-512(%rax) +- movl $10,%eax +- jmp L$oop8x +- +-.p2align 5 +-L$oop8x: +- vpaddd %ymm0,%ymm8,%ymm8 +- vpxor %ymm4,%ymm8,%ymm4 +- vpshufb %ymm15,%ymm4,%ymm4 +- vpaddd %ymm1,%ymm9,%ymm9 +- vpxor %ymm5,%ymm9,%ymm5 +- vpshufb %ymm15,%ymm5,%ymm5 +- vpaddd %ymm4,%ymm12,%ymm12 +- vpxor %ymm0,%ymm12,%ymm0 +- vpslld $12,%ymm0,%ymm14 +- vpsrld $20,%ymm0,%ymm0 +- vpor %ymm0,%ymm14,%ymm0 +- vbroadcasti128 (%r11),%ymm14 +- vpaddd %ymm5,%ymm13,%ymm13 +- vpxor %ymm1,%ymm13,%ymm1 +- vpslld $12,%ymm1,%ymm15 +- vpsrld $20,%ymm1,%ymm1 +- vpor %ymm1,%ymm15,%ymm1 +- vpaddd %ymm0,%ymm8,%ymm8 +- vpxor %ymm4,%ymm8,%ymm4 +- vpshufb %ymm14,%ymm4,%ymm4 +- vpaddd %ymm1,%ymm9,%ymm9 +- vpxor %ymm5,%ymm9,%ymm5 +- vpshufb %ymm14,%ymm5,%ymm5 +- vpaddd %ymm4,%ymm12,%ymm12 +- vpxor %ymm0,%ymm12,%ymm0 +- vpslld $7,%ymm0,%ymm15 +- vpsrld $25,%ymm0,%ymm0 +- vpor %ymm0,%ymm15,%ymm0 +- vbroadcasti128 (%r10),%ymm15 +- vpaddd %ymm5,%ymm13,%ymm13 +- vpxor %ymm1,%ymm13,%ymm1 +- vpslld $7,%ymm1,%ymm14 +- vpsrld $25,%ymm1,%ymm1 +- vpor %ymm1,%ymm14,%ymm1 +- vmovdqa %ymm12,0(%rsp) +- vmovdqa %ymm13,32(%rsp) +- vmovdqa 64(%rsp),%ymm12 +- vmovdqa 96(%rsp),%ymm13 +- vpaddd %ymm2,%ymm10,%ymm10 +- vpxor %ymm6,%ymm10,%ymm6 +- vpshufb %ymm15,%ymm6,%ymm6 +- vpaddd %ymm3,%ymm11,%ymm11 +- vpxor %ymm7,%ymm11,%ymm7 +- vpshufb %ymm15,%ymm7,%ymm7 +- vpaddd %ymm6,%ymm12,%ymm12 +- vpxor %ymm2,%ymm12,%ymm2 +- vpslld $12,%ymm2,%ymm14 +- vpsrld $20,%ymm2,%ymm2 +- vpor %ymm2,%ymm14,%ymm2 +- vbroadcasti128 (%r11),%ymm14 +- 
vpaddd %ymm7,%ymm13,%ymm13 +- vpxor %ymm3,%ymm13,%ymm3 +- vpslld $12,%ymm3,%ymm15 +- vpsrld $20,%ymm3,%ymm3 +- vpor %ymm3,%ymm15,%ymm3 +- vpaddd %ymm2,%ymm10,%ymm10 +- vpxor %ymm6,%ymm10,%ymm6 +- vpshufb %ymm14,%ymm6,%ymm6 +- vpaddd %ymm3,%ymm11,%ymm11 +- vpxor %ymm7,%ymm11,%ymm7 +- vpshufb %ymm14,%ymm7,%ymm7 +- vpaddd %ymm6,%ymm12,%ymm12 +- vpxor %ymm2,%ymm12,%ymm2 +- vpslld $7,%ymm2,%ymm15 +- vpsrld $25,%ymm2,%ymm2 +- vpor %ymm2,%ymm15,%ymm2 +- vbroadcasti128 (%r10),%ymm15 +- vpaddd %ymm7,%ymm13,%ymm13 +- vpxor %ymm3,%ymm13,%ymm3 +- vpslld $7,%ymm3,%ymm14 +- vpsrld $25,%ymm3,%ymm3 +- vpor %ymm3,%ymm14,%ymm3 +- vpaddd %ymm1,%ymm8,%ymm8 +- vpxor %ymm7,%ymm8,%ymm7 +- vpshufb %ymm15,%ymm7,%ymm7 +- vpaddd %ymm2,%ymm9,%ymm9 +- vpxor %ymm4,%ymm9,%ymm4 +- vpshufb %ymm15,%ymm4,%ymm4 +- vpaddd %ymm7,%ymm12,%ymm12 +- vpxor %ymm1,%ymm12,%ymm1 +- vpslld $12,%ymm1,%ymm14 +- vpsrld $20,%ymm1,%ymm1 +- vpor %ymm1,%ymm14,%ymm1 +- vbroadcasti128 (%r11),%ymm14 +- vpaddd %ymm4,%ymm13,%ymm13 +- vpxor %ymm2,%ymm13,%ymm2 +- vpslld $12,%ymm2,%ymm15 +- vpsrld $20,%ymm2,%ymm2 +- vpor %ymm2,%ymm15,%ymm2 +- vpaddd %ymm1,%ymm8,%ymm8 +- vpxor %ymm7,%ymm8,%ymm7 +- vpshufb %ymm14,%ymm7,%ymm7 +- vpaddd %ymm2,%ymm9,%ymm9 +- vpxor %ymm4,%ymm9,%ymm4 +- vpshufb %ymm14,%ymm4,%ymm4 +- vpaddd %ymm7,%ymm12,%ymm12 +- vpxor %ymm1,%ymm12,%ymm1 +- vpslld $7,%ymm1,%ymm15 +- vpsrld $25,%ymm1,%ymm1 +- vpor %ymm1,%ymm15,%ymm1 +- vbroadcasti128 (%r10),%ymm15 +- vpaddd %ymm4,%ymm13,%ymm13 +- vpxor %ymm2,%ymm13,%ymm2 +- vpslld $7,%ymm2,%ymm14 +- vpsrld $25,%ymm2,%ymm2 +- vpor %ymm2,%ymm14,%ymm2 +- vmovdqa %ymm12,64(%rsp) +- vmovdqa %ymm13,96(%rsp) +- vmovdqa 0(%rsp),%ymm12 +- vmovdqa 32(%rsp),%ymm13 +- vpaddd %ymm3,%ymm10,%ymm10 +- vpxor %ymm5,%ymm10,%ymm5 +- vpshufb %ymm15,%ymm5,%ymm5 +- vpaddd %ymm0,%ymm11,%ymm11 +- vpxor %ymm6,%ymm11,%ymm6 +- vpshufb %ymm15,%ymm6,%ymm6 +- vpaddd %ymm5,%ymm12,%ymm12 +- vpxor %ymm3,%ymm12,%ymm3 +- vpslld $12,%ymm3,%ymm14 +- vpsrld $20,%ymm3,%ymm3 +- vpor %ymm3,%ymm14,%ymm3 +- vbroadcasti128 (%r11),%ymm14 +- vpaddd %ymm6,%ymm13,%ymm13 +- vpxor %ymm0,%ymm13,%ymm0 +- vpslld $12,%ymm0,%ymm15 +- vpsrld $20,%ymm0,%ymm0 +- vpor %ymm0,%ymm15,%ymm0 +- vpaddd %ymm3,%ymm10,%ymm10 +- vpxor %ymm5,%ymm10,%ymm5 +- vpshufb %ymm14,%ymm5,%ymm5 +- vpaddd %ymm0,%ymm11,%ymm11 +- vpxor %ymm6,%ymm11,%ymm6 +- vpshufb %ymm14,%ymm6,%ymm6 +- vpaddd %ymm5,%ymm12,%ymm12 +- vpxor %ymm3,%ymm12,%ymm3 +- vpslld $7,%ymm3,%ymm15 +- vpsrld $25,%ymm3,%ymm3 +- vpor %ymm3,%ymm15,%ymm3 +- vbroadcasti128 (%r10),%ymm15 +- vpaddd %ymm6,%ymm13,%ymm13 +- vpxor %ymm0,%ymm13,%ymm0 +- vpslld $7,%ymm0,%ymm14 +- vpsrld $25,%ymm0,%ymm0 +- vpor %ymm0,%ymm14,%ymm0 +- decl %eax +- jnz L$oop8x +- +- leaq 512(%rsp),%rax +- vpaddd 128-256(%rcx),%ymm8,%ymm8 +- vpaddd 160-256(%rcx),%ymm9,%ymm9 +- vpaddd 192-256(%rcx),%ymm10,%ymm10 +- vpaddd 224-256(%rcx),%ymm11,%ymm11 +- +- vpunpckldq %ymm9,%ymm8,%ymm14 +- vpunpckldq %ymm11,%ymm10,%ymm15 +- vpunpckhdq %ymm9,%ymm8,%ymm8 +- vpunpckhdq %ymm11,%ymm10,%ymm10 +- vpunpcklqdq %ymm15,%ymm14,%ymm9 +- vpunpckhqdq %ymm15,%ymm14,%ymm14 +- vpunpcklqdq %ymm10,%ymm8,%ymm11 +- vpunpckhqdq %ymm10,%ymm8,%ymm8 +- vpaddd 256-256(%rcx),%ymm0,%ymm0 +- vpaddd 288-256(%rcx),%ymm1,%ymm1 +- vpaddd 320-256(%rcx),%ymm2,%ymm2 +- vpaddd 352-256(%rcx),%ymm3,%ymm3 +- +- vpunpckldq %ymm1,%ymm0,%ymm10 +- vpunpckldq %ymm3,%ymm2,%ymm15 +- vpunpckhdq %ymm1,%ymm0,%ymm0 +- vpunpckhdq %ymm3,%ymm2,%ymm2 +- vpunpcklqdq %ymm15,%ymm10,%ymm1 +- vpunpckhqdq %ymm15,%ymm10,%ymm10 +- vpunpcklqdq %ymm2,%ymm0,%ymm3 +- vpunpckhqdq %ymm2,%ymm0,%ymm0 +- vperm2i128 
$0x20,%ymm1,%ymm9,%ymm15 +- vperm2i128 $0x31,%ymm1,%ymm9,%ymm1 +- vperm2i128 $0x20,%ymm10,%ymm14,%ymm9 +- vperm2i128 $0x31,%ymm10,%ymm14,%ymm10 +- vperm2i128 $0x20,%ymm3,%ymm11,%ymm14 +- vperm2i128 $0x31,%ymm3,%ymm11,%ymm3 +- vperm2i128 $0x20,%ymm0,%ymm8,%ymm11 +- vperm2i128 $0x31,%ymm0,%ymm8,%ymm0 +- vmovdqa %ymm15,0(%rsp) +- vmovdqa %ymm9,32(%rsp) +- vmovdqa 64(%rsp),%ymm15 +- vmovdqa 96(%rsp),%ymm9 +- +- vpaddd 384-512(%rax),%ymm12,%ymm12 +- vpaddd 416-512(%rax),%ymm13,%ymm13 +- vpaddd 448-512(%rax),%ymm15,%ymm15 +- vpaddd 480-512(%rax),%ymm9,%ymm9 +- +- vpunpckldq %ymm13,%ymm12,%ymm2 +- vpunpckldq %ymm9,%ymm15,%ymm8 +- vpunpckhdq %ymm13,%ymm12,%ymm12 +- vpunpckhdq %ymm9,%ymm15,%ymm15 +- vpunpcklqdq %ymm8,%ymm2,%ymm13 +- vpunpckhqdq %ymm8,%ymm2,%ymm2 +- vpunpcklqdq %ymm15,%ymm12,%ymm9 +- vpunpckhqdq %ymm15,%ymm12,%ymm12 +- vpaddd 512-512(%rax),%ymm4,%ymm4 +- vpaddd 544-512(%rax),%ymm5,%ymm5 +- vpaddd 576-512(%rax),%ymm6,%ymm6 +- vpaddd 608-512(%rax),%ymm7,%ymm7 +- +- vpunpckldq %ymm5,%ymm4,%ymm15 +- vpunpckldq %ymm7,%ymm6,%ymm8 +- vpunpckhdq %ymm5,%ymm4,%ymm4 +- vpunpckhdq %ymm7,%ymm6,%ymm6 +- vpunpcklqdq %ymm8,%ymm15,%ymm5 +- vpunpckhqdq %ymm8,%ymm15,%ymm15 +- vpunpcklqdq %ymm6,%ymm4,%ymm7 +- vpunpckhqdq %ymm6,%ymm4,%ymm4 +- vperm2i128 $0x20,%ymm5,%ymm13,%ymm8 +- vperm2i128 $0x31,%ymm5,%ymm13,%ymm5 +- vperm2i128 $0x20,%ymm15,%ymm2,%ymm13 +- vperm2i128 $0x31,%ymm15,%ymm2,%ymm15 +- vperm2i128 $0x20,%ymm7,%ymm9,%ymm2 +- vperm2i128 $0x31,%ymm7,%ymm9,%ymm7 +- vperm2i128 $0x20,%ymm4,%ymm12,%ymm9 +- vperm2i128 $0x31,%ymm4,%ymm12,%ymm4 +- vmovdqa 0(%rsp),%ymm6 +- vmovdqa 32(%rsp),%ymm12 +- +- cmpq $512,%rdx +- jb L$tail8x +- +- vpxor 0(%rsi),%ymm6,%ymm6 +- vpxor 32(%rsi),%ymm8,%ymm8 +- vpxor 64(%rsi),%ymm1,%ymm1 +- vpxor 96(%rsi),%ymm5,%ymm5 +- leaq 128(%rsi),%rsi +- vmovdqu %ymm6,0(%rdi) +- vmovdqu %ymm8,32(%rdi) +- vmovdqu %ymm1,64(%rdi) +- vmovdqu %ymm5,96(%rdi) +- leaq 128(%rdi),%rdi +- +- vpxor 0(%rsi),%ymm12,%ymm12 +- vpxor 32(%rsi),%ymm13,%ymm13 +- vpxor 64(%rsi),%ymm10,%ymm10 +- vpxor 96(%rsi),%ymm15,%ymm15 +- leaq 128(%rsi),%rsi +- vmovdqu %ymm12,0(%rdi) +- vmovdqu %ymm13,32(%rdi) +- vmovdqu %ymm10,64(%rdi) +- vmovdqu %ymm15,96(%rdi) +- leaq 128(%rdi),%rdi +- +- vpxor 0(%rsi),%ymm14,%ymm14 +- vpxor 32(%rsi),%ymm2,%ymm2 +- vpxor 64(%rsi),%ymm3,%ymm3 +- vpxor 96(%rsi),%ymm7,%ymm7 +- leaq 128(%rsi),%rsi +- vmovdqu %ymm14,0(%rdi) +- vmovdqu %ymm2,32(%rdi) +- vmovdqu %ymm3,64(%rdi) +- vmovdqu %ymm7,96(%rdi) +- leaq 128(%rdi),%rdi +- +- vpxor 0(%rsi),%ymm11,%ymm11 +- vpxor 32(%rsi),%ymm9,%ymm9 +- vpxor 64(%rsi),%ymm0,%ymm0 +- vpxor 96(%rsi),%ymm4,%ymm4 +- leaq 128(%rsi),%rsi +- vmovdqu %ymm11,0(%rdi) +- vmovdqu %ymm9,32(%rdi) +- vmovdqu %ymm0,64(%rdi) +- vmovdqu %ymm4,96(%rdi) +- leaq 128(%rdi),%rdi +- +- subq $512,%rdx +- jnz L$oop_outer8x +- +- jmp L$done8x +- +-L$tail8x: +- cmpq $448,%rdx +- jae L$448_or_more8x +- cmpq $384,%rdx +- jae L$384_or_more8x +- cmpq $320,%rdx +- jae L$320_or_more8x +- cmpq $256,%rdx +- jae L$256_or_more8x +- cmpq $192,%rdx +- jae L$192_or_more8x +- cmpq $128,%rdx +- jae L$128_or_more8x +- cmpq $64,%rdx +- jae L$64_or_more8x +- +- xorq %r10,%r10 +- vmovdqa %ymm6,0(%rsp) +- vmovdqa %ymm8,32(%rsp) +- jmp L$oop_tail8x +- +-.p2align 5 +-L$64_or_more8x: +- vpxor 0(%rsi),%ymm6,%ymm6 +- vpxor 32(%rsi),%ymm8,%ymm8 +- vmovdqu %ymm6,0(%rdi) +- vmovdqu %ymm8,32(%rdi) +- je L$done8x +- +- leaq 64(%rsi),%rsi +- xorq %r10,%r10 +- vmovdqa %ymm1,0(%rsp) +- leaq 64(%rdi),%rdi +- subq $64,%rdx +- vmovdqa %ymm5,32(%rsp) +- jmp L$oop_tail8x +- +-.p2align 5 +-L$128_or_more8x: +- 
vpxor 0(%rsi),%ymm6,%ymm6 +- vpxor 32(%rsi),%ymm8,%ymm8 +- vpxor 64(%rsi),%ymm1,%ymm1 +- vpxor 96(%rsi),%ymm5,%ymm5 +- vmovdqu %ymm6,0(%rdi) +- vmovdqu %ymm8,32(%rdi) +- vmovdqu %ymm1,64(%rdi) +- vmovdqu %ymm5,96(%rdi) +- je L$done8x +- +- leaq 128(%rsi),%rsi +- xorq %r10,%r10 +- vmovdqa %ymm12,0(%rsp) +- leaq 128(%rdi),%rdi +- subq $128,%rdx +- vmovdqa %ymm13,32(%rsp) +- jmp L$oop_tail8x +- +-.p2align 5 +-L$192_or_more8x: +- vpxor 0(%rsi),%ymm6,%ymm6 +- vpxor 32(%rsi),%ymm8,%ymm8 +- vpxor 64(%rsi),%ymm1,%ymm1 +- vpxor 96(%rsi),%ymm5,%ymm5 +- vpxor 128(%rsi),%ymm12,%ymm12 +- vpxor 160(%rsi),%ymm13,%ymm13 +- vmovdqu %ymm6,0(%rdi) +- vmovdqu %ymm8,32(%rdi) +- vmovdqu %ymm1,64(%rdi) +- vmovdqu %ymm5,96(%rdi) +- vmovdqu %ymm12,128(%rdi) +- vmovdqu %ymm13,160(%rdi) +- je L$done8x +- +- leaq 192(%rsi),%rsi +- xorq %r10,%r10 +- vmovdqa %ymm10,0(%rsp) +- leaq 192(%rdi),%rdi +- subq $192,%rdx +- vmovdqa %ymm15,32(%rsp) +- jmp L$oop_tail8x +- +-.p2align 5 +-L$256_or_more8x: +- vpxor 0(%rsi),%ymm6,%ymm6 +- vpxor 32(%rsi),%ymm8,%ymm8 +- vpxor 64(%rsi),%ymm1,%ymm1 +- vpxor 96(%rsi),%ymm5,%ymm5 +- vpxor 128(%rsi),%ymm12,%ymm12 +- vpxor 160(%rsi),%ymm13,%ymm13 +- vpxor 192(%rsi),%ymm10,%ymm10 +- vpxor 224(%rsi),%ymm15,%ymm15 +- vmovdqu %ymm6,0(%rdi) +- vmovdqu %ymm8,32(%rdi) +- vmovdqu %ymm1,64(%rdi) +- vmovdqu %ymm5,96(%rdi) +- vmovdqu %ymm12,128(%rdi) +- vmovdqu %ymm13,160(%rdi) +- vmovdqu %ymm10,192(%rdi) +- vmovdqu %ymm15,224(%rdi) +- je L$done8x +- +- leaq 256(%rsi),%rsi +- xorq %r10,%r10 +- vmovdqa %ymm14,0(%rsp) +- leaq 256(%rdi),%rdi +- subq $256,%rdx +- vmovdqa %ymm2,32(%rsp) +- jmp L$oop_tail8x +- +-.p2align 5 +-L$320_or_more8x: +- vpxor 0(%rsi),%ymm6,%ymm6 +- vpxor 32(%rsi),%ymm8,%ymm8 +- vpxor 64(%rsi),%ymm1,%ymm1 +- vpxor 96(%rsi),%ymm5,%ymm5 +- vpxor 128(%rsi),%ymm12,%ymm12 +- vpxor 160(%rsi),%ymm13,%ymm13 +- vpxor 192(%rsi),%ymm10,%ymm10 +- vpxor 224(%rsi),%ymm15,%ymm15 +- vpxor 256(%rsi),%ymm14,%ymm14 +- vpxor 288(%rsi),%ymm2,%ymm2 +- vmovdqu %ymm6,0(%rdi) +- vmovdqu %ymm8,32(%rdi) +- vmovdqu %ymm1,64(%rdi) +- vmovdqu %ymm5,96(%rdi) +- vmovdqu %ymm12,128(%rdi) +- vmovdqu %ymm13,160(%rdi) +- vmovdqu %ymm10,192(%rdi) +- vmovdqu %ymm15,224(%rdi) +- vmovdqu %ymm14,256(%rdi) +- vmovdqu %ymm2,288(%rdi) +- je L$done8x +- +- leaq 320(%rsi),%rsi +- xorq %r10,%r10 +- vmovdqa %ymm3,0(%rsp) +- leaq 320(%rdi),%rdi +- subq $320,%rdx +- vmovdqa %ymm7,32(%rsp) +- jmp L$oop_tail8x +- +-.p2align 5 +-L$384_or_more8x: +- vpxor 0(%rsi),%ymm6,%ymm6 +- vpxor 32(%rsi),%ymm8,%ymm8 +- vpxor 64(%rsi),%ymm1,%ymm1 +- vpxor 96(%rsi),%ymm5,%ymm5 +- vpxor 128(%rsi),%ymm12,%ymm12 +- vpxor 160(%rsi),%ymm13,%ymm13 +- vpxor 192(%rsi),%ymm10,%ymm10 +- vpxor 224(%rsi),%ymm15,%ymm15 +- vpxor 256(%rsi),%ymm14,%ymm14 +- vpxor 288(%rsi),%ymm2,%ymm2 +- vpxor 320(%rsi),%ymm3,%ymm3 +- vpxor 352(%rsi),%ymm7,%ymm7 +- vmovdqu %ymm6,0(%rdi) +- vmovdqu %ymm8,32(%rdi) +- vmovdqu %ymm1,64(%rdi) +- vmovdqu %ymm5,96(%rdi) +- vmovdqu %ymm12,128(%rdi) +- vmovdqu %ymm13,160(%rdi) +- vmovdqu %ymm10,192(%rdi) +- vmovdqu %ymm15,224(%rdi) +- vmovdqu %ymm14,256(%rdi) +- vmovdqu %ymm2,288(%rdi) +- vmovdqu %ymm3,320(%rdi) +- vmovdqu %ymm7,352(%rdi) +- je L$done8x +- +- leaq 384(%rsi),%rsi +- xorq %r10,%r10 +- vmovdqa %ymm11,0(%rsp) +- leaq 384(%rdi),%rdi +- subq $384,%rdx +- vmovdqa %ymm9,32(%rsp) +- jmp L$oop_tail8x +- +-.p2align 5 +-L$448_or_more8x: +- vpxor 0(%rsi),%ymm6,%ymm6 +- vpxor 32(%rsi),%ymm8,%ymm8 +- vpxor 64(%rsi),%ymm1,%ymm1 +- vpxor 96(%rsi),%ymm5,%ymm5 +- vpxor 128(%rsi),%ymm12,%ymm12 +- vpxor 160(%rsi),%ymm13,%ymm13 +- vpxor 
192(%rsi),%ymm10,%ymm10 +- vpxor 224(%rsi),%ymm15,%ymm15 +- vpxor 256(%rsi),%ymm14,%ymm14 +- vpxor 288(%rsi),%ymm2,%ymm2 +- vpxor 320(%rsi),%ymm3,%ymm3 +- vpxor 352(%rsi),%ymm7,%ymm7 +- vpxor 384(%rsi),%ymm11,%ymm11 +- vpxor 416(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm6,0(%rdi) +- vmovdqu %ymm8,32(%rdi) +- vmovdqu %ymm1,64(%rdi) +- vmovdqu %ymm5,96(%rdi) +- vmovdqu %ymm12,128(%rdi) +- vmovdqu %ymm13,160(%rdi) +- vmovdqu %ymm10,192(%rdi) +- vmovdqu %ymm15,224(%rdi) +- vmovdqu %ymm14,256(%rdi) +- vmovdqu %ymm2,288(%rdi) +- vmovdqu %ymm3,320(%rdi) +- vmovdqu %ymm7,352(%rdi) +- vmovdqu %ymm11,384(%rdi) +- vmovdqu %ymm9,416(%rdi) +- je L$done8x +- +- leaq 448(%rsi),%rsi +- xorq %r10,%r10 +- vmovdqa %ymm0,0(%rsp) +- leaq 448(%rdi),%rdi +- subq $448,%rdx +- vmovdqa %ymm4,32(%rsp) +- +-L$oop_tail8x: +- movzbl (%rsi,%r10,1),%eax +- movzbl (%rsp,%r10,1),%ecx +- leaq 1(%r10),%r10 +- xorl %ecx,%eax +- movb %al,-1(%rdi,%r10,1) +- decq %rdx +- jnz L$oop_tail8x +- +-L$done8x: +- vzeroall +- leaq (%r9),%rsp +- +-L$8x_epilogue: +- .byte 0xf3,0xc3 +- +- +-#endif +diff --git a/mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S b/mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S +deleted file mode 100644 +index f988089..0000000 +--- a/mac-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S ++++ /dev/null +@@ -1,3068 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.data +- +-.p2align 4 +-one: +-.quad 1,0 +-two: +-.quad 2,0 +-three: +-.quad 3,0 +-four: +-.quad 4,0 +-five: +-.quad 5,0 +-six: +-.quad 6,0 +-seven: +-.quad 7,0 +-eight: +-.quad 8,0 +- +-OR_MASK: +-.long 0x00000000,0x00000000,0x00000000,0x80000000 +-poly: +-.quad 0x1, 0xc200000000000000 +-mask: +-.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +-con1: +-.long 1,1,1,1 +-con2: +-.long 0x1b,0x1b,0x1b,0x1b +-con3: +-.byte -1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7 +-and_mask: +-.long 0,0xffffffff, 0xffffffff, 0xffffffff +-.text +- +-.p2align 4 +-GFMUL: +- +- vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 +- vpclmulqdq $0x11,%xmm1,%xmm0,%xmm5 +- vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 +- vpclmulqdq $0x01,%xmm1,%xmm0,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $8,%xmm3,%xmm4 +- vpsrldq $8,%xmm3,%xmm3 +- vpxor %xmm4,%xmm2,%xmm2 +- vpxor %xmm3,%xmm5,%xmm5 +- +- vpclmulqdq $0x10,poly(%rip),%xmm2,%xmm3 +- vpshufd $78,%xmm2,%xmm4 +- vpxor %xmm4,%xmm3,%xmm2 +- +- vpclmulqdq $0x10,poly(%rip),%xmm2,%xmm3 +- vpshufd $78,%xmm2,%xmm4 +- vpxor %xmm4,%xmm3,%xmm2 +- +- vpxor %xmm5,%xmm2,%xmm0 +- .byte 0xf3,0xc3 +- +- +-.globl _aesgcmsiv_htable_init +-.private_extern _aesgcmsiv_htable_init +- +-.p2align 4 +-_aesgcmsiv_htable_init: +- +- vmovdqa (%rsi),%xmm0 +- vmovdqa %xmm0,%xmm1 +- vmovdqa %xmm0,(%rdi) +- call GFMUL +- vmovdqa %xmm0,16(%rdi) +- call GFMUL +- vmovdqa %xmm0,32(%rdi) +- call GFMUL +- vmovdqa %xmm0,48(%rdi) +- call GFMUL +- vmovdqa %xmm0,64(%rdi) +- call GFMUL +- vmovdqa %xmm0,80(%rdi) +- call GFMUL +- vmovdqa %xmm0,96(%rdi) +- call GFMUL +- vmovdqa %xmm0,112(%rdi) +- .byte 0xf3,0xc3 +- +- +-.globl _aesgcmsiv_htable6_init +-.private_extern _aesgcmsiv_htable6_init +- +-.p2align 4 +-_aesgcmsiv_htable6_init: +- +- vmovdqa (%rsi),%xmm0 +- vmovdqa %xmm0,%xmm1 +- vmovdqa %xmm0,(%rdi) +- call GFMUL +- vmovdqa %xmm0,16(%rdi) +- call GFMUL +- vmovdqa 
%xmm0,32(%rdi) +- call GFMUL +- vmovdqa %xmm0,48(%rdi) +- call GFMUL +- vmovdqa %xmm0,64(%rdi) +- call GFMUL +- vmovdqa %xmm0,80(%rdi) +- .byte 0xf3,0xc3 +- +- +-.globl _aesgcmsiv_htable_polyval +-.private_extern _aesgcmsiv_htable_polyval +- +-.p2align 4 +-_aesgcmsiv_htable_polyval: +- +- testq %rdx,%rdx +- jnz L$htable_polyval_start +- .byte 0xf3,0xc3 +- +-L$htable_polyval_start: +- vzeroall +- +- +- +- movq %rdx,%r11 +- andq $127,%r11 +- +- jz L$htable_polyval_no_prefix +- +- vpxor %xmm9,%xmm9,%xmm9 +- vmovdqa (%rcx),%xmm1 +- subq %r11,%rdx +- +- subq $16,%r11 +- +- +- vmovdqu (%rsi),%xmm0 +- vpxor %xmm1,%xmm0,%xmm0 +- +- vpclmulqdq $0x01,(%rdi,%r11,1),%xmm0,%xmm5 +- vpclmulqdq $0x00,(%rdi,%r11,1),%xmm0,%xmm3 +- vpclmulqdq $0x11,(%rdi,%r11,1),%xmm0,%xmm4 +- vpclmulqdq $0x10,(%rdi,%r11,1),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- leaq 16(%rsi),%rsi +- testq %r11,%r11 +- jnz L$htable_polyval_prefix_loop +- jmp L$htable_polyval_prefix_complete +- +- +-.p2align 6 +-L$htable_polyval_prefix_loop: +- subq $16,%r11 +- +- vmovdqu (%rsi),%xmm0 +- +- vpclmulqdq $0x00,(%rdi,%r11,1),%xmm0,%xmm6 +- vpxor %xmm6,%xmm3,%xmm3 +- vpclmulqdq $0x11,(%rdi,%r11,1),%xmm0,%xmm6 +- vpxor %xmm6,%xmm4,%xmm4 +- vpclmulqdq $0x01,(%rdi,%r11,1),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- vpclmulqdq $0x10,(%rdi,%r11,1),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- testq %r11,%r11 +- +- leaq 16(%rsi),%rsi +- +- jnz L$htable_polyval_prefix_loop +- +-L$htable_polyval_prefix_complete: +- vpsrldq $8,%xmm5,%xmm6 +- vpslldq $8,%xmm5,%xmm5 +- +- vpxor %xmm6,%xmm4,%xmm9 +- vpxor %xmm5,%xmm3,%xmm1 +- +- jmp L$htable_polyval_main_loop +- +-L$htable_polyval_no_prefix: +- +- +- +- +- vpxor %xmm1,%xmm1,%xmm1 +- vmovdqa (%rcx),%xmm9 +- +-.p2align 6 +-L$htable_polyval_main_loop: +- subq $0x80,%rdx +- jb L$htable_polyval_out +- +- vmovdqu 112(%rsi),%xmm0 +- +- vpclmulqdq $0x01,(%rdi),%xmm0,%xmm5 +- vpclmulqdq $0x00,(%rdi),%xmm0,%xmm3 +- vpclmulqdq $0x11,(%rdi),%xmm0,%xmm4 +- vpclmulqdq $0x10,(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- +- vmovdqu 96(%rsi),%xmm0 +- vpclmulqdq $0x01,16(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- vpclmulqdq $0x00,16(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm3,%xmm3 +- vpclmulqdq $0x11,16(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm4,%xmm4 +- vpclmulqdq $0x10,16(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- +- +- vmovdqu 80(%rsi),%xmm0 +- +- vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm7 +- vpalignr $8,%xmm1,%xmm1,%xmm1 +- +- vpclmulqdq $0x01,32(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- vpclmulqdq $0x00,32(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm3,%xmm3 +- vpclmulqdq $0x11,32(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm4,%xmm4 +- vpclmulqdq $0x10,32(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- +- vpxor %xmm7,%xmm1,%xmm1 +- +- vmovdqu 64(%rsi),%xmm0 +- +- vpclmulqdq $0x01,48(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- vpclmulqdq $0x00,48(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm3,%xmm3 +- vpclmulqdq $0x11,48(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm4,%xmm4 +- vpclmulqdq $0x10,48(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- +- vmovdqu 48(%rsi),%xmm0 +- +- vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm7 +- vpalignr $8,%xmm1,%xmm1,%xmm1 +- +- vpclmulqdq $0x01,64(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- vpclmulqdq $0x00,64(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm3,%xmm3 +- vpclmulqdq $0x11,64(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm4,%xmm4 +- vpclmulqdq $0x10,64(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- +- vpxor %xmm7,%xmm1,%xmm1 +- +- vmovdqu 32(%rsi),%xmm0 +- +- vpclmulqdq $0x01,80(%rdi),%xmm0,%xmm6 +- 
vpxor %xmm6,%xmm5,%xmm5 +- vpclmulqdq $0x00,80(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm3,%xmm3 +- vpclmulqdq $0x11,80(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm4,%xmm4 +- vpclmulqdq $0x10,80(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- +- vpxor %xmm9,%xmm1,%xmm1 +- +- vmovdqu 16(%rsi),%xmm0 +- +- vpclmulqdq $0x01,96(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- vpclmulqdq $0x00,96(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm3,%xmm3 +- vpclmulqdq $0x11,96(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm4,%xmm4 +- vpclmulqdq $0x10,96(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- +- vmovdqu 0(%rsi),%xmm0 +- vpxor %xmm1,%xmm0,%xmm0 +- +- vpclmulqdq $0x01,112(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- vpclmulqdq $0x00,112(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm3,%xmm3 +- vpclmulqdq $0x11,112(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm4,%xmm4 +- vpclmulqdq $0x10,112(%rdi),%xmm0,%xmm6 +- vpxor %xmm6,%xmm5,%xmm5 +- +- +- vpsrldq $8,%xmm5,%xmm6 +- vpslldq $8,%xmm5,%xmm5 +- +- vpxor %xmm6,%xmm4,%xmm9 +- vpxor %xmm5,%xmm3,%xmm1 +- +- leaq 128(%rsi),%rsi +- jmp L$htable_polyval_main_loop +- +- +- +-L$htable_polyval_out: +- vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm6 +- vpalignr $8,%xmm1,%xmm1,%xmm1 +- vpxor %xmm6,%xmm1,%xmm1 +- +- vpclmulqdq $0x10,poly(%rip),%xmm1,%xmm6 +- vpalignr $8,%xmm1,%xmm1,%xmm1 +- vpxor %xmm6,%xmm1,%xmm1 +- vpxor %xmm9,%xmm1,%xmm1 +- +- vmovdqu %xmm1,(%rcx) +- vzeroupper +- .byte 0xf3,0xc3 +- +- +-.globl _aesgcmsiv_polyval_horner +-.private_extern _aesgcmsiv_polyval_horner +- +-.p2align 4 +-_aesgcmsiv_polyval_horner: +- +- testq %rcx,%rcx +- jnz L$polyval_horner_start +- .byte 0xf3,0xc3 +- +-L$polyval_horner_start: +- +- +- +- xorq %r10,%r10 +- shlq $4,%rcx +- +- vmovdqa (%rsi),%xmm1 +- vmovdqa (%rdi),%xmm0 +- +-L$polyval_horner_loop: +- vpxor (%rdx,%r10,1),%xmm0,%xmm0 +- call GFMUL +- +- addq $16,%r10 +- cmpq %r10,%rcx +- jne L$polyval_horner_loop +- +- +- vmovdqa %xmm0,(%rdi) +- .byte 0xf3,0xc3 +- +- +-.globl _aes128gcmsiv_aes_ks +-.private_extern _aes128gcmsiv_aes_ks +- +-.p2align 4 +-_aes128gcmsiv_aes_ks: +- +- vmovdqu (%rdi),%xmm1 +- vmovdqa %xmm1,(%rsi) +- +- vmovdqa con1(%rip),%xmm0 +- vmovdqa mask(%rip),%xmm15 +- +- movq $8,%rax +- +-L$ks128_loop: +- addq $16,%rsi +- subq $1,%rax +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpslldq $4,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpslldq $4,%xmm3,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpslldq $4,%xmm3,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vmovdqa %xmm1,(%rsi) +- jne L$ks128_loop +- +- vmovdqa con2(%rip),%xmm0 +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpslldq $4,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpslldq $4,%xmm3,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpslldq $4,%xmm3,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vmovdqa %xmm1,16(%rsi) +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslldq $4,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpslldq $4,%xmm3,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpslldq $4,%xmm3,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vmovdqa %xmm1,32(%rsi) +- .byte 0xf3,0xc3 +- +- +-.globl _aes256gcmsiv_aes_ks +-.private_extern _aes256gcmsiv_aes_ks +- +-.p2align 4 +-_aes256gcmsiv_aes_ks: +- +- vmovdqu (%rdi),%xmm1 +- vmovdqu 16(%rdi),%xmm3 +- vmovdqa %xmm1,(%rsi) +- vmovdqa %xmm3,16(%rsi) +- vmovdqa con1(%rip),%xmm0 +- vmovdqa mask(%rip),%xmm15 +- vpxor %xmm14,%xmm14,%xmm14 +- movq $6,%rax +- +-L$ks256_loop: +- addq $32,%rsi +- subq $1,%rax +- vpshufb 
%xmm15,%xmm3,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vmovdqa %xmm1,(%rsi) +- vpshufd $0xff,%xmm1,%xmm2 +- vaesenclast %xmm14,%xmm2,%xmm2 +- vpsllq $32,%xmm3,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpshufb con3(%rip),%xmm3,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpxor %xmm2,%xmm3,%xmm3 +- vmovdqa %xmm3,16(%rsi) +- jne L$ks256_loop +- +- vpshufb %xmm15,%xmm3,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpsllq $32,%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vmovdqa %xmm1,32(%rsi) +- .byte 0xf3,0xc3 +- +-.globl _aes128gcmsiv_aes_ks_enc_x1 +-.private_extern _aes128gcmsiv_aes_ks_enc_x1 +- +-.p2align 4 +-_aes128gcmsiv_aes_ks_enc_x1: +- +- vmovdqa (%rcx),%xmm1 +- vmovdqa 0(%rdi),%xmm4 +- +- vmovdqa %xmm1,(%rdx) +- vpxor %xmm1,%xmm4,%xmm4 +- +- vmovdqa con1(%rip),%xmm0 +- vmovdqa mask(%rip),%xmm15 +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenc %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,16(%rdx) +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenc %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,32(%rdx) +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenc %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,48(%rdx) +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenc %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,64(%rdx) +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenc %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,80(%rdx) +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenc %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,96(%rdx) +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenc %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,112(%rdx) +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenc %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,128(%rdx) +- +- +- vmovdqa con2(%rip),%xmm0 +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb 
con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenc %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,144(%rdx) +- +- vpshufb %xmm15,%xmm1,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpsllq $32,%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpshufb con3(%rip),%xmm1,%xmm3 +- vpxor %xmm3,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- +- vaesenclast %xmm1,%xmm4,%xmm4 +- vmovdqa %xmm1,160(%rdx) +- +- +- vmovdqa %xmm4,0(%rsi) +- .byte 0xf3,0xc3 +- +- +-.globl _aes128gcmsiv_kdf +-.private_extern _aes128gcmsiv_kdf +- +-.p2align 4 +-_aes128gcmsiv_kdf: +- +- +- +- +- +- vmovdqa (%rdx),%xmm1 +- vmovdqa 0(%rdi),%xmm9 +- vmovdqa and_mask(%rip),%xmm12 +- vmovdqa one(%rip),%xmm13 +- vpshufd $0x90,%xmm9,%xmm9 +- vpand %xmm12,%xmm9,%xmm9 +- vpaddd %xmm13,%xmm9,%xmm10 +- vpaddd %xmm13,%xmm10,%xmm11 +- vpaddd %xmm13,%xmm11,%xmm12 +- +- vpxor %xmm1,%xmm9,%xmm9 +- vpxor %xmm1,%xmm10,%xmm10 +- vpxor %xmm1,%xmm11,%xmm11 +- vpxor %xmm1,%xmm12,%xmm12 +- +- vmovdqa 16(%rdx),%xmm1 +- vaesenc %xmm1,%xmm9,%xmm9 +- vaesenc %xmm1,%xmm10,%xmm10 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- +- vmovdqa 32(%rdx),%xmm2 +- vaesenc %xmm2,%xmm9,%xmm9 +- vaesenc %xmm2,%xmm10,%xmm10 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- +- vmovdqa 48(%rdx),%xmm1 +- vaesenc %xmm1,%xmm9,%xmm9 +- vaesenc %xmm1,%xmm10,%xmm10 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- +- vmovdqa 64(%rdx),%xmm2 +- vaesenc %xmm2,%xmm9,%xmm9 +- vaesenc %xmm2,%xmm10,%xmm10 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- +- vmovdqa 80(%rdx),%xmm1 +- vaesenc %xmm1,%xmm9,%xmm9 +- vaesenc %xmm1,%xmm10,%xmm10 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- +- vmovdqa 96(%rdx),%xmm2 +- vaesenc %xmm2,%xmm9,%xmm9 +- vaesenc %xmm2,%xmm10,%xmm10 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- +- vmovdqa 112(%rdx),%xmm1 +- vaesenc %xmm1,%xmm9,%xmm9 +- vaesenc %xmm1,%xmm10,%xmm10 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- +- vmovdqa 128(%rdx),%xmm2 +- vaesenc %xmm2,%xmm9,%xmm9 +- vaesenc %xmm2,%xmm10,%xmm10 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- +- vmovdqa 144(%rdx),%xmm1 +- vaesenc %xmm1,%xmm9,%xmm9 +- vaesenc %xmm1,%xmm10,%xmm10 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- +- vmovdqa 160(%rdx),%xmm2 +- vaesenclast %xmm2,%xmm9,%xmm9 +- vaesenclast %xmm2,%xmm10,%xmm10 +- vaesenclast %xmm2,%xmm11,%xmm11 +- vaesenclast %xmm2,%xmm12,%xmm12 +- +- +- vmovdqa %xmm9,0(%rsi) +- vmovdqa %xmm10,16(%rsi) +- vmovdqa %xmm11,32(%rsi) +- vmovdqa %xmm12,48(%rsi) +- .byte 0xf3,0xc3 +- +- +-.globl _aes128gcmsiv_enc_msg_x4 +-.private_extern _aes128gcmsiv_enc_msg_x4 +- +-.p2align 4 +-_aes128gcmsiv_enc_msg_x4: +- +- testq %r8,%r8 +- jnz L$128_enc_msg_x4_start +- .byte 0xf3,0xc3 +- +-L$128_enc_msg_x4_start: +- pushq %r12 +- +- pushq %r13 +- +- +- shrq $4,%r8 +- movq %r8,%r10 +- shlq $62,%r10 +- shrq $62,%r10 +- +- +- vmovdqa (%rdx),%xmm15 +- vpor OR_MASK(%rip),%xmm15,%xmm15 +- +- vmovdqu four(%rip),%xmm4 +- vmovdqa %xmm15,%xmm0 +- vpaddd one(%rip),%xmm15,%xmm1 +- vpaddd two(%rip),%xmm15,%xmm2 +- vpaddd three(%rip),%xmm15,%xmm3 +- +- shrq $2,%r8 +- je L$128_enc_msg_x4_check_remainder +- +- subq $64,%rsi +- subq $64,%rdi +- +-L$128_enc_msg_x4_loop1: +- addq $64,%rsi +- addq $64,%rdi +- +- vmovdqa %xmm0,%xmm5 +- vmovdqa %xmm1,%xmm6 +- vmovdqa %xmm2,%xmm7 +- vmovdqa %xmm3,%xmm8 +- +- vpxor (%rcx),%xmm5,%xmm5 +- vpxor (%rcx),%xmm6,%xmm6 +- vpxor (%rcx),%xmm7,%xmm7 +- vpxor (%rcx),%xmm8,%xmm8 +- +- vmovdqu 
16(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vpaddd %xmm4,%xmm0,%xmm0 +- vmovdqu 32(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vpaddd %xmm4,%xmm1,%xmm1 +- vmovdqu 48(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vpaddd %xmm4,%xmm2,%xmm2 +- vmovdqu 64(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vpaddd %xmm4,%xmm3,%xmm3 +- +- vmovdqu 80(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 96(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 112(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 128(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 144(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 160(%rcx),%xmm12 +- vaesenclast %xmm12,%xmm5,%xmm5 +- vaesenclast %xmm12,%xmm6,%xmm6 +- vaesenclast %xmm12,%xmm7,%xmm7 +- vaesenclast %xmm12,%xmm8,%xmm8 +- +- +- +- vpxor 0(%rdi),%xmm5,%xmm5 +- vpxor 16(%rdi),%xmm6,%xmm6 +- vpxor 32(%rdi),%xmm7,%xmm7 +- vpxor 48(%rdi),%xmm8,%xmm8 +- +- subq $1,%r8 +- +- vmovdqu %xmm5,0(%rsi) +- vmovdqu %xmm6,16(%rsi) +- vmovdqu %xmm7,32(%rsi) +- vmovdqu %xmm8,48(%rsi) +- +- jne L$128_enc_msg_x4_loop1 +- +- addq $64,%rsi +- addq $64,%rdi +- +-L$128_enc_msg_x4_check_remainder: +- cmpq $0,%r10 +- je L$128_enc_msg_x4_out +- +-L$128_enc_msg_x4_loop2: +- +- +- vmovdqa %xmm0,%xmm5 +- vpaddd one(%rip),%xmm0,%xmm0 +- +- vpxor (%rcx),%xmm5,%xmm5 +- vaesenc 16(%rcx),%xmm5,%xmm5 +- vaesenc 32(%rcx),%xmm5,%xmm5 +- vaesenc 48(%rcx),%xmm5,%xmm5 +- vaesenc 64(%rcx),%xmm5,%xmm5 +- vaesenc 80(%rcx),%xmm5,%xmm5 +- vaesenc 96(%rcx),%xmm5,%xmm5 +- vaesenc 112(%rcx),%xmm5,%xmm5 +- vaesenc 128(%rcx),%xmm5,%xmm5 +- vaesenc 144(%rcx),%xmm5,%xmm5 +- vaesenclast 160(%rcx),%xmm5,%xmm5 +- +- +- vpxor (%rdi),%xmm5,%xmm5 +- vmovdqu %xmm5,(%rsi) +- +- addq $16,%rdi +- addq $16,%rsi +- +- subq $1,%r10 +- jne L$128_enc_msg_x4_loop2 +- +-L$128_enc_msg_x4_out: +- popq %r13 +- +- popq %r12 +- +- .byte 0xf3,0xc3 +- +- +-.globl _aes128gcmsiv_enc_msg_x8 +-.private_extern _aes128gcmsiv_enc_msg_x8 +- +-.p2align 4 +-_aes128gcmsiv_enc_msg_x8: +- +- testq %r8,%r8 +- jnz L$128_enc_msg_x8_start +- .byte 0xf3,0xc3 +- +-L$128_enc_msg_x8_start: +- pushq %r12 +- +- pushq %r13 +- +- pushq %rbp +- +- movq %rsp,%rbp +- +- +- +- subq $128,%rsp +- andq $-64,%rsp +- +- shrq $4,%r8 +- movq %r8,%r10 +- shlq $61,%r10 +- shrq $61,%r10 +- +- +- vmovdqu (%rdx),%xmm1 +- vpor OR_MASK(%rip),%xmm1,%xmm1 +- +- +- vpaddd seven(%rip),%xmm1,%xmm0 +- vmovdqu %xmm0,(%rsp) +- vpaddd one(%rip),%xmm1,%xmm9 +- vpaddd two(%rip),%xmm1,%xmm10 +- vpaddd three(%rip),%xmm1,%xmm11 +- vpaddd four(%rip),%xmm1,%xmm12 +- vpaddd five(%rip),%xmm1,%xmm13 +- vpaddd six(%rip),%xmm1,%xmm14 +- vmovdqa %xmm1,%xmm0 +- +- shrq $3,%r8 +- je L$128_enc_msg_x8_check_remainder +- +- subq $128,%rsi +- subq $128,%rdi +- 
+-L$128_enc_msg_x8_loop1: +- addq $128,%rsi +- addq $128,%rdi +- +- vmovdqa %xmm0,%xmm1 +- vmovdqa %xmm9,%xmm2 +- vmovdqa %xmm10,%xmm3 +- vmovdqa %xmm11,%xmm4 +- vmovdqa %xmm12,%xmm5 +- vmovdqa %xmm13,%xmm6 +- vmovdqa %xmm14,%xmm7 +- +- vmovdqu (%rsp),%xmm8 +- +- vpxor (%rcx),%xmm1,%xmm1 +- vpxor (%rcx),%xmm2,%xmm2 +- vpxor (%rcx),%xmm3,%xmm3 +- vpxor (%rcx),%xmm4,%xmm4 +- vpxor (%rcx),%xmm5,%xmm5 +- vpxor (%rcx),%xmm6,%xmm6 +- vpxor (%rcx),%xmm7,%xmm7 +- vpxor (%rcx),%xmm8,%xmm8 +- +- vmovdqu 16(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vmovdqu (%rsp),%xmm14 +- vpaddd eight(%rip),%xmm14,%xmm14 +- vmovdqu %xmm14,(%rsp) +- vmovdqu 32(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpsubd one(%rip),%xmm14,%xmm14 +- vmovdqu 48(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm0,%xmm0 +- vmovdqu 64(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm9,%xmm9 +- vmovdqu 80(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm10,%xmm10 +- vmovdqu 96(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm11,%xmm11 +- vmovdqu 112(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm12,%xmm12 +- vmovdqu 128(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm13,%xmm13 +- vmovdqu 144(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vmovdqu 160(%rcx),%xmm15 +- vaesenclast %xmm15,%xmm1,%xmm1 +- vaesenclast %xmm15,%xmm2,%xmm2 +- vaesenclast %xmm15,%xmm3,%xmm3 +- vaesenclast %xmm15,%xmm4,%xmm4 +- vaesenclast %xmm15,%xmm5,%xmm5 +- vaesenclast %xmm15,%xmm6,%xmm6 +- vaesenclast %xmm15,%xmm7,%xmm7 +- vaesenclast %xmm15,%xmm8,%xmm8 
+- +- +- +- vpxor 0(%rdi),%xmm1,%xmm1 +- vpxor 16(%rdi),%xmm2,%xmm2 +- vpxor 32(%rdi),%xmm3,%xmm3 +- vpxor 48(%rdi),%xmm4,%xmm4 +- vpxor 64(%rdi),%xmm5,%xmm5 +- vpxor 80(%rdi),%xmm6,%xmm6 +- vpxor 96(%rdi),%xmm7,%xmm7 +- vpxor 112(%rdi),%xmm8,%xmm8 +- +- decq %r8 +- +- vmovdqu %xmm1,0(%rsi) +- vmovdqu %xmm2,16(%rsi) +- vmovdqu %xmm3,32(%rsi) +- vmovdqu %xmm4,48(%rsi) +- vmovdqu %xmm5,64(%rsi) +- vmovdqu %xmm6,80(%rsi) +- vmovdqu %xmm7,96(%rsi) +- vmovdqu %xmm8,112(%rsi) +- +- jne L$128_enc_msg_x8_loop1 +- +- addq $128,%rsi +- addq $128,%rdi +- +-L$128_enc_msg_x8_check_remainder: +- cmpq $0,%r10 +- je L$128_enc_msg_x8_out +- +-L$128_enc_msg_x8_loop2: +- +- +- vmovdqa %xmm0,%xmm1 +- vpaddd one(%rip),%xmm0,%xmm0 +- +- vpxor (%rcx),%xmm1,%xmm1 +- vaesenc 16(%rcx),%xmm1,%xmm1 +- vaesenc 32(%rcx),%xmm1,%xmm1 +- vaesenc 48(%rcx),%xmm1,%xmm1 +- vaesenc 64(%rcx),%xmm1,%xmm1 +- vaesenc 80(%rcx),%xmm1,%xmm1 +- vaesenc 96(%rcx),%xmm1,%xmm1 +- vaesenc 112(%rcx),%xmm1,%xmm1 +- vaesenc 128(%rcx),%xmm1,%xmm1 +- vaesenc 144(%rcx),%xmm1,%xmm1 +- vaesenclast 160(%rcx),%xmm1,%xmm1 +- +- +- vpxor (%rdi),%xmm1,%xmm1 +- +- vmovdqu %xmm1,(%rsi) +- +- addq $16,%rdi +- addq $16,%rsi +- +- decq %r10 +- jne L$128_enc_msg_x8_loop2 +- +-L$128_enc_msg_x8_out: +- movq %rbp,%rsp +- +- popq %rbp +- +- popq %r13 +- +- popq %r12 +- +- .byte 0xf3,0xc3 +- +- +-.globl _aes128gcmsiv_dec +-.private_extern _aes128gcmsiv_dec +- +-.p2align 4 +-_aes128gcmsiv_dec: +- +- testq $~15,%r9 +- jnz L$128_dec_start +- .byte 0xf3,0xc3 +- +-L$128_dec_start: +- vzeroupper +- vmovdqa (%rdx),%xmm0 +- movq %rdx,%rax +- +- leaq 32(%rax),%rax +- leaq 32(%rcx),%rcx +- +- +- vmovdqu (%rdi,%r9,1),%xmm15 +- vpor OR_MASK(%rip),%xmm15,%xmm15 +- andq $~15,%r9 +- +- +- cmpq $96,%r9 +- jb L$128_dec_loop2 +- +- +- subq $96,%r9 +- vmovdqa %xmm15,%xmm7 +- vpaddd one(%rip),%xmm7,%xmm8 +- vpaddd two(%rip),%xmm7,%xmm9 +- vpaddd one(%rip),%xmm9,%xmm10 +- vpaddd two(%rip),%xmm9,%xmm11 +- vpaddd one(%rip),%xmm11,%xmm12 +- vpaddd two(%rip),%xmm11,%xmm15 +- +- vpxor (%r8),%xmm7,%xmm7 +- vpxor (%r8),%xmm8,%xmm8 +- vpxor (%r8),%xmm9,%xmm9 +- vpxor (%r8),%xmm10,%xmm10 +- vpxor (%r8),%xmm11,%xmm11 +- vpxor (%r8),%xmm12,%xmm12 +- +- vmovdqu 16(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 32(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 48(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 64(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 80(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 96(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 112(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc 
%xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 128(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 144(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 160(%r8),%xmm4 +- vaesenclast %xmm4,%xmm7,%xmm7 +- vaesenclast %xmm4,%xmm8,%xmm8 +- vaesenclast %xmm4,%xmm9,%xmm9 +- vaesenclast %xmm4,%xmm10,%xmm10 +- vaesenclast %xmm4,%xmm11,%xmm11 +- vaesenclast %xmm4,%xmm12,%xmm12 +- +- +- vpxor 0(%rdi),%xmm7,%xmm7 +- vpxor 16(%rdi),%xmm8,%xmm8 +- vpxor 32(%rdi),%xmm9,%xmm9 +- vpxor 48(%rdi),%xmm10,%xmm10 +- vpxor 64(%rdi),%xmm11,%xmm11 +- vpxor 80(%rdi),%xmm12,%xmm12 +- +- vmovdqu %xmm7,0(%rsi) +- vmovdqu %xmm8,16(%rsi) +- vmovdqu %xmm9,32(%rsi) +- vmovdqu %xmm10,48(%rsi) +- vmovdqu %xmm11,64(%rsi) +- vmovdqu %xmm12,80(%rsi) +- +- addq $96,%rdi +- addq $96,%rsi +- jmp L$128_dec_loop1 +- +- +-.p2align 6 +-L$128_dec_loop1: +- cmpq $96,%r9 +- jb L$128_dec_finish_96 +- subq $96,%r9 +- +- vmovdqa %xmm12,%xmm6 +- vmovdqa %xmm11,16-32(%rax) +- vmovdqa %xmm10,32-32(%rax) +- vmovdqa %xmm9,48-32(%rax) +- vmovdqa %xmm8,64-32(%rax) +- vmovdqa %xmm7,80-32(%rax) +- +- vmovdqa %xmm15,%xmm7 +- vpaddd one(%rip),%xmm7,%xmm8 +- vpaddd two(%rip),%xmm7,%xmm9 +- vpaddd one(%rip),%xmm9,%xmm10 +- vpaddd two(%rip),%xmm9,%xmm11 +- vpaddd one(%rip),%xmm11,%xmm12 +- vpaddd two(%rip),%xmm11,%xmm15 +- +- vmovdqa (%r8),%xmm4 +- vpxor %xmm4,%xmm7,%xmm7 +- vpxor %xmm4,%xmm8,%xmm8 +- vpxor %xmm4,%xmm9,%xmm9 +- vpxor %xmm4,%xmm10,%xmm10 +- vpxor %xmm4,%xmm11,%xmm11 +- vpxor %xmm4,%xmm12,%xmm12 +- +- vmovdqu 0-32(%rcx),%xmm4 +- vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 +- vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 +- vpclmulqdq $0x01,%xmm4,%xmm6,%xmm1 +- vpclmulqdq $0x10,%xmm4,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 16(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu -16(%rax),%xmm6 +- vmovdqu -16(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 32(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 0(%rax),%xmm6 +- vmovdqu 0(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 48(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 16(%rax),%xmm6 +- vmovdqu 16(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- 
vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 64(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 32(%rax),%xmm6 +- vmovdqu 32(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 80(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 96(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 112(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- +- vmovdqa 80-32(%rax),%xmm6 +- vpxor %xmm0,%xmm6,%xmm6 +- vmovdqu 80-32(%rcx),%xmm5 +- +- vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 128(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- +- vpsrldq $8,%xmm1,%xmm4 +- vpxor %xmm4,%xmm2,%xmm5 +- vpslldq $8,%xmm1,%xmm4 +- vpxor %xmm4,%xmm3,%xmm0 +- +- vmovdqa poly(%rip),%xmm3 +- +- vmovdqu 144(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 160(%r8),%xmm6 +- vpalignr $8,%xmm0,%xmm0,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 +- vpxor %xmm0,%xmm2,%xmm0 +- +- vpxor 0(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm7,%xmm7 +- vpxor 16(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm8,%xmm8 +- vpxor 32(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm9,%xmm9 +- vpxor 48(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm10,%xmm10 +- vpxor 64(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm11,%xmm11 +- vpxor 80(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm12,%xmm12 +- +- vpalignr $8,%xmm0,%xmm0,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 +- vpxor %xmm0,%xmm2,%xmm0 +- +- vmovdqu %xmm7,0(%rsi) +- vmovdqu %xmm8,16(%rsi) +- vmovdqu %xmm9,32(%rsi) +- vmovdqu %xmm10,48(%rsi) +- vmovdqu %xmm11,64(%rsi) +- vmovdqu %xmm12,80(%rsi) +- +- vpxor %xmm5,%xmm0,%xmm0 +- +- leaq 96(%rdi),%rdi +- leaq 96(%rsi),%rsi +- jmp L$128_dec_loop1 +- +-L$128_dec_finish_96: +- vmovdqa %xmm12,%xmm6 +- vmovdqa %xmm11,16-32(%rax) +- vmovdqa %xmm10,32-32(%rax) +- vmovdqa %xmm9,48-32(%rax) +- vmovdqa %xmm8,64-32(%rax) +- vmovdqa %xmm7,80-32(%rax) +- +- vmovdqu 0-32(%rcx),%xmm4 +- vpclmulqdq $0x10,%xmm4,%xmm6,%xmm1 +- vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 +- vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 +- vpclmulqdq $0x01,%xmm4,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu -16(%rax),%xmm6 +- vmovdqu -16(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- 
vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 0(%rax),%xmm6 +- vmovdqu 0(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 16(%rax),%xmm6 +- vmovdqu 16(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 32(%rax),%xmm6 +- vmovdqu 32(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 80-32(%rax),%xmm6 +- vpxor %xmm0,%xmm6,%xmm6 +- vmovdqu 80-32(%rcx),%xmm5 +- vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vpsrldq $8,%xmm1,%xmm4 +- vpxor %xmm4,%xmm2,%xmm5 +- vpslldq $8,%xmm1,%xmm4 +- vpxor %xmm4,%xmm3,%xmm0 +- +- vmovdqa poly(%rip),%xmm3 +- +- vpalignr $8,%xmm0,%xmm0,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 +- vpxor %xmm0,%xmm2,%xmm0 +- +- vpalignr $8,%xmm0,%xmm0,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 +- vpxor %xmm0,%xmm2,%xmm0 +- +- vpxor %xmm5,%xmm0,%xmm0 +- +-L$128_dec_loop2: +- +- +- +- cmpq $16,%r9 +- jb L$128_dec_out +- subq $16,%r9 +- +- vmovdqa %xmm15,%xmm2 +- vpaddd one(%rip),%xmm15,%xmm15 +- +- vpxor 0(%r8),%xmm2,%xmm2 +- vaesenc 16(%r8),%xmm2,%xmm2 +- vaesenc 32(%r8),%xmm2,%xmm2 +- vaesenc 48(%r8),%xmm2,%xmm2 +- vaesenc 64(%r8),%xmm2,%xmm2 +- vaesenc 80(%r8),%xmm2,%xmm2 +- vaesenc 96(%r8),%xmm2,%xmm2 +- vaesenc 112(%r8),%xmm2,%xmm2 +- vaesenc 128(%r8),%xmm2,%xmm2 +- vaesenc 144(%r8),%xmm2,%xmm2 +- vaesenclast 160(%r8),%xmm2,%xmm2 +- vpxor (%rdi),%xmm2,%xmm2 +- vmovdqu %xmm2,(%rsi) +- addq $16,%rdi +- addq $16,%rsi +- +- vpxor %xmm2,%xmm0,%xmm0 +- vmovdqa -32(%rcx),%xmm1 +- call GFMUL +- +- jmp L$128_dec_loop2 +- +-L$128_dec_out: +- vmovdqu %xmm0,(%rdx) +- .byte 0xf3,0xc3 +- +- +-.globl _aes128gcmsiv_ecb_enc_block +-.private_extern _aes128gcmsiv_ecb_enc_block +- +-.p2align 4 +-_aes128gcmsiv_ecb_enc_block: +- +- vmovdqa (%rdi),%xmm1 +- +- vpxor (%rdx),%xmm1,%xmm1 +- vaesenc 16(%rdx),%xmm1,%xmm1 +- vaesenc 32(%rdx),%xmm1,%xmm1 +- vaesenc 48(%rdx),%xmm1,%xmm1 +- vaesenc 64(%rdx),%xmm1,%xmm1 +- vaesenc 80(%rdx),%xmm1,%xmm1 +- vaesenc 96(%rdx),%xmm1,%xmm1 +- vaesenc 112(%rdx),%xmm1,%xmm1 +- vaesenc 128(%rdx),%xmm1,%xmm1 +- vaesenc 144(%rdx),%xmm1,%xmm1 +- vaesenclast 160(%rdx),%xmm1,%xmm1 +- +- vmovdqa %xmm1,(%rsi) +- +- .byte 0xf3,0xc3 +- +- +-.globl _aes256gcmsiv_aes_ks_enc_x1 +-.private_extern _aes256gcmsiv_aes_ks_enc_x1 +- +-.p2align 4 +-_aes256gcmsiv_aes_ks_enc_x1: +- +- vmovdqa con1(%rip),%xmm0 +- vmovdqa mask(%rip),%xmm15 +- vmovdqa (%rdi),%xmm8 +- vmovdqa (%rcx),%xmm1 +- vmovdqa 16(%rcx),%xmm3 +- vpxor %xmm1,%xmm8,%xmm8 +- vaesenc %xmm3,%xmm8,%xmm8 +- 
vmovdqu %xmm1,(%rdx) +- vmovdqu %xmm3,16(%rdx) +- vpxor %xmm14,%xmm14,%xmm14 +- +- vpshufb %xmm15,%xmm3,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpslldq $4,%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vaesenc %xmm1,%xmm8,%xmm8 +- vmovdqu %xmm1,32(%rdx) +- +- vpshufd $0xff,%xmm1,%xmm2 +- vaesenclast %xmm14,%xmm2,%xmm2 +- vpslldq $4,%xmm3,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpxor %xmm2,%xmm3,%xmm3 +- vaesenc %xmm3,%xmm8,%xmm8 +- vmovdqu %xmm3,48(%rdx) +- +- vpshufb %xmm15,%xmm3,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpslldq $4,%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vaesenc %xmm1,%xmm8,%xmm8 +- vmovdqu %xmm1,64(%rdx) +- +- vpshufd $0xff,%xmm1,%xmm2 +- vaesenclast %xmm14,%xmm2,%xmm2 +- vpslldq $4,%xmm3,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpxor %xmm2,%xmm3,%xmm3 +- vaesenc %xmm3,%xmm8,%xmm8 +- vmovdqu %xmm3,80(%rdx) +- +- vpshufb %xmm15,%xmm3,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpslldq $4,%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vaesenc %xmm1,%xmm8,%xmm8 +- vmovdqu %xmm1,96(%rdx) +- +- vpshufd $0xff,%xmm1,%xmm2 +- vaesenclast %xmm14,%xmm2,%xmm2 +- vpslldq $4,%xmm3,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpxor %xmm2,%xmm3,%xmm3 +- vaesenc %xmm3,%xmm8,%xmm8 +- vmovdqu %xmm3,112(%rdx) +- +- vpshufb %xmm15,%xmm3,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpslldq $4,%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vaesenc %xmm1,%xmm8,%xmm8 +- vmovdqu %xmm1,128(%rdx) +- +- vpshufd $0xff,%xmm1,%xmm2 +- vaesenclast %xmm14,%xmm2,%xmm2 +- vpslldq $4,%xmm3,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpxor %xmm2,%xmm3,%xmm3 +- vaesenc %xmm3,%xmm8,%xmm8 +- vmovdqu %xmm3,144(%rdx) +- +- vpshufb %xmm15,%xmm3,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpslldq $4,%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vaesenc %xmm1,%xmm8,%xmm8 +- vmovdqu %xmm1,160(%rdx) +- +- vpshufd $0xff,%xmm1,%xmm2 +- vaesenclast %xmm14,%xmm2,%xmm2 +- vpslldq $4,%xmm3,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpxor %xmm2,%xmm3,%xmm3 +- vaesenc %xmm3,%xmm8,%xmm8 +- vmovdqu %xmm3,176(%rdx) +- +- vpshufb %xmm15,%xmm3,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslld $1,%xmm0,%xmm0 +- vpslldq $4,%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 
+- vaesenc %xmm1,%xmm8,%xmm8 +- vmovdqu %xmm1,192(%rdx) +- +- vpshufd $0xff,%xmm1,%xmm2 +- vaesenclast %xmm14,%xmm2,%xmm2 +- vpslldq $4,%xmm3,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpxor %xmm2,%xmm3,%xmm3 +- vaesenc %xmm3,%xmm8,%xmm8 +- vmovdqu %xmm3,208(%rdx) +- +- vpshufb %xmm15,%xmm3,%xmm2 +- vaesenclast %xmm0,%xmm2,%xmm2 +- vpslldq $4,%xmm1,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpslldq $4,%xmm4,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpxor %xmm2,%xmm1,%xmm1 +- vaesenclast %xmm1,%xmm8,%xmm8 +- vmovdqu %xmm1,224(%rdx) +- +- vmovdqa %xmm8,(%rsi) +- .byte 0xf3,0xc3 +- +- +-.globl _aes256gcmsiv_ecb_enc_block +-.private_extern _aes256gcmsiv_ecb_enc_block +- +-.p2align 4 +-_aes256gcmsiv_ecb_enc_block: +- +- vmovdqa (%rdi),%xmm1 +- vpxor (%rdx),%xmm1,%xmm1 +- vaesenc 16(%rdx),%xmm1,%xmm1 +- vaesenc 32(%rdx),%xmm1,%xmm1 +- vaesenc 48(%rdx),%xmm1,%xmm1 +- vaesenc 64(%rdx),%xmm1,%xmm1 +- vaesenc 80(%rdx),%xmm1,%xmm1 +- vaesenc 96(%rdx),%xmm1,%xmm1 +- vaesenc 112(%rdx),%xmm1,%xmm1 +- vaesenc 128(%rdx),%xmm1,%xmm1 +- vaesenc 144(%rdx),%xmm1,%xmm1 +- vaesenc 160(%rdx),%xmm1,%xmm1 +- vaesenc 176(%rdx),%xmm1,%xmm1 +- vaesenc 192(%rdx),%xmm1,%xmm1 +- vaesenc 208(%rdx),%xmm1,%xmm1 +- vaesenclast 224(%rdx),%xmm1,%xmm1 +- vmovdqa %xmm1,(%rsi) +- .byte 0xf3,0xc3 +- +- +-.globl _aes256gcmsiv_enc_msg_x4 +-.private_extern _aes256gcmsiv_enc_msg_x4 +- +-.p2align 4 +-_aes256gcmsiv_enc_msg_x4: +- +- testq %r8,%r8 +- jnz L$256_enc_msg_x4_start +- .byte 0xf3,0xc3 +- +-L$256_enc_msg_x4_start: +- movq %r8,%r10 +- shrq $4,%r8 +- shlq $60,%r10 +- jz L$256_enc_msg_x4_start2 +- addq $1,%r8 +- +-L$256_enc_msg_x4_start2: +- movq %r8,%r10 +- shlq $62,%r10 +- shrq $62,%r10 +- +- +- vmovdqa (%rdx),%xmm15 +- vpor OR_MASK(%rip),%xmm15,%xmm15 +- +- vmovdqa four(%rip),%xmm4 +- vmovdqa %xmm15,%xmm0 +- vpaddd one(%rip),%xmm15,%xmm1 +- vpaddd two(%rip),%xmm15,%xmm2 +- vpaddd three(%rip),%xmm15,%xmm3 +- +- shrq $2,%r8 +- je L$256_enc_msg_x4_check_remainder +- +- subq $64,%rsi +- subq $64,%rdi +- +-L$256_enc_msg_x4_loop1: +- addq $64,%rsi +- addq $64,%rdi +- +- vmovdqa %xmm0,%xmm5 +- vmovdqa %xmm1,%xmm6 +- vmovdqa %xmm2,%xmm7 +- vmovdqa %xmm3,%xmm8 +- +- vpxor (%rcx),%xmm5,%xmm5 +- vpxor (%rcx),%xmm6,%xmm6 +- vpxor (%rcx),%xmm7,%xmm7 +- vpxor (%rcx),%xmm8,%xmm8 +- +- vmovdqu 16(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vpaddd %xmm4,%xmm0,%xmm0 +- vmovdqu 32(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vpaddd %xmm4,%xmm1,%xmm1 +- vmovdqu 48(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vpaddd %xmm4,%xmm2,%xmm2 +- vmovdqu 64(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vpaddd %xmm4,%xmm3,%xmm3 +- +- vmovdqu 80(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 96(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 112(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc 
%xmm12,%xmm8,%xmm8 +- +- vmovdqu 128(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 144(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 160(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 176(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 192(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 208(%rcx),%xmm12 +- vaesenc %xmm12,%xmm5,%xmm5 +- vaesenc %xmm12,%xmm6,%xmm6 +- vaesenc %xmm12,%xmm7,%xmm7 +- vaesenc %xmm12,%xmm8,%xmm8 +- +- vmovdqu 224(%rcx),%xmm12 +- vaesenclast %xmm12,%xmm5,%xmm5 +- vaesenclast %xmm12,%xmm6,%xmm6 +- vaesenclast %xmm12,%xmm7,%xmm7 +- vaesenclast %xmm12,%xmm8,%xmm8 +- +- +- +- vpxor 0(%rdi),%xmm5,%xmm5 +- vpxor 16(%rdi),%xmm6,%xmm6 +- vpxor 32(%rdi),%xmm7,%xmm7 +- vpxor 48(%rdi),%xmm8,%xmm8 +- +- subq $1,%r8 +- +- vmovdqu %xmm5,0(%rsi) +- vmovdqu %xmm6,16(%rsi) +- vmovdqu %xmm7,32(%rsi) +- vmovdqu %xmm8,48(%rsi) +- +- jne L$256_enc_msg_x4_loop1 +- +- addq $64,%rsi +- addq $64,%rdi +- +-L$256_enc_msg_x4_check_remainder: +- cmpq $0,%r10 +- je L$256_enc_msg_x4_out +- +-L$256_enc_msg_x4_loop2: +- +- +- +- vmovdqa %xmm0,%xmm5 +- vpaddd one(%rip),%xmm0,%xmm0 +- vpxor (%rcx),%xmm5,%xmm5 +- vaesenc 16(%rcx),%xmm5,%xmm5 +- vaesenc 32(%rcx),%xmm5,%xmm5 +- vaesenc 48(%rcx),%xmm5,%xmm5 +- vaesenc 64(%rcx),%xmm5,%xmm5 +- vaesenc 80(%rcx),%xmm5,%xmm5 +- vaesenc 96(%rcx),%xmm5,%xmm5 +- vaesenc 112(%rcx),%xmm5,%xmm5 +- vaesenc 128(%rcx),%xmm5,%xmm5 +- vaesenc 144(%rcx),%xmm5,%xmm5 +- vaesenc 160(%rcx),%xmm5,%xmm5 +- vaesenc 176(%rcx),%xmm5,%xmm5 +- vaesenc 192(%rcx),%xmm5,%xmm5 +- vaesenc 208(%rcx),%xmm5,%xmm5 +- vaesenclast 224(%rcx),%xmm5,%xmm5 +- +- +- vpxor (%rdi),%xmm5,%xmm5 +- +- vmovdqu %xmm5,(%rsi) +- +- addq $16,%rdi +- addq $16,%rsi +- +- subq $1,%r10 +- jne L$256_enc_msg_x4_loop2 +- +-L$256_enc_msg_x4_out: +- .byte 0xf3,0xc3 +- +- +-.globl _aes256gcmsiv_enc_msg_x8 +-.private_extern _aes256gcmsiv_enc_msg_x8 +- +-.p2align 4 +-_aes256gcmsiv_enc_msg_x8: +- +- testq %r8,%r8 +- jnz L$256_enc_msg_x8_start +- .byte 0xf3,0xc3 +- +-L$256_enc_msg_x8_start: +- +- movq %rsp,%r11 +- subq $16,%r11 +- andq $-64,%r11 +- +- movq %r8,%r10 +- shrq $4,%r8 +- shlq $60,%r10 +- jz L$256_enc_msg_x8_start2 +- addq $1,%r8 +- +-L$256_enc_msg_x8_start2: +- movq %r8,%r10 +- shlq $61,%r10 +- shrq $61,%r10 +- +- +- vmovdqa (%rdx),%xmm1 +- vpor OR_MASK(%rip),%xmm1,%xmm1 +- +- +- vpaddd seven(%rip),%xmm1,%xmm0 +- vmovdqa %xmm0,(%r11) +- vpaddd one(%rip),%xmm1,%xmm9 +- vpaddd two(%rip),%xmm1,%xmm10 +- vpaddd three(%rip),%xmm1,%xmm11 +- vpaddd four(%rip),%xmm1,%xmm12 +- vpaddd five(%rip),%xmm1,%xmm13 +- vpaddd six(%rip),%xmm1,%xmm14 +- vmovdqa %xmm1,%xmm0 +- +- shrq $3,%r8 +- jz L$256_enc_msg_x8_check_remainder +- +- subq $128,%rsi +- subq $128,%rdi +- +-L$256_enc_msg_x8_loop1: +- addq $128,%rsi +- addq $128,%rdi +- +- vmovdqa %xmm0,%xmm1 +- vmovdqa %xmm9,%xmm2 +- vmovdqa %xmm10,%xmm3 +- vmovdqa %xmm11,%xmm4 +- vmovdqa %xmm12,%xmm5 +- vmovdqa %xmm13,%xmm6 +- vmovdqa %xmm14,%xmm7 +- +- vmovdqa (%r11),%xmm8 +- +- vpxor (%rcx),%xmm1,%xmm1 +- vpxor (%rcx),%xmm2,%xmm2 +- vpxor (%rcx),%xmm3,%xmm3 +- vpxor (%rcx),%xmm4,%xmm4 
+- vpxor (%rcx),%xmm5,%xmm5 +- vpxor (%rcx),%xmm6,%xmm6 +- vpxor (%rcx),%xmm7,%xmm7 +- vpxor (%rcx),%xmm8,%xmm8 +- +- vmovdqu 16(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vmovdqa (%r11),%xmm14 +- vpaddd eight(%rip),%xmm14,%xmm14 +- vmovdqa %xmm14,(%r11) +- vmovdqu 32(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpsubd one(%rip),%xmm14,%xmm14 +- vmovdqu 48(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm0,%xmm0 +- vmovdqu 64(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm9,%xmm9 +- vmovdqu 80(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm10,%xmm10 +- vmovdqu 96(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm11,%xmm11 +- vmovdqu 112(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm12,%xmm12 +- vmovdqu 128(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vpaddd eight(%rip),%xmm13,%xmm13 +- vmovdqu 144(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vmovdqu 160(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vmovdqu 176(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vmovdqu 192(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc 
%xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vmovdqu 208(%rcx),%xmm15 +- vaesenc %xmm15,%xmm1,%xmm1 +- vaesenc %xmm15,%xmm2,%xmm2 +- vaesenc %xmm15,%xmm3,%xmm3 +- vaesenc %xmm15,%xmm4,%xmm4 +- vaesenc %xmm15,%xmm5,%xmm5 +- vaesenc %xmm15,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm8,%xmm8 +- +- vmovdqu 224(%rcx),%xmm15 +- vaesenclast %xmm15,%xmm1,%xmm1 +- vaesenclast %xmm15,%xmm2,%xmm2 +- vaesenclast %xmm15,%xmm3,%xmm3 +- vaesenclast %xmm15,%xmm4,%xmm4 +- vaesenclast %xmm15,%xmm5,%xmm5 +- vaesenclast %xmm15,%xmm6,%xmm6 +- vaesenclast %xmm15,%xmm7,%xmm7 +- vaesenclast %xmm15,%xmm8,%xmm8 +- +- +- +- vpxor 0(%rdi),%xmm1,%xmm1 +- vpxor 16(%rdi),%xmm2,%xmm2 +- vpxor 32(%rdi),%xmm3,%xmm3 +- vpxor 48(%rdi),%xmm4,%xmm4 +- vpxor 64(%rdi),%xmm5,%xmm5 +- vpxor 80(%rdi),%xmm6,%xmm6 +- vpxor 96(%rdi),%xmm7,%xmm7 +- vpxor 112(%rdi),%xmm8,%xmm8 +- +- subq $1,%r8 +- +- vmovdqu %xmm1,0(%rsi) +- vmovdqu %xmm2,16(%rsi) +- vmovdqu %xmm3,32(%rsi) +- vmovdqu %xmm4,48(%rsi) +- vmovdqu %xmm5,64(%rsi) +- vmovdqu %xmm6,80(%rsi) +- vmovdqu %xmm7,96(%rsi) +- vmovdqu %xmm8,112(%rsi) +- +- jne L$256_enc_msg_x8_loop1 +- +- addq $128,%rsi +- addq $128,%rdi +- +-L$256_enc_msg_x8_check_remainder: +- cmpq $0,%r10 +- je L$256_enc_msg_x8_out +- +-L$256_enc_msg_x8_loop2: +- +- +- vmovdqa %xmm0,%xmm1 +- vpaddd one(%rip),%xmm0,%xmm0 +- +- vpxor (%rcx),%xmm1,%xmm1 +- vaesenc 16(%rcx),%xmm1,%xmm1 +- vaesenc 32(%rcx),%xmm1,%xmm1 +- vaesenc 48(%rcx),%xmm1,%xmm1 +- vaesenc 64(%rcx),%xmm1,%xmm1 +- vaesenc 80(%rcx),%xmm1,%xmm1 +- vaesenc 96(%rcx),%xmm1,%xmm1 +- vaesenc 112(%rcx),%xmm1,%xmm1 +- vaesenc 128(%rcx),%xmm1,%xmm1 +- vaesenc 144(%rcx),%xmm1,%xmm1 +- vaesenc 160(%rcx),%xmm1,%xmm1 +- vaesenc 176(%rcx),%xmm1,%xmm1 +- vaesenc 192(%rcx),%xmm1,%xmm1 +- vaesenc 208(%rcx),%xmm1,%xmm1 +- vaesenclast 224(%rcx),%xmm1,%xmm1 +- +- +- vpxor (%rdi),%xmm1,%xmm1 +- +- vmovdqu %xmm1,(%rsi) +- +- addq $16,%rdi +- addq $16,%rsi +- subq $1,%r10 +- jnz L$256_enc_msg_x8_loop2 +- +-L$256_enc_msg_x8_out: +- .byte 0xf3,0xc3 +- +- +- +-.globl _aes256gcmsiv_dec +-.private_extern _aes256gcmsiv_dec +- +-.p2align 4 +-_aes256gcmsiv_dec: +- +- testq $~15,%r9 +- jnz L$256_dec_start +- .byte 0xf3,0xc3 +- +-L$256_dec_start: +- vzeroupper +- vmovdqa (%rdx),%xmm0 +- movq %rdx,%rax +- +- leaq 32(%rax),%rax +- leaq 32(%rcx),%rcx +- +- +- vmovdqu (%rdi,%r9,1),%xmm15 +- vpor OR_MASK(%rip),%xmm15,%xmm15 +- andq $~15,%r9 +- +- +- cmpq $96,%r9 +- jb L$256_dec_loop2 +- +- +- subq $96,%r9 +- vmovdqa %xmm15,%xmm7 +- vpaddd one(%rip),%xmm7,%xmm8 +- vpaddd two(%rip),%xmm7,%xmm9 +- vpaddd one(%rip),%xmm9,%xmm10 +- vpaddd two(%rip),%xmm9,%xmm11 +- vpaddd one(%rip),%xmm11,%xmm12 +- vpaddd two(%rip),%xmm11,%xmm15 +- +- vpxor (%r8),%xmm7,%xmm7 +- vpxor (%r8),%xmm8,%xmm8 +- vpxor (%r8),%xmm9,%xmm9 +- vpxor (%r8),%xmm10,%xmm10 +- vpxor (%r8),%xmm11,%xmm11 +- vpxor (%r8),%xmm12,%xmm12 +- +- vmovdqu 16(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 32(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 48(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 
+- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 64(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 80(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 96(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 112(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 128(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 144(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 160(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 176(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 192(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 208(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 224(%r8),%xmm4 +- vaesenclast %xmm4,%xmm7,%xmm7 +- vaesenclast %xmm4,%xmm8,%xmm8 +- vaesenclast %xmm4,%xmm9,%xmm9 +- vaesenclast %xmm4,%xmm10,%xmm10 +- vaesenclast %xmm4,%xmm11,%xmm11 +- vaesenclast %xmm4,%xmm12,%xmm12 +- +- +- vpxor 0(%rdi),%xmm7,%xmm7 +- vpxor 16(%rdi),%xmm8,%xmm8 +- vpxor 32(%rdi),%xmm9,%xmm9 +- vpxor 48(%rdi),%xmm10,%xmm10 +- vpxor 64(%rdi),%xmm11,%xmm11 +- vpxor 80(%rdi),%xmm12,%xmm12 +- +- vmovdqu %xmm7,0(%rsi) +- vmovdqu %xmm8,16(%rsi) +- vmovdqu %xmm9,32(%rsi) +- vmovdqu %xmm10,48(%rsi) +- vmovdqu %xmm11,64(%rsi) +- vmovdqu %xmm12,80(%rsi) +- +- addq $96,%rdi +- addq $96,%rsi +- jmp L$256_dec_loop1 +- +- +-.p2align 6 +-L$256_dec_loop1: +- cmpq $96,%r9 +- jb L$256_dec_finish_96 +- subq $96,%r9 +- +- vmovdqa %xmm12,%xmm6 +- vmovdqa %xmm11,16-32(%rax) +- vmovdqa %xmm10,32-32(%rax) +- vmovdqa %xmm9,48-32(%rax) +- vmovdqa %xmm8,64-32(%rax) +- vmovdqa %xmm7,80-32(%rax) +- +- vmovdqa %xmm15,%xmm7 +- vpaddd one(%rip),%xmm7,%xmm8 +- vpaddd two(%rip),%xmm7,%xmm9 +- vpaddd one(%rip),%xmm9,%xmm10 +- vpaddd two(%rip),%xmm9,%xmm11 +- vpaddd one(%rip),%xmm11,%xmm12 +- vpaddd two(%rip),%xmm11,%xmm15 +- +- vmovdqa (%r8),%xmm4 +- vpxor %xmm4,%xmm7,%xmm7 +- vpxor %xmm4,%xmm8,%xmm8 +- vpxor %xmm4,%xmm9,%xmm9 +- vpxor %xmm4,%xmm10,%xmm10 +- vpxor %xmm4,%xmm11,%xmm11 +- vpxor %xmm4,%xmm12,%xmm12 +- +- vmovdqu 0-32(%rcx),%xmm4 +- vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 +- vpclmulqdq 
$0x00,%xmm4,%xmm6,%xmm3 +- vpclmulqdq $0x01,%xmm4,%xmm6,%xmm1 +- vpclmulqdq $0x10,%xmm4,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 16(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu -16(%rax),%xmm6 +- vmovdqu -16(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 32(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 0(%rax),%xmm6 +- vmovdqu 0(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 48(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 16(%rax),%xmm6 +- vmovdqu 16(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 64(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 32(%rax),%xmm6 +- vmovdqu 32(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 80(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 96(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 112(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- +- vmovdqa 80-32(%rax),%xmm6 +- vpxor %xmm0,%xmm6,%xmm6 +- vmovdqu 80-32(%rcx),%xmm5 +- +- vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 128(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- +- vpsrldq $8,%xmm1,%xmm4 +- vpxor %xmm4,%xmm2,%xmm5 +- vpslldq $8,%xmm1,%xmm4 +- vpxor 
%xmm4,%xmm3,%xmm0 +- +- vmovdqa poly(%rip),%xmm3 +- +- vmovdqu 144(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 160(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 176(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 192(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 208(%r8),%xmm4 +- vaesenc %xmm4,%xmm7,%xmm7 +- vaesenc %xmm4,%xmm8,%xmm8 +- vaesenc %xmm4,%xmm9,%xmm9 +- vaesenc %xmm4,%xmm10,%xmm10 +- vaesenc %xmm4,%xmm11,%xmm11 +- vaesenc %xmm4,%xmm12,%xmm12 +- +- vmovdqu 224(%r8),%xmm6 +- vpalignr $8,%xmm0,%xmm0,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 +- vpxor %xmm0,%xmm2,%xmm0 +- +- vpxor 0(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm7,%xmm7 +- vpxor 16(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm8,%xmm8 +- vpxor 32(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm9,%xmm9 +- vpxor 48(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm10,%xmm10 +- vpxor 64(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm11,%xmm11 +- vpxor 80(%rdi),%xmm6,%xmm4 +- vaesenclast %xmm4,%xmm12,%xmm12 +- +- vpalignr $8,%xmm0,%xmm0,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 +- vpxor %xmm0,%xmm2,%xmm0 +- +- vmovdqu %xmm7,0(%rsi) +- vmovdqu %xmm8,16(%rsi) +- vmovdqu %xmm9,32(%rsi) +- vmovdqu %xmm10,48(%rsi) +- vmovdqu %xmm11,64(%rsi) +- vmovdqu %xmm12,80(%rsi) +- +- vpxor %xmm5,%xmm0,%xmm0 +- +- leaq 96(%rdi),%rdi +- leaq 96(%rsi),%rsi +- jmp L$256_dec_loop1 +- +-L$256_dec_finish_96: +- vmovdqa %xmm12,%xmm6 +- vmovdqa %xmm11,16-32(%rax) +- vmovdqa %xmm10,32-32(%rax) +- vmovdqa %xmm9,48-32(%rax) +- vmovdqa %xmm8,64-32(%rax) +- vmovdqa %xmm7,80-32(%rax) +- +- vmovdqu 0-32(%rcx),%xmm4 +- vpclmulqdq $0x10,%xmm4,%xmm6,%xmm1 +- vpclmulqdq $0x11,%xmm4,%xmm6,%xmm2 +- vpclmulqdq $0x00,%xmm4,%xmm6,%xmm3 +- vpclmulqdq $0x01,%xmm4,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu -16(%rax),%xmm6 +- vmovdqu -16(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 0(%rax),%xmm6 +- vmovdqu 0(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 16(%rax),%xmm6 +- vmovdqu 16(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vmovdqu 32(%rax),%xmm6 +- vmovdqu 32(%rcx),%xmm13 +- +- vpclmulqdq $0x10,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x11,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq 
$0x00,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x01,%xmm13,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- +- vmovdqu 80-32(%rax),%xmm6 +- vpxor %xmm0,%xmm6,%xmm6 +- vmovdqu 80-32(%rcx),%xmm5 +- vpclmulqdq $0x11,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- vpclmulqdq $0x10,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x01,%xmm5,%xmm6,%xmm4 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vpsrldq $8,%xmm1,%xmm4 +- vpxor %xmm4,%xmm2,%xmm5 +- vpslldq $8,%xmm1,%xmm4 +- vpxor %xmm4,%xmm3,%xmm0 +- +- vmovdqa poly(%rip),%xmm3 +- +- vpalignr $8,%xmm0,%xmm0,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 +- vpxor %xmm0,%xmm2,%xmm0 +- +- vpalignr $8,%xmm0,%xmm0,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm0 +- vpxor %xmm0,%xmm2,%xmm0 +- +- vpxor %xmm5,%xmm0,%xmm0 +- +-L$256_dec_loop2: +- +- +- +- cmpq $16,%r9 +- jb L$256_dec_out +- subq $16,%r9 +- +- vmovdqa %xmm15,%xmm2 +- vpaddd one(%rip),%xmm15,%xmm15 +- +- vpxor 0(%r8),%xmm2,%xmm2 +- vaesenc 16(%r8),%xmm2,%xmm2 +- vaesenc 32(%r8),%xmm2,%xmm2 +- vaesenc 48(%r8),%xmm2,%xmm2 +- vaesenc 64(%r8),%xmm2,%xmm2 +- vaesenc 80(%r8),%xmm2,%xmm2 +- vaesenc 96(%r8),%xmm2,%xmm2 +- vaesenc 112(%r8),%xmm2,%xmm2 +- vaesenc 128(%r8),%xmm2,%xmm2 +- vaesenc 144(%r8),%xmm2,%xmm2 +- vaesenc 160(%r8),%xmm2,%xmm2 +- vaesenc 176(%r8),%xmm2,%xmm2 +- vaesenc 192(%r8),%xmm2,%xmm2 +- vaesenc 208(%r8),%xmm2,%xmm2 +- vaesenclast 224(%r8),%xmm2,%xmm2 +- vpxor (%rdi),%xmm2,%xmm2 +- vmovdqu %xmm2,(%rsi) +- addq $16,%rdi +- addq $16,%rsi +- +- vpxor %xmm2,%xmm0,%xmm0 +- vmovdqa -32(%rcx),%xmm1 +- call GFMUL +- +- jmp L$256_dec_loop2 +- +-L$256_dec_out: +- vmovdqu %xmm0,(%rdx) +- .byte 0xf3,0xc3 +- +- +-.globl _aes256gcmsiv_kdf +-.private_extern _aes256gcmsiv_kdf +- +-.p2align 4 +-_aes256gcmsiv_kdf: +- +- +- +- +- +- vmovdqa (%rdx),%xmm1 +- vmovdqa 0(%rdi),%xmm4 +- vmovdqa and_mask(%rip),%xmm11 +- vmovdqa one(%rip),%xmm8 +- vpshufd $0x90,%xmm4,%xmm4 +- vpand %xmm11,%xmm4,%xmm4 +- vpaddd %xmm8,%xmm4,%xmm6 +- vpaddd %xmm8,%xmm6,%xmm7 +- vpaddd %xmm8,%xmm7,%xmm11 +- vpaddd %xmm8,%xmm11,%xmm12 +- vpaddd %xmm8,%xmm12,%xmm13 +- +- vpxor %xmm1,%xmm4,%xmm4 +- vpxor %xmm1,%xmm6,%xmm6 +- vpxor %xmm1,%xmm7,%xmm7 +- vpxor %xmm1,%xmm11,%xmm11 +- vpxor %xmm1,%xmm12,%xmm12 +- vpxor %xmm1,%xmm13,%xmm13 +- +- vmovdqa 16(%rdx),%xmm1 +- vaesenc %xmm1,%xmm4,%xmm4 +- vaesenc %xmm1,%xmm6,%xmm6 +- vaesenc %xmm1,%xmm7,%xmm7 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- vaesenc %xmm1,%xmm13,%xmm13 +- +- vmovdqa 32(%rdx),%xmm2 +- vaesenc %xmm2,%xmm4,%xmm4 +- vaesenc %xmm2,%xmm6,%xmm6 +- vaesenc %xmm2,%xmm7,%xmm7 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- vaesenc %xmm2,%xmm13,%xmm13 +- +- vmovdqa 48(%rdx),%xmm1 +- vaesenc %xmm1,%xmm4,%xmm4 +- vaesenc %xmm1,%xmm6,%xmm6 +- vaesenc %xmm1,%xmm7,%xmm7 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- vaesenc %xmm1,%xmm13,%xmm13 +- +- vmovdqa 64(%rdx),%xmm2 +- vaesenc %xmm2,%xmm4,%xmm4 +- vaesenc %xmm2,%xmm6,%xmm6 +- vaesenc %xmm2,%xmm7,%xmm7 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- vaesenc %xmm2,%xmm13,%xmm13 +- +- vmovdqa 80(%rdx),%xmm1 +- vaesenc %xmm1,%xmm4,%xmm4 +- vaesenc %xmm1,%xmm6,%xmm6 +- vaesenc %xmm1,%xmm7,%xmm7 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- vaesenc %xmm1,%xmm13,%xmm13 +- +- vmovdqa 96(%rdx),%xmm2 +- vaesenc %xmm2,%xmm4,%xmm4 +- vaesenc %xmm2,%xmm6,%xmm6 +- vaesenc %xmm2,%xmm7,%xmm7 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- vaesenc 
%xmm2,%xmm13,%xmm13 +- +- vmovdqa 112(%rdx),%xmm1 +- vaesenc %xmm1,%xmm4,%xmm4 +- vaesenc %xmm1,%xmm6,%xmm6 +- vaesenc %xmm1,%xmm7,%xmm7 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- vaesenc %xmm1,%xmm13,%xmm13 +- +- vmovdqa 128(%rdx),%xmm2 +- vaesenc %xmm2,%xmm4,%xmm4 +- vaesenc %xmm2,%xmm6,%xmm6 +- vaesenc %xmm2,%xmm7,%xmm7 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- vaesenc %xmm2,%xmm13,%xmm13 +- +- vmovdqa 144(%rdx),%xmm1 +- vaesenc %xmm1,%xmm4,%xmm4 +- vaesenc %xmm1,%xmm6,%xmm6 +- vaesenc %xmm1,%xmm7,%xmm7 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- vaesenc %xmm1,%xmm13,%xmm13 +- +- vmovdqa 160(%rdx),%xmm2 +- vaesenc %xmm2,%xmm4,%xmm4 +- vaesenc %xmm2,%xmm6,%xmm6 +- vaesenc %xmm2,%xmm7,%xmm7 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- vaesenc %xmm2,%xmm13,%xmm13 +- +- vmovdqa 176(%rdx),%xmm1 +- vaesenc %xmm1,%xmm4,%xmm4 +- vaesenc %xmm1,%xmm6,%xmm6 +- vaesenc %xmm1,%xmm7,%xmm7 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- vaesenc %xmm1,%xmm13,%xmm13 +- +- vmovdqa 192(%rdx),%xmm2 +- vaesenc %xmm2,%xmm4,%xmm4 +- vaesenc %xmm2,%xmm6,%xmm6 +- vaesenc %xmm2,%xmm7,%xmm7 +- vaesenc %xmm2,%xmm11,%xmm11 +- vaesenc %xmm2,%xmm12,%xmm12 +- vaesenc %xmm2,%xmm13,%xmm13 +- +- vmovdqa 208(%rdx),%xmm1 +- vaesenc %xmm1,%xmm4,%xmm4 +- vaesenc %xmm1,%xmm6,%xmm6 +- vaesenc %xmm1,%xmm7,%xmm7 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- vaesenc %xmm1,%xmm13,%xmm13 +- +- vmovdqa 224(%rdx),%xmm2 +- vaesenclast %xmm2,%xmm4,%xmm4 +- vaesenclast %xmm2,%xmm6,%xmm6 +- vaesenclast %xmm2,%xmm7,%xmm7 +- vaesenclast %xmm2,%xmm11,%xmm11 +- vaesenclast %xmm2,%xmm12,%xmm12 +- vaesenclast %xmm2,%xmm13,%xmm13 +- +- +- vmovdqa %xmm4,0(%rsi) +- vmovdqa %xmm6,16(%rsi) +- vmovdqa %xmm7,32(%rsi) +- vmovdqa %xmm11,48(%rsi) +- vmovdqa %xmm12,64(%rsi) +- vmovdqa %xmm13,80(%rsi) +- .byte 0xf3,0xc3 +- +- +-#endif +diff --git a/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S b/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S +deleted file mode 100644 +index 6813510..0000000 +--- a/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S ++++ /dev/null +@@ -1,8878 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +-chacha20_poly1305_constants: +- +-.p2align 6 +-L$chacha20_consts: +-.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +-.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k' +-L$rol8: +-.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 +-.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14 +-L$rol16: +-.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 +-.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13 +-L$avx2_init: +-.long 0,0,0,0 +-L$sse_inc: +-.long 1,0,0,0 +-L$avx2_inc: +-.long 2,0,0,0,2,0,0,0 +-L$clamp: +-.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC +-.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF +-.p2align 4 +-L$and_masks: +-.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00 +-.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff +- +- +-.p2align 6 +-poly_hash_ad_internal: +- +- +- xorq %r10,%r10 +- xorq %r11,%r11 +- xorq %r12,%r12 +- cmpq $13,%r8 +- jne L$hash_ad_loop +-L$poly_fast_tls_ad: +- +- movq (%rcx),%r10 +- movq 5(%rcx),%r11 +- shrq $24,%r11 +- movq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- .byte 0xf3,0xc3 +-L$hash_ad_loop: +- +- cmpq $16,%r8 +- jb L$hash_ad_tail +- addq 0+0(%rcx),%r10 +- adcq 8+0(%rcx),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq 
%rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rcx),%rcx +- subq $16,%r8 +- jmp L$hash_ad_loop +-L$hash_ad_tail: +- cmpq $0,%r8 +- je L$hash_ad_done +- +- xorq %r13,%r13 +- xorq %r14,%r14 +- xorq %r15,%r15 +- addq %r8,%rcx +-L$hash_ad_tail_loop: +- shldq $8,%r13,%r14 +- shlq $8,%r13 +- movzbq -1(%rcx),%r15 +- xorq %r15,%r13 +- decq %rcx +- decq %r8 +- jne L$hash_ad_tail_loop +- +- addq %r13,%r10 +- adcq %r14,%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- +-L$hash_ad_done: +- .byte 0xf3,0xc3 +- +- +- +-.globl _chacha20_poly1305_open +-.private_extern _chacha20_poly1305_open +- +-.p2align 6 +-_chacha20_poly1305_open: +- +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- +- +- pushq %r9 +- +- subq $288 + 0 + 32,%rsp +- +- +- leaq 32(%rsp),%rbp +- andq $-32,%rbp +- +- movq %rdx,%rbx +- movq %r8,0+0+32(%rbp) +- movq %rbx,8+0+32(%rbp) +- +- movl _OPENSSL_ia32cap_P+8(%rip),%eax +- andl $288,%eax +- xorl $288,%eax +- jz chacha20_poly1305_open_avx2 +- +- cmpq $128,%rbx +- jbe L$open_sse_128 +- +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqu 0(%r9),%xmm4 +- movdqu 16(%r9),%xmm8 +- movdqu 32(%r9),%xmm12 +- +- movdqa %xmm12,%xmm7 +- +- movdqa %xmm4,0+48(%rbp) +- movdqa %xmm8,0+64(%rbp) +- movdqa %xmm12,0+96(%rbp) +- movq $10,%r10 +-L$open_sse_init_rounds: +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- +- decq %r10 +- jne L$open_sse_init_rounds +- +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 0+48(%rbp),%xmm4 +- +- pand L$clamp(%rip),%xmm0 +- movdqa %xmm0,0+0(%rbp) +- movdqa 
%xmm4,0+16(%rbp) +- +- movq %r8,%r8 +- call poly_hash_ad_internal +-L$open_sse_main_loop: +- cmpq $256,%rbx +- jb L$open_sse_tail +- +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqa 0+48(%rbp),%xmm4 +- movdqa 0+64(%rbp),%xmm8 +- movdqa %xmm0,%xmm1 +- movdqa %xmm4,%xmm5 +- movdqa %xmm8,%xmm9 +- movdqa %xmm0,%xmm2 +- movdqa %xmm4,%xmm6 +- movdqa %xmm8,%xmm10 +- movdqa %xmm0,%xmm3 +- movdqa %xmm4,%xmm7 +- movdqa %xmm8,%xmm11 +- movdqa 0+96(%rbp),%xmm15 +- paddd L$sse_inc(%rip),%xmm15 +- movdqa %xmm15,%xmm14 +- paddd L$sse_inc(%rip),%xmm14 +- movdqa %xmm14,%xmm13 +- paddd L$sse_inc(%rip),%xmm13 +- movdqa %xmm13,%xmm12 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,0+96(%rbp) +- movdqa %xmm13,0+112(%rbp) +- movdqa %xmm14,0+128(%rbp) +- movdqa %xmm15,0+144(%rbp) +- +- +- +- movq $4,%rcx +- movq %rsi,%r8 +-L$open_sse_main_loop_rounds: +- movdqa %xmm8,0+80(%rbp) +- movdqa L$rol16(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- addq 0+0(%r8),%r10 +- adcq 8+0(%r8),%r11 +- adcq $1,%r12 +- +- leaq 16(%r8),%r8 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm4 +- pxor %xmm8,%xmm4 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movdqa L$rol8(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa 0+80(%rbp),%xmm8 +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +-.byte 102,15,58,15,255,4 +-.byte 102,69,15,58,15,219,8 +-.byte 102,69,15,58,15,255,12 +-.byte 102,15,58,15,246,4 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,12 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- movdqa %xmm8,0+80(%rbp) +- movdqa L$rol16(%rip),%xmm8 +- paddd %xmm7,%xmm3 
+- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa L$rol8(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa 0+80(%rbp),%xmm8 +-.byte 102,15,58,15,255,12 +-.byte 102,69,15,58,15,219,8 +-.byte 102,69,15,58,15,255,4 +-.byte 102,15,58,15,246,12 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,4 +-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- +- decq %rcx +- jge L$open_sse_main_loop_rounds +- addq 0+0(%r8),%r10 +- adcq 8+0(%r8),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%r8),%r8 +- cmpq $-6,%rcx +- jg L$open_sse_main_loop_rounds +- paddd L$chacha20_consts(%rip),%xmm3 +- paddd 0+48(%rbp),%xmm7 +- paddd 0+64(%rbp),%xmm11 +- paddd 0+144(%rbp),%xmm15 +- paddd L$chacha20_consts(%rip),%xmm2 +- paddd 0+48(%rbp),%xmm6 +- paddd 0+64(%rbp),%xmm10 +- paddd 0+128(%rbp),%xmm14 +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd 0+48(%rbp),%xmm5 +- paddd 0+64(%rbp),%xmm9 +- paddd 0+112(%rbp),%xmm13 +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 
0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- movdqa %xmm12,0+80(%rbp) +- movdqu 0 + 0(%rsi),%xmm12 +- pxor %xmm3,%xmm12 +- movdqu %xmm12,0 + 0(%rdi) +- movdqu 16 + 0(%rsi),%xmm12 +- pxor %xmm7,%xmm12 +- movdqu %xmm12,16 + 0(%rdi) +- movdqu 32 + 0(%rsi),%xmm12 +- pxor %xmm11,%xmm12 +- movdqu %xmm12,32 + 0(%rdi) +- movdqu 48 + 0(%rsi),%xmm12 +- pxor %xmm15,%xmm12 +- movdqu %xmm12,48 + 0(%rdi) +- movdqu 0 + 64(%rsi),%xmm3 +- movdqu 16 + 64(%rsi),%xmm7 +- movdqu 32 + 64(%rsi),%xmm11 +- movdqu 48 + 64(%rsi),%xmm15 +- pxor %xmm3,%xmm2 +- pxor %xmm7,%xmm6 +- pxor %xmm11,%xmm10 +- pxor %xmm14,%xmm15 +- movdqu %xmm2,0 + 64(%rdi) +- movdqu %xmm6,16 + 64(%rdi) +- movdqu %xmm10,32 + 64(%rdi) +- movdqu %xmm15,48 + 64(%rdi) +- movdqu 0 + 128(%rsi),%xmm3 +- movdqu 16 + 128(%rsi),%xmm7 +- movdqu 32 + 128(%rsi),%xmm11 +- movdqu 48 + 128(%rsi),%xmm15 +- pxor %xmm3,%xmm1 +- pxor %xmm7,%xmm5 +- pxor %xmm11,%xmm9 +- pxor %xmm13,%xmm15 +- movdqu %xmm1,0 + 128(%rdi) +- movdqu %xmm5,16 + 128(%rdi) +- movdqu %xmm9,32 + 128(%rdi) +- movdqu %xmm15,48 + 128(%rdi) +- movdqu 0 + 192(%rsi),%xmm3 +- movdqu 16 + 192(%rsi),%xmm7 +- movdqu 32 + 192(%rsi),%xmm11 +- movdqu 48 + 192(%rsi),%xmm15 +- pxor %xmm3,%xmm0 +- pxor %xmm7,%xmm4 +- pxor %xmm11,%xmm8 +- pxor 0+80(%rbp),%xmm15 +- movdqu %xmm0,0 + 192(%rdi) +- movdqu %xmm4,16 + 192(%rdi) +- movdqu %xmm8,32 + 192(%rdi) +- movdqu %xmm15,48 + 192(%rdi) +- +- leaq 256(%rsi),%rsi +- leaq 256(%rdi),%rdi +- subq $256,%rbx +- jmp L$open_sse_main_loop +-L$open_sse_tail: +- +- testq %rbx,%rbx +- jz L$open_sse_finalize +- cmpq $192,%rbx +- ja L$open_sse_tail_256 +- cmpq $128,%rbx +- ja L$open_sse_tail_192 +- cmpq $64,%rbx +- ja L$open_sse_tail_128 +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqa 0+48(%rbp),%xmm4 +- movdqa 0+64(%rbp),%xmm8 +- movdqa 0+96(%rbp),%xmm12 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,0+96(%rbp) +- +- xorq %r8,%r8 +- movq %rbx,%rcx +- cmpq $16,%rcx +- jb L$open_sse_tail_64_rounds +-L$open_sse_tail_64_rounds_and_x1hash: +- addq 0+0(%rsi,%r8,1),%r10 +- adcq 8+0(%rsi,%r8,1),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- subq $16,%rcx +-L$open_sse_tail_64_rounds: +- addq $16,%r8 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb 
L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- +- cmpq $16,%rcx +- jae L$open_sse_tail_64_rounds_and_x1hash +- cmpq $160,%r8 +- jne L$open_sse_tail_64_rounds +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- +- jmp L$open_sse_tail_64_dec_loop +- +-L$open_sse_tail_128: +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqa 0+48(%rbp),%xmm4 +- movdqa 0+64(%rbp),%xmm8 +- movdqa %xmm0,%xmm1 +- movdqa %xmm4,%xmm5 +- movdqa %xmm8,%xmm9 +- movdqa 0+96(%rbp),%xmm13 +- paddd L$sse_inc(%rip),%xmm13 +- movdqa %xmm13,%xmm12 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,0+96(%rbp) +- movdqa %xmm13,0+112(%rbp) +- +- movq %rbx,%rcx +- andq $-16,%rcx +- xorq %r8,%r8 +-L$open_sse_tail_128_rounds_and_x1hash: +- addq 0+0(%rsi,%r8,1),%r10 +- adcq 8+0(%rsi,%r8,1),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +-L$open_sse_tail_128_rounds: +- addq $16,%r8 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 
+-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +- +- cmpq %rcx,%r8 +- jb L$open_sse_tail_128_rounds_and_x1hash +- cmpq $160,%r8 +- jne L$open_sse_tail_128_rounds +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd 0+48(%rbp),%xmm5 +- paddd 0+64(%rbp),%xmm9 +- paddd 0+112(%rbp),%xmm13 +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- movdqu 0 + 0(%rsi),%xmm3 +- movdqu 16 + 0(%rsi),%xmm7 +- movdqu 32 + 0(%rsi),%xmm11 +- movdqu 48 + 0(%rsi),%xmm15 +- pxor %xmm3,%xmm1 +- pxor %xmm7,%xmm5 +- pxor %xmm11,%xmm9 +- pxor %xmm13,%xmm15 +- movdqu %xmm1,0 + 0(%rdi) +- movdqu %xmm5,16 + 0(%rdi) +- movdqu %xmm9,32 + 0(%rdi) +- movdqu %xmm15,48 + 0(%rdi) +- +- subq $64,%rbx +- leaq 64(%rsi),%rsi +- leaq 64(%rdi),%rdi +- jmp L$open_sse_tail_64_dec_loop +- +-L$open_sse_tail_192: +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqa 0+48(%rbp),%xmm4 +- movdqa 0+64(%rbp),%xmm8 +- movdqa %xmm0,%xmm1 +- movdqa %xmm4,%xmm5 +- movdqa %xmm8,%xmm9 +- movdqa %xmm0,%xmm2 +- movdqa %xmm4,%xmm6 +- movdqa %xmm8,%xmm10 +- movdqa 0+96(%rbp),%xmm14 +- paddd L$sse_inc(%rip),%xmm14 +- movdqa %xmm14,%xmm13 +- paddd L$sse_inc(%rip),%xmm13 +- movdqa %xmm13,%xmm12 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,0+96(%rbp) +- movdqa %xmm13,0+112(%rbp) +- movdqa %xmm14,0+128(%rbp) +- +- movq %rbx,%rcx +- movq $160,%r8 +- cmpq $160,%rcx +- cmovgq %r8,%rcx +- andq $-16,%rcx +- xorq %r8,%r8 +-L$open_sse_tail_192_rounds_and_x1hash: +- addq 0+0(%rsi,%r8,1),%r10 +- adcq 8+0(%rsi,%r8,1),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +-L$open_sse_tail_192_rounds: +- addq $16,%r8 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm6 +- pxor %xmm3,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor 
%xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm6 +- pxor %xmm3,%xmm6 +-.byte 102,15,58,15,246,4 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,12 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm6 +- pxor %xmm3,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm6 +- pxor %xmm3,%xmm6 +-.byte 102,15,58,15,246,12 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,4 +- +- cmpq %rcx,%r8 +- jb L$open_sse_tail_192_rounds_and_x1hash +- cmpq $160,%r8 +- jne L$open_sse_tail_192_rounds +- cmpq $176,%rbx +- jb L$open_sse_tail_192_finish +- addq 0+160(%rsi),%r10 +- adcq 8+160(%rsi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- cmpq $192,%rbx +- jb L$open_sse_tail_192_finish +- addq 0+176(%rsi),%r10 +- adcq 8+176(%rsi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +-L$open_sse_tail_192_finish: +- paddd L$chacha20_consts(%rip),%xmm2 +- paddd 0+48(%rbp),%xmm6 +- paddd 0+64(%rbp),%xmm10 +- paddd 0+128(%rbp),%xmm14 +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd 0+48(%rbp),%xmm5 +- paddd 0+64(%rbp),%xmm9 +- paddd 
0+112(%rbp),%xmm13 +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- movdqu 0 + 0(%rsi),%xmm3 +- movdqu 16 + 0(%rsi),%xmm7 +- movdqu 32 + 0(%rsi),%xmm11 +- movdqu 48 + 0(%rsi),%xmm15 +- pxor %xmm3,%xmm2 +- pxor %xmm7,%xmm6 +- pxor %xmm11,%xmm10 +- pxor %xmm14,%xmm15 +- movdqu %xmm2,0 + 0(%rdi) +- movdqu %xmm6,16 + 0(%rdi) +- movdqu %xmm10,32 + 0(%rdi) +- movdqu %xmm15,48 + 0(%rdi) +- movdqu 0 + 64(%rsi),%xmm3 +- movdqu 16 + 64(%rsi),%xmm7 +- movdqu 32 + 64(%rsi),%xmm11 +- movdqu 48 + 64(%rsi),%xmm15 +- pxor %xmm3,%xmm1 +- pxor %xmm7,%xmm5 +- pxor %xmm11,%xmm9 +- pxor %xmm13,%xmm15 +- movdqu %xmm1,0 + 64(%rdi) +- movdqu %xmm5,16 + 64(%rdi) +- movdqu %xmm9,32 + 64(%rdi) +- movdqu %xmm15,48 + 64(%rdi) +- +- subq $128,%rbx +- leaq 128(%rsi),%rsi +- leaq 128(%rdi),%rdi +- jmp L$open_sse_tail_64_dec_loop +- +-L$open_sse_tail_256: +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqa 0+48(%rbp),%xmm4 +- movdqa 0+64(%rbp),%xmm8 +- movdqa %xmm0,%xmm1 +- movdqa %xmm4,%xmm5 +- movdqa %xmm8,%xmm9 +- movdqa %xmm0,%xmm2 +- movdqa %xmm4,%xmm6 +- movdqa %xmm8,%xmm10 +- movdqa %xmm0,%xmm3 +- movdqa %xmm4,%xmm7 +- movdqa %xmm8,%xmm11 +- movdqa 0+96(%rbp),%xmm15 +- paddd L$sse_inc(%rip),%xmm15 +- movdqa %xmm15,%xmm14 +- paddd L$sse_inc(%rip),%xmm14 +- movdqa %xmm14,%xmm13 +- paddd L$sse_inc(%rip),%xmm13 +- movdqa %xmm13,%xmm12 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,0+96(%rbp) +- movdqa %xmm13,0+112(%rbp) +- movdqa %xmm14,0+128(%rbp) +- movdqa %xmm15,0+144(%rbp) +- +- xorq %r8,%r8 +-L$open_sse_tail_256_rounds_and_x1hash: +- addq 0+0(%rsi,%r8,1),%r10 +- adcq 8+0(%rsi,%r8,1),%r11 +- adcq $1,%r12 +- movdqa %xmm11,0+80(%rbp) +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm11 +- pslld $12,%xmm11 +- psrld $20,%xmm4 +- pxor %xmm11,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm11 +- pslld $7,%xmm11 +- psrld $25,%xmm4 +- pxor %xmm11,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm11 +- pslld $12,%xmm11 +- psrld $20,%xmm5 +- pxor %xmm11,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm11 +- pslld $7,%xmm11 +- psrld $25,%xmm5 +- pxor %xmm11,%xmm5 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm11 +- pslld $12,%xmm11 +- psrld $20,%xmm6 +- pxor %xmm11,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm11 +- pslld $7,%xmm11 +- psrld $25,%xmm6 +- pxor %xmm11,%xmm6 +-.byte 102,15,58,15,246,4 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,12 +- movdqa 0+80(%rbp),%xmm11 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movdqa %xmm9,0+80(%rbp) +- paddd %xmm7,%xmm3 +- pxor %xmm3,%xmm15 +- pshufb L$rol16(%rip),%xmm15 +- paddd %xmm15,%xmm11 +- pxor %xmm11,%xmm7 +- movdqa %xmm7,%xmm9 +- pslld $12,%xmm9 
+- psrld $20,%xmm7 +- pxor %xmm9,%xmm7 +- paddd %xmm7,%xmm3 +- pxor %xmm3,%xmm15 +- pshufb L$rol8(%rip),%xmm15 +- paddd %xmm15,%xmm11 +- pxor %xmm11,%xmm7 +- movdqa %xmm7,%xmm9 +- pslld $7,%xmm9 +- psrld $25,%xmm7 +- pxor %xmm9,%xmm7 +-.byte 102,15,58,15,255,4 +-.byte 102,69,15,58,15,219,8 +-.byte 102,69,15,58,15,255,12 +- movdqa 0+80(%rbp),%xmm9 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- movdqa %xmm11,0+80(%rbp) +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm11 +- pslld $12,%xmm11 +- psrld $20,%xmm4 +- pxor %xmm11,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm11 +- pslld $7,%xmm11 +- psrld $25,%xmm4 +- pxor %xmm11,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm11 +- pslld $12,%xmm11 +- psrld $20,%xmm5 +- pxor %xmm11,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm11 +- pslld $7,%xmm11 +- psrld $25,%xmm5 +- pxor %xmm11,%xmm5 +-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm11 +- pslld $12,%xmm11 +- psrld $20,%xmm6 +- pxor %xmm11,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm11 +- pslld $7,%xmm11 +- psrld $25,%xmm6 +- pxor %xmm11,%xmm6 +-.byte 102,15,58,15,246,12 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,4 +- movdqa 0+80(%rbp),%xmm11 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- movdqa %xmm9,0+80(%rbp) +- paddd %xmm7,%xmm3 +- pxor %xmm3,%xmm15 +- pshufb L$rol16(%rip),%xmm15 +- paddd %xmm15,%xmm11 +- pxor %xmm11,%xmm7 +- movdqa %xmm7,%xmm9 +- pslld $12,%xmm9 +- psrld $20,%xmm7 +- pxor %xmm9,%xmm7 +- paddd %xmm7,%xmm3 +- pxor %xmm3,%xmm15 +- pshufb L$rol8(%rip),%xmm15 +- paddd %xmm15,%xmm11 +- pxor %xmm11,%xmm7 +- movdqa %xmm7,%xmm9 +- pslld $7,%xmm9 +- psrld $25,%xmm7 +- pxor %xmm9,%xmm7 +-.byte 102,15,58,15,255,12 +-.byte 102,69,15,58,15,219,8 +-.byte 102,69,15,58,15,255,4 +- movdqa 0+80(%rbp),%xmm9 +- +- addq $16,%r8 +- cmpq $160,%r8 +- jb L$open_sse_tail_256_rounds_and_x1hash +- +- movq %rbx,%rcx +- andq $-16,%rcx +-L$open_sse_tail_256_hash: +- addq 0+0(%rsi,%r8,1),%r10 +- adcq 8+0(%rsi,%r8,1),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq 
%r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- addq $16,%r8 +- cmpq %rcx,%r8 +- jb L$open_sse_tail_256_hash +- paddd L$chacha20_consts(%rip),%xmm3 +- paddd 0+48(%rbp),%xmm7 +- paddd 0+64(%rbp),%xmm11 +- paddd 0+144(%rbp),%xmm15 +- paddd L$chacha20_consts(%rip),%xmm2 +- paddd 0+48(%rbp),%xmm6 +- paddd 0+64(%rbp),%xmm10 +- paddd 0+128(%rbp),%xmm14 +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd 0+48(%rbp),%xmm5 +- paddd 0+64(%rbp),%xmm9 +- paddd 0+112(%rbp),%xmm13 +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- movdqa %xmm12,0+80(%rbp) +- movdqu 0 + 0(%rsi),%xmm12 +- pxor %xmm3,%xmm12 +- movdqu %xmm12,0 + 0(%rdi) +- movdqu 16 + 0(%rsi),%xmm12 +- pxor %xmm7,%xmm12 +- movdqu %xmm12,16 + 0(%rdi) +- movdqu 32 + 0(%rsi),%xmm12 +- pxor %xmm11,%xmm12 +- movdqu %xmm12,32 + 0(%rdi) +- movdqu 48 + 0(%rsi),%xmm12 +- pxor %xmm15,%xmm12 +- movdqu %xmm12,48 + 0(%rdi) +- movdqu 0 + 64(%rsi),%xmm3 +- movdqu 16 + 64(%rsi),%xmm7 +- movdqu 32 + 64(%rsi),%xmm11 +- movdqu 48 + 64(%rsi),%xmm15 +- pxor %xmm3,%xmm2 +- pxor %xmm7,%xmm6 +- pxor %xmm11,%xmm10 +- pxor %xmm14,%xmm15 +- movdqu %xmm2,0 + 64(%rdi) +- movdqu %xmm6,16 + 64(%rdi) +- movdqu %xmm10,32 + 64(%rdi) +- movdqu %xmm15,48 + 64(%rdi) +- movdqu 0 + 128(%rsi),%xmm3 +- movdqu 16 + 128(%rsi),%xmm7 +- movdqu 32 + 128(%rsi),%xmm11 +- movdqu 48 + 128(%rsi),%xmm15 +- pxor %xmm3,%xmm1 +- pxor %xmm7,%xmm5 +- pxor %xmm11,%xmm9 +- pxor %xmm13,%xmm15 +- movdqu %xmm1,0 + 128(%rdi) +- movdqu %xmm5,16 + 128(%rdi) +- movdqu %xmm9,32 + 128(%rdi) +- movdqu %xmm15,48 + 128(%rdi) +- +- movdqa 0+80(%rbp),%xmm12 +- subq $192,%rbx +- leaq 192(%rsi),%rsi +- leaq 192(%rdi),%rdi +- +- +-L$open_sse_tail_64_dec_loop: +- cmpq $16,%rbx +- jb L$open_sse_tail_16_init +- subq $16,%rbx +- movdqu (%rsi),%xmm3 +- pxor %xmm3,%xmm0 +- movdqu %xmm0,(%rdi) +- leaq 16(%rsi),%rsi +- leaq 16(%rdi),%rdi +- movdqa %xmm4,%xmm0 +- movdqa %xmm8,%xmm4 +- movdqa %xmm12,%xmm8 +- jmp L$open_sse_tail_64_dec_loop +-L$open_sse_tail_16_init: +- movdqa %xmm0,%xmm1 +- +- +-L$open_sse_tail_16: +- testq %rbx,%rbx +- jz L$open_sse_finalize +- +- +- +- pxor %xmm3,%xmm3 +- leaq -1(%rsi,%rbx,1),%rsi +- movq %rbx,%r8 +-L$open_sse_tail_16_compose: +- pslldq $1,%xmm3 +- pinsrb $0,(%rsi),%xmm3 +- subq $1,%rsi +- subq $1,%r8 +- jnz L$open_sse_tail_16_compose +- +-.byte 102,73,15,126,221 +- pextrq $1,%xmm3,%r14 +- +- pxor %xmm1,%xmm3 +- +- +-L$open_sse_tail_16_extract: +- pextrb $0,%xmm3,(%rdi) +- psrldq $1,%xmm3 +- addq $1,%rdi +- subq $1,%rbx +- jne L$open_sse_tail_16_extract +- +- addq %r13,%r10 +- adcq %r14,%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- +-L$open_sse_finalize: +- addq 0+0+32(%rbp),%r10 +- adcq 8+0+32(%rbp),%r11 +- adcq $1,%r12 +- movq 
0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- +- movq %r10,%r13 +- movq %r11,%r14 +- movq %r12,%r15 +- subq $-5,%r10 +- sbbq $-1,%r11 +- sbbq $3,%r12 +- cmovcq %r13,%r10 +- cmovcq %r14,%r11 +- cmovcq %r15,%r12 +- +- addq 0+0+16(%rbp),%r10 +- adcq 8+0+16(%rbp),%r11 +- +- +- addq $288 + 0 + 32,%rsp +- +- +- popq %r9 +- +- movq %r10,(%r9) +- movq %r11,8(%r9) +- popq %r15 +- +- popq %r14 +- +- popq %r13 +- +- popq %r12 +- +- popq %rbx +- +- popq %rbp +- +- .byte 0xf3,0xc3 +- +-L$open_sse_128: +- +- movdqu L$chacha20_consts(%rip),%xmm0 +- movdqa %xmm0,%xmm1 +- movdqa %xmm0,%xmm2 +- movdqu 0(%r9),%xmm4 +- movdqa %xmm4,%xmm5 +- movdqa %xmm4,%xmm6 +- movdqu 16(%r9),%xmm8 +- movdqa %xmm8,%xmm9 +- movdqa %xmm8,%xmm10 +- movdqu 32(%r9),%xmm12 +- movdqa %xmm12,%xmm13 +- paddd L$sse_inc(%rip),%xmm13 +- movdqa %xmm13,%xmm14 +- paddd L$sse_inc(%rip),%xmm14 +- movdqa %xmm4,%xmm7 +- movdqa %xmm8,%xmm11 +- movdqa %xmm13,%xmm15 +- movq $10,%r10 +- +-L$open_sse_128_rounds: +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm6 +- pxor %xmm3,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm6 +- pxor %xmm3,%xmm6 +-.byte 102,15,58,15,246,4 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,12 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb 
L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm6 +- pxor %xmm3,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm6 +- pxor %xmm3,%xmm6 +-.byte 102,15,58,15,246,12 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,4 +- +- decq %r10 +- jnz L$open_sse_128_rounds +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd L$chacha20_consts(%rip),%xmm2 +- paddd %xmm7,%xmm4 +- paddd %xmm7,%xmm5 +- paddd %xmm7,%xmm6 +- paddd %xmm11,%xmm9 +- paddd %xmm11,%xmm10 +- paddd %xmm15,%xmm13 +- paddd L$sse_inc(%rip),%xmm15 +- paddd %xmm15,%xmm14 +- +- pand L$clamp(%rip),%xmm0 +- movdqa %xmm0,0+0(%rbp) +- movdqa %xmm4,0+16(%rbp) +- +- movq %r8,%r8 +- call poly_hash_ad_internal +-L$open_sse_128_xor_hash: +- cmpq $16,%rbx +- jb L$open_sse_tail_16 +- subq $16,%rbx +- addq 0+0(%rsi),%r10 +- adcq 8+0(%rsi),%r11 +- adcq $1,%r12 +- +- +- movdqu 0(%rsi),%xmm3 +- pxor %xmm3,%xmm1 +- movdqu %xmm1,0(%rdi) +- leaq 16(%rsi),%rsi +- leaq 16(%rdi),%rdi +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- +- movdqa %xmm5,%xmm1 +- movdqa %xmm9,%xmm5 +- movdqa %xmm13,%xmm9 +- movdqa %xmm2,%xmm13 +- movdqa %xmm6,%xmm2 +- movdqa %xmm10,%xmm6 +- movdqa %xmm14,%xmm10 +- jmp L$open_sse_128_xor_hash +- +- +- +- +- +- +- +- +- +-.globl _chacha20_poly1305_seal +-.private_extern _chacha20_poly1305_seal +- +-.p2align 6 +-_chacha20_poly1305_seal: +- +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- +- +- pushq %r9 +- +- subq $288 + 0 + 32,%rsp +- +- leaq 32(%rsp),%rbp +- andq $-32,%rbp +- +- movq 56(%r9),%rbx +- addq %rdx,%rbx +- movq %r8,0+0+32(%rbp) +- movq %rbx,8+0+32(%rbp) +- movq %rdx,%rbx +- +- movl _OPENSSL_ia32cap_P+8(%rip),%eax +- andl $288,%eax +- xorl $288,%eax +- jz chacha20_poly1305_seal_avx2 +- +- cmpq $128,%rbx +- jbe L$seal_sse_128 +- +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqu 0(%r9),%xmm4 +- movdqu 16(%r9),%xmm8 +- movdqu 32(%r9),%xmm12 +- +- movdqa %xmm0,%xmm1 +- movdqa %xmm0,%xmm2 +- movdqa %xmm0,%xmm3 +- movdqa %xmm4,%xmm5 +- movdqa %xmm4,%xmm6 +- movdqa %xmm4,%xmm7 +- movdqa %xmm8,%xmm9 +- movdqa %xmm8,%xmm10 +- movdqa %xmm8,%xmm11 +- movdqa %xmm12,%xmm15 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,%xmm14 +- paddd L$sse_inc(%rip),%xmm12 +- 
movdqa %xmm12,%xmm13 +- paddd L$sse_inc(%rip),%xmm12 +- +- movdqa %xmm4,0+48(%rbp) +- movdqa %xmm8,0+64(%rbp) +- movdqa %xmm12,0+96(%rbp) +- movdqa %xmm13,0+112(%rbp) +- movdqa %xmm14,0+128(%rbp) +- movdqa %xmm15,0+144(%rbp) +- movq $10,%r10 +-L$seal_sse_init_rounds: +- movdqa %xmm8,0+80(%rbp) +- movdqa L$rol16(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa L$rol8(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa 0+80(%rbp),%xmm8 +-.byte 102,15,58,15,255,4 +-.byte 102,69,15,58,15,219,8 +-.byte 102,69,15,58,15,255,12 +-.byte 102,15,58,15,246,4 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,12 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- movdqa %xmm8,0+80(%rbp) +- movdqa L$rol16(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa L$rol8(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 
102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa 0+80(%rbp),%xmm8 +-.byte 102,15,58,15,255,12 +-.byte 102,69,15,58,15,219,8 +-.byte 102,69,15,58,15,255,4 +-.byte 102,15,58,15,246,12 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,4 +-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- +- decq %r10 +- jnz L$seal_sse_init_rounds +- paddd L$chacha20_consts(%rip),%xmm3 +- paddd 0+48(%rbp),%xmm7 +- paddd 0+64(%rbp),%xmm11 +- paddd 0+144(%rbp),%xmm15 +- paddd L$chacha20_consts(%rip),%xmm2 +- paddd 0+48(%rbp),%xmm6 +- paddd 0+64(%rbp),%xmm10 +- paddd 0+128(%rbp),%xmm14 +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd 0+48(%rbp),%xmm5 +- paddd 0+64(%rbp),%xmm9 +- paddd 0+112(%rbp),%xmm13 +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- +- +- pand L$clamp(%rip),%xmm3 +- movdqa %xmm3,0+0(%rbp) +- movdqa %xmm7,0+16(%rbp) +- +- movq %r8,%r8 +- call poly_hash_ad_internal +- movdqu 0 + 0(%rsi),%xmm3 +- movdqu 16 + 0(%rsi),%xmm7 +- movdqu 32 + 0(%rsi),%xmm11 +- movdqu 48 + 0(%rsi),%xmm15 +- pxor %xmm3,%xmm2 +- pxor %xmm7,%xmm6 +- pxor %xmm11,%xmm10 +- pxor %xmm14,%xmm15 +- movdqu %xmm2,0 + 0(%rdi) +- movdqu %xmm6,16 + 0(%rdi) +- movdqu %xmm10,32 + 0(%rdi) +- movdqu %xmm15,48 + 0(%rdi) +- movdqu 0 + 64(%rsi),%xmm3 +- movdqu 16 + 64(%rsi),%xmm7 +- movdqu 32 + 64(%rsi),%xmm11 +- movdqu 48 + 64(%rsi),%xmm15 +- pxor %xmm3,%xmm1 +- pxor %xmm7,%xmm5 +- pxor %xmm11,%xmm9 +- pxor %xmm13,%xmm15 +- movdqu %xmm1,0 + 64(%rdi) +- movdqu %xmm5,16 + 64(%rdi) +- movdqu %xmm9,32 + 64(%rdi) +- movdqu %xmm15,48 + 64(%rdi) +- +- cmpq $192,%rbx +- ja L$seal_sse_main_init +- movq $128,%rcx +- subq $128,%rbx +- leaq 128(%rsi),%rsi +- jmp L$seal_sse_128_tail_hash +-L$seal_sse_main_init: +- movdqu 0 + 128(%rsi),%xmm3 +- movdqu 16 + 128(%rsi),%xmm7 +- movdqu 32 + 128(%rsi),%xmm11 +- movdqu 48 + 128(%rsi),%xmm15 +- pxor %xmm3,%xmm0 +- pxor %xmm7,%xmm4 +- pxor %xmm11,%xmm8 +- pxor %xmm12,%xmm15 +- movdqu %xmm0,0 + 128(%rdi) +- movdqu %xmm4,16 + 128(%rdi) +- movdqu %xmm8,32 + 128(%rdi) +- movdqu %xmm15,48 + 128(%rdi) +- +- movq $192,%rcx +- subq $192,%rbx +- leaq 192(%rsi),%rsi +- movq $2,%rcx +- movq $8,%r8 +- cmpq $64,%rbx +- jbe L$seal_sse_tail_64 +- cmpq $128,%rbx +- jbe L$seal_sse_tail_128 +- cmpq $192,%rbx +- jbe L$seal_sse_tail_192 +- +-L$seal_sse_main_loop: +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqa 0+48(%rbp),%xmm4 +- movdqa 0+64(%rbp),%xmm8 +- movdqa %xmm0,%xmm1 +- movdqa %xmm4,%xmm5 +- movdqa %xmm8,%xmm9 +- movdqa %xmm0,%xmm2 +- movdqa %xmm4,%xmm6 +- movdqa %xmm8,%xmm10 +- movdqa %xmm0,%xmm3 +- movdqa %xmm4,%xmm7 +- movdqa %xmm8,%xmm11 +- movdqa 0+96(%rbp),%xmm15 +- paddd L$sse_inc(%rip),%xmm15 +- movdqa %xmm15,%xmm14 +- paddd L$sse_inc(%rip),%xmm14 +- movdqa %xmm14,%xmm13 +- paddd L$sse_inc(%rip),%xmm13 +- movdqa %xmm13,%xmm12 +- paddd L$sse_inc(%rip),%xmm12 
+- movdqa %xmm12,0+96(%rbp) +- movdqa %xmm13,0+112(%rbp) +- movdqa %xmm14,0+128(%rbp) +- movdqa %xmm15,0+144(%rbp) +- +-.p2align 5 +-L$seal_sse_main_rounds: +- movdqa %xmm8,0+80(%rbp) +- movdqa L$rol16(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm4 +- pxor %xmm8,%xmm4 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movdqa L$rol8(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa 0+80(%rbp),%xmm8 +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +-.byte 102,15,58,15,255,4 +-.byte 102,69,15,58,15,219,8 +-.byte 102,69,15,58,15,255,12 +-.byte 102,15,58,15,246,4 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,12 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- movdqa %xmm8,0+80(%rbp) +- movdqa L$rol16(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa 
%xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $20,%xmm8 +- pslld $32-20,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa L$rol8(%rip),%xmm8 +- paddd %xmm7,%xmm3 +- paddd %xmm6,%xmm2 +- paddd %xmm5,%xmm1 +- paddd %xmm4,%xmm0 +- pxor %xmm3,%xmm15 +- pxor %xmm2,%xmm14 +- pxor %xmm1,%xmm13 +- pxor %xmm0,%xmm12 +-.byte 102,69,15,56,0,248 +-.byte 102,69,15,56,0,240 +-.byte 102,69,15,56,0,232 +-.byte 102,69,15,56,0,224 +- movdqa 0+80(%rbp),%xmm8 +- paddd %xmm15,%xmm11 +- paddd %xmm14,%xmm10 +- paddd %xmm13,%xmm9 +- paddd %xmm12,%xmm8 +- pxor %xmm11,%xmm7 +- pxor %xmm10,%xmm6 +- pxor %xmm9,%xmm5 +- pxor %xmm8,%xmm4 +- movdqa %xmm8,0+80(%rbp) +- movdqa %xmm7,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm7 +- pxor %xmm8,%xmm7 +- movdqa %xmm6,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm6 +- pxor %xmm8,%xmm6 +- movdqa %xmm5,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm5 +- pxor %xmm8,%xmm5 +- movdqa %xmm4,%xmm8 +- psrld $25,%xmm8 +- pslld $32-25,%xmm4 +- pxor %xmm8,%xmm4 +- movdqa 0+80(%rbp),%xmm8 +-.byte 102,15,58,15,255,12 +-.byte 102,69,15,58,15,219,8 +-.byte 102,69,15,58,15,255,4 +-.byte 102,15,58,15,246,12 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,4 +-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- +- leaq 16(%rdi),%rdi +- decq %r8 +- jge L$seal_sse_main_rounds +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +- decq %rcx +- jg L$seal_sse_main_rounds +- paddd L$chacha20_consts(%rip),%xmm3 +- paddd 0+48(%rbp),%xmm7 +- paddd 0+64(%rbp),%xmm11 +- paddd 0+144(%rbp),%xmm15 +- paddd L$chacha20_consts(%rip),%xmm2 +- paddd 0+48(%rbp),%xmm6 +- paddd 0+64(%rbp),%xmm10 +- paddd 0+128(%rbp),%xmm14 +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd 0+48(%rbp),%xmm5 +- paddd 0+64(%rbp),%xmm9 +- paddd 0+112(%rbp),%xmm13 +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- +- movdqa %xmm14,0+80(%rbp) +- movdqa %xmm14,0+80(%rbp) +- movdqu 0 + 0(%rsi),%xmm14 +- pxor %xmm3,%xmm14 +- movdqu %xmm14,0 + 0(%rdi) +- movdqu 16 + 0(%rsi),%xmm14 +- pxor %xmm7,%xmm14 +- movdqu %xmm14,16 + 0(%rdi) +- movdqu 32 + 0(%rsi),%xmm14 +- pxor %xmm11,%xmm14 +- movdqu %xmm14,32 + 0(%rdi) +- movdqu 48 + 0(%rsi),%xmm14 +- pxor %xmm15,%xmm14 +- movdqu %xmm14,48 + 0(%rdi) +- +- movdqa 0+80(%rbp),%xmm14 +- movdqu 0 + 64(%rsi),%xmm3 +- movdqu 16 + 64(%rsi),%xmm7 +- movdqu 32 + 64(%rsi),%xmm11 +- movdqu 48 + 64(%rsi),%xmm15 +- pxor %xmm3,%xmm2 +- pxor %xmm7,%xmm6 +- pxor %xmm11,%xmm10 +- pxor %xmm14,%xmm15 +- movdqu 
%xmm2,0 + 64(%rdi) +- movdqu %xmm6,16 + 64(%rdi) +- movdqu %xmm10,32 + 64(%rdi) +- movdqu %xmm15,48 + 64(%rdi) +- movdqu 0 + 128(%rsi),%xmm3 +- movdqu 16 + 128(%rsi),%xmm7 +- movdqu 32 + 128(%rsi),%xmm11 +- movdqu 48 + 128(%rsi),%xmm15 +- pxor %xmm3,%xmm1 +- pxor %xmm7,%xmm5 +- pxor %xmm11,%xmm9 +- pxor %xmm13,%xmm15 +- movdqu %xmm1,0 + 128(%rdi) +- movdqu %xmm5,16 + 128(%rdi) +- movdqu %xmm9,32 + 128(%rdi) +- movdqu %xmm15,48 + 128(%rdi) +- +- cmpq $256,%rbx +- ja L$seal_sse_main_loop_xor +- +- movq $192,%rcx +- subq $192,%rbx +- leaq 192(%rsi),%rsi +- jmp L$seal_sse_128_tail_hash +-L$seal_sse_main_loop_xor: +- movdqu 0 + 192(%rsi),%xmm3 +- movdqu 16 + 192(%rsi),%xmm7 +- movdqu 32 + 192(%rsi),%xmm11 +- movdqu 48 + 192(%rsi),%xmm15 +- pxor %xmm3,%xmm0 +- pxor %xmm7,%xmm4 +- pxor %xmm11,%xmm8 +- pxor %xmm12,%xmm15 +- movdqu %xmm0,0 + 192(%rdi) +- movdqu %xmm4,16 + 192(%rdi) +- movdqu %xmm8,32 + 192(%rdi) +- movdqu %xmm15,48 + 192(%rdi) +- +- leaq 256(%rsi),%rsi +- subq $256,%rbx +- movq $6,%rcx +- movq $4,%r8 +- cmpq $192,%rbx +- jg L$seal_sse_main_loop +- movq %rbx,%rcx +- testq %rbx,%rbx +- je L$seal_sse_128_tail_hash +- movq $6,%rcx +- cmpq $128,%rbx +- ja L$seal_sse_tail_192 +- cmpq $64,%rbx +- ja L$seal_sse_tail_128 +- +-L$seal_sse_tail_64: +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqa 0+48(%rbp),%xmm4 +- movdqa 0+64(%rbp),%xmm8 +- movdqa 0+96(%rbp),%xmm12 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,0+96(%rbp) +- +-L$seal_sse_tail_64_rounds_and_x2hash: +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +-L$seal_sse_tail_64_rounds_and_x1hash: +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- 
adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +- decq %rcx +- jg L$seal_sse_tail_64_rounds_and_x2hash +- decq %r8 +- jge L$seal_sse_tail_64_rounds_and_x1hash +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- +- jmp L$seal_sse_128_tail_xor +- +-L$seal_sse_tail_128: +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqa 0+48(%rbp),%xmm4 +- movdqa 0+64(%rbp),%xmm8 +- movdqa %xmm0,%xmm1 +- movdqa %xmm4,%xmm5 +- movdqa %xmm8,%xmm9 +- movdqa 0+96(%rbp),%xmm13 +- paddd L$sse_inc(%rip),%xmm13 +- movdqa %xmm13,%xmm12 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,0+96(%rbp) +- movdqa %xmm13,0+112(%rbp) +- +-L$seal_sse_tail_128_rounds_and_x2hash: +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +-L$seal_sse_tail_128_rounds_and_x1hash: +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq 
$0,%r12 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +- +- leaq 16(%rdi),%rdi +- decq %rcx +- jg L$seal_sse_tail_128_rounds_and_x2hash +- decq %r8 +- jge L$seal_sse_tail_128_rounds_and_x1hash +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd 0+48(%rbp),%xmm5 +- paddd 0+64(%rbp),%xmm9 +- paddd 0+112(%rbp),%xmm13 +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd 0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- movdqu 0 + 0(%rsi),%xmm3 +- movdqu 16 + 0(%rsi),%xmm7 +- movdqu 32 + 0(%rsi),%xmm11 +- movdqu 48 + 0(%rsi),%xmm15 +- pxor %xmm3,%xmm1 +- pxor %xmm7,%xmm5 +- pxor %xmm11,%xmm9 +- pxor %xmm13,%xmm15 +- movdqu %xmm1,0 + 0(%rdi) +- movdqu %xmm5,16 + 0(%rdi) +- movdqu %xmm9,32 + 0(%rdi) +- movdqu %xmm15,48 + 0(%rdi) +- +- movq $64,%rcx +- subq $64,%rbx +- leaq 64(%rsi),%rsi +- jmp L$seal_sse_128_tail_hash +- +-L$seal_sse_tail_192: +- movdqa L$chacha20_consts(%rip),%xmm0 +- movdqa 0+48(%rbp),%xmm4 +- movdqa 0+64(%rbp),%xmm8 +- movdqa %xmm0,%xmm1 +- movdqa %xmm4,%xmm5 +- movdqa %xmm8,%xmm9 +- movdqa %xmm0,%xmm2 +- movdqa %xmm4,%xmm6 +- movdqa %xmm8,%xmm10 +- movdqa 0+96(%rbp),%xmm14 +- paddd L$sse_inc(%rip),%xmm14 +- movdqa %xmm14,%xmm13 +- paddd L$sse_inc(%rip),%xmm13 +- movdqa %xmm13,%xmm12 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,0+96(%rbp) +- movdqa %xmm13,0+112(%rbp) +- movdqa %xmm14,0+128(%rbp) +- +-L$seal_sse_tail_192_rounds_and_x2hash: +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +-L$seal_sse_tail_192_rounds_and_x1hash: +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 
102,69,15,58,15,228,12 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm6 +- pxor %xmm3,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm6 +- pxor %xmm3,%xmm6 +-.byte 102,15,58,15,246,4 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,12 +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,4 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm6 +- pxor %xmm3,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm6 +- pxor %xmm3,%xmm6 +-.byte 102,15,58,15,246,12 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,4 +- +- leaq 16(%rdi),%rdi +- decq %rcx +- jg L$seal_sse_tail_192_rounds_and_x2hash +- decq %r8 +- jge L$seal_sse_tail_192_rounds_and_x1hash +- paddd L$chacha20_consts(%rip),%xmm2 +- paddd 0+48(%rbp),%xmm6 +- paddd 0+64(%rbp),%xmm10 +- paddd 0+128(%rbp),%xmm14 +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd 0+48(%rbp),%xmm5 +- paddd 0+64(%rbp),%xmm9 +- paddd 0+112(%rbp),%xmm13 +- paddd L$chacha20_consts(%rip),%xmm0 +- 
paddd 0+48(%rbp),%xmm4 +- paddd 0+64(%rbp),%xmm8 +- paddd 0+96(%rbp),%xmm12 +- movdqu 0 + 0(%rsi),%xmm3 +- movdqu 16 + 0(%rsi),%xmm7 +- movdqu 32 + 0(%rsi),%xmm11 +- movdqu 48 + 0(%rsi),%xmm15 +- pxor %xmm3,%xmm2 +- pxor %xmm7,%xmm6 +- pxor %xmm11,%xmm10 +- pxor %xmm14,%xmm15 +- movdqu %xmm2,0 + 0(%rdi) +- movdqu %xmm6,16 + 0(%rdi) +- movdqu %xmm10,32 + 0(%rdi) +- movdqu %xmm15,48 + 0(%rdi) +- movdqu 0 + 64(%rsi),%xmm3 +- movdqu 16 + 64(%rsi),%xmm7 +- movdqu 32 + 64(%rsi),%xmm11 +- movdqu 48 + 64(%rsi),%xmm15 +- pxor %xmm3,%xmm1 +- pxor %xmm7,%xmm5 +- pxor %xmm11,%xmm9 +- pxor %xmm13,%xmm15 +- movdqu %xmm1,0 + 64(%rdi) +- movdqu %xmm5,16 + 64(%rdi) +- movdqu %xmm9,32 + 64(%rdi) +- movdqu %xmm15,48 + 64(%rdi) +- +- movq $128,%rcx +- subq $128,%rbx +- leaq 128(%rsi),%rsi +- +-L$seal_sse_128_tail_hash: +- cmpq $16,%rcx +- jb L$seal_sse_128_tail_xor +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- subq $16,%rcx +- leaq 16(%rdi),%rdi +- jmp L$seal_sse_128_tail_hash +- +-L$seal_sse_128_tail_xor: +- cmpq $16,%rbx +- jb L$seal_sse_tail_16 +- subq $16,%rbx +- +- movdqu 0(%rsi),%xmm3 +- pxor %xmm3,%xmm0 +- movdqu %xmm0,0(%rdi) +- +- addq 0(%rdi),%r10 +- adcq 8(%rdi),%r11 +- adcq $1,%r12 +- leaq 16(%rsi),%rsi +- leaq 16(%rdi),%rdi +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm8,%xmm4 +- movdqa %xmm12,%xmm8 +- movdqa %xmm1,%xmm12 +- movdqa %xmm5,%xmm1 +- movdqa %xmm9,%xmm5 +- movdqa %xmm13,%xmm9 +- jmp L$seal_sse_128_tail_xor +- +-L$seal_sse_tail_16: +- testq %rbx,%rbx +- jz L$process_blocks_of_extra_in +- +- movq %rbx,%r8 +- movq %rbx,%rcx +- leaq -1(%rsi,%rbx,1),%rsi +- pxor %xmm15,%xmm15 +-L$seal_sse_tail_16_compose: +- pslldq $1,%xmm15 +- pinsrb $0,(%rsi),%xmm15 +- leaq -1(%rsi),%rsi +- decq %rcx +- jne L$seal_sse_tail_16_compose +- +- +- pxor %xmm0,%xmm15 +- +- +- movq %rbx,%rcx +- movdqu %xmm15,%xmm0 +-L$seal_sse_tail_16_extract: +- pextrb $0,%xmm0,(%rdi) +- psrldq $1,%xmm0 +- addq $1,%rdi +- subq $1,%rcx +- jnz L$seal_sse_tail_16_extract +- +- +- +- +- +- +- +- +- movq 288 + 0 + 32(%rsp),%r9 +- movq 56(%r9),%r14 +- movq 48(%r9),%r13 +- testq %r14,%r14 +- jz L$process_partial_block +- +- movq $16,%r15 +- subq %rbx,%r15 +- cmpq %r15,%r14 +- +- jge L$load_extra_in +- movq %r14,%r15 +- 
+-L$load_extra_in: +- +- +- leaq -1(%r13,%r15,1),%rsi +- +- +- addq %r15,%r13 +- subq %r15,%r14 +- movq %r13,48(%r9) +- movq %r14,56(%r9) +- +- +- +- addq %r15,%r8 +- +- +- pxor %xmm11,%xmm11 +-L$load_extra_load_loop: +- pslldq $1,%xmm11 +- pinsrb $0,(%rsi),%xmm11 +- leaq -1(%rsi),%rsi +- subq $1,%r15 +- jnz L$load_extra_load_loop +- +- +- +- +- movq %rbx,%r15 +- +-L$load_extra_shift_loop: +- pslldq $1,%xmm11 +- subq $1,%r15 +- jnz L$load_extra_shift_loop +- +- +- +- +- leaq L$and_masks(%rip),%r15 +- shlq $4,%rbx +- pand -16(%r15,%rbx,1),%xmm15 +- +- +- por %xmm11,%xmm15 +- +- +- +-.byte 102,77,15,126,253 +- pextrq $1,%xmm15,%r14 +- addq %r13,%r10 +- adcq %r14,%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- +-L$process_blocks_of_extra_in: +- +- movq 288+32+0 (%rsp),%r9 +- movq 48(%r9),%rsi +- movq 56(%r9),%r8 +- movq %r8,%rcx +- shrq $4,%r8 +- +-L$process_extra_hash_loop: +- jz process_extra_in_trailer +- addq 0+0(%rsi),%r10 +- adcq 8+0(%rsi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rsi),%rsi +- subq $1,%r8 +- jmp L$process_extra_hash_loop +-process_extra_in_trailer: +- andq $15,%rcx +- movq %rcx,%rbx +- jz L$do_length_block +- leaq -1(%rsi,%rcx,1),%rsi +- +-L$process_extra_in_trailer_load: +- pslldq $1,%xmm15 +- pinsrb $0,(%rsi),%xmm15 +- leaq -1(%rsi),%rsi +- subq $1,%rcx +- jnz L$process_extra_in_trailer_load +- +-L$process_partial_block: +- +- leaq L$and_masks(%rip),%r15 +- shlq $4,%rbx +- pand -16(%r15,%rbx,1),%xmm15 +-.byte 102,77,15,126,253 +- pextrq $1,%xmm15,%r14 +- addq %r13,%r10 +- adcq %r14,%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- +-L$do_length_block: +- addq 
0+0+32(%rbp),%r10 +- adcq 8+0+32(%rbp),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- +- movq %r10,%r13 +- movq %r11,%r14 +- movq %r12,%r15 +- subq $-5,%r10 +- sbbq $-1,%r11 +- sbbq $3,%r12 +- cmovcq %r13,%r10 +- cmovcq %r14,%r11 +- cmovcq %r15,%r12 +- +- addq 0+0+16(%rbp),%r10 +- adcq 8+0+16(%rbp),%r11 +- +- +- addq $288 + 0 + 32,%rsp +- +- +- popq %r9 +- +- movq %r10,(%r9) +- movq %r11,8(%r9) +- popq %r15 +- +- popq %r14 +- +- popq %r13 +- +- popq %r12 +- +- popq %rbx +- +- popq %rbp +- +- .byte 0xf3,0xc3 +- +-L$seal_sse_128: +- +- movdqu L$chacha20_consts(%rip),%xmm0 +- movdqa %xmm0,%xmm1 +- movdqa %xmm0,%xmm2 +- movdqu 0(%r9),%xmm4 +- movdqa %xmm4,%xmm5 +- movdqa %xmm4,%xmm6 +- movdqu 16(%r9),%xmm8 +- movdqa %xmm8,%xmm9 +- movdqa %xmm8,%xmm10 +- movdqu 32(%r9),%xmm14 +- movdqa %xmm14,%xmm12 +- paddd L$sse_inc(%rip),%xmm12 +- movdqa %xmm12,%xmm13 +- paddd L$sse_inc(%rip),%xmm13 +- movdqa %xmm4,%xmm7 +- movdqa %xmm8,%xmm11 +- movdqa %xmm12,%xmm15 +- movq $10,%r10 +- +-L$seal_sse_128_rounds: +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,4 +-.byte 102,69,15,58,15,192,8 +-.byte 102,69,15,58,15,228,12 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,4 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,12 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm6 +- pxor %xmm3,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm6 +- pxor %xmm3,%xmm6 +-.byte 102,15,58,15,246,4 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,12 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol16(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm4 +- pxor %xmm3,%xmm4 +- paddd %xmm4,%xmm0 +- pxor %xmm0,%xmm12 +- pshufb L$rol8(%rip),%xmm12 +- paddd %xmm12,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,15,228,12 +-.byte 102,69,15,58,15,192,8 +-.byte 
102,69,15,58,15,228,4 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol16(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm5 +- pxor %xmm3,%xmm5 +- paddd %xmm5,%xmm1 +- pxor %xmm1,%xmm13 +- pshufb L$rol8(%rip),%xmm13 +- paddd %xmm13,%xmm9 +- pxor %xmm9,%xmm5 +- movdqa %xmm5,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm5 +- pxor %xmm3,%xmm5 +-.byte 102,15,58,15,237,12 +-.byte 102,69,15,58,15,201,8 +-.byte 102,69,15,58,15,237,4 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol16(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $12,%xmm3 +- psrld $20,%xmm6 +- pxor %xmm3,%xmm6 +- paddd %xmm6,%xmm2 +- pxor %xmm2,%xmm14 +- pshufb L$rol8(%rip),%xmm14 +- paddd %xmm14,%xmm10 +- pxor %xmm10,%xmm6 +- movdqa %xmm6,%xmm3 +- pslld $7,%xmm3 +- psrld $25,%xmm6 +- pxor %xmm3,%xmm6 +-.byte 102,15,58,15,246,12 +-.byte 102,69,15,58,15,210,8 +-.byte 102,69,15,58,15,246,4 +- +- decq %r10 +- jnz L$seal_sse_128_rounds +- paddd L$chacha20_consts(%rip),%xmm0 +- paddd L$chacha20_consts(%rip),%xmm1 +- paddd L$chacha20_consts(%rip),%xmm2 +- paddd %xmm7,%xmm4 +- paddd %xmm7,%xmm5 +- paddd %xmm7,%xmm6 +- paddd %xmm11,%xmm8 +- paddd %xmm11,%xmm9 +- paddd %xmm15,%xmm12 +- paddd L$sse_inc(%rip),%xmm15 +- paddd %xmm15,%xmm13 +- +- pand L$clamp(%rip),%xmm2 +- movdqa %xmm2,0+0(%rbp) +- movdqa %xmm6,0+16(%rbp) +- +- movq %r8,%r8 +- call poly_hash_ad_internal +- jmp L$seal_sse_128_tail_xor +- +- +- +- +- +-.p2align 6 +-chacha20_poly1305_open_avx2: +- +- +- +- +- +- +- +- +- +- +- +- +- vzeroupper +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vbroadcasti128 0(%r9),%ymm4 +- vbroadcasti128 16(%r9),%ymm8 +- vbroadcasti128 32(%r9),%ymm12 +- vpaddd L$avx2_init(%rip),%ymm12,%ymm12 +- cmpq $192,%rbx +- jbe L$open_avx2_192 +- cmpq $320,%rbx +- jbe L$open_avx2_320 +- +- vmovdqa %ymm4,0+64(%rbp) +- vmovdqa %ymm8,0+96(%rbp) +- vmovdqa %ymm12,0+160(%rbp) +- movq $10,%r10 +-L$open_avx2_init_rounds: +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- +- decq %r10 +- jne L$open_avx2_init_rounds +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 +- +- vpand L$clamp(%rip),%ymm3,%ymm3 +- vmovdqa %ymm3,0+0(%rbp) +- +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 +- +- movq %r8,%r8 +- call poly_hash_ad_internal +- +- 
xorq %rcx,%rcx +-L$open_avx2_init_hash: +- addq 0+0(%rsi,%rcx,1),%r10 +- adcq 8+0(%rsi,%rcx,1),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- addq $16,%rcx +- cmpq $64,%rcx +- jne L$open_avx2_init_hash +- +- vpxor 0(%rsi),%ymm0,%ymm0 +- vpxor 32(%rsi),%ymm4,%ymm4 +- +- vmovdqu %ymm0,0(%rdi) +- vmovdqu %ymm4,32(%rdi) +- leaq 64(%rsi),%rsi +- leaq 64(%rdi),%rdi +- subq $64,%rbx +-L$open_avx2_main_loop: +- +- cmpq $512,%rbx +- jb L$open_avx2_main_loop_done +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm10 +- vmovdqa %ymm0,%ymm3 +- vmovdqa %ymm4,%ymm7 +- vmovdqa %ymm8,%ymm11 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm15 +- vpaddd %ymm15,%ymm12,%ymm14 +- vpaddd %ymm14,%ymm12,%ymm13 +- vpaddd %ymm13,%ymm12,%ymm12 +- vmovdqa %ymm15,0+256(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm12,0+160(%rbp) +- +- xorq %rcx,%rcx +-L$open_avx2_main_loop_rounds: +- addq 0+0(%rsi,%rcx,1),%r10 +- adcq 8+0(%rsi,%rcx,1),%r11 +- adcq $1,%r12 +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- addq %rax,%r15 +- adcq %rdx,%r9 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq 
$0,%r12 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- addq 0+16(%rsi,%rcx,1),%r10 +- adcq 8+16(%rsi,%rcx,1),%r11 +- adcq $1,%r12 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $4,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $12,%ymm15,%ymm15,%ymm15 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- addq %rax,%r15 +- adcq %rdx,%r9 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- addq 0+32(%rsi,%rcx,1),%r10 +- adcq 8+32(%rsi,%rcx,1),%r11 +- adcq $1,%r12 +- +- leaq 48(%rcx),%rcx +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd 
%ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- addq %rax,%r15 +- adcq %rdx,%r9 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $12,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $4,%ymm15,%ymm15,%ymm15 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- +- cmpq $60*8,%rcx +- jne L$open_avx2_main_loop_rounds +- vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 +- vpaddd 0+64(%rbp),%ymm7,%ymm7 +- vpaddd 0+96(%rbp),%ymm11,%ymm11 +- vpaddd 0+256(%rbp),%ymm15,%ymm15 +- vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 +- vpaddd 0+64(%rbp),%ymm6,%ymm6 +- vpaddd 0+96(%rbp),%ymm10,%ymm10 +- vpaddd 0+224(%rbp),%ymm14,%ymm14 +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd 0+64(%rbp),%ymm5,%ymm5 +- vpaddd 0+96(%rbp),%ymm9,%ymm9 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- +- vmovdqa %ymm0,0+128(%rbp) +- addq 0+60*8(%rsi),%r10 +- adcq 8+60*8(%rsi),%r11 +- adcq $1,%r12 +- vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 +- vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 +- vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 +- vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 +- vpxor 0+0(%rsi),%ymm0,%ymm0 +- vpxor 32+0(%rsi),%ymm3,%ymm3 +- vpxor 64+0(%rsi),%ymm7,%ymm7 +- vpxor 96+0(%rsi),%ymm11,%ymm11 +- vmovdqu %ymm0,0+0(%rdi) +- vmovdqu %ymm3,32+0(%rdi) +- vmovdqu %ymm7,64+0(%rdi) +- vmovdqu %ymm11,96+0(%rdi) +- +- vmovdqa 0+128(%rbp),%ymm0 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 +- vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 +- vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 +- vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 +- vpxor 0+128(%rsi),%ymm3,%ymm3 +- vpxor 32+128(%rsi),%ymm2,%ymm2 +- vpxor 64+128(%rsi),%ymm6,%ymm6 
+- vpxor 96+128(%rsi),%ymm10,%ymm10 +- vmovdqu %ymm3,0+128(%rdi) +- vmovdqu %ymm2,32+128(%rdi) +- vmovdqu %ymm6,64+128(%rdi) +- vmovdqu %ymm10,96+128(%rdi) +- addq 0+60*8+16(%rsi),%r10 +- adcq 8+60*8+16(%rsi),%r11 +- adcq $1,%r12 +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 +- vpxor 0+256(%rsi),%ymm3,%ymm3 +- vpxor 32+256(%rsi),%ymm1,%ymm1 +- vpxor 64+256(%rsi),%ymm5,%ymm5 +- vpxor 96+256(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm3,0+256(%rdi) +- vmovdqu %ymm1,32+256(%rdi) +- vmovdqu %ymm5,64+256(%rdi) +- vmovdqu %ymm9,96+256(%rdi) +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 +- vpxor 0+384(%rsi),%ymm3,%ymm3 +- vpxor 32+384(%rsi),%ymm0,%ymm0 +- vpxor 64+384(%rsi),%ymm4,%ymm4 +- vpxor 96+384(%rsi),%ymm8,%ymm8 +- vmovdqu %ymm3,0+384(%rdi) +- vmovdqu %ymm0,32+384(%rdi) +- vmovdqu %ymm4,64+384(%rdi) +- vmovdqu %ymm8,96+384(%rdi) +- +- leaq 512(%rsi),%rsi +- leaq 512(%rdi),%rdi +- subq $512,%rbx +- jmp L$open_avx2_main_loop +-L$open_avx2_main_loop_done: +- testq %rbx,%rbx +- vzeroupper +- je L$open_sse_finalize +- +- cmpq $384,%rbx +- ja L$open_avx2_tail_512 +- cmpq $256,%rbx +- ja L$open_avx2_tail_384 +- cmpq $128,%rbx +- ja L$open_avx2_tail_256 +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vmovdqa %ymm12,0+160(%rbp) +- +- xorq %r8,%r8 +- movq %rbx,%rcx +- andq $-16,%rcx +- testq %rcx,%rcx +- je L$open_avx2_tail_128_rounds +-L$open_avx2_tail_128_rounds_and_x1hash: +- addq 0+0(%rsi,%r8,1),%r10 +- adcq 8+0(%rsi,%r8,1),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +-L$open_avx2_tail_128_rounds: +- addq $16,%r8 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld 
$7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- +- cmpq %rcx,%r8 +- jb L$open_avx2_tail_128_rounds_and_x1hash +- cmpq $160,%r8 +- jne L$open_avx2_tail_128_rounds +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 +- vmovdqa %ymm3,%ymm8 +- +- jmp L$open_avx2_tail_128_xor +- +-L$open_avx2_tail_256: +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm8,%ymm9 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm13 +- vpaddd %ymm13,%ymm12,%ymm12 +- vmovdqa %ymm12,0+160(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- +- movq %rbx,0+128(%rbp) +- movq %rbx,%rcx +- subq $128,%rcx +- shrq $4,%rcx +- movq $10,%r8 +- cmpq $10,%rcx +- cmovgq %r8,%rcx +- movq %rsi,%rbx +- xorq %r8,%r8 +-L$open_avx2_tail_256_rounds_and_x1hash: +- addq 0+0(%rbx),%r10 +- adcq 8+0(%rbx),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rbx),%rbx +-L$open_avx2_tail_256_rounds: +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 
+- vpalignr $4,%ymm5,%ymm5,%ymm5 +- +- incq %r8 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol16(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpsrld $20,%ymm6,%ymm3 +- vpslld $12,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol8(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpslld $7,%ymm6,%ymm3 +- vpsrld $25,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- +- cmpq %rcx,%r8 +- jb L$open_avx2_tail_256_rounds_and_x1hash +- cmpq $10,%r8 +- jne L$open_avx2_tail_256_rounds +- movq %rbx,%r8 +- subq %rsi,%rbx +- movq %rbx,%rcx +- movq 0+128(%rbp),%rbx +-L$open_avx2_tail_256_hash: +- addq $16,%rcx +- cmpq %rbx,%rcx +- jg L$open_avx2_tail_256_done +- addq 0+0(%r8),%r10 +- adcq 8+0(%r8),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%r8),%r8 +- jmp L$open_avx2_tail_256_hash +-L$open_avx2_tail_256_done: +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd 0+64(%rbp),%ymm5,%ymm5 +- vpaddd 0+96(%rbp),%ymm9,%ymm9 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 +- vpxor 0+0(%rsi),%ymm3,%ymm3 +- vpxor 32+0(%rsi),%ymm1,%ymm1 +- vpxor 64+0(%rsi),%ymm5,%ymm5 +- vpxor 96+0(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm3,0+0(%rdi) +- vmovdqu %ymm1,32+0(%rdi) +- vmovdqu %ymm5,64+0(%rdi) +- vmovdqu %ymm9,96+0(%rdi) +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 +- 
vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 +- vmovdqa %ymm3,%ymm8 +- +- leaq 128(%rsi),%rsi +- leaq 128(%rdi),%rdi +- subq $128,%rbx +- jmp L$open_avx2_tail_128_xor +- +-L$open_avx2_tail_384: +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm10 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm14 +- vpaddd %ymm14,%ymm12,%ymm13 +- vpaddd %ymm13,%ymm12,%ymm12 +- vmovdqa %ymm12,0+160(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- +- movq %rbx,0+128(%rbp) +- movq %rbx,%rcx +- subq $256,%rcx +- shrq $4,%rcx +- addq $6,%rcx +- movq $10,%r8 +- cmpq $10,%rcx +- cmovgq %r8,%rcx +- movq %rsi,%rbx +- xorq %r8,%r8 +-L$open_avx2_tail_384_rounds_and_x2hash: +- addq 0+0(%rbx),%r10 +- adcq 8+0(%rbx),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rbx),%rbx +-L$open_avx2_tail_384_rounds_and_x1hash: +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol16(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpsrld $20,%ymm6,%ymm3 +- vpslld $12,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol8(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpslld $7,%ymm6,%ymm3 +- vpsrld $25,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- addq 0+0(%rbx),%r10 +- adcq 8+0(%rbx),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- 
mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rbx),%rbx +- incq %r8 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol16(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpsrld $20,%ymm6,%ymm3 +- vpslld $12,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol8(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpslld $7,%ymm6,%ymm3 +- vpsrld $25,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- +- cmpq %rcx,%r8 +- jb L$open_avx2_tail_384_rounds_and_x2hash +- cmpq $10,%r8 +- jne L$open_avx2_tail_384_rounds_and_x1hash +- movq %rbx,%r8 +- subq %rsi,%rbx +- movq %rbx,%rcx +- movq 0+128(%rbp),%rbx +-L$open_avx2_384_tail_hash: +- addq $16,%rcx +- cmpq %rbx,%rcx +- jg L$open_avx2_384_tail_done +- addq 0+0(%r8),%r10 +- adcq 8+0(%r8),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%r8),%r8 +- jmp L$open_avx2_384_tail_hash +-L$open_avx2_384_tail_done: +- vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 +- vpaddd 0+64(%rbp),%ymm6,%ymm6 +- vpaddd 0+96(%rbp),%ymm10,%ymm10 +- vpaddd 0+224(%rbp),%ymm14,%ymm14 +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd 0+64(%rbp),%ymm5,%ymm5 +- vpaddd 0+96(%rbp),%ymm9,%ymm9 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 
0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 +- vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 +- vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 +- vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 +- vpxor 0+0(%rsi),%ymm3,%ymm3 +- vpxor 32+0(%rsi),%ymm2,%ymm2 +- vpxor 64+0(%rsi),%ymm6,%ymm6 +- vpxor 96+0(%rsi),%ymm10,%ymm10 +- vmovdqu %ymm3,0+0(%rdi) +- vmovdqu %ymm2,32+0(%rdi) +- vmovdqu %ymm6,64+0(%rdi) +- vmovdqu %ymm10,96+0(%rdi) +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 +- vpxor 0+128(%rsi),%ymm3,%ymm3 +- vpxor 32+128(%rsi),%ymm1,%ymm1 +- vpxor 64+128(%rsi),%ymm5,%ymm5 +- vpxor 96+128(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm3,0+128(%rdi) +- vmovdqu %ymm1,32+128(%rdi) +- vmovdqu %ymm5,64+128(%rdi) +- vmovdqu %ymm9,96+128(%rdi) +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 +- vmovdqa %ymm3,%ymm8 +- +- leaq 256(%rsi),%rsi +- leaq 256(%rdi),%rdi +- subq $256,%rbx +- jmp L$open_avx2_tail_128_xor +- +-L$open_avx2_tail_512: +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm10 +- vmovdqa %ymm0,%ymm3 +- vmovdqa %ymm4,%ymm7 +- vmovdqa %ymm8,%ymm11 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm15 +- vpaddd %ymm15,%ymm12,%ymm14 +- vpaddd %ymm14,%ymm12,%ymm13 +- vpaddd %ymm13,%ymm12,%ymm12 +- vmovdqa %ymm15,0+256(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm12,0+160(%rbp) +- +- xorq %rcx,%rcx +- movq %rsi,%r8 +-L$open_avx2_tail_512_rounds_and_x2hash: +- addq 0+0(%r8),%r10 +- adcq 8+0(%r8),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%r8),%r8 +-L$open_avx2_tail_512_rounds_and_x1hash: +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 
+- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- addq 0+0(%r8),%r10 +- adcq 8+0(%r8),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $4,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $12,%ymm15,%ymm15,%ymm15 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- addq 0+16(%r8),%r10 +- adcq 8+16(%r8),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 32(%r8),%r8 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor 
%ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $12,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $4,%ymm15,%ymm15,%ymm15 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- +- incq %rcx +- cmpq $4,%rcx +- jl L$open_avx2_tail_512_rounds_and_x2hash +- cmpq $10,%rcx +- jne L$open_avx2_tail_512_rounds_and_x1hash +- movq %rbx,%rcx +- subq $384,%rcx +- andq $-16,%rcx +-L$open_avx2_tail_512_hash: +- testq %rcx,%rcx +- je L$open_avx2_tail_512_done +- addq 0+0(%r8),%r10 +- adcq 8+0(%r8),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%r8),%r8 +- subq $16,%rcx +- jmp L$open_avx2_tail_512_hash +-L$open_avx2_tail_512_done: +- vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 +- vpaddd 0+64(%rbp),%ymm7,%ymm7 +- vpaddd 0+96(%rbp),%ymm11,%ymm11 +- vpaddd 0+256(%rbp),%ymm15,%ymm15 +- vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 +- vpaddd 0+64(%rbp),%ymm6,%ymm6 +- vpaddd 0+96(%rbp),%ymm10,%ymm10 +- vpaddd 0+224(%rbp),%ymm14,%ymm14 +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd 0+64(%rbp),%ymm5,%ymm5 +- vpaddd 0+96(%rbp),%ymm9,%ymm9 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- +- vmovdqa %ymm0,0+128(%rbp) +- vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 +- vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 +- vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 +- vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 +- vpxor 0+0(%rsi),%ymm0,%ymm0 +- vpxor 32+0(%rsi),%ymm3,%ymm3 +- vpxor 
64+0(%rsi),%ymm7,%ymm7 +- vpxor 96+0(%rsi),%ymm11,%ymm11 +- vmovdqu %ymm0,0+0(%rdi) +- vmovdqu %ymm3,32+0(%rdi) +- vmovdqu %ymm7,64+0(%rdi) +- vmovdqu %ymm11,96+0(%rdi) +- +- vmovdqa 0+128(%rbp),%ymm0 +- vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 +- vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 +- vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 +- vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 +- vpxor 0+128(%rsi),%ymm3,%ymm3 +- vpxor 32+128(%rsi),%ymm2,%ymm2 +- vpxor 64+128(%rsi),%ymm6,%ymm6 +- vpxor 96+128(%rsi),%ymm10,%ymm10 +- vmovdqu %ymm3,0+128(%rdi) +- vmovdqu %ymm2,32+128(%rdi) +- vmovdqu %ymm6,64+128(%rdi) +- vmovdqu %ymm10,96+128(%rdi) +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 +- vpxor 0+256(%rsi),%ymm3,%ymm3 +- vpxor 32+256(%rsi),%ymm1,%ymm1 +- vpxor 64+256(%rsi),%ymm5,%ymm5 +- vpxor 96+256(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm3,0+256(%rdi) +- vmovdqu %ymm1,32+256(%rdi) +- vmovdqu %ymm5,64+256(%rdi) +- vmovdqu %ymm9,96+256(%rdi) +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 +- vmovdqa %ymm3,%ymm8 +- +- leaq 384(%rsi),%rsi +- leaq 384(%rdi),%rdi +- subq $384,%rbx +-L$open_avx2_tail_128_xor: +- cmpq $32,%rbx +- jb L$open_avx2_tail_32_xor +- subq $32,%rbx +- vpxor (%rsi),%ymm0,%ymm0 +- vmovdqu %ymm0,(%rdi) +- leaq 32(%rsi),%rsi +- leaq 32(%rdi),%rdi +- vmovdqa %ymm4,%ymm0 +- vmovdqa %ymm8,%ymm4 +- vmovdqa %ymm12,%ymm8 +- jmp L$open_avx2_tail_128_xor +-L$open_avx2_tail_32_xor: +- cmpq $16,%rbx +- vmovdqa %xmm0,%xmm1 +- jb L$open_avx2_exit +- subq $16,%rbx +- +- vpxor (%rsi),%xmm0,%xmm1 +- vmovdqu %xmm1,(%rdi) +- leaq 16(%rsi),%rsi +- leaq 16(%rdi),%rdi +- vperm2i128 $0x11,%ymm0,%ymm0,%ymm0 +- vmovdqa %xmm0,%xmm1 +-L$open_avx2_exit: +- vzeroupper +- jmp L$open_sse_tail_16 +- +-L$open_avx2_192: +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm8,%ymm10 +- vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 +- vmovdqa %ymm12,%ymm11 +- vmovdqa %ymm13,%ymm15 +- movq $10,%r10 +-L$open_avx2_192_rounds: +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd 
%ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- +- decq %r10 +- jne L$open_avx2_192_rounds +- vpaddd %ymm2,%ymm0,%ymm0 +- vpaddd %ymm2,%ymm1,%ymm1 +- vpaddd %ymm6,%ymm4,%ymm4 +- vpaddd %ymm6,%ymm5,%ymm5 +- vpaddd %ymm10,%ymm8,%ymm8 +- vpaddd %ymm10,%ymm9,%ymm9 +- vpaddd %ymm11,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm13,%ymm13 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 +- +- vpand L$clamp(%rip),%ymm3,%ymm3 +- vmovdqa %ymm3,0+0(%rbp) +- +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 +-L$open_avx2_short: +- movq %r8,%r8 +- call poly_hash_ad_internal +-L$open_avx2_short_hash_and_xor_loop: +- cmpq $32,%rbx +- jb L$open_avx2_short_tail_32 +- subq $32,%rbx +- addq 0+0(%rsi),%r10 +- adcq 8+0(%rsi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- addq 0+16(%rsi),%r10 +- adcq 8+16(%rsi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- +- vpxor (%rsi),%ymm0,%ymm0 +- vmovdqu %ymm0,(%rdi) +- leaq 32(%rsi),%rsi +- leaq 32(%rdi),%rdi +- +- vmovdqa %ymm4,%ymm0 +- vmovdqa %ymm8,%ymm4 +- vmovdqa %ymm12,%ymm8 +- vmovdqa %ymm1,%ymm12 +- vmovdqa %ymm5,%ymm1 +- vmovdqa %ymm9,%ymm5 +- vmovdqa %ymm13,%ymm9 +- vmovdqa %ymm2,%ymm13 +- vmovdqa %ymm6,%ymm2 +- jmp L$open_avx2_short_hash_and_xor_loop +-L$open_avx2_short_tail_32: +- cmpq $16,%rbx +- vmovdqa %xmm0,%xmm1 +- jb 
L$open_avx2_short_tail_32_exit +- subq $16,%rbx +- addq 0+0(%rsi),%r10 +- adcq 8+0(%rsi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- vpxor (%rsi),%xmm0,%xmm3 +- vmovdqu %xmm3,(%rdi) +- leaq 16(%rsi),%rsi +- leaq 16(%rdi),%rdi +- vextracti128 $1,%ymm0,%xmm1 +-L$open_avx2_short_tail_32_exit: +- vzeroupper +- jmp L$open_sse_tail_16 +- +-L$open_avx2_320: +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm8,%ymm10 +- vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 +- vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 +- vmovdqa %ymm4,%ymm7 +- vmovdqa %ymm8,%ymm11 +- vmovdqa %ymm12,0+160(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- movq $10,%r10 +-L$open_avx2_320_rounds: +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol16(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpsrld $20,%ymm6,%ymm3 +- vpslld $12,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol8(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpslld $7,%ymm6,%ymm3 +- vpsrld $25,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor 
%ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol16(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpsrld $20,%ymm6,%ymm3 +- vpslld $12,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol8(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpslld $7,%ymm6,%ymm3 +- vpsrld $25,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- +- decq %r10 +- jne L$open_avx2_320_rounds +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 +- vpaddd %ymm7,%ymm4,%ymm4 +- vpaddd %ymm7,%ymm5,%ymm5 +- vpaddd %ymm7,%ymm6,%ymm6 +- vpaddd %ymm11,%ymm8,%ymm8 +- vpaddd %ymm11,%ymm9,%ymm9 +- vpaddd %ymm11,%ymm10,%ymm10 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd 0+224(%rbp),%ymm14,%ymm14 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 +- +- vpand L$clamp(%rip),%ymm3,%ymm3 +- vmovdqa %ymm3,0+0(%rbp) +- +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 +- vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 +- vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 +- vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 +- vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 +- jmp L$open_avx2_short +- +- +- +- +- +-.p2align 6 +-chacha20_poly1305_seal_avx2: +- +- +- +- +- +- +- +- +- +- +- +- +- vzeroupper +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vbroadcasti128 0(%r9),%ymm4 +- vbroadcasti128 16(%r9),%ymm8 +- vbroadcasti128 32(%r9),%ymm12 +- vpaddd L$avx2_init(%rip),%ymm12,%ymm12 +- cmpq $192,%rbx +- jbe L$seal_avx2_192 +- cmpq $320,%rbx +- jbe L$seal_avx2_320 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm0,%ymm3 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm4,%ymm7 +- vmovdqa %ymm4,0+64(%rbp) +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm8,%ymm10 +- vmovdqa %ymm8,%ymm11 +- vmovdqa %ymm8,0+96(%rbp) +- vmovdqa %ymm12,%ymm15 +- vpaddd L$avx2_inc(%rip),%ymm15,%ymm14 +- vpaddd L$avx2_inc(%rip),%ymm14,%ymm13 +- vpaddd L$avx2_inc(%rip),%ymm13,%ymm12 +- vmovdqa %ymm12,0+160(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- vmovdqa %ymm15,0+256(%rbp) +- movq $10,%r10 +-L$seal_avx2_init_rounds: +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb 
%ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $4,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $12,%ymm15,%ymm15,%ymm15 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 
0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $12,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $4,%ymm15,%ymm15,%ymm15 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- +- decq %r10 +- jnz L$seal_avx2_init_rounds +- vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 +- vpaddd 0+64(%rbp),%ymm7,%ymm7 +- vpaddd 0+96(%rbp),%ymm11,%ymm11 +- vpaddd 0+256(%rbp),%ymm15,%ymm15 +- vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 +- vpaddd 0+64(%rbp),%ymm6,%ymm6 +- vpaddd 0+96(%rbp),%ymm10,%ymm10 +- vpaddd 0+224(%rbp),%ymm14,%ymm14 +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd 0+64(%rbp),%ymm5,%ymm5 +- vpaddd 0+96(%rbp),%ymm9,%ymm9 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- +- vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 +- vperm2i128 $0x02,%ymm3,%ymm7,%ymm15 +- vperm2i128 $0x13,%ymm3,%ymm7,%ymm3 +- vpand L$clamp(%rip),%ymm15,%ymm15 +- vmovdqa %ymm15,0+0(%rbp) +- movq %r8,%r8 +- call poly_hash_ad_internal +- +- vpxor 0(%rsi),%ymm3,%ymm3 +- vpxor 32(%rsi),%ymm11,%ymm11 +- vmovdqu %ymm3,0(%rdi) +- vmovdqu %ymm11,32(%rdi) +- vperm2i128 $0x02,%ymm2,%ymm6,%ymm15 +- vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 +- vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 +- vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 +- vpxor 0+64(%rsi),%ymm15,%ymm15 +- vpxor 32+64(%rsi),%ymm2,%ymm2 +- vpxor 64+64(%rsi),%ymm6,%ymm6 +- vpxor 96+64(%rsi),%ymm10,%ymm10 +- vmovdqu %ymm15,0+64(%rdi) +- vmovdqu %ymm2,32+64(%rdi) +- vmovdqu %ymm6,64+64(%rdi) +- vmovdqu %ymm10,96+64(%rdi) +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm15 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 +- vpxor 0+192(%rsi),%ymm15,%ymm15 +- vpxor 32+192(%rsi),%ymm1,%ymm1 +- vpxor 64+192(%rsi),%ymm5,%ymm5 +- vpxor 96+192(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm15,0+192(%rdi) +- vmovdqu %ymm1,32+192(%rdi) +- vmovdqu %ymm5,64+192(%rdi) +- vmovdqu %ymm9,96+192(%rdi) +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm15 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 +- vmovdqa %ymm15,%ymm8 +- +- leaq 320(%rsi),%rsi +- subq $320,%rbx +- movq $320,%rcx +- cmpq $128,%rbx +- jbe L$seal_avx2_short_hash_remainder +- vpxor 0(%rsi),%ymm0,%ymm0 +- vpxor 32(%rsi),%ymm4,%ymm4 +- vpxor 64(%rsi),%ymm8,%ymm8 +- vpxor 96(%rsi),%ymm12,%ymm12 +- vmovdqu %ymm0,320(%rdi) +- vmovdqu %ymm4,352(%rdi) +- vmovdqu %ymm8,384(%rdi) +- vmovdqu %ymm12,416(%rdi) +- leaq 128(%rsi),%rsi +- subq $128,%rbx +- movq $8,%rcx +- movq $2,%r8 +- cmpq $128,%rbx +- jbe L$seal_avx2_tail_128 +- cmpq $256,%rbx +- jbe L$seal_avx2_tail_256 +- cmpq $384,%rbx +- jbe L$seal_avx2_tail_384 +- cmpq $512,%rbx +- jbe L$seal_avx2_tail_512 +- vmovdqa 
L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm10 +- vmovdqa %ymm0,%ymm3 +- vmovdqa %ymm4,%ymm7 +- vmovdqa %ymm8,%ymm11 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm15 +- vpaddd %ymm15,%ymm12,%ymm14 +- vpaddd %ymm14,%ymm12,%ymm13 +- vpaddd %ymm13,%ymm12,%ymm12 +- vmovdqa %ymm15,0+256(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm12,0+160(%rbp) +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $4,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $12,%ymm15,%ymm15,%ymm15 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor 
%ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $12,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $4,%ymm15,%ymm15,%ymm15 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- +- subq $16,%rdi +- movq $9,%rcx +- jmp L$seal_avx2_main_loop_rounds_entry +-.p2align 5 +-L$seal_avx2_main_loop: +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm10 +- vmovdqa %ymm0,%ymm3 +- vmovdqa %ymm4,%ymm7 +- vmovdqa %ymm8,%ymm11 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm15 +- 
vpaddd %ymm15,%ymm12,%ymm14 +- vpaddd %ymm14,%ymm12,%ymm13 +- vpaddd %ymm13,%ymm12,%ymm12 +- vmovdqa %ymm15,0+256(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm12,0+160(%rbp) +- +- movq $10,%rcx +-.p2align 5 +-L$seal_avx2_main_loop_rounds: +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- addq %rax,%r15 +- adcq %rdx,%r9 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +-L$seal_avx2_main_loop_rounds_entry: +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- addq 0+16(%rdi),%r10 +- adcq 8+16(%rdi),%r11 +- adcq $1,%r12 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $4,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $12,%ymm15,%ymm15,%ymm15 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- 
vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- addq %rax,%r15 +- adcq %rdx,%r9 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- addq 0+32(%rdi),%r10 +- adcq 8+32(%rdi),%r11 +- adcq $1,%r12 +- +- leaq 48(%rdi),%rdi +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- addq %rax,%r15 +- adcq %rdx,%r9 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $12,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $4,%ymm15,%ymm15,%ymm15 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- +- decq %rcx +- jne 
L$seal_avx2_main_loop_rounds +- vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 +- vpaddd 0+64(%rbp),%ymm7,%ymm7 +- vpaddd 0+96(%rbp),%ymm11,%ymm11 +- vpaddd 0+256(%rbp),%ymm15,%ymm15 +- vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 +- vpaddd 0+64(%rbp),%ymm6,%ymm6 +- vpaddd 0+96(%rbp),%ymm10,%ymm10 +- vpaddd 0+224(%rbp),%ymm14,%ymm14 +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd 0+64(%rbp),%ymm5,%ymm5 +- vpaddd 0+96(%rbp),%ymm9,%ymm9 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- +- vmovdqa %ymm0,0+128(%rbp) +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- addq 0+16(%rdi),%r10 +- adcq 8+16(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 32(%rdi),%rdi +- vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 +- vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 +- vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 +- vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 +- vpxor 0+0(%rsi),%ymm0,%ymm0 +- vpxor 32+0(%rsi),%ymm3,%ymm3 +- vpxor 64+0(%rsi),%ymm7,%ymm7 +- vpxor 96+0(%rsi),%ymm11,%ymm11 +- vmovdqu %ymm0,0+0(%rdi) +- vmovdqu %ymm3,32+0(%rdi) +- vmovdqu %ymm7,64+0(%rdi) +- vmovdqu %ymm11,96+0(%rdi) +- +- vmovdqa 0+128(%rbp),%ymm0 +- vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 +- vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 +- vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 +- vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 +- vpxor 0+128(%rsi),%ymm3,%ymm3 +- vpxor 32+128(%rsi),%ymm2,%ymm2 +- vpxor 64+128(%rsi),%ymm6,%ymm6 +- vpxor 96+128(%rsi),%ymm10,%ymm10 +- vmovdqu %ymm3,0+128(%rdi) +- vmovdqu %ymm2,32+128(%rdi) +- vmovdqu %ymm6,64+128(%rdi) +- vmovdqu %ymm10,96+128(%rdi) +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 +- vpxor 0+256(%rsi),%ymm3,%ymm3 +- vpxor 32+256(%rsi),%ymm1,%ymm1 +- vpxor 64+256(%rsi),%ymm5,%ymm5 +- vpxor 96+256(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm3,0+256(%rdi) +- vmovdqu %ymm1,32+256(%rdi) +- vmovdqu %ymm5,64+256(%rdi) +- vmovdqu %ymm9,96+256(%rdi) +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm4 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm0 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm8 +- vpxor 0+384(%rsi),%ymm3,%ymm3 +- vpxor 32+384(%rsi),%ymm0,%ymm0 +- vpxor 64+384(%rsi),%ymm4,%ymm4 +- vpxor 96+384(%rsi),%ymm8,%ymm8 +- vmovdqu %ymm3,0+384(%rdi) +- vmovdqu %ymm0,32+384(%rdi) +- vmovdqu 
%ymm4,64+384(%rdi) +- vmovdqu %ymm8,96+384(%rdi) +- +- leaq 512(%rsi),%rsi +- subq $512,%rbx +- cmpq $512,%rbx +- jg L$seal_avx2_main_loop +- +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- addq 0+16(%rdi),%r10 +- adcq 8+16(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 32(%rdi),%rdi +- movq $10,%rcx +- xorq %r8,%r8 +- +- cmpq $384,%rbx +- ja L$seal_avx2_tail_512 +- cmpq $256,%rbx +- ja L$seal_avx2_tail_384 +- cmpq $128,%rbx +- ja L$seal_avx2_tail_256 +- +-L$seal_avx2_tail_128: +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vmovdqa %ymm12,0+160(%rbp) +- +-L$seal_avx2_tail_128_rounds_and_3xhash: +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +-L$seal_avx2_tail_128_rounds_and_2xhash: +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- 
movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- addq 0+16(%rdi),%r10 +- adcq 8+16(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 32(%rdi),%rdi +- decq %rcx +- jg L$seal_avx2_tail_128_rounds_and_3xhash +- decq %r8 +- jge L$seal_avx2_tail_128_rounds_and_2xhash +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 +- vmovdqa %ymm3,%ymm8 +- +- jmp L$seal_avx2_short_loop +- +-L$seal_avx2_tail_256: +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm8,%ymm9 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm13 +- vpaddd %ymm13,%ymm12,%ymm12 +- vmovdqa %ymm12,0+160(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- +-L$seal_avx2_tail_256_rounds_and_3xhash: +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +-L$seal_avx2_tail_256_rounds_and_2xhash: +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- 
vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- addq 0+16(%rdi),%r10 +- adcq 8+16(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 32(%rdi),%rdi +- decq %rcx +- jg L$seal_avx2_tail_256_rounds_and_3xhash +- decq %r8 +- jge L$seal_avx2_tail_256_rounds_and_2xhash +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd 0+64(%rbp),%ymm5,%ymm5 +- vpaddd 0+96(%rbp),%ymm9,%ymm9 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd 
L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 +- vpxor 0+0(%rsi),%ymm3,%ymm3 +- vpxor 32+0(%rsi),%ymm1,%ymm1 +- vpxor 64+0(%rsi),%ymm5,%ymm5 +- vpxor 96+0(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm3,0+0(%rdi) +- vmovdqu %ymm1,32+0(%rdi) +- vmovdqu %ymm5,64+0(%rdi) +- vmovdqu %ymm9,96+0(%rdi) +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 +- vmovdqa %ymm3,%ymm8 +- +- movq $128,%rcx +- leaq 128(%rsi),%rsi +- subq $128,%rbx +- jmp L$seal_avx2_short_hash_remainder +- +-L$seal_avx2_tail_384: +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm10 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm14 +- vpaddd %ymm14,%ymm12,%ymm13 +- vpaddd %ymm13,%ymm12,%ymm12 +- vmovdqa %ymm12,0+160(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- +-L$seal_avx2_tail_384_rounds_and_3xhash: +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +-L$seal_avx2_tail_384_rounds_and_2xhash: +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq 
%r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol16(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpsrld $20,%ymm6,%ymm3 +- vpslld $12,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol8(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpslld $7,%ymm6,%ymm3 +- vpsrld $25,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- addq 0+16(%rdi),%r10 +- adcq 8+16(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol16(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpsrld $20,%ymm6,%ymm3 +- vpslld $12,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol8(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpslld $7,%ymm6,%ymm3 +- vpsrld $25,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- +- leaq 32(%rdi),%rdi +- decq %rcx +- jg L$seal_avx2_tail_384_rounds_and_3xhash +- decq %r8 +- jge 
L$seal_avx2_tail_384_rounds_and_2xhash +- vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 +- vpaddd 0+64(%rbp),%ymm6,%ymm6 +- vpaddd 0+96(%rbp),%ymm10,%ymm10 +- vpaddd 0+224(%rbp),%ymm14,%ymm14 +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd 0+64(%rbp),%ymm5,%ymm5 +- vpaddd 0+96(%rbp),%ymm9,%ymm9 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 +- vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 +- vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 +- vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 +- vpxor 0+0(%rsi),%ymm3,%ymm3 +- vpxor 32+0(%rsi),%ymm2,%ymm2 +- vpxor 64+0(%rsi),%ymm6,%ymm6 +- vpxor 96+0(%rsi),%ymm10,%ymm10 +- vmovdqu %ymm3,0+0(%rdi) +- vmovdqu %ymm2,32+0(%rdi) +- vmovdqu %ymm6,64+0(%rdi) +- vmovdqu %ymm10,96+0(%rdi) +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 +- vpxor 0+128(%rsi),%ymm3,%ymm3 +- vpxor 32+128(%rsi),%ymm1,%ymm1 +- vpxor 64+128(%rsi),%ymm5,%ymm5 +- vpxor 96+128(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm3,0+128(%rdi) +- vmovdqu %ymm1,32+128(%rdi) +- vmovdqu %ymm5,64+128(%rdi) +- vmovdqu %ymm9,96+128(%rdi) +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 +- vmovdqa %ymm3,%ymm8 +- +- movq $256,%rcx +- leaq 256(%rsi),%rsi +- subq $256,%rbx +- jmp L$seal_avx2_short_hash_remainder +- +-L$seal_avx2_tail_512: +- vmovdqa L$chacha20_consts(%rip),%ymm0 +- vmovdqa 0+64(%rbp),%ymm4 +- vmovdqa 0+96(%rbp),%ymm8 +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm10 +- vmovdqa %ymm0,%ymm3 +- vmovdqa %ymm4,%ymm7 +- vmovdqa %ymm8,%ymm11 +- vmovdqa L$avx2_inc(%rip),%ymm12 +- vpaddd 0+160(%rbp),%ymm12,%ymm15 +- vpaddd %ymm15,%ymm12,%ymm14 +- vpaddd %ymm14,%ymm12,%ymm13 +- vpaddd %ymm13,%ymm12,%ymm12 +- vmovdqa %ymm15,0+256(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm12,0+160(%rbp) +- +-L$seal_avx2_tail_512_rounds_and_3xhash: +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- addq %rax,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +-L$seal_avx2_tail_512_rounds_and_2xhash: +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- addq 0+0(%rdi),%r10 +- adcq 
8+0(%rdi),%r11 +- adcq $1,%r12 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $4,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $12,%ymm15,%ymm15,%ymm15 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- addq %rax,%r15 +- adcq %rdx,%r9 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vmovdqa %ymm8,0+128(%rbp) +- vmovdqa L$rol16(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $20,%ymm7,%ymm8 +- vpslld $32-20,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $20,%ymm6,%ymm8 +- vpslld $32-20,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $20,%ymm5,%ymm8 +- vpslld $32-20,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $20,%ymm4,%ymm8 +- vpslld $32-20,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa L$rol8(%rip),%ymm8 +- vpaddd %ymm7,%ymm3,%ymm3 +- vpaddd %ymm6,%ymm2,%ymm2 +- addq 0+16(%rdi),%r10 +- adcq 
8+16(%rdi),%r11 +- adcq $1,%r12 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm3,%ymm15,%ymm15 +- vpxor %ymm2,%ymm14,%ymm14 +- vpxor %ymm1,%ymm13,%ymm13 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb %ymm8,%ymm15,%ymm15 +- vpshufb %ymm8,%ymm14,%ymm14 +- vpshufb %ymm8,%ymm13,%ymm13 +- vpshufb %ymm8,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm11,%ymm11 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpaddd 0+128(%rbp),%ymm12,%ymm8 +- vpxor %ymm11,%ymm7,%ymm7 +- vpxor %ymm10,%ymm6,%ymm6 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa %ymm8,0+128(%rbp) +- vpsrld $25,%ymm7,%ymm8 +- movq 0+0+0(%rbp),%rdx +- movq %rdx,%r15 +- mulxq %r10,%r13,%r14 +- mulxq %r11,%rax,%rdx +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- vpslld $32-25,%ymm7,%ymm7 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $25,%ymm6,%ymm8 +- vpslld $32-25,%ymm6,%ymm6 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $25,%ymm5,%ymm8 +- vpslld $32-25,%ymm5,%ymm5 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $25,%ymm4,%ymm8 +- vpslld $32-25,%ymm4,%ymm4 +- vpxor %ymm8,%ymm4,%ymm4 +- vmovdqa 0+128(%rbp),%ymm8 +- vpalignr $12,%ymm7,%ymm7,%ymm7 +- vpalignr $8,%ymm11,%ymm11,%ymm11 +- vpalignr $4,%ymm15,%ymm15,%ymm15 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- movq 8+0+0(%rbp),%rdx +- mulxq %r10,%r10,%rax +- addq %r10,%r14 +- mulxq %r11,%r11,%r9 +- adcq %r11,%r15 +- adcq $0,%r9 +- imulq %r12,%rdx +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- addq %rax,%r15 +- adcq %rdx,%r9 +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 32(%rdi),%rdi +- decq %rcx +- jg L$seal_avx2_tail_512_rounds_and_3xhash +- decq %r8 +- jge L$seal_avx2_tail_512_rounds_and_2xhash +- vpaddd L$chacha20_consts(%rip),%ymm3,%ymm3 +- vpaddd 0+64(%rbp),%ymm7,%ymm7 +- vpaddd 0+96(%rbp),%ymm11,%ymm11 +- vpaddd 0+256(%rbp),%ymm15,%ymm15 +- vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 +- vpaddd 0+64(%rbp),%ymm6,%ymm6 +- vpaddd 0+96(%rbp),%ymm10,%ymm10 +- vpaddd 0+224(%rbp),%ymm14,%ymm14 +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd 0+64(%rbp),%ymm5,%ymm5 +- vpaddd 0+96(%rbp),%ymm9,%ymm9 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd 0+64(%rbp),%ymm4,%ymm4 +- vpaddd 0+96(%rbp),%ymm8,%ymm8 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- +- vmovdqa %ymm0,0+128(%rbp) +- vperm2i128 $0x02,%ymm3,%ymm7,%ymm0 +- vperm2i128 $0x13,%ymm3,%ymm7,%ymm7 +- vperm2i128 $0x02,%ymm11,%ymm15,%ymm3 +- vperm2i128 $0x13,%ymm11,%ymm15,%ymm11 +- vpxor 0+0(%rsi),%ymm0,%ymm0 +- vpxor 32+0(%rsi),%ymm3,%ymm3 +- vpxor 64+0(%rsi),%ymm7,%ymm7 +- vpxor 96+0(%rsi),%ymm11,%ymm11 +- vmovdqu %ymm0,0+0(%rdi) +- vmovdqu %ymm3,32+0(%rdi) +- vmovdqu %ymm7,64+0(%rdi) +- vmovdqu %ymm11,96+0(%rdi) +- +- vmovdqa 0+128(%rbp),%ymm0 +- vperm2i128 $0x02,%ymm2,%ymm6,%ymm3 +- vperm2i128 $0x13,%ymm2,%ymm6,%ymm6 +- vperm2i128 $0x02,%ymm10,%ymm14,%ymm2 +- vperm2i128 $0x13,%ymm10,%ymm14,%ymm10 +- vpxor 0+128(%rsi),%ymm3,%ymm3 +- vpxor 32+128(%rsi),%ymm2,%ymm2 +- vpxor 64+128(%rsi),%ymm6,%ymm6 +- vpxor 96+128(%rsi),%ymm10,%ymm10 +- vmovdqu 
%ymm3,0+128(%rdi) +- vmovdqu %ymm2,32+128(%rdi) +- vmovdqu %ymm6,64+128(%rdi) +- vmovdqu %ymm10,96+128(%rdi) +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm3 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm5 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm9 +- vpxor 0+256(%rsi),%ymm3,%ymm3 +- vpxor 32+256(%rsi),%ymm1,%ymm1 +- vpxor 64+256(%rsi),%ymm5,%ymm5 +- vpxor 96+256(%rsi),%ymm9,%ymm9 +- vmovdqu %ymm3,0+256(%rdi) +- vmovdqu %ymm1,32+256(%rdi) +- vmovdqu %ymm5,64+256(%rdi) +- vmovdqu %ymm9,96+256(%rdi) +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm3 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x02,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm12 +- vmovdqa %ymm3,%ymm8 +- +- movq $384,%rcx +- leaq 384(%rsi),%rsi +- subq $384,%rbx +- jmp L$seal_avx2_short_hash_remainder +- +-L$seal_avx2_320: +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm8,%ymm10 +- vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 +- vpaddd L$avx2_inc(%rip),%ymm13,%ymm14 +- vmovdqa %ymm4,%ymm7 +- vmovdqa %ymm8,%ymm11 +- vmovdqa %ymm12,0+160(%rbp) +- vmovdqa %ymm13,0+192(%rbp) +- vmovdqa %ymm14,0+224(%rbp) +- movq $10,%r10 +-L$seal_avx2_320_rounds: +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol16(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpsrld $20,%ymm6,%ymm3 +- vpslld $12,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol8(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpslld $7,%ymm6,%ymm3 +- vpsrld $25,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpalignr $12,%ymm14,%ymm14,%ymm14 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $4,%ymm6,%ymm6,%ymm6 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb 
L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol16(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpsrld $20,%ymm6,%ymm3 +- vpslld $12,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpaddd %ymm6,%ymm2,%ymm2 +- vpxor %ymm2,%ymm14,%ymm14 +- vpshufb L$rol8(%rip),%ymm14,%ymm14 +- vpaddd %ymm14,%ymm10,%ymm10 +- vpxor %ymm10,%ymm6,%ymm6 +- vpslld $7,%ymm6,%ymm3 +- vpsrld $25,%ymm6,%ymm6 +- vpxor %ymm3,%ymm6,%ymm6 +- vpalignr $4,%ymm14,%ymm14,%ymm14 +- vpalignr $8,%ymm10,%ymm10,%ymm10 +- vpalignr $12,%ymm6,%ymm6,%ymm6 +- +- decq %r10 +- jne L$seal_avx2_320_rounds +- vpaddd L$chacha20_consts(%rip),%ymm0,%ymm0 +- vpaddd L$chacha20_consts(%rip),%ymm1,%ymm1 +- vpaddd L$chacha20_consts(%rip),%ymm2,%ymm2 +- vpaddd %ymm7,%ymm4,%ymm4 +- vpaddd %ymm7,%ymm5,%ymm5 +- vpaddd %ymm7,%ymm6,%ymm6 +- vpaddd %ymm11,%ymm8,%ymm8 +- vpaddd %ymm11,%ymm9,%ymm9 +- vpaddd %ymm11,%ymm10,%ymm10 +- vpaddd 0+160(%rbp),%ymm12,%ymm12 +- vpaddd 0+192(%rbp),%ymm13,%ymm13 +- vpaddd 0+224(%rbp),%ymm14,%ymm14 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 +- +- vpand L$clamp(%rip),%ymm3,%ymm3 +- vmovdqa %ymm3,0+0(%rbp) +- +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 +- vperm2i128 $0x02,%ymm2,%ymm6,%ymm9 +- vperm2i128 $0x02,%ymm10,%ymm14,%ymm13 +- vperm2i128 $0x13,%ymm2,%ymm6,%ymm2 +- vperm2i128 $0x13,%ymm10,%ymm14,%ymm6 +- jmp L$seal_avx2_short +- +-L$seal_avx2_192: +- vmovdqa %ymm0,%ymm1 +- vmovdqa %ymm0,%ymm2 +- vmovdqa %ymm4,%ymm5 +- vmovdqa %ymm4,%ymm6 +- vmovdqa %ymm8,%ymm9 +- vmovdqa %ymm8,%ymm10 +- vpaddd L$avx2_inc(%rip),%ymm12,%ymm13 +- vmovdqa %ymm12,%ymm11 +- vmovdqa %ymm13,%ymm15 +- movq $10,%r10 +-L$seal_avx2_192_rounds: +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $12,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $4,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $12,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $4,%ymm5,%ymm5,%ymm5 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb 
L$rol16(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $20,%ymm4,%ymm3 +- vpslld $12,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpaddd %ymm4,%ymm0,%ymm0 +- vpxor %ymm0,%ymm12,%ymm12 +- vpshufb L$rol8(%rip),%ymm12,%ymm12 +- vpaddd %ymm12,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpslld $7,%ymm4,%ymm3 +- vpsrld $25,%ymm4,%ymm4 +- vpxor %ymm3,%ymm4,%ymm4 +- vpalignr $4,%ymm12,%ymm12,%ymm12 +- vpalignr $8,%ymm8,%ymm8,%ymm8 +- vpalignr $12,%ymm4,%ymm4,%ymm4 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol16(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpsrld $20,%ymm5,%ymm3 +- vpslld $12,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpaddd %ymm5,%ymm1,%ymm1 +- vpxor %ymm1,%ymm13,%ymm13 +- vpshufb L$rol8(%rip),%ymm13,%ymm13 +- vpaddd %ymm13,%ymm9,%ymm9 +- vpxor %ymm9,%ymm5,%ymm5 +- vpslld $7,%ymm5,%ymm3 +- vpsrld $25,%ymm5,%ymm5 +- vpxor %ymm3,%ymm5,%ymm5 +- vpalignr $4,%ymm13,%ymm13,%ymm13 +- vpalignr $8,%ymm9,%ymm9,%ymm9 +- vpalignr $12,%ymm5,%ymm5,%ymm5 +- +- decq %r10 +- jne L$seal_avx2_192_rounds +- vpaddd %ymm2,%ymm0,%ymm0 +- vpaddd %ymm2,%ymm1,%ymm1 +- vpaddd %ymm6,%ymm4,%ymm4 +- vpaddd %ymm6,%ymm5,%ymm5 +- vpaddd %ymm10,%ymm8,%ymm8 +- vpaddd %ymm10,%ymm9,%ymm9 +- vpaddd %ymm11,%ymm12,%ymm12 +- vpaddd %ymm15,%ymm13,%ymm13 +- vperm2i128 $0x02,%ymm0,%ymm4,%ymm3 +- +- vpand L$clamp(%rip),%ymm3,%ymm3 +- vmovdqa %ymm3,0+0(%rbp) +- +- vperm2i128 $0x13,%ymm0,%ymm4,%ymm0 +- vperm2i128 $0x13,%ymm8,%ymm12,%ymm4 +- vperm2i128 $0x02,%ymm1,%ymm5,%ymm8 +- vperm2i128 $0x02,%ymm9,%ymm13,%ymm12 +- vperm2i128 $0x13,%ymm1,%ymm5,%ymm1 +- vperm2i128 $0x13,%ymm9,%ymm13,%ymm5 +-L$seal_avx2_short: +- movq %r8,%r8 +- call poly_hash_ad_internal +- xorq %rcx,%rcx +-L$seal_avx2_short_hash_remainder: +- cmpq $16,%rcx +- jb L$seal_avx2_short_loop +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- subq $16,%rcx +- addq $16,%rdi +- jmp L$seal_avx2_short_hash_remainder +-L$seal_avx2_short_loop: +- cmpq $32,%rbx +- jb L$seal_avx2_short_tail +- subq $32,%rbx +- +- vpxor (%rsi),%ymm0,%ymm0 +- vmovdqu %ymm0,(%rdi) +- leaq 32(%rsi),%rsi +- +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- addq 
0+16(%rdi),%r10 +- adcq 8+16(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 32(%rdi),%rdi +- +- vmovdqa %ymm4,%ymm0 +- vmovdqa %ymm8,%ymm4 +- vmovdqa %ymm12,%ymm8 +- vmovdqa %ymm1,%ymm12 +- vmovdqa %ymm5,%ymm1 +- vmovdqa %ymm9,%ymm5 +- vmovdqa %ymm13,%ymm9 +- vmovdqa %ymm2,%ymm13 +- vmovdqa %ymm6,%ymm2 +- jmp L$seal_avx2_short_loop +-L$seal_avx2_short_tail: +- cmpq $16,%rbx +- jb L$seal_avx2_exit +- subq $16,%rbx +- vpxor (%rsi),%xmm0,%xmm3 +- vmovdqu %xmm3,(%rdi) +- leaq 16(%rsi),%rsi +- addq 0+0(%rdi),%r10 +- adcq 8+0(%rdi),%r11 +- adcq $1,%r12 +- movq 0+0+0(%rbp),%rax +- movq %rax,%r15 +- mulq %r10 +- movq %rax,%r13 +- movq %rdx,%r14 +- movq 0+0+0(%rbp),%rax +- mulq %r11 +- imulq %r12,%r15 +- addq %rax,%r14 +- adcq %rdx,%r15 +- movq 8+0+0(%rbp),%rax +- movq %rax,%r9 +- mulq %r10 +- addq %rax,%r14 +- adcq $0,%rdx +- movq %rdx,%r10 +- movq 8+0+0(%rbp),%rax +- mulq %r11 +- addq %rax,%r15 +- adcq $0,%rdx +- imulq %r12,%r9 +- addq %r10,%r15 +- adcq %rdx,%r9 +- movq %r13,%r10 +- movq %r14,%r11 +- movq %r15,%r12 +- andq $3,%r12 +- movq %r15,%r13 +- andq $-4,%r13 +- movq %r9,%r14 +- shrdq $2,%r9,%r15 +- shrq $2,%r9 +- addq %r13,%r15 +- adcq %r14,%r9 +- addq %r15,%r10 +- adcq %r9,%r11 +- adcq $0,%r12 +- +- leaq 16(%rdi),%rdi +- vextracti128 $1,%ymm0,%xmm0 +-L$seal_avx2_exit: +- vzeroupper +- jmp L$seal_sse_tail_16 +- +- +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S b/mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S +deleted file mode 100644 +index e497c35..0000000 +--- a/mac-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S ++++ /dev/null +@@ -1,850 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +-.p2align 5 +-_aesni_ctr32_ghash_6x: +- +- vmovdqu 32(%r11),%xmm2 +- subq $6,%rdx +- vpxor %xmm4,%xmm4,%xmm4 +- vmovdqu 0-128(%rcx),%xmm15 +- vpaddb %xmm2,%xmm1,%xmm10 +- vpaddb %xmm2,%xmm10,%xmm11 +- vpaddb %xmm2,%xmm11,%xmm12 +- vpaddb %xmm2,%xmm12,%xmm13 +- vpaddb %xmm2,%xmm13,%xmm14 +- vpxor %xmm15,%xmm1,%xmm9 +- vmovdqu %xmm4,16+8(%rsp) +- jmp L$oop6x +- +-.p2align 5 +-L$oop6x: +- addl $100663296,%ebx +- jc L$handle_ctr32 +- vmovdqu 0-32(%r9),%xmm3 +- vpaddb %xmm2,%xmm14,%xmm1 +- vpxor %xmm15,%xmm10,%xmm10 +- vpxor %xmm15,%xmm11,%xmm11 +- +-L$resume_ctr32: +- vmovdqu %xmm1,(%r8) +- vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5 +- vpxor %xmm15,%xmm12,%xmm12 +- vmovups 16-128(%rcx),%xmm2 +- vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6 +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- xorq %r12,%r12 +- cmpq %r14,%r15 +- +- vaesenc %xmm2,%xmm9,%xmm9 +- vmovdqu 48+8(%rsp),%xmm0 +- vpxor %xmm15,%xmm13,%xmm13 +- vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1 +- vaesenc %xmm2,%xmm10,%xmm10 +- vpxor %xmm15,%xmm14,%xmm14 +- setnc %r12b +- vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 +- vaesenc %xmm2,%xmm11,%xmm11 +- vmovdqu 16-32(%r9),%xmm3 +- negq %r12 +- vaesenc %xmm2,%xmm12,%xmm12 +- vpxor %xmm5,%xmm6,%xmm6 +- vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5 +- vpxor %xmm4,%xmm8,%xmm8 +- vaesenc %xmm2,%xmm13,%xmm13 +- vpxor %xmm5,%xmm1,%xmm4 +- andq $0x60,%r12 +- vmovups 32-128(%rcx),%xmm15 +- vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1 +- vaesenc %xmm2,%xmm14,%xmm14 +- +- vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2 +- leaq (%r14,%r12,1),%r14 +- vaesenc %xmm15,%xmm9,%xmm9 +- vpxor 16+8(%rsp),%xmm8,%xmm8 +- vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3 +- vmovdqu 64+8(%rsp),%xmm0 +- vaesenc %xmm15,%xmm10,%xmm10 +- movbeq 88(%r14),%r13 +- vaesenc %xmm15,%xmm11,%xmm11 +- movbeq 80(%r14),%r12 +- vaesenc %xmm15,%xmm12,%xmm12 +- movq %r13,32+8(%rsp) +- vaesenc %xmm15,%xmm13,%xmm13 +- movq %r12,40+8(%rsp) +- vmovdqu 48-32(%r9),%xmm5 +- vaesenc %xmm15,%xmm14,%xmm14 +- +- vmovups 48-128(%rcx),%xmm15 +- vpxor %xmm1,%xmm6,%xmm6 +- vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1 +- vaesenc %xmm15,%xmm9,%xmm9 +- vpxor %xmm2,%xmm6,%xmm6 +- vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2 +- vaesenc %xmm15,%xmm10,%xmm10 +- vpxor %xmm3,%xmm7,%xmm7 +- vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3 +- vaesenc %xmm15,%xmm11,%xmm11 +- vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5 +- vmovdqu 80+8(%rsp),%xmm0 +- vaesenc %xmm15,%xmm12,%xmm12 +- vaesenc %xmm15,%xmm13,%xmm13 +- vpxor %xmm1,%xmm4,%xmm4 +- vmovdqu 64-32(%r9),%xmm1 +- vaesenc %xmm15,%xmm14,%xmm14 +- +- vmovups 64-128(%rcx),%xmm15 +- vpxor %xmm2,%xmm6,%xmm6 +- vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2 +- vaesenc %xmm15,%xmm9,%xmm9 +- vpxor %xmm3,%xmm6,%xmm6 +- vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3 +- vaesenc %xmm15,%xmm10,%xmm10 +- movbeq 72(%r14),%r13 +- vpxor %xmm5,%xmm7,%xmm7 +- vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5 +- vaesenc %xmm15,%xmm11,%xmm11 +- movbeq 64(%r14),%r12 +- vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1 +- vmovdqu 96+8(%rsp),%xmm0 +- vaesenc %xmm15,%xmm12,%xmm12 +- movq %r13,48+8(%rsp) +- vaesenc %xmm15,%xmm13,%xmm13 +- movq %r12,56+8(%rsp) +- vpxor %xmm2,%xmm4,%xmm4 +- vmovdqu 96-32(%r9),%xmm2 +- vaesenc %xmm15,%xmm14,%xmm14 +- +- vmovups 80-128(%rcx),%xmm15 +- vpxor %xmm3,%xmm6,%xmm6 +- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3 +- vaesenc %xmm15,%xmm9,%xmm9 +- vpxor %xmm5,%xmm6,%xmm6 +- vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5 +- vaesenc 
%xmm15,%xmm10,%xmm10 +- movbeq 56(%r14),%r13 +- vpxor %xmm1,%xmm7,%xmm7 +- vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1 +- vpxor 112+8(%rsp),%xmm8,%xmm8 +- vaesenc %xmm15,%xmm11,%xmm11 +- movbeq 48(%r14),%r12 +- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2 +- vaesenc %xmm15,%xmm12,%xmm12 +- movq %r13,64+8(%rsp) +- vaesenc %xmm15,%xmm13,%xmm13 +- movq %r12,72+8(%rsp) +- vpxor %xmm3,%xmm4,%xmm4 +- vmovdqu 112-32(%r9),%xmm3 +- vaesenc %xmm15,%xmm14,%xmm14 +- +- vmovups 96-128(%rcx),%xmm15 +- vpxor %xmm5,%xmm6,%xmm6 +- vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5 +- vaesenc %xmm15,%xmm9,%xmm9 +- vpxor %xmm1,%xmm6,%xmm6 +- vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1 +- vaesenc %xmm15,%xmm10,%xmm10 +- movbeq 40(%r14),%r13 +- vpxor %xmm2,%xmm7,%xmm7 +- vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2 +- vaesenc %xmm15,%xmm11,%xmm11 +- movbeq 32(%r14),%r12 +- vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8 +- vaesenc %xmm15,%xmm12,%xmm12 +- movq %r13,80+8(%rsp) +- vaesenc %xmm15,%xmm13,%xmm13 +- movq %r12,88+8(%rsp) +- vpxor %xmm5,%xmm6,%xmm6 +- vaesenc %xmm15,%xmm14,%xmm14 +- vpxor %xmm1,%xmm6,%xmm6 +- +- vmovups 112-128(%rcx),%xmm15 +- vpslldq $8,%xmm6,%xmm5 +- vpxor %xmm2,%xmm4,%xmm4 +- vmovdqu 16(%r11),%xmm3 +- +- vaesenc %xmm15,%xmm9,%xmm9 +- vpxor %xmm8,%xmm7,%xmm7 +- vaesenc %xmm15,%xmm10,%xmm10 +- vpxor %xmm5,%xmm4,%xmm4 +- movbeq 24(%r14),%r13 +- vaesenc %xmm15,%xmm11,%xmm11 +- movbeq 16(%r14),%r12 +- vpalignr $8,%xmm4,%xmm4,%xmm0 +- vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 +- movq %r13,96+8(%rsp) +- vaesenc %xmm15,%xmm12,%xmm12 +- movq %r12,104+8(%rsp) +- vaesenc %xmm15,%xmm13,%xmm13 +- vmovups 128-128(%rcx),%xmm1 +- vaesenc %xmm15,%xmm14,%xmm14 +- +- vaesenc %xmm1,%xmm9,%xmm9 +- vmovups 144-128(%rcx),%xmm15 +- vaesenc %xmm1,%xmm10,%xmm10 +- vpsrldq $8,%xmm6,%xmm6 +- vaesenc %xmm1,%xmm11,%xmm11 +- vpxor %xmm6,%xmm7,%xmm7 +- vaesenc %xmm1,%xmm12,%xmm12 +- vpxor %xmm0,%xmm4,%xmm4 +- movbeq 8(%r14),%r13 +- vaesenc %xmm1,%xmm13,%xmm13 +- movbeq 0(%r14),%r12 +- vaesenc %xmm1,%xmm14,%xmm14 +- vmovups 160-128(%rcx),%xmm1 +- cmpl $11,%ebp +- jb L$enc_tail +- +- vaesenc %xmm15,%xmm9,%xmm9 +- vaesenc %xmm15,%xmm10,%xmm10 +- vaesenc %xmm15,%xmm11,%xmm11 +- vaesenc %xmm15,%xmm12,%xmm12 +- vaesenc %xmm15,%xmm13,%xmm13 +- vaesenc %xmm15,%xmm14,%xmm14 +- +- vaesenc %xmm1,%xmm9,%xmm9 +- vaesenc %xmm1,%xmm10,%xmm10 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- vaesenc %xmm1,%xmm13,%xmm13 +- vmovups 176-128(%rcx),%xmm15 +- vaesenc %xmm1,%xmm14,%xmm14 +- vmovups 192-128(%rcx),%xmm1 +- je L$enc_tail +- +- vaesenc %xmm15,%xmm9,%xmm9 +- vaesenc %xmm15,%xmm10,%xmm10 +- vaesenc %xmm15,%xmm11,%xmm11 +- vaesenc %xmm15,%xmm12,%xmm12 +- vaesenc %xmm15,%xmm13,%xmm13 +- vaesenc %xmm15,%xmm14,%xmm14 +- +- vaesenc %xmm1,%xmm9,%xmm9 +- vaesenc %xmm1,%xmm10,%xmm10 +- vaesenc %xmm1,%xmm11,%xmm11 +- vaesenc %xmm1,%xmm12,%xmm12 +- vaesenc %xmm1,%xmm13,%xmm13 +- vmovups 208-128(%rcx),%xmm15 +- vaesenc %xmm1,%xmm14,%xmm14 +- vmovups 224-128(%rcx),%xmm1 +- jmp L$enc_tail +- +-.p2align 5 +-L$handle_ctr32: +- vmovdqu (%r11),%xmm0 +- vpshufb %xmm0,%xmm1,%xmm6 +- vmovdqu 48(%r11),%xmm5 +- vpaddd 64(%r11),%xmm6,%xmm10 +- vpaddd %xmm5,%xmm6,%xmm11 +- vmovdqu 0-32(%r9),%xmm3 +- vpaddd %xmm5,%xmm10,%xmm12 +- vpshufb %xmm0,%xmm10,%xmm10 +- vpaddd %xmm5,%xmm11,%xmm13 +- vpshufb %xmm0,%xmm11,%xmm11 +- vpxor %xmm15,%xmm10,%xmm10 +- vpaddd %xmm5,%xmm12,%xmm14 +- vpshufb %xmm0,%xmm12,%xmm12 +- vpxor %xmm15,%xmm11,%xmm11 +- vpaddd %xmm5,%xmm13,%xmm1 +- vpshufb %xmm0,%xmm13,%xmm13 +- vpshufb %xmm0,%xmm14,%xmm14 +- vpshufb %xmm0,%xmm1,%xmm1 +- jmp L$resume_ctr32 +- +-.p2align 5 +-L$enc_tail: +- 
vaesenc %xmm15,%xmm9,%xmm9 +- vmovdqu %xmm7,16+8(%rsp) +- vpalignr $8,%xmm4,%xmm4,%xmm8 +- vaesenc %xmm15,%xmm10,%xmm10 +- vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4 +- vpxor 0(%rdi),%xmm1,%xmm2 +- vaesenc %xmm15,%xmm11,%xmm11 +- vpxor 16(%rdi),%xmm1,%xmm0 +- vaesenc %xmm15,%xmm12,%xmm12 +- vpxor 32(%rdi),%xmm1,%xmm5 +- vaesenc %xmm15,%xmm13,%xmm13 +- vpxor 48(%rdi),%xmm1,%xmm6 +- vaesenc %xmm15,%xmm14,%xmm14 +- vpxor 64(%rdi),%xmm1,%xmm7 +- vpxor 80(%rdi),%xmm1,%xmm3 +- vmovdqu (%r8),%xmm1 +- +- vaesenclast %xmm2,%xmm9,%xmm9 +- vmovdqu 32(%r11),%xmm2 +- vaesenclast %xmm0,%xmm10,%xmm10 +- vpaddb %xmm2,%xmm1,%xmm0 +- movq %r13,112+8(%rsp) +- leaq 96(%rdi),%rdi +- vaesenclast %xmm5,%xmm11,%xmm11 +- vpaddb %xmm2,%xmm0,%xmm5 +- movq %r12,120+8(%rsp) +- leaq 96(%rsi),%rsi +- vmovdqu 0-128(%rcx),%xmm15 +- vaesenclast %xmm6,%xmm12,%xmm12 +- vpaddb %xmm2,%xmm5,%xmm6 +- vaesenclast %xmm7,%xmm13,%xmm13 +- vpaddb %xmm2,%xmm6,%xmm7 +- vaesenclast %xmm3,%xmm14,%xmm14 +- vpaddb %xmm2,%xmm7,%xmm3 +- +- addq $0x60,%r10 +- subq $0x6,%rdx +- jc L$6x_done +- +- vmovups %xmm9,-96(%rsi) +- vpxor %xmm15,%xmm1,%xmm9 +- vmovups %xmm10,-80(%rsi) +- vmovdqa %xmm0,%xmm10 +- vmovups %xmm11,-64(%rsi) +- vmovdqa %xmm5,%xmm11 +- vmovups %xmm12,-48(%rsi) +- vmovdqa %xmm6,%xmm12 +- vmovups %xmm13,-32(%rsi) +- vmovdqa %xmm7,%xmm13 +- vmovups %xmm14,-16(%rsi) +- vmovdqa %xmm3,%xmm14 +- vmovdqu 32+8(%rsp),%xmm7 +- jmp L$oop6x +- +-L$6x_done: +- vpxor 16+8(%rsp),%xmm8,%xmm8 +- vpxor %xmm4,%xmm8,%xmm8 +- +- .byte 0xf3,0xc3 +- +- +-.globl _aesni_gcm_decrypt +-.private_extern _aesni_gcm_decrypt +- +-.p2align 5 +-_aesni_gcm_decrypt: +- +- xorq %r10,%r10 +- +- +- +- cmpq $0x60,%rdx +- jb L$gcm_dec_abort +- +- leaq (%rsp),%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- vzeroupper +- +- vmovdqu (%r8),%xmm1 +- addq $-128,%rsp +- movl 12(%r8),%ebx +- leaq L$bswap_mask(%rip),%r11 +- leaq -128(%rcx),%r14 +- movq $0xf80,%r15 +- vmovdqu (%r9),%xmm8 +- andq $-128,%rsp +- vmovdqu (%r11),%xmm0 +- leaq 128(%rcx),%rcx +- leaq 32+32(%r9),%r9 +- movl 240-128(%rcx),%ebp +- vpshufb %xmm0,%xmm8,%xmm8 +- +- andq %r15,%r14 +- andq %rsp,%r15 +- subq %r14,%r15 +- jc L$dec_no_key_aliasing +- cmpq $768,%r15 +- jnc L$dec_no_key_aliasing +- subq %r15,%rsp +-L$dec_no_key_aliasing: +- +- vmovdqu 80(%rdi),%xmm7 +- leaq (%rdi),%r14 +- vmovdqu 64(%rdi),%xmm4 +- +- +- +- +- +- +- +- leaq -192(%rdi,%rdx,1),%r15 +- +- vmovdqu 48(%rdi),%xmm5 +- shrq $4,%rdx +- xorq %r10,%r10 +- vmovdqu 32(%rdi),%xmm6 +- vpshufb %xmm0,%xmm7,%xmm7 +- vmovdqu 16(%rdi),%xmm2 +- vpshufb %xmm0,%xmm4,%xmm4 +- vmovdqu (%rdi),%xmm3 +- vpshufb %xmm0,%xmm5,%xmm5 +- vmovdqu %xmm4,48(%rsp) +- vpshufb %xmm0,%xmm6,%xmm6 +- vmovdqu %xmm5,64(%rsp) +- vpshufb %xmm0,%xmm2,%xmm2 +- vmovdqu %xmm6,80(%rsp) +- vpshufb %xmm0,%xmm3,%xmm3 +- vmovdqu %xmm2,96(%rsp) +- vmovdqu %xmm3,112(%rsp) +- +- call _aesni_ctr32_ghash_6x +- +- vmovups %xmm9,-96(%rsi) +- vmovups %xmm10,-80(%rsi) +- vmovups %xmm11,-64(%rsi) +- vmovups %xmm12,-48(%rsi) +- vmovups %xmm13,-32(%rsi) +- vmovups %xmm14,-16(%rsi) +- +- vpshufb (%r11),%xmm8,%xmm8 +- vmovdqu %xmm8,-64(%r9) +- +- vzeroupper +- movq -48(%rax),%r15 +- +- movq -40(%rax),%r14 +- +- movq -32(%rax),%r13 +- +- movq -24(%rax),%r12 +- +- movq -16(%rax),%rbp +- +- movq -8(%rax),%rbx +- +- leaq (%rax),%rsp +- +-L$gcm_dec_abort: +- movq %r10,%rax +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-_aesni_ctr32_6x: +- +- vmovdqu 0-128(%rcx),%xmm4 +- vmovdqu 32(%r11),%xmm2 +- leaq -1(%rbp),%r13 +- vmovups 16-128(%rcx),%xmm15 +- leaq 
32-128(%rcx),%r12 +- vpxor %xmm4,%xmm1,%xmm9 +- addl $100663296,%ebx +- jc L$handle_ctr32_2 +- vpaddb %xmm2,%xmm1,%xmm10 +- vpaddb %xmm2,%xmm10,%xmm11 +- vpxor %xmm4,%xmm10,%xmm10 +- vpaddb %xmm2,%xmm11,%xmm12 +- vpxor %xmm4,%xmm11,%xmm11 +- vpaddb %xmm2,%xmm12,%xmm13 +- vpxor %xmm4,%xmm12,%xmm12 +- vpaddb %xmm2,%xmm13,%xmm14 +- vpxor %xmm4,%xmm13,%xmm13 +- vpaddb %xmm2,%xmm14,%xmm1 +- vpxor %xmm4,%xmm14,%xmm14 +- jmp L$oop_ctr32 +- +-.p2align 4 +-L$oop_ctr32: +- vaesenc %xmm15,%xmm9,%xmm9 +- vaesenc %xmm15,%xmm10,%xmm10 +- vaesenc %xmm15,%xmm11,%xmm11 +- vaesenc %xmm15,%xmm12,%xmm12 +- vaesenc %xmm15,%xmm13,%xmm13 +- vaesenc %xmm15,%xmm14,%xmm14 +- vmovups (%r12),%xmm15 +- leaq 16(%r12),%r12 +- decl %r13d +- jnz L$oop_ctr32 +- +- vmovdqu (%r12),%xmm3 +- vaesenc %xmm15,%xmm9,%xmm9 +- vpxor 0(%rdi),%xmm3,%xmm4 +- vaesenc %xmm15,%xmm10,%xmm10 +- vpxor 16(%rdi),%xmm3,%xmm5 +- vaesenc %xmm15,%xmm11,%xmm11 +- vpxor 32(%rdi),%xmm3,%xmm6 +- vaesenc %xmm15,%xmm12,%xmm12 +- vpxor 48(%rdi),%xmm3,%xmm8 +- vaesenc %xmm15,%xmm13,%xmm13 +- vpxor 64(%rdi),%xmm3,%xmm2 +- vaesenc %xmm15,%xmm14,%xmm14 +- vpxor 80(%rdi),%xmm3,%xmm3 +- leaq 96(%rdi),%rdi +- +- vaesenclast %xmm4,%xmm9,%xmm9 +- vaesenclast %xmm5,%xmm10,%xmm10 +- vaesenclast %xmm6,%xmm11,%xmm11 +- vaesenclast %xmm8,%xmm12,%xmm12 +- vaesenclast %xmm2,%xmm13,%xmm13 +- vaesenclast %xmm3,%xmm14,%xmm14 +- vmovups %xmm9,0(%rsi) +- vmovups %xmm10,16(%rsi) +- vmovups %xmm11,32(%rsi) +- vmovups %xmm12,48(%rsi) +- vmovups %xmm13,64(%rsi) +- vmovups %xmm14,80(%rsi) +- leaq 96(%rsi),%rsi +- +- .byte 0xf3,0xc3 +-.p2align 5 +-L$handle_ctr32_2: +- vpshufb %xmm0,%xmm1,%xmm6 +- vmovdqu 48(%r11),%xmm5 +- vpaddd 64(%r11),%xmm6,%xmm10 +- vpaddd %xmm5,%xmm6,%xmm11 +- vpaddd %xmm5,%xmm10,%xmm12 +- vpshufb %xmm0,%xmm10,%xmm10 +- vpaddd %xmm5,%xmm11,%xmm13 +- vpshufb %xmm0,%xmm11,%xmm11 +- vpxor %xmm4,%xmm10,%xmm10 +- vpaddd %xmm5,%xmm12,%xmm14 +- vpshufb %xmm0,%xmm12,%xmm12 +- vpxor %xmm4,%xmm11,%xmm11 +- vpaddd %xmm5,%xmm13,%xmm1 +- vpshufb %xmm0,%xmm13,%xmm13 +- vpxor %xmm4,%xmm12,%xmm12 +- vpshufb %xmm0,%xmm14,%xmm14 +- vpxor %xmm4,%xmm13,%xmm13 +- vpshufb %xmm0,%xmm1,%xmm1 +- vpxor %xmm4,%xmm14,%xmm14 +- jmp L$oop_ctr32 +- +- +- +-.globl _aesni_gcm_encrypt +-.private_extern _aesni_gcm_encrypt +- +-.p2align 5 +-_aesni_gcm_encrypt: +- +-#ifdef BORINGSSL_DISPATCH_TEST +- +- movb $1,_BORINGSSL_function_hit+2(%rip) +-#endif +- xorq %r10,%r10 +- +- +- +- +- cmpq $288,%rdx +- jb L$gcm_enc_abort +- +- leaq (%rsp),%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- vzeroupper +- +- vmovdqu (%r8),%xmm1 +- addq $-128,%rsp +- movl 12(%r8),%ebx +- leaq L$bswap_mask(%rip),%r11 +- leaq -128(%rcx),%r14 +- movq $0xf80,%r15 +- leaq 128(%rcx),%rcx +- vmovdqu (%r11),%xmm0 +- andq $-128,%rsp +- movl 240-128(%rcx),%ebp +- +- andq %r15,%r14 +- andq %rsp,%r15 +- subq %r14,%r15 +- jc L$enc_no_key_aliasing +- cmpq $768,%r15 +- jnc L$enc_no_key_aliasing +- subq %r15,%rsp +-L$enc_no_key_aliasing: +- +- leaq (%rsi),%r14 +- +- +- +- +- +- +- +- +- leaq -192(%rsi,%rdx,1),%r15 +- +- shrq $4,%rdx +- +- call _aesni_ctr32_6x +- vpshufb %xmm0,%xmm9,%xmm8 +- vpshufb %xmm0,%xmm10,%xmm2 +- vmovdqu %xmm8,112(%rsp) +- vpshufb %xmm0,%xmm11,%xmm4 +- vmovdqu %xmm2,96(%rsp) +- vpshufb %xmm0,%xmm12,%xmm5 +- vmovdqu %xmm4,80(%rsp) +- vpshufb %xmm0,%xmm13,%xmm6 +- vmovdqu %xmm5,64(%rsp) +- vpshufb %xmm0,%xmm14,%xmm7 +- vmovdqu %xmm6,48(%rsp) +- +- call _aesni_ctr32_6x +- +- vmovdqu (%r9),%xmm8 +- leaq 32+32(%r9),%r9 +- subq $12,%rdx +- movq $192,%r10 
+- vpshufb %xmm0,%xmm8,%xmm8 +- +- call _aesni_ctr32_ghash_6x +- vmovdqu 32(%rsp),%xmm7 +- vmovdqu (%r11),%xmm0 +- vmovdqu 0-32(%r9),%xmm3 +- vpunpckhqdq %xmm7,%xmm7,%xmm1 +- vmovdqu 32-32(%r9),%xmm15 +- vmovups %xmm9,-96(%rsi) +- vpshufb %xmm0,%xmm9,%xmm9 +- vpxor %xmm7,%xmm1,%xmm1 +- vmovups %xmm10,-80(%rsi) +- vpshufb %xmm0,%xmm10,%xmm10 +- vmovups %xmm11,-64(%rsi) +- vpshufb %xmm0,%xmm11,%xmm11 +- vmovups %xmm12,-48(%rsi) +- vpshufb %xmm0,%xmm12,%xmm12 +- vmovups %xmm13,-32(%rsi) +- vpshufb %xmm0,%xmm13,%xmm13 +- vmovups %xmm14,-16(%rsi) +- vpshufb %xmm0,%xmm14,%xmm14 +- vmovdqu %xmm9,16(%rsp) +- vmovdqu 48(%rsp),%xmm6 +- vmovdqu 16-32(%r9),%xmm0 +- vpunpckhqdq %xmm6,%xmm6,%xmm2 +- vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5 +- vpxor %xmm6,%xmm2,%xmm2 +- vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7 +- vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 +- +- vmovdqu 64(%rsp),%xmm9 +- vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4 +- vmovdqu 48-32(%r9),%xmm3 +- vpxor %xmm5,%xmm4,%xmm4 +- vpunpckhqdq %xmm9,%xmm9,%xmm5 +- vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6 +- vpxor %xmm9,%xmm5,%xmm5 +- vpxor %xmm7,%xmm6,%xmm6 +- vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 +- vmovdqu 80-32(%r9),%xmm15 +- vpxor %xmm1,%xmm2,%xmm2 +- +- vmovdqu 80(%rsp),%xmm1 +- vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7 +- vmovdqu 64-32(%r9),%xmm0 +- vpxor %xmm4,%xmm7,%xmm7 +- vpunpckhqdq %xmm1,%xmm1,%xmm4 +- vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9 +- vpxor %xmm1,%xmm4,%xmm4 +- vpxor %xmm6,%xmm9,%xmm9 +- vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5 +- vpxor %xmm2,%xmm5,%xmm5 +- +- vmovdqu 96(%rsp),%xmm2 +- vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6 +- vmovdqu 96-32(%r9),%xmm3 +- vpxor %xmm7,%xmm6,%xmm6 +- vpunpckhqdq %xmm2,%xmm2,%xmm7 +- vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1 +- vpxor %xmm2,%xmm7,%xmm7 +- vpxor %xmm9,%xmm1,%xmm1 +- vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4 +- vmovdqu 128-32(%r9),%xmm15 +- vpxor %xmm5,%xmm4,%xmm4 +- +- vpxor 112(%rsp),%xmm8,%xmm8 +- vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5 +- vmovdqu 112-32(%r9),%xmm0 +- vpunpckhqdq %xmm8,%xmm8,%xmm9 +- vpxor %xmm6,%xmm5,%xmm5 +- vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2 +- vpxor %xmm8,%xmm9,%xmm9 +- vpxor %xmm1,%xmm2,%xmm2 +- vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7 +- vpxor %xmm4,%xmm7,%xmm4 +- +- vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6 +- vmovdqu 0-32(%r9),%xmm3 +- vpunpckhqdq %xmm14,%xmm14,%xmm1 +- vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8 +- vpxor %xmm14,%xmm1,%xmm1 +- vpxor %xmm5,%xmm6,%xmm5 +- vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9 +- vmovdqu 32-32(%r9),%xmm15 +- vpxor %xmm2,%xmm8,%xmm7 +- vpxor %xmm4,%xmm9,%xmm6 +- +- vmovdqu 16-32(%r9),%xmm0 +- vpxor %xmm5,%xmm7,%xmm9 +- vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4 +- vpxor %xmm9,%xmm6,%xmm6 +- vpunpckhqdq %xmm13,%xmm13,%xmm2 +- vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14 +- vpxor %xmm13,%xmm2,%xmm2 +- vpslldq $8,%xmm6,%xmm9 +- vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1 +- vpxor %xmm9,%xmm5,%xmm8 +- vpsrldq $8,%xmm6,%xmm6 +- vpxor %xmm6,%xmm7,%xmm7 +- +- vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5 +- vmovdqu 48-32(%r9),%xmm3 +- vpxor %xmm4,%xmm5,%xmm5 +- vpunpckhqdq %xmm12,%xmm12,%xmm9 +- vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13 +- vpxor %xmm12,%xmm9,%xmm9 +- vpxor %xmm14,%xmm13,%xmm13 +- vpalignr $8,%xmm8,%xmm8,%xmm14 +- vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2 +- vmovdqu 80-32(%r9),%xmm15 +- vpxor %xmm1,%xmm2,%xmm2 +- +- vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4 +- vmovdqu 64-32(%r9),%xmm0 +- vpxor %xmm5,%xmm4,%xmm4 +- vpunpckhqdq %xmm11,%xmm11,%xmm1 +- vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12 +- vpxor %xmm11,%xmm1,%xmm1 +- vpxor %xmm13,%xmm12,%xmm12 +- vxorps 16(%rsp),%xmm7,%xmm7 +- vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9 +- vpxor %xmm2,%xmm9,%xmm9 +- +- vpclmulqdq 
$0x10,16(%r11),%xmm8,%xmm8 +- vxorps %xmm14,%xmm8,%xmm8 +- +- vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5 +- vmovdqu 96-32(%r9),%xmm3 +- vpxor %xmm4,%xmm5,%xmm5 +- vpunpckhqdq %xmm10,%xmm10,%xmm2 +- vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11 +- vpxor %xmm10,%xmm2,%xmm2 +- vpalignr $8,%xmm8,%xmm8,%xmm14 +- vpxor %xmm12,%xmm11,%xmm11 +- vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1 +- vmovdqu 128-32(%r9),%xmm15 +- vpxor %xmm9,%xmm1,%xmm1 +- +- vxorps %xmm7,%xmm14,%xmm14 +- vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8 +- vxorps %xmm14,%xmm8,%xmm8 +- +- vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4 +- vmovdqu 112-32(%r9),%xmm0 +- vpxor %xmm5,%xmm4,%xmm4 +- vpunpckhqdq %xmm8,%xmm8,%xmm9 +- vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10 +- vpxor %xmm8,%xmm9,%xmm9 +- vpxor %xmm11,%xmm10,%xmm10 +- vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2 +- vpxor %xmm1,%xmm2,%xmm2 +- +- vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5 +- vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7 +- vpxor %xmm4,%xmm5,%xmm5 +- vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6 +- vpxor %xmm10,%xmm7,%xmm7 +- vpxor %xmm2,%xmm6,%xmm6 +- +- vpxor %xmm5,%xmm7,%xmm4 +- vpxor %xmm4,%xmm6,%xmm6 +- vpslldq $8,%xmm6,%xmm1 +- vmovdqu 16(%r11),%xmm3 +- vpsrldq $8,%xmm6,%xmm6 +- vpxor %xmm1,%xmm5,%xmm8 +- vpxor %xmm6,%xmm7,%xmm7 +- +- vpalignr $8,%xmm8,%xmm8,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 +- vpxor %xmm2,%xmm8,%xmm8 +- +- vpalignr $8,%xmm8,%xmm8,%xmm2 +- vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8 +- vpxor %xmm7,%xmm2,%xmm2 +- vpxor %xmm2,%xmm8,%xmm8 +- vpshufb (%r11),%xmm8,%xmm8 +- vmovdqu %xmm8,-64(%r9) +- +- vzeroupper +- movq -48(%rax),%r15 +- +- movq -40(%rax),%r14 +- +- movq -32(%rax),%r13 +- +- movq -24(%rax),%r12 +- +- movq -16(%rax),%rbp +- +- movq -8(%rax),%rbx +- +- leaq (%rax),%rsp +- +-L$gcm_enc_abort: +- movq %r10,%rax +- .byte 0xf3,0xc3 +- +- +-.p2align 6 +-L$bswap_mask: +-.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +-L$poly: +-.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +-L$one_msb: +-.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +-L$two_lsb: +-.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +-L$one_lsb: +-.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +-.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +-.p2align 6 +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S b/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S +deleted file mode 100644 +index 7633880..0000000 +--- a/mac-x86_64/crypto/fipsmodule/aesni-x86_64.S ++++ /dev/null +@@ -1,2503 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +-.globl _aes_hw_encrypt +-.private_extern _aes_hw_encrypt +- +-.p2align 4 +-_aes_hw_encrypt: +- +-#ifdef BORINGSSL_DISPATCH_TEST +- +- movb $1,_BORINGSSL_function_hit+1(%rip) +-#endif +- movups (%rdi),%xmm2 +- movl 240(%rdx),%eax +- movups (%rdx),%xmm0 +- movups 16(%rdx),%xmm1 +- leaq 32(%rdx),%rdx +- xorps %xmm0,%xmm2 +-L$oop_enc1_1: +-.byte 102,15,56,220,209 +- decl %eax +- movups (%rdx),%xmm1 +- leaq 16(%rdx),%rdx +- jnz L$oop_enc1_1 +-.byte 102,15,56,221,209 +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- .byte 0xf3,0xc3 +- +- +- +-.globl _aes_hw_decrypt +-.private_extern _aes_hw_decrypt +- +-.p2align 4 +-_aes_hw_decrypt: +- +- movups (%rdi),%xmm2 +- movl 240(%rdx),%eax +- movups (%rdx),%xmm0 +- movups 16(%rdx),%xmm1 +- leaq 32(%rdx),%rdx +- xorps %xmm0,%xmm2 +-L$oop_dec1_2: +-.byte 102,15,56,222,209 +- decl %eax +- movups (%rdx),%xmm1 +- leaq 16(%rdx),%rdx +- jnz L$oop_dec1_2 +-.byte 102,15,56,223,209 +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_encrypt2: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- xorps %xmm0,%xmm3 +- movups 32(%rcx),%xmm0 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +- addq $16,%rax +- +-L$enc_loop2: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +- movups (%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$enc_loop2 +- +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_decrypt2: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- xorps %xmm0,%xmm3 +- movups 32(%rcx),%xmm0 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +- addq $16,%rax +- +-L$dec_loop2: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +- movups (%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$dec_loop2 +- +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,223,208 +-.byte 102,15,56,223,216 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_encrypt3: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- xorps %xmm0,%xmm3 +- xorps %xmm0,%xmm4 +- movups 32(%rcx),%xmm0 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +- addq $16,%rax +- +-L$enc_loop3: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +- movups (%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +-.byte 102,15,56,220,224 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$enc_loop3 +- +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +-.byte 102,15,56,221,224 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_decrypt3: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- xorps %xmm0,%xmm3 +- xorps %xmm0,%xmm4 +- movups 32(%rcx),%xmm0 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +- addq $16,%rax +- +-L$dec_loop3: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +- movups 
(%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$dec_loop3 +- +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,223,208 +-.byte 102,15,56,223,216 +-.byte 102,15,56,223,224 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_encrypt4: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- xorps %xmm0,%xmm3 +- xorps %xmm0,%xmm4 +- xorps %xmm0,%xmm5 +- movups 32(%rcx),%xmm0 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +-.byte 0x0f,0x1f,0x00 +- addq $16,%rax +- +-L$enc_loop4: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +- movups (%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$enc_loop4 +- +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +-.byte 102,15,56,221,224 +-.byte 102,15,56,221,232 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_decrypt4: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- xorps %xmm0,%xmm3 +- xorps %xmm0,%xmm4 +- xorps %xmm0,%xmm5 +- movups 32(%rcx),%xmm0 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +-.byte 0x0f,0x1f,0x00 +- addq $16,%rax +- +-L$dec_loop4: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +- movups (%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$dec_loop4 +- +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,223,208 +-.byte 102,15,56,223,216 +-.byte 102,15,56,223,224 +-.byte 102,15,56,223,232 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_encrypt6: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +-.byte 102,15,56,220,209 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +-.byte 102,15,56,220,217 +- pxor %xmm0,%xmm5 +- pxor %xmm0,%xmm6 +-.byte 102,15,56,220,225 +- pxor %xmm0,%xmm7 +- movups (%rcx,%rax,1),%xmm0 +- addq $16,%rax +- jmp L$enc_loop6_enter +-.p2align 4 +-L$enc_loop6: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-L$enc_loop6_enter: +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +- movups (%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$enc_loop6 +- +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +-.byte 102,15,56,221,224 +-.byte 102,15,56,221,232 +-.byte 102,15,56,221,240 +-.byte 102,15,56,221,248 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_decrypt6: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- pxor %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +-.byte 102,15,56,222,209 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +-.byte 102,15,56,222,217 +- pxor %xmm0,%xmm5 +- 
pxor %xmm0,%xmm6 +-.byte 102,15,56,222,225 +- pxor %xmm0,%xmm7 +- movups (%rcx,%rax,1),%xmm0 +- addq $16,%rax +- jmp L$dec_loop6_enter +-.p2align 4 +-L$dec_loop6: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-L$dec_loop6_enter: +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +- movups (%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +-.byte 102,15,56,222,240 +-.byte 102,15,56,222,248 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$dec_loop6 +- +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,15,56,223,208 +-.byte 102,15,56,223,216 +-.byte 102,15,56,223,224 +-.byte 102,15,56,223,232 +-.byte 102,15,56,223,240 +-.byte 102,15,56,223,248 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_encrypt8: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- xorps %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +- pxor %xmm0,%xmm5 +- pxor %xmm0,%xmm6 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +-.byte 102,15,56,220,209 +- pxor %xmm0,%xmm7 +- pxor %xmm0,%xmm8 +-.byte 102,15,56,220,217 +- pxor %xmm0,%xmm9 +- movups (%rcx,%rax,1),%xmm0 +- addq $16,%rax +- jmp L$enc_loop8_inner +-.p2align 4 +-L$enc_loop8: +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-L$enc_loop8_inner: +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +-.byte 102,68,15,56,220,201 +-L$enc_loop8_enter: +- movups (%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +-.byte 102,68,15,56,220,192 +-.byte 102,68,15,56,220,200 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$enc_loop8 +- +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +-.byte 102,68,15,56,220,201 +-.byte 102,15,56,221,208 +-.byte 102,15,56,221,216 +-.byte 102,15,56,221,224 +-.byte 102,15,56,221,232 +-.byte 102,15,56,221,240 +-.byte 102,15,56,221,248 +-.byte 102,68,15,56,221,192 +-.byte 102,68,15,56,221,200 +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-_aesni_decrypt8: +- +- movups (%rcx),%xmm0 +- shll $4,%eax +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm2 +- xorps %xmm0,%xmm3 +- pxor %xmm0,%xmm4 +- pxor %xmm0,%xmm5 +- pxor %xmm0,%xmm6 +- leaq 32(%rcx,%rax,1),%rcx +- negq %rax +-.byte 102,15,56,222,209 +- pxor %xmm0,%xmm7 +- pxor %xmm0,%xmm8 +-.byte 102,15,56,222,217 +- pxor %xmm0,%xmm9 +- movups (%rcx,%rax,1),%xmm0 +- addq $16,%rax +- jmp L$dec_loop8_inner +-.p2align 4 +-L$dec_loop8: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-L$dec_loop8_inner: +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,68,15,56,222,193 +-.byte 102,68,15,56,222,201 +-L$dec_loop8_enter: +- movups (%rcx,%rax,1),%xmm1 +- addq $32,%rax +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +-.byte 102,15,56,222,240 +-.byte 102,15,56,222,248 +-.byte 102,68,15,56,222,192 +-.byte 102,68,15,56,222,200 +- movups -16(%rcx,%rax,1),%xmm0 +- jnz L$dec_loop8 +- +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 
+-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,68,15,56,222,193 +-.byte 102,68,15,56,222,201 +-.byte 102,15,56,223,208 +-.byte 102,15,56,223,216 +-.byte 102,15,56,223,224 +-.byte 102,15,56,223,232 +-.byte 102,15,56,223,240 +-.byte 102,15,56,223,248 +-.byte 102,68,15,56,223,192 +-.byte 102,68,15,56,223,200 +- .byte 0xf3,0xc3 +- +- +-.globl _aes_hw_ecb_encrypt +-.private_extern _aes_hw_ecb_encrypt +- +-.p2align 4 +-_aes_hw_ecb_encrypt: +- +- andq $-16,%rdx +- jz L$ecb_ret +- +- movl 240(%rcx),%eax +- movups (%rcx),%xmm0 +- movq %rcx,%r11 +- movl %eax,%r10d +- testl %r8d,%r8d +- jz L$ecb_decrypt +- +- cmpq $0x80,%rdx +- jb L$ecb_enc_tail +- +- movdqu (%rdi),%xmm2 +- movdqu 16(%rdi),%xmm3 +- movdqu 32(%rdi),%xmm4 +- movdqu 48(%rdi),%xmm5 +- movdqu 64(%rdi),%xmm6 +- movdqu 80(%rdi),%xmm7 +- movdqu 96(%rdi),%xmm8 +- movdqu 112(%rdi),%xmm9 +- leaq 128(%rdi),%rdi +- subq $0x80,%rdx +- jmp L$ecb_enc_loop8_enter +-.p2align 4 +-L$ecb_enc_loop8: +- movups %xmm2,(%rsi) +- movq %r11,%rcx +- movdqu (%rdi),%xmm2 +- movl %r10d,%eax +- movups %xmm3,16(%rsi) +- movdqu 16(%rdi),%xmm3 +- movups %xmm4,32(%rsi) +- movdqu 32(%rdi),%xmm4 +- movups %xmm5,48(%rsi) +- movdqu 48(%rdi),%xmm5 +- movups %xmm6,64(%rsi) +- movdqu 64(%rdi),%xmm6 +- movups %xmm7,80(%rsi) +- movdqu 80(%rdi),%xmm7 +- movups %xmm8,96(%rsi) +- movdqu 96(%rdi),%xmm8 +- movups %xmm9,112(%rsi) +- leaq 128(%rsi),%rsi +- movdqu 112(%rdi),%xmm9 +- leaq 128(%rdi),%rdi +-L$ecb_enc_loop8_enter: +- +- call _aesni_encrypt8 +- +- subq $0x80,%rdx +- jnc L$ecb_enc_loop8 +- +- movups %xmm2,(%rsi) +- movq %r11,%rcx +- movups %xmm3,16(%rsi) +- movl %r10d,%eax +- movups %xmm4,32(%rsi) +- movups %xmm5,48(%rsi) +- movups %xmm6,64(%rsi) +- movups %xmm7,80(%rsi) +- movups %xmm8,96(%rsi) +- movups %xmm9,112(%rsi) +- leaq 128(%rsi),%rsi +- addq $0x80,%rdx +- jz L$ecb_ret +- +-L$ecb_enc_tail: +- movups (%rdi),%xmm2 +- cmpq $0x20,%rdx +- jb L$ecb_enc_one +- movups 16(%rdi),%xmm3 +- je L$ecb_enc_two +- movups 32(%rdi),%xmm4 +- cmpq $0x40,%rdx +- jb L$ecb_enc_three +- movups 48(%rdi),%xmm5 +- je L$ecb_enc_four +- movups 64(%rdi),%xmm6 +- cmpq $0x60,%rdx +- jb L$ecb_enc_five +- movups 80(%rdi),%xmm7 +- je L$ecb_enc_six +- movdqu 96(%rdi),%xmm8 +- xorps %xmm9,%xmm9 +- call _aesni_encrypt8 +- movups %xmm2,(%rsi) +- movups %xmm3,16(%rsi) +- movups %xmm4,32(%rsi) +- movups %xmm5,48(%rsi) +- movups %xmm6,64(%rsi) +- movups %xmm7,80(%rsi) +- movups %xmm8,96(%rsi) +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_enc_one: +- movups (%rcx),%xmm0 +- movups 16(%rcx),%xmm1 +- leaq 32(%rcx),%rcx +- xorps %xmm0,%xmm2 +-L$oop_enc1_3: +-.byte 102,15,56,220,209 +- decl %eax +- movups (%rcx),%xmm1 +- leaq 16(%rcx),%rcx +- jnz L$oop_enc1_3 +-.byte 102,15,56,221,209 +- movups %xmm2,(%rsi) +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_enc_two: +- call _aesni_encrypt2 +- movups %xmm2,(%rsi) +- movups %xmm3,16(%rsi) +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_enc_three: +- call _aesni_encrypt3 +- movups %xmm2,(%rsi) +- movups %xmm3,16(%rsi) +- movups %xmm4,32(%rsi) +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_enc_four: +- call _aesni_encrypt4 +- movups %xmm2,(%rsi) +- movups %xmm3,16(%rsi) +- movups %xmm4,32(%rsi) +- movups %xmm5,48(%rsi) +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_enc_five: +- xorps %xmm7,%xmm7 +- call _aesni_encrypt6 +- movups %xmm2,(%rsi) +- movups %xmm3,16(%rsi) +- movups %xmm4,32(%rsi) +- movups %xmm5,48(%rsi) +- movups %xmm6,64(%rsi) +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_enc_six: +- call _aesni_encrypt6 +- movups %xmm2,(%rsi) +- movups %xmm3,16(%rsi) +- movups %xmm4,32(%rsi) +- movups 
%xmm5,48(%rsi) +- movups %xmm6,64(%rsi) +- movups %xmm7,80(%rsi) +- jmp L$ecb_ret +- +-.p2align 4 +-L$ecb_decrypt: +- cmpq $0x80,%rdx +- jb L$ecb_dec_tail +- +- movdqu (%rdi),%xmm2 +- movdqu 16(%rdi),%xmm3 +- movdqu 32(%rdi),%xmm4 +- movdqu 48(%rdi),%xmm5 +- movdqu 64(%rdi),%xmm6 +- movdqu 80(%rdi),%xmm7 +- movdqu 96(%rdi),%xmm8 +- movdqu 112(%rdi),%xmm9 +- leaq 128(%rdi),%rdi +- subq $0x80,%rdx +- jmp L$ecb_dec_loop8_enter +-.p2align 4 +-L$ecb_dec_loop8: +- movups %xmm2,(%rsi) +- movq %r11,%rcx +- movdqu (%rdi),%xmm2 +- movl %r10d,%eax +- movups %xmm3,16(%rsi) +- movdqu 16(%rdi),%xmm3 +- movups %xmm4,32(%rsi) +- movdqu 32(%rdi),%xmm4 +- movups %xmm5,48(%rsi) +- movdqu 48(%rdi),%xmm5 +- movups %xmm6,64(%rsi) +- movdqu 64(%rdi),%xmm6 +- movups %xmm7,80(%rsi) +- movdqu 80(%rdi),%xmm7 +- movups %xmm8,96(%rsi) +- movdqu 96(%rdi),%xmm8 +- movups %xmm9,112(%rsi) +- leaq 128(%rsi),%rsi +- movdqu 112(%rdi),%xmm9 +- leaq 128(%rdi),%rdi +-L$ecb_dec_loop8_enter: +- +- call _aesni_decrypt8 +- +- movups (%r11),%xmm0 +- subq $0x80,%rdx +- jnc L$ecb_dec_loop8 +- +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- movq %r11,%rcx +- movups %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- movl %r10d,%eax +- movups %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- movups %xmm5,48(%rsi) +- pxor %xmm5,%xmm5 +- movups %xmm6,64(%rsi) +- pxor %xmm6,%xmm6 +- movups %xmm7,80(%rsi) +- pxor %xmm7,%xmm7 +- movups %xmm8,96(%rsi) +- pxor %xmm8,%xmm8 +- movups %xmm9,112(%rsi) +- pxor %xmm9,%xmm9 +- leaq 128(%rsi),%rsi +- addq $0x80,%rdx +- jz L$ecb_ret +- +-L$ecb_dec_tail: +- movups (%rdi),%xmm2 +- cmpq $0x20,%rdx +- jb L$ecb_dec_one +- movups 16(%rdi),%xmm3 +- je L$ecb_dec_two +- movups 32(%rdi),%xmm4 +- cmpq $0x40,%rdx +- jb L$ecb_dec_three +- movups 48(%rdi),%xmm5 +- je L$ecb_dec_four +- movups 64(%rdi),%xmm6 +- cmpq $0x60,%rdx +- jb L$ecb_dec_five +- movups 80(%rdi),%xmm7 +- je L$ecb_dec_six +- movups 96(%rdi),%xmm8 +- movups (%rcx),%xmm0 +- xorps %xmm9,%xmm9 +- call _aesni_decrypt8 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- movups %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- movups %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- movups %xmm5,48(%rsi) +- pxor %xmm5,%xmm5 +- movups %xmm6,64(%rsi) +- pxor %xmm6,%xmm6 +- movups %xmm7,80(%rsi) +- pxor %xmm7,%xmm7 +- movups %xmm8,96(%rsi) +- pxor %xmm8,%xmm8 +- pxor %xmm9,%xmm9 +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_dec_one: +- movups (%rcx),%xmm0 +- movups 16(%rcx),%xmm1 +- leaq 32(%rcx),%rcx +- xorps %xmm0,%xmm2 +-L$oop_dec1_4: +-.byte 102,15,56,222,209 +- decl %eax +- movups (%rcx),%xmm1 +- leaq 16(%rcx),%rcx +- jnz L$oop_dec1_4 +-.byte 102,15,56,223,209 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_dec_two: +- call _aesni_decrypt2 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- movups %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_dec_three: +- call _aesni_decrypt3 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- movups %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- movups %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_dec_four: +- call _aesni_decrypt4 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- movups %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- movups %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- movups %xmm5,48(%rsi) +- pxor %xmm5,%xmm5 +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_dec_five: +- xorps %xmm7,%xmm7 +- call _aesni_decrypt6 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- movups %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- movups %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- movups %xmm5,48(%rsi) +- pxor %xmm5,%xmm5 +- movups %xmm6,64(%rsi) +- pxor %xmm6,%xmm6 +- 
pxor %xmm7,%xmm7 +- jmp L$ecb_ret +-.p2align 4 +-L$ecb_dec_six: +- call _aesni_decrypt6 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- movups %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- movups %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- movups %xmm5,48(%rsi) +- pxor %xmm5,%xmm5 +- movups %xmm6,64(%rsi) +- pxor %xmm6,%xmm6 +- movups %xmm7,80(%rsi) +- pxor %xmm7,%xmm7 +- +-L$ecb_ret: +- xorps %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- .byte 0xf3,0xc3 +- +- +-.globl _aes_hw_ctr32_encrypt_blocks +-.private_extern _aes_hw_ctr32_encrypt_blocks +- +-.p2align 4 +-_aes_hw_ctr32_encrypt_blocks: +- +-#ifdef BORINGSSL_DISPATCH_TEST +- movb $1,_BORINGSSL_function_hit(%rip) +-#endif +- cmpq $1,%rdx +- jne L$ctr32_bulk +- +- +- +- movups (%r8),%xmm2 +- movups (%rdi),%xmm3 +- movl 240(%rcx),%edx +- movups (%rcx),%xmm0 +- movups 16(%rcx),%xmm1 +- leaq 32(%rcx),%rcx +- xorps %xmm0,%xmm2 +-L$oop_enc1_5: +-.byte 102,15,56,220,209 +- decl %edx +- movups (%rcx),%xmm1 +- leaq 16(%rcx),%rcx +- jnz L$oop_enc1_5 +-.byte 102,15,56,221,209 +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- xorps %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movups %xmm2,(%rsi) +- xorps %xmm2,%xmm2 +- jmp L$ctr32_epilogue +- +-.p2align 4 +-L$ctr32_bulk: +- leaq (%rsp),%r11 +- +- pushq %rbp +- +- subq $128,%rsp +- andq $-16,%rsp +- +- +- +- +- movdqu (%r8),%xmm2 +- movdqu (%rcx),%xmm0 +- movl 12(%r8),%r8d +- pxor %xmm0,%xmm2 +- movl 12(%rcx),%ebp +- movdqa %xmm2,0(%rsp) +- bswapl %r8d +- movdqa %xmm2,%xmm3 +- movdqa %xmm2,%xmm4 +- movdqa %xmm2,%xmm5 +- movdqa %xmm2,64(%rsp) +- movdqa %xmm2,80(%rsp) +- movdqa %xmm2,96(%rsp) +- movq %rdx,%r10 +- movdqa %xmm2,112(%rsp) +- +- leaq 1(%r8),%rax +- leaq 2(%r8),%rdx +- bswapl %eax +- bswapl %edx +- xorl %ebp,%eax +- xorl %ebp,%edx +-.byte 102,15,58,34,216,3 +- leaq 3(%r8),%rax +- movdqa %xmm3,16(%rsp) +-.byte 102,15,58,34,226,3 +- bswapl %eax +- movq %r10,%rdx +- leaq 4(%r8),%r10 +- movdqa %xmm4,32(%rsp) +- xorl %ebp,%eax +- bswapl %r10d +-.byte 102,15,58,34,232,3 +- xorl %ebp,%r10d +- movdqa %xmm5,48(%rsp) +- leaq 5(%r8),%r9 +- movl %r10d,64+12(%rsp) +- bswapl %r9d +- leaq 6(%r8),%r10 +- movl 240(%rcx),%eax +- xorl %ebp,%r9d +- bswapl %r10d +- movl %r9d,80+12(%rsp) +- xorl %ebp,%r10d +- leaq 7(%r8),%r9 +- movl %r10d,96+12(%rsp) +- bswapl %r9d +- leaq _OPENSSL_ia32cap_P(%rip),%r10 +- movl 4(%r10),%r10d +- xorl %ebp,%r9d +- andl $71303168,%r10d +- movl %r9d,112+12(%rsp) +- +- movups 16(%rcx),%xmm1 +- +- movdqa 64(%rsp),%xmm6 +- movdqa 80(%rsp),%xmm7 +- +- cmpq $8,%rdx +- jb L$ctr32_tail +- +- subq $6,%rdx +- cmpl $4194304,%r10d +- je L$ctr32_6x +- +- leaq 128(%rcx),%rcx +- subq $2,%rdx +- jmp L$ctr32_loop8 +- +-.p2align 4 +-L$ctr32_6x: +- shll $4,%eax +- movl $48,%r10d +- bswapl %ebp +- leaq 32(%rcx,%rax,1),%rcx +- subq %rax,%r10 +- jmp L$ctr32_loop6 +- +-.p2align 4 +-L$ctr32_loop6: +- addl $6,%r8d +- movups -48(%rcx,%r10,1),%xmm0 +-.byte 102,15,56,220,209 +- movl %r8d,%eax +- xorl %ebp,%eax +-.byte 102,15,56,220,217 +-.byte 0x0f,0x38,0xf1,0x44,0x24,12 +- leal 1(%r8),%eax +-.byte 102,15,56,220,225 +- xorl %ebp,%eax +-.byte 0x0f,0x38,0xf1,0x44,0x24,28 +-.byte 102,15,56,220,233 +- leal 2(%r8),%eax +- xorl %ebp,%eax +-.byte 102,15,56,220,241 +-.byte 0x0f,0x38,0xf1,0x44,0x24,44 +- leal 3(%r8),%eax +-.byte 102,15,56,220,249 +- movups -32(%rcx,%r10,1),%xmm1 +- xorl %ebp,%eax +- +-.byte 102,15,56,220,208 +-.byte 0x0f,0x38,0xf1,0x44,0x24,60 +- leal 4(%r8),%eax +-.byte 102,15,56,220,216 +- xorl %ebp,%eax +-.byte 0x0f,0x38,0xf1,0x44,0x24,76 +-.byte 102,15,56,220,224 +- leal 5(%r8),%eax +- xorl %ebp,%eax +-.byte 102,15,56,220,232 +-.byte 
0x0f,0x38,0xf1,0x44,0x24,92 +- movq %r10,%rax +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +- movups -16(%rcx,%r10,1),%xmm0 +- +- call L$enc_loop6 +- +- movdqu (%rdi),%xmm8 +- movdqu 16(%rdi),%xmm9 +- movdqu 32(%rdi),%xmm10 +- movdqu 48(%rdi),%xmm11 +- movdqu 64(%rdi),%xmm12 +- movdqu 80(%rdi),%xmm13 +- leaq 96(%rdi),%rdi +- movups -64(%rcx,%r10,1),%xmm1 +- pxor %xmm2,%xmm8 +- movaps 0(%rsp),%xmm2 +- pxor %xmm3,%xmm9 +- movaps 16(%rsp),%xmm3 +- pxor %xmm4,%xmm10 +- movaps 32(%rsp),%xmm4 +- pxor %xmm5,%xmm11 +- movaps 48(%rsp),%xmm5 +- pxor %xmm6,%xmm12 +- movaps 64(%rsp),%xmm6 +- pxor %xmm7,%xmm13 +- movaps 80(%rsp),%xmm7 +- movdqu %xmm8,(%rsi) +- movdqu %xmm9,16(%rsi) +- movdqu %xmm10,32(%rsi) +- movdqu %xmm11,48(%rsi) +- movdqu %xmm12,64(%rsi) +- movdqu %xmm13,80(%rsi) +- leaq 96(%rsi),%rsi +- +- subq $6,%rdx +- jnc L$ctr32_loop6 +- +- addq $6,%rdx +- jz L$ctr32_done +- +- leal -48(%r10),%eax +- leaq -80(%rcx,%r10,1),%rcx +- negl %eax +- shrl $4,%eax +- jmp L$ctr32_tail +- +-.p2align 5 +-L$ctr32_loop8: +- addl $8,%r8d +- movdqa 96(%rsp),%xmm8 +-.byte 102,15,56,220,209 +- movl %r8d,%r9d +- movdqa 112(%rsp),%xmm9 +-.byte 102,15,56,220,217 +- bswapl %r9d +- movups 32-128(%rcx),%xmm0 +-.byte 102,15,56,220,225 +- xorl %ebp,%r9d +- nop +-.byte 102,15,56,220,233 +- movl %r9d,0+12(%rsp) +- leaq 1(%r8),%r9 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +-.byte 102,68,15,56,220,201 +- movups 48-128(%rcx),%xmm1 +- bswapl %r9d +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +- xorl %ebp,%r9d +-.byte 0x66,0x90 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +- movl %r9d,16+12(%rsp) +- leaq 2(%r8),%r9 +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +-.byte 102,68,15,56,220,192 +-.byte 102,68,15,56,220,200 +- movups 64-128(%rcx),%xmm0 +- bswapl %r9d +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +- xorl %ebp,%r9d +-.byte 0x66,0x90 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +- movl %r9d,32+12(%rsp) +- leaq 3(%r8),%r9 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +-.byte 102,68,15,56,220,201 +- movups 80-128(%rcx),%xmm1 +- bswapl %r9d +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +- xorl %ebp,%r9d +-.byte 0x66,0x90 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +- movl %r9d,48+12(%rsp) +- leaq 4(%r8),%r9 +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +-.byte 102,68,15,56,220,192 +-.byte 102,68,15,56,220,200 +- movups 96-128(%rcx),%xmm0 +- bswapl %r9d +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +- xorl %ebp,%r9d +-.byte 0x66,0x90 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +- movl %r9d,64+12(%rsp) +- leaq 5(%r8),%r9 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +-.byte 102,68,15,56,220,201 +- movups 112-128(%rcx),%xmm1 +- bswapl %r9d +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +- xorl %ebp,%r9d +-.byte 0x66,0x90 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +- movl %r9d,80+12(%rsp) +- leaq 6(%r8),%r9 +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +-.byte 102,68,15,56,220,192 +-.byte 102,68,15,56,220,200 +- movups 128-128(%rcx),%xmm0 +- bswapl %r9d +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +- xorl %ebp,%r9d +-.byte 0x66,0x90 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +- movl %r9d,96+12(%rsp) +- leaq 7(%r8),%r9 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +-.byte 102,68,15,56,220,201 +- movups 144-128(%rcx),%xmm1 +- bswapl %r9d +-.byte 102,15,56,220,208 +-.byte 
102,15,56,220,216 +-.byte 102,15,56,220,224 +- xorl %ebp,%r9d +- movdqu 0(%rdi),%xmm10 +-.byte 102,15,56,220,232 +- movl %r9d,112+12(%rsp) +- cmpl $11,%eax +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +-.byte 102,68,15,56,220,192 +-.byte 102,68,15,56,220,200 +- movups 160-128(%rcx),%xmm0 +- +- jb L$ctr32_enc_done +- +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +-.byte 102,68,15,56,220,201 +- movups 176-128(%rcx),%xmm1 +- +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +-.byte 102,68,15,56,220,192 +-.byte 102,68,15,56,220,200 +- movups 192-128(%rcx),%xmm0 +- je L$ctr32_enc_done +- +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +-.byte 102,68,15,56,220,201 +- movups 208-128(%rcx),%xmm1 +- +-.byte 102,15,56,220,208 +-.byte 102,15,56,220,216 +-.byte 102,15,56,220,224 +-.byte 102,15,56,220,232 +-.byte 102,15,56,220,240 +-.byte 102,15,56,220,248 +-.byte 102,68,15,56,220,192 +-.byte 102,68,15,56,220,200 +- movups 224-128(%rcx),%xmm0 +- jmp L$ctr32_enc_done +- +-.p2align 4 +-L$ctr32_enc_done: +- movdqu 16(%rdi),%xmm11 +- pxor %xmm0,%xmm10 +- movdqu 32(%rdi),%xmm12 +- pxor %xmm0,%xmm11 +- movdqu 48(%rdi),%xmm13 +- pxor %xmm0,%xmm12 +- movdqu 64(%rdi),%xmm14 +- pxor %xmm0,%xmm13 +- movdqu 80(%rdi),%xmm15 +- pxor %xmm0,%xmm14 +- pxor %xmm0,%xmm15 +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +-.byte 102,68,15,56,220,201 +- movdqu 96(%rdi),%xmm1 +- leaq 128(%rdi),%rdi +- +-.byte 102,65,15,56,221,210 +- pxor %xmm0,%xmm1 +- movdqu 112-128(%rdi),%xmm10 +-.byte 102,65,15,56,221,219 +- pxor %xmm0,%xmm10 +- movdqa 0(%rsp),%xmm11 +-.byte 102,65,15,56,221,228 +-.byte 102,65,15,56,221,237 +- movdqa 16(%rsp),%xmm12 +- movdqa 32(%rsp),%xmm13 +-.byte 102,65,15,56,221,246 +-.byte 102,65,15,56,221,255 +- movdqa 48(%rsp),%xmm14 +- movdqa 64(%rsp),%xmm15 +-.byte 102,68,15,56,221,193 +- movdqa 80(%rsp),%xmm0 +- movups 16-128(%rcx),%xmm1 +-.byte 102,69,15,56,221,202 +- +- movups %xmm2,(%rsi) +- movdqa %xmm11,%xmm2 +- movups %xmm3,16(%rsi) +- movdqa %xmm12,%xmm3 +- movups %xmm4,32(%rsi) +- movdqa %xmm13,%xmm4 +- movups %xmm5,48(%rsi) +- movdqa %xmm14,%xmm5 +- movups %xmm6,64(%rsi) +- movdqa %xmm15,%xmm6 +- movups %xmm7,80(%rsi) +- movdqa %xmm0,%xmm7 +- movups %xmm8,96(%rsi) +- movups %xmm9,112(%rsi) +- leaq 128(%rsi),%rsi +- +- subq $8,%rdx +- jnc L$ctr32_loop8 +- +- addq $8,%rdx +- jz L$ctr32_done +- leaq -128(%rcx),%rcx +- +-L$ctr32_tail: +- +- +- leaq 16(%rcx),%rcx +- cmpq $4,%rdx +- jb L$ctr32_loop3 +- je L$ctr32_loop4 +- +- +- shll $4,%eax +- movdqa 96(%rsp),%xmm8 +- pxor %xmm9,%xmm9 +- +- movups 16(%rcx),%xmm0 +-.byte 102,15,56,220,209 +-.byte 102,15,56,220,217 +- leaq 32-16(%rcx,%rax,1),%rcx +- negq %rax +-.byte 102,15,56,220,225 +- addq $16,%rax +- movups (%rdi),%xmm10 +-.byte 102,15,56,220,233 +-.byte 102,15,56,220,241 +- movups 16(%rdi),%xmm11 +- movups 32(%rdi),%xmm12 +-.byte 102,15,56,220,249 +-.byte 102,68,15,56,220,193 +- +- call L$enc_loop8_enter +- +- movdqu 48(%rdi),%xmm13 +- pxor %xmm10,%xmm2 +- movdqu 64(%rdi),%xmm10 +- pxor %xmm11,%xmm3 +- movdqu %xmm2,(%rsi) +- pxor 
%xmm12,%xmm4 +- movdqu %xmm3,16(%rsi) +- pxor %xmm13,%xmm5 +- movdqu %xmm4,32(%rsi) +- pxor %xmm10,%xmm6 +- movdqu %xmm5,48(%rsi) +- movdqu %xmm6,64(%rsi) +- cmpq $6,%rdx +- jb L$ctr32_done +- +- movups 80(%rdi),%xmm11 +- xorps %xmm11,%xmm7 +- movups %xmm7,80(%rsi) +- je L$ctr32_done +- +- movups 96(%rdi),%xmm12 +- xorps %xmm12,%xmm8 +- movups %xmm8,96(%rsi) +- jmp L$ctr32_done +- +-.p2align 5 +-L$ctr32_loop4: +-.byte 102,15,56,220,209 +- leaq 16(%rcx),%rcx +- decl %eax +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +-.byte 102,15,56,220,233 +- movups (%rcx),%xmm1 +- jnz L$ctr32_loop4 +-.byte 102,15,56,221,209 +-.byte 102,15,56,221,217 +- movups (%rdi),%xmm10 +- movups 16(%rdi),%xmm11 +-.byte 102,15,56,221,225 +-.byte 102,15,56,221,233 +- movups 32(%rdi),%xmm12 +- movups 48(%rdi),%xmm13 +- +- xorps %xmm10,%xmm2 +- movups %xmm2,(%rsi) +- xorps %xmm11,%xmm3 +- movups %xmm3,16(%rsi) +- pxor %xmm12,%xmm4 +- movdqu %xmm4,32(%rsi) +- pxor %xmm13,%xmm5 +- movdqu %xmm5,48(%rsi) +- jmp L$ctr32_done +- +-.p2align 5 +-L$ctr32_loop3: +-.byte 102,15,56,220,209 +- leaq 16(%rcx),%rcx +- decl %eax +-.byte 102,15,56,220,217 +-.byte 102,15,56,220,225 +- movups (%rcx),%xmm1 +- jnz L$ctr32_loop3 +-.byte 102,15,56,221,209 +-.byte 102,15,56,221,217 +-.byte 102,15,56,221,225 +- +- movups (%rdi),%xmm10 +- xorps %xmm10,%xmm2 +- movups %xmm2,(%rsi) +- cmpq $2,%rdx +- jb L$ctr32_done +- +- movups 16(%rdi),%xmm11 +- xorps %xmm11,%xmm3 +- movups %xmm3,16(%rsi) +- je L$ctr32_done +- +- movups 32(%rdi),%xmm12 +- xorps %xmm12,%xmm4 +- movups %xmm4,32(%rsi) +- +-L$ctr32_done: +- xorps %xmm0,%xmm0 +- xorl %ebp,%ebp +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- pxor %xmm7,%xmm7 +- movaps %xmm0,0(%rsp) +- pxor %xmm8,%xmm8 +- movaps %xmm0,16(%rsp) +- pxor %xmm9,%xmm9 +- movaps %xmm0,32(%rsp) +- pxor %xmm10,%xmm10 +- movaps %xmm0,48(%rsp) +- pxor %xmm11,%xmm11 +- movaps %xmm0,64(%rsp) +- pxor %xmm12,%xmm12 +- movaps %xmm0,80(%rsp) +- pxor %xmm13,%xmm13 +- movaps %xmm0,96(%rsp) +- pxor %xmm14,%xmm14 +- movaps %xmm0,112(%rsp) +- pxor %xmm15,%xmm15 +- movq -8(%r11),%rbp +- +- leaq (%r11),%rsp +- +-L$ctr32_epilogue: +- .byte 0xf3,0xc3 +- +- +-.globl _aes_hw_cbc_encrypt +-.private_extern _aes_hw_cbc_encrypt +- +-.p2align 4 +-_aes_hw_cbc_encrypt: +- +- testq %rdx,%rdx +- jz L$cbc_ret +- +- movl 240(%rcx),%r10d +- movq %rcx,%r11 +- testl %r9d,%r9d +- jz L$cbc_decrypt +- +- movups (%r8),%xmm2 +- movl %r10d,%eax +- cmpq $16,%rdx +- jb L$cbc_enc_tail +- subq $16,%rdx +- jmp L$cbc_enc_loop +-.p2align 4 +-L$cbc_enc_loop: +- movups (%rdi),%xmm3 +- leaq 16(%rdi),%rdi +- +- movups (%rcx),%xmm0 +- movups 16(%rcx),%xmm1 +- xorps %xmm0,%xmm3 +- leaq 32(%rcx),%rcx +- xorps %xmm3,%xmm2 +-L$oop_enc1_6: +-.byte 102,15,56,220,209 +- decl %eax +- movups (%rcx),%xmm1 +- leaq 16(%rcx),%rcx +- jnz L$oop_enc1_6 +-.byte 102,15,56,221,209 +- movl %r10d,%eax +- movq %r11,%rcx +- movups %xmm2,0(%rsi) +- leaq 16(%rsi),%rsi +- subq $16,%rdx +- jnc L$cbc_enc_loop +- addq $16,%rdx +- jnz L$cbc_enc_tail +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- movups %xmm2,(%r8) +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- jmp L$cbc_ret +- +-L$cbc_enc_tail: +- movq %rdx,%rcx +- xchgq %rdi,%rsi +-.long 0x9066A4F3 +- movl $16,%ecx +- subq %rdx,%rcx +- xorl %eax,%eax +-.long 0x9066AAF3 +- leaq -16(%rdi),%rdi +- movl %r10d,%eax +- movq %rdi,%rsi +- movq %r11,%rcx +- xorq %rdx,%rdx +- jmp L$cbc_enc_loop +- +-.p2align 4 +-L$cbc_decrypt: +- cmpq $16,%rdx +- jne L$cbc_decrypt_bulk +- +- +- +- movdqu 
(%rdi),%xmm2 +- movdqu (%r8),%xmm3 +- movdqa %xmm2,%xmm4 +- movups (%rcx),%xmm0 +- movups 16(%rcx),%xmm1 +- leaq 32(%rcx),%rcx +- xorps %xmm0,%xmm2 +-L$oop_dec1_7: +-.byte 102,15,56,222,209 +- decl %r10d +- movups (%rcx),%xmm1 +- leaq 16(%rcx),%rcx +- jnz L$oop_dec1_7 +-.byte 102,15,56,223,209 +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- movdqu %xmm4,(%r8) +- xorps %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- jmp L$cbc_ret +-.p2align 4 +-L$cbc_decrypt_bulk: +- leaq (%rsp),%r11 +- +- pushq %rbp +- +- subq $16,%rsp +- andq $-16,%rsp +- movq %rcx,%rbp +- movups (%r8),%xmm10 +- movl %r10d,%eax +- cmpq $0x50,%rdx +- jbe L$cbc_dec_tail +- +- movups (%rcx),%xmm0 +- movdqu 0(%rdi),%xmm2 +- movdqu 16(%rdi),%xmm3 +- movdqa %xmm2,%xmm11 +- movdqu 32(%rdi),%xmm4 +- movdqa %xmm3,%xmm12 +- movdqu 48(%rdi),%xmm5 +- movdqa %xmm4,%xmm13 +- movdqu 64(%rdi),%xmm6 +- movdqa %xmm5,%xmm14 +- movdqu 80(%rdi),%xmm7 +- movdqa %xmm6,%xmm15 +- leaq _OPENSSL_ia32cap_P(%rip),%r9 +- movl 4(%r9),%r9d +- cmpq $0x70,%rdx +- jbe L$cbc_dec_six_or_seven +- +- andl $71303168,%r9d +- subq $0x50,%rdx +- cmpl $4194304,%r9d +- je L$cbc_dec_loop6_enter +- subq $0x20,%rdx +- leaq 112(%rcx),%rcx +- jmp L$cbc_dec_loop8_enter +-.p2align 4 +-L$cbc_dec_loop8: +- movups %xmm9,(%rsi) +- leaq 16(%rsi),%rsi +-L$cbc_dec_loop8_enter: +- movdqu 96(%rdi),%xmm8 +- pxor %xmm0,%xmm2 +- movdqu 112(%rdi),%xmm9 +- pxor %xmm0,%xmm3 +- movups 16-112(%rcx),%xmm1 +- pxor %xmm0,%xmm4 +- movq $-1,%rbp +- cmpq $0x70,%rdx +- pxor %xmm0,%xmm5 +- pxor %xmm0,%xmm6 +- pxor %xmm0,%xmm7 +- pxor %xmm0,%xmm8 +- +-.byte 102,15,56,222,209 +- pxor %xmm0,%xmm9 +- movups 32-112(%rcx),%xmm0 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,68,15,56,222,193 +- adcq $0,%rbp +- andq $128,%rbp +-.byte 102,68,15,56,222,201 +- addq %rdi,%rbp +- movups 48-112(%rcx),%xmm1 +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +-.byte 102,15,56,222,240 +-.byte 102,15,56,222,248 +-.byte 102,68,15,56,222,192 +-.byte 102,68,15,56,222,200 +- movups 64-112(%rcx),%xmm0 +- nop +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,68,15,56,222,193 +-.byte 102,68,15,56,222,201 +- movups 80-112(%rcx),%xmm1 +- nop +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +-.byte 102,15,56,222,240 +-.byte 102,15,56,222,248 +-.byte 102,68,15,56,222,192 +-.byte 102,68,15,56,222,200 +- movups 96-112(%rcx),%xmm0 +- nop +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,68,15,56,222,193 +-.byte 102,68,15,56,222,201 +- movups 112-112(%rcx),%xmm1 +- nop +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +-.byte 102,15,56,222,240 +-.byte 102,15,56,222,248 +-.byte 102,68,15,56,222,192 +-.byte 102,68,15,56,222,200 +- movups 128-112(%rcx),%xmm0 +- nop +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,68,15,56,222,193 +-.byte 102,68,15,56,222,201 +- movups 144-112(%rcx),%xmm1 +- cmpl $11,%eax +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 
+-.byte 102,15,56,222,240 +-.byte 102,15,56,222,248 +-.byte 102,68,15,56,222,192 +-.byte 102,68,15,56,222,200 +- movups 160-112(%rcx),%xmm0 +- jb L$cbc_dec_done +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,68,15,56,222,193 +-.byte 102,68,15,56,222,201 +- movups 176-112(%rcx),%xmm1 +- nop +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +-.byte 102,15,56,222,240 +-.byte 102,15,56,222,248 +-.byte 102,68,15,56,222,192 +-.byte 102,68,15,56,222,200 +- movups 192-112(%rcx),%xmm0 +- je L$cbc_dec_done +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +-.byte 102,68,15,56,222,193 +-.byte 102,68,15,56,222,201 +- movups 208-112(%rcx),%xmm1 +- nop +-.byte 102,15,56,222,208 +-.byte 102,15,56,222,216 +-.byte 102,15,56,222,224 +-.byte 102,15,56,222,232 +-.byte 102,15,56,222,240 +-.byte 102,15,56,222,248 +-.byte 102,68,15,56,222,192 +-.byte 102,68,15,56,222,200 +- movups 224-112(%rcx),%xmm0 +- jmp L$cbc_dec_done +-.p2align 4 +-L$cbc_dec_done: +-.byte 102,15,56,222,209 +-.byte 102,15,56,222,217 +- pxor %xmm0,%xmm10 +- pxor %xmm0,%xmm11 +-.byte 102,15,56,222,225 +-.byte 102,15,56,222,233 +- pxor %xmm0,%xmm12 +- pxor %xmm0,%xmm13 +-.byte 102,15,56,222,241 +-.byte 102,15,56,222,249 +- pxor %xmm0,%xmm14 +- pxor %xmm0,%xmm15 +-.byte 102,68,15,56,222,193 +-.byte 102,68,15,56,222,201 +- movdqu 80(%rdi),%xmm1 +- +-.byte 102,65,15,56,223,210 +- movdqu 96(%rdi),%xmm10 +- pxor %xmm0,%xmm1 +-.byte 102,65,15,56,223,219 +- pxor %xmm0,%xmm10 +- movdqu 112(%rdi),%xmm0 +-.byte 102,65,15,56,223,228 +- leaq 128(%rdi),%rdi +- movdqu 0(%rbp),%xmm11 +-.byte 102,65,15,56,223,237 +-.byte 102,65,15,56,223,246 +- movdqu 16(%rbp),%xmm12 +- movdqu 32(%rbp),%xmm13 +-.byte 102,65,15,56,223,255 +-.byte 102,68,15,56,223,193 +- movdqu 48(%rbp),%xmm14 +- movdqu 64(%rbp),%xmm15 +-.byte 102,69,15,56,223,202 +- movdqa %xmm0,%xmm10 +- movdqu 80(%rbp),%xmm1 +- movups -112(%rcx),%xmm0 +- +- movups %xmm2,(%rsi) +- movdqa %xmm11,%xmm2 +- movups %xmm3,16(%rsi) +- movdqa %xmm12,%xmm3 +- movups %xmm4,32(%rsi) +- movdqa %xmm13,%xmm4 +- movups %xmm5,48(%rsi) +- movdqa %xmm14,%xmm5 +- movups %xmm6,64(%rsi) +- movdqa %xmm15,%xmm6 +- movups %xmm7,80(%rsi) +- movdqa %xmm1,%xmm7 +- movups %xmm8,96(%rsi) +- leaq 112(%rsi),%rsi +- +- subq $0x80,%rdx +- ja L$cbc_dec_loop8 +- +- movaps %xmm9,%xmm2 +- leaq -112(%rcx),%rcx +- addq $0x70,%rdx +- jle L$cbc_dec_clear_tail_collected +- movups %xmm9,(%rsi) +- leaq 16(%rsi),%rsi +- cmpq $0x50,%rdx +- jbe L$cbc_dec_tail +- +- movaps %xmm11,%xmm2 +-L$cbc_dec_six_or_seven: +- cmpq $0x60,%rdx +- ja L$cbc_dec_seven +- +- movaps %xmm7,%xmm8 +- call _aesni_decrypt6 +- pxor %xmm10,%xmm2 +- movaps %xmm8,%xmm10 +- pxor %xmm11,%xmm3 +- movdqu %xmm2,(%rsi) +- pxor %xmm12,%xmm4 +- movdqu %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- pxor %xmm13,%xmm5 +- movdqu %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- pxor %xmm14,%xmm6 +- movdqu %xmm5,48(%rsi) +- pxor %xmm5,%xmm5 +- pxor %xmm15,%xmm7 +- movdqu %xmm6,64(%rsi) +- pxor %xmm6,%xmm6 +- leaq 80(%rsi),%rsi +- movdqa %xmm7,%xmm2 +- pxor %xmm7,%xmm7 +- jmp L$cbc_dec_tail_collected +- +-.p2align 4 +-L$cbc_dec_seven: +- movups 96(%rdi),%xmm8 +- xorps %xmm9,%xmm9 +- call _aesni_decrypt8 +- movups 80(%rdi),%xmm9 +- pxor %xmm10,%xmm2 +- movups 96(%rdi),%xmm10 +- pxor %xmm11,%xmm3 +- movdqu %xmm2,(%rsi) +- pxor %xmm12,%xmm4 +- 
movdqu %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- pxor %xmm13,%xmm5 +- movdqu %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- pxor %xmm14,%xmm6 +- movdqu %xmm5,48(%rsi) +- pxor %xmm5,%xmm5 +- pxor %xmm15,%xmm7 +- movdqu %xmm6,64(%rsi) +- pxor %xmm6,%xmm6 +- pxor %xmm9,%xmm8 +- movdqu %xmm7,80(%rsi) +- pxor %xmm7,%xmm7 +- leaq 96(%rsi),%rsi +- movdqa %xmm8,%xmm2 +- pxor %xmm8,%xmm8 +- pxor %xmm9,%xmm9 +- jmp L$cbc_dec_tail_collected +- +-.p2align 4 +-L$cbc_dec_loop6: +- movups %xmm7,(%rsi) +- leaq 16(%rsi),%rsi +- movdqu 0(%rdi),%xmm2 +- movdqu 16(%rdi),%xmm3 +- movdqa %xmm2,%xmm11 +- movdqu 32(%rdi),%xmm4 +- movdqa %xmm3,%xmm12 +- movdqu 48(%rdi),%xmm5 +- movdqa %xmm4,%xmm13 +- movdqu 64(%rdi),%xmm6 +- movdqa %xmm5,%xmm14 +- movdqu 80(%rdi),%xmm7 +- movdqa %xmm6,%xmm15 +-L$cbc_dec_loop6_enter: +- leaq 96(%rdi),%rdi +- movdqa %xmm7,%xmm8 +- +- call _aesni_decrypt6 +- +- pxor %xmm10,%xmm2 +- movdqa %xmm8,%xmm10 +- pxor %xmm11,%xmm3 +- movdqu %xmm2,(%rsi) +- pxor %xmm12,%xmm4 +- movdqu %xmm3,16(%rsi) +- pxor %xmm13,%xmm5 +- movdqu %xmm4,32(%rsi) +- pxor %xmm14,%xmm6 +- movq %rbp,%rcx +- movdqu %xmm5,48(%rsi) +- pxor %xmm15,%xmm7 +- movl %r10d,%eax +- movdqu %xmm6,64(%rsi) +- leaq 80(%rsi),%rsi +- subq $0x60,%rdx +- ja L$cbc_dec_loop6 +- +- movdqa %xmm7,%xmm2 +- addq $0x50,%rdx +- jle L$cbc_dec_clear_tail_collected +- movups %xmm7,(%rsi) +- leaq 16(%rsi),%rsi +- +-L$cbc_dec_tail: +- movups (%rdi),%xmm2 +- subq $0x10,%rdx +- jbe L$cbc_dec_one +- +- movups 16(%rdi),%xmm3 +- movaps %xmm2,%xmm11 +- subq $0x10,%rdx +- jbe L$cbc_dec_two +- +- movups 32(%rdi),%xmm4 +- movaps %xmm3,%xmm12 +- subq $0x10,%rdx +- jbe L$cbc_dec_three +- +- movups 48(%rdi),%xmm5 +- movaps %xmm4,%xmm13 +- subq $0x10,%rdx +- jbe L$cbc_dec_four +- +- movups 64(%rdi),%xmm6 +- movaps %xmm5,%xmm14 +- movaps %xmm6,%xmm15 +- xorps %xmm7,%xmm7 +- call _aesni_decrypt6 +- pxor %xmm10,%xmm2 +- movaps %xmm15,%xmm10 +- pxor %xmm11,%xmm3 +- movdqu %xmm2,(%rsi) +- pxor %xmm12,%xmm4 +- movdqu %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- pxor %xmm13,%xmm5 +- movdqu %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- pxor %xmm14,%xmm6 +- movdqu %xmm5,48(%rsi) +- pxor %xmm5,%xmm5 +- leaq 64(%rsi),%rsi +- movdqa %xmm6,%xmm2 +- pxor %xmm6,%xmm6 +- pxor %xmm7,%xmm7 +- subq $0x10,%rdx +- jmp L$cbc_dec_tail_collected +- +-.p2align 4 +-L$cbc_dec_one: +- movaps %xmm2,%xmm11 +- movups (%rcx),%xmm0 +- movups 16(%rcx),%xmm1 +- leaq 32(%rcx),%rcx +- xorps %xmm0,%xmm2 +-L$oop_dec1_8: +-.byte 102,15,56,222,209 +- decl %eax +- movups (%rcx),%xmm1 +- leaq 16(%rcx),%rcx +- jnz L$oop_dec1_8 +-.byte 102,15,56,223,209 +- xorps %xmm10,%xmm2 +- movaps %xmm11,%xmm10 +- jmp L$cbc_dec_tail_collected +-.p2align 4 +-L$cbc_dec_two: +- movaps %xmm3,%xmm12 +- call _aesni_decrypt2 +- pxor %xmm10,%xmm2 +- movaps %xmm12,%xmm10 +- pxor %xmm11,%xmm3 +- movdqu %xmm2,(%rsi) +- movdqa %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- leaq 16(%rsi),%rsi +- jmp L$cbc_dec_tail_collected +-.p2align 4 +-L$cbc_dec_three: +- movaps %xmm4,%xmm13 +- call _aesni_decrypt3 +- pxor %xmm10,%xmm2 +- movaps %xmm13,%xmm10 +- pxor %xmm11,%xmm3 +- movdqu %xmm2,(%rsi) +- pxor %xmm12,%xmm4 +- movdqu %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- movdqa %xmm4,%xmm2 +- pxor %xmm4,%xmm4 +- leaq 32(%rsi),%rsi +- jmp L$cbc_dec_tail_collected +-.p2align 4 +-L$cbc_dec_four: +- movaps %xmm5,%xmm14 +- call _aesni_decrypt4 +- pxor %xmm10,%xmm2 +- movaps %xmm14,%xmm10 +- pxor %xmm11,%xmm3 +- movdqu %xmm2,(%rsi) +- pxor %xmm12,%xmm4 +- movdqu %xmm3,16(%rsi) +- pxor %xmm3,%xmm3 +- pxor %xmm13,%xmm5 +- movdqu %xmm4,32(%rsi) +- pxor %xmm4,%xmm4 +- movdqa %xmm5,%xmm2 +- 
pxor %xmm5,%xmm5 +- leaq 48(%rsi),%rsi +- jmp L$cbc_dec_tail_collected +- +-.p2align 4 +-L$cbc_dec_clear_tail_collected: +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- pxor %xmm7,%xmm7 +- pxor %xmm8,%xmm8 +- pxor %xmm9,%xmm9 +-L$cbc_dec_tail_collected: +- movups %xmm10,(%r8) +- andq $15,%rdx +- jnz L$cbc_dec_tail_partial +- movups %xmm2,(%rsi) +- pxor %xmm2,%xmm2 +- jmp L$cbc_dec_ret +-.p2align 4 +-L$cbc_dec_tail_partial: +- movaps %xmm2,(%rsp) +- pxor %xmm2,%xmm2 +- movq $16,%rcx +- movq %rsi,%rdi +- subq %rdx,%rcx +- leaq (%rsp),%rsi +-.long 0x9066A4F3 +- movdqa %xmm2,(%rsp) +- +-L$cbc_dec_ret: +- xorps %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- movq -8(%r11),%rbp +- +- leaq (%r11),%rsp +- +-L$cbc_ret: +- .byte 0xf3,0xc3 +- +- +-.globl _aes_hw_set_decrypt_key +-.private_extern _aes_hw_set_decrypt_key +- +-.p2align 4 +-_aes_hw_set_decrypt_key: +- +-.byte 0x48,0x83,0xEC,0x08 +- +- call __aesni_set_encrypt_key +- shll $4,%esi +- testl %eax,%eax +- jnz L$dec_key_ret +- leaq 16(%rdx,%rsi,1),%rdi +- +- movups (%rdx),%xmm0 +- movups (%rdi),%xmm1 +- movups %xmm0,(%rdi) +- movups %xmm1,(%rdx) +- leaq 16(%rdx),%rdx +- leaq -16(%rdi),%rdi +- +-L$dec_key_inverse: +- movups (%rdx),%xmm0 +- movups (%rdi),%xmm1 +-.byte 102,15,56,219,192 +-.byte 102,15,56,219,201 +- leaq 16(%rdx),%rdx +- leaq -16(%rdi),%rdi +- movups %xmm0,16(%rdi) +- movups %xmm1,-16(%rdx) +- cmpq %rdx,%rdi +- ja L$dec_key_inverse +- +- movups (%rdx),%xmm0 +-.byte 102,15,56,219,192 +- pxor %xmm1,%xmm1 +- movups %xmm0,(%rdi) +- pxor %xmm0,%xmm0 +-L$dec_key_ret: +- addq $8,%rsp +- +- .byte 0xf3,0xc3 +- +-L$SEH_end_set_decrypt_key: +- +-.globl _aes_hw_set_encrypt_key +-.private_extern _aes_hw_set_encrypt_key +- +-.p2align 4 +-_aes_hw_set_encrypt_key: +-__aesni_set_encrypt_key: +- +-#ifdef BORINGSSL_DISPATCH_TEST +- movb $1,_BORINGSSL_function_hit+3(%rip) +-#endif +-.byte 0x48,0x83,0xEC,0x08 +- +- movq $-1,%rax +- testq %rdi,%rdi +- jz L$enc_key_ret +- testq %rdx,%rdx +- jz L$enc_key_ret +- +- movups (%rdi),%xmm0 +- xorps %xmm4,%xmm4 +- leaq _OPENSSL_ia32cap_P(%rip),%r10 +- movl 4(%r10),%r10d +- andl $268437504,%r10d +- leaq 16(%rdx),%rax +- cmpl $256,%esi +- je L$14rounds +- cmpl $192,%esi +- je L$12rounds +- cmpl $128,%esi +- jne L$bad_keybits +- +-L$10rounds: +- movl $9,%esi +- cmpl $268435456,%r10d +- je L$10rounds_alt +- +- movups %xmm0,(%rdx) +-.byte 102,15,58,223,200,1 +- call L$key_expansion_128_cold +-.byte 102,15,58,223,200,2 +- call L$key_expansion_128 +-.byte 102,15,58,223,200,4 +- call L$key_expansion_128 +-.byte 102,15,58,223,200,8 +- call L$key_expansion_128 +-.byte 102,15,58,223,200,16 +- call L$key_expansion_128 +-.byte 102,15,58,223,200,32 +- call L$key_expansion_128 +-.byte 102,15,58,223,200,64 +- call L$key_expansion_128 +-.byte 102,15,58,223,200,128 +- call L$key_expansion_128 +-.byte 102,15,58,223,200,27 +- call L$key_expansion_128 +-.byte 102,15,58,223,200,54 +- call L$key_expansion_128 +- movups %xmm0,(%rax) +- movl %esi,80(%rax) +- xorl %eax,%eax +- jmp L$enc_key_ret +- +-.p2align 4 +-L$10rounds_alt: +- movdqa L$key_rotate(%rip),%xmm5 +- movl $8,%r10d +- movdqa L$key_rcon1(%rip),%xmm4 +- movdqa %xmm0,%xmm2 +- movdqu %xmm0,(%rdx) +- jmp L$oop_key128 +- +-.p2align 4 +-L$oop_key128: +-.byte 102,15,56,0,197 +-.byte 102,15,56,221,196 +- pslld $1,%xmm4 +- leaq 16(%rax),%rax +- +- movdqa %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm3,%xmm2 +- +- pxor %xmm2,%xmm0 +- movdqu %xmm0,-16(%rax) +- movdqa %xmm0,%xmm2 +- +- decl 
%r10d +- jnz L$oop_key128 +- +- movdqa L$key_rcon1b(%rip),%xmm4 +- +-.byte 102,15,56,0,197 +-.byte 102,15,56,221,196 +- pslld $1,%xmm4 +- +- movdqa %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm3,%xmm2 +- +- pxor %xmm2,%xmm0 +- movdqu %xmm0,(%rax) +- +- movdqa %xmm0,%xmm2 +-.byte 102,15,56,0,197 +-.byte 102,15,56,221,196 +- +- movdqa %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm2,%xmm3 +- pslldq $4,%xmm2 +- pxor %xmm3,%xmm2 +- +- pxor %xmm2,%xmm0 +- movdqu %xmm0,16(%rax) +- +- movl %esi,96(%rax) +- xorl %eax,%eax +- jmp L$enc_key_ret +- +-.p2align 4 +-L$12rounds: +- movq 16(%rdi),%xmm2 +- movl $11,%esi +- cmpl $268435456,%r10d +- je L$12rounds_alt +- +- movups %xmm0,(%rdx) +-.byte 102,15,58,223,202,1 +- call L$key_expansion_192a_cold +-.byte 102,15,58,223,202,2 +- call L$key_expansion_192b +-.byte 102,15,58,223,202,4 +- call L$key_expansion_192a +-.byte 102,15,58,223,202,8 +- call L$key_expansion_192b +-.byte 102,15,58,223,202,16 +- call L$key_expansion_192a +-.byte 102,15,58,223,202,32 +- call L$key_expansion_192b +-.byte 102,15,58,223,202,64 +- call L$key_expansion_192a +-.byte 102,15,58,223,202,128 +- call L$key_expansion_192b +- movups %xmm0,(%rax) +- movl %esi,48(%rax) +- xorq %rax,%rax +- jmp L$enc_key_ret +- +-.p2align 4 +-L$12rounds_alt: +- movdqa L$key_rotate192(%rip),%xmm5 +- movdqa L$key_rcon1(%rip),%xmm4 +- movl $8,%r10d +- movdqu %xmm0,(%rdx) +- jmp L$oop_key192 +- +-.p2align 4 +-L$oop_key192: +- movq %xmm2,0(%rax) +- movdqa %xmm2,%xmm1 +-.byte 102,15,56,0,213 +-.byte 102,15,56,221,212 +- pslld $1,%xmm4 +- leaq 24(%rax),%rax +- +- movdqa %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm3,%xmm0 +- +- pshufd $0xff,%xmm0,%xmm3 +- pxor %xmm1,%xmm3 +- pslldq $4,%xmm1 +- pxor %xmm1,%xmm3 +- +- pxor %xmm2,%xmm0 +- pxor %xmm3,%xmm2 +- movdqu %xmm0,-16(%rax) +- +- decl %r10d +- jnz L$oop_key192 +- +- movl %esi,32(%rax) +- xorl %eax,%eax +- jmp L$enc_key_ret +- +-.p2align 4 +-L$14rounds: +- movups 16(%rdi),%xmm2 +- movl $13,%esi +- leaq 16(%rax),%rax +- cmpl $268435456,%r10d +- je L$14rounds_alt +- +- movups %xmm0,(%rdx) +- movups %xmm2,16(%rdx) +-.byte 102,15,58,223,202,1 +- call L$key_expansion_256a_cold +-.byte 102,15,58,223,200,1 +- call L$key_expansion_256b +-.byte 102,15,58,223,202,2 +- call L$key_expansion_256a +-.byte 102,15,58,223,200,2 +- call L$key_expansion_256b +-.byte 102,15,58,223,202,4 +- call L$key_expansion_256a +-.byte 102,15,58,223,200,4 +- call L$key_expansion_256b +-.byte 102,15,58,223,202,8 +- call L$key_expansion_256a +-.byte 102,15,58,223,200,8 +- call L$key_expansion_256b +-.byte 102,15,58,223,202,16 +- call L$key_expansion_256a +-.byte 102,15,58,223,200,16 +- call L$key_expansion_256b +-.byte 102,15,58,223,202,32 +- call L$key_expansion_256a +-.byte 102,15,58,223,200,32 +- call L$key_expansion_256b +-.byte 102,15,58,223,202,64 +- call L$key_expansion_256a +- movups %xmm0,(%rax) +- movl %esi,16(%rax) +- xorq %rax,%rax +- jmp L$enc_key_ret +- +-.p2align 4 +-L$14rounds_alt: +- movdqa L$key_rotate(%rip),%xmm5 +- movdqa L$key_rcon1(%rip),%xmm4 +- movl $7,%r10d +- movdqu %xmm0,0(%rdx) +- movdqa %xmm2,%xmm1 +- movdqu %xmm2,16(%rdx) +- jmp L$oop_key256 +- +-.p2align 4 +-L$oop_key256: +-.byte 102,15,56,0,213 +-.byte 102,15,56,221,212 +- +- movdqa %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm0,%xmm3 +- pslldq $4,%xmm0 +- pxor %xmm3,%xmm0 +- pslld 
$1,%xmm4 +- +- pxor %xmm2,%xmm0 +- movdqu %xmm0,(%rax) +- +- decl %r10d +- jz L$done_key256 +- +- pshufd $0xff,%xmm0,%xmm2 +- pxor %xmm3,%xmm3 +-.byte 102,15,56,221,211 +- +- movdqa %xmm1,%xmm3 +- pslldq $4,%xmm1 +- pxor %xmm1,%xmm3 +- pslldq $4,%xmm1 +- pxor %xmm1,%xmm3 +- pslldq $4,%xmm1 +- pxor %xmm3,%xmm1 +- +- pxor %xmm1,%xmm2 +- movdqu %xmm2,16(%rax) +- leaq 32(%rax),%rax +- movdqa %xmm2,%xmm1 +- +- jmp L$oop_key256 +- +-L$done_key256: +- movl %esi,16(%rax) +- xorl %eax,%eax +- jmp L$enc_key_ret +- +-.p2align 4 +-L$bad_keybits: +- movq $-2,%rax +-L$enc_key_ret: +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- addq $8,%rsp +- +- .byte 0xf3,0xc3 +- +-L$SEH_end_set_encrypt_key: +- +-.p2align 4 +-L$key_expansion_128: +- movups %xmm0,(%rax) +- leaq 16(%rax),%rax +-L$key_expansion_128_cold: +- shufps $16,%xmm0,%xmm4 +- xorps %xmm4,%xmm0 +- shufps $140,%xmm0,%xmm4 +- xorps %xmm4,%xmm0 +- shufps $255,%xmm1,%xmm1 +- xorps %xmm1,%xmm0 +- .byte 0xf3,0xc3 +- +-.p2align 4 +-L$key_expansion_192a: +- movups %xmm0,(%rax) +- leaq 16(%rax),%rax +-L$key_expansion_192a_cold: +- movaps %xmm2,%xmm5 +-L$key_expansion_192b_warm: +- shufps $16,%xmm0,%xmm4 +- movdqa %xmm2,%xmm3 +- xorps %xmm4,%xmm0 +- shufps $140,%xmm0,%xmm4 +- pslldq $4,%xmm3 +- xorps %xmm4,%xmm0 +- pshufd $85,%xmm1,%xmm1 +- pxor %xmm3,%xmm2 +- pxor %xmm1,%xmm0 +- pshufd $255,%xmm0,%xmm3 +- pxor %xmm3,%xmm2 +- .byte 0xf3,0xc3 +- +-.p2align 4 +-L$key_expansion_192b: +- movaps %xmm0,%xmm3 +- shufps $68,%xmm0,%xmm5 +- movups %xmm5,(%rax) +- shufps $78,%xmm2,%xmm3 +- movups %xmm3,16(%rax) +- leaq 32(%rax),%rax +- jmp L$key_expansion_192b_warm +- +-.p2align 4 +-L$key_expansion_256a: +- movups %xmm2,(%rax) +- leaq 16(%rax),%rax +-L$key_expansion_256a_cold: +- shufps $16,%xmm0,%xmm4 +- xorps %xmm4,%xmm0 +- shufps $140,%xmm0,%xmm4 +- xorps %xmm4,%xmm0 +- shufps $255,%xmm1,%xmm1 +- xorps %xmm1,%xmm0 +- .byte 0xf3,0xc3 +- +-.p2align 4 +-L$key_expansion_256b: +- movups %xmm0,(%rax) +- leaq 16(%rax),%rax +- +- shufps $16,%xmm2,%xmm4 +- xorps %xmm4,%xmm2 +- shufps $140,%xmm2,%xmm4 +- xorps %xmm4,%xmm2 +- shufps $170,%xmm1,%xmm1 +- xorps %xmm1,%xmm2 +- .byte 0xf3,0xc3 +- +- +-.p2align 6 +-L$bswap_mask: +-.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +-L$increment32: +-.long 6,6,6,0 +-L$increment64: +-.long 1,0,0,0 +-L$xts_magic: +-.long 0x87,0,1,0 +-L$increment1: +-.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 +-L$key_rotate: +-.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d +-L$key_rotate192: +-.long 0x04070605,0x04070605,0x04070605,0x04070605 +-L$key_rcon1: +-.long 1,1,1,1 +-L$key_rcon1b: +-.long 0x1b,0x1b,0x1b,0x1b +- +-.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +-.p2align 6 +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S b/mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S +deleted file mode 100644 +index 7f92fc5..0000000 +--- a/mac-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S ++++ /dev/null +@@ -1,426 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +- +- +- +- +-.globl _gcm_gmult_ssse3 +-.private_extern _gcm_gmult_ssse3 +-.p2align 4 +-_gcm_gmult_ssse3: +- +-L$gmult_seh_begin: +- movdqu (%rdi),%xmm0 +- movdqa L$reverse_bytes(%rip),%xmm10 +- movdqa L$low4_mask(%rip),%xmm2 +- +- +-.byte 102,65,15,56,0,194 +- +- +- movdqa %xmm2,%xmm1 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm2,%xmm0 +- +- +- +- +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- movq $5,%rax +-L$oop_row_1: +- movdqa (%rsi),%xmm4 +- leaq 16(%rsi),%rsi +- +- +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- +- +- +- +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- +- +- pxor %xmm5,%xmm2 +- +- +- +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- +- +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- +- subq $1,%rax +- jnz L$oop_row_1 +- +- +- +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movq $5,%rax +-L$oop_row_2: +- movdqa (%rsi),%xmm4 +- leaq 16(%rsi),%rsi +- +- +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- +- +- +- +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- +- +- pxor %xmm5,%xmm2 +- +- +- +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- +- +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- +- subq $1,%rax +- jnz L$oop_row_2 +- +- +- +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movq $6,%rax +-L$oop_row_3: +- movdqa (%rsi),%xmm4 +- leaq 16(%rsi),%rsi +- +- +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- +- +- +- +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- +- +- pxor %xmm5,%xmm2 +- +- +- +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- +- +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- +- subq $1,%rax +- jnz L$oop_row_3 +- +- +- +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- +-.byte 102,65,15,56,0,210 +- movdqu %xmm2,(%rdi) +- +- +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- .byte 0xf3,0xc3 +-L$gmult_seh_end: +- +- +- +- +- +- +- +- +-.globl _gcm_ghash_ssse3 +-.private_extern _gcm_ghash_ssse3 +-.p2align 4 +-_gcm_ghash_ssse3: +-L$ghash_seh_begin: +- +- movdqu (%rdi),%xmm0 +- movdqa L$reverse_bytes(%rip),%xmm10 +- movdqa L$low4_mask(%rip),%xmm11 +- +- +- andq $-16,%rcx +- +- +- +-.byte 102,65,15,56,0,194 +- +- +- pxor %xmm3,%xmm3 +-L$oop_ghash: +- +- movdqu (%rdx),%xmm1 +-.byte 102,65,15,56,0,202 +- pxor %xmm1,%xmm0 +- +- +- movdqa %xmm11,%xmm1 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm11,%xmm0 +- +- +- +- +- pxor %xmm2,%xmm2 +- +- movq $5,%rax +-L$oop_row_4: +- movdqa (%rsi),%xmm4 +- leaq 16(%rsi),%rsi +- +- +- movdqa 
%xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- +- +- +- +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- +- +- pxor %xmm5,%xmm2 +- +- +- +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- +- +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- +- subq $1,%rax +- jnz L$oop_row_4 +- +- +- +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movq $5,%rax +-L$oop_row_5: +- movdqa (%rsi),%xmm4 +- leaq 16(%rsi),%rsi +- +- +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- +- +- +- +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- +- +- pxor %xmm5,%xmm2 +- +- +- +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- +- +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- +- subq $1,%rax +- jnz L$oop_row_5 +- +- +- +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movq $6,%rax +-L$oop_row_6: +- movdqa (%rsi),%xmm4 +- leaq 16(%rsi),%rsi +- +- +- movdqa %xmm2,%xmm6 +-.byte 102,15,58,15,243,1 +- movdqa %xmm6,%xmm3 +- psrldq $1,%xmm2 +- +- +- +- +- movdqa %xmm4,%xmm5 +-.byte 102,15,56,0,224 +-.byte 102,15,56,0,233 +- +- +- pxor %xmm5,%xmm2 +- +- +- +- movdqa %xmm4,%xmm5 +- psllq $60,%xmm5 +- movdqa %xmm5,%xmm6 +- pslldq $8,%xmm6 +- pxor %xmm6,%xmm3 +- +- +- psrldq $8,%xmm5 +- pxor %xmm5,%xmm2 +- psrlq $4,%xmm4 +- pxor %xmm4,%xmm2 +- +- subq $1,%rax +- jnz L$oop_row_6 +- +- +- +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $1,%xmm3 +- pxor %xmm3,%xmm2 +- psrlq $5,%xmm3 +- pxor %xmm3,%xmm2 +- pxor %xmm3,%xmm3 +- movdqa %xmm2,%xmm0 +- +- +- leaq -256(%rsi),%rsi +- +- +- leaq 16(%rdx),%rdx +- subq $16,%rcx +- jnz L$oop_ghash +- +- +-.byte 102,65,15,56,0,194 +- movdqu %xmm0,(%rdi) +- +- +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- .byte 0xf3,0xc3 +-L$ghash_seh_end: +- +- +- +-.p2align 4 +- +- +-L$reverse_bytes: +-.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +- +-L$low4_mask: +-.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/ghash-x86_64.S b/mac-x86_64/crypto/fipsmodule/ghash-x86_64.S +deleted file mode 100644 +index fd767a0..0000000 +--- a/mac-x86_64/crypto/fipsmodule/ghash-x86_64.S ++++ /dev/null +@@ -1,1125 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +-.globl _gcm_init_clmul +-.private_extern _gcm_init_clmul +- +-.p2align 4 +-_gcm_init_clmul: +- +-L$_init_clmul: +- movdqu (%rsi),%xmm2 +- pshufd $78,%xmm2,%xmm2 +- +- +- pshufd $255,%xmm2,%xmm4 +- movdqa %xmm2,%xmm3 +- psllq $1,%xmm2 +- pxor %xmm5,%xmm5 +- psrlq $63,%xmm3 +- pcmpgtd %xmm4,%xmm5 +- pslldq $8,%xmm3 +- por %xmm3,%xmm2 +- +- +- pand L$0x1c2_polynomial(%rip),%xmm5 +- pxor %xmm5,%xmm2 +- +- +- pshufd $78,%xmm2,%xmm6 +- movdqa %xmm2,%xmm0 +- pxor %xmm2,%xmm6 +- movdqa %xmm0,%xmm1 +- pshufd $78,%xmm0,%xmm3 +- pxor %xmm0,%xmm3 +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,222,0 +- pxor %xmm0,%xmm3 +- pxor %xmm1,%xmm3 +- +- movdqa %xmm3,%xmm4 +- psrldq $8,%xmm3 +- pslldq $8,%xmm4 +- pxor %xmm3,%xmm1 +- pxor %xmm4,%xmm0 +- +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- +- +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +- pshufd $78,%xmm2,%xmm3 +- pshufd $78,%xmm0,%xmm4 +- pxor %xmm2,%xmm3 +- movdqu %xmm2,0(%rdi) +- pxor %xmm0,%xmm4 +- movdqu %xmm0,16(%rdi) +-.byte 102,15,58,15,227,8 +- movdqu %xmm4,32(%rdi) +- movdqa %xmm0,%xmm1 +- pshufd $78,%xmm0,%xmm3 +- pxor %xmm0,%xmm3 +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,222,0 +- pxor %xmm0,%xmm3 +- pxor %xmm1,%xmm3 +- +- movdqa %xmm3,%xmm4 +- psrldq $8,%xmm3 +- pslldq $8,%xmm4 +- pxor %xmm3,%xmm1 +- pxor %xmm4,%xmm0 +- +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- +- +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +- movdqa %xmm0,%xmm5 +- movdqa %xmm0,%xmm1 +- pshufd $78,%xmm0,%xmm3 +- pxor %xmm0,%xmm3 +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,222,0 +- pxor %xmm0,%xmm3 +- pxor %xmm1,%xmm3 +- +- movdqa %xmm3,%xmm4 +- psrldq $8,%xmm3 +- pslldq $8,%xmm4 +- pxor %xmm3,%xmm1 +- pxor %xmm4,%xmm0 +- +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- +- +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +- pshufd $78,%xmm5,%xmm3 +- pshufd $78,%xmm0,%xmm4 +- pxor %xmm5,%xmm3 +- movdqu %xmm5,48(%rdi) +- pxor %xmm0,%xmm4 +- movdqu %xmm0,64(%rdi) +-.byte 102,15,58,15,227,8 +- movdqu %xmm4,80(%rdi) +- .byte 0xf3,0xc3 +- +- +-.globl _gcm_gmult_clmul +-.private_extern _gcm_gmult_clmul +- +-.p2align 4 +-_gcm_gmult_clmul: +- +-L$_gmult_clmul: +- movdqu (%rdi),%xmm0 +- movdqa L$bswap_mask(%rip),%xmm5 +- movdqu (%rsi),%xmm2 +- movdqu 32(%rsi),%xmm4 +-.byte 102,15,56,0,197 +- movdqa %xmm0,%xmm1 +- pshufd 
$78,%xmm0,%xmm3 +- pxor %xmm0,%xmm3 +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,220,0 +- pxor %xmm0,%xmm3 +- pxor %xmm1,%xmm3 +- +- movdqa %xmm3,%xmm4 +- psrldq $8,%xmm3 +- pslldq $8,%xmm4 +- pxor %xmm3,%xmm1 +- pxor %xmm4,%xmm0 +- +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- +- +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +-.byte 102,15,56,0,197 +- movdqu %xmm0,(%rdi) +- .byte 0xf3,0xc3 +- +- +-.globl _gcm_ghash_clmul +-.private_extern _gcm_ghash_clmul +- +-.p2align 5 +-_gcm_ghash_clmul: +- +-L$_ghash_clmul: +- movdqa L$bswap_mask(%rip),%xmm10 +- +- movdqu (%rdi),%xmm0 +- movdqu (%rsi),%xmm2 +- movdqu 32(%rsi),%xmm7 +-.byte 102,65,15,56,0,194 +- +- subq $0x10,%rcx +- jz L$odd_tail +- +- movdqu 16(%rsi),%xmm6 +- leaq _OPENSSL_ia32cap_P(%rip),%rax +- movl 4(%rax),%eax +- cmpq $0x30,%rcx +- jb L$skip4x +- +- andl $71303168,%eax +- cmpl $4194304,%eax +- je L$skip4x +- +- subq $0x30,%rcx +- movq $0xA040608020C0E000,%rax +- movdqu 48(%rsi),%xmm14 +- movdqu 64(%rsi),%xmm15 +- +- +- +- +- movdqu 48(%rdx),%xmm3 +- movdqu 32(%rdx),%xmm11 +-.byte 102,65,15,56,0,218 +-.byte 102,69,15,56,0,218 +- movdqa %xmm3,%xmm5 +- pshufd $78,%xmm3,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,68,218,0 +-.byte 102,15,58,68,234,17 +-.byte 102,15,58,68,231,0 +- +- movdqa %xmm11,%xmm13 +- pshufd $78,%xmm11,%xmm12 +- pxor %xmm11,%xmm12 +-.byte 102,68,15,58,68,222,0 +-.byte 102,68,15,58,68,238,17 +-.byte 102,68,15,58,68,231,16 +- xorps %xmm11,%xmm3 +- xorps %xmm13,%xmm5 +- movups 80(%rsi),%xmm7 +- xorps %xmm12,%xmm4 +- +- movdqu 16(%rdx),%xmm11 +- movdqu 0(%rdx),%xmm8 +-.byte 102,69,15,56,0,218 +-.byte 102,69,15,56,0,194 +- movdqa %xmm11,%xmm13 +- pshufd $78,%xmm11,%xmm12 +- pxor %xmm8,%xmm0 +- pxor %xmm11,%xmm12 +-.byte 102,69,15,58,68,222,0 +- movdqa %xmm0,%xmm1 +- pshufd $78,%xmm0,%xmm8 +- pxor %xmm0,%xmm8 +-.byte 102,69,15,58,68,238,17 +-.byte 102,68,15,58,68,231,0 +- xorps %xmm11,%xmm3 +- xorps %xmm13,%xmm5 +- +- leaq 64(%rdx),%rdx +- subq $0x40,%rcx +- jc L$tail4x +- +- jmp L$mod4_loop +-.p2align 5 +-L$mod4_loop: +-.byte 102,65,15,58,68,199,0 +- xorps %xmm12,%xmm4 +- movdqu 48(%rdx),%xmm11 +-.byte 102,69,15,56,0,218 +-.byte 102,65,15,58,68,207,17 +- xorps %xmm3,%xmm0 +- movdqu 32(%rdx),%xmm3 +- movdqa %xmm11,%xmm13 +-.byte 102,68,15,58,68,199,16 +- pshufd $78,%xmm11,%xmm12 +- xorps %xmm5,%xmm1 +- pxor %xmm11,%xmm12 +-.byte 102,65,15,56,0,218 +- movups 32(%rsi),%xmm7 +- xorps %xmm4,%xmm8 +-.byte 102,68,15,58,68,218,0 +- pshufd $78,%xmm3,%xmm4 +- +- pxor %xmm0,%xmm8 +- movdqa %xmm3,%xmm5 +- pxor %xmm1,%xmm8 +- pxor %xmm3,%xmm4 +- movdqa %xmm8,%xmm9 +-.byte 102,68,15,58,68,234,17 +- pslldq $8,%xmm8 +- psrldq $8,%xmm9 +- pxor %xmm8,%xmm0 +- movdqa L$7_mask(%rip),%xmm8 +- pxor %xmm9,%xmm1 +-.byte 102,76,15,110,200 +- +- pand %xmm0,%xmm8 +-.byte 102,69,15,56,0,200 +- pxor %xmm0,%xmm9 +-.byte 102,68,15,58,68,231,0 +- psllq $57,%xmm9 +- movdqa %xmm9,%xmm8 +- pslldq $8,%xmm9 +-.byte 102,15,58,68,222,0 +- psrldq $8,%xmm8 +- pxor %xmm9,%xmm0 +- pxor %xmm8,%xmm1 +- movdqu 0(%rdx),%xmm8 +- +- movdqa %xmm0,%xmm9 +- psrlq $1,%xmm0 +-.byte 102,15,58,68,238,17 +- xorps %xmm11,%xmm3 +- movdqu 16(%rdx),%xmm11 +-.byte 102,69,15,56,0,218 +-.byte 102,15,58,68,231,16 +- xorps %xmm13,%xmm5 +- movups 
80(%rsi),%xmm7 +-.byte 102,69,15,56,0,194 +- pxor %xmm9,%xmm1 +- pxor %xmm0,%xmm9 +- psrlq $5,%xmm0 +- +- movdqa %xmm11,%xmm13 +- pxor %xmm12,%xmm4 +- pshufd $78,%xmm11,%xmm12 +- pxor %xmm9,%xmm0 +- pxor %xmm8,%xmm1 +- pxor %xmm11,%xmm12 +-.byte 102,69,15,58,68,222,0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +- movdqa %xmm0,%xmm1 +-.byte 102,69,15,58,68,238,17 +- xorps %xmm11,%xmm3 +- pshufd $78,%xmm0,%xmm8 +- pxor %xmm0,%xmm8 +- +-.byte 102,68,15,58,68,231,0 +- xorps %xmm13,%xmm5 +- +- leaq 64(%rdx),%rdx +- subq $0x40,%rcx +- jnc L$mod4_loop +- +-L$tail4x: +-.byte 102,65,15,58,68,199,0 +-.byte 102,65,15,58,68,207,17 +-.byte 102,68,15,58,68,199,16 +- xorps %xmm12,%xmm4 +- xorps %xmm3,%xmm0 +- xorps %xmm5,%xmm1 +- pxor %xmm0,%xmm1 +- pxor %xmm4,%xmm8 +- +- pxor %xmm1,%xmm8 +- pxor %xmm0,%xmm1 +- +- movdqa %xmm8,%xmm9 +- psrldq $8,%xmm8 +- pslldq $8,%xmm9 +- pxor %xmm8,%xmm1 +- pxor %xmm9,%xmm0 +- +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- +- +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +- addq $0x40,%rcx +- jz L$done +- movdqu 32(%rsi),%xmm7 +- subq $0x10,%rcx +- jz L$odd_tail +-L$skip4x: +- +- +- +- +- +- movdqu (%rdx),%xmm8 +- movdqu 16(%rdx),%xmm3 +-.byte 102,69,15,56,0,194 +-.byte 102,65,15,56,0,218 +- pxor %xmm8,%xmm0 +- +- movdqa %xmm3,%xmm5 +- pshufd $78,%xmm3,%xmm4 +- pxor %xmm3,%xmm4 +-.byte 102,15,58,68,218,0 +-.byte 102,15,58,68,234,17 +-.byte 102,15,58,68,231,0 +- +- leaq 32(%rdx),%rdx +- nop +- subq $0x20,%rcx +- jbe L$even_tail +- nop +- jmp L$mod_loop +- +-.p2align 5 +-L$mod_loop: +- movdqa %xmm0,%xmm1 +- movdqa %xmm4,%xmm8 +- pshufd $78,%xmm0,%xmm4 +- pxor %xmm0,%xmm4 +- +-.byte 102,15,58,68,198,0 +-.byte 102,15,58,68,206,17 +-.byte 102,15,58,68,231,16 +- +- pxor %xmm3,%xmm0 +- pxor %xmm5,%xmm1 +- movdqu (%rdx),%xmm9 +- pxor %xmm0,%xmm8 +-.byte 102,69,15,56,0,202 +- movdqu 16(%rdx),%xmm3 +- +- pxor %xmm1,%xmm8 +- pxor %xmm9,%xmm1 +- pxor %xmm8,%xmm4 +-.byte 102,65,15,56,0,218 +- movdqa %xmm4,%xmm8 +- psrldq $8,%xmm8 +- pslldq $8,%xmm4 +- pxor %xmm8,%xmm1 +- pxor %xmm4,%xmm0 +- +- movdqa %xmm3,%xmm5 +- +- movdqa %xmm0,%xmm9 +- movdqa %xmm0,%xmm8 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm8 +-.byte 102,15,58,68,218,0 +- psllq $1,%xmm0 +- pxor %xmm8,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm8 +- pslldq $8,%xmm0 +- psrldq $8,%xmm8 +- pxor %xmm9,%xmm0 +- pshufd $78,%xmm5,%xmm4 +- pxor %xmm8,%xmm1 +- pxor %xmm5,%xmm4 +- +- movdqa %xmm0,%xmm9 +- psrlq $1,%xmm0 +-.byte 102,15,58,68,234,17 +- pxor %xmm9,%xmm1 +- pxor %xmm0,%xmm9 +- psrlq $5,%xmm0 +- pxor %xmm9,%xmm0 +- leaq 32(%rdx),%rdx +- psrlq $1,%xmm0 +-.byte 102,15,58,68,231,0 +- pxor %xmm1,%xmm0 +- +- subq $0x20,%rcx +- ja L$mod_loop +- +-L$even_tail: +- movdqa %xmm0,%xmm1 +- movdqa %xmm4,%xmm8 +- pshufd $78,%xmm0,%xmm4 +- pxor %xmm0,%xmm4 +- +-.byte 102,15,58,68,198,0 +-.byte 102,15,58,68,206,17 +-.byte 102,15,58,68,231,16 +- +- pxor %xmm3,%xmm0 +- pxor %xmm5,%xmm1 +- pxor %xmm0,%xmm8 +- pxor %xmm1,%xmm8 +- pxor %xmm8,%xmm4 +- movdqa %xmm4,%xmm8 +- psrldq $8,%xmm8 +- pslldq $8,%xmm4 +- pxor %xmm8,%xmm1 +- pxor %xmm4,%xmm0 +- +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor 
%xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- +- +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +- testq %rcx,%rcx +- jnz L$done +- +-L$odd_tail: +- movdqu (%rdx),%xmm8 +-.byte 102,69,15,56,0,194 +- pxor %xmm8,%xmm0 +- movdqa %xmm0,%xmm1 +- pshufd $78,%xmm0,%xmm3 +- pxor %xmm0,%xmm3 +-.byte 102,15,58,68,194,0 +-.byte 102,15,58,68,202,17 +-.byte 102,15,58,68,223,0 +- pxor %xmm0,%xmm3 +- pxor %xmm1,%xmm3 +- +- movdqa %xmm3,%xmm4 +- psrldq $8,%xmm3 +- pslldq $8,%xmm4 +- pxor %xmm3,%xmm1 +- pxor %xmm4,%xmm0 +- +- movdqa %xmm0,%xmm4 +- movdqa %xmm0,%xmm3 +- psllq $5,%xmm0 +- pxor %xmm0,%xmm3 +- psllq $1,%xmm0 +- pxor %xmm3,%xmm0 +- psllq $57,%xmm0 +- movdqa %xmm0,%xmm3 +- pslldq $8,%xmm0 +- psrldq $8,%xmm3 +- pxor %xmm4,%xmm0 +- pxor %xmm3,%xmm1 +- +- +- movdqa %xmm0,%xmm4 +- psrlq $1,%xmm0 +- pxor %xmm4,%xmm1 +- pxor %xmm0,%xmm4 +- psrlq $5,%xmm0 +- pxor %xmm4,%xmm0 +- psrlq $1,%xmm0 +- pxor %xmm1,%xmm0 +-L$done: +-.byte 102,65,15,56,0,194 +- movdqu %xmm0,(%rdi) +- .byte 0xf3,0xc3 +- +- +-.globl _gcm_init_avx +-.private_extern _gcm_init_avx +- +-.p2align 5 +-_gcm_init_avx: +- +- vzeroupper +- +- vmovdqu (%rsi),%xmm2 +- vpshufd $78,%xmm2,%xmm2 +- +- +- vpshufd $255,%xmm2,%xmm4 +- vpsrlq $63,%xmm2,%xmm3 +- vpsllq $1,%xmm2,%xmm2 +- vpxor %xmm5,%xmm5,%xmm5 +- vpcmpgtd %xmm4,%xmm5,%xmm5 +- vpslldq $8,%xmm3,%xmm3 +- vpor %xmm3,%xmm2,%xmm2 +- +- +- vpand L$0x1c2_polynomial(%rip),%xmm5,%xmm5 +- vpxor %xmm5,%xmm2,%xmm2 +- +- vpunpckhqdq %xmm2,%xmm2,%xmm6 +- vmovdqa %xmm2,%xmm0 +- vpxor %xmm2,%xmm6,%xmm6 +- movq $4,%r10 +- jmp L$init_start_avx +-.p2align 5 +-L$init_loop_avx: +- vpalignr $8,%xmm3,%xmm4,%xmm5 +- vmovdqu %xmm5,-16(%rdi) +- vpunpckhqdq %xmm0,%xmm0,%xmm3 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 +- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 +- vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 +- vpxor %xmm0,%xmm1,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- +- vpslldq $8,%xmm3,%xmm4 +- vpsrldq $8,%xmm3,%xmm3 +- vpxor %xmm4,%xmm0,%xmm0 +- vpxor %xmm3,%xmm1,%xmm1 +- vpsllq $57,%xmm0,%xmm3 +- vpsllq $62,%xmm0,%xmm4 +- vpxor %xmm3,%xmm4,%xmm4 +- vpsllq $63,%xmm0,%xmm3 +- vpxor %xmm3,%xmm4,%xmm4 +- vpslldq $8,%xmm4,%xmm3 +- vpsrldq $8,%xmm4,%xmm4 +- vpxor %xmm3,%xmm0,%xmm0 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vpsrlq $1,%xmm0,%xmm4 +- vpxor %xmm0,%xmm1,%xmm1 +- vpxor %xmm4,%xmm0,%xmm0 +- vpsrlq $5,%xmm4,%xmm4 +- vpxor %xmm4,%xmm0,%xmm0 +- vpsrlq $1,%xmm0,%xmm0 +- vpxor %xmm1,%xmm0,%xmm0 +-L$init_start_avx: +- vmovdqa %xmm0,%xmm5 +- vpunpckhqdq %xmm0,%xmm0,%xmm3 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x11,%xmm2,%xmm0,%xmm1 +- vpclmulqdq $0x00,%xmm2,%xmm0,%xmm0 +- vpclmulqdq $0x00,%xmm6,%xmm3,%xmm3 +- vpxor %xmm0,%xmm1,%xmm4 +- vpxor %xmm4,%xmm3,%xmm3 +- +- vpslldq $8,%xmm3,%xmm4 +- vpsrldq $8,%xmm3,%xmm3 +- vpxor %xmm4,%xmm0,%xmm0 +- vpxor %xmm3,%xmm1,%xmm1 +- vpsllq $57,%xmm0,%xmm3 +- vpsllq $62,%xmm0,%xmm4 +- vpxor %xmm3,%xmm4,%xmm4 +- vpsllq $63,%xmm0,%xmm3 +- vpxor %xmm3,%xmm4,%xmm4 +- vpslldq $8,%xmm4,%xmm3 +- vpsrldq $8,%xmm4,%xmm4 +- vpxor %xmm3,%xmm0,%xmm0 +- vpxor %xmm4,%xmm1,%xmm1 +- +- vpsrlq $1,%xmm0,%xmm4 +- vpxor %xmm0,%xmm1,%xmm1 +- vpxor %xmm4,%xmm0,%xmm0 +- vpsrlq $5,%xmm4,%xmm4 +- vpxor %xmm4,%xmm0,%xmm0 +- vpsrlq $1,%xmm0,%xmm0 +- vpxor %xmm1,%xmm0,%xmm0 +- vpshufd $78,%xmm5,%xmm3 +- vpshufd $78,%xmm0,%xmm4 +- vpxor %xmm5,%xmm3,%xmm3 +- vmovdqu %xmm5,0(%rdi) +- vpxor %xmm0,%xmm4,%xmm4 +- vmovdqu %xmm0,16(%rdi) +- leaq 48(%rdi),%rdi +- subq $1,%r10 +- jnz L$init_loop_avx +- +- vpalignr 
$8,%xmm4,%xmm3,%xmm5 +- vmovdqu %xmm5,-16(%rdi) +- +- vzeroupper +- .byte 0xf3,0xc3 +- +- +-.globl _gcm_gmult_avx +-.private_extern _gcm_gmult_avx +- +-.p2align 5 +-_gcm_gmult_avx: +- +- jmp L$_gmult_clmul +- +- +-.globl _gcm_ghash_avx +-.private_extern _gcm_ghash_avx +- +-.p2align 5 +-_gcm_ghash_avx: +- +- vzeroupper +- +- vmovdqu (%rdi),%xmm10 +- leaq L$0x1c2_polynomial(%rip),%r10 +- leaq 64(%rsi),%rsi +- vmovdqu L$bswap_mask(%rip),%xmm13 +- vpshufb %xmm13,%xmm10,%xmm10 +- cmpq $0x80,%rcx +- jb L$short_avx +- subq $0x80,%rcx +- +- vmovdqu 112(%rdx),%xmm14 +- vmovdqu 0-64(%rsi),%xmm6 +- vpshufb %xmm13,%xmm14,%xmm14 +- vmovdqu 32-64(%rsi),%xmm7 +- +- vpunpckhqdq %xmm14,%xmm14,%xmm9 +- vmovdqu 96(%rdx),%xmm15 +- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 +- vpxor %xmm14,%xmm9,%xmm9 +- vpshufb %xmm13,%xmm15,%xmm15 +- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 +- vmovdqu 16-64(%rsi),%xmm6 +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vmovdqu 80(%rdx),%xmm14 +- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 +- vpxor %xmm15,%xmm8,%xmm8 +- +- vpshufb %xmm13,%xmm14,%xmm14 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 +- vpunpckhqdq %xmm14,%xmm14,%xmm9 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 +- vmovdqu 48-64(%rsi),%xmm6 +- vpxor %xmm14,%xmm9,%xmm9 +- vmovdqu 64(%rdx),%xmm15 +- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 +- vmovdqu 80-64(%rsi),%xmm7 +- +- vpshufb %xmm13,%xmm15,%xmm15 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 +- vpxor %xmm1,%xmm4,%xmm4 +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 +- vmovdqu 64-64(%rsi),%xmm6 +- vpxor %xmm2,%xmm5,%xmm5 +- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 +- vpxor %xmm15,%xmm8,%xmm8 +- +- vmovdqu 48(%rdx),%xmm14 +- vpxor %xmm3,%xmm0,%xmm0 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 +- vpxor %xmm4,%xmm1,%xmm1 +- vpshufb %xmm13,%xmm14,%xmm14 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 +- vmovdqu 96-64(%rsi),%xmm6 +- vpxor %xmm5,%xmm2,%xmm2 +- vpunpckhqdq %xmm14,%xmm14,%xmm9 +- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 +- vmovdqu 128-64(%rsi),%xmm7 +- vpxor %xmm14,%xmm9,%xmm9 +- +- vmovdqu 32(%rdx),%xmm15 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 +- vpxor %xmm1,%xmm4,%xmm4 +- vpshufb %xmm13,%xmm15,%xmm15 +- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 +- vmovdqu 112-64(%rsi),%xmm6 +- vpxor %xmm2,%xmm5,%xmm5 +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 +- vpxor %xmm15,%xmm8,%xmm8 +- +- vmovdqu 16(%rdx),%xmm14 +- vpxor %xmm3,%xmm0,%xmm0 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 +- vpxor %xmm4,%xmm1,%xmm1 +- vpshufb %xmm13,%xmm14,%xmm14 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 +- vmovdqu 144-64(%rsi),%xmm6 +- vpxor %xmm5,%xmm2,%xmm2 +- vpunpckhqdq %xmm14,%xmm14,%xmm9 +- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 +- vmovdqu 176-64(%rsi),%xmm7 +- vpxor %xmm14,%xmm9,%xmm9 +- +- vmovdqu (%rdx),%xmm15 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 +- vpxor %xmm1,%xmm4,%xmm4 +- vpshufb %xmm13,%xmm15,%xmm15 +- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 +- vmovdqu 160-64(%rsi),%xmm6 +- vpxor %xmm2,%xmm5,%xmm5 +- vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 +- +- leaq 128(%rdx),%rdx +- cmpq $0x80,%rcx +- jb L$tail_avx +- +- vpxor %xmm10,%xmm15,%xmm15 +- subq $0x80,%rcx +- jmp L$oop8x_avx +- +-.p2align 5 +-L$oop8x_avx: +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vmovdqu 112(%rdx),%xmm14 +- vpxor %xmm0,%xmm3,%xmm3 +- vpxor %xmm15,%xmm8,%xmm8 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm10 +- vpshufb %xmm13,%xmm14,%xmm14 +- vpxor %xmm1,%xmm4,%xmm4 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm11 +- vmovdqu 0-64(%rsi),%xmm6 +- vpunpckhqdq %xmm14,%xmm14,%xmm9 +- vpxor %xmm2,%xmm5,%xmm5 +- 
vpclmulqdq $0x00,%xmm7,%xmm8,%xmm12 +- vmovdqu 32-64(%rsi),%xmm7 +- vpxor %xmm14,%xmm9,%xmm9 +- +- vmovdqu 96(%rdx),%xmm15 +- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 +- vpxor %xmm3,%xmm10,%xmm10 +- vpshufb %xmm13,%xmm15,%xmm15 +- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 +- vxorps %xmm4,%xmm11,%xmm11 +- vmovdqu 16-64(%rsi),%xmm6 +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 +- vpxor %xmm5,%xmm12,%xmm12 +- vxorps %xmm15,%xmm8,%xmm8 +- +- vmovdqu 80(%rdx),%xmm14 +- vpxor %xmm10,%xmm12,%xmm12 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 +- vpxor %xmm11,%xmm12,%xmm12 +- vpslldq $8,%xmm12,%xmm9 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 +- vpsrldq $8,%xmm12,%xmm12 +- vpxor %xmm9,%xmm10,%xmm10 +- vmovdqu 48-64(%rsi),%xmm6 +- vpshufb %xmm13,%xmm14,%xmm14 +- vxorps %xmm12,%xmm11,%xmm11 +- vpxor %xmm1,%xmm4,%xmm4 +- vpunpckhqdq %xmm14,%xmm14,%xmm9 +- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 +- vmovdqu 80-64(%rsi),%xmm7 +- vpxor %xmm14,%xmm9,%xmm9 +- vpxor %xmm2,%xmm5,%xmm5 +- +- vmovdqu 64(%rdx),%xmm15 +- vpalignr $8,%xmm10,%xmm10,%xmm12 +- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 +- vpshufb %xmm13,%xmm15,%xmm15 +- vpxor %xmm3,%xmm0,%xmm0 +- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 +- vmovdqu 64-64(%rsi),%xmm6 +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 +- vxorps %xmm15,%xmm8,%xmm8 +- vpxor %xmm5,%xmm2,%xmm2 +- +- vmovdqu 48(%rdx),%xmm14 +- vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 +- vpshufb %xmm13,%xmm14,%xmm14 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 +- vmovdqu 96-64(%rsi),%xmm6 +- vpunpckhqdq %xmm14,%xmm14,%xmm9 +- vpxor %xmm1,%xmm4,%xmm4 +- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 +- vmovdqu 128-64(%rsi),%xmm7 +- vpxor %xmm14,%xmm9,%xmm9 +- vpxor %xmm2,%xmm5,%xmm5 +- +- vmovdqu 32(%rdx),%xmm15 +- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 +- vpshufb %xmm13,%xmm15,%xmm15 +- vpxor %xmm3,%xmm0,%xmm0 +- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 +- vmovdqu 112-64(%rsi),%xmm6 +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpxor %xmm4,%xmm1,%xmm1 +- vpclmulqdq $0x00,%xmm7,%xmm9,%xmm2 +- vpxor %xmm15,%xmm8,%xmm8 +- vpxor %xmm5,%xmm2,%xmm2 +- vxorps %xmm12,%xmm10,%xmm10 +- +- vmovdqu 16(%rdx),%xmm14 +- vpalignr $8,%xmm10,%xmm10,%xmm12 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm3 +- vpshufb %xmm13,%xmm14,%xmm14 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm4 +- vmovdqu 144-64(%rsi),%xmm6 +- vpclmulqdq $0x10,(%r10),%xmm10,%xmm10 +- vxorps %xmm11,%xmm12,%xmm12 +- vpunpckhqdq %xmm14,%xmm14,%xmm9 +- vpxor %xmm1,%xmm4,%xmm4 +- vpclmulqdq $0x10,%xmm7,%xmm8,%xmm5 +- vmovdqu 176-64(%rsi),%xmm7 +- vpxor %xmm14,%xmm9,%xmm9 +- vpxor %xmm2,%xmm5,%xmm5 +- +- vmovdqu (%rdx),%xmm15 +- vpclmulqdq $0x00,%xmm6,%xmm14,%xmm0 +- vpshufb %xmm13,%xmm15,%xmm15 +- vpclmulqdq $0x11,%xmm6,%xmm14,%xmm1 +- vmovdqu 160-64(%rsi),%xmm6 +- vpxor %xmm12,%xmm15,%xmm15 +- vpclmulqdq $0x10,%xmm7,%xmm9,%xmm2 +- vpxor %xmm10,%xmm15,%xmm15 +- +- leaq 128(%rdx),%rdx +- subq $0x80,%rcx +- jnc L$oop8x_avx +- +- addq $0x80,%rcx +- jmp L$tail_no_xor_avx +- +-.p2align 5 +-L$short_avx: +- vmovdqu -16(%rdx,%rcx,1),%xmm14 +- leaq (%rdx,%rcx,1),%rdx +- vmovdqu 0-64(%rsi),%xmm6 +- vmovdqu 32-64(%rsi),%xmm7 +- vpshufb %xmm13,%xmm14,%xmm15 +- +- vmovdqa %xmm0,%xmm3 +- vmovdqa %xmm1,%xmm4 +- vmovdqa %xmm2,%xmm5 +- subq $0x10,%rcx +- jz L$tail_avx +- +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 +- vpxor %xmm15,%xmm8,%xmm8 +- vmovdqu -32(%rdx),%xmm14 +- vpxor %xmm1,%xmm4,%xmm4 +- 
vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 +- vmovdqu 16-64(%rsi),%xmm6 +- vpshufb %xmm13,%xmm14,%xmm15 +- vpxor %xmm2,%xmm5,%xmm5 +- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 +- vpsrldq $8,%xmm7,%xmm7 +- subq $0x10,%rcx +- jz L$tail_avx +- +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 +- vpxor %xmm15,%xmm8,%xmm8 +- vmovdqu -48(%rdx),%xmm14 +- vpxor %xmm1,%xmm4,%xmm4 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 +- vmovdqu 48-64(%rsi),%xmm6 +- vpshufb %xmm13,%xmm14,%xmm15 +- vpxor %xmm2,%xmm5,%xmm5 +- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 +- vmovdqu 80-64(%rsi),%xmm7 +- subq $0x10,%rcx +- jz L$tail_avx +- +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 +- vpxor %xmm15,%xmm8,%xmm8 +- vmovdqu -64(%rdx),%xmm14 +- vpxor %xmm1,%xmm4,%xmm4 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 +- vmovdqu 64-64(%rsi),%xmm6 +- vpshufb %xmm13,%xmm14,%xmm15 +- vpxor %xmm2,%xmm5,%xmm5 +- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 +- vpsrldq $8,%xmm7,%xmm7 +- subq $0x10,%rcx +- jz L$tail_avx +- +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 +- vpxor %xmm15,%xmm8,%xmm8 +- vmovdqu -80(%rdx),%xmm14 +- vpxor %xmm1,%xmm4,%xmm4 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 +- vmovdqu 96-64(%rsi),%xmm6 +- vpshufb %xmm13,%xmm14,%xmm15 +- vpxor %xmm2,%xmm5,%xmm5 +- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 +- vmovdqu 128-64(%rsi),%xmm7 +- subq $0x10,%rcx +- jz L$tail_avx +- +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 +- vpxor %xmm15,%xmm8,%xmm8 +- vmovdqu -96(%rdx),%xmm14 +- vpxor %xmm1,%xmm4,%xmm4 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 +- vmovdqu 112-64(%rsi),%xmm6 +- vpshufb %xmm13,%xmm14,%xmm15 +- vpxor %xmm2,%xmm5,%xmm5 +- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 +- vpsrldq $8,%xmm7,%xmm7 +- subq $0x10,%rcx +- jz L$tail_avx +- +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 +- vpxor %xmm15,%xmm8,%xmm8 +- vmovdqu -112(%rdx),%xmm14 +- vpxor %xmm1,%xmm4,%xmm4 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 +- vmovdqu 144-64(%rsi),%xmm6 +- vpshufb %xmm13,%xmm14,%xmm15 +- vpxor %xmm2,%xmm5,%xmm5 +- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 +- vmovq 184-64(%rsi),%xmm7 +- subq $0x10,%rcx +- jmp L$tail_avx +- +-.p2align 5 +-L$tail_avx: +- vpxor %xmm10,%xmm15,%xmm15 +-L$tail_no_xor_avx: +- vpunpckhqdq %xmm15,%xmm15,%xmm8 +- vpxor %xmm0,%xmm3,%xmm3 +- vpclmulqdq $0x00,%xmm6,%xmm15,%xmm0 +- vpxor %xmm15,%xmm8,%xmm8 +- vpxor %xmm1,%xmm4,%xmm4 +- vpclmulqdq $0x11,%xmm6,%xmm15,%xmm1 +- vpxor %xmm2,%xmm5,%xmm5 +- vpclmulqdq $0x00,%xmm7,%xmm8,%xmm2 +- +- vmovdqu (%r10),%xmm12 +- +- vpxor %xmm0,%xmm3,%xmm10 +- vpxor %xmm1,%xmm4,%xmm11 +- vpxor %xmm2,%xmm5,%xmm5 +- +- vpxor %xmm10,%xmm5,%xmm5 +- vpxor %xmm11,%xmm5,%xmm5 +- vpslldq $8,%xmm5,%xmm9 +- vpsrldq $8,%xmm5,%xmm5 +- vpxor %xmm9,%xmm10,%xmm10 +- vpxor %xmm5,%xmm11,%xmm11 +- +- vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 +- vpalignr $8,%xmm10,%xmm10,%xmm10 +- vpxor %xmm9,%xmm10,%xmm10 +- +- vpclmulqdq $0x10,%xmm12,%xmm10,%xmm9 +- vpalignr $8,%xmm10,%xmm10,%xmm10 +- vpxor %xmm11,%xmm10,%xmm10 +- vpxor %xmm9,%xmm10,%xmm10 +- +- cmpq $0,%rcx +- jne L$short_avx +- +- vpshufb %xmm13,%xmm10,%xmm10 +- vmovdqu %xmm10,(%rdi) +- vzeroupper +- .byte 0xf3,0xc3 +- +- +-.p2align 6 +-L$bswap_mask: +-.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +-L$0x1c2_polynomial: +-.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2 +-L$7_mask: +-.long 7,0,7,0 +-.p2align 6 +- +-.byte 
71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +-.p2align 6 +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/md5-x86_64.S b/mac-x86_64/crypto/fipsmodule/md5-x86_64.S +deleted file mode 100644 +index 06e3ba0..0000000 +--- a/mac-x86_64/crypto/fipsmodule/md5-x86_64.S ++++ /dev/null +@@ -1,696 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +-.p2align 4 +- +-.globl _md5_block_asm_data_order +-.private_extern _md5_block_asm_data_order +- +-_md5_block_asm_data_order: +- +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r14 +- +- pushq %r15 +- +-L$prologue: +- +- +- +- +- movq %rdi,%rbp +- shlq $6,%rdx +- leaq (%rsi,%rdx,1),%rdi +- movl 0(%rbp),%eax +- movl 4(%rbp),%ebx +- movl 8(%rbp),%ecx +- movl 12(%rbp),%edx +- +- +- +- +- +- +- +- cmpq %rdi,%rsi +- je L$end +- +- +-L$loop: +- movl %eax,%r8d +- movl %ebx,%r9d +- movl %ecx,%r14d +- movl %edx,%r15d +- movl 0(%rsi),%r10d +- movl %edx,%r11d +- xorl %ecx,%r11d +- leal -680876936(%rax,%r10,1),%eax +- andl %ebx,%r11d +- xorl %edx,%r11d +- movl 4(%rsi),%r10d +- addl %r11d,%eax +- roll $7,%eax +- movl %ecx,%r11d +- addl %ebx,%eax +- xorl %ebx,%r11d +- leal -389564586(%rdx,%r10,1),%edx +- andl %eax,%r11d +- xorl %ecx,%r11d +- movl 8(%rsi),%r10d +- addl %r11d,%edx +- roll $12,%edx +- movl %ebx,%r11d +- addl %eax,%edx +- xorl %eax,%r11d +- leal 606105819(%rcx,%r10,1),%ecx +- andl %edx,%r11d +- xorl %ebx,%r11d +- movl 12(%rsi),%r10d +- addl %r11d,%ecx +- roll $17,%ecx +- movl %eax,%r11d +- addl %edx,%ecx +- xorl %edx,%r11d +- leal -1044525330(%rbx,%r10,1),%ebx +- andl %ecx,%r11d +- xorl %eax,%r11d +- movl 16(%rsi),%r10d +- addl %r11d,%ebx +- roll $22,%ebx +- movl %edx,%r11d +- addl %ecx,%ebx +- xorl %ecx,%r11d +- leal -176418897(%rax,%r10,1),%eax +- andl %ebx,%r11d +- xorl %edx,%r11d +- movl 20(%rsi),%r10d +- addl %r11d,%eax +- roll $7,%eax +- movl %ecx,%r11d +- addl %ebx,%eax +- xorl %ebx,%r11d +- leal 1200080426(%rdx,%r10,1),%edx +- andl %eax,%r11d +- xorl %ecx,%r11d +- movl 24(%rsi),%r10d +- addl %r11d,%edx +- roll $12,%edx +- movl %ebx,%r11d +- addl %eax,%edx +- xorl %eax,%r11d +- leal -1473231341(%rcx,%r10,1),%ecx +- andl %edx,%r11d +- xorl %ebx,%r11d +- movl 28(%rsi),%r10d +- addl %r11d,%ecx +- roll $17,%ecx +- movl %eax,%r11d +- addl %edx,%ecx +- xorl %edx,%r11d +- leal -45705983(%rbx,%r10,1),%ebx +- andl %ecx,%r11d +- xorl %eax,%r11d +- movl 32(%rsi),%r10d +- addl %r11d,%ebx +- roll $22,%ebx +- movl %edx,%r11d +- addl %ecx,%ebx +- xorl %ecx,%r11d +- leal 1770035416(%rax,%r10,1),%eax +- andl %ebx,%r11d +- xorl %edx,%r11d +- movl 36(%rsi),%r10d +- addl %r11d,%eax +- roll $7,%eax +- movl %ecx,%r11d +- addl %ebx,%eax +- xorl %ebx,%r11d +- leal -1958414417(%rdx,%r10,1),%edx +- andl %eax,%r11d +- xorl %ecx,%r11d +- movl 40(%rsi),%r10d +- addl %r11d,%edx +- roll $12,%edx +- movl %ebx,%r11d +- addl %eax,%edx +- xorl %eax,%r11d +- leal -42063(%rcx,%r10,1),%ecx +- andl %edx,%r11d +- xorl %ebx,%r11d +- movl 44(%rsi),%r10d +- addl %r11d,%ecx +- roll $17,%ecx +- movl %eax,%r11d +- addl %edx,%ecx +- xorl %edx,%r11d +- leal -1990404162(%rbx,%r10,1),%ebx +- andl %ecx,%r11d +- xorl %eax,%r11d +- 
movl 48(%rsi),%r10d +- addl %r11d,%ebx +- roll $22,%ebx +- movl %edx,%r11d +- addl %ecx,%ebx +- xorl %ecx,%r11d +- leal 1804603682(%rax,%r10,1),%eax +- andl %ebx,%r11d +- xorl %edx,%r11d +- movl 52(%rsi),%r10d +- addl %r11d,%eax +- roll $7,%eax +- movl %ecx,%r11d +- addl %ebx,%eax +- xorl %ebx,%r11d +- leal -40341101(%rdx,%r10,1),%edx +- andl %eax,%r11d +- xorl %ecx,%r11d +- movl 56(%rsi),%r10d +- addl %r11d,%edx +- roll $12,%edx +- movl %ebx,%r11d +- addl %eax,%edx +- xorl %eax,%r11d +- leal -1502002290(%rcx,%r10,1),%ecx +- andl %edx,%r11d +- xorl %ebx,%r11d +- movl 60(%rsi),%r10d +- addl %r11d,%ecx +- roll $17,%ecx +- movl %eax,%r11d +- addl %edx,%ecx +- xorl %edx,%r11d +- leal 1236535329(%rbx,%r10,1),%ebx +- andl %ecx,%r11d +- xorl %eax,%r11d +- movl 0(%rsi),%r10d +- addl %r11d,%ebx +- roll $22,%ebx +- movl %edx,%r11d +- addl %ecx,%ebx +- movl 4(%rsi),%r10d +- movl %edx,%r11d +- movl %edx,%r12d +- notl %r11d +- leal -165796510(%rax,%r10,1),%eax +- andl %ebx,%r12d +- andl %ecx,%r11d +- movl 24(%rsi),%r10d +- orl %r11d,%r12d +- movl %ecx,%r11d +- addl %r12d,%eax +- movl %ecx,%r12d +- roll $5,%eax +- addl %ebx,%eax +- notl %r11d +- leal -1069501632(%rdx,%r10,1),%edx +- andl %eax,%r12d +- andl %ebx,%r11d +- movl 44(%rsi),%r10d +- orl %r11d,%r12d +- movl %ebx,%r11d +- addl %r12d,%edx +- movl %ebx,%r12d +- roll $9,%edx +- addl %eax,%edx +- notl %r11d +- leal 643717713(%rcx,%r10,1),%ecx +- andl %edx,%r12d +- andl %eax,%r11d +- movl 0(%rsi),%r10d +- orl %r11d,%r12d +- movl %eax,%r11d +- addl %r12d,%ecx +- movl %eax,%r12d +- roll $14,%ecx +- addl %edx,%ecx +- notl %r11d +- leal -373897302(%rbx,%r10,1),%ebx +- andl %ecx,%r12d +- andl %edx,%r11d +- movl 20(%rsi),%r10d +- orl %r11d,%r12d +- movl %edx,%r11d +- addl %r12d,%ebx +- movl %edx,%r12d +- roll $20,%ebx +- addl %ecx,%ebx +- notl %r11d +- leal -701558691(%rax,%r10,1),%eax +- andl %ebx,%r12d +- andl %ecx,%r11d +- movl 40(%rsi),%r10d +- orl %r11d,%r12d +- movl %ecx,%r11d +- addl %r12d,%eax +- movl %ecx,%r12d +- roll $5,%eax +- addl %ebx,%eax +- notl %r11d +- leal 38016083(%rdx,%r10,1),%edx +- andl %eax,%r12d +- andl %ebx,%r11d +- movl 60(%rsi),%r10d +- orl %r11d,%r12d +- movl %ebx,%r11d +- addl %r12d,%edx +- movl %ebx,%r12d +- roll $9,%edx +- addl %eax,%edx +- notl %r11d +- leal -660478335(%rcx,%r10,1),%ecx +- andl %edx,%r12d +- andl %eax,%r11d +- movl 16(%rsi),%r10d +- orl %r11d,%r12d +- movl %eax,%r11d +- addl %r12d,%ecx +- movl %eax,%r12d +- roll $14,%ecx +- addl %edx,%ecx +- notl %r11d +- leal -405537848(%rbx,%r10,1),%ebx +- andl %ecx,%r12d +- andl %edx,%r11d +- movl 36(%rsi),%r10d +- orl %r11d,%r12d +- movl %edx,%r11d +- addl %r12d,%ebx +- movl %edx,%r12d +- roll $20,%ebx +- addl %ecx,%ebx +- notl %r11d +- leal 568446438(%rax,%r10,1),%eax +- andl %ebx,%r12d +- andl %ecx,%r11d +- movl 56(%rsi),%r10d +- orl %r11d,%r12d +- movl %ecx,%r11d +- addl %r12d,%eax +- movl %ecx,%r12d +- roll $5,%eax +- addl %ebx,%eax +- notl %r11d +- leal -1019803690(%rdx,%r10,1),%edx +- andl %eax,%r12d +- andl %ebx,%r11d +- movl 12(%rsi),%r10d +- orl %r11d,%r12d +- movl %ebx,%r11d +- addl %r12d,%edx +- movl %ebx,%r12d +- roll $9,%edx +- addl %eax,%edx +- notl %r11d +- leal -187363961(%rcx,%r10,1),%ecx +- andl %edx,%r12d +- andl %eax,%r11d +- movl 32(%rsi),%r10d +- orl %r11d,%r12d +- movl %eax,%r11d +- addl %r12d,%ecx +- movl %eax,%r12d +- roll $14,%ecx +- addl %edx,%ecx +- notl %r11d +- leal 1163531501(%rbx,%r10,1),%ebx +- andl %ecx,%r12d +- andl %edx,%r11d +- movl 52(%rsi),%r10d +- orl %r11d,%r12d +- movl %edx,%r11d +- addl %r12d,%ebx +- movl %edx,%r12d +- roll 
$20,%ebx +- addl %ecx,%ebx +- notl %r11d +- leal -1444681467(%rax,%r10,1),%eax +- andl %ebx,%r12d +- andl %ecx,%r11d +- movl 8(%rsi),%r10d +- orl %r11d,%r12d +- movl %ecx,%r11d +- addl %r12d,%eax +- movl %ecx,%r12d +- roll $5,%eax +- addl %ebx,%eax +- notl %r11d +- leal -51403784(%rdx,%r10,1),%edx +- andl %eax,%r12d +- andl %ebx,%r11d +- movl 28(%rsi),%r10d +- orl %r11d,%r12d +- movl %ebx,%r11d +- addl %r12d,%edx +- movl %ebx,%r12d +- roll $9,%edx +- addl %eax,%edx +- notl %r11d +- leal 1735328473(%rcx,%r10,1),%ecx +- andl %edx,%r12d +- andl %eax,%r11d +- movl 48(%rsi),%r10d +- orl %r11d,%r12d +- movl %eax,%r11d +- addl %r12d,%ecx +- movl %eax,%r12d +- roll $14,%ecx +- addl %edx,%ecx +- notl %r11d +- leal -1926607734(%rbx,%r10,1),%ebx +- andl %ecx,%r12d +- andl %edx,%r11d +- movl 0(%rsi),%r10d +- orl %r11d,%r12d +- movl %edx,%r11d +- addl %r12d,%ebx +- movl %edx,%r12d +- roll $20,%ebx +- addl %ecx,%ebx +- movl 20(%rsi),%r10d +- movl %ecx,%r11d +- leal -378558(%rax,%r10,1),%eax +- movl 32(%rsi),%r10d +- xorl %edx,%r11d +- xorl %ebx,%r11d +- addl %r11d,%eax +- roll $4,%eax +- movl %ebx,%r11d +- addl %ebx,%eax +- leal -2022574463(%rdx,%r10,1),%edx +- movl 44(%rsi),%r10d +- xorl %ecx,%r11d +- xorl %eax,%r11d +- addl %r11d,%edx +- roll $11,%edx +- movl %eax,%r11d +- addl %eax,%edx +- leal 1839030562(%rcx,%r10,1),%ecx +- movl 56(%rsi),%r10d +- xorl %ebx,%r11d +- xorl %edx,%r11d +- addl %r11d,%ecx +- roll $16,%ecx +- movl %edx,%r11d +- addl %edx,%ecx +- leal -35309556(%rbx,%r10,1),%ebx +- movl 4(%rsi),%r10d +- xorl %eax,%r11d +- xorl %ecx,%r11d +- addl %r11d,%ebx +- roll $23,%ebx +- movl %ecx,%r11d +- addl %ecx,%ebx +- leal -1530992060(%rax,%r10,1),%eax +- movl 16(%rsi),%r10d +- xorl %edx,%r11d +- xorl %ebx,%r11d +- addl %r11d,%eax +- roll $4,%eax +- movl %ebx,%r11d +- addl %ebx,%eax +- leal 1272893353(%rdx,%r10,1),%edx +- movl 28(%rsi),%r10d +- xorl %ecx,%r11d +- xorl %eax,%r11d +- addl %r11d,%edx +- roll $11,%edx +- movl %eax,%r11d +- addl %eax,%edx +- leal -155497632(%rcx,%r10,1),%ecx +- movl 40(%rsi),%r10d +- xorl %ebx,%r11d +- xorl %edx,%r11d +- addl %r11d,%ecx +- roll $16,%ecx +- movl %edx,%r11d +- addl %edx,%ecx +- leal -1094730640(%rbx,%r10,1),%ebx +- movl 52(%rsi),%r10d +- xorl %eax,%r11d +- xorl %ecx,%r11d +- addl %r11d,%ebx +- roll $23,%ebx +- movl %ecx,%r11d +- addl %ecx,%ebx +- leal 681279174(%rax,%r10,1),%eax +- movl 0(%rsi),%r10d +- xorl %edx,%r11d +- xorl %ebx,%r11d +- addl %r11d,%eax +- roll $4,%eax +- movl %ebx,%r11d +- addl %ebx,%eax +- leal -358537222(%rdx,%r10,1),%edx +- movl 12(%rsi),%r10d +- xorl %ecx,%r11d +- xorl %eax,%r11d +- addl %r11d,%edx +- roll $11,%edx +- movl %eax,%r11d +- addl %eax,%edx +- leal -722521979(%rcx,%r10,1),%ecx +- movl 24(%rsi),%r10d +- xorl %ebx,%r11d +- xorl %edx,%r11d +- addl %r11d,%ecx +- roll $16,%ecx +- movl %edx,%r11d +- addl %edx,%ecx +- leal 76029189(%rbx,%r10,1),%ebx +- movl 36(%rsi),%r10d +- xorl %eax,%r11d +- xorl %ecx,%r11d +- addl %r11d,%ebx +- roll $23,%ebx +- movl %ecx,%r11d +- addl %ecx,%ebx +- leal -640364487(%rax,%r10,1),%eax +- movl 48(%rsi),%r10d +- xorl %edx,%r11d +- xorl %ebx,%r11d +- addl %r11d,%eax +- roll $4,%eax +- movl %ebx,%r11d +- addl %ebx,%eax +- leal -421815835(%rdx,%r10,1),%edx +- movl 60(%rsi),%r10d +- xorl %ecx,%r11d +- xorl %eax,%r11d +- addl %r11d,%edx +- roll $11,%edx +- movl %eax,%r11d +- addl %eax,%edx +- leal 530742520(%rcx,%r10,1),%ecx +- movl 8(%rsi),%r10d +- xorl %ebx,%r11d +- xorl %edx,%r11d +- addl %r11d,%ecx +- roll $16,%ecx +- movl %edx,%r11d +- addl %edx,%ecx +- leal -995338651(%rbx,%r10,1),%ebx +- 
movl 0(%rsi),%r10d +- xorl %eax,%r11d +- xorl %ecx,%r11d +- addl %r11d,%ebx +- roll $23,%ebx +- movl %ecx,%r11d +- addl %ecx,%ebx +- movl 0(%rsi),%r10d +- movl $0xffffffff,%r11d +- xorl %edx,%r11d +- leal -198630844(%rax,%r10,1),%eax +- orl %ebx,%r11d +- xorl %ecx,%r11d +- addl %r11d,%eax +- movl 28(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $6,%eax +- xorl %ecx,%r11d +- addl %ebx,%eax +- leal 1126891415(%rdx,%r10,1),%edx +- orl %eax,%r11d +- xorl %ebx,%r11d +- addl %r11d,%edx +- movl 56(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $10,%edx +- xorl %ebx,%r11d +- addl %eax,%edx +- leal -1416354905(%rcx,%r10,1),%ecx +- orl %edx,%r11d +- xorl %eax,%r11d +- addl %r11d,%ecx +- movl 20(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $15,%ecx +- xorl %eax,%r11d +- addl %edx,%ecx +- leal -57434055(%rbx,%r10,1),%ebx +- orl %ecx,%r11d +- xorl %edx,%r11d +- addl %r11d,%ebx +- movl 48(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $21,%ebx +- xorl %edx,%r11d +- addl %ecx,%ebx +- leal 1700485571(%rax,%r10,1),%eax +- orl %ebx,%r11d +- xorl %ecx,%r11d +- addl %r11d,%eax +- movl 12(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $6,%eax +- xorl %ecx,%r11d +- addl %ebx,%eax +- leal -1894986606(%rdx,%r10,1),%edx +- orl %eax,%r11d +- xorl %ebx,%r11d +- addl %r11d,%edx +- movl 40(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $10,%edx +- xorl %ebx,%r11d +- addl %eax,%edx +- leal -1051523(%rcx,%r10,1),%ecx +- orl %edx,%r11d +- xorl %eax,%r11d +- addl %r11d,%ecx +- movl 4(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $15,%ecx +- xorl %eax,%r11d +- addl %edx,%ecx +- leal -2054922799(%rbx,%r10,1),%ebx +- orl %ecx,%r11d +- xorl %edx,%r11d +- addl %r11d,%ebx +- movl 32(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $21,%ebx +- xorl %edx,%r11d +- addl %ecx,%ebx +- leal 1873313359(%rax,%r10,1),%eax +- orl %ebx,%r11d +- xorl %ecx,%r11d +- addl %r11d,%eax +- movl 60(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $6,%eax +- xorl %ecx,%r11d +- addl %ebx,%eax +- leal -30611744(%rdx,%r10,1),%edx +- orl %eax,%r11d +- xorl %ebx,%r11d +- addl %r11d,%edx +- movl 24(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $10,%edx +- xorl %ebx,%r11d +- addl %eax,%edx +- leal -1560198380(%rcx,%r10,1),%ecx +- orl %edx,%r11d +- xorl %eax,%r11d +- addl %r11d,%ecx +- movl 52(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $15,%ecx +- xorl %eax,%r11d +- addl %edx,%ecx +- leal 1309151649(%rbx,%r10,1),%ebx +- orl %ecx,%r11d +- xorl %edx,%r11d +- addl %r11d,%ebx +- movl 16(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $21,%ebx +- xorl %edx,%r11d +- addl %ecx,%ebx +- leal -145523070(%rax,%r10,1),%eax +- orl %ebx,%r11d +- xorl %ecx,%r11d +- addl %r11d,%eax +- movl 44(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $6,%eax +- xorl %ecx,%r11d +- addl %ebx,%eax +- leal -1120210379(%rdx,%r10,1),%edx +- orl %eax,%r11d +- xorl %ebx,%r11d +- addl %r11d,%edx +- movl 8(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $10,%edx +- xorl %ebx,%r11d +- addl %eax,%edx +- leal 718787259(%rcx,%r10,1),%ecx +- orl %edx,%r11d +- xorl %eax,%r11d +- addl %r11d,%ecx +- movl 36(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $15,%ecx +- xorl %eax,%r11d +- addl %edx,%ecx +- leal -343485551(%rbx,%r10,1),%ebx +- orl %ecx,%r11d +- xorl %edx,%r11d +- addl %r11d,%ebx +- movl 0(%rsi),%r10d +- movl $0xffffffff,%r11d +- roll $21,%ebx +- xorl %edx,%r11d +- addl %ecx,%ebx +- +- addl %r8d,%eax +- addl %r9d,%ebx +- addl %r14d,%ecx +- addl %r15d,%edx +- +- +- addq $64,%rsi +- cmpq %rdi,%rsi +- jb L$loop +- +- +-L$end: +- movl %eax,0(%rbp) +- movl %ebx,4(%rbp) +- movl %ecx,8(%rbp) +- movl 
%edx,12(%rbp) +- +- movq (%rsp),%r15 +- +- movq 8(%rsp),%r14 +- +- movq 16(%rsp),%r12 +- +- movq 24(%rsp),%rbx +- +- movq 32(%rsp),%rbp +- +- addq $40,%rsp +- +-L$epilogue: +- .byte 0xf3,0xc3 +- +- +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S b/mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S +deleted file mode 100644 +index 36057aa..0000000 +--- a/mac-x86_64/crypto/fipsmodule/p256-x86_64-asm.S ++++ /dev/null +@@ -1,4467 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +- +-.p2align 6 +-L$poly: +-.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 +- +-L$One: +-.long 1,1,1,1,1,1,1,1 +-L$Two: +-.long 2,2,2,2,2,2,2,2 +-L$Three: +-.long 3,3,3,3,3,3,3,3 +-L$ONE_mont: +-.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe +- +- +-L$ord: +-.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 +-L$ordK: +-.quad 0xccd1c8aaee00bc4f +- +- +- +-.globl _ecp_nistz256_neg +-.private_extern _ecp_nistz256_neg +- +-.p2align 5 +-_ecp_nistz256_neg: +- +- pushq %r12 +- +- pushq %r13 +- +-L$neg_body: +- +- xorq %r8,%r8 +- xorq %r9,%r9 +- xorq %r10,%r10 +- xorq %r11,%r11 +- xorq %r13,%r13 +- +- subq 0(%rsi),%r8 +- sbbq 8(%rsi),%r9 +- sbbq 16(%rsi),%r10 +- movq %r8,%rax +- sbbq 24(%rsi),%r11 +- leaq L$poly(%rip),%rsi +- movq %r9,%rdx +- sbbq $0,%r13 +- +- addq 0(%rsi),%r8 +- movq %r10,%rcx +- adcq 8(%rsi),%r9 +- adcq 16(%rsi),%r10 +- movq %r11,%r12 +- adcq 24(%rsi),%r11 +- testq %r13,%r13 +- +- cmovzq %rax,%r8 +- cmovzq %rdx,%r9 +- movq %r8,0(%rdi) +- cmovzq %rcx,%r10 +- movq %r9,8(%rdi) +- cmovzq %r12,%r11 +- movq %r10,16(%rdi) +- movq %r11,24(%rdi) +- +- movq 0(%rsp),%r13 +- +- movq 8(%rsp),%r12 +- +- leaq 16(%rsp),%rsp +- +-L$neg_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +-.globl _ecp_nistz256_ord_mul_mont +-.private_extern _ecp_nistz256_ord_mul_mont +- +-.p2align 5 +-_ecp_nistz256_ord_mul_mont: +- +- leaq _OPENSSL_ia32cap_P(%rip),%rcx +- movq 8(%rcx),%rcx +- andl $0x80100,%ecx +- cmpl $0x80100,%ecx +- je L$ecp_nistz256_ord_mul_montx +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$ord_mul_body: +- +- movq 0(%rdx),%rax +- movq %rdx,%rbx +- leaq L$ord(%rip),%r14 +- movq L$ordK(%rip),%r15 +- +- +- movq %rax,%rcx +- mulq 0(%rsi) +- movq %rax,%r8 +- movq %rcx,%rax +- movq %rdx,%r9 +- +- mulq 8(%rsi) +- addq %rax,%r9 +- movq %rcx,%rax +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq 16(%rsi) +- addq %rax,%r10 +- movq %rcx,%rax +- adcq $0,%rdx +- +- movq %r8,%r13 +- imulq %r15,%r8 +- +- movq %rdx,%r11 +- mulq 24(%rsi) +- addq %rax,%r11 +- movq %r8,%rax +- adcq $0,%rdx +- movq %rdx,%r12 +- +- +- mulq 0(%r14) +- movq %r8,%rbp +- addq %rax,%r13 +- movq %r8,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- subq %r8,%r10 +- sbbq $0,%r8 +- +- mulq 8(%r14) +- addq %rcx,%r9 +- adcq $0,%rdx +- addq %rax,%r9 +- movq %rbp,%rax +- adcq %rdx,%r10 +- movq %rbp,%rdx +- adcq $0,%r8 +- +- shlq $32,%rax +- shrq $32,%rdx +- subq %rax,%r11 +- movq 8(%rbx),%rax +- sbbq %rdx,%rbp +- +- addq %r8,%r11 +- adcq %rbp,%r12 +- adcq $0,%r13 +- +- +- movq %rax,%rcx +- mulq 0(%rsi) +- addq %rax,%r9 +- movq %rcx,%rax +- adcq $0,%rdx +- 
movq %rdx,%rbp +- +- mulq 8(%rsi) +- addq %rbp,%r10 +- adcq $0,%rdx +- addq %rax,%r10 +- movq %rcx,%rax +- adcq $0,%rdx +- movq %rdx,%rbp +- +- mulq 16(%rsi) +- addq %rbp,%r11 +- adcq $0,%rdx +- addq %rax,%r11 +- movq %rcx,%rax +- adcq $0,%rdx +- +- movq %r9,%rcx +- imulq %r15,%r9 +- +- movq %rdx,%rbp +- mulq 24(%rsi) +- addq %rbp,%r12 +- adcq $0,%rdx +- xorq %r8,%r8 +- addq %rax,%r12 +- movq %r9,%rax +- adcq %rdx,%r13 +- adcq $0,%r8 +- +- +- mulq 0(%r14) +- movq %r9,%rbp +- addq %rax,%rcx +- movq %r9,%rax +- adcq %rdx,%rcx +- +- subq %r9,%r11 +- sbbq $0,%r9 +- +- mulq 8(%r14) +- addq %rcx,%r10 +- adcq $0,%rdx +- addq %rax,%r10 +- movq %rbp,%rax +- adcq %rdx,%r11 +- movq %rbp,%rdx +- adcq $0,%r9 +- +- shlq $32,%rax +- shrq $32,%rdx +- subq %rax,%r12 +- movq 16(%rbx),%rax +- sbbq %rdx,%rbp +- +- addq %r9,%r12 +- adcq %rbp,%r13 +- adcq $0,%r8 +- +- +- movq %rax,%rcx +- mulq 0(%rsi) +- addq %rax,%r10 +- movq %rcx,%rax +- adcq $0,%rdx +- movq %rdx,%rbp +- +- mulq 8(%rsi) +- addq %rbp,%r11 +- adcq $0,%rdx +- addq %rax,%r11 +- movq %rcx,%rax +- adcq $0,%rdx +- movq %rdx,%rbp +- +- mulq 16(%rsi) +- addq %rbp,%r12 +- adcq $0,%rdx +- addq %rax,%r12 +- movq %rcx,%rax +- adcq $0,%rdx +- +- movq %r10,%rcx +- imulq %r15,%r10 +- +- movq %rdx,%rbp +- mulq 24(%rsi) +- addq %rbp,%r13 +- adcq $0,%rdx +- xorq %r9,%r9 +- addq %rax,%r13 +- movq %r10,%rax +- adcq %rdx,%r8 +- adcq $0,%r9 +- +- +- mulq 0(%r14) +- movq %r10,%rbp +- addq %rax,%rcx +- movq %r10,%rax +- adcq %rdx,%rcx +- +- subq %r10,%r12 +- sbbq $0,%r10 +- +- mulq 8(%r14) +- addq %rcx,%r11 +- adcq $0,%rdx +- addq %rax,%r11 +- movq %rbp,%rax +- adcq %rdx,%r12 +- movq %rbp,%rdx +- adcq $0,%r10 +- +- shlq $32,%rax +- shrq $32,%rdx +- subq %rax,%r13 +- movq 24(%rbx),%rax +- sbbq %rdx,%rbp +- +- addq %r10,%r13 +- adcq %rbp,%r8 +- adcq $0,%r9 +- +- +- movq %rax,%rcx +- mulq 0(%rsi) +- addq %rax,%r11 +- movq %rcx,%rax +- adcq $0,%rdx +- movq %rdx,%rbp +- +- mulq 8(%rsi) +- addq %rbp,%r12 +- adcq $0,%rdx +- addq %rax,%r12 +- movq %rcx,%rax +- adcq $0,%rdx +- movq %rdx,%rbp +- +- mulq 16(%rsi) +- addq %rbp,%r13 +- adcq $0,%rdx +- addq %rax,%r13 +- movq %rcx,%rax +- adcq $0,%rdx +- +- movq %r11,%rcx +- imulq %r15,%r11 +- +- movq %rdx,%rbp +- mulq 24(%rsi) +- addq %rbp,%r8 +- adcq $0,%rdx +- xorq %r10,%r10 +- addq %rax,%r8 +- movq %r11,%rax +- adcq %rdx,%r9 +- adcq $0,%r10 +- +- +- mulq 0(%r14) +- movq %r11,%rbp +- addq %rax,%rcx +- movq %r11,%rax +- adcq %rdx,%rcx +- +- subq %r11,%r13 +- sbbq $0,%r11 +- +- mulq 8(%r14) +- addq %rcx,%r12 +- adcq $0,%rdx +- addq %rax,%r12 +- movq %rbp,%rax +- adcq %rdx,%r13 +- movq %rbp,%rdx +- adcq $0,%r11 +- +- shlq $32,%rax +- shrq $32,%rdx +- subq %rax,%r8 +- sbbq %rdx,%rbp +- +- addq %r11,%r8 +- adcq %rbp,%r9 +- adcq $0,%r10 +- +- +- movq %r12,%rsi +- subq 0(%r14),%r12 +- movq %r13,%r11 +- sbbq 8(%r14),%r13 +- movq %r8,%rcx +- sbbq 16(%r14),%r8 +- movq %r9,%rbp +- sbbq 24(%r14),%r9 +- sbbq $0,%r10 +- +- cmovcq %rsi,%r12 +- cmovcq %r11,%r13 +- cmovcq %rcx,%r8 +- cmovcq %rbp,%r9 +- +- movq %r12,0(%rdi) +- movq %r13,8(%rdi) +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- movq 0(%rsp),%r15 +- +- movq 8(%rsp),%r14 +- +- movq 16(%rsp),%r13 +- +- movq 24(%rsp),%r12 +- +- movq 32(%rsp),%rbx +- +- movq 40(%rsp),%rbp +- +- leaq 48(%rsp),%rsp +- +-L$ord_mul_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +-.globl _ecp_nistz256_ord_sqr_mont +-.private_extern _ecp_nistz256_ord_sqr_mont +- +-.p2align 5 +-_ecp_nistz256_ord_sqr_mont: +- +- leaq _OPENSSL_ia32cap_P(%rip),%rcx +- movq 8(%rcx),%rcx +- andl $0x80100,%ecx +- cmpl 
$0x80100,%ecx +- je L$ecp_nistz256_ord_sqr_montx +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$ord_sqr_body: +- +- movq 0(%rsi),%r8 +- movq 8(%rsi),%rax +- movq 16(%rsi),%r14 +- movq 24(%rsi),%r15 +- leaq L$ord(%rip),%rsi +- movq %rdx,%rbx +- jmp L$oop_ord_sqr +- +-.p2align 5 +-L$oop_ord_sqr: +- +- movq %rax,%rbp +- mulq %r8 +- movq %rax,%r9 +-.byte 102,72,15,110,205 +- movq %r14,%rax +- movq %rdx,%r10 +- +- mulq %r8 +- addq %rax,%r10 +- movq %r15,%rax +-.byte 102,73,15,110,214 +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %r8 +- addq %rax,%r11 +- movq %r15,%rax +-.byte 102,73,15,110,223 +- adcq $0,%rdx +- movq %rdx,%r12 +- +- +- mulq %r14 +- movq %rax,%r13 +- movq %r14,%rax +- movq %rdx,%r14 +- +- +- mulq %rbp +- addq %rax,%r11 +- movq %r15,%rax +- adcq $0,%rdx +- movq %rdx,%r15 +- +- mulq %rbp +- addq %rax,%r12 +- adcq $0,%rdx +- +- addq %r15,%r12 +- adcq %rdx,%r13 +- adcq $0,%r14 +- +- +- xorq %r15,%r15 +- movq %r8,%rax +- addq %r9,%r9 +- adcq %r10,%r10 +- adcq %r11,%r11 +- adcq %r12,%r12 +- adcq %r13,%r13 +- adcq %r14,%r14 +- adcq $0,%r15 +- +- +- mulq %rax +- movq %rax,%r8 +-.byte 102,72,15,126,200 +- movq %rdx,%rbp +- +- mulq %rax +- addq %rbp,%r9 +- adcq %rax,%r10 +-.byte 102,72,15,126,208 +- adcq $0,%rdx +- movq %rdx,%rbp +- +- mulq %rax +- addq %rbp,%r11 +- adcq %rax,%r12 +-.byte 102,72,15,126,216 +- adcq $0,%rdx +- movq %rdx,%rbp +- +- movq %r8,%rcx +- imulq 32(%rsi),%r8 +- +- mulq %rax +- addq %rbp,%r13 +- adcq %rax,%r14 +- movq 0(%rsi),%rax +- adcq %rdx,%r15 +- +- +- mulq %r8 +- movq %r8,%rbp +- addq %rax,%rcx +- movq 8(%rsi),%rax +- adcq %rdx,%rcx +- +- subq %r8,%r10 +- sbbq $0,%rbp +- +- mulq %r8 +- addq %rcx,%r9 +- adcq $0,%rdx +- addq %rax,%r9 +- movq %r8,%rax +- adcq %rdx,%r10 +- movq %r8,%rdx +- adcq $0,%rbp +- +- movq %r9,%rcx +- imulq 32(%rsi),%r9 +- +- shlq $32,%rax +- shrq $32,%rdx +- subq %rax,%r11 +- movq 0(%rsi),%rax +- sbbq %rdx,%r8 +- +- addq %rbp,%r11 +- adcq $0,%r8 +- +- +- mulq %r9 +- movq %r9,%rbp +- addq %rax,%rcx +- movq 8(%rsi),%rax +- adcq %rdx,%rcx +- +- subq %r9,%r11 +- sbbq $0,%rbp +- +- mulq %r9 +- addq %rcx,%r10 +- adcq $0,%rdx +- addq %rax,%r10 +- movq %r9,%rax +- adcq %rdx,%r11 +- movq %r9,%rdx +- adcq $0,%rbp +- +- movq %r10,%rcx +- imulq 32(%rsi),%r10 +- +- shlq $32,%rax +- shrq $32,%rdx +- subq %rax,%r8 +- movq 0(%rsi),%rax +- sbbq %rdx,%r9 +- +- addq %rbp,%r8 +- adcq $0,%r9 +- +- +- mulq %r10 +- movq %r10,%rbp +- addq %rax,%rcx +- movq 8(%rsi),%rax +- adcq %rdx,%rcx +- +- subq %r10,%r8 +- sbbq $0,%rbp +- +- mulq %r10 +- addq %rcx,%r11 +- adcq $0,%rdx +- addq %rax,%r11 +- movq %r10,%rax +- adcq %rdx,%r8 +- movq %r10,%rdx +- adcq $0,%rbp +- +- movq %r11,%rcx +- imulq 32(%rsi),%r11 +- +- shlq $32,%rax +- shrq $32,%rdx +- subq %rax,%r9 +- movq 0(%rsi),%rax +- sbbq %rdx,%r10 +- +- addq %rbp,%r9 +- adcq $0,%r10 +- +- +- mulq %r11 +- movq %r11,%rbp +- addq %rax,%rcx +- movq 8(%rsi),%rax +- adcq %rdx,%rcx +- +- subq %r11,%r9 +- sbbq $0,%rbp +- +- mulq %r11 +- addq %rcx,%r8 +- adcq $0,%rdx +- addq %rax,%r8 +- movq %r11,%rax +- adcq %rdx,%r9 +- movq %r11,%rdx +- adcq $0,%rbp +- +- shlq $32,%rax +- shrq $32,%rdx +- subq %rax,%r10 +- sbbq %rdx,%r11 +- +- addq %rbp,%r10 +- adcq $0,%r11 +- +- +- xorq %rdx,%rdx +- addq %r12,%r8 +- adcq %r13,%r9 +- movq %r8,%r12 +- adcq %r14,%r10 +- adcq %r15,%r11 +- movq %r9,%rax +- adcq $0,%rdx +- +- +- subq 0(%rsi),%r8 +- movq %r10,%r14 +- sbbq 8(%rsi),%r9 +- sbbq 16(%rsi),%r10 +- movq %r11,%r15 +- sbbq 24(%rsi),%r11 +- sbbq $0,%rdx +- +- cmovcq %r12,%r8 +- cmovncq 
%r9,%rax +- cmovncq %r10,%r14 +- cmovncq %r11,%r15 +- +- decq %rbx +- jnz L$oop_ord_sqr +- +- movq %r8,0(%rdi) +- movq %rax,8(%rdi) +- pxor %xmm1,%xmm1 +- movq %r14,16(%rdi) +- pxor %xmm2,%xmm2 +- movq %r15,24(%rdi) +- pxor %xmm3,%xmm3 +- +- movq 0(%rsp),%r15 +- +- movq 8(%rsp),%r14 +- +- movq 16(%rsp),%r13 +- +- movq 24(%rsp),%r12 +- +- movq 32(%rsp),%rbx +- +- movq 40(%rsp),%rbp +- +- leaq 48(%rsp),%rsp +- +-L$ord_sqr_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-ecp_nistz256_ord_mul_montx: +- +-L$ecp_nistz256_ord_mul_montx: +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$ord_mulx_body: +- +- movq %rdx,%rbx +- movq 0(%rdx),%rdx +- movq 0(%rsi),%r9 +- movq 8(%rsi),%r10 +- movq 16(%rsi),%r11 +- movq 24(%rsi),%r12 +- leaq -128(%rsi),%rsi +- leaq L$ord-128(%rip),%r14 +- movq L$ordK(%rip),%r15 +- +- +- mulxq %r9,%r8,%r9 +- mulxq %r10,%rcx,%r10 +- mulxq %r11,%rbp,%r11 +- addq %rcx,%r9 +- mulxq %r12,%rcx,%r12 +- movq %r8,%rdx +- mulxq %r15,%rdx,%rax +- adcq %rbp,%r10 +- adcq %rcx,%r11 +- adcq $0,%r12 +- +- +- xorq %r13,%r13 +- mulxq 0+128(%r14),%rcx,%rbp +- adcxq %rcx,%r8 +- adoxq %rbp,%r9 +- +- mulxq 8+128(%r14),%rcx,%rbp +- adcxq %rcx,%r9 +- adoxq %rbp,%r10 +- +- mulxq 16+128(%r14),%rcx,%rbp +- adcxq %rcx,%r10 +- adoxq %rbp,%r11 +- +- mulxq 24+128(%r14),%rcx,%rbp +- movq 8(%rbx),%rdx +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- adcxq %r8,%r12 +- adoxq %r8,%r13 +- adcq $0,%r13 +- +- +- mulxq 0+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r9 +- adoxq %rbp,%r10 +- +- mulxq 8+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r10 +- adoxq %rbp,%r11 +- +- mulxq 16+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq 24+128(%rsi),%rcx,%rbp +- movq %r9,%rdx +- mulxq %r15,%rdx,%rax +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- +- adcxq %r8,%r13 +- adoxq %r8,%r8 +- adcq $0,%r8 +- +- +- mulxq 0+128(%r14),%rcx,%rbp +- adcxq %rcx,%r9 +- adoxq %rbp,%r10 +- +- mulxq 8+128(%r14),%rcx,%rbp +- adcxq %rcx,%r10 +- adoxq %rbp,%r11 +- +- mulxq 16+128(%r14),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq 24+128(%r14),%rcx,%rbp +- movq 16(%rbx),%rdx +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- adcxq %r9,%r13 +- adoxq %r9,%r8 +- adcq $0,%r8 +- +- +- mulxq 0+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r10 +- adoxq %rbp,%r11 +- +- mulxq 8+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq 16+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- +- mulxq 24+128(%rsi),%rcx,%rbp +- movq %r10,%rdx +- mulxq %r15,%rdx,%rax +- adcxq %rcx,%r13 +- adoxq %rbp,%r8 +- +- adcxq %r9,%r8 +- adoxq %r9,%r9 +- adcq $0,%r9 +- +- +- mulxq 0+128(%r14),%rcx,%rbp +- adcxq %rcx,%r10 +- adoxq %rbp,%r11 +- +- mulxq 8+128(%r14),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq 16+128(%r14),%rcx,%rbp +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- +- mulxq 24+128(%r14),%rcx,%rbp +- movq 24(%rbx),%rdx +- adcxq %rcx,%r13 +- adoxq %rbp,%r8 +- adcxq %r10,%r8 +- adoxq %r10,%r9 +- adcq $0,%r9 +- +- +- mulxq 0+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq 8+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- +- mulxq 16+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r13 +- adoxq %rbp,%r8 +- +- mulxq 24+128(%rsi),%rcx,%rbp +- movq %r11,%rdx +- mulxq %r15,%rdx,%rax +- adcxq %rcx,%r8 +- adoxq %rbp,%r9 +- +- adcxq %r10,%r9 +- adoxq %r10,%r10 +- adcq $0,%r10 +- +- +- mulxq 0+128(%r14),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq 8+128(%r14),%rcx,%rbp +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- +- mulxq 16+128(%r14),%rcx,%rbp +- adcxq %rcx,%r13 +- 
adoxq %rbp,%r8 +- +- mulxq 24+128(%r14),%rcx,%rbp +- leaq 128(%r14),%r14 +- movq %r12,%rbx +- adcxq %rcx,%r8 +- adoxq %rbp,%r9 +- movq %r13,%rdx +- adcxq %r11,%r9 +- adoxq %r11,%r10 +- adcq $0,%r10 +- +- +- +- movq %r8,%rcx +- subq 0(%r14),%r12 +- sbbq 8(%r14),%r13 +- sbbq 16(%r14),%r8 +- movq %r9,%rbp +- sbbq 24(%r14),%r9 +- sbbq $0,%r10 +- +- cmovcq %rbx,%r12 +- cmovcq %rdx,%r13 +- cmovcq %rcx,%r8 +- cmovcq %rbp,%r9 +- +- movq %r12,0(%rdi) +- movq %r13,8(%rdi) +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- movq 0(%rsp),%r15 +- +- movq 8(%rsp),%r14 +- +- movq 16(%rsp),%r13 +- +- movq 24(%rsp),%r12 +- +- movq 32(%rsp),%rbx +- +- movq 40(%rsp),%rbp +- +- leaq 48(%rsp),%rsp +- +-L$ord_mulx_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-ecp_nistz256_ord_sqr_montx: +- +-L$ecp_nistz256_ord_sqr_montx: +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$ord_sqrx_body: +- +- movq %rdx,%rbx +- movq 0(%rsi),%rdx +- movq 8(%rsi),%r14 +- movq 16(%rsi),%r15 +- movq 24(%rsi),%r8 +- leaq L$ord(%rip),%rsi +- jmp L$oop_ord_sqrx +- +-.p2align 5 +-L$oop_ord_sqrx: +- mulxq %r14,%r9,%r10 +- mulxq %r15,%rcx,%r11 +- movq %rdx,%rax +-.byte 102,73,15,110,206 +- mulxq %r8,%rbp,%r12 +- movq %r14,%rdx +- addq %rcx,%r10 +-.byte 102,73,15,110,215 +- adcq %rbp,%r11 +- adcq $0,%r12 +- xorq %r13,%r13 +- +- mulxq %r15,%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq %r8,%rcx,%rbp +- movq %r15,%rdx +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- adcq $0,%r13 +- +- mulxq %r8,%rcx,%r14 +- movq %rax,%rdx +-.byte 102,73,15,110,216 +- xorq %r15,%r15 +- adcxq %r9,%r9 +- adoxq %rcx,%r13 +- adcxq %r10,%r10 +- adoxq %r15,%r14 +- +- +- mulxq %rdx,%r8,%rbp +-.byte 102,72,15,126,202 +- adcxq %r11,%r11 +- adoxq %rbp,%r9 +- adcxq %r12,%r12 +- mulxq %rdx,%rcx,%rax +-.byte 102,72,15,126,210 +- adcxq %r13,%r13 +- adoxq %rcx,%r10 +- adcxq %r14,%r14 +- mulxq %rdx,%rcx,%rbp +-.byte 0x67 +-.byte 102,72,15,126,218 +- adoxq %rax,%r11 +- adcxq %r15,%r15 +- adoxq %rcx,%r12 +- adoxq %rbp,%r13 +- mulxq %rdx,%rcx,%rax +- adoxq %rcx,%r14 +- adoxq %rax,%r15 +- +- +- movq %r8,%rdx +- mulxq 32(%rsi),%rdx,%rcx +- +- xorq %rax,%rax +- mulxq 0(%rsi),%rcx,%rbp +- adcxq %rcx,%r8 +- adoxq %rbp,%r9 +- mulxq 8(%rsi),%rcx,%rbp +- adcxq %rcx,%r9 +- adoxq %rbp,%r10 +- mulxq 16(%rsi),%rcx,%rbp +- adcxq %rcx,%r10 +- adoxq %rbp,%r11 +- mulxq 24(%rsi),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r8 +- adcxq %rax,%r8 +- +- +- movq %r9,%rdx +- mulxq 32(%rsi),%rdx,%rcx +- +- mulxq 0(%rsi),%rcx,%rbp +- adoxq %rcx,%r9 +- adcxq %rbp,%r10 +- mulxq 8(%rsi),%rcx,%rbp +- adoxq %rcx,%r10 +- adcxq %rbp,%r11 +- mulxq 16(%rsi),%rcx,%rbp +- adoxq %rcx,%r11 +- adcxq %rbp,%r8 +- mulxq 24(%rsi),%rcx,%rbp +- adoxq %rcx,%r8 +- adcxq %rbp,%r9 +- adoxq %rax,%r9 +- +- +- movq %r10,%rdx +- mulxq 32(%rsi),%rdx,%rcx +- +- mulxq 0(%rsi),%rcx,%rbp +- adcxq %rcx,%r10 +- adoxq %rbp,%r11 +- mulxq 8(%rsi),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r8 +- mulxq 16(%rsi),%rcx,%rbp +- adcxq %rcx,%r8 +- adoxq %rbp,%r9 +- mulxq 24(%rsi),%rcx,%rbp +- adcxq %rcx,%r9 +- adoxq %rbp,%r10 +- adcxq %rax,%r10 +- +- +- movq %r11,%rdx +- mulxq 32(%rsi),%rdx,%rcx +- +- mulxq 0(%rsi),%rcx,%rbp +- adoxq %rcx,%r11 +- adcxq %rbp,%r8 +- mulxq 8(%rsi),%rcx,%rbp +- adoxq %rcx,%r8 +- adcxq %rbp,%r9 +- mulxq 16(%rsi),%rcx,%rbp +- adoxq %rcx,%r9 +- adcxq %rbp,%r10 +- mulxq 24(%rsi),%rcx,%rbp +- adoxq %rcx,%r10 +- adcxq %rbp,%r11 +- adoxq %rax,%r11 +- +- +- addq %r8,%r12 +- adcq %r13,%r9 +- movq %r12,%rdx +- adcq %r14,%r10 +- adcq %r15,%r11 +- movq %r9,%r14 +- 
adcq $0,%rax +- +- +- subq 0(%rsi),%r12 +- movq %r10,%r15 +- sbbq 8(%rsi),%r9 +- sbbq 16(%rsi),%r10 +- movq %r11,%r8 +- sbbq 24(%rsi),%r11 +- sbbq $0,%rax +- +- cmovncq %r12,%rdx +- cmovncq %r9,%r14 +- cmovncq %r10,%r15 +- cmovncq %r11,%r8 +- +- decq %rbx +- jnz L$oop_ord_sqrx +- +- movq %rdx,0(%rdi) +- movq %r14,8(%rdi) +- pxor %xmm1,%xmm1 +- movq %r15,16(%rdi) +- pxor %xmm2,%xmm2 +- movq %r8,24(%rdi) +- pxor %xmm3,%xmm3 +- +- movq 0(%rsp),%r15 +- +- movq 8(%rsp),%r14 +- +- movq 16(%rsp),%r13 +- +- movq 24(%rsp),%r12 +- +- movq 32(%rsp),%rbx +- +- movq 40(%rsp),%rbp +- +- leaq 48(%rsp),%rsp +- +-L$ord_sqrx_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +-.globl _ecp_nistz256_mul_mont +-.private_extern _ecp_nistz256_mul_mont +- +-.p2align 5 +-_ecp_nistz256_mul_mont: +- +- leaq _OPENSSL_ia32cap_P(%rip),%rcx +- movq 8(%rcx),%rcx +- andl $0x80100,%ecx +-L$mul_mont: +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$mul_body: +- cmpl $0x80100,%ecx +- je L$mul_montx +- movq %rdx,%rbx +- movq 0(%rdx),%rax +- movq 0(%rsi),%r9 +- movq 8(%rsi),%r10 +- movq 16(%rsi),%r11 +- movq 24(%rsi),%r12 +- +- call __ecp_nistz256_mul_montq +- jmp L$mul_mont_done +- +-.p2align 5 +-L$mul_montx: +- movq %rdx,%rbx +- movq 0(%rdx),%rdx +- movq 0(%rsi),%r9 +- movq 8(%rsi),%r10 +- movq 16(%rsi),%r11 +- movq 24(%rsi),%r12 +- leaq -128(%rsi),%rsi +- +- call __ecp_nistz256_mul_montx +-L$mul_mont_done: +- movq 0(%rsp),%r15 +- +- movq 8(%rsp),%r14 +- +- movq 16(%rsp),%r13 +- +- movq 24(%rsp),%r12 +- +- movq 32(%rsp),%rbx +- +- movq 40(%rsp),%rbp +- +- leaq 48(%rsp),%rsp +- +-L$mul_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-__ecp_nistz256_mul_montq: +- +- +- +- movq %rax,%rbp +- mulq %r9 +- movq L$poly+8(%rip),%r14 +- movq %rax,%r8 +- movq %rbp,%rax +- movq %rdx,%r9 +- +- mulq %r10 +- movq L$poly+24(%rip),%r15 +- addq %rax,%r9 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %r11 +- addq %rax,%r10 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %r12 +- addq %rax,%r11 +- movq %r8,%rax +- adcq $0,%rdx +- xorq %r13,%r13 +- movq %rdx,%r12 +- +- +- +- +- +- +- +- +- +- +- movq %r8,%rbp +- shlq $32,%r8 +- mulq %r15 +- shrq $32,%rbp +- addq %r8,%r9 +- adcq %rbp,%r10 +- adcq %rax,%r11 +- movq 8(%rbx),%rax +- adcq %rdx,%r12 +- adcq $0,%r13 +- xorq %r8,%r8 +- +- +- +- movq %rax,%rbp +- mulq 0(%rsi) +- addq %rax,%r9 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq 8(%rsi) +- addq %rcx,%r10 +- adcq $0,%rdx +- addq %rax,%r10 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq 16(%rsi) +- addq %rcx,%r11 +- adcq $0,%rdx +- addq %rax,%r11 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq 24(%rsi) +- addq %rcx,%r12 +- adcq $0,%rdx +- addq %rax,%r12 +- movq %r9,%rax +- adcq %rdx,%r13 +- adcq $0,%r8 +- +- +- +- movq %r9,%rbp +- shlq $32,%r9 +- mulq %r15 +- shrq $32,%rbp +- addq %r9,%r10 +- adcq %rbp,%r11 +- adcq %rax,%r12 +- movq 16(%rbx),%rax +- adcq %rdx,%r13 +- adcq $0,%r8 +- xorq %r9,%r9 +- +- +- +- movq %rax,%rbp +- mulq 0(%rsi) +- addq %rax,%r10 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq 8(%rsi) +- addq %rcx,%r11 +- adcq $0,%rdx +- addq %rax,%r11 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq 16(%rsi) +- addq %rcx,%r12 +- adcq $0,%rdx +- addq %rax,%r12 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq 24(%rsi) +- addq %rcx,%r13 +- adcq $0,%rdx +- addq %rax,%r13 +- movq %r10,%rax +- adcq %rdx,%r8 +- adcq $0,%r9 +- +- +- +- movq %r10,%rbp +- shlq 
$32,%r10 +- mulq %r15 +- shrq $32,%rbp +- addq %r10,%r11 +- adcq %rbp,%r12 +- adcq %rax,%r13 +- movq 24(%rbx),%rax +- adcq %rdx,%r8 +- adcq $0,%r9 +- xorq %r10,%r10 +- +- +- +- movq %rax,%rbp +- mulq 0(%rsi) +- addq %rax,%r11 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq 8(%rsi) +- addq %rcx,%r12 +- adcq $0,%rdx +- addq %rax,%r12 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq 16(%rsi) +- addq %rcx,%r13 +- adcq $0,%rdx +- addq %rax,%r13 +- movq %rbp,%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq 24(%rsi) +- addq %rcx,%r8 +- adcq $0,%rdx +- addq %rax,%r8 +- movq %r11,%rax +- adcq %rdx,%r9 +- adcq $0,%r10 +- +- +- +- movq %r11,%rbp +- shlq $32,%r11 +- mulq %r15 +- shrq $32,%rbp +- addq %r11,%r12 +- adcq %rbp,%r13 +- movq %r12,%rcx +- adcq %rax,%r8 +- adcq %rdx,%r9 +- movq %r13,%rbp +- adcq $0,%r10 +- +- +- +- subq $-1,%r12 +- movq %r8,%rbx +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%rdx +- sbbq %r15,%r9 +- sbbq $0,%r10 +- +- cmovcq %rcx,%r12 +- cmovcq %rbp,%r13 +- movq %r12,0(%rdi) +- cmovcq %rbx,%r8 +- movq %r13,8(%rdi) +- cmovcq %rdx,%r9 +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +- +-.globl _ecp_nistz256_sqr_mont +-.private_extern _ecp_nistz256_sqr_mont +- +-.p2align 5 +-_ecp_nistz256_sqr_mont: +- +- leaq _OPENSSL_ia32cap_P(%rip),%rcx +- movq 8(%rcx),%rcx +- andl $0x80100,%ecx +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$sqr_body: +- cmpl $0x80100,%ecx +- je L$sqr_montx +- movq 0(%rsi),%rax +- movq 8(%rsi),%r14 +- movq 16(%rsi),%r15 +- movq 24(%rsi),%r8 +- +- call __ecp_nistz256_sqr_montq +- jmp L$sqr_mont_done +- +-.p2align 5 +-L$sqr_montx: +- movq 0(%rsi),%rdx +- movq 8(%rsi),%r14 +- movq 16(%rsi),%r15 +- movq 24(%rsi),%r8 +- leaq -128(%rsi),%rsi +- +- call __ecp_nistz256_sqr_montx +-L$sqr_mont_done: +- movq 0(%rsp),%r15 +- +- movq 8(%rsp),%r14 +- +- movq 16(%rsp),%r13 +- +- movq 24(%rsp),%r12 +- +- movq 32(%rsp),%rbx +- +- movq 40(%rsp),%rbp +- +- leaq 48(%rsp),%rsp +- +-L$sqr_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-__ecp_nistz256_sqr_montq: +- +- movq %rax,%r13 +- mulq %r14 +- movq %rax,%r9 +- movq %r15,%rax +- movq %rdx,%r10 +- +- mulq %r13 +- addq %rax,%r10 +- movq %r8,%rax +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %r13 +- addq %rax,%r11 +- movq %r15,%rax +- adcq $0,%rdx +- movq %rdx,%r12 +- +- +- mulq %r14 +- addq %rax,%r11 +- movq %r8,%rax +- adcq $0,%rdx +- movq %rdx,%rbp +- +- mulq %r14 +- addq %rax,%r12 +- movq %r8,%rax +- adcq $0,%rdx +- addq %rbp,%r12 +- movq %rdx,%r13 +- adcq $0,%r13 +- +- +- mulq %r15 +- xorq %r15,%r15 +- addq %rax,%r13 +- movq 0(%rsi),%rax +- movq %rdx,%r14 +- adcq $0,%r14 +- +- addq %r9,%r9 +- adcq %r10,%r10 +- adcq %r11,%r11 +- adcq %r12,%r12 +- adcq %r13,%r13 +- adcq %r14,%r14 +- adcq $0,%r15 +- +- mulq %rax +- movq %rax,%r8 +- movq 8(%rsi),%rax +- movq %rdx,%rcx +- +- mulq %rax +- addq %rcx,%r9 +- adcq %rax,%r10 +- movq 16(%rsi),%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq %rax +- addq %rcx,%r11 +- adcq %rax,%r12 +- movq 24(%rsi),%rax +- adcq $0,%rdx +- movq %rdx,%rcx +- +- mulq %rax +- addq %rcx,%r13 +- adcq %rax,%r14 +- movq %r8,%rax +- adcq %rdx,%r15 +- +- movq L$poly+8(%rip),%rsi +- movq L$poly+24(%rip),%rbp +- +- +- +- +- movq %r8,%rcx +- shlq $32,%r8 +- mulq %rbp +- shrq $32,%rcx +- addq %r8,%r9 +- adcq %rcx,%r10 +- adcq %rax,%r11 +- movq %r9,%rax +- adcq $0,%rdx +- +- +- +- movq %r9,%rcx +- shlq $32,%r9 +- movq %rdx,%r8 +- mulq %rbp +- shrq $32,%rcx +- addq %r9,%r10 +- adcq 
%rcx,%r11 +- adcq %rax,%r8 +- movq %r10,%rax +- adcq $0,%rdx +- +- +- +- movq %r10,%rcx +- shlq $32,%r10 +- movq %rdx,%r9 +- mulq %rbp +- shrq $32,%rcx +- addq %r10,%r11 +- adcq %rcx,%r8 +- adcq %rax,%r9 +- movq %r11,%rax +- adcq $0,%rdx +- +- +- +- movq %r11,%rcx +- shlq $32,%r11 +- movq %rdx,%r10 +- mulq %rbp +- shrq $32,%rcx +- addq %r11,%r8 +- adcq %rcx,%r9 +- adcq %rax,%r10 +- adcq $0,%rdx +- xorq %r11,%r11 +- +- +- +- addq %r8,%r12 +- adcq %r9,%r13 +- movq %r12,%r8 +- adcq %r10,%r14 +- adcq %rdx,%r15 +- movq %r13,%r9 +- adcq $0,%r11 +- +- subq $-1,%r12 +- movq %r14,%r10 +- sbbq %rsi,%r13 +- sbbq $0,%r14 +- movq %r15,%rcx +- sbbq %rbp,%r15 +- sbbq $0,%r11 +- +- cmovcq %r8,%r12 +- cmovcq %r9,%r13 +- movq %r12,0(%rdi) +- cmovcq %r10,%r14 +- movq %r13,8(%rdi) +- cmovcq %rcx,%r15 +- movq %r14,16(%rdi) +- movq %r15,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-__ecp_nistz256_mul_montx: +- +- +- +- mulxq %r9,%r8,%r9 +- mulxq %r10,%rcx,%r10 +- movq $32,%r14 +- xorq %r13,%r13 +- mulxq %r11,%rbp,%r11 +- movq L$poly+24(%rip),%r15 +- adcq %rcx,%r9 +- mulxq %r12,%rcx,%r12 +- movq %r8,%rdx +- adcq %rbp,%r10 +- shlxq %r14,%r8,%rbp +- adcq %rcx,%r11 +- shrxq %r14,%r8,%rcx +- adcq $0,%r12 +- +- +- +- addq %rbp,%r9 +- adcq %rcx,%r10 +- +- mulxq %r15,%rcx,%rbp +- movq 8(%rbx),%rdx +- adcq %rcx,%r11 +- adcq %rbp,%r12 +- adcq $0,%r13 +- xorq %r8,%r8 +- +- +- +- mulxq 0+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r9 +- adoxq %rbp,%r10 +- +- mulxq 8+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r10 +- adoxq %rbp,%r11 +- +- mulxq 16+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq 24+128(%rsi),%rcx,%rbp +- movq %r9,%rdx +- adcxq %rcx,%r12 +- shlxq %r14,%r9,%rcx +- adoxq %rbp,%r13 +- shrxq %r14,%r9,%rbp +- +- adcxq %r8,%r13 +- adoxq %r8,%r8 +- adcq $0,%r8 +- +- +- +- addq %rcx,%r10 +- adcq %rbp,%r11 +- +- mulxq %r15,%rcx,%rbp +- movq 16(%rbx),%rdx +- adcq %rcx,%r12 +- adcq %rbp,%r13 +- adcq $0,%r8 +- xorq %r9,%r9 +- +- +- +- mulxq 0+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r10 +- adoxq %rbp,%r11 +- +- mulxq 8+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq 16+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- +- mulxq 24+128(%rsi),%rcx,%rbp +- movq %r10,%rdx +- adcxq %rcx,%r13 +- shlxq %r14,%r10,%rcx +- adoxq %rbp,%r8 +- shrxq %r14,%r10,%rbp +- +- adcxq %r9,%r8 +- adoxq %r9,%r9 +- adcq $0,%r9 +- +- +- +- addq %rcx,%r11 +- adcq %rbp,%r12 +- +- mulxq %r15,%rcx,%rbp +- movq 24(%rbx),%rdx +- adcq %rcx,%r13 +- adcq %rbp,%r8 +- adcq $0,%r9 +- xorq %r10,%r10 +- +- +- +- mulxq 0+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq 8+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- +- mulxq 16+128(%rsi),%rcx,%rbp +- adcxq %rcx,%r13 +- adoxq %rbp,%r8 +- +- mulxq 24+128(%rsi),%rcx,%rbp +- movq %r11,%rdx +- adcxq %rcx,%r8 +- shlxq %r14,%r11,%rcx +- adoxq %rbp,%r9 +- shrxq %r14,%r11,%rbp +- +- adcxq %r10,%r9 +- adoxq %r10,%r10 +- adcq $0,%r10 +- +- +- +- addq %rcx,%r12 +- adcq %rbp,%r13 +- +- mulxq %r15,%rcx,%rbp +- movq %r12,%rbx +- movq L$poly+8(%rip),%r14 +- adcq %rcx,%r8 +- movq %r13,%rdx +- adcq %rbp,%r9 +- adcq $0,%r10 +- +- +- +- xorl %eax,%eax +- movq %r8,%rcx +- sbbq $-1,%r12 +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%rbp +- sbbq %r15,%r9 +- sbbq $0,%r10 +- +- cmovcq %rbx,%r12 +- cmovcq %rdx,%r13 +- movq %r12,0(%rdi) +- cmovcq %rcx,%r8 +- movq %r13,8(%rdi) +- cmovcq %rbp,%r9 +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-__ecp_nistz256_sqr_montx: +- +- mulxq %r14,%r9,%r10 +- mulxq %r15,%rcx,%r11 +- 
xorl %eax,%eax +- adcq %rcx,%r10 +- mulxq %r8,%rbp,%r12 +- movq %r14,%rdx +- adcq %rbp,%r11 +- adcq $0,%r12 +- xorq %r13,%r13 +- +- +- mulxq %r15,%rcx,%rbp +- adcxq %rcx,%r11 +- adoxq %rbp,%r12 +- +- mulxq %r8,%rcx,%rbp +- movq %r15,%rdx +- adcxq %rcx,%r12 +- adoxq %rbp,%r13 +- adcq $0,%r13 +- +- +- mulxq %r8,%rcx,%r14 +- movq 0+128(%rsi),%rdx +- xorq %r15,%r15 +- adcxq %r9,%r9 +- adoxq %rcx,%r13 +- adcxq %r10,%r10 +- adoxq %r15,%r14 +- +- mulxq %rdx,%r8,%rbp +- movq 8+128(%rsi),%rdx +- adcxq %r11,%r11 +- adoxq %rbp,%r9 +- adcxq %r12,%r12 +- mulxq %rdx,%rcx,%rax +- movq 16+128(%rsi),%rdx +- adcxq %r13,%r13 +- adoxq %rcx,%r10 +- adcxq %r14,%r14 +-.byte 0x67 +- mulxq %rdx,%rcx,%rbp +- movq 24+128(%rsi),%rdx +- adoxq %rax,%r11 +- adcxq %r15,%r15 +- adoxq %rcx,%r12 +- movq $32,%rsi +- adoxq %rbp,%r13 +-.byte 0x67,0x67 +- mulxq %rdx,%rcx,%rax +- movq L$poly+24(%rip),%rdx +- adoxq %rcx,%r14 +- shlxq %rsi,%r8,%rcx +- adoxq %rax,%r15 +- shrxq %rsi,%r8,%rax +- movq %rdx,%rbp +- +- +- addq %rcx,%r9 +- adcq %rax,%r10 +- +- mulxq %r8,%rcx,%r8 +- adcq %rcx,%r11 +- shlxq %rsi,%r9,%rcx +- adcq $0,%r8 +- shrxq %rsi,%r9,%rax +- +- +- addq %rcx,%r10 +- adcq %rax,%r11 +- +- mulxq %r9,%rcx,%r9 +- adcq %rcx,%r8 +- shlxq %rsi,%r10,%rcx +- adcq $0,%r9 +- shrxq %rsi,%r10,%rax +- +- +- addq %rcx,%r11 +- adcq %rax,%r8 +- +- mulxq %r10,%rcx,%r10 +- adcq %rcx,%r9 +- shlxq %rsi,%r11,%rcx +- adcq $0,%r10 +- shrxq %rsi,%r11,%rax +- +- +- addq %rcx,%r8 +- adcq %rax,%r9 +- +- mulxq %r11,%rcx,%r11 +- adcq %rcx,%r10 +- adcq $0,%r11 +- +- xorq %rdx,%rdx +- addq %r8,%r12 +- movq L$poly+8(%rip),%rsi +- adcq %r9,%r13 +- movq %r12,%r8 +- adcq %r10,%r14 +- adcq %r11,%r15 +- movq %r13,%r9 +- adcq $0,%rdx +- +- subq $-1,%r12 +- movq %r14,%r10 +- sbbq %rsi,%r13 +- sbbq $0,%r14 +- movq %r15,%r11 +- sbbq %rbp,%r15 +- sbbq $0,%rdx +- +- cmovcq %r8,%r12 +- cmovcq %r9,%r13 +- movq %r12,0(%rdi) +- cmovcq %r10,%r14 +- movq %r13,8(%rdi) +- cmovcq %r11,%r15 +- movq %r14,16(%rdi) +- movq %r15,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +- +- +-.globl _ecp_nistz256_select_w5 +-.private_extern _ecp_nistz256_select_w5 +- +-.p2align 5 +-_ecp_nistz256_select_w5: +- +- leaq _OPENSSL_ia32cap_P(%rip),%rax +- movq 8(%rax),%rax +- testl $32,%eax +- jnz L$avx2_select_w5 +- movdqa L$One(%rip),%xmm0 +- movd %edx,%xmm1 +- +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- pxor %xmm7,%xmm7 +- +- movdqa %xmm0,%xmm8 +- pshufd $0,%xmm1,%xmm1 +- +- movq $16,%rax +-L$select_loop_sse_w5: +- +- movdqa %xmm8,%xmm15 +- paddd %xmm0,%xmm8 +- pcmpeqd %xmm1,%xmm15 +- +- movdqa 0(%rsi),%xmm9 +- movdqa 16(%rsi),%xmm10 +- movdqa 32(%rsi),%xmm11 +- movdqa 48(%rsi),%xmm12 +- movdqa 64(%rsi),%xmm13 +- movdqa 80(%rsi),%xmm14 +- leaq 96(%rsi),%rsi +- +- pand %xmm15,%xmm9 +- pand %xmm15,%xmm10 +- por %xmm9,%xmm2 +- pand %xmm15,%xmm11 +- por %xmm10,%xmm3 +- pand %xmm15,%xmm12 +- por %xmm11,%xmm4 +- pand %xmm15,%xmm13 +- por %xmm12,%xmm5 +- pand %xmm15,%xmm14 +- por %xmm13,%xmm6 +- por %xmm14,%xmm7 +- +- decq %rax +- jnz L$select_loop_sse_w5 +- +- movdqu %xmm2,0(%rdi) +- movdqu %xmm3,16(%rdi) +- movdqu %xmm4,32(%rdi) +- movdqu %xmm5,48(%rdi) +- movdqu %xmm6,64(%rdi) +- movdqu %xmm7,80(%rdi) +- .byte 0xf3,0xc3 +- +-L$SEH_end_ecp_nistz256_select_w5: +- +- +- +- +-.globl _ecp_nistz256_select_w7 +-.private_extern _ecp_nistz256_select_w7 +- +-.p2align 5 +-_ecp_nistz256_select_w7: +- +- leaq _OPENSSL_ia32cap_P(%rip),%rax +- movq 8(%rax),%rax +- testl $32,%eax +- jnz L$avx2_select_w7 +- movdqa L$One(%rip),%xmm8 +- movd %edx,%xmm1 +- +- pxor 
%xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- +- movdqa %xmm8,%xmm0 +- pshufd $0,%xmm1,%xmm1 +- movq $64,%rax +- +-L$select_loop_sse_w7: +- movdqa %xmm8,%xmm15 +- paddd %xmm0,%xmm8 +- movdqa 0(%rsi),%xmm9 +- movdqa 16(%rsi),%xmm10 +- pcmpeqd %xmm1,%xmm15 +- movdqa 32(%rsi),%xmm11 +- movdqa 48(%rsi),%xmm12 +- leaq 64(%rsi),%rsi +- +- pand %xmm15,%xmm9 +- pand %xmm15,%xmm10 +- por %xmm9,%xmm2 +- pand %xmm15,%xmm11 +- por %xmm10,%xmm3 +- pand %xmm15,%xmm12 +- por %xmm11,%xmm4 +- prefetcht0 255(%rsi) +- por %xmm12,%xmm5 +- +- decq %rax +- jnz L$select_loop_sse_w7 +- +- movdqu %xmm2,0(%rdi) +- movdqu %xmm3,16(%rdi) +- movdqu %xmm4,32(%rdi) +- movdqu %xmm5,48(%rdi) +- .byte 0xf3,0xc3 +- +-L$SEH_end_ecp_nistz256_select_w7: +- +- +- +- +-.p2align 5 +-ecp_nistz256_avx2_select_w5: +- +-L$avx2_select_w5: +- vzeroupper +- vmovdqa L$Two(%rip),%ymm0 +- +- vpxor %ymm2,%ymm2,%ymm2 +- vpxor %ymm3,%ymm3,%ymm3 +- vpxor %ymm4,%ymm4,%ymm4 +- +- vmovdqa L$One(%rip),%ymm5 +- vmovdqa L$Two(%rip),%ymm10 +- +- vmovd %edx,%xmm1 +- vpermd %ymm1,%ymm2,%ymm1 +- +- movq $8,%rax +-L$select_loop_avx2_w5: +- +- vmovdqa 0(%rsi),%ymm6 +- vmovdqa 32(%rsi),%ymm7 +- vmovdqa 64(%rsi),%ymm8 +- +- vmovdqa 96(%rsi),%ymm11 +- vmovdqa 128(%rsi),%ymm12 +- vmovdqa 160(%rsi),%ymm13 +- +- vpcmpeqd %ymm1,%ymm5,%ymm9 +- vpcmpeqd %ymm1,%ymm10,%ymm14 +- +- vpaddd %ymm0,%ymm5,%ymm5 +- vpaddd %ymm0,%ymm10,%ymm10 +- leaq 192(%rsi),%rsi +- +- vpand %ymm9,%ymm6,%ymm6 +- vpand %ymm9,%ymm7,%ymm7 +- vpand %ymm9,%ymm8,%ymm8 +- vpand %ymm14,%ymm11,%ymm11 +- vpand %ymm14,%ymm12,%ymm12 +- vpand %ymm14,%ymm13,%ymm13 +- +- vpxor %ymm6,%ymm2,%ymm2 +- vpxor %ymm7,%ymm3,%ymm3 +- vpxor %ymm8,%ymm4,%ymm4 +- vpxor %ymm11,%ymm2,%ymm2 +- vpxor %ymm12,%ymm3,%ymm3 +- vpxor %ymm13,%ymm4,%ymm4 +- +- decq %rax +- jnz L$select_loop_avx2_w5 +- +- vmovdqu %ymm2,0(%rdi) +- vmovdqu %ymm3,32(%rdi) +- vmovdqu %ymm4,64(%rdi) +- vzeroupper +- .byte 0xf3,0xc3 +- +-L$SEH_end_ecp_nistz256_avx2_select_w5: +- +- +- +- +-.globl _ecp_nistz256_avx2_select_w7 +-.private_extern _ecp_nistz256_avx2_select_w7 +- +-.p2align 5 +-_ecp_nistz256_avx2_select_w7: +- +-L$avx2_select_w7: +- vzeroupper +- vmovdqa L$Three(%rip),%ymm0 +- +- vpxor %ymm2,%ymm2,%ymm2 +- vpxor %ymm3,%ymm3,%ymm3 +- +- vmovdqa L$One(%rip),%ymm4 +- vmovdqa L$Two(%rip),%ymm8 +- vmovdqa L$Three(%rip),%ymm12 +- +- vmovd %edx,%xmm1 +- vpermd %ymm1,%ymm2,%ymm1 +- +- +- movq $21,%rax +-L$select_loop_avx2_w7: +- +- vmovdqa 0(%rsi),%ymm5 +- vmovdqa 32(%rsi),%ymm6 +- +- vmovdqa 64(%rsi),%ymm9 +- vmovdqa 96(%rsi),%ymm10 +- +- vmovdqa 128(%rsi),%ymm13 +- vmovdqa 160(%rsi),%ymm14 +- +- vpcmpeqd %ymm1,%ymm4,%ymm7 +- vpcmpeqd %ymm1,%ymm8,%ymm11 +- vpcmpeqd %ymm1,%ymm12,%ymm15 +- +- vpaddd %ymm0,%ymm4,%ymm4 +- vpaddd %ymm0,%ymm8,%ymm8 +- vpaddd %ymm0,%ymm12,%ymm12 +- leaq 192(%rsi),%rsi +- +- vpand %ymm7,%ymm5,%ymm5 +- vpand %ymm7,%ymm6,%ymm6 +- vpand %ymm11,%ymm9,%ymm9 +- vpand %ymm11,%ymm10,%ymm10 +- vpand %ymm15,%ymm13,%ymm13 +- vpand %ymm15,%ymm14,%ymm14 +- +- vpxor %ymm5,%ymm2,%ymm2 +- vpxor %ymm6,%ymm3,%ymm3 +- vpxor %ymm9,%ymm2,%ymm2 +- vpxor %ymm10,%ymm3,%ymm3 +- vpxor %ymm13,%ymm2,%ymm2 +- vpxor %ymm14,%ymm3,%ymm3 +- +- decq %rax +- jnz L$select_loop_avx2_w7 +- +- +- vmovdqa 0(%rsi),%ymm5 +- vmovdqa 32(%rsi),%ymm6 +- +- vpcmpeqd %ymm1,%ymm4,%ymm7 +- +- vpand %ymm7,%ymm5,%ymm5 +- vpand %ymm7,%ymm6,%ymm6 +- +- vpxor %ymm5,%ymm2,%ymm2 +- vpxor %ymm6,%ymm3,%ymm3 +- +- vmovdqu %ymm2,0(%rdi) +- vmovdqu %ymm3,32(%rdi) +- vzeroupper +- .byte 0xf3,0xc3 +- +-L$SEH_end_ecp_nistz256_avx2_select_w7: +- +- 
+-.p2align 5 +-__ecp_nistz256_add_toq: +- +- xorq %r11,%r11 +- addq 0(%rbx),%r12 +- adcq 8(%rbx),%r13 +- movq %r12,%rax +- adcq 16(%rbx),%r8 +- adcq 24(%rbx),%r9 +- movq %r13,%rbp +- adcq $0,%r11 +- +- subq $-1,%r12 +- movq %r8,%rcx +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%r10 +- sbbq %r15,%r9 +- sbbq $0,%r11 +- +- cmovcq %rax,%r12 +- cmovcq %rbp,%r13 +- movq %r12,0(%rdi) +- cmovcq %rcx,%r8 +- movq %r13,8(%rdi) +- cmovcq %r10,%r9 +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-__ecp_nistz256_sub_fromq: +- +- subq 0(%rbx),%r12 +- sbbq 8(%rbx),%r13 +- movq %r12,%rax +- sbbq 16(%rbx),%r8 +- sbbq 24(%rbx),%r9 +- movq %r13,%rbp +- sbbq %r11,%r11 +- +- addq $-1,%r12 +- movq %r8,%rcx +- adcq %r14,%r13 +- adcq $0,%r8 +- movq %r9,%r10 +- adcq %r15,%r9 +- testq %r11,%r11 +- +- cmovzq %rax,%r12 +- cmovzq %rbp,%r13 +- movq %r12,0(%rdi) +- cmovzq %rcx,%r8 +- movq %r13,8(%rdi) +- cmovzq %r10,%r9 +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-__ecp_nistz256_subq: +- +- subq %r12,%rax +- sbbq %r13,%rbp +- movq %rax,%r12 +- sbbq %r8,%rcx +- sbbq %r9,%r10 +- movq %rbp,%r13 +- sbbq %r11,%r11 +- +- addq $-1,%rax +- movq %rcx,%r8 +- adcq %r14,%rbp +- adcq $0,%rcx +- movq %r10,%r9 +- adcq %r15,%r10 +- testq %r11,%r11 +- +- cmovnzq %rax,%r12 +- cmovnzq %rbp,%r13 +- cmovnzq %rcx,%r8 +- cmovnzq %r10,%r9 +- +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-__ecp_nistz256_mul_by_2q: +- +- xorq %r11,%r11 +- addq %r12,%r12 +- adcq %r13,%r13 +- movq %r12,%rax +- adcq %r8,%r8 +- adcq %r9,%r9 +- movq %r13,%rbp +- adcq $0,%r11 +- +- subq $-1,%r12 +- movq %r8,%rcx +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%r10 +- sbbq %r15,%r9 +- sbbq $0,%r11 +- +- cmovcq %rax,%r12 +- cmovcq %rbp,%r13 +- movq %r12,0(%rdi) +- cmovcq %rcx,%r8 +- movq %r13,8(%rdi) +- cmovcq %r10,%r9 +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +-.globl _ecp_nistz256_point_double +-.private_extern _ecp_nistz256_point_double +- +-.p2align 5 +-_ecp_nistz256_point_double: +- +- leaq _OPENSSL_ia32cap_P(%rip),%rcx +- movq 8(%rcx),%rcx +- andl $0x80100,%ecx +- cmpl $0x80100,%ecx +- je L$point_doublex +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- subq $160+8,%rsp +- +-L$point_doubleq_body: +- +-L$point_double_shortcutq: +- movdqu 0(%rsi),%xmm0 +- movq %rsi,%rbx +- movdqu 16(%rsi),%xmm1 +- movq 32+0(%rsi),%r12 +- movq 32+8(%rsi),%r13 +- movq 32+16(%rsi),%r8 +- movq 32+24(%rsi),%r9 +- movq L$poly+8(%rip),%r14 +- movq L$poly+24(%rip),%r15 +- movdqa %xmm0,96(%rsp) +- movdqa %xmm1,96+16(%rsp) +- leaq 32(%rdi),%r10 +- leaq 64(%rdi),%r11 +-.byte 102,72,15,110,199 +-.byte 102,73,15,110,202 +-.byte 102,73,15,110,211 +- +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_by_2q +- +- movq 64+0(%rsi),%rax +- movq 64+8(%rsi),%r14 +- movq 64+16(%rsi),%r15 +- movq 64+24(%rsi),%r8 +- leaq 64-0(%rsi),%rsi +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_sqr_montq +- +- movq 0+0(%rsp),%rax +- movq 8+0(%rsp),%r14 +- leaq 0+0(%rsp),%rsi +- movq 16+0(%rsp),%r15 +- movq 24+0(%rsp),%r8 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_sqr_montq +- +- movq 32(%rbx),%rax +- movq 64+0(%rbx),%r9 +- movq 64+8(%rbx),%r10 +- movq 64+16(%rbx),%r11 +- movq 64+24(%rbx),%r12 +- leaq 64-0(%rbx),%rsi +- leaq 32(%rbx),%rbx +-.byte 102,72,15,126,215 +- call __ecp_nistz256_mul_montq +- call __ecp_nistz256_mul_by_2q +- +- movq 96+0(%rsp),%r12 +- movq 96+8(%rsp),%r13 +- leaq 64(%rsp),%rbx +- movq 96+16(%rsp),%r8 +- movq 96+24(%rsp),%r9 +- leaq 
32(%rsp),%rdi +- call __ecp_nistz256_add_toq +- +- movq 96+0(%rsp),%r12 +- movq 96+8(%rsp),%r13 +- leaq 64(%rsp),%rbx +- movq 96+16(%rsp),%r8 +- movq 96+24(%rsp),%r9 +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_sub_fromq +- +- movq 0+0(%rsp),%rax +- movq 8+0(%rsp),%r14 +- leaq 0+0(%rsp),%rsi +- movq 16+0(%rsp),%r15 +- movq 24+0(%rsp),%r8 +-.byte 102,72,15,126,207 +- call __ecp_nistz256_sqr_montq +- xorq %r9,%r9 +- movq %r12,%rax +- addq $-1,%r12 +- movq %r13,%r10 +- adcq %rsi,%r13 +- movq %r14,%rcx +- adcq $0,%r14 +- movq %r15,%r8 +- adcq %rbp,%r15 +- adcq $0,%r9 +- xorq %rsi,%rsi +- testq $1,%rax +- +- cmovzq %rax,%r12 +- cmovzq %r10,%r13 +- cmovzq %rcx,%r14 +- cmovzq %r8,%r15 +- cmovzq %rsi,%r9 +- +- movq %r13,%rax +- shrq $1,%r12 +- shlq $63,%rax +- movq %r14,%r10 +- shrq $1,%r13 +- orq %rax,%r12 +- shlq $63,%r10 +- movq %r15,%rcx +- shrq $1,%r14 +- orq %r10,%r13 +- shlq $63,%rcx +- movq %r12,0(%rdi) +- shrq $1,%r15 +- movq %r13,8(%rdi) +- shlq $63,%r9 +- orq %rcx,%r14 +- orq %r9,%r15 +- movq %r14,16(%rdi) +- movq %r15,24(%rdi) +- movq 64(%rsp),%rax +- leaq 64(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq 0+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- leaq 128(%rsp),%rdi +- call __ecp_nistz256_mul_by_2q +- +- leaq 32(%rsp),%rbx +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_add_toq +- +- movq 96(%rsp),%rax +- leaq 96(%rsp),%rbx +- movq 0+0(%rsp),%r9 +- movq 8+0(%rsp),%r10 +- leaq 0+0(%rsp),%rsi +- movq 16+0(%rsp),%r11 +- movq 24+0(%rsp),%r12 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- leaq 128(%rsp),%rdi +- call __ecp_nistz256_mul_by_2q +- +- movq 0+32(%rsp),%rax +- movq 8+32(%rsp),%r14 +- leaq 0+32(%rsp),%rsi +- movq 16+32(%rsp),%r15 +- movq 24+32(%rsp),%r8 +-.byte 102,72,15,126,199 +- call __ecp_nistz256_sqr_montq +- +- leaq 128(%rsp),%rbx +- movq %r14,%r8 +- movq %r15,%r9 +- movq %rsi,%r14 +- movq %rbp,%r15 +- call __ecp_nistz256_sub_fromq +- +- movq 0+0(%rsp),%rax +- movq 0+8(%rsp),%rbp +- movq 0+16(%rsp),%rcx +- movq 0+24(%rsp),%r10 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_subq +- +- movq 32(%rsp),%rax +- leaq 32(%rsp),%rbx +- movq %r12,%r14 +- xorl %ecx,%ecx +- movq %r12,0+0(%rsp) +- movq %r13,%r10 +- movq %r13,0+8(%rsp) +- cmovzq %r8,%r11 +- movq %r8,0+16(%rsp) +- leaq 0-0(%rsp),%rsi +- cmovzq %r9,%r12 +- movq %r9,0+24(%rsp) +- movq %r14,%r9 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +-.byte 102,72,15,126,203 +-.byte 102,72,15,126,207 +- call __ecp_nistz256_sub_fromq +- +- leaq 160+56(%rsp),%rsi +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbx +- +- movq -8(%rsi),%rbp +- +- leaq (%rsi),%rsp +- +-L$point_doubleq_epilogue: +- .byte 0xf3,0xc3 +- +- +-.globl _ecp_nistz256_point_add +-.private_extern _ecp_nistz256_point_add +- +-.p2align 5 +-_ecp_nistz256_point_add: +- +- leaq _OPENSSL_ia32cap_P(%rip),%rcx +- movq 8(%rcx),%rcx +- andl $0x80100,%ecx +- cmpl $0x80100,%ecx +- je L$point_addx +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- subq $576+8,%rsp +- +-L$point_addq_body: +- +- movdqu 0(%rsi),%xmm0 +- movdqu 16(%rsi),%xmm1 +- movdqu 32(%rsi),%xmm2 +- movdqu 48(%rsi),%xmm3 +- movdqu 64(%rsi),%xmm4 +- movdqu 80(%rsi),%xmm5 +- movq %rsi,%rbx +- movq %rdx,%rsi +- movdqa %xmm0,384(%rsp) +- movdqa %xmm1,384+16(%rsp) +- movdqa %xmm2,416(%rsp) +- movdqa %xmm3,416+16(%rsp) +- movdqa %xmm4,448(%rsp) +- movdqa 
%xmm5,448+16(%rsp) +- por %xmm4,%xmm5 +- +- movdqu 0(%rsi),%xmm0 +- pshufd $0xb1,%xmm5,%xmm3 +- movdqu 16(%rsi),%xmm1 +- movdqu 32(%rsi),%xmm2 +- por %xmm3,%xmm5 +- movdqu 48(%rsi),%xmm3 +- movq 64+0(%rsi),%rax +- movq 64+8(%rsi),%r14 +- movq 64+16(%rsi),%r15 +- movq 64+24(%rsi),%r8 +- movdqa %xmm0,480(%rsp) +- pshufd $0x1e,%xmm5,%xmm4 +- movdqa %xmm1,480+16(%rsp) +- movdqu 64(%rsi),%xmm0 +- movdqu 80(%rsi),%xmm1 +- movdqa %xmm2,512(%rsp) +- movdqa %xmm3,512+16(%rsp) +- por %xmm4,%xmm5 +- pxor %xmm4,%xmm4 +- por %xmm0,%xmm1 +-.byte 102,72,15,110,199 +- +- leaq 64-0(%rsi),%rsi +- movq %rax,544+0(%rsp) +- movq %r14,544+8(%rsp) +- movq %r15,544+16(%rsp) +- movq %r8,544+24(%rsp) +- leaq 96(%rsp),%rdi +- call __ecp_nistz256_sqr_montq +- +- pcmpeqd %xmm4,%xmm5 +- pshufd $0xb1,%xmm1,%xmm4 +- por %xmm1,%xmm4 +- pshufd $0,%xmm5,%xmm5 +- pshufd $0x1e,%xmm4,%xmm3 +- por %xmm3,%xmm4 +- pxor %xmm3,%xmm3 +- pcmpeqd %xmm3,%xmm4 +- pshufd $0,%xmm4,%xmm4 +- movq 64+0(%rbx),%rax +- movq 64+8(%rbx),%r14 +- movq 64+16(%rbx),%r15 +- movq 64+24(%rbx),%r8 +-.byte 102,72,15,110,203 +- +- leaq 64-0(%rbx),%rsi +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_sqr_montq +- +- movq 544(%rsp),%rax +- leaq 544(%rsp),%rbx +- movq 0+96(%rsp),%r9 +- movq 8+96(%rsp),%r10 +- leaq 0+96(%rsp),%rsi +- movq 16+96(%rsp),%r11 +- movq 24+96(%rsp),%r12 +- leaq 224(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 448(%rsp),%rax +- leaq 448(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq 0+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 256(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 416(%rsp),%rax +- leaq 416(%rsp),%rbx +- movq 0+224(%rsp),%r9 +- movq 8+224(%rsp),%r10 +- leaq 0+224(%rsp),%rsi +- movq 16+224(%rsp),%r11 +- movq 24+224(%rsp),%r12 +- leaq 224(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 512(%rsp),%rax +- leaq 512(%rsp),%rbx +- movq 0+256(%rsp),%r9 +- movq 8+256(%rsp),%r10 +- leaq 0+256(%rsp),%rsi +- movq 16+256(%rsp),%r11 +- movq 24+256(%rsp),%r12 +- leaq 256(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- leaq 224(%rsp),%rbx +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_sub_fromq +- +- orq %r13,%r12 +- movdqa %xmm4,%xmm2 +- orq %r8,%r12 +- orq %r9,%r12 +- por %xmm5,%xmm2 +-.byte 102,73,15,110,220 +- +- movq 384(%rsp),%rax +- leaq 384(%rsp),%rbx +- movq 0+96(%rsp),%r9 +- movq 8+96(%rsp),%r10 +- leaq 0+96(%rsp),%rsi +- movq 16+96(%rsp),%r11 +- movq 24+96(%rsp),%r12 +- leaq 160(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 480(%rsp),%rax +- leaq 480(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq 0+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 192(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- leaq 160(%rsp),%rbx +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_sub_fromq +- +- orq %r13,%r12 +- orq %r8,%r12 +- orq %r9,%r12 +- +-.byte 102,73,15,126,208 +-.byte 102,73,15,126,217 +- orq %r8,%r12 +-.byte 0x3e +- jnz L$add_proceedq +- +- +- +- testq %r9,%r9 +- jz L$add_doubleq +- +- +- +- +- +- +-.byte 102,72,15,126,199 +- pxor %xmm0,%xmm0 +- movdqu %xmm0,0(%rdi) +- movdqu %xmm0,16(%rdi) +- movdqu %xmm0,32(%rdi) +- movdqu %xmm0,48(%rdi) +- movdqu %xmm0,64(%rdi) +- movdqu %xmm0,80(%rdi) +- jmp L$add_doneq +- +-.p2align 5 +-L$add_doubleq: +-.byte 102,72,15,126,206 +-.byte 102,72,15,126,199 +- addq $416,%rsp +- +- jmp L$point_double_shortcutq +- +- +-.p2align 5 +-L$add_proceedq: +- movq 0+64(%rsp),%rax +- movq 8+64(%rsp),%r14 +- leaq 0+64(%rsp),%rsi +- movq 16+64(%rsp),%r15 +- movq 24+64(%rsp),%r8 +- leaq 
96(%rsp),%rdi +- call __ecp_nistz256_sqr_montq +- +- movq 448(%rsp),%rax +- leaq 448(%rsp),%rbx +- movq 0+0(%rsp),%r9 +- movq 8+0(%rsp),%r10 +- leaq 0+0(%rsp),%rsi +- movq 16+0(%rsp),%r11 +- movq 24+0(%rsp),%r12 +- leaq 352(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 0+0(%rsp),%rax +- movq 8+0(%rsp),%r14 +- leaq 0+0(%rsp),%rsi +- movq 16+0(%rsp),%r15 +- movq 24+0(%rsp),%r8 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_sqr_montq +- +- movq 544(%rsp),%rax +- leaq 544(%rsp),%rbx +- movq 0+352(%rsp),%r9 +- movq 8+352(%rsp),%r10 +- leaq 0+352(%rsp),%rsi +- movq 16+352(%rsp),%r11 +- movq 24+352(%rsp),%r12 +- leaq 352(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 0(%rsp),%rax +- leaq 0(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq 0+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 128(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 160(%rsp),%rax +- leaq 160(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq 0+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 192(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- +- +- +- xorq %r11,%r11 +- addq %r12,%r12 +- leaq 96(%rsp),%rsi +- adcq %r13,%r13 +- movq %r12,%rax +- adcq %r8,%r8 +- adcq %r9,%r9 +- movq %r13,%rbp +- adcq $0,%r11 +- +- subq $-1,%r12 +- movq %r8,%rcx +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%r10 +- sbbq %r15,%r9 +- sbbq $0,%r11 +- +- cmovcq %rax,%r12 +- movq 0(%rsi),%rax +- cmovcq %rbp,%r13 +- movq 8(%rsi),%rbp +- cmovcq %rcx,%r8 +- movq 16(%rsi),%rcx +- cmovcq %r10,%r9 +- movq 24(%rsi),%r10 +- +- call __ecp_nistz256_subq +- +- leaq 128(%rsp),%rbx +- leaq 288(%rsp),%rdi +- call __ecp_nistz256_sub_fromq +- +- movq 192+0(%rsp),%rax +- movq 192+8(%rsp),%rbp +- movq 192+16(%rsp),%rcx +- movq 192+24(%rsp),%r10 +- leaq 320(%rsp),%rdi +- +- call __ecp_nistz256_subq +- +- movq %r12,0(%rdi) +- movq %r13,8(%rdi) +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- movq 128(%rsp),%rax +- leaq 128(%rsp),%rbx +- movq 0+224(%rsp),%r9 +- movq 8+224(%rsp),%r10 +- leaq 0+224(%rsp),%rsi +- movq 16+224(%rsp),%r11 +- movq 24+224(%rsp),%r12 +- leaq 256(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 320(%rsp),%rax +- leaq 320(%rsp),%rbx +- movq 0+64(%rsp),%r9 +- movq 8+64(%rsp),%r10 +- leaq 0+64(%rsp),%rsi +- movq 16+64(%rsp),%r11 +- movq 24+64(%rsp),%r12 +- leaq 320(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- leaq 256(%rsp),%rbx +- leaq 320(%rsp),%rdi +- call __ecp_nistz256_sub_fromq +- +-.byte 102,72,15,126,199 +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 352(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 352+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 544(%rsp),%xmm2 +- pand 544+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 448(%rsp),%xmm2 +- pand 448+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,64(%rdi) +- movdqu %xmm3,80(%rdi) +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 288(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 288+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 480(%rsp),%xmm2 +- pand 480+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 384(%rsp),%xmm2 +- pand 384+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,0(%rdi) +- movdqu %xmm3,16(%rdi) +- +- movdqa %xmm5,%xmm0 +- movdqa 
%xmm5,%xmm1 +- pandn 320(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 320+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 512(%rsp),%xmm2 +- pand 512+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 416(%rsp),%xmm2 +- pand 416+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,32(%rdi) +- movdqu %xmm3,48(%rdi) +- +-L$add_doneq: +- leaq 576+56(%rsp),%rsi +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbx +- +- movq -8(%rsi),%rbp +- +- leaq (%rsi),%rsp +- +-L$point_addq_epilogue: +- .byte 0xf3,0xc3 +- +- +-.globl _ecp_nistz256_point_add_affine +-.private_extern _ecp_nistz256_point_add_affine +- +-.p2align 5 +-_ecp_nistz256_point_add_affine: +- +- leaq _OPENSSL_ia32cap_P(%rip),%rcx +- movq 8(%rcx),%rcx +- andl $0x80100,%ecx +- cmpl $0x80100,%ecx +- je L$point_add_affinex +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- subq $480+8,%rsp +- +-L$add_affineq_body: +- +- movdqu 0(%rsi),%xmm0 +- movq %rdx,%rbx +- movdqu 16(%rsi),%xmm1 +- movdqu 32(%rsi),%xmm2 +- movdqu 48(%rsi),%xmm3 +- movdqu 64(%rsi),%xmm4 +- movdqu 80(%rsi),%xmm5 +- movq 64+0(%rsi),%rax +- movq 64+8(%rsi),%r14 +- movq 64+16(%rsi),%r15 +- movq 64+24(%rsi),%r8 +- movdqa %xmm0,320(%rsp) +- movdqa %xmm1,320+16(%rsp) +- movdqa %xmm2,352(%rsp) +- movdqa %xmm3,352+16(%rsp) +- movdqa %xmm4,384(%rsp) +- movdqa %xmm5,384+16(%rsp) +- por %xmm4,%xmm5 +- +- movdqu 0(%rbx),%xmm0 +- pshufd $0xb1,%xmm5,%xmm3 +- movdqu 16(%rbx),%xmm1 +- movdqu 32(%rbx),%xmm2 +- por %xmm3,%xmm5 +- movdqu 48(%rbx),%xmm3 +- movdqa %xmm0,416(%rsp) +- pshufd $0x1e,%xmm5,%xmm4 +- movdqa %xmm1,416+16(%rsp) +- por %xmm0,%xmm1 +-.byte 102,72,15,110,199 +- movdqa %xmm2,448(%rsp) +- movdqa %xmm3,448+16(%rsp) +- por %xmm2,%xmm3 +- por %xmm4,%xmm5 +- pxor %xmm4,%xmm4 +- por %xmm1,%xmm3 +- +- leaq 64-0(%rsi),%rsi +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_sqr_montq +- +- pcmpeqd %xmm4,%xmm5 +- pshufd $0xb1,%xmm3,%xmm4 +- movq 0(%rbx),%rax +- +- movq %r12,%r9 +- por %xmm3,%xmm4 +- pshufd $0,%xmm5,%xmm5 +- pshufd $0x1e,%xmm4,%xmm3 +- movq %r13,%r10 +- por %xmm3,%xmm4 +- pxor %xmm3,%xmm3 +- movq %r14,%r11 +- pcmpeqd %xmm3,%xmm4 +- pshufd $0,%xmm4,%xmm4 +- +- leaq 32-0(%rsp),%rsi +- movq %r15,%r12 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- leaq 320(%rsp),%rbx +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_sub_fromq +- +- movq 384(%rsp),%rax +- leaq 384(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq 0+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 384(%rsp),%rax +- leaq 384(%rsp),%rbx +- movq 0+64(%rsp),%r9 +- movq 8+64(%rsp),%r10 +- leaq 0+64(%rsp),%rsi +- movq 16+64(%rsp),%r11 +- movq 24+64(%rsp),%r12 +- leaq 288(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 448(%rsp),%rax +- leaq 448(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq 0+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- leaq 352(%rsp),%rbx +- leaq 96(%rsp),%rdi +- call __ecp_nistz256_sub_fromq +- +- movq 0+64(%rsp),%rax +- movq 8+64(%rsp),%r14 +- leaq 0+64(%rsp),%rsi +- movq 16+64(%rsp),%r15 +- movq 24+64(%rsp),%r8 +- leaq 128(%rsp),%rdi +- call __ecp_nistz256_sqr_montq +- +- movq 0+96(%rsp),%rax +- movq 
8+96(%rsp),%r14 +- leaq 0+96(%rsp),%rsi +- movq 16+96(%rsp),%r15 +- movq 24+96(%rsp),%r8 +- leaq 192(%rsp),%rdi +- call __ecp_nistz256_sqr_montq +- +- movq 128(%rsp),%rax +- leaq 128(%rsp),%rbx +- movq 0+64(%rsp),%r9 +- movq 8+64(%rsp),%r10 +- leaq 0+64(%rsp),%rsi +- movq 16+64(%rsp),%r11 +- movq 24+64(%rsp),%r12 +- leaq 160(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 320(%rsp),%rax +- leaq 320(%rsp),%rbx +- movq 0+128(%rsp),%r9 +- movq 8+128(%rsp),%r10 +- leaq 0+128(%rsp),%rsi +- movq 16+128(%rsp),%r11 +- movq 24+128(%rsp),%r12 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- +- +- +- xorq %r11,%r11 +- addq %r12,%r12 +- leaq 192(%rsp),%rsi +- adcq %r13,%r13 +- movq %r12,%rax +- adcq %r8,%r8 +- adcq %r9,%r9 +- movq %r13,%rbp +- adcq $0,%r11 +- +- subq $-1,%r12 +- movq %r8,%rcx +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%r10 +- sbbq %r15,%r9 +- sbbq $0,%r11 +- +- cmovcq %rax,%r12 +- movq 0(%rsi),%rax +- cmovcq %rbp,%r13 +- movq 8(%rsi),%rbp +- cmovcq %rcx,%r8 +- movq 16(%rsi),%rcx +- cmovcq %r10,%r9 +- movq 24(%rsi),%r10 +- +- call __ecp_nistz256_subq +- +- leaq 160(%rsp),%rbx +- leaq 224(%rsp),%rdi +- call __ecp_nistz256_sub_fromq +- +- movq 0+0(%rsp),%rax +- movq 0+8(%rsp),%rbp +- movq 0+16(%rsp),%rcx +- movq 0+24(%rsp),%r10 +- leaq 64(%rsp),%rdi +- +- call __ecp_nistz256_subq +- +- movq %r12,0(%rdi) +- movq %r13,8(%rdi) +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- movq 352(%rsp),%rax +- leaq 352(%rsp),%rbx +- movq 0+160(%rsp),%r9 +- movq 8+160(%rsp),%r10 +- leaq 0+160(%rsp),%rsi +- movq 16+160(%rsp),%r11 +- movq 24+160(%rsp),%r12 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- movq 96(%rsp),%rax +- leaq 96(%rsp),%rbx +- movq 0+64(%rsp),%r9 +- movq 8+64(%rsp),%r10 +- leaq 0+64(%rsp),%rsi +- movq 16+64(%rsp),%r11 +- movq 24+64(%rsp),%r12 +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_mul_montq +- +- leaq 32(%rsp),%rbx +- leaq 256(%rsp),%rdi +- call __ecp_nistz256_sub_fromq +- +-.byte 102,72,15,126,199 +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 288(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 288+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand L$ONE_mont(%rip),%xmm2 +- pand L$ONE_mont+16(%rip),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 384(%rsp),%xmm2 +- pand 384+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,64(%rdi) +- movdqu %xmm3,80(%rdi) +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 224(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 224+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 416(%rsp),%xmm2 +- pand 416+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 320(%rsp),%xmm2 +- pand 320+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,0(%rdi) +- movdqu %xmm3,16(%rdi) +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 256(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 256+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 448(%rsp),%xmm2 +- pand 448+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 352(%rsp),%xmm2 +- pand 352+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,32(%rdi) +- movdqu %xmm3,48(%rdi) +- +- leaq 480+56(%rsp),%rsi +- +- movq -48(%rsi),%r15 
+- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbx +- +- movq -8(%rsi),%rbp +- +- leaq (%rsi),%rsp +- +-L$add_affineq_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-__ecp_nistz256_add_tox: +- +- xorq %r11,%r11 +- adcq 0(%rbx),%r12 +- adcq 8(%rbx),%r13 +- movq %r12,%rax +- adcq 16(%rbx),%r8 +- adcq 24(%rbx),%r9 +- movq %r13,%rbp +- adcq $0,%r11 +- +- xorq %r10,%r10 +- sbbq $-1,%r12 +- movq %r8,%rcx +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%r10 +- sbbq %r15,%r9 +- sbbq $0,%r11 +- +- cmovcq %rax,%r12 +- cmovcq %rbp,%r13 +- movq %r12,0(%rdi) +- cmovcq %rcx,%r8 +- movq %r13,8(%rdi) +- cmovcq %r10,%r9 +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-__ecp_nistz256_sub_fromx: +- +- xorq %r11,%r11 +- sbbq 0(%rbx),%r12 +- sbbq 8(%rbx),%r13 +- movq %r12,%rax +- sbbq 16(%rbx),%r8 +- sbbq 24(%rbx),%r9 +- movq %r13,%rbp +- sbbq $0,%r11 +- +- xorq %r10,%r10 +- adcq $-1,%r12 +- movq %r8,%rcx +- adcq %r14,%r13 +- adcq $0,%r8 +- movq %r9,%r10 +- adcq %r15,%r9 +- +- btq $0,%r11 +- cmovncq %rax,%r12 +- cmovncq %rbp,%r13 +- movq %r12,0(%rdi) +- cmovncq %rcx,%r8 +- movq %r13,8(%rdi) +- cmovncq %r10,%r9 +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-__ecp_nistz256_subx: +- +- xorq %r11,%r11 +- sbbq %r12,%rax +- sbbq %r13,%rbp +- movq %rax,%r12 +- sbbq %r8,%rcx +- sbbq %r9,%r10 +- movq %rbp,%r13 +- sbbq $0,%r11 +- +- xorq %r9,%r9 +- adcq $-1,%rax +- movq %rcx,%r8 +- adcq %r14,%rbp +- adcq $0,%rcx +- movq %r10,%r9 +- adcq %r15,%r10 +- +- btq $0,%r11 +- cmovcq %rax,%r12 +- cmovcq %rbp,%r13 +- cmovcq %rcx,%r8 +- cmovcq %r10,%r9 +- +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-__ecp_nistz256_mul_by_2x: +- +- xorq %r11,%r11 +- adcq %r12,%r12 +- adcq %r13,%r13 +- movq %r12,%rax +- adcq %r8,%r8 +- adcq %r9,%r9 +- movq %r13,%rbp +- adcq $0,%r11 +- +- xorq %r10,%r10 +- sbbq $-1,%r12 +- movq %r8,%rcx +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%r10 +- sbbq %r15,%r9 +- sbbq $0,%r11 +- +- cmovcq %rax,%r12 +- cmovcq %rbp,%r13 +- movq %r12,0(%rdi) +- cmovcq %rcx,%r8 +- movq %r13,8(%rdi) +- cmovcq %r10,%r9 +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-ecp_nistz256_point_doublex: +- +-L$point_doublex: +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- subq $160+8,%rsp +- +-L$point_doublex_body: +- +-L$point_double_shortcutx: +- movdqu 0(%rsi),%xmm0 +- movq %rsi,%rbx +- movdqu 16(%rsi),%xmm1 +- movq 32+0(%rsi),%r12 +- movq 32+8(%rsi),%r13 +- movq 32+16(%rsi),%r8 +- movq 32+24(%rsi),%r9 +- movq L$poly+8(%rip),%r14 +- movq L$poly+24(%rip),%r15 +- movdqa %xmm0,96(%rsp) +- movdqa %xmm1,96+16(%rsp) +- leaq 32(%rdi),%r10 +- leaq 64(%rdi),%r11 +-.byte 102,72,15,110,199 +-.byte 102,73,15,110,202 +-.byte 102,73,15,110,211 +- +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_by_2x +- +- movq 64+0(%rsi),%rdx +- movq 64+8(%rsi),%r14 +- movq 64+16(%rsi),%r15 +- movq 64+24(%rsi),%r8 +- leaq 64-128(%rsi),%rsi +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_sqr_montx +- +- movq 0+0(%rsp),%rdx +- movq 8+0(%rsp),%r14 +- leaq -128+0(%rsp),%rsi +- movq 16+0(%rsp),%r15 +- movq 24+0(%rsp),%r8 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_sqr_montx +- +- movq 32(%rbx),%rdx +- movq 64+0(%rbx),%r9 +- movq 64+8(%rbx),%r10 +- movq 64+16(%rbx),%r11 +- movq 64+24(%rbx),%r12 +- leaq 64-128(%rbx),%rsi +- leaq 32(%rbx),%rbx +-.byte 102,72,15,126,215 +- call __ecp_nistz256_mul_montx +- call __ecp_nistz256_mul_by_2x +- +- movq 
96+0(%rsp),%r12 +- movq 96+8(%rsp),%r13 +- leaq 64(%rsp),%rbx +- movq 96+16(%rsp),%r8 +- movq 96+24(%rsp),%r9 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_add_tox +- +- movq 96+0(%rsp),%r12 +- movq 96+8(%rsp),%r13 +- leaq 64(%rsp),%rbx +- movq 96+16(%rsp),%r8 +- movq 96+24(%rsp),%r9 +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_sub_fromx +- +- movq 0+0(%rsp),%rdx +- movq 8+0(%rsp),%r14 +- leaq -128+0(%rsp),%rsi +- movq 16+0(%rsp),%r15 +- movq 24+0(%rsp),%r8 +-.byte 102,72,15,126,207 +- call __ecp_nistz256_sqr_montx +- xorq %r9,%r9 +- movq %r12,%rax +- addq $-1,%r12 +- movq %r13,%r10 +- adcq %rsi,%r13 +- movq %r14,%rcx +- adcq $0,%r14 +- movq %r15,%r8 +- adcq %rbp,%r15 +- adcq $0,%r9 +- xorq %rsi,%rsi +- testq $1,%rax +- +- cmovzq %rax,%r12 +- cmovzq %r10,%r13 +- cmovzq %rcx,%r14 +- cmovzq %r8,%r15 +- cmovzq %rsi,%r9 +- +- movq %r13,%rax +- shrq $1,%r12 +- shlq $63,%rax +- movq %r14,%r10 +- shrq $1,%r13 +- orq %rax,%r12 +- shlq $63,%r10 +- movq %r15,%rcx +- shrq $1,%r14 +- orq %r10,%r13 +- shlq $63,%rcx +- movq %r12,0(%rdi) +- shrq $1,%r15 +- movq %r13,8(%rdi) +- shlq $63,%r9 +- orq %rcx,%r14 +- orq %r9,%r15 +- movq %r14,16(%rdi) +- movq %r15,24(%rdi) +- movq 64(%rsp),%rdx +- leaq 64(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq -128+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- leaq 128(%rsp),%rdi +- call __ecp_nistz256_mul_by_2x +- +- leaq 32(%rsp),%rbx +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_add_tox +- +- movq 96(%rsp),%rdx +- leaq 96(%rsp),%rbx +- movq 0+0(%rsp),%r9 +- movq 8+0(%rsp),%r10 +- leaq -128+0(%rsp),%rsi +- movq 16+0(%rsp),%r11 +- movq 24+0(%rsp),%r12 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- leaq 128(%rsp),%rdi +- call __ecp_nistz256_mul_by_2x +- +- movq 0+32(%rsp),%rdx +- movq 8+32(%rsp),%r14 +- leaq -128+32(%rsp),%rsi +- movq 16+32(%rsp),%r15 +- movq 24+32(%rsp),%r8 +-.byte 102,72,15,126,199 +- call __ecp_nistz256_sqr_montx +- +- leaq 128(%rsp),%rbx +- movq %r14,%r8 +- movq %r15,%r9 +- movq %rsi,%r14 +- movq %rbp,%r15 +- call __ecp_nistz256_sub_fromx +- +- movq 0+0(%rsp),%rax +- movq 0+8(%rsp),%rbp +- movq 0+16(%rsp),%rcx +- movq 0+24(%rsp),%r10 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_subx +- +- movq 32(%rsp),%rdx +- leaq 32(%rsp),%rbx +- movq %r12,%r14 +- xorl %ecx,%ecx +- movq %r12,0+0(%rsp) +- movq %r13,%r10 +- movq %r13,0+8(%rsp) +- cmovzq %r8,%r11 +- movq %r8,0+16(%rsp) +- leaq 0-128(%rsp),%rsi +- cmovzq %r9,%r12 +- movq %r9,0+24(%rsp) +- movq %r14,%r9 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +-.byte 102,72,15,126,203 +-.byte 102,72,15,126,207 +- call __ecp_nistz256_sub_fromx +- +- leaq 160+56(%rsp),%rsi +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbx +- +- movq -8(%rsi),%rbp +- +- leaq (%rsi),%rsp +- +-L$point_doublex_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-ecp_nistz256_point_addx: +- +-L$point_addx: +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- subq $576+8,%rsp +- +-L$point_addx_body: +- +- movdqu 0(%rsi),%xmm0 +- movdqu 16(%rsi),%xmm1 +- movdqu 32(%rsi),%xmm2 +- movdqu 48(%rsi),%xmm3 +- movdqu 64(%rsi),%xmm4 +- movdqu 80(%rsi),%xmm5 +- movq %rsi,%rbx +- movq %rdx,%rsi +- movdqa %xmm0,384(%rsp) +- movdqa %xmm1,384+16(%rsp) +- movdqa %xmm2,416(%rsp) +- movdqa %xmm3,416+16(%rsp) +- movdqa %xmm4,448(%rsp) +- movdqa %xmm5,448+16(%rsp) +- por %xmm4,%xmm5 +- +- movdqu 0(%rsi),%xmm0 
+- pshufd $0xb1,%xmm5,%xmm3 +- movdqu 16(%rsi),%xmm1 +- movdqu 32(%rsi),%xmm2 +- por %xmm3,%xmm5 +- movdqu 48(%rsi),%xmm3 +- movq 64+0(%rsi),%rdx +- movq 64+8(%rsi),%r14 +- movq 64+16(%rsi),%r15 +- movq 64+24(%rsi),%r8 +- movdqa %xmm0,480(%rsp) +- pshufd $0x1e,%xmm5,%xmm4 +- movdqa %xmm1,480+16(%rsp) +- movdqu 64(%rsi),%xmm0 +- movdqu 80(%rsi),%xmm1 +- movdqa %xmm2,512(%rsp) +- movdqa %xmm3,512+16(%rsp) +- por %xmm4,%xmm5 +- pxor %xmm4,%xmm4 +- por %xmm0,%xmm1 +-.byte 102,72,15,110,199 +- +- leaq 64-128(%rsi),%rsi +- movq %rdx,544+0(%rsp) +- movq %r14,544+8(%rsp) +- movq %r15,544+16(%rsp) +- movq %r8,544+24(%rsp) +- leaq 96(%rsp),%rdi +- call __ecp_nistz256_sqr_montx +- +- pcmpeqd %xmm4,%xmm5 +- pshufd $0xb1,%xmm1,%xmm4 +- por %xmm1,%xmm4 +- pshufd $0,%xmm5,%xmm5 +- pshufd $0x1e,%xmm4,%xmm3 +- por %xmm3,%xmm4 +- pxor %xmm3,%xmm3 +- pcmpeqd %xmm3,%xmm4 +- pshufd $0,%xmm4,%xmm4 +- movq 64+0(%rbx),%rdx +- movq 64+8(%rbx),%r14 +- movq 64+16(%rbx),%r15 +- movq 64+24(%rbx),%r8 +-.byte 102,72,15,110,203 +- +- leaq 64-128(%rbx),%rsi +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_sqr_montx +- +- movq 544(%rsp),%rdx +- leaq 544(%rsp),%rbx +- movq 0+96(%rsp),%r9 +- movq 8+96(%rsp),%r10 +- leaq -128+96(%rsp),%rsi +- movq 16+96(%rsp),%r11 +- movq 24+96(%rsp),%r12 +- leaq 224(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 448(%rsp),%rdx +- leaq 448(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq -128+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 256(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 416(%rsp),%rdx +- leaq 416(%rsp),%rbx +- movq 0+224(%rsp),%r9 +- movq 8+224(%rsp),%r10 +- leaq -128+224(%rsp),%rsi +- movq 16+224(%rsp),%r11 +- movq 24+224(%rsp),%r12 +- leaq 224(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 512(%rsp),%rdx +- leaq 512(%rsp),%rbx +- movq 0+256(%rsp),%r9 +- movq 8+256(%rsp),%r10 +- leaq -128+256(%rsp),%rsi +- movq 16+256(%rsp),%r11 +- movq 24+256(%rsp),%r12 +- leaq 256(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- leaq 224(%rsp),%rbx +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_sub_fromx +- +- orq %r13,%r12 +- movdqa %xmm4,%xmm2 +- orq %r8,%r12 +- orq %r9,%r12 +- por %xmm5,%xmm2 +-.byte 102,73,15,110,220 +- +- movq 384(%rsp),%rdx +- leaq 384(%rsp),%rbx +- movq 0+96(%rsp),%r9 +- movq 8+96(%rsp),%r10 +- leaq -128+96(%rsp),%rsi +- movq 16+96(%rsp),%r11 +- movq 24+96(%rsp),%r12 +- leaq 160(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 480(%rsp),%rdx +- leaq 480(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq -128+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 192(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- leaq 160(%rsp),%rbx +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_sub_fromx +- +- orq %r13,%r12 +- orq %r8,%r12 +- orq %r9,%r12 +- +-.byte 102,73,15,126,208 +-.byte 102,73,15,126,217 +- orq %r8,%r12 +-.byte 0x3e +- jnz L$add_proceedx +- +- +- +- testq %r9,%r9 +- jz L$add_doublex +- +- +- +- +- +- +-.byte 102,72,15,126,199 +- pxor %xmm0,%xmm0 +- movdqu %xmm0,0(%rdi) +- movdqu %xmm0,16(%rdi) +- movdqu %xmm0,32(%rdi) +- movdqu %xmm0,48(%rdi) +- movdqu %xmm0,64(%rdi) +- movdqu %xmm0,80(%rdi) +- jmp L$add_donex +- +-.p2align 5 +-L$add_doublex: +-.byte 102,72,15,126,206 +-.byte 102,72,15,126,199 +- addq $416,%rsp +- +- jmp L$point_double_shortcutx +- +- +-.p2align 5 +-L$add_proceedx: +- movq 0+64(%rsp),%rdx +- movq 8+64(%rsp),%r14 +- leaq -128+64(%rsp),%rsi +- movq 16+64(%rsp),%r15 +- movq 24+64(%rsp),%r8 +- leaq 96(%rsp),%rdi +- call __ecp_nistz256_sqr_montx +- 
+- movq 448(%rsp),%rdx +- leaq 448(%rsp),%rbx +- movq 0+0(%rsp),%r9 +- movq 8+0(%rsp),%r10 +- leaq -128+0(%rsp),%rsi +- movq 16+0(%rsp),%r11 +- movq 24+0(%rsp),%r12 +- leaq 352(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 0+0(%rsp),%rdx +- movq 8+0(%rsp),%r14 +- leaq -128+0(%rsp),%rsi +- movq 16+0(%rsp),%r15 +- movq 24+0(%rsp),%r8 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_sqr_montx +- +- movq 544(%rsp),%rdx +- leaq 544(%rsp),%rbx +- movq 0+352(%rsp),%r9 +- movq 8+352(%rsp),%r10 +- leaq -128+352(%rsp),%rsi +- movq 16+352(%rsp),%r11 +- movq 24+352(%rsp),%r12 +- leaq 352(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 0(%rsp),%rdx +- leaq 0(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq -128+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 128(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 160(%rsp),%rdx +- leaq 160(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq -128+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 192(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- +- +- +- xorq %r11,%r11 +- addq %r12,%r12 +- leaq 96(%rsp),%rsi +- adcq %r13,%r13 +- movq %r12,%rax +- adcq %r8,%r8 +- adcq %r9,%r9 +- movq %r13,%rbp +- adcq $0,%r11 +- +- subq $-1,%r12 +- movq %r8,%rcx +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%r10 +- sbbq %r15,%r9 +- sbbq $0,%r11 +- +- cmovcq %rax,%r12 +- movq 0(%rsi),%rax +- cmovcq %rbp,%r13 +- movq 8(%rsi),%rbp +- cmovcq %rcx,%r8 +- movq 16(%rsi),%rcx +- cmovcq %r10,%r9 +- movq 24(%rsi),%r10 +- +- call __ecp_nistz256_subx +- +- leaq 128(%rsp),%rbx +- leaq 288(%rsp),%rdi +- call __ecp_nistz256_sub_fromx +- +- movq 192+0(%rsp),%rax +- movq 192+8(%rsp),%rbp +- movq 192+16(%rsp),%rcx +- movq 192+24(%rsp),%r10 +- leaq 320(%rsp),%rdi +- +- call __ecp_nistz256_subx +- +- movq %r12,0(%rdi) +- movq %r13,8(%rdi) +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- movq 128(%rsp),%rdx +- leaq 128(%rsp),%rbx +- movq 0+224(%rsp),%r9 +- movq 8+224(%rsp),%r10 +- leaq -128+224(%rsp),%rsi +- movq 16+224(%rsp),%r11 +- movq 24+224(%rsp),%r12 +- leaq 256(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 320(%rsp),%rdx +- leaq 320(%rsp),%rbx +- movq 0+64(%rsp),%r9 +- movq 8+64(%rsp),%r10 +- leaq -128+64(%rsp),%rsi +- movq 16+64(%rsp),%r11 +- movq 24+64(%rsp),%r12 +- leaq 320(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- leaq 256(%rsp),%rbx +- leaq 320(%rsp),%rdi +- call __ecp_nistz256_sub_fromx +- +-.byte 102,72,15,126,199 +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 352(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 352+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 544(%rsp),%xmm2 +- pand 544+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 448(%rsp),%xmm2 +- pand 448+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,64(%rdi) +- movdqu %xmm3,80(%rdi) +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 288(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 288+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 480(%rsp),%xmm2 +- pand 480+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 384(%rsp),%xmm2 +- pand 384+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,0(%rdi) +- movdqu %xmm3,16(%rdi) +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 
320(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 320+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 512(%rsp),%xmm2 +- pand 512+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 416(%rsp),%xmm2 +- pand 416+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,32(%rdi) +- movdqu %xmm3,48(%rdi) +- +-L$add_donex: +- leaq 576+56(%rsp),%rsi +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbx +- +- movq -8(%rsi),%rbp +- +- leaq (%rsi),%rsp +- +-L$point_addx_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-ecp_nistz256_point_add_affinex: +- +-L$point_add_affinex: +- pushq %rbp +- +- pushq %rbx +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- subq $480+8,%rsp +- +-L$add_affinex_body: +- +- movdqu 0(%rsi),%xmm0 +- movq %rdx,%rbx +- movdqu 16(%rsi),%xmm1 +- movdqu 32(%rsi),%xmm2 +- movdqu 48(%rsi),%xmm3 +- movdqu 64(%rsi),%xmm4 +- movdqu 80(%rsi),%xmm5 +- movq 64+0(%rsi),%rdx +- movq 64+8(%rsi),%r14 +- movq 64+16(%rsi),%r15 +- movq 64+24(%rsi),%r8 +- movdqa %xmm0,320(%rsp) +- movdqa %xmm1,320+16(%rsp) +- movdqa %xmm2,352(%rsp) +- movdqa %xmm3,352+16(%rsp) +- movdqa %xmm4,384(%rsp) +- movdqa %xmm5,384+16(%rsp) +- por %xmm4,%xmm5 +- +- movdqu 0(%rbx),%xmm0 +- pshufd $0xb1,%xmm5,%xmm3 +- movdqu 16(%rbx),%xmm1 +- movdqu 32(%rbx),%xmm2 +- por %xmm3,%xmm5 +- movdqu 48(%rbx),%xmm3 +- movdqa %xmm0,416(%rsp) +- pshufd $0x1e,%xmm5,%xmm4 +- movdqa %xmm1,416+16(%rsp) +- por %xmm0,%xmm1 +-.byte 102,72,15,110,199 +- movdqa %xmm2,448(%rsp) +- movdqa %xmm3,448+16(%rsp) +- por %xmm2,%xmm3 +- por %xmm4,%xmm5 +- pxor %xmm4,%xmm4 +- por %xmm1,%xmm3 +- +- leaq 64-128(%rsi),%rsi +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_sqr_montx +- +- pcmpeqd %xmm4,%xmm5 +- pshufd $0xb1,%xmm3,%xmm4 +- movq 0(%rbx),%rdx +- +- movq %r12,%r9 +- por %xmm3,%xmm4 +- pshufd $0,%xmm5,%xmm5 +- pshufd $0x1e,%xmm4,%xmm3 +- movq %r13,%r10 +- por %xmm3,%xmm4 +- pxor %xmm3,%xmm3 +- movq %r14,%r11 +- pcmpeqd %xmm3,%xmm4 +- pshufd $0,%xmm4,%xmm4 +- +- leaq 32-128(%rsp),%rsi +- movq %r15,%r12 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- leaq 320(%rsp),%rbx +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_sub_fromx +- +- movq 384(%rsp),%rdx +- leaq 384(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq -128+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 384(%rsp),%rdx +- leaq 384(%rsp),%rbx +- movq 0+64(%rsp),%r9 +- movq 8+64(%rsp),%r10 +- leaq -128+64(%rsp),%rsi +- movq 16+64(%rsp),%r11 +- movq 24+64(%rsp),%r12 +- leaq 288(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 448(%rsp),%rdx +- leaq 448(%rsp),%rbx +- movq 0+32(%rsp),%r9 +- movq 8+32(%rsp),%r10 +- leaq -128+32(%rsp),%rsi +- movq 16+32(%rsp),%r11 +- movq 24+32(%rsp),%r12 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- leaq 352(%rsp),%rbx +- leaq 96(%rsp),%rdi +- call __ecp_nistz256_sub_fromx +- +- movq 0+64(%rsp),%rdx +- movq 8+64(%rsp),%r14 +- leaq -128+64(%rsp),%rsi +- movq 16+64(%rsp),%r15 +- movq 24+64(%rsp),%r8 +- leaq 128(%rsp),%rdi +- call __ecp_nistz256_sqr_montx +- +- movq 0+96(%rsp),%rdx +- movq 8+96(%rsp),%r14 +- leaq -128+96(%rsp),%rsi +- movq 16+96(%rsp),%r15 +- movq 24+96(%rsp),%r8 +- leaq 192(%rsp),%rdi +- call __ecp_nistz256_sqr_montx +- +- movq 128(%rsp),%rdx +- leaq 128(%rsp),%rbx +- movq 
0+64(%rsp),%r9 +- movq 8+64(%rsp),%r10 +- leaq -128+64(%rsp),%rsi +- movq 16+64(%rsp),%r11 +- movq 24+64(%rsp),%r12 +- leaq 160(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 320(%rsp),%rdx +- leaq 320(%rsp),%rbx +- movq 0+128(%rsp),%r9 +- movq 8+128(%rsp),%r10 +- leaq -128+128(%rsp),%rsi +- movq 16+128(%rsp),%r11 +- movq 24+128(%rsp),%r12 +- leaq 0(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- +- +- +- xorq %r11,%r11 +- addq %r12,%r12 +- leaq 192(%rsp),%rsi +- adcq %r13,%r13 +- movq %r12,%rax +- adcq %r8,%r8 +- adcq %r9,%r9 +- movq %r13,%rbp +- adcq $0,%r11 +- +- subq $-1,%r12 +- movq %r8,%rcx +- sbbq %r14,%r13 +- sbbq $0,%r8 +- movq %r9,%r10 +- sbbq %r15,%r9 +- sbbq $0,%r11 +- +- cmovcq %rax,%r12 +- movq 0(%rsi),%rax +- cmovcq %rbp,%r13 +- movq 8(%rsi),%rbp +- cmovcq %rcx,%r8 +- movq 16(%rsi),%rcx +- cmovcq %r10,%r9 +- movq 24(%rsi),%r10 +- +- call __ecp_nistz256_subx +- +- leaq 160(%rsp),%rbx +- leaq 224(%rsp),%rdi +- call __ecp_nistz256_sub_fromx +- +- movq 0+0(%rsp),%rax +- movq 0+8(%rsp),%rbp +- movq 0+16(%rsp),%rcx +- movq 0+24(%rsp),%r10 +- leaq 64(%rsp),%rdi +- +- call __ecp_nistz256_subx +- +- movq %r12,0(%rdi) +- movq %r13,8(%rdi) +- movq %r8,16(%rdi) +- movq %r9,24(%rdi) +- movq 352(%rsp),%rdx +- leaq 352(%rsp),%rbx +- movq 0+160(%rsp),%r9 +- movq 8+160(%rsp),%r10 +- leaq -128+160(%rsp),%rsi +- movq 16+160(%rsp),%r11 +- movq 24+160(%rsp),%r12 +- leaq 32(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- movq 96(%rsp),%rdx +- leaq 96(%rsp),%rbx +- movq 0+64(%rsp),%r9 +- movq 8+64(%rsp),%r10 +- leaq -128+64(%rsp),%rsi +- movq 16+64(%rsp),%r11 +- movq 24+64(%rsp),%r12 +- leaq 64(%rsp),%rdi +- call __ecp_nistz256_mul_montx +- +- leaq 32(%rsp),%rbx +- leaq 256(%rsp),%rdi +- call __ecp_nistz256_sub_fromx +- +-.byte 102,72,15,126,199 +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 288(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 288+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand L$ONE_mont(%rip),%xmm2 +- pand L$ONE_mont+16(%rip),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 384(%rsp),%xmm2 +- pand 384+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,64(%rdi) +- movdqu %xmm3,80(%rdi) +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 224(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 224+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 416(%rsp),%xmm2 +- pand 416+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 320(%rsp),%xmm2 +- pand 320+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,0(%rdi) +- movdqu %xmm3,16(%rdi) +- +- movdqa %xmm5,%xmm0 +- movdqa %xmm5,%xmm1 +- pandn 256(%rsp),%xmm0 +- movdqa %xmm5,%xmm2 +- pandn 256+16(%rsp),%xmm1 +- movdqa %xmm5,%xmm3 +- pand 448(%rsp),%xmm2 +- pand 448+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- +- movdqa %xmm4,%xmm0 +- movdqa %xmm4,%xmm1 +- pandn %xmm2,%xmm0 +- movdqa %xmm4,%xmm2 +- pandn %xmm3,%xmm1 +- movdqa %xmm4,%xmm3 +- pand 352(%rsp),%xmm2 +- pand 352+16(%rsp),%xmm3 +- por %xmm0,%xmm2 +- por %xmm1,%xmm3 +- movdqu %xmm2,32(%rdi) +- movdqu %xmm3,48(%rdi) +- +- leaq 480+56(%rsp),%rsi +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbx +- +- movq -8(%rsi),%rbp +- +- leaq (%rsi),%rsp +- +-L$add_affinex_epilogue: +- .byte 
0xf3,0xc3 +- +- +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S b/mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S +deleted file mode 100644 +index ae7293a..0000000 +--- a/mac-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S ++++ /dev/null +@@ -1,328 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +-.private_extern _beeu_mod_inverse_vartime +-.globl _beeu_mod_inverse_vartime +-.private_extern _beeu_mod_inverse_vartime +-.p2align 5 +-_beeu_mod_inverse_vartime: +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- pushq %rbx +- +- pushq %rsi +- +- +- subq $80,%rsp +- +- movq %rdi,0(%rsp) +- +- +- movq $1,%r8 +- xorq %r9,%r9 +- xorq %r10,%r10 +- xorq %r11,%r11 +- xorq %rdi,%rdi +- +- xorq %r12,%r12 +- xorq %r13,%r13 +- xorq %r14,%r14 +- xorq %r15,%r15 +- xorq %rbp,%rbp +- +- +- vmovdqu 0(%rsi),%xmm0 +- vmovdqu 16(%rsi),%xmm1 +- vmovdqu %xmm0,48(%rsp) +- vmovdqu %xmm1,64(%rsp) +- +- vmovdqu 0(%rdx),%xmm0 +- vmovdqu 16(%rdx),%xmm1 +- vmovdqu %xmm0,16(%rsp) +- vmovdqu %xmm1,32(%rsp) +- +-L$beeu_loop: +- xorq %rbx,%rbx +- orq 48(%rsp),%rbx +- orq 56(%rsp),%rbx +- orq 64(%rsp),%rbx +- orq 72(%rsp),%rbx +- jz L$beeu_loop_end +- +- +- +- +- +- +- +- +- +- +- movq $1,%rcx +- +- +-L$beeu_shift_loop_XB: +- movq %rcx,%rbx +- andq 48(%rsp),%rbx +- jnz L$beeu_shift_loop_end_XB +- +- +- movq $1,%rbx +- andq %r8,%rbx +- jz L$shift1_0 +- addq 0(%rdx),%r8 +- adcq 8(%rdx),%r9 +- adcq 16(%rdx),%r10 +- adcq 24(%rdx),%r11 +- adcq $0,%rdi +- +-L$shift1_0: +- shrdq $1,%r9,%r8 +- shrdq $1,%r10,%r9 +- shrdq $1,%r11,%r10 +- shrdq $1,%rdi,%r11 +- shrq $1,%rdi +- +- shlq $1,%rcx +- +- +- +- +- +- cmpq $0x8000000,%rcx +- jne L$beeu_shift_loop_XB +- +-L$beeu_shift_loop_end_XB: +- bsfq %rcx,%rcx +- testq %rcx,%rcx +- jz L$beeu_no_shift_XB +- +- +- +- movq 8+48(%rsp),%rax +- movq 16+48(%rsp),%rbx +- movq 24+48(%rsp),%rsi +- +- shrdq %cl,%rax,0+48(%rsp) +- shrdq %cl,%rbx,8+48(%rsp) +- shrdq %cl,%rsi,16+48(%rsp) +- +- shrq %cl,%rsi +- movq %rsi,24+48(%rsp) +- +- +-L$beeu_no_shift_XB: +- +- movq $1,%rcx +- +- +-L$beeu_shift_loop_YA: +- movq %rcx,%rbx +- andq 16(%rsp),%rbx +- jnz L$beeu_shift_loop_end_YA +- +- +- movq $1,%rbx +- andq %r12,%rbx +- jz L$shift1_1 +- addq 0(%rdx),%r12 +- adcq 8(%rdx),%r13 +- adcq 16(%rdx),%r14 +- adcq 24(%rdx),%r15 +- adcq $0,%rbp +- +-L$shift1_1: +- shrdq $1,%r13,%r12 +- shrdq $1,%r14,%r13 +- shrdq $1,%r15,%r14 +- shrdq $1,%rbp,%r15 +- shrq $1,%rbp +- +- shlq $1,%rcx +- +- +- +- +- +- cmpq $0x8000000,%rcx +- jne L$beeu_shift_loop_YA +- +-L$beeu_shift_loop_end_YA: +- bsfq %rcx,%rcx +- testq %rcx,%rcx +- jz L$beeu_no_shift_YA +- +- +- +- movq 8+16(%rsp),%rax +- movq 16+16(%rsp),%rbx +- movq 24+16(%rsp),%rsi +- +- shrdq %cl,%rax,0+16(%rsp) +- shrdq %cl,%rbx,8+16(%rsp) +- shrdq %cl,%rsi,16+16(%rsp) +- +- shrq %cl,%rsi +- movq %rsi,24+16(%rsp) +- +- +-L$beeu_no_shift_YA: +- +- movq 48(%rsp),%rax +- movq 56(%rsp),%rbx +- movq 64(%rsp),%rsi +- movq 72(%rsp),%rcx +- subq 16(%rsp),%rax +- sbbq 24(%rsp),%rbx +- sbbq 32(%rsp),%rsi +- sbbq 40(%rsp),%rcx +- jnc L$beeu_B_bigger_than_A +- +- +- movq 16(%rsp),%rax +- movq 24(%rsp),%rbx +- movq 32(%rsp),%rsi +- movq 40(%rsp),%rcx +- subq 48(%rsp),%rax +- sbbq 
56(%rsp),%rbx +- sbbq 64(%rsp),%rsi +- sbbq 72(%rsp),%rcx +- movq %rax,16(%rsp) +- movq %rbx,24(%rsp) +- movq %rsi,32(%rsp) +- movq %rcx,40(%rsp) +- +- +- addq %r8,%r12 +- adcq %r9,%r13 +- adcq %r10,%r14 +- adcq %r11,%r15 +- adcq %rdi,%rbp +- jmp L$beeu_loop +- +-L$beeu_B_bigger_than_A: +- +- movq %rax,48(%rsp) +- movq %rbx,56(%rsp) +- movq %rsi,64(%rsp) +- movq %rcx,72(%rsp) +- +- +- addq %r12,%r8 +- adcq %r13,%r9 +- adcq %r14,%r10 +- adcq %r15,%r11 +- adcq %rbp,%rdi +- +- jmp L$beeu_loop +- +-L$beeu_loop_end: +- +- +- +- +- movq 16(%rsp),%rbx +- subq $1,%rbx +- orq 24(%rsp),%rbx +- orq 32(%rsp),%rbx +- orq 40(%rsp),%rbx +- +- jnz L$beeu_err +- +- +- +- +- movq 0(%rdx),%r8 +- movq 8(%rdx),%r9 +- movq 16(%rdx),%r10 +- movq 24(%rdx),%r11 +- xorq %rdi,%rdi +- +-L$beeu_reduction_loop: +- movq %r12,16(%rsp) +- movq %r13,24(%rsp) +- movq %r14,32(%rsp) +- movq %r15,40(%rsp) +- movq %rbp,48(%rsp) +- +- +- subq %r8,%r12 +- sbbq %r9,%r13 +- sbbq %r10,%r14 +- sbbq %r11,%r15 +- sbbq $0,%rbp +- +- +- cmovcq 16(%rsp),%r12 +- cmovcq 24(%rsp),%r13 +- cmovcq 32(%rsp),%r14 +- cmovcq 40(%rsp),%r15 +- jnc L$beeu_reduction_loop +- +- +- subq %r12,%r8 +- sbbq %r13,%r9 +- sbbq %r14,%r10 +- sbbq %r15,%r11 +- +-L$beeu_save: +- +- movq 0(%rsp),%rdi +- +- movq %r8,0(%rdi) +- movq %r9,8(%rdi) +- movq %r10,16(%rdi) +- movq %r11,24(%rdi) +- +- +- movq $1,%rax +- jmp L$beeu_finish +- +-L$beeu_err: +- +- xorq %rax,%rax +- +-L$beeu_finish: +- addq $80,%rsp +- +- popq %rsi +- +- popq %rbx +- +- popq %r15 +- +- popq %r14 +- +- popq %r13 +- +- popq %r12 +- +- popq %rbp +- +- .byte 0xf3,0xc3 +- +- +- +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S b/mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S +deleted file mode 100644 +index 664c067..0000000 +--- a/mac-x86_64/crypto/fipsmodule/rdrand-x86_64.S ++++ /dev/null +@@ -1,62 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +- +- +-.globl _CRYPTO_rdrand +-.private_extern _CRYPTO_rdrand +- +-.p2align 4 +-_CRYPTO_rdrand: +- +- xorq %rax,%rax +-.byte 72,15,199,242 +- +- adcq %rax,%rax +- movq %rdx,0(%rdi) +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +-.globl _CRYPTO_rdrand_multiple8_buf +-.private_extern _CRYPTO_rdrand_multiple8_buf +- +-.p2align 4 +-_CRYPTO_rdrand_multiple8_buf: +- +- testq %rsi,%rsi +- jz L$out +- movq $8,%rdx +-L$loop: +-.byte 72,15,199,241 +- jnc L$err +- movq %rcx,0(%rdi) +- addq %rdx,%rdi +- subq %rdx,%rsi +- jnz L$loop +-L$out: +- movq $1,%rax +- .byte 0xf3,0xc3 +-L$err: +- xorq %rax,%rax +- .byte 0xf3,0xc3 +- +- +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/rsaz-avx2.S b/mac-x86_64/crypto/fipsmodule/rsaz-avx2.S +deleted file mode 100644 +index bebc699..0000000 +--- a/mac-x86_64/crypto/fipsmodule/rsaz-avx2.S ++++ /dev/null +@@ -1,1748 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +-.globl _rsaz_1024_sqr_avx2 +-.private_extern _rsaz_1024_sqr_avx2 +- +-.p2align 6 +-_rsaz_1024_sqr_avx2: +- +- leaq (%rsp),%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- vzeroupper +- movq %rax,%rbp +- +- movq %rdx,%r13 +- subq $832,%rsp +- movq %r13,%r15 +- subq $-128,%rdi +- subq $-128,%rsi +- subq $-128,%r13 +- +- andq $4095,%r15 +- addq $320,%r15 +- shrq $12,%r15 +- vpxor %ymm9,%ymm9,%ymm9 +- jz L$sqr_1024_no_n_copy +- +- +- +- +- +- subq $320,%rsp +- vmovdqu 0-128(%r13),%ymm0 +- andq $-2048,%rsp +- vmovdqu 32-128(%r13),%ymm1 +- vmovdqu 64-128(%r13),%ymm2 +- vmovdqu 96-128(%r13),%ymm3 +- vmovdqu 128-128(%r13),%ymm4 +- vmovdqu 160-128(%r13),%ymm5 +- vmovdqu 192-128(%r13),%ymm6 +- vmovdqu 224-128(%r13),%ymm7 +- vmovdqu 256-128(%r13),%ymm8 +- leaq 832+128(%rsp),%r13 +- vmovdqu %ymm0,0-128(%r13) +- vmovdqu %ymm1,32-128(%r13) +- vmovdqu %ymm2,64-128(%r13) +- vmovdqu %ymm3,96-128(%r13) +- vmovdqu %ymm4,128-128(%r13) +- vmovdqu %ymm5,160-128(%r13) +- vmovdqu %ymm6,192-128(%r13) +- vmovdqu %ymm7,224-128(%r13) +- vmovdqu %ymm8,256-128(%r13) +- vmovdqu %ymm9,288-128(%r13) +- +-L$sqr_1024_no_n_copy: +- andq $-1024,%rsp +- +- vmovdqu 32-128(%rsi),%ymm1 +- vmovdqu 64-128(%rsi),%ymm2 +- vmovdqu 96-128(%rsi),%ymm3 +- vmovdqu 128-128(%rsi),%ymm4 +- vmovdqu 160-128(%rsi),%ymm5 +- vmovdqu 192-128(%rsi),%ymm6 +- vmovdqu 224-128(%rsi),%ymm7 +- vmovdqu 256-128(%rsi),%ymm8 +- +- leaq 192(%rsp),%rbx +- vmovdqu L$and_mask(%rip),%ymm15 +- jmp L$OOP_GRANDE_SQR_1024 +- +-.p2align 5 +-L$OOP_GRANDE_SQR_1024: +- leaq 576+128(%rsp),%r9 +- leaq 448(%rsp),%r12 +- +- +- +- +- vpaddq %ymm1,%ymm1,%ymm1 +- vpbroadcastq 0-128(%rsi),%ymm10 +- vpaddq %ymm2,%ymm2,%ymm2 +- vmovdqa %ymm1,0-128(%r9) +- vpaddq %ymm3,%ymm3,%ymm3 +- vmovdqa %ymm2,32-128(%r9) +- vpaddq %ymm4,%ymm4,%ymm4 +- vmovdqa %ymm3,64-128(%r9) +- vpaddq %ymm5,%ymm5,%ymm5 +- vmovdqa %ymm4,96-128(%r9) +- vpaddq %ymm6,%ymm6,%ymm6 +- vmovdqa %ymm5,128-128(%r9) +- vpaddq %ymm7,%ymm7,%ymm7 +- vmovdqa %ymm6,160-128(%r9) +- vpaddq %ymm8,%ymm8,%ymm8 +- vmovdqa %ymm7,192-128(%r9) +- vpxor %ymm9,%ymm9,%ymm9 +- vmovdqa %ymm8,224-128(%r9) +- +- vpmuludq 0-128(%rsi),%ymm10,%ymm0 +- vpbroadcastq 32-128(%rsi),%ymm11 +- vmovdqu %ymm9,288-192(%rbx) +- vpmuludq %ymm10,%ymm1,%ymm1 +- vmovdqu %ymm9,320-448(%r12) +- vpmuludq %ymm10,%ymm2,%ymm2 +- vmovdqu %ymm9,352-448(%r12) +- vpmuludq %ymm10,%ymm3,%ymm3 +- vmovdqu %ymm9,384-448(%r12) +- vpmuludq %ymm10,%ymm4,%ymm4 +- vmovdqu %ymm9,416-448(%r12) +- vpmuludq %ymm10,%ymm5,%ymm5 +- vmovdqu %ymm9,448-448(%r12) +- vpmuludq %ymm10,%ymm6,%ymm6 +- vmovdqu %ymm9,480-448(%r12) +- vpmuludq %ymm10,%ymm7,%ymm7 +- vmovdqu %ymm9,512-448(%r12) +- vpmuludq %ymm10,%ymm8,%ymm8 +- vpbroadcastq 64-128(%rsi),%ymm10 +- vmovdqu %ymm9,544-448(%r12) +- +- movq %rsi,%r15 +- movl $4,%r14d +- jmp L$sqr_entry_1024 +-.p2align 5 +-L$OOP_SQR_1024: +- vpbroadcastq 32-128(%r15),%ymm11 +- vpmuludq 0-128(%rsi),%ymm10,%ymm0 +- vpaddq 0-192(%rbx),%ymm0,%ymm0 +- vpmuludq 0-128(%r9),%ymm10,%ymm1 +- vpaddq 32-192(%rbx),%ymm1,%ymm1 +- vpmuludq 32-128(%r9),%ymm10,%ymm2 +- vpaddq 64-192(%rbx),%ymm2,%ymm2 +- vpmuludq 64-128(%r9),%ymm10,%ymm3 +- vpaddq 96-192(%rbx),%ymm3,%ymm3 +- vpmuludq 96-128(%r9),%ymm10,%ymm4 +- vpaddq 128-192(%rbx),%ymm4,%ymm4 +- 
vpmuludq 128-128(%r9),%ymm10,%ymm5 +- vpaddq 160-192(%rbx),%ymm5,%ymm5 +- vpmuludq 160-128(%r9),%ymm10,%ymm6 +- vpaddq 192-192(%rbx),%ymm6,%ymm6 +- vpmuludq 192-128(%r9),%ymm10,%ymm7 +- vpaddq 224-192(%rbx),%ymm7,%ymm7 +- vpmuludq 224-128(%r9),%ymm10,%ymm8 +- vpbroadcastq 64-128(%r15),%ymm10 +- vpaddq 256-192(%rbx),%ymm8,%ymm8 +-L$sqr_entry_1024: +- vmovdqu %ymm0,0-192(%rbx) +- vmovdqu %ymm1,32-192(%rbx) +- +- vpmuludq 32-128(%rsi),%ymm11,%ymm12 +- vpaddq %ymm12,%ymm2,%ymm2 +- vpmuludq 32-128(%r9),%ymm11,%ymm14 +- vpaddq %ymm14,%ymm3,%ymm3 +- vpmuludq 64-128(%r9),%ymm11,%ymm13 +- vpaddq %ymm13,%ymm4,%ymm4 +- vpmuludq 96-128(%r9),%ymm11,%ymm12 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpmuludq 128-128(%r9),%ymm11,%ymm14 +- vpaddq %ymm14,%ymm6,%ymm6 +- vpmuludq 160-128(%r9),%ymm11,%ymm13 +- vpaddq %ymm13,%ymm7,%ymm7 +- vpmuludq 192-128(%r9),%ymm11,%ymm12 +- vpaddq %ymm12,%ymm8,%ymm8 +- vpmuludq 224-128(%r9),%ymm11,%ymm0 +- vpbroadcastq 96-128(%r15),%ymm11 +- vpaddq 288-192(%rbx),%ymm0,%ymm0 +- +- vmovdqu %ymm2,64-192(%rbx) +- vmovdqu %ymm3,96-192(%rbx) +- +- vpmuludq 64-128(%rsi),%ymm10,%ymm13 +- vpaddq %ymm13,%ymm4,%ymm4 +- vpmuludq 64-128(%r9),%ymm10,%ymm12 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpmuludq 96-128(%r9),%ymm10,%ymm14 +- vpaddq %ymm14,%ymm6,%ymm6 +- vpmuludq 128-128(%r9),%ymm10,%ymm13 +- vpaddq %ymm13,%ymm7,%ymm7 +- vpmuludq 160-128(%r9),%ymm10,%ymm12 +- vpaddq %ymm12,%ymm8,%ymm8 +- vpmuludq 192-128(%r9),%ymm10,%ymm14 +- vpaddq %ymm14,%ymm0,%ymm0 +- vpmuludq 224-128(%r9),%ymm10,%ymm1 +- vpbroadcastq 128-128(%r15),%ymm10 +- vpaddq 320-448(%r12),%ymm1,%ymm1 +- +- vmovdqu %ymm4,128-192(%rbx) +- vmovdqu %ymm5,160-192(%rbx) +- +- vpmuludq 96-128(%rsi),%ymm11,%ymm12 +- vpaddq %ymm12,%ymm6,%ymm6 +- vpmuludq 96-128(%r9),%ymm11,%ymm14 +- vpaddq %ymm14,%ymm7,%ymm7 +- vpmuludq 128-128(%r9),%ymm11,%ymm13 +- vpaddq %ymm13,%ymm8,%ymm8 +- vpmuludq 160-128(%r9),%ymm11,%ymm12 +- vpaddq %ymm12,%ymm0,%ymm0 +- vpmuludq 192-128(%r9),%ymm11,%ymm14 +- vpaddq %ymm14,%ymm1,%ymm1 +- vpmuludq 224-128(%r9),%ymm11,%ymm2 +- vpbroadcastq 160-128(%r15),%ymm11 +- vpaddq 352-448(%r12),%ymm2,%ymm2 +- +- vmovdqu %ymm6,192-192(%rbx) +- vmovdqu %ymm7,224-192(%rbx) +- +- vpmuludq 128-128(%rsi),%ymm10,%ymm12 +- vpaddq %ymm12,%ymm8,%ymm8 +- vpmuludq 128-128(%r9),%ymm10,%ymm14 +- vpaddq %ymm14,%ymm0,%ymm0 +- vpmuludq 160-128(%r9),%ymm10,%ymm13 +- vpaddq %ymm13,%ymm1,%ymm1 +- vpmuludq 192-128(%r9),%ymm10,%ymm12 +- vpaddq %ymm12,%ymm2,%ymm2 +- vpmuludq 224-128(%r9),%ymm10,%ymm3 +- vpbroadcastq 192-128(%r15),%ymm10 +- vpaddq 384-448(%r12),%ymm3,%ymm3 +- +- vmovdqu %ymm8,256-192(%rbx) +- vmovdqu %ymm0,288-192(%rbx) +- leaq 8(%rbx),%rbx +- +- vpmuludq 160-128(%rsi),%ymm11,%ymm13 +- vpaddq %ymm13,%ymm1,%ymm1 +- vpmuludq 160-128(%r9),%ymm11,%ymm12 +- vpaddq %ymm12,%ymm2,%ymm2 +- vpmuludq 192-128(%r9),%ymm11,%ymm14 +- vpaddq %ymm14,%ymm3,%ymm3 +- vpmuludq 224-128(%r9),%ymm11,%ymm4 +- vpbroadcastq 224-128(%r15),%ymm11 +- vpaddq 416-448(%r12),%ymm4,%ymm4 +- +- vmovdqu %ymm1,320-448(%r12) +- vmovdqu %ymm2,352-448(%r12) +- +- vpmuludq 192-128(%rsi),%ymm10,%ymm12 +- vpaddq %ymm12,%ymm3,%ymm3 +- vpmuludq 192-128(%r9),%ymm10,%ymm14 +- vpbroadcastq 256-128(%r15),%ymm0 +- vpaddq %ymm14,%ymm4,%ymm4 +- vpmuludq 224-128(%r9),%ymm10,%ymm5 +- vpbroadcastq 0+8-128(%r15),%ymm10 +- vpaddq 448-448(%r12),%ymm5,%ymm5 +- +- vmovdqu %ymm3,384-448(%r12) +- vmovdqu %ymm4,416-448(%r12) +- leaq 8(%r15),%r15 +- +- vpmuludq 224-128(%rsi),%ymm11,%ymm12 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpmuludq 224-128(%r9),%ymm11,%ymm6 +- vpaddq 480-448(%r12),%ymm6,%ymm6 +- +- vpmuludq 
256-128(%rsi),%ymm0,%ymm7 +- vmovdqu %ymm5,448-448(%r12) +- vpaddq 512-448(%r12),%ymm7,%ymm7 +- vmovdqu %ymm6,480-448(%r12) +- vmovdqu %ymm7,512-448(%r12) +- leaq 8(%r12),%r12 +- +- decl %r14d +- jnz L$OOP_SQR_1024 +- +- vmovdqu 256(%rsp),%ymm8 +- vmovdqu 288(%rsp),%ymm1 +- vmovdqu 320(%rsp),%ymm2 +- leaq 192(%rsp),%rbx +- +- vpsrlq $29,%ymm8,%ymm14 +- vpand %ymm15,%ymm8,%ymm8 +- vpsrlq $29,%ymm1,%ymm11 +- vpand %ymm15,%ymm1,%ymm1 +- +- vpermq $0x93,%ymm14,%ymm14 +- vpxor %ymm9,%ymm9,%ymm9 +- vpermq $0x93,%ymm11,%ymm11 +- +- vpblendd $3,%ymm9,%ymm14,%ymm10 +- vpblendd $3,%ymm14,%ymm11,%ymm14 +- vpaddq %ymm10,%ymm8,%ymm8 +- vpblendd $3,%ymm11,%ymm9,%ymm11 +- vpaddq %ymm14,%ymm1,%ymm1 +- vpaddq %ymm11,%ymm2,%ymm2 +- vmovdqu %ymm1,288-192(%rbx) +- vmovdqu %ymm2,320-192(%rbx) +- +- movq (%rsp),%rax +- movq 8(%rsp),%r10 +- movq 16(%rsp),%r11 +- movq 24(%rsp),%r12 +- vmovdqu 32(%rsp),%ymm1 +- vmovdqu 64-192(%rbx),%ymm2 +- vmovdqu 96-192(%rbx),%ymm3 +- vmovdqu 128-192(%rbx),%ymm4 +- vmovdqu 160-192(%rbx),%ymm5 +- vmovdqu 192-192(%rbx),%ymm6 +- vmovdqu 224-192(%rbx),%ymm7 +- +- movq %rax,%r9 +- imull %ecx,%eax +- andl $0x1fffffff,%eax +- vmovd %eax,%xmm12 +- +- movq %rax,%rdx +- imulq -128(%r13),%rax +- vpbroadcastq %xmm12,%ymm12 +- addq %rax,%r9 +- movq %rdx,%rax +- imulq 8-128(%r13),%rax +- shrq $29,%r9 +- addq %rax,%r10 +- movq %rdx,%rax +- imulq 16-128(%r13),%rax +- addq %r9,%r10 +- addq %rax,%r11 +- imulq 24-128(%r13),%rdx +- addq %rdx,%r12 +- +- movq %r10,%rax +- imull %ecx,%eax +- andl $0x1fffffff,%eax +- +- movl $9,%r14d +- jmp L$OOP_REDUCE_1024 +- +-.p2align 5 +-L$OOP_REDUCE_1024: +- vmovd %eax,%xmm13 +- vpbroadcastq %xmm13,%ymm13 +- +- vpmuludq 32-128(%r13),%ymm12,%ymm10 +- movq %rax,%rdx +- imulq -128(%r13),%rax +- vpaddq %ymm10,%ymm1,%ymm1 +- addq %rax,%r10 +- vpmuludq 64-128(%r13),%ymm12,%ymm14 +- movq %rdx,%rax +- imulq 8-128(%r13),%rax +- vpaddq %ymm14,%ymm2,%ymm2 +- vpmuludq 96-128(%r13),%ymm12,%ymm11 +-.byte 0x67 +- addq %rax,%r11 +-.byte 0x67 +- movq %rdx,%rax +- imulq 16-128(%r13),%rax +- shrq $29,%r10 +- vpaddq %ymm11,%ymm3,%ymm3 +- vpmuludq 128-128(%r13),%ymm12,%ymm10 +- addq %rax,%r12 +- addq %r10,%r11 +- vpaddq %ymm10,%ymm4,%ymm4 +- vpmuludq 160-128(%r13),%ymm12,%ymm14 +- movq %r11,%rax +- imull %ecx,%eax +- vpaddq %ymm14,%ymm5,%ymm5 +- vpmuludq 192-128(%r13),%ymm12,%ymm11 +- andl $0x1fffffff,%eax +- vpaddq %ymm11,%ymm6,%ymm6 +- vpmuludq 224-128(%r13),%ymm12,%ymm10 +- vpaddq %ymm10,%ymm7,%ymm7 +- vpmuludq 256-128(%r13),%ymm12,%ymm14 +- vmovd %eax,%xmm12 +- +- vpaddq %ymm14,%ymm8,%ymm8 +- +- vpbroadcastq %xmm12,%ymm12 +- +- vpmuludq 32-8-128(%r13),%ymm13,%ymm11 +- vmovdqu 96-8-128(%r13),%ymm14 +- movq %rax,%rdx +- imulq -128(%r13),%rax +- vpaddq %ymm11,%ymm1,%ymm1 +- vpmuludq 64-8-128(%r13),%ymm13,%ymm10 +- vmovdqu 128-8-128(%r13),%ymm11 +- addq %rax,%r11 +- movq %rdx,%rax +- imulq 8-128(%r13),%rax +- vpaddq %ymm10,%ymm2,%ymm2 +- addq %r12,%rax +- shrq $29,%r11 +- vpmuludq %ymm13,%ymm14,%ymm14 +- vmovdqu 160-8-128(%r13),%ymm10 +- addq %r11,%rax +- vpaddq %ymm14,%ymm3,%ymm3 +- vpmuludq %ymm13,%ymm11,%ymm11 +- vmovdqu 192-8-128(%r13),%ymm14 +-.byte 0x67 +- movq %rax,%r12 +- imull %ecx,%eax +- vpaddq %ymm11,%ymm4,%ymm4 +- vpmuludq %ymm13,%ymm10,%ymm10 +-.byte 0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00 +- andl $0x1fffffff,%eax +- vpaddq %ymm10,%ymm5,%ymm5 +- vpmuludq %ymm13,%ymm14,%ymm14 +- vmovdqu 256-8-128(%r13),%ymm10 +- vpaddq %ymm14,%ymm6,%ymm6 +- vpmuludq %ymm13,%ymm11,%ymm11 +- vmovdqu 288-8-128(%r13),%ymm9 +- vmovd %eax,%xmm0 +- imulq -128(%r13),%rax +- vpaddq 
%ymm11,%ymm7,%ymm7 +- vpmuludq %ymm13,%ymm10,%ymm10 +- vmovdqu 32-16-128(%r13),%ymm14 +- vpbroadcastq %xmm0,%ymm0 +- vpaddq %ymm10,%ymm8,%ymm8 +- vpmuludq %ymm13,%ymm9,%ymm9 +- vmovdqu 64-16-128(%r13),%ymm11 +- addq %rax,%r12 +- +- vmovdqu 32-24-128(%r13),%ymm13 +- vpmuludq %ymm12,%ymm14,%ymm14 +- vmovdqu 96-16-128(%r13),%ymm10 +- vpaddq %ymm14,%ymm1,%ymm1 +- vpmuludq %ymm0,%ymm13,%ymm13 +- vpmuludq %ymm12,%ymm11,%ymm11 +-.byte 0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff +- vpaddq %ymm1,%ymm13,%ymm13 +- vpaddq %ymm11,%ymm2,%ymm2 +- vpmuludq %ymm12,%ymm10,%ymm10 +- vmovdqu 160-16-128(%r13),%ymm11 +-.byte 0x67 +- vmovq %xmm13,%rax +- vmovdqu %ymm13,(%rsp) +- vpaddq %ymm10,%ymm3,%ymm3 +- vpmuludq %ymm12,%ymm14,%ymm14 +- vmovdqu 192-16-128(%r13),%ymm10 +- vpaddq %ymm14,%ymm4,%ymm4 +- vpmuludq %ymm12,%ymm11,%ymm11 +- vmovdqu 224-16-128(%r13),%ymm14 +- vpaddq %ymm11,%ymm5,%ymm5 +- vpmuludq %ymm12,%ymm10,%ymm10 +- vmovdqu 256-16-128(%r13),%ymm11 +- vpaddq %ymm10,%ymm6,%ymm6 +- vpmuludq %ymm12,%ymm14,%ymm14 +- shrq $29,%r12 +- vmovdqu 288-16-128(%r13),%ymm10 +- addq %r12,%rax +- vpaddq %ymm14,%ymm7,%ymm7 +- vpmuludq %ymm12,%ymm11,%ymm11 +- +- movq %rax,%r9 +- imull %ecx,%eax +- vpaddq %ymm11,%ymm8,%ymm8 +- vpmuludq %ymm12,%ymm10,%ymm10 +- andl $0x1fffffff,%eax +- vmovd %eax,%xmm12 +- vmovdqu 96-24-128(%r13),%ymm11 +-.byte 0x67 +- vpaddq %ymm10,%ymm9,%ymm9 +- vpbroadcastq %xmm12,%ymm12 +- +- vpmuludq 64-24-128(%r13),%ymm0,%ymm14 +- vmovdqu 128-24-128(%r13),%ymm10 +- movq %rax,%rdx +- imulq -128(%r13),%rax +- movq 8(%rsp),%r10 +- vpaddq %ymm14,%ymm2,%ymm1 +- vpmuludq %ymm0,%ymm11,%ymm11 +- vmovdqu 160-24-128(%r13),%ymm14 +- addq %rax,%r9 +- movq %rdx,%rax +- imulq 8-128(%r13),%rax +-.byte 0x67 +- shrq $29,%r9 +- movq 16(%rsp),%r11 +- vpaddq %ymm11,%ymm3,%ymm2 +- vpmuludq %ymm0,%ymm10,%ymm10 +- vmovdqu 192-24-128(%r13),%ymm11 +- addq %rax,%r10 +- movq %rdx,%rax +- imulq 16-128(%r13),%rax +- vpaddq %ymm10,%ymm4,%ymm3 +- vpmuludq %ymm0,%ymm14,%ymm14 +- vmovdqu 224-24-128(%r13),%ymm10 +- imulq 24-128(%r13),%rdx +- addq %rax,%r11 +- leaq (%r9,%r10,1),%rax +- vpaddq %ymm14,%ymm5,%ymm4 +- vpmuludq %ymm0,%ymm11,%ymm11 +- vmovdqu 256-24-128(%r13),%ymm14 +- movq %rax,%r10 +- imull %ecx,%eax +- vpmuludq %ymm0,%ymm10,%ymm10 +- vpaddq %ymm11,%ymm6,%ymm5 +- vmovdqu 288-24-128(%r13),%ymm11 +- andl $0x1fffffff,%eax +- vpaddq %ymm10,%ymm7,%ymm6 +- vpmuludq %ymm0,%ymm14,%ymm14 +- addq 24(%rsp),%rdx +- vpaddq %ymm14,%ymm8,%ymm7 +- vpmuludq %ymm0,%ymm11,%ymm11 +- vpaddq %ymm11,%ymm9,%ymm8 +- vmovq %r12,%xmm9 +- movq %rdx,%r12 +- +- decl %r14d +- jnz L$OOP_REDUCE_1024 +- leaq 448(%rsp),%r12 +- vpaddq %ymm9,%ymm13,%ymm0 +- vpxor %ymm9,%ymm9,%ymm9 +- +- vpaddq 288-192(%rbx),%ymm0,%ymm0 +- vpaddq 320-448(%r12),%ymm1,%ymm1 +- vpaddq 352-448(%r12),%ymm2,%ymm2 +- vpaddq 384-448(%r12),%ymm3,%ymm3 +- vpaddq 416-448(%r12),%ymm4,%ymm4 +- vpaddq 448-448(%r12),%ymm5,%ymm5 +- vpaddq 480-448(%r12),%ymm6,%ymm6 +- vpaddq 512-448(%r12),%ymm7,%ymm7 +- vpaddq 544-448(%r12),%ymm8,%ymm8 +- +- vpsrlq $29,%ymm0,%ymm14 +- vpand %ymm15,%ymm0,%ymm0 +- vpsrlq $29,%ymm1,%ymm11 +- vpand %ymm15,%ymm1,%ymm1 +- vpsrlq $29,%ymm2,%ymm12 +- vpermq $0x93,%ymm14,%ymm14 +- vpand %ymm15,%ymm2,%ymm2 +- vpsrlq $29,%ymm3,%ymm13 +- vpermq $0x93,%ymm11,%ymm11 +- vpand %ymm15,%ymm3,%ymm3 +- vpermq $0x93,%ymm12,%ymm12 +- +- vpblendd $3,%ymm9,%ymm14,%ymm10 +- vpermq $0x93,%ymm13,%ymm13 +- vpblendd $3,%ymm14,%ymm11,%ymm14 +- vpaddq %ymm10,%ymm0,%ymm0 +- vpblendd $3,%ymm11,%ymm12,%ymm11 +- vpaddq %ymm14,%ymm1,%ymm1 +- vpblendd $3,%ymm12,%ymm13,%ymm12 +- vpaddq 
%ymm11,%ymm2,%ymm2 +- vpblendd $3,%ymm13,%ymm9,%ymm13 +- vpaddq %ymm12,%ymm3,%ymm3 +- vpaddq %ymm13,%ymm4,%ymm4 +- +- vpsrlq $29,%ymm0,%ymm14 +- vpand %ymm15,%ymm0,%ymm0 +- vpsrlq $29,%ymm1,%ymm11 +- vpand %ymm15,%ymm1,%ymm1 +- vpsrlq $29,%ymm2,%ymm12 +- vpermq $0x93,%ymm14,%ymm14 +- vpand %ymm15,%ymm2,%ymm2 +- vpsrlq $29,%ymm3,%ymm13 +- vpermq $0x93,%ymm11,%ymm11 +- vpand %ymm15,%ymm3,%ymm3 +- vpermq $0x93,%ymm12,%ymm12 +- +- vpblendd $3,%ymm9,%ymm14,%ymm10 +- vpermq $0x93,%ymm13,%ymm13 +- vpblendd $3,%ymm14,%ymm11,%ymm14 +- vpaddq %ymm10,%ymm0,%ymm0 +- vpblendd $3,%ymm11,%ymm12,%ymm11 +- vpaddq %ymm14,%ymm1,%ymm1 +- vmovdqu %ymm0,0-128(%rdi) +- vpblendd $3,%ymm12,%ymm13,%ymm12 +- vpaddq %ymm11,%ymm2,%ymm2 +- vmovdqu %ymm1,32-128(%rdi) +- vpblendd $3,%ymm13,%ymm9,%ymm13 +- vpaddq %ymm12,%ymm3,%ymm3 +- vmovdqu %ymm2,64-128(%rdi) +- vpaddq %ymm13,%ymm4,%ymm4 +- vmovdqu %ymm3,96-128(%rdi) +- vpsrlq $29,%ymm4,%ymm14 +- vpand %ymm15,%ymm4,%ymm4 +- vpsrlq $29,%ymm5,%ymm11 +- vpand %ymm15,%ymm5,%ymm5 +- vpsrlq $29,%ymm6,%ymm12 +- vpermq $0x93,%ymm14,%ymm14 +- vpand %ymm15,%ymm6,%ymm6 +- vpsrlq $29,%ymm7,%ymm13 +- vpermq $0x93,%ymm11,%ymm11 +- vpand %ymm15,%ymm7,%ymm7 +- vpsrlq $29,%ymm8,%ymm0 +- vpermq $0x93,%ymm12,%ymm12 +- vpand %ymm15,%ymm8,%ymm8 +- vpermq $0x93,%ymm13,%ymm13 +- +- vpblendd $3,%ymm9,%ymm14,%ymm10 +- vpermq $0x93,%ymm0,%ymm0 +- vpblendd $3,%ymm14,%ymm11,%ymm14 +- vpaddq %ymm10,%ymm4,%ymm4 +- vpblendd $3,%ymm11,%ymm12,%ymm11 +- vpaddq %ymm14,%ymm5,%ymm5 +- vpblendd $3,%ymm12,%ymm13,%ymm12 +- vpaddq %ymm11,%ymm6,%ymm6 +- vpblendd $3,%ymm13,%ymm0,%ymm13 +- vpaddq %ymm12,%ymm7,%ymm7 +- vpaddq %ymm13,%ymm8,%ymm8 +- +- vpsrlq $29,%ymm4,%ymm14 +- vpand %ymm15,%ymm4,%ymm4 +- vpsrlq $29,%ymm5,%ymm11 +- vpand %ymm15,%ymm5,%ymm5 +- vpsrlq $29,%ymm6,%ymm12 +- vpermq $0x93,%ymm14,%ymm14 +- vpand %ymm15,%ymm6,%ymm6 +- vpsrlq $29,%ymm7,%ymm13 +- vpermq $0x93,%ymm11,%ymm11 +- vpand %ymm15,%ymm7,%ymm7 +- vpsrlq $29,%ymm8,%ymm0 +- vpermq $0x93,%ymm12,%ymm12 +- vpand %ymm15,%ymm8,%ymm8 +- vpermq $0x93,%ymm13,%ymm13 +- +- vpblendd $3,%ymm9,%ymm14,%ymm10 +- vpermq $0x93,%ymm0,%ymm0 +- vpblendd $3,%ymm14,%ymm11,%ymm14 +- vpaddq %ymm10,%ymm4,%ymm4 +- vpblendd $3,%ymm11,%ymm12,%ymm11 +- vpaddq %ymm14,%ymm5,%ymm5 +- vmovdqu %ymm4,128-128(%rdi) +- vpblendd $3,%ymm12,%ymm13,%ymm12 +- vpaddq %ymm11,%ymm6,%ymm6 +- vmovdqu %ymm5,160-128(%rdi) +- vpblendd $3,%ymm13,%ymm0,%ymm13 +- vpaddq %ymm12,%ymm7,%ymm7 +- vmovdqu %ymm6,192-128(%rdi) +- vpaddq %ymm13,%ymm8,%ymm8 +- vmovdqu %ymm7,224-128(%rdi) +- vmovdqu %ymm8,256-128(%rdi) +- +- movq %rdi,%rsi +- decl %r8d +- jne L$OOP_GRANDE_SQR_1024 +- +- vzeroall +- movq %rbp,%rax +- +- movq -48(%rax),%r15 +- +- movq -40(%rax),%r14 +- +- movq -32(%rax),%r13 +- +- movq -24(%rax),%r12 +- +- movq -16(%rax),%rbp +- +- movq -8(%rax),%rbx +- +- leaq (%rax),%rsp +- +-L$sqr_1024_epilogue: +- .byte 0xf3,0xc3 +- +- +-.globl _rsaz_1024_mul_avx2 +-.private_extern _rsaz_1024_mul_avx2 +- +-.p2align 6 +-_rsaz_1024_mul_avx2: +- +- leaq (%rsp),%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- movq %rax,%rbp +- +- vzeroall +- movq %rdx,%r13 +- subq $64,%rsp +- +- +- +- +- +- +-.byte 0x67,0x67 +- movq %rsi,%r15 +- andq $4095,%r15 +- addq $320,%r15 +- shrq $12,%r15 +- movq %rsi,%r15 +- cmovnzq %r13,%rsi +- cmovnzq %r15,%r13 +- +- movq %rcx,%r15 +- subq $-128,%rsi +- subq $-128,%rcx +- subq $-128,%rdi +- +- andq $4095,%r15 +- addq $320,%r15 +-.byte 0x67,0x67 +- shrq $12,%r15 +- jz L$mul_1024_no_n_copy +- +- +- +- +- +- subq 
$320,%rsp +- vmovdqu 0-128(%rcx),%ymm0 +- andq $-512,%rsp +- vmovdqu 32-128(%rcx),%ymm1 +- vmovdqu 64-128(%rcx),%ymm2 +- vmovdqu 96-128(%rcx),%ymm3 +- vmovdqu 128-128(%rcx),%ymm4 +- vmovdqu 160-128(%rcx),%ymm5 +- vmovdqu 192-128(%rcx),%ymm6 +- vmovdqu 224-128(%rcx),%ymm7 +- vmovdqu 256-128(%rcx),%ymm8 +- leaq 64+128(%rsp),%rcx +- vmovdqu %ymm0,0-128(%rcx) +- vpxor %ymm0,%ymm0,%ymm0 +- vmovdqu %ymm1,32-128(%rcx) +- vpxor %ymm1,%ymm1,%ymm1 +- vmovdqu %ymm2,64-128(%rcx) +- vpxor %ymm2,%ymm2,%ymm2 +- vmovdqu %ymm3,96-128(%rcx) +- vpxor %ymm3,%ymm3,%ymm3 +- vmovdqu %ymm4,128-128(%rcx) +- vpxor %ymm4,%ymm4,%ymm4 +- vmovdqu %ymm5,160-128(%rcx) +- vpxor %ymm5,%ymm5,%ymm5 +- vmovdqu %ymm6,192-128(%rcx) +- vpxor %ymm6,%ymm6,%ymm6 +- vmovdqu %ymm7,224-128(%rcx) +- vpxor %ymm7,%ymm7,%ymm7 +- vmovdqu %ymm8,256-128(%rcx) +- vmovdqa %ymm0,%ymm8 +- vmovdqu %ymm9,288-128(%rcx) +-L$mul_1024_no_n_copy: +- andq $-64,%rsp +- +- movq (%r13),%rbx +- vpbroadcastq (%r13),%ymm10 +- vmovdqu %ymm0,(%rsp) +- xorq %r9,%r9 +-.byte 0x67 +- xorq %r10,%r10 +- xorq %r11,%r11 +- xorq %r12,%r12 +- +- vmovdqu L$and_mask(%rip),%ymm15 +- movl $9,%r14d +- vmovdqu %ymm9,288-128(%rdi) +- jmp L$oop_mul_1024 +- +-.p2align 5 +-L$oop_mul_1024: +- vpsrlq $29,%ymm3,%ymm9 +- movq %rbx,%rax +- imulq -128(%rsi),%rax +- addq %r9,%rax +- movq %rbx,%r10 +- imulq 8-128(%rsi),%r10 +- addq 8(%rsp),%r10 +- +- movq %rax,%r9 +- imull %r8d,%eax +- andl $0x1fffffff,%eax +- +- movq %rbx,%r11 +- imulq 16-128(%rsi),%r11 +- addq 16(%rsp),%r11 +- +- movq %rbx,%r12 +- imulq 24-128(%rsi),%r12 +- addq 24(%rsp),%r12 +- vpmuludq 32-128(%rsi),%ymm10,%ymm0 +- vmovd %eax,%xmm11 +- vpaddq %ymm0,%ymm1,%ymm1 +- vpmuludq 64-128(%rsi),%ymm10,%ymm12 +- vpbroadcastq %xmm11,%ymm11 +- vpaddq %ymm12,%ymm2,%ymm2 +- vpmuludq 96-128(%rsi),%ymm10,%ymm13 +- vpand %ymm15,%ymm3,%ymm3 +- vpaddq %ymm13,%ymm3,%ymm3 +- vpmuludq 128-128(%rsi),%ymm10,%ymm0 +- vpaddq %ymm0,%ymm4,%ymm4 +- vpmuludq 160-128(%rsi),%ymm10,%ymm12 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpmuludq 192-128(%rsi),%ymm10,%ymm13 +- vpaddq %ymm13,%ymm6,%ymm6 +- vpmuludq 224-128(%rsi),%ymm10,%ymm0 +- vpermq $0x93,%ymm9,%ymm9 +- vpaddq %ymm0,%ymm7,%ymm7 +- vpmuludq 256-128(%rsi),%ymm10,%ymm12 +- vpbroadcastq 8(%r13),%ymm10 +- vpaddq %ymm12,%ymm8,%ymm8 +- +- movq %rax,%rdx +- imulq -128(%rcx),%rax +- addq %rax,%r9 +- movq %rdx,%rax +- imulq 8-128(%rcx),%rax +- addq %rax,%r10 +- movq %rdx,%rax +- imulq 16-128(%rcx),%rax +- addq %rax,%r11 +- shrq $29,%r9 +- imulq 24-128(%rcx),%rdx +- addq %rdx,%r12 +- addq %r9,%r10 +- +- vpmuludq 32-128(%rcx),%ymm11,%ymm13 +- vmovq %xmm10,%rbx +- vpaddq %ymm13,%ymm1,%ymm1 +- vpmuludq 64-128(%rcx),%ymm11,%ymm0 +- vpaddq %ymm0,%ymm2,%ymm2 +- vpmuludq 96-128(%rcx),%ymm11,%ymm12 +- vpaddq %ymm12,%ymm3,%ymm3 +- vpmuludq 128-128(%rcx),%ymm11,%ymm13 +- vpaddq %ymm13,%ymm4,%ymm4 +- vpmuludq 160-128(%rcx),%ymm11,%ymm0 +- vpaddq %ymm0,%ymm5,%ymm5 +- vpmuludq 192-128(%rcx),%ymm11,%ymm12 +- vpaddq %ymm12,%ymm6,%ymm6 +- vpmuludq 224-128(%rcx),%ymm11,%ymm13 +- vpblendd $3,%ymm14,%ymm9,%ymm12 +- vpaddq %ymm13,%ymm7,%ymm7 +- vpmuludq 256-128(%rcx),%ymm11,%ymm0 +- vpaddq %ymm12,%ymm3,%ymm3 +- vpaddq %ymm0,%ymm8,%ymm8 +- +- movq %rbx,%rax +- imulq -128(%rsi),%rax +- addq %rax,%r10 +- vmovdqu -8+32-128(%rsi),%ymm12 +- movq %rbx,%rax +- imulq 8-128(%rsi),%rax +- addq %rax,%r11 +- vmovdqu -8+64-128(%rsi),%ymm13 +- +- movq %r10,%rax +- vpblendd $0xfc,%ymm14,%ymm9,%ymm9 +- imull %r8d,%eax +- vpaddq %ymm9,%ymm4,%ymm4 +- andl $0x1fffffff,%eax +- +- imulq 16-128(%rsi),%rbx +- addq %rbx,%r12 +- vpmuludq %ymm10,%ymm12,%ymm12 
+- vmovd %eax,%xmm11 +- vmovdqu -8+96-128(%rsi),%ymm0 +- vpaddq %ymm12,%ymm1,%ymm1 +- vpmuludq %ymm10,%ymm13,%ymm13 +- vpbroadcastq %xmm11,%ymm11 +- vmovdqu -8+128-128(%rsi),%ymm12 +- vpaddq %ymm13,%ymm2,%ymm2 +- vpmuludq %ymm10,%ymm0,%ymm0 +- vmovdqu -8+160-128(%rsi),%ymm13 +- vpaddq %ymm0,%ymm3,%ymm3 +- vpmuludq %ymm10,%ymm12,%ymm12 +- vmovdqu -8+192-128(%rsi),%ymm0 +- vpaddq %ymm12,%ymm4,%ymm4 +- vpmuludq %ymm10,%ymm13,%ymm13 +- vmovdqu -8+224-128(%rsi),%ymm12 +- vpaddq %ymm13,%ymm5,%ymm5 +- vpmuludq %ymm10,%ymm0,%ymm0 +- vmovdqu -8+256-128(%rsi),%ymm13 +- vpaddq %ymm0,%ymm6,%ymm6 +- vpmuludq %ymm10,%ymm12,%ymm12 +- vmovdqu -8+288-128(%rsi),%ymm9 +- vpaddq %ymm12,%ymm7,%ymm7 +- vpmuludq %ymm10,%ymm13,%ymm13 +- vpaddq %ymm13,%ymm8,%ymm8 +- vpmuludq %ymm10,%ymm9,%ymm9 +- vpbroadcastq 16(%r13),%ymm10 +- +- movq %rax,%rdx +- imulq -128(%rcx),%rax +- addq %rax,%r10 +- vmovdqu -8+32-128(%rcx),%ymm0 +- movq %rdx,%rax +- imulq 8-128(%rcx),%rax +- addq %rax,%r11 +- vmovdqu -8+64-128(%rcx),%ymm12 +- shrq $29,%r10 +- imulq 16-128(%rcx),%rdx +- addq %rdx,%r12 +- addq %r10,%r11 +- +- vpmuludq %ymm11,%ymm0,%ymm0 +- vmovq %xmm10,%rbx +- vmovdqu -8+96-128(%rcx),%ymm13 +- vpaddq %ymm0,%ymm1,%ymm1 +- vpmuludq %ymm11,%ymm12,%ymm12 +- vmovdqu -8+128-128(%rcx),%ymm0 +- vpaddq %ymm12,%ymm2,%ymm2 +- vpmuludq %ymm11,%ymm13,%ymm13 +- vmovdqu -8+160-128(%rcx),%ymm12 +- vpaddq %ymm13,%ymm3,%ymm3 +- vpmuludq %ymm11,%ymm0,%ymm0 +- vmovdqu -8+192-128(%rcx),%ymm13 +- vpaddq %ymm0,%ymm4,%ymm4 +- vpmuludq %ymm11,%ymm12,%ymm12 +- vmovdqu -8+224-128(%rcx),%ymm0 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpmuludq %ymm11,%ymm13,%ymm13 +- vmovdqu -8+256-128(%rcx),%ymm12 +- vpaddq %ymm13,%ymm6,%ymm6 +- vpmuludq %ymm11,%ymm0,%ymm0 +- vmovdqu -8+288-128(%rcx),%ymm13 +- vpaddq %ymm0,%ymm7,%ymm7 +- vpmuludq %ymm11,%ymm12,%ymm12 +- vpaddq %ymm12,%ymm8,%ymm8 +- vpmuludq %ymm11,%ymm13,%ymm13 +- vpaddq %ymm13,%ymm9,%ymm9 +- +- vmovdqu -16+32-128(%rsi),%ymm0 +- movq %rbx,%rax +- imulq -128(%rsi),%rax +- addq %r11,%rax +- +- vmovdqu -16+64-128(%rsi),%ymm12 +- movq %rax,%r11 +- imull %r8d,%eax +- andl $0x1fffffff,%eax +- +- imulq 8-128(%rsi),%rbx +- addq %rbx,%r12 +- vpmuludq %ymm10,%ymm0,%ymm0 +- vmovd %eax,%xmm11 +- vmovdqu -16+96-128(%rsi),%ymm13 +- vpaddq %ymm0,%ymm1,%ymm1 +- vpmuludq %ymm10,%ymm12,%ymm12 +- vpbroadcastq %xmm11,%ymm11 +- vmovdqu -16+128-128(%rsi),%ymm0 +- vpaddq %ymm12,%ymm2,%ymm2 +- vpmuludq %ymm10,%ymm13,%ymm13 +- vmovdqu -16+160-128(%rsi),%ymm12 +- vpaddq %ymm13,%ymm3,%ymm3 +- vpmuludq %ymm10,%ymm0,%ymm0 +- vmovdqu -16+192-128(%rsi),%ymm13 +- vpaddq %ymm0,%ymm4,%ymm4 +- vpmuludq %ymm10,%ymm12,%ymm12 +- vmovdqu -16+224-128(%rsi),%ymm0 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpmuludq %ymm10,%ymm13,%ymm13 +- vmovdqu -16+256-128(%rsi),%ymm12 +- vpaddq %ymm13,%ymm6,%ymm6 +- vpmuludq %ymm10,%ymm0,%ymm0 +- vmovdqu -16+288-128(%rsi),%ymm13 +- vpaddq %ymm0,%ymm7,%ymm7 +- vpmuludq %ymm10,%ymm12,%ymm12 +- vpaddq %ymm12,%ymm8,%ymm8 +- vpmuludq %ymm10,%ymm13,%ymm13 +- vpbroadcastq 24(%r13),%ymm10 +- vpaddq %ymm13,%ymm9,%ymm9 +- +- vmovdqu -16+32-128(%rcx),%ymm0 +- movq %rax,%rdx +- imulq -128(%rcx),%rax +- addq %rax,%r11 +- vmovdqu -16+64-128(%rcx),%ymm12 +- imulq 8-128(%rcx),%rdx +- addq %rdx,%r12 +- shrq $29,%r11 +- +- vpmuludq %ymm11,%ymm0,%ymm0 +- vmovq %xmm10,%rbx +- vmovdqu -16+96-128(%rcx),%ymm13 +- vpaddq %ymm0,%ymm1,%ymm1 +- vpmuludq %ymm11,%ymm12,%ymm12 +- vmovdqu -16+128-128(%rcx),%ymm0 +- vpaddq %ymm12,%ymm2,%ymm2 +- vpmuludq %ymm11,%ymm13,%ymm13 +- vmovdqu -16+160-128(%rcx),%ymm12 +- vpaddq %ymm13,%ymm3,%ymm3 +- vpmuludq 
%ymm11,%ymm0,%ymm0 +- vmovdqu -16+192-128(%rcx),%ymm13 +- vpaddq %ymm0,%ymm4,%ymm4 +- vpmuludq %ymm11,%ymm12,%ymm12 +- vmovdqu -16+224-128(%rcx),%ymm0 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpmuludq %ymm11,%ymm13,%ymm13 +- vmovdqu -16+256-128(%rcx),%ymm12 +- vpaddq %ymm13,%ymm6,%ymm6 +- vpmuludq %ymm11,%ymm0,%ymm0 +- vmovdqu -16+288-128(%rcx),%ymm13 +- vpaddq %ymm0,%ymm7,%ymm7 +- vpmuludq %ymm11,%ymm12,%ymm12 +- vmovdqu -24+32-128(%rsi),%ymm0 +- vpaddq %ymm12,%ymm8,%ymm8 +- vpmuludq %ymm11,%ymm13,%ymm13 +- vmovdqu -24+64-128(%rsi),%ymm12 +- vpaddq %ymm13,%ymm9,%ymm9 +- +- addq %r11,%r12 +- imulq -128(%rsi),%rbx +- addq %rbx,%r12 +- +- movq %r12,%rax +- imull %r8d,%eax +- andl $0x1fffffff,%eax +- +- vpmuludq %ymm10,%ymm0,%ymm0 +- vmovd %eax,%xmm11 +- vmovdqu -24+96-128(%rsi),%ymm13 +- vpaddq %ymm0,%ymm1,%ymm1 +- vpmuludq %ymm10,%ymm12,%ymm12 +- vpbroadcastq %xmm11,%ymm11 +- vmovdqu -24+128-128(%rsi),%ymm0 +- vpaddq %ymm12,%ymm2,%ymm2 +- vpmuludq %ymm10,%ymm13,%ymm13 +- vmovdqu -24+160-128(%rsi),%ymm12 +- vpaddq %ymm13,%ymm3,%ymm3 +- vpmuludq %ymm10,%ymm0,%ymm0 +- vmovdqu -24+192-128(%rsi),%ymm13 +- vpaddq %ymm0,%ymm4,%ymm4 +- vpmuludq %ymm10,%ymm12,%ymm12 +- vmovdqu -24+224-128(%rsi),%ymm0 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpmuludq %ymm10,%ymm13,%ymm13 +- vmovdqu -24+256-128(%rsi),%ymm12 +- vpaddq %ymm13,%ymm6,%ymm6 +- vpmuludq %ymm10,%ymm0,%ymm0 +- vmovdqu -24+288-128(%rsi),%ymm13 +- vpaddq %ymm0,%ymm7,%ymm7 +- vpmuludq %ymm10,%ymm12,%ymm12 +- vpaddq %ymm12,%ymm8,%ymm8 +- vpmuludq %ymm10,%ymm13,%ymm13 +- vpbroadcastq 32(%r13),%ymm10 +- vpaddq %ymm13,%ymm9,%ymm9 +- addq $32,%r13 +- +- vmovdqu -24+32-128(%rcx),%ymm0 +- imulq -128(%rcx),%rax +- addq %rax,%r12 +- shrq $29,%r12 +- +- vmovdqu -24+64-128(%rcx),%ymm12 +- vpmuludq %ymm11,%ymm0,%ymm0 +- vmovq %xmm10,%rbx +- vmovdqu -24+96-128(%rcx),%ymm13 +- vpaddq %ymm0,%ymm1,%ymm0 +- vpmuludq %ymm11,%ymm12,%ymm12 +- vmovdqu %ymm0,(%rsp) +- vpaddq %ymm12,%ymm2,%ymm1 +- vmovdqu -24+128-128(%rcx),%ymm0 +- vpmuludq %ymm11,%ymm13,%ymm13 +- vmovdqu -24+160-128(%rcx),%ymm12 +- vpaddq %ymm13,%ymm3,%ymm2 +- vpmuludq %ymm11,%ymm0,%ymm0 +- vmovdqu -24+192-128(%rcx),%ymm13 +- vpaddq %ymm0,%ymm4,%ymm3 +- vpmuludq %ymm11,%ymm12,%ymm12 +- vmovdqu -24+224-128(%rcx),%ymm0 +- vpaddq %ymm12,%ymm5,%ymm4 +- vpmuludq %ymm11,%ymm13,%ymm13 +- vmovdqu -24+256-128(%rcx),%ymm12 +- vpaddq %ymm13,%ymm6,%ymm5 +- vpmuludq %ymm11,%ymm0,%ymm0 +- vmovdqu -24+288-128(%rcx),%ymm13 +- movq %r12,%r9 +- vpaddq %ymm0,%ymm7,%ymm6 +- vpmuludq %ymm11,%ymm12,%ymm12 +- addq (%rsp),%r9 +- vpaddq %ymm12,%ymm8,%ymm7 +- vpmuludq %ymm11,%ymm13,%ymm13 +- vmovq %r12,%xmm12 +- vpaddq %ymm13,%ymm9,%ymm8 +- +- decl %r14d +- jnz L$oop_mul_1024 +- vpaddq (%rsp),%ymm12,%ymm0 +- +- vpsrlq $29,%ymm0,%ymm12 +- vpand %ymm15,%ymm0,%ymm0 +- vpsrlq $29,%ymm1,%ymm13 +- vpand %ymm15,%ymm1,%ymm1 +- vpsrlq $29,%ymm2,%ymm10 +- vpermq $0x93,%ymm12,%ymm12 +- vpand %ymm15,%ymm2,%ymm2 +- vpsrlq $29,%ymm3,%ymm11 +- vpermq $0x93,%ymm13,%ymm13 +- vpand %ymm15,%ymm3,%ymm3 +- +- vpblendd $3,%ymm14,%ymm12,%ymm9 +- vpermq $0x93,%ymm10,%ymm10 +- vpblendd $3,%ymm12,%ymm13,%ymm12 +- vpermq $0x93,%ymm11,%ymm11 +- vpaddq %ymm9,%ymm0,%ymm0 +- vpblendd $3,%ymm13,%ymm10,%ymm13 +- vpaddq %ymm12,%ymm1,%ymm1 +- vpblendd $3,%ymm10,%ymm11,%ymm10 +- vpaddq %ymm13,%ymm2,%ymm2 +- vpblendd $3,%ymm11,%ymm14,%ymm11 +- vpaddq %ymm10,%ymm3,%ymm3 +- vpaddq %ymm11,%ymm4,%ymm4 +- +- vpsrlq $29,%ymm0,%ymm12 +- vpand %ymm15,%ymm0,%ymm0 +- vpsrlq $29,%ymm1,%ymm13 +- vpand %ymm15,%ymm1,%ymm1 +- vpsrlq $29,%ymm2,%ymm10 +- vpermq $0x93,%ymm12,%ymm12 +- vpand 
%ymm15,%ymm2,%ymm2 +- vpsrlq $29,%ymm3,%ymm11 +- vpermq $0x93,%ymm13,%ymm13 +- vpand %ymm15,%ymm3,%ymm3 +- vpermq $0x93,%ymm10,%ymm10 +- +- vpblendd $3,%ymm14,%ymm12,%ymm9 +- vpermq $0x93,%ymm11,%ymm11 +- vpblendd $3,%ymm12,%ymm13,%ymm12 +- vpaddq %ymm9,%ymm0,%ymm0 +- vpblendd $3,%ymm13,%ymm10,%ymm13 +- vpaddq %ymm12,%ymm1,%ymm1 +- vpblendd $3,%ymm10,%ymm11,%ymm10 +- vpaddq %ymm13,%ymm2,%ymm2 +- vpblendd $3,%ymm11,%ymm14,%ymm11 +- vpaddq %ymm10,%ymm3,%ymm3 +- vpaddq %ymm11,%ymm4,%ymm4 +- +- vmovdqu %ymm0,0-128(%rdi) +- vmovdqu %ymm1,32-128(%rdi) +- vmovdqu %ymm2,64-128(%rdi) +- vmovdqu %ymm3,96-128(%rdi) +- vpsrlq $29,%ymm4,%ymm12 +- vpand %ymm15,%ymm4,%ymm4 +- vpsrlq $29,%ymm5,%ymm13 +- vpand %ymm15,%ymm5,%ymm5 +- vpsrlq $29,%ymm6,%ymm10 +- vpermq $0x93,%ymm12,%ymm12 +- vpand %ymm15,%ymm6,%ymm6 +- vpsrlq $29,%ymm7,%ymm11 +- vpermq $0x93,%ymm13,%ymm13 +- vpand %ymm15,%ymm7,%ymm7 +- vpsrlq $29,%ymm8,%ymm0 +- vpermq $0x93,%ymm10,%ymm10 +- vpand %ymm15,%ymm8,%ymm8 +- vpermq $0x93,%ymm11,%ymm11 +- +- vpblendd $3,%ymm14,%ymm12,%ymm9 +- vpermq $0x93,%ymm0,%ymm0 +- vpblendd $3,%ymm12,%ymm13,%ymm12 +- vpaddq %ymm9,%ymm4,%ymm4 +- vpblendd $3,%ymm13,%ymm10,%ymm13 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpblendd $3,%ymm10,%ymm11,%ymm10 +- vpaddq %ymm13,%ymm6,%ymm6 +- vpblendd $3,%ymm11,%ymm0,%ymm11 +- vpaddq %ymm10,%ymm7,%ymm7 +- vpaddq %ymm11,%ymm8,%ymm8 +- +- vpsrlq $29,%ymm4,%ymm12 +- vpand %ymm15,%ymm4,%ymm4 +- vpsrlq $29,%ymm5,%ymm13 +- vpand %ymm15,%ymm5,%ymm5 +- vpsrlq $29,%ymm6,%ymm10 +- vpermq $0x93,%ymm12,%ymm12 +- vpand %ymm15,%ymm6,%ymm6 +- vpsrlq $29,%ymm7,%ymm11 +- vpermq $0x93,%ymm13,%ymm13 +- vpand %ymm15,%ymm7,%ymm7 +- vpsrlq $29,%ymm8,%ymm0 +- vpermq $0x93,%ymm10,%ymm10 +- vpand %ymm15,%ymm8,%ymm8 +- vpermq $0x93,%ymm11,%ymm11 +- +- vpblendd $3,%ymm14,%ymm12,%ymm9 +- vpermq $0x93,%ymm0,%ymm0 +- vpblendd $3,%ymm12,%ymm13,%ymm12 +- vpaddq %ymm9,%ymm4,%ymm4 +- vpblendd $3,%ymm13,%ymm10,%ymm13 +- vpaddq %ymm12,%ymm5,%ymm5 +- vpblendd $3,%ymm10,%ymm11,%ymm10 +- vpaddq %ymm13,%ymm6,%ymm6 +- vpblendd $3,%ymm11,%ymm0,%ymm11 +- vpaddq %ymm10,%ymm7,%ymm7 +- vpaddq %ymm11,%ymm8,%ymm8 +- +- vmovdqu %ymm4,128-128(%rdi) +- vmovdqu %ymm5,160-128(%rdi) +- vmovdqu %ymm6,192-128(%rdi) +- vmovdqu %ymm7,224-128(%rdi) +- vmovdqu %ymm8,256-128(%rdi) +- vzeroupper +- +- movq %rbp,%rax +- +- movq -48(%rax),%r15 +- +- movq -40(%rax),%r14 +- +- movq -32(%rax),%r13 +- +- movq -24(%rax),%r12 +- +- movq -16(%rax),%rbp +- +- movq -8(%rax),%rbx +- +- leaq (%rax),%rsp +- +-L$mul_1024_epilogue: +- .byte 0xf3,0xc3 +- +- +-.globl _rsaz_1024_red2norm_avx2 +-.private_extern _rsaz_1024_red2norm_avx2 +- +-.p2align 5 +-_rsaz_1024_red2norm_avx2: +- +- subq $-128,%rsi +- xorq %rax,%rax +- movq -128(%rsi),%r8 +- movq -120(%rsi),%r9 +- movq -112(%rsi),%r10 +- shlq $0,%r8 +- shlq $29,%r9 +- movq %r10,%r11 +- shlq $58,%r10 +- shrq $6,%r11 +- addq %r8,%rax +- addq %r9,%rax +- addq %r10,%rax +- adcq $0,%r11 +- movq %rax,0(%rdi) +- movq %r11,%rax +- movq -104(%rsi),%r8 +- movq -96(%rsi),%r9 +- shlq $23,%r8 +- movq %r9,%r10 +- shlq $52,%r9 +- shrq $12,%r10 +- addq %r8,%rax +- addq %r9,%rax +- adcq $0,%r10 +- movq %rax,8(%rdi) +- movq %r10,%rax +- movq -88(%rsi),%r11 +- movq -80(%rsi),%r8 +- shlq $17,%r11 +- movq %r8,%r9 +- shlq $46,%r8 +- shrq $18,%r9 +- addq %r11,%rax +- addq %r8,%rax +- adcq $0,%r9 +- movq %rax,16(%rdi) +- movq %r9,%rax +- movq -72(%rsi),%r10 +- movq -64(%rsi),%r11 +- shlq $11,%r10 +- movq %r11,%r8 +- shlq $40,%r11 +- shrq $24,%r8 +- addq %r10,%rax +- addq %r11,%rax +- adcq $0,%r8 +- movq %rax,24(%rdi) +- movq %r8,%rax 
+- movq -56(%rsi),%r9 +- movq -48(%rsi),%r10 +- movq -40(%rsi),%r11 +- shlq $5,%r9 +- shlq $34,%r10 +- movq %r11,%r8 +- shlq $63,%r11 +- shrq $1,%r8 +- addq %r9,%rax +- addq %r10,%rax +- addq %r11,%rax +- adcq $0,%r8 +- movq %rax,32(%rdi) +- movq %r8,%rax +- movq -32(%rsi),%r9 +- movq -24(%rsi),%r10 +- shlq $28,%r9 +- movq %r10,%r11 +- shlq $57,%r10 +- shrq $7,%r11 +- addq %r9,%rax +- addq %r10,%rax +- adcq $0,%r11 +- movq %rax,40(%rdi) +- movq %r11,%rax +- movq -16(%rsi),%r8 +- movq -8(%rsi),%r9 +- shlq $22,%r8 +- movq %r9,%r10 +- shlq $51,%r9 +- shrq $13,%r10 +- addq %r8,%rax +- addq %r9,%rax +- adcq $0,%r10 +- movq %rax,48(%rdi) +- movq %r10,%rax +- movq 0(%rsi),%r11 +- movq 8(%rsi),%r8 +- shlq $16,%r11 +- movq %r8,%r9 +- shlq $45,%r8 +- shrq $19,%r9 +- addq %r11,%rax +- addq %r8,%rax +- adcq $0,%r9 +- movq %rax,56(%rdi) +- movq %r9,%rax +- movq 16(%rsi),%r10 +- movq 24(%rsi),%r11 +- shlq $10,%r10 +- movq %r11,%r8 +- shlq $39,%r11 +- shrq $25,%r8 +- addq %r10,%rax +- addq %r11,%rax +- adcq $0,%r8 +- movq %rax,64(%rdi) +- movq %r8,%rax +- movq 32(%rsi),%r9 +- movq 40(%rsi),%r10 +- movq 48(%rsi),%r11 +- shlq $4,%r9 +- shlq $33,%r10 +- movq %r11,%r8 +- shlq $62,%r11 +- shrq $2,%r8 +- addq %r9,%rax +- addq %r10,%rax +- addq %r11,%rax +- adcq $0,%r8 +- movq %rax,72(%rdi) +- movq %r8,%rax +- movq 56(%rsi),%r9 +- movq 64(%rsi),%r10 +- shlq $27,%r9 +- movq %r10,%r11 +- shlq $56,%r10 +- shrq $8,%r11 +- addq %r9,%rax +- addq %r10,%rax +- adcq $0,%r11 +- movq %rax,80(%rdi) +- movq %r11,%rax +- movq 72(%rsi),%r8 +- movq 80(%rsi),%r9 +- shlq $21,%r8 +- movq %r9,%r10 +- shlq $50,%r9 +- shrq $14,%r10 +- addq %r8,%rax +- addq %r9,%rax +- adcq $0,%r10 +- movq %rax,88(%rdi) +- movq %r10,%rax +- movq 88(%rsi),%r11 +- movq 96(%rsi),%r8 +- shlq $15,%r11 +- movq %r8,%r9 +- shlq $44,%r8 +- shrq $20,%r9 +- addq %r11,%rax +- addq %r8,%rax +- adcq $0,%r9 +- movq %rax,96(%rdi) +- movq %r9,%rax +- movq 104(%rsi),%r10 +- movq 112(%rsi),%r11 +- shlq $9,%r10 +- movq %r11,%r8 +- shlq $38,%r11 +- shrq $26,%r8 +- addq %r10,%rax +- addq %r11,%rax +- adcq $0,%r8 +- movq %rax,104(%rdi) +- movq %r8,%rax +- movq 120(%rsi),%r9 +- movq 128(%rsi),%r10 +- movq 136(%rsi),%r11 +- shlq $3,%r9 +- shlq $32,%r10 +- movq %r11,%r8 +- shlq $61,%r11 +- shrq $3,%r8 +- addq %r9,%rax +- addq %r10,%rax +- addq %r11,%rax +- adcq $0,%r8 +- movq %rax,112(%rdi) +- movq %r8,%rax +- movq 144(%rsi),%r9 +- movq 152(%rsi),%r10 +- shlq $26,%r9 +- movq %r10,%r11 +- shlq $55,%r10 +- shrq $9,%r11 +- addq %r9,%rax +- addq %r10,%rax +- adcq $0,%r11 +- movq %rax,120(%rdi) +- movq %r11,%rax +- .byte 0xf3,0xc3 +- +- +- +-.globl _rsaz_1024_norm2red_avx2 +-.private_extern _rsaz_1024_norm2red_avx2 +- +-.p2align 5 +-_rsaz_1024_norm2red_avx2: +- +- subq $-128,%rdi +- movq (%rsi),%r8 +- movl $0x1fffffff,%eax +- movq 8(%rsi),%r9 +- movq %r8,%r11 +- shrq $0,%r11 +- andq %rax,%r11 +- movq %r11,-128(%rdi) +- movq %r8,%r10 +- shrq $29,%r10 +- andq %rax,%r10 +- movq %r10,-120(%rdi) +- shrdq $58,%r9,%r8 +- andq %rax,%r8 +- movq %r8,-112(%rdi) +- movq 16(%rsi),%r10 +- movq %r9,%r8 +- shrq $23,%r8 +- andq %rax,%r8 +- movq %r8,-104(%rdi) +- shrdq $52,%r10,%r9 +- andq %rax,%r9 +- movq %r9,-96(%rdi) +- movq 24(%rsi),%r11 +- movq %r10,%r9 +- shrq $17,%r9 +- andq %rax,%r9 +- movq %r9,-88(%rdi) +- shrdq $46,%r11,%r10 +- andq %rax,%r10 +- movq %r10,-80(%rdi) +- movq 32(%rsi),%r8 +- movq %r11,%r10 +- shrq $11,%r10 +- andq %rax,%r10 +- movq %r10,-72(%rdi) +- shrdq $40,%r8,%r11 +- andq %rax,%r11 +- movq %r11,-64(%rdi) +- movq 40(%rsi),%r9 +- movq %r8,%r11 +- shrq $5,%r11 +- andq 
%rax,%r11 +- movq %r11,-56(%rdi) +- movq %r8,%r10 +- shrq $34,%r10 +- andq %rax,%r10 +- movq %r10,-48(%rdi) +- shrdq $63,%r9,%r8 +- andq %rax,%r8 +- movq %r8,-40(%rdi) +- movq 48(%rsi),%r10 +- movq %r9,%r8 +- shrq $28,%r8 +- andq %rax,%r8 +- movq %r8,-32(%rdi) +- shrdq $57,%r10,%r9 +- andq %rax,%r9 +- movq %r9,-24(%rdi) +- movq 56(%rsi),%r11 +- movq %r10,%r9 +- shrq $22,%r9 +- andq %rax,%r9 +- movq %r9,-16(%rdi) +- shrdq $51,%r11,%r10 +- andq %rax,%r10 +- movq %r10,-8(%rdi) +- movq 64(%rsi),%r8 +- movq %r11,%r10 +- shrq $16,%r10 +- andq %rax,%r10 +- movq %r10,0(%rdi) +- shrdq $45,%r8,%r11 +- andq %rax,%r11 +- movq %r11,8(%rdi) +- movq 72(%rsi),%r9 +- movq %r8,%r11 +- shrq $10,%r11 +- andq %rax,%r11 +- movq %r11,16(%rdi) +- shrdq $39,%r9,%r8 +- andq %rax,%r8 +- movq %r8,24(%rdi) +- movq 80(%rsi),%r10 +- movq %r9,%r8 +- shrq $4,%r8 +- andq %rax,%r8 +- movq %r8,32(%rdi) +- movq %r9,%r11 +- shrq $33,%r11 +- andq %rax,%r11 +- movq %r11,40(%rdi) +- shrdq $62,%r10,%r9 +- andq %rax,%r9 +- movq %r9,48(%rdi) +- movq 88(%rsi),%r11 +- movq %r10,%r9 +- shrq $27,%r9 +- andq %rax,%r9 +- movq %r9,56(%rdi) +- shrdq $56,%r11,%r10 +- andq %rax,%r10 +- movq %r10,64(%rdi) +- movq 96(%rsi),%r8 +- movq %r11,%r10 +- shrq $21,%r10 +- andq %rax,%r10 +- movq %r10,72(%rdi) +- shrdq $50,%r8,%r11 +- andq %rax,%r11 +- movq %r11,80(%rdi) +- movq 104(%rsi),%r9 +- movq %r8,%r11 +- shrq $15,%r11 +- andq %rax,%r11 +- movq %r11,88(%rdi) +- shrdq $44,%r9,%r8 +- andq %rax,%r8 +- movq %r8,96(%rdi) +- movq 112(%rsi),%r10 +- movq %r9,%r8 +- shrq $9,%r8 +- andq %rax,%r8 +- movq %r8,104(%rdi) +- shrdq $38,%r10,%r9 +- andq %rax,%r9 +- movq %r9,112(%rdi) +- movq 120(%rsi),%r11 +- movq %r10,%r9 +- shrq $3,%r9 +- andq %rax,%r9 +- movq %r9,120(%rdi) +- movq %r10,%r8 +- shrq $32,%r8 +- andq %rax,%r8 +- movq %r8,128(%rdi) +- shrdq $61,%r11,%r10 +- andq %rax,%r10 +- movq %r10,136(%rdi) +- xorq %r8,%r8 +- movq %r11,%r10 +- shrq $26,%r10 +- andq %rax,%r10 +- movq %r10,144(%rdi) +- shrdq $55,%r8,%r11 +- andq %rax,%r11 +- movq %r11,152(%rdi) +- movq %r8,160(%rdi) +- movq %r8,168(%rdi) +- movq %r8,176(%rdi) +- movq %r8,184(%rdi) +- .byte 0xf3,0xc3 +- +- +-.globl _rsaz_1024_scatter5_avx2 +-.private_extern _rsaz_1024_scatter5_avx2 +- +-.p2align 5 +-_rsaz_1024_scatter5_avx2: +- +- vzeroupper +- vmovdqu L$scatter_permd(%rip),%ymm5 +- shll $4,%edx +- leaq (%rdi,%rdx,1),%rdi +- movl $9,%eax +- jmp L$oop_scatter_1024 +- +-.p2align 5 +-L$oop_scatter_1024: +- vmovdqu (%rsi),%ymm0 +- leaq 32(%rsi),%rsi +- vpermd %ymm0,%ymm5,%ymm0 +- vmovdqu %xmm0,(%rdi) +- leaq 512(%rdi),%rdi +- decl %eax +- jnz L$oop_scatter_1024 +- +- vzeroupper +- .byte 0xf3,0xc3 +- +- +- +-.globl _rsaz_1024_gather5_avx2 +-.private_extern _rsaz_1024_gather5_avx2 +- +-.p2align 5 +-_rsaz_1024_gather5_avx2: +- +- vzeroupper +- movq %rsp,%r11 +- +- leaq -256(%rsp),%rsp +- andq $-32,%rsp +- leaq L$inc(%rip),%r10 +- leaq -128(%rsp),%rax +- +- vmovd %edx,%xmm4 +- vmovdqa (%r10),%ymm0 +- vmovdqa 32(%r10),%ymm1 +- vmovdqa 64(%r10),%ymm5 +- vpbroadcastd %xmm4,%ymm4 +- +- vpaddd %ymm5,%ymm0,%ymm2 +- vpcmpeqd %ymm4,%ymm0,%ymm0 +- vpaddd %ymm5,%ymm1,%ymm3 +- vpcmpeqd %ymm4,%ymm1,%ymm1 +- vmovdqa %ymm0,0+128(%rax) +- vpaddd %ymm5,%ymm2,%ymm0 +- vpcmpeqd %ymm4,%ymm2,%ymm2 +- vmovdqa %ymm1,32+128(%rax) +- vpaddd %ymm5,%ymm3,%ymm1 +- vpcmpeqd %ymm4,%ymm3,%ymm3 +- vmovdqa %ymm2,64+128(%rax) +- vpaddd %ymm5,%ymm0,%ymm2 +- vpcmpeqd %ymm4,%ymm0,%ymm0 +- vmovdqa %ymm3,96+128(%rax) +- vpaddd %ymm5,%ymm1,%ymm3 +- vpcmpeqd %ymm4,%ymm1,%ymm1 +- vmovdqa %ymm0,128+128(%rax) +- vpaddd %ymm5,%ymm2,%ymm8 +- vpcmpeqd 
%ymm4,%ymm2,%ymm2 +- vmovdqa %ymm1,160+128(%rax) +- vpaddd %ymm5,%ymm3,%ymm9 +- vpcmpeqd %ymm4,%ymm3,%ymm3 +- vmovdqa %ymm2,192+128(%rax) +- vpaddd %ymm5,%ymm8,%ymm10 +- vpcmpeqd %ymm4,%ymm8,%ymm8 +- vmovdqa %ymm3,224+128(%rax) +- vpaddd %ymm5,%ymm9,%ymm11 +- vpcmpeqd %ymm4,%ymm9,%ymm9 +- vpaddd %ymm5,%ymm10,%ymm12 +- vpcmpeqd %ymm4,%ymm10,%ymm10 +- vpaddd %ymm5,%ymm11,%ymm13 +- vpcmpeqd %ymm4,%ymm11,%ymm11 +- vpaddd %ymm5,%ymm12,%ymm14 +- vpcmpeqd %ymm4,%ymm12,%ymm12 +- vpaddd %ymm5,%ymm13,%ymm15 +- vpcmpeqd %ymm4,%ymm13,%ymm13 +- vpcmpeqd %ymm4,%ymm14,%ymm14 +- vpcmpeqd %ymm4,%ymm15,%ymm15 +- +- vmovdqa -32(%r10),%ymm7 +- leaq 128(%rsi),%rsi +- movl $9,%edx +- +-L$oop_gather_1024: +- vmovdqa 0-128(%rsi),%ymm0 +- vmovdqa 32-128(%rsi),%ymm1 +- vmovdqa 64-128(%rsi),%ymm2 +- vmovdqa 96-128(%rsi),%ymm3 +- vpand 0+128(%rax),%ymm0,%ymm0 +- vpand 32+128(%rax),%ymm1,%ymm1 +- vpand 64+128(%rax),%ymm2,%ymm2 +- vpor %ymm0,%ymm1,%ymm4 +- vpand 96+128(%rax),%ymm3,%ymm3 +- vmovdqa 128-128(%rsi),%ymm0 +- vmovdqa 160-128(%rsi),%ymm1 +- vpor %ymm2,%ymm3,%ymm5 +- vmovdqa 192-128(%rsi),%ymm2 +- vmovdqa 224-128(%rsi),%ymm3 +- vpand 128+128(%rax),%ymm0,%ymm0 +- vpand 160+128(%rax),%ymm1,%ymm1 +- vpand 192+128(%rax),%ymm2,%ymm2 +- vpor %ymm0,%ymm4,%ymm4 +- vpand 224+128(%rax),%ymm3,%ymm3 +- vpand 256-128(%rsi),%ymm8,%ymm0 +- vpor %ymm1,%ymm5,%ymm5 +- vpand 288-128(%rsi),%ymm9,%ymm1 +- vpor %ymm2,%ymm4,%ymm4 +- vpand 320-128(%rsi),%ymm10,%ymm2 +- vpor %ymm3,%ymm5,%ymm5 +- vpand 352-128(%rsi),%ymm11,%ymm3 +- vpor %ymm0,%ymm4,%ymm4 +- vpand 384-128(%rsi),%ymm12,%ymm0 +- vpor %ymm1,%ymm5,%ymm5 +- vpand 416-128(%rsi),%ymm13,%ymm1 +- vpor %ymm2,%ymm4,%ymm4 +- vpand 448-128(%rsi),%ymm14,%ymm2 +- vpor %ymm3,%ymm5,%ymm5 +- vpand 480-128(%rsi),%ymm15,%ymm3 +- leaq 512(%rsi),%rsi +- vpor %ymm0,%ymm4,%ymm4 +- vpor %ymm1,%ymm5,%ymm5 +- vpor %ymm2,%ymm4,%ymm4 +- vpor %ymm3,%ymm5,%ymm5 +- +- vpor %ymm5,%ymm4,%ymm4 +- vextracti128 $1,%ymm4,%xmm5 +- vpor %xmm4,%xmm5,%xmm5 +- vpermd %ymm5,%ymm7,%ymm5 +- vmovdqu %ymm5,(%rdi) +- leaq 32(%rdi),%rdi +- decl %edx +- jnz L$oop_gather_1024 +- +- vpxor %ymm0,%ymm0,%ymm0 +- vmovdqu %ymm0,(%rdi) +- vzeroupper +- leaq (%r11),%rsp +- +- .byte 0xf3,0xc3 +- +-L$SEH_end_rsaz_1024_gather5: +- +-.p2align 6 +-L$and_mask: +-.quad 0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff +-L$scatter_permd: +-.long 0,2,4,6,7,7,7,7 +-L$gather_permd: +-.long 0,7,1,7,2,7,3,7 +-L$inc: +-.long 0,0,0,0, 1,1,1,1 +-.long 2,2,2,2, 3,3,3,3 +-.long 4,4,4,4, 4,4,4,4 +-.p2align 6 +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S b/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S +deleted file mode 100644 +index d50851e..0000000 +--- a/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S ++++ /dev/null +@@ -1,5466 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +-.globl _sha1_block_data_order +-.private_extern _sha1_block_data_order +- +-.p2align 4 +-_sha1_block_data_order: +- +- leaq _OPENSSL_ia32cap_P(%rip),%r10 +- movl 0(%r10),%r9d +- movl 4(%r10),%r8d +- movl 8(%r10),%r10d +- testl $512,%r8d +- jz L$ialu +- testl $536870912,%r10d +- jnz _shaext_shortcut +- andl $296,%r10d +- cmpl $296,%r10d +- je _avx2_shortcut +- andl $268435456,%r8d +- andl $1073741824,%r9d +- orl %r9d,%r8d +- cmpl $1342177280,%r8d +- je _avx_shortcut +- jmp _ssse3_shortcut +- +-.p2align 4 +-L$ialu: +- movq %rsp,%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- movq %rdi,%r8 +- subq $72,%rsp +- movq %rsi,%r9 +- andq $-64,%rsp +- movq %rdx,%r10 +- movq %rax,64(%rsp) +- +-L$prologue: +- +- movl 0(%r8),%esi +- movl 4(%r8),%edi +- movl 8(%r8),%r11d +- movl 12(%r8),%r12d +- movl 16(%r8),%r13d +- jmp L$loop +- +-.p2align 4 +-L$loop: +- movl 0(%r9),%edx +- bswapl %edx +- movl 4(%r9),%ebp +- movl %r12d,%eax +- movl %edx,0(%rsp) +- movl %esi,%ecx +- bswapl %ebp +- xorl %r11d,%eax +- roll $5,%ecx +- andl %edi,%eax +- leal 1518500249(%rdx,%r13,1),%r13d +- addl %ecx,%r13d +- xorl %r12d,%eax +- roll $30,%edi +- addl %eax,%r13d +- movl 8(%r9),%r14d +- movl %r11d,%eax +- movl %ebp,4(%rsp) +- movl %r13d,%ecx +- bswapl %r14d +- xorl %edi,%eax +- roll $5,%ecx +- andl %esi,%eax +- leal 1518500249(%rbp,%r12,1),%r12d +- addl %ecx,%r12d +- xorl %r11d,%eax +- roll $30,%esi +- addl %eax,%r12d +- movl 12(%r9),%edx +- movl %edi,%eax +- movl %r14d,8(%rsp) +- movl %r12d,%ecx +- bswapl %edx +- xorl %esi,%eax +- roll $5,%ecx +- andl %r13d,%eax +- leal 1518500249(%r14,%r11,1),%r11d +- addl %ecx,%r11d +- xorl %edi,%eax +- roll $30,%r13d +- addl %eax,%r11d +- movl 16(%r9),%ebp +- movl %esi,%eax +- movl %edx,12(%rsp) +- movl %r11d,%ecx +- bswapl %ebp +- xorl %r13d,%eax +- roll $5,%ecx +- andl %r12d,%eax +- leal 1518500249(%rdx,%rdi,1),%edi +- addl %ecx,%edi +- xorl %esi,%eax +- roll $30,%r12d +- addl %eax,%edi +- movl 20(%r9),%r14d +- movl %r13d,%eax +- movl %ebp,16(%rsp) +- movl %edi,%ecx +- bswapl %r14d +- xorl %r12d,%eax +- roll $5,%ecx +- andl %r11d,%eax +- leal 1518500249(%rbp,%rsi,1),%esi +- addl %ecx,%esi +- xorl %r13d,%eax +- roll $30,%r11d +- addl %eax,%esi +- movl 24(%r9),%edx +- movl %r12d,%eax +- movl %r14d,20(%rsp) +- movl %esi,%ecx +- bswapl %edx +- xorl %r11d,%eax +- roll $5,%ecx +- andl %edi,%eax +- leal 1518500249(%r14,%r13,1),%r13d +- addl %ecx,%r13d +- xorl %r12d,%eax +- roll $30,%edi +- addl %eax,%r13d +- movl 28(%r9),%ebp +- movl %r11d,%eax +- movl %edx,24(%rsp) +- movl %r13d,%ecx +- bswapl %ebp +- xorl %edi,%eax +- roll $5,%ecx +- andl %esi,%eax +- leal 1518500249(%rdx,%r12,1),%r12d +- addl %ecx,%r12d +- xorl %r11d,%eax +- roll $30,%esi +- addl %eax,%r12d +- movl 32(%r9),%r14d +- movl %edi,%eax +- movl %ebp,28(%rsp) +- movl %r12d,%ecx +- bswapl %r14d +- xorl %esi,%eax +- roll $5,%ecx +- andl %r13d,%eax +- leal 1518500249(%rbp,%r11,1),%r11d +- addl %ecx,%r11d +- xorl %edi,%eax +- roll $30,%r13d +- addl %eax,%r11d +- movl 36(%r9),%edx +- movl %esi,%eax +- movl %r14d,32(%rsp) +- movl %r11d,%ecx +- bswapl %edx +- xorl %r13d,%eax +- roll $5,%ecx +- andl %r12d,%eax +- leal 1518500249(%r14,%rdi,1),%edi +- addl %ecx,%edi +- xorl %esi,%eax +- roll $30,%r12d +- addl %eax,%edi +- movl 
40(%r9),%ebp +- movl %r13d,%eax +- movl %edx,36(%rsp) +- movl %edi,%ecx +- bswapl %ebp +- xorl %r12d,%eax +- roll $5,%ecx +- andl %r11d,%eax +- leal 1518500249(%rdx,%rsi,1),%esi +- addl %ecx,%esi +- xorl %r13d,%eax +- roll $30,%r11d +- addl %eax,%esi +- movl 44(%r9),%r14d +- movl %r12d,%eax +- movl %ebp,40(%rsp) +- movl %esi,%ecx +- bswapl %r14d +- xorl %r11d,%eax +- roll $5,%ecx +- andl %edi,%eax +- leal 1518500249(%rbp,%r13,1),%r13d +- addl %ecx,%r13d +- xorl %r12d,%eax +- roll $30,%edi +- addl %eax,%r13d +- movl 48(%r9),%edx +- movl %r11d,%eax +- movl %r14d,44(%rsp) +- movl %r13d,%ecx +- bswapl %edx +- xorl %edi,%eax +- roll $5,%ecx +- andl %esi,%eax +- leal 1518500249(%r14,%r12,1),%r12d +- addl %ecx,%r12d +- xorl %r11d,%eax +- roll $30,%esi +- addl %eax,%r12d +- movl 52(%r9),%ebp +- movl %edi,%eax +- movl %edx,48(%rsp) +- movl %r12d,%ecx +- bswapl %ebp +- xorl %esi,%eax +- roll $5,%ecx +- andl %r13d,%eax +- leal 1518500249(%rdx,%r11,1),%r11d +- addl %ecx,%r11d +- xorl %edi,%eax +- roll $30,%r13d +- addl %eax,%r11d +- movl 56(%r9),%r14d +- movl %esi,%eax +- movl %ebp,52(%rsp) +- movl %r11d,%ecx +- bswapl %r14d +- xorl %r13d,%eax +- roll $5,%ecx +- andl %r12d,%eax +- leal 1518500249(%rbp,%rdi,1),%edi +- addl %ecx,%edi +- xorl %esi,%eax +- roll $30,%r12d +- addl %eax,%edi +- movl 60(%r9),%edx +- movl %r13d,%eax +- movl %r14d,56(%rsp) +- movl %edi,%ecx +- bswapl %edx +- xorl %r12d,%eax +- roll $5,%ecx +- andl %r11d,%eax +- leal 1518500249(%r14,%rsi,1),%esi +- addl %ecx,%esi +- xorl %r13d,%eax +- roll $30,%r11d +- addl %eax,%esi +- xorl 0(%rsp),%ebp +- movl %r12d,%eax +- movl %edx,60(%rsp) +- movl %esi,%ecx +- xorl 8(%rsp),%ebp +- xorl %r11d,%eax +- roll $5,%ecx +- xorl 32(%rsp),%ebp +- andl %edi,%eax +- leal 1518500249(%rdx,%r13,1),%r13d +- roll $30,%edi +- xorl %r12d,%eax +- addl %ecx,%r13d +- roll $1,%ebp +- addl %eax,%r13d +- xorl 4(%rsp),%r14d +- movl %r11d,%eax +- movl %ebp,0(%rsp) +- movl %r13d,%ecx +- xorl 12(%rsp),%r14d +- xorl %edi,%eax +- roll $5,%ecx +- xorl 36(%rsp),%r14d +- andl %esi,%eax +- leal 1518500249(%rbp,%r12,1),%r12d +- roll $30,%esi +- xorl %r11d,%eax +- addl %ecx,%r12d +- roll $1,%r14d +- addl %eax,%r12d +- xorl 8(%rsp),%edx +- movl %edi,%eax +- movl %r14d,4(%rsp) +- movl %r12d,%ecx +- xorl 16(%rsp),%edx +- xorl %esi,%eax +- roll $5,%ecx +- xorl 40(%rsp),%edx +- andl %r13d,%eax +- leal 1518500249(%r14,%r11,1),%r11d +- roll $30,%r13d +- xorl %edi,%eax +- addl %ecx,%r11d +- roll $1,%edx +- addl %eax,%r11d +- xorl 12(%rsp),%ebp +- movl %esi,%eax +- movl %edx,8(%rsp) +- movl %r11d,%ecx +- xorl 20(%rsp),%ebp +- xorl %r13d,%eax +- roll $5,%ecx +- xorl 44(%rsp),%ebp +- andl %r12d,%eax +- leal 1518500249(%rdx,%rdi,1),%edi +- roll $30,%r12d +- xorl %esi,%eax +- addl %ecx,%edi +- roll $1,%ebp +- addl %eax,%edi +- xorl 16(%rsp),%r14d +- movl %r13d,%eax +- movl %ebp,12(%rsp) +- movl %edi,%ecx +- xorl 24(%rsp),%r14d +- xorl %r12d,%eax +- roll $5,%ecx +- xorl 48(%rsp),%r14d +- andl %r11d,%eax +- leal 1518500249(%rbp,%rsi,1),%esi +- roll $30,%r11d +- xorl %r13d,%eax +- addl %ecx,%esi +- roll $1,%r14d +- addl %eax,%esi +- xorl 20(%rsp),%edx +- movl %edi,%eax +- movl %r14d,16(%rsp) +- movl %esi,%ecx +- xorl 28(%rsp),%edx +- xorl %r12d,%eax +- roll $5,%ecx +- xorl 52(%rsp),%edx +- leal 1859775393(%r14,%r13,1),%r13d +- xorl %r11d,%eax +- addl %ecx,%r13d +- roll $30,%edi +- addl %eax,%r13d +- roll $1,%edx +- xorl 24(%rsp),%ebp +- movl %esi,%eax +- movl %edx,20(%rsp) +- movl %r13d,%ecx +- xorl 32(%rsp),%ebp +- xorl %r11d,%eax +- roll $5,%ecx +- xorl 56(%rsp),%ebp +- leal 
1859775393(%rdx,%r12,1),%r12d +- xorl %edi,%eax +- addl %ecx,%r12d +- roll $30,%esi +- addl %eax,%r12d +- roll $1,%ebp +- xorl 28(%rsp),%r14d +- movl %r13d,%eax +- movl %ebp,24(%rsp) +- movl %r12d,%ecx +- xorl 36(%rsp),%r14d +- xorl %edi,%eax +- roll $5,%ecx +- xorl 60(%rsp),%r14d +- leal 1859775393(%rbp,%r11,1),%r11d +- xorl %esi,%eax +- addl %ecx,%r11d +- roll $30,%r13d +- addl %eax,%r11d +- roll $1,%r14d +- xorl 32(%rsp),%edx +- movl %r12d,%eax +- movl %r14d,28(%rsp) +- movl %r11d,%ecx +- xorl 40(%rsp),%edx +- xorl %esi,%eax +- roll $5,%ecx +- xorl 0(%rsp),%edx +- leal 1859775393(%r14,%rdi,1),%edi +- xorl %r13d,%eax +- addl %ecx,%edi +- roll $30,%r12d +- addl %eax,%edi +- roll $1,%edx +- xorl 36(%rsp),%ebp +- movl %r11d,%eax +- movl %edx,32(%rsp) +- movl %edi,%ecx +- xorl 44(%rsp),%ebp +- xorl %r13d,%eax +- roll $5,%ecx +- xorl 4(%rsp),%ebp +- leal 1859775393(%rdx,%rsi,1),%esi +- xorl %r12d,%eax +- addl %ecx,%esi +- roll $30,%r11d +- addl %eax,%esi +- roll $1,%ebp +- xorl 40(%rsp),%r14d +- movl %edi,%eax +- movl %ebp,36(%rsp) +- movl %esi,%ecx +- xorl 48(%rsp),%r14d +- xorl %r12d,%eax +- roll $5,%ecx +- xorl 8(%rsp),%r14d +- leal 1859775393(%rbp,%r13,1),%r13d +- xorl %r11d,%eax +- addl %ecx,%r13d +- roll $30,%edi +- addl %eax,%r13d +- roll $1,%r14d +- xorl 44(%rsp),%edx +- movl %esi,%eax +- movl %r14d,40(%rsp) +- movl %r13d,%ecx +- xorl 52(%rsp),%edx +- xorl %r11d,%eax +- roll $5,%ecx +- xorl 12(%rsp),%edx +- leal 1859775393(%r14,%r12,1),%r12d +- xorl %edi,%eax +- addl %ecx,%r12d +- roll $30,%esi +- addl %eax,%r12d +- roll $1,%edx +- xorl 48(%rsp),%ebp +- movl %r13d,%eax +- movl %edx,44(%rsp) +- movl %r12d,%ecx +- xorl 56(%rsp),%ebp +- xorl %edi,%eax +- roll $5,%ecx +- xorl 16(%rsp),%ebp +- leal 1859775393(%rdx,%r11,1),%r11d +- xorl %esi,%eax +- addl %ecx,%r11d +- roll $30,%r13d +- addl %eax,%r11d +- roll $1,%ebp +- xorl 52(%rsp),%r14d +- movl %r12d,%eax +- movl %ebp,48(%rsp) +- movl %r11d,%ecx +- xorl 60(%rsp),%r14d +- xorl %esi,%eax +- roll $5,%ecx +- xorl 20(%rsp),%r14d +- leal 1859775393(%rbp,%rdi,1),%edi +- xorl %r13d,%eax +- addl %ecx,%edi +- roll $30,%r12d +- addl %eax,%edi +- roll $1,%r14d +- xorl 56(%rsp),%edx +- movl %r11d,%eax +- movl %r14d,52(%rsp) +- movl %edi,%ecx +- xorl 0(%rsp),%edx +- xorl %r13d,%eax +- roll $5,%ecx +- xorl 24(%rsp),%edx +- leal 1859775393(%r14,%rsi,1),%esi +- xorl %r12d,%eax +- addl %ecx,%esi +- roll $30,%r11d +- addl %eax,%esi +- roll $1,%edx +- xorl 60(%rsp),%ebp +- movl %edi,%eax +- movl %edx,56(%rsp) +- movl %esi,%ecx +- xorl 4(%rsp),%ebp +- xorl %r12d,%eax +- roll $5,%ecx +- xorl 28(%rsp),%ebp +- leal 1859775393(%rdx,%r13,1),%r13d +- xorl %r11d,%eax +- addl %ecx,%r13d +- roll $30,%edi +- addl %eax,%r13d +- roll $1,%ebp +- xorl 0(%rsp),%r14d +- movl %esi,%eax +- movl %ebp,60(%rsp) +- movl %r13d,%ecx +- xorl 8(%rsp),%r14d +- xorl %r11d,%eax +- roll $5,%ecx +- xorl 32(%rsp),%r14d +- leal 1859775393(%rbp,%r12,1),%r12d +- xorl %edi,%eax +- addl %ecx,%r12d +- roll $30,%esi +- addl %eax,%r12d +- roll $1,%r14d +- xorl 4(%rsp),%edx +- movl %r13d,%eax +- movl %r14d,0(%rsp) +- movl %r12d,%ecx +- xorl 12(%rsp),%edx +- xorl %edi,%eax +- roll $5,%ecx +- xorl 36(%rsp),%edx +- leal 1859775393(%r14,%r11,1),%r11d +- xorl %esi,%eax +- addl %ecx,%r11d +- roll $30,%r13d +- addl %eax,%r11d +- roll $1,%edx +- xorl 8(%rsp),%ebp +- movl %r12d,%eax +- movl %edx,4(%rsp) +- movl %r11d,%ecx +- xorl 16(%rsp),%ebp +- xorl %esi,%eax +- roll $5,%ecx +- xorl 40(%rsp),%ebp +- leal 1859775393(%rdx,%rdi,1),%edi +- xorl %r13d,%eax +- addl %ecx,%edi +- roll $30,%r12d +- addl %eax,%edi 
+- roll $1,%ebp +- xorl 12(%rsp),%r14d +- movl %r11d,%eax +- movl %ebp,8(%rsp) +- movl %edi,%ecx +- xorl 20(%rsp),%r14d +- xorl %r13d,%eax +- roll $5,%ecx +- xorl 44(%rsp),%r14d +- leal 1859775393(%rbp,%rsi,1),%esi +- xorl %r12d,%eax +- addl %ecx,%esi +- roll $30,%r11d +- addl %eax,%esi +- roll $1,%r14d +- xorl 16(%rsp),%edx +- movl %edi,%eax +- movl %r14d,12(%rsp) +- movl %esi,%ecx +- xorl 24(%rsp),%edx +- xorl %r12d,%eax +- roll $5,%ecx +- xorl 48(%rsp),%edx +- leal 1859775393(%r14,%r13,1),%r13d +- xorl %r11d,%eax +- addl %ecx,%r13d +- roll $30,%edi +- addl %eax,%r13d +- roll $1,%edx +- xorl 20(%rsp),%ebp +- movl %esi,%eax +- movl %edx,16(%rsp) +- movl %r13d,%ecx +- xorl 28(%rsp),%ebp +- xorl %r11d,%eax +- roll $5,%ecx +- xorl 52(%rsp),%ebp +- leal 1859775393(%rdx,%r12,1),%r12d +- xorl %edi,%eax +- addl %ecx,%r12d +- roll $30,%esi +- addl %eax,%r12d +- roll $1,%ebp +- xorl 24(%rsp),%r14d +- movl %r13d,%eax +- movl %ebp,20(%rsp) +- movl %r12d,%ecx +- xorl 32(%rsp),%r14d +- xorl %edi,%eax +- roll $5,%ecx +- xorl 56(%rsp),%r14d +- leal 1859775393(%rbp,%r11,1),%r11d +- xorl %esi,%eax +- addl %ecx,%r11d +- roll $30,%r13d +- addl %eax,%r11d +- roll $1,%r14d +- xorl 28(%rsp),%edx +- movl %r12d,%eax +- movl %r14d,24(%rsp) +- movl %r11d,%ecx +- xorl 36(%rsp),%edx +- xorl %esi,%eax +- roll $5,%ecx +- xorl 60(%rsp),%edx +- leal 1859775393(%r14,%rdi,1),%edi +- xorl %r13d,%eax +- addl %ecx,%edi +- roll $30,%r12d +- addl %eax,%edi +- roll $1,%edx +- xorl 32(%rsp),%ebp +- movl %r11d,%eax +- movl %edx,28(%rsp) +- movl %edi,%ecx +- xorl 40(%rsp),%ebp +- xorl %r13d,%eax +- roll $5,%ecx +- xorl 0(%rsp),%ebp +- leal 1859775393(%rdx,%rsi,1),%esi +- xorl %r12d,%eax +- addl %ecx,%esi +- roll $30,%r11d +- addl %eax,%esi +- roll $1,%ebp +- xorl 36(%rsp),%r14d +- movl %r12d,%eax +- movl %ebp,32(%rsp) +- movl %r12d,%ebx +- xorl 44(%rsp),%r14d +- andl %r11d,%eax +- movl %esi,%ecx +- xorl 4(%rsp),%r14d +- leal -1894007588(%rbp,%r13,1),%r13d +- xorl %r11d,%ebx +- roll $5,%ecx +- addl %eax,%r13d +- roll $1,%r14d +- andl %edi,%ebx +- addl %ecx,%r13d +- roll $30,%edi +- addl %ebx,%r13d +- xorl 40(%rsp),%edx +- movl %r11d,%eax +- movl %r14d,36(%rsp) +- movl %r11d,%ebx +- xorl 48(%rsp),%edx +- andl %edi,%eax +- movl %r13d,%ecx +- xorl 8(%rsp),%edx +- leal -1894007588(%r14,%r12,1),%r12d +- xorl %edi,%ebx +- roll $5,%ecx +- addl %eax,%r12d +- roll $1,%edx +- andl %esi,%ebx +- addl %ecx,%r12d +- roll $30,%esi +- addl %ebx,%r12d +- xorl 44(%rsp),%ebp +- movl %edi,%eax +- movl %edx,40(%rsp) +- movl %edi,%ebx +- xorl 52(%rsp),%ebp +- andl %esi,%eax +- movl %r12d,%ecx +- xorl 12(%rsp),%ebp +- leal -1894007588(%rdx,%r11,1),%r11d +- xorl %esi,%ebx +- roll $5,%ecx +- addl %eax,%r11d +- roll $1,%ebp +- andl %r13d,%ebx +- addl %ecx,%r11d +- roll $30,%r13d +- addl %ebx,%r11d +- xorl 48(%rsp),%r14d +- movl %esi,%eax +- movl %ebp,44(%rsp) +- movl %esi,%ebx +- xorl 56(%rsp),%r14d +- andl %r13d,%eax +- movl %r11d,%ecx +- xorl 16(%rsp),%r14d +- leal -1894007588(%rbp,%rdi,1),%edi +- xorl %r13d,%ebx +- roll $5,%ecx +- addl %eax,%edi +- roll $1,%r14d +- andl %r12d,%ebx +- addl %ecx,%edi +- roll $30,%r12d +- addl %ebx,%edi +- xorl 52(%rsp),%edx +- movl %r13d,%eax +- movl %r14d,48(%rsp) +- movl %r13d,%ebx +- xorl 60(%rsp),%edx +- andl %r12d,%eax +- movl %edi,%ecx +- xorl 20(%rsp),%edx +- leal -1894007588(%r14,%rsi,1),%esi +- xorl %r12d,%ebx +- roll $5,%ecx +- addl %eax,%esi +- roll $1,%edx +- andl %r11d,%ebx +- addl %ecx,%esi +- roll $30,%r11d +- addl %ebx,%esi +- xorl 56(%rsp),%ebp +- movl %r12d,%eax +- movl %edx,52(%rsp) +- movl %r12d,%ebx +- 
xorl 0(%rsp),%ebp +- andl %r11d,%eax +- movl %esi,%ecx +- xorl 24(%rsp),%ebp +- leal -1894007588(%rdx,%r13,1),%r13d +- xorl %r11d,%ebx +- roll $5,%ecx +- addl %eax,%r13d +- roll $1,%ebp +- andl %edi,%ebx +- addl %ecx,%r13d +- roll $30,%edi +- addl %ebx,%r13d +- xorl 60(%rsp),%r14d +- movl %r11d,%eax +- movl %ebp,56(%rsp) +- movl %r11d,%ebx +- xorl 4(%rsp),%r14d +- andl %edi,%eax +- movl %r13d,%ecx +- xorl 28(%rsp),%r14d +- leal -1894007588(%rbp,%r12,1),%r12d +- xorl %edi,%ebx +- roll $5,%ecx +- addl %eax,%r12d +- roll $1,%r14d +- andl %esi,%ebx +- addl %ecx,%r12d +- roll $30,%esi +- addl %ebx,%r12d +- xorl 0(%rsp),%edx +- movl %edi,%eax +- movl %r14d,60(%rsp) +- movl %edi,%ebx +- xorl 8(%rsp),%edx +- andl %esi,%eax +- movl %r12d,%ecx +- xorl 32(%rsp),%edx +- leal -1894007588(%r14,%r11,1),%r11d +- xorl %esi,%ebx +- roll $5,%ecx +- addl %eax,%r11d +- roll $1,%edx +- andl %r13d,%ebx +- addl %ecx,%r11d +- roll $30,%r13d +- addl %ebx,%r11d +- xorl 4(%rsp),%ebp +- movl %esi,%eax +- movl %edx,0(%rsp) +- movl %esi,%ebx +- xorl 12(%rsp),%ebp +- andl %r13d,%eax +- movl %r11d,%ecx +- xorl 36(%rsp),%ebp +- leal -1894007588(%rdx,%rdi,1),%edi +- xorl %r13d,%ebx +- roll $5,%ecx +- addl %eax,%edi +- roll $1,%ebp +- andl %r12d,%ebx +- addl %ecx,%edi +- roll $30,%r12d +- addl %ebx,%edi +- xorl 8(%rsp),%r14d +- movl %r13d,%eax +- movl %ebp,4(%rsp) +- movl %r13d,%ebx +- xorl 16(%rsp),%r14d +- andl %r12d,%eax +- movl %edi,%ecx +- xorl 40(%rsp),%r14d +- leal -1894007588(%rbp,%rsi,1),%esi +- xorl %r12d,%ebx +- roll $5,%ecx +- addl %eax,%esi +- roll $1,%r14d +- andl %r11d,%ebx +- addl %ecx,%esi +- roll $30,%r11d +- addl %ebx,%esi +- xorl 12(%rsp),%edx +- movl %r12d,%eax +- movl %r14d,8(%rsp) +- movl %r12d,%ebx +- xorl 20(%rsp),%edx +- andl %r11d,%eax +- movl %esi,%ecx +- xorl 44(%rsp),%edx +- leal -1894007588(%r14,%r13,1),%r13d +- xorl %r11d,%ebx +- roll $5,%ecx +- addl %eax,%r13d +- roll $1,%edx +- andl %edi,%ebx +- addl %ecx,%r13d +- roll $30,%edi +- addl %ebx,%r13d +- xorl 16(%rsp),%ebp +- movl %r11d,%eax +- movl %edx,12(%rsp) +- movl %r11d,%ebx +- xorl 24(%rsp),%ebp +- andl %edi,%eax +- movl %r13d,%ecx +- xorl 48(%rsp),%ebp +- leal -1894007588(%rdx,%r12,1),%r12d +- xorl %edi,%ebx +- roll $5,%ecx +- addl %eax,%r12d +- roll $1,%ebp +- andl %esi,%ebx +- addl %ecx,%r12d +- roll $30,%esi +- addl %ebx,%r12d +- xorl 20(%rsp),%r14d +- movl %edi,%eax +- movl %ebp,16(%rsp) +- movl %edi,%ebx +- xorl 28(%rsp),%r14d +- andl %esi,%eax +- movl %r12d,%ecx +- xorl 52(%rsp),%r14d +- leal -1894007588(%rbp,%r11,1),%r11d +- xorl %esi,%ebx +- roll $5,%ecx +- addl %eax,%r11d +- roll $1,%r14d +- andl %r13d,%ebx +- addl %ecx,%r11d +- roll $30,%r13d +- addl %ebx,%r11d +- xorl 24(%rsp),%edx +- movl %esi,%eax +- movl %r14d,20(%rsp) +- movl %esi,%ebx +- xorl 32(%rsp),%edx +- andl %r13d,%eax +- movl %r11d,%ecx +- xorl 56(%rsp),%edx +- leal -1894007588(%r14,%rdi,1),%edi +- xorl %r13d,%ebx +- roll $5,%ecx +- addl %eax,%edi +- roll $1,%edx +- andl %r12d,%ebx +- addl %ecx,%edi +- roll $30,%r12d +- addl %ebx,%edi +- xorl 28(%rsp),%ebp +- movl %r13d,%eax +- movl %edx,24(%rsp) +- movl %r13d,%ebx +- xorl 36(%rsp),%ebp +- andl %r12d,%eax +- movl %edi,%ecx +- xorl 60(%rsp),%ebp +- leal -1894007588(%rdx,%rsi,1),%esi +- xorl %r12d,%ebx +- roll $5,%ecx +- addl %eax,%esi +- roll $1,%ebp +- andl %r11d,%ebx +- addl %ecx,%esi +- roll $30,%r11d +- addl %ebx,%esi +- xorl 32(%rsp),%r14d +- movl %r12d,%eax +- movl %ebp,28(%rsp) +- movl %r12d,%ebx +- xorl 40(%rsp),%r14d +- andl %r11d,%eax +- movl %esi,%ecx +- xorl 0(%rsp),%r14d +- leal 
-1894007588(%rbp,%r13,1),%r13d +- xorl %r11d,%ebx +- roll $5,%ecx +- addl %eax,%r13d +- roll $1,%r14d +- andl %edi,%ebx +- addl %ecx,%r13d +- roll $30,%edi +- addl %ebx,%r13d +- xorl 36(%rsp),%edx +- movl %r11d,%eax +- movl %r14d,32(%rsp) +- movl %r11d,%ebx +- xorl 44(%rsp),%edx +- andl %edi,%eax +- movl %r13d,%ecx +- xorl 4(%rsp),%edx +- leal -1894007588(%r14,%r12,1),%r12d +- xorl %edi,%ebx +- roll $5,%ecx +- addl %eax,%r12d +- roll $1,%edx +- andl %esi,%ebx +- addl %ecx,%r12d +- roll $30,%esi +- addl %ebx,%r12d +- xorl 40(%rsp),%ebp +- movl %edi,%eax +- movl %edx,36(%rsp) +- movl %edi,%ebx +- xorl 48(%rsp),%ebp +- andl %esi,%eax +- movl %r12d,%ecx +- xorl 8(%rsp),%ebp +- leal -1894007588(%rdx,%r11,1),%r11d +- xorl %esi,%ebx +- roll $5,%ecx +- addl %eax,%r11d +- roll $1,%ebp +- andl %r13d,%ebx +- addl %ecx,%r11d +- roll $30,%r13d +- addl %ebx,%r11d +- xorl 44(%rsp),%r14d +- movl %esi,%eax +- movl %ebp,40(%rsp) +- movl %esi,%ebx +- xorl 52(%rsp),%r14d +- andl %r13d,%eax +- movl %r11d,%ecx +- xorl 12(%rsp),%r14d +- leal -1894007588(%rbp,%rdi,1),%edi +- xorl %r13d,%ebx +- roll $5,%ecx +- addl %eax,%edi +- roll $1,%r14d +- andl %r12d,%ebx +- addl %ecx,%edi +- roll $30,%r12d +- addl %ebx,%edi +- xorl 48(%rsp),%edx +- movl %r13d,%eax +- movl %r14d,44(%rsp) +- movl %r13d,%ebx +- xorl 56(%rsp),%edx +- andl %r12d,%eax +- movl %edi,%ecx +- xorl 16(%rsp),%edx +- leal -1894007588(%r14,%rsi,1),%esi +- xorl %r12d,%ebx +- roll $5,%ecx +- addl %eax,%esi +- roll $1,%edx +- andl %r11d,%ebx +- addl %ecx,%esi +- roll $30,%r11d +- addl %ebx,%esi +- xorl 52(%rsp),%ebp +- movl %edi,%eax +- movl %edx,48(%rsp) +- movl %esi,%ecx +- xorl 60(%rsp),%ebp +- xorl %r12d,%eax +- roll $5,%ecx +- xorl 20(%rsp),%ebp +- leal -899497514(%rdx,%r13,1),%r13d +- xorl %r11d,%eax +- addl %ecx,%r13d +- roll $30,%edi +- addl %eax,%r13d +- roll $1,%ebp +- xorl 56(%rsp),%r14d +- movl %esi,%eax +- movl %ebp,52(%rsp) +- movl %r13d,%ecx +- xorl 0(%rsp),%r14d +- xorl %r11d,%eax +- roll $5,%ecx +- xorl 24(%rsp),%r14d +- leal -899497514(%rbp,%r12,1),%r12d +- xorl %edi,%eax +- addl %ecx,%r12d +- roll $30,%esi +- addl %eax,%r12d +- roll $1,%r14d +- xorl 60(%rsp),%edx +- movl %r13d,%eax +- movl %r14d,56(%rsp) +- movl %r12d,%ecx +- xorl 4(%rsp),%edx +- xorl %edi,%eax +- roll $5,%ecx +- xorl 28(%rsp),%edx +- leal -899497514(%r14,%r11,1),%r11d +- xorl %esi,%eax +- addl %ecx,%r11d +- roll $30,%r13d +- addl %eax,%r11d +- roll $1,%edx +- xorl 0(%rsp),%ebp +- movl %r12d,%eax +- movl %edx,60(%rsp) +- movl %r11d,%ecx +- xorl 8(%rsp),%ebp +- xorl %esi,%eax +- roll $5,%ecx +- xorl 32(%rsp),%ebp +- leal -899497514(%rdx,%rdi,1),%edi +- xorl %r13d,%eax +- addl %ecx,%edi +- roll $30,%r12d +- addl %eax,%edi +- roll $1,%ebp +- xorl 4(%rsp),%r14d +- movl %r11d,%eax +- movl %ebp,0(%rsp) +- movl %edi,%ecx +- xorl 12(%rsp),%r14d +- xorl %r13d,%eax +- roll $5,%ecx +- xorl 36(%rsp),%r14d +- leal -899497514(%rbp,%rsi,1),%esi +- xorl %r12d,%eax +- addl %ecx,%esi +- roll $30,%r11d +- addl %eax,%esi +- roll $1,%r14d +- xorl 8(%rsp),%edx +- movl %edi,%eax +- movl %r14d,4(%rsp) +- movl %esi,%ecx +- xorl 16(%rsp),%edx +- xorl %r12d,%eax +- roll $5,%ecx +- xorl 40(%rsp),%edx +- leal -899497514(%r14,%r13,1),%r13d +- xorl %r11d,%eax +- addl %ecx,%r13d +- roll $30,%edi +- addl %eax,%r13d +- roll $1,%edx +- xorl 12(%rsp),%ebp +- movl %esi,%eax +- movl %edx,8(%rsp) +- movl %r13d,%ecx +- xorl 20(%rsp),%ebp +- xorl %r11d,%eax +- roll $5,%ecx +- xorl 44(%rsp),%ebp +- leal -899497514(%rdx,%r12,1),%r12d +- xorl %edi,%eax +- addl %ecx,%r12d +- roll $30,%esi +- addl %eax,%r12d +- roll 
$1,%ebp +- xorl 16(%rsp),%r14d +- movl %r13d,%eax +- movl %ebp,12(%rsp) +- movl %r12d,%ecx +- xorl 24(%rsp),%r14d +- xorl %edi,%eax +- roll $5,%ecx +- xorl 48(%rsp),%r14d +- leal -899497514(%rbp,%r11,1),%r11d +- xorl %esi,%eax +- addl %ecx,%r11d +- roll $30,%r13d +- addl %eax,%r11d +- roll $1,%r14d +- xorl 20(%rsp),%edx +- movl %r12d,%eax +- movl %r14d,16(%rsp) +- movl %r11d,%ecx +- xorl 28(%rsp),%edx +- xorl %esi,%eax +- roll $5,%ecx +- xorl 52(%rsp),%edx +- leal -899497514(%r14,%rdi,1),%edi +- xorl %r13d,%eax +- addl %ecx,%edi +- roll $30,%r12d +- addl %eax,%edi +- roll $1,%edx +- xorl 24(%rsp),%ebp +- movl %r11d,%eax +- movl %edx,20(%rsp) +- movl %edi,%ecx +- xorl 32(%rsp),%ebp +- xorl %r13d,%eax +- roll $5,%ecx +- xorl 56(%rsp),%ebp +- leal -899497514(%rdx,%rsi,1),%esi +- xorl %r12d,%eax +- addl %ecx,%esi +- roll $30,%r11d +- addl %eax,%esi +- roll $1,%ebp +- xorl 28(%rsp),%r14d +- movl %edi,%eax +- movl %ebp,24(%rsp) +- movl %esi,%ecx +- xorl 36(%rsp),%r14d +- xorl %r12d,%eax +- roll $5,%ecx +- xorl 60(%rsp),%r14d +- leal -899497514(%rbp,%r13,1),%r13d +- xorl %r11d,%eax +- addl %ecx,%r13d +- roll $30,%edi +- addl %eax,%r13d +- roll $1,%r14d +- xorl 32(%rsp),%edx +- movl %esi,%eax +- movl %r14d,28(%rsp) +- movl %r13d,%ecx +- xorl 40(%rsp),%edx +- xorl %r11d,%eax +- roll $5,%ecx +- xorl 0(%rsp),%edx +- leal -899497514(%r14,%r12,1),%r12d +- xorl %edi,%eax +- addl %ecx,%r12d +- roll $30,%esi +- addl %eax,%r12d +- roll $1,%edx +- xorl 36(%rsp),%ebp +- movl %r13d,%eax +- +- movl %r12d,%ecx +- xorl 44(%rsp),%ebp +- xorl %edi,%eax +- roll $5,%ecx +- xorl 4(%rsp),%ebp +- leal -899497514(%rdx,%r11,1),%r11d +- xorl %esi,%eax +- addl %ecx,%r11d +- roll $30,%r13d +- addl %eax,%r11d +- roll $1,%ebp +- xorl 40(%rsp),%r14d +- movl %r12d,%eax +- +- movl %r11d,%ecx +- xorl 48(%rsp),%r14d +- xorl %esi,%eax +- roll $5,%ecx +- xorl 8(%rsp),%r14d +- leal -899497514(%rbp,%rdi,1),%edi +- xorl %r13d,%eax +- addl %ecx,%edi +- roll $30,%r12d +- addl %eax,%edi +- roll $1,%r14d +- xorl 44(%rsp),%edx +- movl %r11d,%eax +- +- movl %edi,%ecx +- xorl 52(%rsp),%edx +- xorl %r13d,%eax +- roll $5,%ecx +- xorl 12(%rsp),%edx +- leal -899497514(%r14,%rsi,1),%esi +- xorl %r12d,%eax +- addl %ecx,%esi +- roll $30,%r11d +- addl %eax,%esi +- roll $1,%edx +- xorl 48(%rsp),%ebp +- movl %edi,%eax +- +- movl %esi,%ecx +- xorl 56(%rsp),%ebp +- xorl %r12d,%eax +- roll $5,%ecx +- xorl 16(%rsp),%ebp +- leal -899497514(%rdx,%r13,1),%r13d +- xorl %r11d,%eax +- addl %ecx,%r13d +- roll $30,%edi +- addl %eax,%r13d +- roll $1,%ebp +- xorl 52(%rsp),%r14d +- movl %esi,%eax +- +- movl %r13d,%ecx +- xorl 60(%rsp),%r14d +- xorl %r11d,%eax +- roll $5,%ecx +- xorl 20(%rsp),%r14d +- leal -899497514(%rbp,%r12,1),%r12d +- xorl %edi,%eax +- addl %ecx,%r12d +- roll $30,%esi +- addl %eax,%r12d +- roll $1,%r14d +- xorl 56(%rsp),%edx +- movl %r13d,%eax +- +- movl %r12d,%ecx +- xorl 0(%rsp),%edx +- xorl %edi,%eax +- roll $5,%ecx +- xorl 24(%rsp),%edx +- leal -899497514(%r14,%r11,1),%r11d +- xorl %esi,%eax +- addl %ecx,%r11d +- roll $30,%r13d +- addl %eax,%r11d +- roll $1,%edx +- xorl 60(%rsp),%ebp +- movl %r12d,%eax +- +- movl %r11d,%ecx +- xorl 4(%rsp),%ebp +- xorl %esi,%eax +- roll $5,%ecx +- xorl 28(%rsp),%ebp +- leal -899497514(%rdx,%rdi,1),%edi +- xorl %r13d,%eax +- addl %ecx,%edi +- roll $30,%r12d +- addl %eax,%edi +- roll $1,%ebp +- movl %r11d,%eax +- movl %edi,%ecx +- xorl %r13d,%eax +- leal -899497514(%rbp,%rsi,1),%esi +- roll $5,%ecx +- xorl %r12d,%eax +- addl %ecx,%esi +- roll $30,%r11d +- addl %eax,%esi +- addl 0(%r8),%esi +- addl 4(%r8),%edi +- 
addl 8(%r8),%r11d +- addl 12(%r8),%r12d +- addl 16(%r8),%r13d +- movl %esi,0(%r8) +- movl %edi,4(%r8) +- movl %r11d,8(%r8) +- movl %r12d,12(%r8) +- movl %r13d,16(%r8) +- +- subq $1,%r10 +- leaq 64(%r9),%r9 +- jnz L$loop +- +- movq 64(%rsp),%rsi +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-sha1_block_data_order_shaext: +-_shaext_shortcut: +- +- movdqu (%rdi),%xmm0 +- movd 16(%rdi),%xmm1 +- movdqa K_XX_XX+160(%rip),%xmm3 +- +- movdqu (%rsi),%xmm4 +- pshufd $27,%xmm0,%xmm0 +- movdqu 16(%rsi),%xmm5 +- pshufd $27,%xmm1,%xmm1 +- movdqu 32(%rsi),%xmm6 +-.byte 102,15,56,0,227 +- movdqu 48(%rsi),%xmm7 +-.byte 102,15,56,0,235 +-.byte 102,15,56,0,243 +- movdqa %xmm1,%xmm9 +-.byte 102,15,56,0,251 +- jmp L$oop_shaext +- +-.p2align 4 +-L$oop_shaext: +- decq %rdx +- leaq 64(%rsi),%r8 +- paddd %xmm4,%xmm1 +- cmovneq %r8,%rsi +- movdqa %xmm0,%xmm8 +-.byte 15,56,201,229 +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,0 +-.byte 15,56,200,213 +- pxor %xmm6,%xmm4 +-.byte 15,56,201,238 +-.byte 15,56,202,231 +- +- movdqa %xmm0,%xmm1 +-.byte 15,58,204,194,0 +-.byte 15,56,200,206 +- pxor %xmm7,%xmm5 +-.byte 15,56,202,236 +-.byte 15,56,201,247 +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,0 +-.byte 15,56,200,215 +- pxor %xmm4,%xmm6 +-.byte 15,56,201,252 +-.byte 15,56,202,245 +- +- movdqa %xmm0,%xmm1 +-.byte 15,58,204,194,0 +-.byte 15,56,200,204 +- pxor %xmm5,%xmm7 +-.byte 15,56,202,254 +-.byte 15,56,201,229 +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,0 +-.byte 15,56,200,213 +- pxor %xmm6,%xmm4 +-.byte 15,56,201,238 +-.byte 15,56,202,231 +- +- movdqa %xmm0,%xmm1 +-.byte 15,58,204,194,1 +-.byte 15,56,200,206 +- pxor %xmm7,%xmm5 +-.byte 15,56,202,236 +-.byte 15,56,201,247 +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,1 +-.byte 15,56,200,215 +- pxor %xmm4,%xmm6 +-.byte 15,56,201,252 +-.byte 15,56,202,245 +- +- movdqa %xmm0,%xmm1 +-.byte 15,58,204,194,1 +-.byte 15,56,200,204 +- pxor %xmm5,%xmm7 +-.byte 15,56,202,254 +-.byte 15,56,201,229 +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,1 +-.byte 15,56,200,213 +- pxor %xmm6,%xmm4 +-.byte 15,56,201,238 +-.byte 15,56,202,231 +- +- movdqa %xmm0,%xmm1 +-.byte 15,58,204,194,1 +-.byte 15,56,200,206 +- pxor %xmm7,%xmm5 +-.byte 15,56,202,236 +-.byte 15,56,201,247 +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,2 +-.byte 15,56,200,215 +- pxor %xmm4,%xmm6 +-.byte 15,56,201,252 +-.byte 15,56,202,245 +- +- movdqa %xmm0,%xmm1 +-.byte 15,58,204,194,2 +-.byte 15,56,200,204 +- pxor %xmm5,%xmm7 +-.byte 15,56,202,254 +-.byte 15,56,201,229 +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,2 +-.byte 15,56,200,213 +- pxor %xmm6,%xmm4 +-.byte 15,56,201,238 +-.byte 15,56,202,231 +- +- movdqa %xmm0,%xmm1 +-.byte 15,58,204,194,2 +-.byte 15,56,200,206 +- pxor %xmm7,%xmm5 +-.byte 15,56,202,236 +-.byte 15,56,201,247 +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,2 +-.byte 15,56,200,215 +- pxor %xmm4,%xmm6 +-.byte 15,56,201,252 +-.byte 15,56,202,245 +- +- movdqa %xmm0,%xmm1 +-.byte 15,58,204,194,3 +-.byte 15,56,200,204 +- pxor %xmm5,%xmm7 +-.byte 15,56,202,254 +- movdqu (%rsi),%xmm4 +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,3 +-.byte 15,56,200,213 +- movdqu 16(%rsi),%xmm5 +-.byte 102,15,56,0,227 +- +- movdqa %xmm0,%xmm1 +-.byte 15,58,204,194,3 +-.byte 15,56,200,206 +- movdqu 32(%rsi),%xmm6 +-.byte 102,15,56,0,235 +- +- movdqa %xmm0,%xmm2 +-.byte 15,58,204,193,3 +-.byte 15,56,200,215 +- movdqu 48(%rsi),%xmm7 +-.byte 102,15,56,0,243 +- +- movdqa %xmm0,%xmm1 
+-.byte 15,58,204,194,3 +-.byte 65,15,56,200,201 +-.byte 102,15,56,0,251 +- +- paddd %xmm8,%xmm0 +- movdqa %xmm1,%xmm9 +- +- jnz L$oop_shaext +- +- pshufd $27,%xmm0,%xmm0 +- pshufd $27,%xmm1,%xmm1 +- movdqu %xmm0,(%rdi) +- movd %xmm1,16(%rdi) +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-sha1_block_data_order_ssse3: +-_ssse3_shortcut: +- +- movq %rsp,%r11 +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- leaq -64(%rsp),%rsp +- andq $-64,%rsp +- movq %rdi,%r8 +- movq %rsi,%r9 +- movq %rdx,%r10 +- +- shlq $6,%r10 +- addq %r9,%r10 +- leaq K_XX_XX+64(%rip),%r14 +- +- movl 0(%r8),%eax +- movl 4(%r8),%ebx +- movl 8(%r8),%ecx +- movl 12(%r8),%edx +- movl %ebx,%esi +- movl 16(%r8),%ebp +- movl %ecx,%edi +- xorl %edx,%edi +- andl %edi,%esi +- +- movdqa 64(%r14),%xmm6 +- movdqa -64(%r14),%xmm9 +- movdqu 0(%r9),%xmm0 +- movdqu 16(%r9),%xmm1 +- movdqu 32(%r9),%xmm2 +- movdqu 48(%r9),%xmm3 +-.byte 102,15,56,0,198 +-.byte 102,15,56,0,206 +-.byte 102,15,56,0,214 +- addq $64,%r9 +- paddd %xmm9,%xmm0 +-.byte 102,15,56,0,222 +- paddd %xmm9,%xmm1 +- paddd %xmm9,%xmm2 +- movdqa %xmm0,0(%rsp) +- psubd %xmm9,%xmm0 +- movdqa %xmm1,16(%rsp) +- psubd %xmm9,%xmm1 +- movdqa %xmm2,32(%rsp) +- psubd %xmm9,%xmm2 +- jmp L$oop_ssse3 +-.p2align 4 +-L$oop_ssse3: +- rorl $2,%ebx +- pshufd $238,%xmm0,%xmm4 +- xorl %edx,%esi +- movdqa %xmm3,%xmm8 +- paddd %xmm3,%xmm9 +- movl %eax,%edi +- addl 0(%rsp),%ebp +- punpcklqdq %xmm1,%xmm4 +- xorl %ecx,%ebx +- roll $5,%eax +- addl %esi,%ebp +- psrldq $4,%xmm8 +- andl %ebx,%edi +- xorl %ecx,%ebx +- pxor %xmm0,%xmm4 +- addl %eax,%ebp +- rorl $7,%eax +- pxor %xmm2,%xmm8 +- xorl %ecx,%edi +- movl %ebp,%esi +- addl 4(%rsp),%edx +- pxor %xmm8,%xmm4 +- xorl %ebx,%eax +- roll $5,%ebp +- movdqa %xmm9,48(%rsp) +- addl %edi,%edx +- andl %eax,%esi +- movdqa %xmm4,%xmm10 +- xorl %ebx,%eax +- addl %ebp,%edx +- rorl $7,%ebp +- movdqa %xmm4,%xmm8 +- xorl %ebx,%esi +- pslldq $12,%xmm10 +- paddd %xmm4,%xmm4 +- movl %edx,%edi +- addl 8(%rsp),%ecx +- psrld $31,%xmm8 +- xorl %eax,%ebp +- roll $5,%edx +- addl %esi,%ecx +- movdqa %xmm10,%xmm9 +- andl %ebp,%edi +- xorl %eax,%ebp +- psrld $30,%xmm10 +- addl %edx,%ecx +- rorl $7,%edx +- por %xmm8,%xmm4 +- xorl %eax,%edi +- movl %ecx,%esi +- addl 12(%rsp),%ebx +- pslld $2,%xmm9 +- pxor %xmm10,%xmm4 +- xorl %ebp,%edx +- movdqa -64(%r14),%xmm10 +- roll $5,%ecx +- addl %edi,%ebx +- andl %edx,%esi +- pxor %xmm9,%xmm4 +- xorl %ebp,%edx +- addl %ecx,%ebx +- rorl $7,%ecx +- pshufd $238,%xmm1,%xmm5 +- xorl %ebp,%esi +- movdqa %xmm4,%xmm9 +- paddd %xmm4,%xmm10 +- movl %ebx,%edi +- addl 16(%rsp),%eax +- punpcklqdq %xmm2,%xmm5 +- xorl %edx,%ecx +- roll $5,%ebx +- addl %esi,%eax +- psrldq $4,%xmm9 +- andl %ecx,%edi +- xorl %edx,%ecx +- pxor %xmm1,%xmm5 +- addl %ebx,%eax +- rorl $7,%ebx +- pxor %xmm3,%xmm9 +- xorl %edx,%edi +- movl %eax,%esi +- addl 20(%rsp),%ebp +- pxor %xmm9,%xmm5 +- xorl %ecx,%ebx +- roll $5,%eax +- movdqa %xmm10,0(%rsp) +- addl %edi,%ebp +- andl %ebx,%esi +- movdqa %xmm5,%xmm8 +- xorl %ecx,%ebx +- addl %eax,%ebp +- rorl $7,%eax +- movdqa %xmm5,%xmm9 +- xorl %ecx,%esi +- pslldq $12,%xmm8 +- paddd %xmm5,%xmm5 +- movl %ebp,%edi +- addl 24(%rsp),%edx +- psrld $31,%xmm9 +- xorl %ebx,%eax +- roll $5,%ebp +- addl %esi,%edx +- movdqa %xmm8,%xmm10 +- andl %eax,%edi +- xorl %ebx,%eax +- psrld $30,%xmm8 +- addl %ebp,%edx +- rorl $7,%ebp +- por %xmm9,%xmm5 +- xorl %ebx,%edi +- movl %edx,%esi +- addl 28(%rsp),%ecx +- pslld $2,%xmm10 +- pxor %xmm8,%xmm5 +- xorl %eax,%ebp +- movdqa -32(%r14),%xmm8 +- roll $5,%edx +- addl %edi,%ecx 
+- andl %ebp,%esi +- pxor %xmm10,%xmm5 +- xorl %eax,%ebp +- addl %edx,%ecx +- rorl $7,%edx +- pshufd $238,%xmm2,%xmm6 +- xorl %eax,%esi +- movdqa %xmm5,%xmm10 +- paddd %xmm5,%xmm8 +- movl %ecx,%edi +- addl 32(%rsp),%ebx +- punpcklqdq %xmm3,%xmm6 +- xorl %ebp,%edx +- roll $5,%ecx +- addl %esi,%ebx +- psrldq $4,%xmm10 +- andl %edx,%edi +- xorl %ebp,%edx +- pxor %xmm2,%xmm6 +- addl %ecx,%ebx +- rorl $7,%ecx +- pxor %xmm4,%xmm10 +- xorl %ebp,%edi +- movl %ebx,%esi +- addl 36(%rsp),%eax +- pxor %xmm10,%xmm6 +- xorl %edx,%ecx +- roll $5,%ebx +- movdqa %xmm8,16(%rsp) +- addl %edi,%eax +- andl %ecx,%esi +- movdqa %xmm6,%xmm9 +- xorl %edx,%ecx +- addl %ebx,%eax +- rorl $7,%ebx +- movdqa %xmm6,%xmm10 +- xorl %edx,%esi +- pslldq $12,%xmm9 +- paddd %xmm6,%xmm6 +- movl %eax,%edi +- addl 40(%rsp),%ebp +- psrld $31,%xmm10 +- xorl %ecx,%ebx +- roll $5,%eax +- addl %esi,%ebp +- movdqa %xmm9,%xmm8 +- andl %ebx,%edi +- xorl %ecx,%ebx +- psrld $30,%xmm9 +- addl %eax,%ebp +- rorl $7,%eax +- por %xmm10,%xmm6 +- xorl %ecx,%edi +- movl %ebp,%esi +- addl 44(%rsp),%edx +- pslld $2,%xmm8 +- pxor %xmm9,%xmm6 +- xorl %ebx,%eax +- movdqa -32(%r14),%xmm9 +- roll $5,%ebp +- addl %edi,%edx +- andl %eax,%esi +- pxor %xmm8,%xmm6 +- xorl %ebx,%eax +- addl %ebp,%edx +- rorl $7,%ebp +- pshufd $238,%xmm3,%xmm7 +- xorl %ebx,%esi +- movdqa %xmm6,%xmm8 +- paddd %xmm6,%xmm9 +- movl %edx,%edi +- addl 48(%rsp),%ecx +- punpcklqdq %xmm4,%xmm7 +- xorl %eax,%ebp +- roll $5,%edx +- addl %esi,%ecx +- psrldq $4,%xmm8 +- andl %ebp,%edi +- xorl %eax,%ebp +- pxor %xmm3,%xmm7 +- addl %edx,%ecx +- rorl $7,%edx +- pxor %xmm5,%xmm8 +- xorl %eax,%edi +- movl %ecx,%esi +- addl 52(%rsp),%ebx +- pxor %xmm8,%xmm7 +- xorl %ebp,%edx +- roll $5,%ecx +- movdqa %xmm9,32(%rsp) +- addl %edi,%ebx +- andl %edx,%esi +- movdqa %xmm7,%xmm10 +- xorl %ebp,%edx +- addl %ecx,%ebx +- rorl $7,%ecx +- movdqa %xmm7,%xmm8 +- xorl %ebp,%esi +- pslldq $12,%xmm10 +- paddd %xmm7,%xmm7 +- movl %ebx,%edi +- addl 56(%rsp),%eax +- psrld $31,%xmm8 +- xorl %edx,%ecx +- roll $5,%ebx +- addl %esi,%eax +- movdqa %xmm10,%xmm9 +- andl %ecx,%edi +- xorl %edx,%ecx +- psrld $30,%xmm10 +- addl %ebx,%eax +- rorl $7,%ebx +- por %xmm8,%xmm7 +- xorl %edx,%edi +- movl %eax,%esi +- addl 60(%rsp),%ebp +- pslld $2,%xmm9 +- pxor %xmm10,%xmm7 +- xorl %ecx,%ebx +- movdqa -32(%r14),%xmm10 +- roll $5,%eax +- addl %edi,%ebp +- andl %ebx,%esi +- pxor %xmm9,%xmm7 +- pshufd $238,%xmm6,%xmm9 +- xorl %ecx,%ebx +- addl %eax,%ebp +- rorl $7,%eax +- pxor %xmm4,%xmm0 +- xorl %ecx,%esi +- movl %ebp,%edi +- addl 0(%rsp),%edx +- punpcklqdq %xmm7,%xmm9 +- xorl %ebx,%eax +- roll $5,%ebp +- pxor %xmm1,%xmm0 +- addl %esi,%edx +- andl %eax,%edi +- movdqa %xmm10,%xmm8 +- xorl %ebx,%eax +- paddd %xmm7,%xmm10 +- addl %ebp,%edx +- pxor %xmm9,%xmm0 +- rorl $7,%ebp +- xorl %ebx,%edi +- movl %edx,%esi +- addl 4(%rsp),%ecx +- movdqa %xmm0,%xmm9 +- xorl %eax,%ebp +- roll $5,%edx +- movdqa %xmm10,48(%rsp) +- addl %edi,%ecx +- andl %ebp,%esi +- xorl %eax,%ebp +- pslld $2,%xmm0 +- addl %edx,%ecx +- rorl $7,%edx +- psrld $30,%xmm9 +- xorl %eax,%esi +- movl %ecx,%edi +- addl 8(%rsp),%ebx +- por %xmm9,%xmm0 +- xorl %ebp,%edx +- roll $5,%ecx +- pshufd $238,%xmm7,%xmm10 +- addl %esi,%ebx +- andl %edx,%edi +- xorl %ebp,%edx +- addl %ecx,%ebx +- addl 12(%rsp),%eax +- xorl %ebp,%edi +- movl %ebx,%esi +- roll $5,%ebx +- addl %edi,%eax +- xorl %edx,%esi +- rorl $7,%ecx +- addl %ebx,%eax +- pxor %xmm5,%xmm1 +- addl 16(%rsp),%ebp +- xorl %ecx,%esi +- punpcklqdq %xmm0,%xmm10 +- movl %eax,%edi +- roll $5,%eax +- pxor %xmm2,%xmm1 +- addl %esi,%ebp 
+- xorl %ecx,%edi +- movdqa %xmm8,%xmm9 +- rorl $7,%ebx +- paddd %xmm0,%xmm8 +- addl %eax,%ebp +- pxor %xmm10,%xmm1 +- addl 20(%rsp),%edx +- xorl %ebx,%edi +- movl %ebp,%esi +- roll $5,%ebp +- movdqa %xmm1,%xmm10 +- addl %edi,%edx +- xorl %ebx,%esi +- movdqa %xmm8,0(%rsp) +- rorl $7,%eax +- addl %ebp,%edx +- addl 24(%rsp),%ecx +- pslld $2,%xmm1 +- xorl %eax,%esi +- movl %edx,%edi +- psrld $30,%xmm10 +- roll $5,%edx +- addl %esi,%ecx +- xorl %eax,%edi +- rorl $7,%ebp +- por %xmm10,%xmm1 +- addl %edx,%ecx +- addl 28(%rsp),%ebx +- pshufd $238,%xmm0,%xmm8 +- xorl %ebp,%edi +- movl %ecx,%esi +- roll $5,%ecx +- addl %edi,%ebx +- xorl %ebp,%esi +- rorl $7,%edx +- addl %ecx,%ebx +- pxor %xmm6,%xmm2 +- addl 32(%rsp),%eax +- xorl %edx,%esi +- punpcklqdq %xmm1,%xmm8 +- movl %ebx,%edi +- roll $5,%ebx +- pxor %xmm3,%xmm2 +- addl %esi,%eax +- xorl %edx,%edi +- movdqa 0(%r14),%xmm10 +- rorl $7,%ecx +- paddd %xmm1,%xmm9 +- addl %ebx,%eax +- pxor %xmm8,%xmm2 +- addl 36(%rsp),%ebp +- xorl %ecx,%edi +- movl %eax,%esi +- roll $5,%eax +- movdqa %xmm2,%xmm8 +- addl %edi,%ebp +- xorl %ecx,%esi +- movdqa %xmm9,16(%rsp) +- rorl $7,%ebx +- addl %eax,%ebp +- addl 40(%rsp),%edx +- pslld $2,%xmm2 +- xorl %ebx,%esi +- movl %ebp,%edi +- psrld $30,%xmm8 +- roll $5,%ebp +- addl %esi,%edx +- xorl %ebx,%edi +- rorl $7,%eax +- por %xmm8,%xmm2 +- addl %ebp,%edx +- addl 44(%rsp),%ecx +- pshufd $238,%xmm1,%xmm9 +- xorl %eax,%edi +- movl %edx,%esi +- roll $5,%edx +- addl %edi,%ecx +- xorl %eax,%esi +- rorl $7,%ebp +- addl %edx,%ecx +- pxor %xmm7,%xmm3 +- addl 48(%rsp),%ebx +- xorl %ebp,%esi +- punpcklqdq %xmm2,%xmm9 +- movl %ecx,%edi +- roll $5,%ecx +- pxor %xmm4,%xmm3 +- addl %esi,%ebx +- xorl %ebp,%edi +- movdqa %xmm10,%xmm8 +- rorl $7,%edx +- paddd %xmm2,%xmm10 +- addl %ecx,%ebx +- pxor %xmm9,%xmm3 +- addl 52(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- roll $5,%ebx +- movdqa %xmm3,%xmm9 +- addl %edi,%eax +- xorl %edx,%esi +- movdqa %xmm10,32(%rsp) +- rorl $7,%ecx +- addl %ebx,%eax +- addl 56(%rsp),%ebp +- pslld $2,%xmm3 +- xorl %ecx,%esi +- movl %eax,%edi +- psrld $30,%xmm9 +- roll $5,%eax +- addl %esi,%ebp +- xorl %ecx,%edi +- rorl $7,%ebx +- por %xmm9,%xmm3 +- addl %eax,%ebp +- addl 60(%rsp),%edx +- pshufd $238,%xmm2,%xmm10 +- xorl %ebx,%edi +- movl %ebp,%esi +- roll $5,%ebp +- addl %edi,%edx +- xorl %ebx,%esi +- rorl $7,%eax +- addl %ebp,%edx +- pxor %xmm0,%xmm4 +- addl 0(%rsp),%ecx +- xorl %eax,%esi +- punpcklqdq %xmm3,%xmm10 +- movl %edx,%edi +- roll $5,%edx +- pxor %xmm5,%xmm4 +- addl %esi,%ecx +- xorl %eax,%edi +- movdqa %xmm8,%xmm9 +- rorl $7,%ebp +- paddd %xmm3,%xmm8 +- addl %edx,%ecx +- pxor %xmm10,%xmm4 +- addl 4(%rsp),%ebx +- xorl %ebp,%edi +- movl %ecx,%esi +- roll $5,%ecx +- movdqa %xmm4,%xmm10 +- addl %edi,%ebx +- xorl %ebp,%esi +- movdqa %xmm8,48(%rsp) +- rorl $7,%edx +- addl %ecx,%ebx +- addl 8(%rsp),%eax +- pslld $2,%xmm4 +- xorl %edx,%esi +- movl %ebx,%edi +- psrld $30,%xmm10 +- roll $5,%ebx +- addl %esi,%eax +- xorl %edx,%edi +- rorl $7,%ecx +- por %xmm10,%xmm4 +- addl %ebx,%eax +- addl 12(%rsp),%ebp +- pshufd $238,%xmm3,%xmm8 +- xorl %ecx,%edi +- movl %eax,%esi +- roll $5,%eax +- addl %edi,%ebp +- xorl %ecx,%esi +- rorl $7,%ebx +- addl %eax,%ebp +- pxor %xmm1,%xmm5 +- addl 16(%rsp),%edx +- xorl %ebx,%esi +- punpcklqdq %xmm4,%xmm8 +- movl %ebp,%edi +- roll $5,%ebp +- pxor %xmm6,%xmm5 +- addl %esi,%edx +- xorl %ebx,%edi +- movdqa %xmm9,%xmm10 +- rorl $7,%eax +- paddd %xmm4,%xmm9 +- addl %ebp,%edx +- pxor %xmm8,%xmm5 +- addl 20(%rsp),%ecx +- xorl %eax,%edi +- movl %edx,%esi +- roll $5,%edx +- movdqa 
%xmm5,%xmm8 +- addl %edi,%ecx +- xorl %eax,%esi +- movdqa %xmm9,0(%rsp) +- rorl $7,%ebp +- addl %edx,%ecx +- addl 24(%rsp),%ebx +- pslld $2,%xmm5 +- xorl %ebp,%esi +- movl %ecx,%edi +- psrld $30,%xmm8 +- roll $5,%ecx +- addl %esi,%ebx +- xorl %ebp,%edi +- rorl $7,%edx +- por %xmm8,%xmm5 +- addl %ecx,%ebx +- addl 28(%rsp),%eax +- pshufd $238,%xmm4,%xmm9 +- rorl $7,%ecx +- movl %ebx,%esi +- xorl %edx,%edi +- roll $5,%ebx +- addl %edi,%eax +- xorl %ecx,%esi +- xorl %edx,%ecx +- addl %ebx,%eax +- pxor %xmm2,%xmm6 +- addl 32(%rsp),%ebp +- andl %ecx,%esi +- xorl %edx,%ecx +- rorl $7,%ebx +- punpcklqdq %xmm5,%xmm9 +- movl %eax,%edi +- xorl %ecx,%esi +- pxor %xmm7,%xmm6 +- roll $5,%eax +- addl %esi,%ebp +- movdqa %xmm10,%xmm8 +- xorl %ebx,%edi +- paddd %xmm5,%xmm10 +- xorl %ecx,%ebx +- pxor %xmm9,%xmm6 +- addl %eax,%ebp +- addl 36(%rsp),%edx +- andl %ebx,%edi +- xorl %ecx,%ebx +- rorl $7,%eax +- movdqa %xmm6,%xmm9 +- movl %ebp,%esi +- xorl %ebx,%edi +- movdqa %xmm10,16(%rsp) +- roll $5,%ebp +- addl %edi,%edx +- xorl %eax,%esi +- pslld $2,%xmm6 +- xorl %ebx,%eax +- addl %ebp,%edx +- psrld $30,%xmm9 +- addl 40(%rsp),%ecx +- andl %eax,%esi +- xorl %ebx,%eax +- por %xmm9,%xmm6 +- rorl $7,%ebp +- movl %edx,%edi +- xorl %eax,%esi +- roll $5,%edx +- pshufd $238,%xmm5,%xmm10 +- addl %esi,%ecx +- xorl %ebp,%edi +- xorl %eax,%ebp +- addl %edx,%ecx +- addl 44(%rsp),%ebx +- andl %ebp,%edi +- xorl %eax,%ebp +- rorl $7,%edx +- movl %ecx,%esi +- xorl %ebp,%edi +- roll $5,%ecx +- addl %edi,%ebx +- xorl %edx,%esi +- xorl %ebp,%edx +- addl %ecx,%ebx +- pxor %xmm3,%xmm7 +- addl 48(%rsp),%eax +- andl %edx,%esi +- xorl %ebp,%edx +- rorl $7,%ecx +- punpcklqdq %xmm6,%xmm10 +- movl %ebx,%edi +- xorl %edx,%esi +- pxor %xmm0,%xmm7 +- roll $5,%ebx +- addl %esi,%eax +- movdqa 32(%r14),%xmm9 +- xorl %ecx,%edi +- paddd %xmm6,%xmm8 +- xorl %edx,%ecx +- pxor %xmm10,%xmm7 +- addl %ebx,%eax +- addl 52(%rsp),%ebp +- andl %ecx,%edi +- xorl %edx,%ecx +- rorl $7,%ebx +- movdqa %xmm7,%xmm10 +- movl %eax,%esi +- xorl %ecx,%edi +- movdqa %xmm8,32(%rsp) +- roll $5,%eax +- addl %edi,%ebp +- xorl %ebx,%esi +- pslld $2,%xmm7 +- xorl %ecx,%ebx +- addl %eax,%ebp +- psrld $30,%xmm10 +- addl 56(%rsp),%edx +- andl %ebx,%esi +- xorl %ecx,%ebx +- por %xmm10,%xmm7 +- rorl $7,%eax +- movl %ebp,%edi +- xorl %ebx,%esi +- roll $5,%ebp +- pshufd $238,%xmm6,%xmm8 +- addl %esi,%edx +- xorl %eax,%edi +- xorl %ebx,%eax +- addl %ebp,%edx +- addl 60(%rsp),%ecx +- andl %eax,%edi +- xorl %ebx,%eax +- rorl $7,%ebp +- movl %edx,%esi +- xorl %eax,%edi +- roll $5,%edx +- addl %edi,%ecx +- xorl %ebp,%esi +- xorl %eax,%ebp +- addl %edx,%ecx +- pxor %xmm4,%xmm0 +- addl 0(%rsp),%ebx +- andl %ebp,%esi +- xorl %eax,%ebp +- rorl $7,%edx +- punpcklqdq %xmm7,%xmm8 +- movl %ecx,%edi +- xorl %ebp,%esi +- pxor %xmm1,%xmm0 +- roll $5,%ecx +- addl %esi,%ebx +- movdqa %xmm9,%xmm10 +- xorl %edx,%edi +- paddd %xmm7,%xmm9 +- xorl %ebp,%edx +- pxor %xmm8,%xmm0 +- addl %ecx,%ebx +- addl 4(%rsp),%eax +- andl %edx,%edi +- xorl %ebp,%edx +- rorl $7,%ecx +- movdqa %xmm0,%xmm8 +- movl %ebx,%esi +- xorl %edx,%edi +- movdqa %xmm9,48(%rsp) +- roll $5,%ebx +- addl %edi,%eax +- xorl %ecx,%esi +- pslld $2,%xmm0 +- xorl %edx,%ecx +- addl %ebx,%eax +- psrld $30,%xmm8 +- addl 8(%rsp),%ebp +- andl %ecx,%esi +- xorl %edx,%ecx +- por %xmm8,%xmm0 +- rorl $7,%ebx +- movl %eax,%edi +- xorl %ecx,%esi +- roll $5,%eax +- pshufd $238,%xmm7,%xmm9 +- addl %esi,%ebp +- xorl %ebx,%edi +- xorl %ecx,%ebx +- addl %eax,%ebp +- addl 12(%rsp),%edx +- andl %ebx,%edi +- xorl %ecx,%ebx +- rorl $7,%eax +- movl %ebp,%esi +- 
xorl %ebx,%edi +- roll $5,%ebp +- addl %edi,%edx +- xorl %eax,%esi +- xorl %ebx,%eax +- addl %ebp,%edx +- pxor %xmm5,%xmm1 +- addl 16(%rsp),%ecx +- andl %eax,%esi +- xorl %ebx,%eax +- rorl $7,%ebp +- punpcklqdq %xmm0,%xmm9 +- movl %edx,%edi +- xorl %eax,%esi +- pxor %xmm2,%xmm1 +- roll $5,%edx +- addl %esi,%ecx +- movdqa %xmm10,%xmm8 +- xorl %ebp,%edi +- paddd %xmm0,%xmm10 +- xorl %eax,%ebp +- pxor %xmm9,%xmm1 +- addl %edx,%ecx +- addl 20(%rsp),%ebx +- andl %ebp,%edi +- xorl %eax,%ebp +- rorl $7,%edx +- movdqa %xmm1,%xmm9 +- movl %ecx,%esi +- xorl %ebp,%edi +- movdqa %xmm10,0(%rsp) +- roll $5,%ecx +- addl %edi,%ebx +- xorl %edx,%esi +- pslld $2,%xmm1 +- xorl %ebp,%edx +- addl %ecx,%ebx +- psrld $30,%xmm9 +- addl 24(%rsp),%eax +- andl %edx,%esi +- xorl %ebp,%edx +- por %xmm9,%xmm1 +- rorl $7,%ecx +- movl %ebx,%edi +- xorl %edx,%esi +- roll $5,%ebx +- pshufd $238,%xmm0,%xmm10 +- addl %esi,%eax +- xorl %ecx,%edi +- xorl %edx,%ecx +- addl %ebx,%eax +- addl 28(%rsp),%ebp +- andl %ecx,%edi +- xorl %edx,%ecx +- rorl $7,%ebx +- movl %eax,%esi +- xorl %ecx,%edi +- roll $5,%eax +- addl %edi,%ebp +- xorl %ebx,%esi +- xorl %ecx,%ebx +- addl %eax,%ebp +- pxor %xmm6,%xmm2 +- addl 32(%rsp),%edx +- andl %ebx,%esi +- xorl %ecx,%ebx +- rorl $7,%eax +- punpcklqdq %xmm1,%xmm10 +- movl %ebp,%edi +- xorl %ebx,%esi +- pxor %xmm3,%xmm2 +- roll $5,%ebp +- addl %esi,%edx +- movdqa %xmm8,%xmm9 +- xorl %eax,%edi +- paddd %xmm1,%xmm8 +- xorl %ebx,%eax +- pxor %xmm10,%xmm2 +- addl %ebp,%edx +- addl 36(%rsp),%ecx +- andl %eax,%edi +- xorl %ebx,%eax +- rorl $7,%ebp +- movdqa %xmm2,%xmm10 +- movl %edx,%esi +- xorl %eax,%edi +- movdqa %xmm8,16(%rsp) +- roll $5,%edx +- addl %edi,%ecx +- xorl %ebp,%esi +- pslld $2,%xmm2 +- xorl %eax,%ebp +- addl %edx,%ecx +- psrld $30,%xmm10 +- addl 40(%rsp),%ebx +- andl %ebp,%esi +- xorl %eax,%ebp +- por %xmm10,%xmm2 +- rorl $7,%edx +- movl %ecx,%edi +- xorl %ebp,%esi +- roll $5,%ecx +- pshufd $238,%xmm1,%xmm8 +- addl %esi,%ebx +- xorl %edx,%edi +- xorl %ebp,%edx +- addl %ecx,%ebx +- addl 44(%rsp),%eax +- andl %edx,%edi +- xorl %ebp,%edx +- rorl $7,%ecx +- movl %ebx,%esi +- xorl %edx,%edi +- roll $5,%ebx +- addl %edi,%eax +- xorl %edx,%esi +- addl %ebx,%eax +- pxor %xmm7,%xmm3 +- addl 48(%rsp),%ebp +- xorl %ecx,%esi +- punpcklqdq %xmm2,%xmm8 +- movl %eax,%edi +- roll $5,%eax +- pxor %xmm4,%xmm3 +- addl %esi,%ebp +- xorl %ecx,%edi +- movdqa %xmm9,%xmm10 +- rorl $7,%ebx +- paddd %xmm2,%xmm9 +- addl %eax,%ebp +- pxor %xmm8,%xmm3 +- addl 52(%rsp),%edx +- xorl %ebx,%edi +- movl %ebp,%esi +- roll $5,%ebp +- movdqa %xmm3,%xmm8 +- addl %edi,%edx +- xorl %ebx,%esi +- movdqa %xmm9,32(%rsp) +- rorl $7,%eax +- addl %ebp,%edx +- addl 56(%rsp),%ecx +- pslld $2,%xmm3 +- xorl %eax,%esi +- movl %edx,%edi +- psrld $30,%xmm8 +- roll $5,%edx +- addl %esi,%ecx +- xorl %eax,%edi +- rorl $7,%ebp +- por %xmm8,%xmm3 +- addl %edx,%ecx +- addl 60(%rsp),%ebx +- xorl %ebp,%edi +- movl %ecx,%esi +- roll $5,%ecx +- addl %edi,%ebx +- xorl %ebp,%esi +- rorl $7,%edx +- addl %ecx,%ebx +- addl 0(%rsp),%eax +- xorl %edx,%esi +- movl %ebx,%edi +- roll $5,%ebx +- paddd %xmm3,%xmm10 +- addl %esi,%eax +- xorl %edx,%edi +- movdqa %xmm10,48(%rsp) +- rorl $7,%ecx +- addl %ebx,%eax +- addl 4(%rsp),%ebp +- xorl %ecx,%edi +- movl %eax,%esi +- roll $5,%eax +- addl %edi,%ebp +- xorl %ecx,%esi +- rorl $7,%ebx +- addl %eax,%ebp +- addl 8(%rsp),%edx +- xorl %ebx,%esi +- movl %ebp,%edi +- roll $5,%ebp +- addl %esi,%edx +- xorl %ebx,%edi +- rorl $7,%eax +- addl %ebp,%edx +- addl 12(%rsp),%ecx +- xorl %eax,%edi +- movl %edx,%esi +- roll $5,%edx 
+- addl %edi,%ecx +- xorl %eax,%esi +- rorl $7,%ebp +- addl %edx,%ecx +- cmpq %r10,%r9 +- je L$done_ssse3 +- movdqa 64(%r14),%xmm6 +- movdqa -64(%r14),%xmm9 +- movdqu 0(%r9),%xmm0 +- movdqu 16(%r9),%xmm1 +- movdqu 32(%r9),%xmm2 +- movdqu 48(%r9),%xmm3 +-.byte 102,15,56,0,198 +- addq $64,%r9 +- addl 16(%rsp),%ebx +- xorl %ebp,%esi +- movl %ecx,%edi +-.byte 102,15,56,0,206 +- roll $5,%ecx +- addl %esi,%ebx +- xorl %ebp,%edi +- rorl $7,%edx +- paddd %xmm9,%xmm0 +- addl %ecx,%ebx +- addl 20(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- movdqa %xmm0,0(%rsp) +- roll $5,%ebx +- addl %edi,%eax +- xorl %edx,%esi +- rorl $7,%ecx +- psubd %xmm9,%xmm0 +- addl %ebx,%eax +- addl 24(%rsp),%ebp +- xorl %ecx,%esi +- movl %eax,%edi +- roll $5,%eax +- addl %esi,%ebp +- xorl %ecx,%edi +- rorl $7,%ebx +- addl %eax,%ebp +- addl 28(%rsp),%edx +- xorl %ebx,%edi +- movl %ebp,%esi +- roll $5,%ebp +- addl %edi,%edx +- xorl %ebx,%esi +- rorl $7,%eax +- addl %ebp,%edx +- addl 32(%rsp),%ecx +- xorl %eax,%esi +- movl %edx,%edi +-.byte 102,15,56,0,214 +- roll $5,%edx +- addl %esi,%ecx +- xorl %eax,%edi +- rorl $7,%ebp +- paddd %xmm9,%xmm1 +- addl %edx,%ecx +- addl 36(%rsp),%ebx +- xorl %ebp,%edi +- movl %ecx,%esi +- movdqa %xmm1,16(%rsp) +- roll $5,%ecx +- addl %edi,%ebx +- xorl %ebp,%esi +- rorl $7,%edx +- psubd %xmm9,%xmm1 +- addl %ecx,%ebx +- addl 40(%rsp),%eax +- xorl %edx,%esi +- movl %ebx,%edi +- roll $5,%ebx +- addl %esi,%eax +- xorl %edx,%edi +- rorl $7,%ecx +- addl %ebx,%eax +- addl 44(%rsp),%ebp +- xorl %ecx,%edi +- movl %eax,%esi +- roll $5,%eax +- addl %edi,%ebp +- xorl %ecx,%esi +- rorl $7,%ebx +- addl %eax,%ebp +- addl 48(%rsp),%edx +- xorl %ebx,%esi +- movl %ebp,%edi +-.byte 102,15,56,0,222 +- roll $5,%ebp +- addl %esi,%edx +- xorl %ebx,%edi +- rorl $7,%eax +- paddd %xmm9,%xmm2 +- addl %ebp,%edx +- addl 52(%rsp),%ecx +- xorl %eax,%edi +- movl %edx,%esi +- movdqa %xmm2,32(%rsp) +- roll $5,%edx +- addl %edi,%ecx +- xorl %eax,%esi +- rorl $7,%ebp +- psubd %xmm9,%xmm2 +- addl %edx,%ecx +- addl 56(%rsp),%ebx +- xorl %ebp,%esi +- movl %ecx,%edi +- roll $5,%ecx +- addl %esi,%ebx +- xorl %ebp,%edi +- rorl $7,%edx +- addl %ecx,%ebx +- addl 60(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- roll $5,%ebx +- addl %edi,%eax +- rorl $7,%ecx +- addl %ebx,%eax +- addl 0(%r8),%eax +- addl 4(%r8),%esi +- addl 8(%r8),%ecx +- addl 12(%r8),%edx +- movl %eax,0(%r8) +- addl 16(%r8),%ebp +- movl %esi,4(%r8) +- movl %esi,%ebx +- movl %ecx,8(%r8) +- movl %ecx,%edi +- movl %edx,12(%r8) +- xorl %edx,%edi +- movl %ebp,16(%r8) +- andl %edi,%esi +- jmp L$oop_ssse3 +- +-.p2align 4 +-L$done_ssse3: +- addl 16(%rsp),%ebx +- xorl %ebp,%esi +- movl %ecx,%edi +- roll $5,%ecx +- addl %esi,%ebx +- xorl %ebp,%edi +- rorl $7,%edx +- addl %ecx,%ebx +- addl 20(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- roll $5,%ebx +- addl %edi,%eax +- xorl %edx,%esi +- rorl $7,%ecx +- addl %ebx,%eax +- addl 24(%rsp),%ebp +- xorl %ecx,%esi +- movl %eax,%edi +- roll $5,%eax +- addl %esi,%ebp +- xorl %ecx,%edi +- rorl $7,%ebx +- addl %eax,%ebp +- addl 28(%rsp),%edx +- xorl %ebx,%edi +- movl %ebp,%esi +- roll $5,%ebp +- addl %edi,%edx +- xorl %ebx,%esi +- rorl $7,%eax +- addl %ebp,%edx +- addl 32(%rsp),%ecx +- xorl %eax,%esi +- movl %edx,%edi +- roll $5,%edx +- addl %esi,%ecx +- xorl %eax,%edi +- rorl $7,%ebp +- addl %edx,%ecx +- addl 36(%rsp),%ebx +- xorl %ebp,%edi +- movl %ecx,%esi +- roll $5,%ecx +- addl %edi,%ebx +- xorl %ebp,%esi +- rorl $7,%edx +- addl %ecx,%ebx +- addl 40(%rsp),%eax +- xorl %edx,%esi +- movl %ebx,%edi +- roll $5,%ebx +- addl 
%esi,%eax +- xorl %edx,%edi +- rorl $7,%ecx +- addl %ebx,%eax +- addl 44(%rsp),%ebp +- xorl %ecx,%edi +- movl %eax,%esi +- roll $5,%eax +- addl %edi,%ebp +- xorl %ecx,%esi +- rorl $7,%ebx +- addl %eax,%ebp +- addl 48(%rsp),%edx +- xorl %ebx,%esi +- movl %ebp,%edi +- roll $5,%ebp +- addl %esi,%edx +- xorl %ebx,%edi +- rorl $7,%eax +- addl %ebp,%edx +- addl 52(%rsp),%ecx +- xorl %eax,%edi +- movl %edx,%esi +- roll $5,%edx +- addl %edi,%ecx +- xorl %eax,%esi +- rorl $7,%ebp +- addl %edx,%ecx +- addl 56(%rsp),%ebx +- xorl %ebp,%esi +- movl %ecx,%edi +- roll $5,%ecx +- addl %esi,%ebx +- xorl %ebp,%edi +- rorl $7,%edx +- addl %ecx,%ebx +- addl 60(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- roll $5,%ebx +- addl %edi,%eax +- rorl $7,%ecx +- addl %ebx,%eax +- addl 0(%r8),%eax +- addl 4(%r8),%esi +- addl 8(%r8),%ecx +- movl %eax,0(%r8) +- addl 12(%r8),%edx +- movl %esi,4(%r8) +- addl 16(%r8),%ebp +- movl %ecx,8(%r8) +- movl %edx,12(%r8) +- movl %ebp,16(%r8) +- movq -40(%r11),%r14 +- +- movq -32(%r11),%r13 +- +- movq -24(%r11),%r12 +- +- movq -16(%r11),%rbp +- +- movq -8(%r11),%rbx +- +- leaq (%r11),%rsp +- +-L$epilogue_ssse3: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-sha1_block_data_order_avx: +-_avx_shortcut: +- +- movq %rsp,%r11 +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- leaq -64(%rsp),%rsp +- vzeroupper +- andq $-64,%rsp +- movq %rdi,%r8 +- movq %rsi,%r9 +- movq %rdx,%r10 +- +- shlq $6,%r10 +- addq %r9,%r10 +- leaq K_XX_XX+64(%rip),%r14 +- +- movl 0(%r8),%eax +- movl 4(%r8),%ebx +- movl 8(%r8),%ecx +- movl 12(%r8),%edx +- movl %ebx,%esi +- movl 16(%r8),%ebp +- movl %ecx,%edi +- xorl %edx,%edi +- andl %edi,%esi +- +- vmovdqa 64(%r14),%xmm6 +- vmovdqa -64(%r14),%xmm11 +- vmovdqu 0(%r9),%xmm0 +- vmovdqu 16(%r9),%xmm1 +- vmovdqu 32(%r9),%xmm2 +- vmovdqu 48(%r9),%xmm3 +- vpshufb %xmm6,%xmm0,%xmm0 +- addq $64,%r9 +- vpshufb %xmm6,%xmm1,%xmm1 +- vpshufb %xmm6,%xmm2,%xmm2 +- vpshufb %xmm6,%xmm3,%xmm3 +- vpaddd %xmm11,%xmm0,%xmm4 +- vpaddd %xmm11,%xmm1,%xmm5 +- vpaddd %xmm11,%xmm2,%xmm6 +- vmovdqa %xmm4,0(%rsp) +- vmovdqa %xmm5,16(%rsp) +- vmovdqa %xmm6,32(%rsp) +- jmp L$oop_avx +-.p2align 4 +-L$oop_avx: +- shrdl $2,%ebx,%ebx +- xorl %edx,%esi +- vpalignr $8,%xmm0,%xmm1,%xmm4 +- movl %eax,%edi +- addl 0(%rsp),%ebp +- vpaddd %xmm3,%xmm11,%xmm9 +- xorl %ecx,%ebx +- shldl $5,%eax,%eax +- vpsrldq $4,%xmm3,%xmm8 +- addl %esi,%ebp +- andl %ebx,%edi +- vpxor %xmm0,%xmm4,%xmm4 +- xorl %ecx,%ebx +- addl %eax,%ebp +- vpxor %xmm2,%xmm8,%xmm8 +- shrdl $7,%eax,%eax +- xorl %ecx,%edi +- movl %ebp,%esi +- addl 4(%rsp),%edx +- vpxor %xmm8,%xmm4,%xmm4 +- xorl %ebx,%eax +- shldl $5,%ebp,%ebp +- vmovdqa %xmm9,48(%rsp) +- addl %edi,%edx +- andl %eax,%esi +- vpsrld $31,%xmm4,%xmm8 +- xorl %ebx,%eax +- addl %ebp,%edx +- shrdl $7,%ebp,%ebp +- xorl %ebx,%esi +- vpslldq $12,%xmm4,%xmm10 +- vpaddd %xmm4,%xmm4,%xmm4 +- movl %edx,%edi +- addl 8(%rsp),%ecx +- xorl %eax,%ebp +- shldl $5,%edx,%edx +- vpsrld $30,%xmm10,%xmm9 +- vpor %xmm8,%xmm4,%xmm4 +- addl %esi,%ecx +- andl %ebp,%edi +- xorl %eax,%ebp +- addl %edx,%ecx +- vpslld $2,%xmm10,%xmm10 +- vpxor %xmm9,%xmm4,%xmm4 +- shrdl $7,%edx,%edx +- xorl %eax,%edi +- movl %ecx,%esi +- addl 12(%rsp),%ebx +- vpxor %xmm10,%xmm4,%xmm4 +- xorl %ebp,%edx +- shldl $5,%ecx,%ecx +- addl %edi,%ebx +- andl %edx,%esi +- xorl %ebp,%edx +- addl %ecx,%ebx +- shrdl $7,%ecx,%ecx +- xorl %ebp,%esi +- vpalignr $8,%xmm1,%xmm2,%xmm5 +- movl %ebx,%edi +- addl 16(%rsp),%eax +- vpaddd %xmm4,%xmm11,%xmm9 +- xorl %edx,%ecx +- shldl $5,%ebx,%ebx +- vpsrldq 
$4,%xmm4,%xmm8 +- addl %esi,%eax +- andl %ecx,%edi +- vpxor %xmm1,%xmm5,%xmm5 +- xorl %edx,%ecx +- addl %ebx,%eax +- vpxor %xmm3,%xmm8,%xmm8 +- shrdl $7,%ebx,%ebx +- xorl %edx,%edi +- movl %eax,%esi +- addl 20(%rsp),%ebp +- vpxor %xmm8,%xmm5,%xmm5 +- xorl %ecx,%ebx +- shldl $5,%eax,%eax +- vmovdqa %xmm9,0(%rsp) +- addl %edi,%ebp +- andl %ebx,%esi +- vpsrld $31,%xmm5,%xmm8 +- xorl %ecx,%ebx +- addl %eax,%ebp +- shrdl $7,%eax,%eax +- xorl %ecx,%esi +- vpslldq $12,%xmm5,%xmm10 +- vpaddd %xmm5,%xmm5,%xmm5 +- movl %ebp,%edi +- addl 24(%rsp),%edx +- xorl %ebx,%eax +- shldl $5,%ebp,%ebp +- vpsrld $30,%xmm10,%xmm9 +- vpor %xmm8,%xmm5,%xmm5 +- addl %esi,%edx +- andl %eax,%edi +- xorl %ebx,%eax +- addl %ebp,%edx +- vpslld $2,%xmm10,%xmm10 +- vpxor %xmm9,%xmm5,%xmm5 +- shrdl $7,%ebp,%ebp +- xorl %ebx,%edi +- movl %edx,%esi +- addl 28(%rsp),%ecx +- vpxor %xmm10,%xmm5,%xmm5 +- xorl %eax,%ebp +- shldl $5,%edx,%edx +- vmovdqa -32(%r14),%xmm11 +- addl %edi,%ecx +- andl %ebp,%esi +- xorl %eax,%ebp +- addl %edx,%ecx +- shrdl $7,%edx,%edx +- xorl %eax,%esi +- vpalignr $8,%xmm2,%xmm3,%xmm6 +- movl %ecx,%edi +- addl 32(%rsp),%ebx +- vpaddd %xmm5,%xmm11,%xmm9 +- xorl %ebp,%edx +- shldl $5,%ecx,%ecx +- vpsrldq $4,%xmm5,%xmm8 +- addl %esi,%ebx +- andl %edx,%edi +- vpxor %xmm2,%xmm6,%xmm6 +- xorl %ebp,%edx +- addl %ecx,%ebx +- vpxor %xmm4,%xmm8,%xmm8 +- shrdl $7,%ecx,%ecx +- xorl %ebp,%edi +- movl %ebx,%esi +- addl 36(%rsp),%eax +- vpxor %xmm8,%xmm6,%xmm6 +- xorl %edx,%ecx +- shldl $5,%ebx,%ebx +- vmovdqa %xmm9,16(%rsp) +- addl %edi,%eax +- andl %ecx,%esi +- vpsrld $31,%xmm6,%xmm8 +- xorl %edx,%ecx +- addl %ebx,%eax +- shrdl $7,%ebx,%ebx +- xorl %edx,%esi +- vpslldq $12,%xmm6,%xmm10 +- vpaddd %xmm6,%xmm6,%xmm6 +- movl %eax,%edi +- addl 40(%rsp),%ebp +- xorl %ecx,%ebx +- shldl $5,%eax,%eax +- vpsrld $30,%xmm10,%xmm9 +- vpor %xmm8,%xmm6,%xmm6 +- addl %esi,%ebp +- andl %ebx,%edi +- xorl %ecx,%ebx +- addl %eax,%ebp +- vpslld $2,%xmm10,%xmm10 +- vpxor %xmm9,%xmm6,%xmm6 +- shrdl $7,%eax,%eax +- xorl %ecx,%edi +- movl %ebp,%esi +- addl 44(%rsp),%edx +- vpxor %xmm10,%xmm6,%xmm6 +- xorl %ebx,%eax +- shldl $5,%ebp,%ebp +- addl %edi,%edx +- andl %eax,%esi +- xorl %ebx,%eax +- addl %ebp,%edx +- shrdl $7,%ebp,%ebp +- xorl %ebx,%esi +- vpalignr $8,%xmm3,%xmm4,%xmm7 +- movl %edx,%edi +- addl 48(%rsp),%ecx +- vpaddd %xmm6,%xmm11,%xmm9 +- xorl %eax,%ebp +- shldl $5,%edx,%edx +- vpsrldq $4,%xmm6,%xmm8 +- addl %esi,%ecx +- andl %ebp,%edi +- vpxor %xmm3,%xmm7,%xmm7 +- xorl %eax,%ebp +- addl %edx,%ecx +- vpxor %xmm5,%xmm8,%xmm8 +- shrdl $7,%edx,%edx +- xorl %eax,%edi +- movl %ecx,%esi +- addl 52(%rsp),%ebx +- vpxor %xmm8,%xmm7,%xmm7 +- xorl %ebp,%edx +- shldl $5,%ecx,%ecx +- vmovdqa %xmm9,32(%rsp) +- addl %edi,%ebx +- andl %edx,%esi +- vpsrld $31,%xmm7,%xmm8 +- xorl %ebp,%edx +- addl %ecx,%ebx +- shrdl $7,%ecx,%ecx +- xorl %ebp,%esi +- vpslldq $12,%xmm7,%xmm10 +- vpaddd %xmm7,%xmm7,%xmm7 +- movl %ebx,%edi +- addl 56(%rsp),%eax +- xorl %edx,%ecx +- shldl $5,%ebx,%ebx +- vpsrld $30,%xmm10,%xmm9 +- vpor %xmm8,%xmm7,%xmm7 +- addl %esi,%eax +- andl %ecx,%edi +- xorl %edx,%ecx +- addl %ebx,%eax +- vpslld $2,%xmm10,%xmm10 +- vpxor %xmm9,%xmm7,%xmm7 +- shrdl $7,%ebx,%ebx +- xorl %edx,%edi +- movl %eax,%esi +- addl 60(%rsp),%ebp +- vpxor %xmm10,%xmm7,%xmm7 +- xorl %ecx,%ebx +- shldl $5,%eax,%eax +- addl %edi,%ebp +- andl %ebx,%esi +- xorl %ecx,%ebx +- addl %eax,%ebp +- vpalignr $8,%xmm6,%xmm7,%xmm8 +- vpxor %xmm4,%xmm0,%xmm0 +- shrdl $7,%eax,%eax +- xorl %ecx,%esi +- movl %ebp,%edi +- addl 0(%rsp),%edx +- vpxor %xmm1,%xmm0,%xmm0 +- xorl 
%ebx,%eax +- shldl $5,%ebp,%ebp +- vpaddd %xmm7,%xmm11,%xmm9 +- addl %esi,%edx +- andl %eax,%edi +- vpxor %xmm8,%xmm0,%xmm0 +- xorl %ebx,%eax +- addl %ebp,%edx +- shrdl $7,%ebp,%ebp +- xorl %ebx,%edi +- vpsrld $30,%xmm0,%xmm8 +- vmovdqa %xmm9,48(%rsp) +- movl %edx,%esi +- addl 4(%rsp),%ecx +- xorl %eax,%ebp +- shldl $5,%edx,%edx +- vpslld $2,%xmm0,%xmm0 +- addl %edi,%ecx +- andl %ebp,%esi +- xorl %eax,%ebp +- addl %edx,%ecx +- shrdl $7,%edx,%edx +- xorl %eax,%esi +- movl %ecx,%edi +- addl 8(%rsp),%ebx +- vpor %xmm8,%xmm0,%xmm0 +- xorl %ebp,%edx +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- andl %edx,%edi +- xorl %ebp,%edx +- addl %ecx,%ebx +- addl 12(%rsp),%eax +- xorl %ebp,%edi +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %edi,%eax +- xorl %edx,%esi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vpalignr $8,%xmm7,%xmm0,%xmm8 +- vpxor %xmm5,%xmm1,%xmm1 +- addl 16(%rsp),%ebp +- xorl %ecx,%esi +- movl %eax,%edi +- shldl $5,%eax,%eax +- vpxor %xmm2,%xmm1,%xmm1 +- addl %esi,%ebp +- xorl %ecx,%edi +- vpaddd %xmm0,%xmm11,%xmm9 +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- vpxor %xmm8,%xmm1,%xmm1 +- addl 20(%rsp),%edx +- xorl %ebx,%edi +- movl %ebp,%esi +- shldl $5,%ebp,%ebp +- vpsrld $30,%xmm1,%xmm8 +- vmovdqa %xmm9,0(%rsp) +- addl %edi,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- vpslld $2,%xmm1,%xmm1 +- addl 24(%rsp),%ecx +- xorl %eax,%esi +- movl %edx,%edi +- shldl $5,%edx,%edx +- addl %esi,%ecx +- xorl %eax,%edi +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx +- vpor %xmm8,%xmm1,%xmm1 +- addl 28(%rsp),%ebx +- xorl %ebp,%edi +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- addl %edi,%ebx +- xorl %ebp,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- vpalignr $8,%xmm0,%xmm1,%xmm8 +- vpxor %xmm6,%xmm2,%xmm2 +- addl 32(%rsp),%eax +- xorl %edx,%esi +- movl %ebx,%edi +- shldl $5,%ebx,%ebx +- vpxor %xmm3,%xmm2,%xmm2 +- addl %esi,%eax +- xorl %edx,%edi +- vpaddd %xmm1,%xmm11,%xmm9 +- vmovdqa 0(%r14),%xmm11 +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vpxor %xmm8,%xmm2,%xmm2 +- addl 36(%rsp),%ebp +- xorl %ecx,%edi +- movl %eax,%esi +- shldl $5,%eax,%eax +- vpsrld $30,%xmm2,%xmm8 +- vmovdqa %xmm9,16(%rsp) +- addl %edi,%ebp +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- vpslld $2,%xmm2,%xmm2 +- addl 40(%rsp),%edx +- xorl %ebx,%esi +- movl %ebp,%edi +- shldl $5,%ebp,%ebp +- addl %esi,%edx +- xorl %ebx,%edi +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- vpor %xmm8,%xmm2,%xmm2 +- addl 44(%rsp),%ecx +- xorl %eax,%edi +- movl %edx,%esi +- shldl $5,%edx,%edx +- addl %edi,%ecx +- xorl %eax,%esi +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx +- vpalignr $8,%xmm1,%xmm2,%xmm8 +- vpxor %xmm7,%xmm3,%xmm3 +- addl 48(%rsp),%ebx +- xorl %ebp,%esi +- movl %ecx,%edi +- shldl $5,%ecx,%ecx +- vpxor %xmm4,%xmm3,%xmm3 +- addl %esi,%ebx +- xorl %ebp,%edi +- vpaddd %xmm2,%xmm11,%xmm9 +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- vpxor %xmm8,%xmm3,%xmm3 +- addl 52(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- vpsrld $30,%xmm3,%xmm8 +- vmovdqa %xmm9,32(%rsp) +- addl %edi,%eax +- xorl %edx,%esi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vpslld $2,%xmm3,%xmm3 +- addl 56(%rsp),%ebp +- xorl %ecx,%esi +- movl %eax,%edi +- shldl $5,%eax,%eax +- addl %esi,%ebp +- xorl %ecx,%edi +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- vpor %xmm8,%xmm3,%xmm3 +- addl 60(%rsp),%edx +- xorl %ebx,%edi +- movl %ebp,%esi +- shldl $5,%ebp,%ebp +- addl %edi,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- vpalignr $8,%xmm2,%xmm3,%xmm8 +- vpxor %xmm0,%xmm4,%xmm4 +- addl 0(%rsp),%ecx +- xorl %eax,%esi +- movl %edx,%edi 
+- shldl $5,%edx,%edx +- vpxor %xmm5,%xmm4,%xmm4 +- addl %esi,%ecx +- xorl %eax,%edi +- vpaddd %xmm3,%xmm11,%xmm9 +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx +- vpxor %xmm8,%xmm4,%xmm4 +- addl 4(%rsp),%ebx +- xorl %ebp,%edi +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- vpsrld $30,%xmm4,%xmm8 +- vmovdqa %xmm9,48(%rsp) +- addl %edi,%ebx +- xorl %ebp,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- vpslld $2,%xmm4,%xmm4 +- addl 8(%rsp),%eax +- xorl %edx,%esi +- movl %ebx,%edi +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- xorl %edx,%edi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vpor %xmm8,%xmm4,%xmm4 +- addl 12(%rsp),%ebp +- xorl %ecx,%edi +- movl %eax,%esi +- shldl $5,%eax,%eax +- addl %edi,%ebp +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- vpalignr $8,%xmm3,%xmm4,%xmm8 +- vpxor %xmm1,%xmm5,%xmm5 +- addl 16(%rsp),%edx +- xorl %ebx,%esi +- movl %ebp,%edi +- shldl $5,%ebp,%ebp +- vpxor %xmm6,%xmm5,%xmm5 +- addl %esi,%edx +- xorl %ebx,%edi +- vpaddd %xmm4,%xmm11,%xmm9 +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- vpxor %xmm8,%xmm5,%xmm5 +- addl 20(%rsp),%ecx +- xorl %eax,%edi +- movl %edx,%esi +- shldl $5,%edx,%edx +- vpsrld $30,%xmm5,%xmm8 +- vmovdqa %xmm9,0(%rsp) +- addl %edi,%ecx +- xorl %eax,%esi +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx +- vpslld $2,%xmm5,%xmm5 +- addl 24(%rsp),%ebx +- xorl %ebp,%esi +- movl %ecx,%edi +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %ebp,%edi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- vpor %xmm8,%xmm5,%xmm5 +- addl 28(%rsp),%eax +- shrdl $7,%ecx,%ecx +- movl %ebx,%esi +- xorl %edx,%edi +- shldl $5,%ebx,%ebx +- addl %edi,%eax +- xorl %ecx,%esi +- xorl %edx,%ecx +- addl %ebx,%eax +- vpalignr $8,%xmm4,%xmm5,%xmm8 +- vpxor %xmm2,%xmm6,%xmm6 +- addl 32(%rsp),%ebp +- andl %ecx,%esi +- xorl %edx,%ecx +- shrdl $7,%ebx,%ebx +- vpxor %xmm7,%xmm6,%xmm6 +- movl %eax,%edi +- xorl %ecx,%esi +- vpaddd %xmm5,%xmm11,%xmm9 +- shldl $5,%eax,%eax +- addl %esi,%ebp +- vpxor %xmm8,%xmm6,%xmm6 +- xorl %ebx,%edi +- xorl %ecx,%ebx +- addl %eax,%ebp +- addl 36(%rsp),%edx +- vpsrld $30,%xmm6,%xmm8 +- vmovdqa %xmm9,16(%rsp) +- andl %ebx,%edi +- xorl %ecx,%ebx +- shrdl $7,%eax,%eax +- movl %ebp,%esi +- vpslld $2,%xmm6,%xmm6 +- xorl %ebx,%edi +- shldl $5,%ebp,%ebp +- addl %edi,%edx +- xorl %eax,%esi +- xorl %ebx,%eax +- addl %ebp,%edx +- addl 40(%rsp),%ecx +- andl %eax,%esi +- vpor %xmm8,%xmm6,%xmm6 +- xorl %ebx,%eax +- shrdl $7,%ebp,%ebp +- movl %edx,%edi +- xorl %eax,%esi +- shldl $5,%edx,%edx +- addl %esi,%ecx +- xorl %ebp,%edi +- xorl %eax,%ebp +- addl %edx,%ecx +- addl 44(%rsp),%ebx +- andl %ebp,%edi +- xorl %eax,%ebp +- shrdl $7,%edx,%edx +- movl %ecx,%esi +- xorl %ebp,%edi +- shldl $5,%ecx,%ecx +- addl %edi,%ebx +- xorl %edx,%esi +- xorl %ebp,%edx +- addl %ecx,%ebx +- vpalignr $8,%xmm5,%xmm6,%xmm8 +- vpxor %xmm3,%xmm7,%xmm7 +- addl 48(%rsp),%eax +- andl %edx,%esi +- xorl %ebp,%edx +- shrdl $7,%ecx,%ecx +- vpxor %xmm0,%xmm7,%xmm7 +- movl %ebx,%edi +- xorl %edx,%esi +- vpaddd %xmm6,%xmm11,%xmm9 +- vmovdqa 32(%r14),%xmm11 +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- vpxor %xmm8,%xmm7,%xmm7 +- xorl %ecx,%edi +- xorl %edx,%ecx +- addl %ebx,%eax +- addl 52(%rsp),%ebp +- vpsrld $30,%xmm7,%xmm8 +- vmovdqa %xmm9,32(%rsp) +- andl %ecx,%edi +- xorl %edx,%ecx +- shrdl $7,%ebx,%ebx +- movl %eax,%esi +- vpslld $2,%xmm7,%xmm7 +- xorl %ecx,%edi +- shldl $5,%eax,%eax +- addl %edi,%ebp +- xorl %ebx,%esi +- xorl %ecx,%ebx +- addl %eax,%ebp +- addl 56(%rsp),%edx +- andl %ebx,%esi +- vpor %xmm8,%xmm7,%xmm7 +- xorl %ecx,%ebx +- shrdl $7,%eax,%eax +- movl %ebp,%edi +- xorl %ebx,%esi +- shldl $5,%ebp,%ebp 
+- addl %esi,%edx +- xorl %eax,%edi +- xorl %ebx,%eax +- addl %ebp,%edx +- addl 60(%rsp),%ecx +- andl %eax,%edi +- xorl %ebx,%eax +- shrdl $7,%ebp,%ebp +- movl %edx,%esi +- xorl %eax,%edi +- shldl $5,%edx,%edx +- addl %edi,%ecx +- xorl %ebp,%esi +- xorl %eax,%ebp +- addl %edx,%ecx +- vpalignr $8,%xmm6,%xmm7,%xmm8 +- vpxor %xmm4,%xmm0,%xmm0 +- addl 0(%rsp),%ebx +- andl %ebp,%esi +- xorl %eax,%ebp +- shrdl $7,%edx,%edx +- vpxor %xmm1,%xmm0,%xmm0 +- movl %ecx,%edi +- xorl %ebp,%esi +- vpaddd %xmm7,%xmm11,%xmm9 +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- vpxor %xmm8,%xmm0,%xmm0 +- xorl %edx,%edi +- xorl %ebp,%edx +- addl %ecx,%ebx +- addl 4(%rsp),%eax +- vpsrld $30,%xmm0,%xmm8 +- vmovdqa %xmm9,48(%rsp) +- andl %edx,%edi +- xorl %ebp,%edx +- shrdl $7,%ecx,%ecx +- movl %ebx,%esi +- vpslld $2,%xmm0,%xmm0 +- xorl %edx,%edi +- shldl $5,%ebx,%ebx +- addl %edi,%eax +- xorl %ecx,%esi +- xorl %edx,%ecx +- addl %ebx,%eax +- addl 8(%rsp),%ebp +- andl %ecx,%esi +- vpor %xmm8,%xmm0,%xmm0 +- xorl %edx,%ecx +- shrdl $7,%ebx,%ebx +- movl %eax,%edi +- xorl %ecx,%esi +- shldl $5,%eax,%eax +- addl %esi,%ebp +- xorl %ebx,%edi +- xorl %ecx,%ebx +- addl %eax,%ebp +- addl 12(%rsp),%edx +- andl %ebx,%edi +- xorl %ecx,%ebx +- shrdl $7,%eax,%eax +- movl %ebp,%esi +- xorl %ebx,%edi +- shldl $5,%ebp,%ebp +- addl %edi,%edx +- xorl %eax,%esi +- xorl %ebx,%eax +- addl %ebp,%edx +- vpalignr $8,%xmm7,%xmm0,%xmm8 +- vpxor %xmm5,%xmm1,%xmm1 +- addl 16(%rsp),%ecx +- andl %eax,%esi +- xorl %ebx,%eax +- shrdl $7,%ebp,%ebp +- vpxor %xmm2,%xmm1,%xmm1 +- movl %edx,%edi +- xorl %eax,%esi +- vpaddd %xmm0,%xmm11,%xmm9 +- shldl $5,%edx,%edx +- addl %esi,%ecx +- vpxor %xmm8,%xmm1,%xmm1 +- xorl %ebp,%edi +- xorl %eax,%ebp +- addl %edx,%ecx +- addl 20(%rsp),%ebx +- vpsrld $30,%xmm1,%xmm8 +- vmovdqa %xmm9,0(%rsp) +- andl %ebp,%edi +- xorl %eax,%ebp +- shrdl $7,%edx,%edx +- movl %ecx,%esi +- vpslld $2,%xmm1,%xmm1 +- xorl %ebp,%edi +- shldl $5,%ecx,%ecx +- addl %edi,%ebx +- xorl %edx,%esi +- xorl %ebp,%edx +- addl %ecx,%ebx +- addl 24(%rsp),%eax +- andl %edx,%esi +- vpor %xmm8,%xmm1,%xmm1 +- xorl %ebp,%edx +- shrdl $7,%ecx,%ecx +- movl %ebx,%edi +- xorl %edx,%esi +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- xorl %ecx,%edi +- xorl %edx,%ecx +- addl %ebx,%eax +- addl 28(%rsp),%ebp +- andl %ecx,%edi +- xorl %edx,%ecx +- shrdl $7,%ebx,%ebx +- movl %eax,%esi +- xorl %ecx,%edi +- shldl $5,%eax,%eax +- addl %edi,%ebp +- xorl %ebx,%esi +- xorl %ecx,%ebx +- addl %eax,%ebp +- vpalignr $8,%xmm0,%xmm1,%xmm8 +- vpxor %xmm6,%xmm2,%xmm2 +- addl 32(%rsp),%edx +- andl %ebx,%esi +- xorl %ecx,%ebx +- shrdl $7,%eax,%eax +- vpxor %xmm3,%xmm2,%xmm2 +- movl %ebp,%edi +- xorl %ebx,%esi +- vpaddd %xmm1,%xmm11,%xmm9 +- shldl $5,%ebp,%ebp +- addl %esi,%edx +- vpxor %xmm8,%xmm2,%xmm2 +- xorl %eax,%edi +- xorl %ebx,%eax +- addl %ebp,%edx +- addl 36(%rsp),%ecx +- vpsrld $30,%xmm2,%xmm8 +- vmovdqa %xmm9,16(%rsp) +- andl %eax,%edi +- xorl %ebx,%eax +- shrdl $7,%ebp,%ebp +- movl %edx,%esi +- vpslld $2,%xmm2,%xmm2 +- xorl %eax,%edi +- shldl $5,%edx,%edx +- addl %edi,%ecx +- xorl %ebp,%esi +- xorl %eax,%ebp +- addl %edx,%ecx +- addl 40(%rsp),%ebx +- andl %ebp,%esi +- vpor %xmm8,%xmm2,%xmm2 +- xorl %eax,%ebp +- shrdl $7,%edx,%edx +- movl %ecx,%edi +- xorl %ebp,%esi +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %edx,%edi +- xorl %ebp,%edx +- addl %ecx,%ebx +- addl 44(%rsp),%eax +- andl %edx,%edi +- xorl %ebp,%edx +- shrdl $7,%ecx,%ecx +- movl %ebx,%esi +- xorl %edx,%edi +- shldl $5,%ebx,%ebx +- addl %edi,%eax +- xorl %edx,%esi +- addl %ebx,%eax +- vpalignr $8,%xmm1,%xmm2,%xmm8 +- 
vpxor %xmm7,%xmm3,%xmm3 +- addl 48(%rsp),%ebp +- xorl %ecx,%esi +- movl %eax,%edi +- shldl $5,%eax,%eax +- vpxor %xmm4,%xmm3,%xmm3 +- addl %esi,%ebp +- xorl %ecx,%edi +- vpaddd %xmm2,%xmm11,%xmm9 +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- vpxor %xmm8,%xmm3,%xmm3 +- addl 52(%rsp),%edx +- xorl %ebx,%edi +- movl %ebp,%esi +- shldl $5,%ebp,%ebp +- vpsrld $30,%xmm3,%xmm8 +- vmovdqa %xmm9,32(%rsp) +- addl %edi,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- vpslld $2,%xmm3,%xmm3 +- addl 56(%rsp),%ecx +- xorl %eax,%esi +- movl %edx,%edi +- shldl $5,%edx,%edx +- addl %esi,%ecx +- xorl %eax,%edi +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx +- vpor %xmm8,%xmm3,%xmm3 +- addl 60(%rsp),%ebx +- xorl %ebp,%edi +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- addl %edi,%ebx +- xorl %ebp,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 0(%rsp),%eax +- vpaddd %xmm3,%xmm11,%xmm9 +- xorl %edx,%esi +- movl %ebx,%edi +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- vmovdqa %xmm9,48(%rsp) +- xorl %edx,%edi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 4(%rsp),%ebp +- xorl %ecx,%edi +- movl %eax,%esi +- shldl $5,%eax,%eax +- addl %edi,%ebp +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- addl 8(%rsp),%edx +- xorl %ebx,%esi +- movl %ebp,%edi +- shldl $5,%ebp,%ebp +- addl %esi,%edx +- xorl %ebx,%edi +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- addl 12(%rsp),%ecx +- xorl %eax,%edi +- movl %edx,%esi +- shldl $5,%edx,%edx +- addl %edi,%ecx +- xorl %eax,%esi +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx +- cmpq %r10,%r9 +- je L$done_avx +- vmovdqa 64(%r14),%xmm6 +- vmovdqa -64(%r14),%xmm11 +- vmovdqu 0(%r9),%xmm0 +- vmovdqu 16(%r9),%xmm1 +- vmovdqu 32(%r9),%xmm2 +- vmovdqu 48(%r9),%xmm3 +- vpshufb %xmm6,%xmm0,%xmm0 +- addq $64,%r9 +- addl 16(%rsp),%ebx +- xorl %ebp,%esi +- vpshufb %xmm6,%xmm1,%xmm1 +- movl %ecx,%edi +- shldl $5,%ecx,%ecx +- vpaddd %xmm11,%xmm0,%xmm4 +- addl %esi,%ebx +- xorl %ebp,%edi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- vmovdqa %xmm4,0(%rsp) +- addl 20(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %edi,%eax +- xorl %edx,%esi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 24(%rsp),%ebp +- xorl %ecx,%esi +- movl %eax,%edi +- shldl $5,%eax,%eax +- addl %esi,%ebp +- xorl %ecx,%edi +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- addl 28(%rsp),%edx +- xorl %ebx,%edi +- movl %ebp,%esi +- shldl $5,%ebp,%ebp +- addl %edi,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- addl 32(%rsp),%ecx +- xorl %eax,%esi +- vpshufb %xmm6,%xmm2,%xmm2 +- movl %edx,%edi +- shldl $5,%edx,%edx +- vpaddd %xmm11,%xmm1,%xmm5 +- addl %esi,%ecx +- xorl %eax,%edi +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx +- vmovdqa %xmm5,16(%rsp) +- addl 36(%rsp),%ebx +- xorl %ebp,%edi +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- addl %edi,%ebx +- xorl %ebp,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 40(%rsp),%eax +- xorl %edx,%esi +- movl %ebx,%edi +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- xorl %edx,%edi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 44(%rsp),%ebp +- xorl %ecx,%edi +- movl %eax,%esi +- shldl $5,%eax,%eax +- addl %edi,%ebp +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- addl 48(%rsp),%edx +- xorl %ebx,%esi +- vpshufb %xmm6,%xmm3,%xmm3 +- movl %ebp,%edi +- shldl $5,%ebp,%ebp +- vpaddd %xmm11,%xmm2,%xmm6 +- addl %esi,%edx +- xorl %ebx,%edi +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- vmovdqa %xmm6,32(%rsp) +- addl 52(%rsp),%ecx +- xorl %eax,%edi +- movl %edx,%esi +- shldl $5,%edx,%edx +- addl %edi,%ecx +- xorl %eax,%esi +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx 
+- addl 56(%rsp),%ebx +- xorl %ebp,%esi +- movl %ecx,%edi +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %ebp,%edi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 60(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %edi,%eax +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 0(%r8),%eax +- addl 4(%r8),%esi +- addl 8(%r8),%ecx +- addl 12(%r8),%edx +- movl %eax,0(%r8) +- addl 16(%r8),%ebp +- movl %esi,4(%r8) +- movl %esi,%ebx +- movl %ecx,8(%r8) +- movl %ecx,%edi +- movl %edx,12(%r8) +- xorl %edx,%edi +- movl %ebp,16(%r8) +- andl %edi,%esi +- jmp L$oop_avx +- +-.p2align 4 +-L$done_avx: +- addl 16(%rsp),%ebx +- xorl %ebp,%esi +- movl %ecx,%edi +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %ebp,%edi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 20(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %edi,%eax +- xorl %edx,%esi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 24(%rsp),%ebp +- xorl %ecx,%esi +- movl %eax,%edi +- shldl $5,%eax,%eax +- addl %esi,%ebp +- xorl %ecx,%edi +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- addl 28(%rsp),%edx +- xorl %ebx,%edi +- movl %ebp,%esi +- shldl $5,%ebp,%ebp +- addl %edi,%edx +- xorl %ebx,%esi +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- addl 32(%rsp),%ecx +- xorl %eax,%esi +- movl %edx,%edi +- shldl $5,%edx,%edx +- addl %esi,%ecx +- xorl %eax,%edi +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx +- addl 36(%rsp),%ebx +- xorl %ebp,%edi +- movl %ecx,%esi +- shldl $5,%ecx,%ecx +- addl %edi,%ebx +- xorl %ebp,%esi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 40(%rsp),%eax +- xorl %edx,%esi +- movl %ebx,%edi +- shldl $5,%ebx,%ebx +- addl %esi,%eax +- xorl %edx,%edi +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- addl 44(%rsp),%ebp +- xorl %ecx,%edi +- movl %eax,%esi +- shldl $5,%eax,%eax +- addl %edi,%ebp +- xorl %ecx,%esi +- shrdl $7,%ebx,%ebx +- addl %eax,%ebp +- addl 48(%rsp),%edx +- xorl %ebx,%esi +- movl %ebp,%edi +- shldl $5,%ebp,%ebp +- addl %esi,%edx +- xorl %ebx,%edi +- shrdl $7,%eax,%eax +- addl %ebp,%edx +- addl 52(%rsp),%ecx +- xorl %eax,%edi +- movl %edx,%esi +- shldl $5,%edx,%edx +- addl %edi,%ecx +- xorl %eax,%esi +- shrdl $7,%ebp,%ebp +- addl %edx,%ecx +- addl 56(%rsp),%ebx +- xorl %ebp,%esi +- movl %ecx,%edi +- shldl $5,%ecx,%ecx +- addl %esi,%ebx +- xorl %ebp,%edi +- shrdl $7,%edx,%edx +- addl %ecx,%ebx +- addl 60(%rsp),%eax +- xorl %edx,%edi +- movl %ebx,%esi +- shldl $5,%ebx,%ebx +- addl %edi,%eax +- shrdl $7,%ecx,%ecx +- addl %ebx,%eax +- vzeroupper +- +- addl 0(%r8),%eax +- addl 4(%r8),%esi +- addl 8(%r8),%ecx +- movl %eax,0(%r8) +- addl 12(%r8),%edx +- movl %esi,4(%r8) +- addl 16(%r8),%ebp +- movl %ecx,8(%r8) +- movl %edx,12(%r8) +- movl %ebp,16(%r8) +- movq -40(%r11),%r14 +- +- movq -32(%r11),%r13 +- +- movq -24(%r11),%r12 +- +- movq -16(%r11),%rbp +- +- movq -8(%r11),%rbx +- +- leaq (%r11),%rsp +- +-L$epilogue_avx: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-sha1_block_data_order_avx2: +-_avx2_shortcut: +- +- movq %rsp,%r11 +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- vzeroupper +- movq %rdi,%r8 +- movq %rsi,%r9 +- movq %rdx,%r10 +- +- leaq -640(%rsp),%rsp +- shlq $6,%r10 +- leaq 64(%r9),%r13 +- andq $-128,%rsp +- addq %r9,%r10 +- leaq K_XX_XX+64(%rip),%r14 +- +- movl 0(%r8),%eax +- cmpq %r10,%r13 +- cmovaeq %r9,%r13 +- movl 4(%r8),%ebp +- movl 8(%r8),%ecx +- movl 12(%r8),%edx +- movl 16(%r8),%esi +- vmovdqu 64(%r14),%ymm6 +- +- vmovdqu (%r9),%xmm0 +- vmovdqu 16(%r9),%xmm1 +- vmovdqu 32(%r9),%xmm2 +- vmovdqu 48(%r9),%xmm3 +- leaq 64(%r9),%r9 +- 
vinserti128 $1,(%r13),%ymm0,%ymm0 +- vinserti128 $1,16(%r13),%ymm1,%ymm1 +- vpshufb %ymm6,%ymm0,%ymm0 +- vinserti128 $1,32(%r13),%ymm2,%ymm2 +- vpshufb %ymm6,%ymm1,%ymm1 +- vinserti128 $1,48(%r13),%ymm3,%ymm3 +- vpshufb %ymm6,%ymm2,%ymm2 +- vmovdqu -64(%r14),%ymm11 +- vpshufb %ymm6,%ymm3,%ymm3 +- +- vpaddd %ymm11,%ymm0,%ymm4 +- vpaddd %ymm11,%ymm1,%ymm5 +- vmovdqu %ymm4,0(%rsp) +- vpaddd %ymm11,%ymm2,%ymm6 +- vmovdqu %ymm5,32(%rsp) +- vpaddd %ymm11,%ymm3,%ymm7 +- vmovdqu %ymm6,64(%rsp) +- vmovdqu %ymm7,96(%rsp) +- vpalignr $8,%ymm0,%ymm1,%ymm4 +- vpsrldq $4,%ymm3,%ymm8 +- vpxor %ymm0,%ymm4,%ymm4 +- vpxor %ymm2,%ymm8,%ymm8 +- vpxor %ymm8,%ymm4,%ymm4 +- vpsrld $31,%ymm4,%ymm8 +- vpslldq $12,%ymm4,%ymm10 +- vpaddd %ymm4,%ymm4,%ymm4 +- vpsrld $30,%ymm10,%ymm9 +- vpor %ymm8,%ymm4,%ymm4 +- vpslld $2,%ymm10,%ymm10 +- vpxor %ymm9,%ymm4,%ymm4 +- vpxor %ymm10,%ymm4,%ymm4 +- vpaddd %ymm11,%ymm4,%ymm9 +- vmovdqu %ymm9,128(%rsp) +- vpalignr $8,%ymm1,%ymm2,%ymm5 +- vpsrldq $4,%ymm4,%ymm8 +- vpxor %ymm1,%ymm5,%ymm5 +- vpxor %ymm3,%ymm8,%ymm8 +- vpxor %ymm8,%ymm5,%ymm5 +- vpsrld $31,%ymm5,%ymm8 +- vmovdqu -32(%r14),%ymm11 +- vpslldq $12,%ymm5,%ymm10 +- vpaddd %ymm5,%ymm5,%ymm5 +- vpsrld $30,%ymm10,%ymm9 +- vpor %ymm8,%ymm5,%ymm5 +- vpslld $2,%ymm10,%ymm10 +- vpxor %ymm9,%ymm5,%ymm5 +- vpxor %ymm10,%ymm5,%ymm5 +- vpaddd %ymm11,%ymm5,%ymm9 +- vmovdqu %ymm9,160(%rsp) +- vpalignr $8,%ymm2,%ymm3,%ymm6 +- vpsrldq $4,%ymm5,%ymm8 +- vpxor %ymm2,%ymm6,%ymm6 +- vpxor %ymm4,%ymm8,%ymm8 +- vpxor %ymm8,%ymm6,%ymm6 +- vpsrld $31,%ymm6,%ymm8 +- vpslldq $12,%ymm6,%ymm10 +- vpaddd %ymm6,%ymm6,%ymm6 +- vpsrld $30,%ymm10,%ymm9 +- vpor %ymm8,%ymm6,%ymm6 +- vpslld $2,%ymm10,%ymm10 +- vpxor %ymm9,%ymm6,%ymm6 +- vpxor %ymm10,%ymm6,%ymm6 +- vpaddd %ymm11,%ymm6,%ymm9 +- vmovdqu %ymm9,192(%rsp) +- vpalignr $8,%ymm3,%ymm4,%ymm7 +- vpsrldq $4,%ymm6,%ymm8 +- vpxor %ymm3,%ymm7,%ymm7 +- vpxor %ymm5,%ymm8,%ymm8 +- vpxor %ymm8,%ymm7,%ymm7 +- vpsrld $31,%ymm7,%ymm8 +- vpslldq $12,%ymm7,%ymm10 +- vpaddd %ymm7,%ymm7,%ymm7 +- vpsrld $30,%ymm10,%ymm9 +- vpor %ymm8,%ymm7,%ymm7 +- vpslld $2,%ymm10,%ymm10 +- vpxor %ymm9,%ymm7,%ymm7 +- vpxor %ymm10,%ymm7,%ymm7 +- vpaddd %ymm11,%ymm7,%ymm9 +- vmovdqu %ymm9,224(%rsp) +- leaq 128(%rsp),%r13 +- jmp L$oop_avx2 +-.p2align 5 +-L$oop_avx2: +- rorxl $2,%ebp,%ebx +- andnl %edx,%ebp,%edi +- andl %ecx,%ebp +- xorl %edi,%ebp +- jmp L$align32_1 +-.p2align 5 +-L$align32_1: +- vpalignr $8,%ymm6,%ymm7,%ymm8 +- vpxor %ymm4,%ymm0,%ymm0 +- addl -128(%r13),%esi +- andnl %ecx,%eax,%edi +- vpxor %ymm1,%ymm0,%ymm0 +- addl %ebp,%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- vpxor %ymm8,%ymm0,%ymm0 +- andl %ebx,%eax +- addl %r12d,%esi +- xorl %edi,%eax +- vpsrld $30,%ymm0,%ymm8 +- vpslld $2,%ymm0,%ymm0 +- addl -124(%r13),%edx +- andnl %ebx,%esi,%edi +- addl %eax,%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- andl %ebp,%esi +- vpor %ymm8,%ymm0,%ymm0 +- addl %r12d,%edx +- xorl %edi,%esi +- addl -120(%r13),%ecx +- andnl %ebp,%edx,%edi +- vpaddd %ymm11,%ymm0,%ymm9 +- addl %esi,%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- andl %eax,%edx +- vmovdqu %ymm9,256(%rsp) +- addl %r12d,%ecx +- xorl %edi,%edx +- addl -116(%r13),%ebx +- andnl %eax,%ecx,%edi +- addl %edx,%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- andl %esi,%ecx +- addl %r12d,%ebx +- xorl %edi,%ecx +- addl -96(%r13),%ebp +- andnl %esi,%ebx,%edi +- addl %ecx,%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- andl %edx,%ebx +- addl %r12d,%ebp +- xorl %edi,%ebx +- vpalignr $8,%ymm7,%ymm0,%ymm8 +- vpxor %ymm5,%ymm1,%ymm1 +- addl 
-92(%r13),%eax +- andnl %edx,%ebp,%edi +- vpxor %ymm2,%ymm1,%ymm1 +- addl %ebx,%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- vpxor %ymm8,%ymm1,%ymm1 +- andl %ecx,%ebp +- addl %r12d,%eax +- xorl %edi,%ebp +- vpsrld $30,%ymm1,%ymm8 +- vpslld $2,%ymm1,%ymm1 +- addl -88(%r13),%esi +- andnl %ecx,%eax,%edi +- addl %ebp,%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- andl %ebx,%eax +- vpor %ymm8,%ymm1,%ymm1 +- addl %r12d,%esi +- xorl %edi,%eax +- addl -84(%r13),%edx +- andnl %ebx,%esi,%edi +- vpaddd %ymm11,%ymm1,%ymm9 +- addl %eax,%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- andl %ebp,%esi +- vmovdqu %ymm9,288(%rsp) +- addl %r12d,%edx +- xorl %edi,%esi +- addl -64(%r13),%ecx +- andnl %ebp,%edx,%edi +- addl %esi,%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- andl %eax,%edx +- addl %r12d,%ecx +- xorl %edi,%edx +- addl -60(%r13),%ebx +- andnl %eax,%ecx,%edi +- addl %edx,%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- andl %esi,%ecx +- addl %r12d,%ebx +- xorl %edi,%ecx +- vpalignr $8,%ymm0,%ymm1,%ymm8 +- vpxor %ymm6,%ymm2,%ymm2 +- addl -56(%r13),%ebp +- andnl %esi,%ebx,%edi +- vpxor %ymm3,%ymm2,%ymm2 +- vmovdqu 0(%r14),%ymm11 +- addl %ecx,%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- vpxor %ymm8,%ymm2,%ymm2 +- andl %edx,%ebx +- addl %r12d,%ebp +- xorl %edi,%ebx +- vpsrld $30,%ymm2,%ymm8 +- vpslld $2,%ymm2,%ymm2 +- addl -52(%r13),%eax +- andnl %edx,%ebp,%edi +- addl %ebx,%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- andl %ecx,%ebp +- vpor %ymm8,%ymm2,%ymm2 +- addl %r12d,%eax +- xorl %edi,%ebp +- addl -32(%r13),%esi +- andnl %ecx,%eax,%edi +- vpaddd %ymm11,%ymm2,%ymm9 +- addl %ebp,%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- andl %ebx,%eax +- vmovdqu %ymm9,320(%rsp) +- addl %r12d,%esi +- xorl %edi,%eax +- addl -28(%r13),%edx +- andnl %ebx,%esi,%edi +- addl %eax,%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- andl %ebp,%esi +- addl %r12d,%edx +- xorl %edi,%esi +- addl -24(%r13),%ecx +- andnl %ebp,%edx,%edi +- addl %esi,%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- andl %eax,%edx +- addl %r12d,%ecx +- xorl %edi,%edx +- vpalignr $8,%ymm1,%ymm2,%ymm8 +- vpxor %ymm7,%ymm3,%ymm3 +- addl -20(%r13),%ebx +- andnl %eax,%ecx,%edi +- vpxor %ymm4,%ymm3,%ymm3 +- addl %edx,%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- vpxor %ymm8,%ymm3,%ymm3 +- andl %esi,%ecx +- addl %r12d,%ebx +- xorl %edi,%ecx +- vpsrld $30,%ymm3,%ymm8 +- vpslld $2,%ymm3,%ymm3 +- addl 0(%r13),%ebp +- andnl %esi,%ebx,%edi +- addl %ecx,%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- andl %edx,%ebx +- vpor %ymm8,%ymm3,%ymm3 +- addl %r12d,%ebp +- xorl %edi,%ebx +- addl 4(%r13),%eax +- andnl %edx,%ebp,%edi +- vpaddd %ymm11,%ymm3,%ymm9 +- addl %ebx,%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- andl %ecx,%ebp +- vmovdqu %ymm9,352(%rsp) +- addl %r12d,%eax +- xorl %edi,%ebp +- addl 8(%r13),%esi +- andnl %ecx,%eax,%edi +- addl %ebp,%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- andl %ebx,%eax +- addl %r12d,%esi +- xorl %edi,%eax +- addl 12(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- vpalignr $8,%ymm2,%ymm3,%ymm8 +- vpxor %ymm0,%ymm4,%ymm4 +- addl 32(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- vpxor %ymm5,%ymm4,%ymm4 +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- vpxor %ymm8,%ymm4,%ymm4 +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl 36(%r13),%ebx +- vpsrld $30,%ymm4,%ymm8 +- vpslld $2,%ymm4,%ymm4 +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d 
+- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- vpor %ymm8,%ymm4,%ymm4 +- addl 40(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- vpaddd %ymm11,%ymm4,%ymm9 +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- addl 44(%r13),%eax +- vmovdqu %ymm9,384(%rsp) +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- addl 64(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- vpalignr $8,%ymm3,%ymm4,%ymm8 +- vpxor %ymm1,%ymm5,%ymm5 +- addl 68(%r13),%edx +- leal (%rdx,%rax,1),%edx +- vpxor %ymm6,%ymm5,%ymm5 +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- vpxor %ymm8,%ymm5,%ymm5 +- addl %r12d,%edx +- xorl %ebx,%esi +- addl 72(%r13),%ecx +- vpsrld $30,%ymm5,%ymm8 +- vpslld $2,%ymm5,%ymm5 +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- vpor %ymm8,%ymm5,%ymm5 +- addl 76(%r13),%ebx +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- vpaddd %ymm11,%ymm5,%ymm9 +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- addl 96(%r13),%ebp +- vmovdqu %ymm9,416(%rsp) +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- addl 100(%r13),%eax +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- vpalignr $8,%ymm4,%ymm5,%ymm8 +- vpxor %ymm2,%ymm6,%ymm6 +- addl 104(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- vpxor %ymm7,%ymm6,%ymm6 +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- vpxor %ymm8,%ymm6,%ymm6 +- addl %r12d,%esi +- xorl %ecx,%eax +- addl 108(%r13),%edx +- leaq 256(%r13),%r13 +- vpsrld $30,%ymm6,%ymm8 +- vpslld $2,%ymm6,%ymm6 +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- vpor %ymm8,%ymm6,%ymm6 +- addl -128(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- vpaddd %ymm11,%ymm6,%ymm9 +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl -124(%r13),%ebx +- vmovdqu %ymm9,448(%rsp) +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- addl -120(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- vpalignr $8,%ymm5,%ymm6,%ymm8 +- vpxor %ymm3,%ymm7,%ymm7 +- addl -116(%r13),%eax +- leal (%rax,%rbx,1),%eax +- vpxor %ymm0,%ymm7,%ymm7 +- vmovdqu 32(%r14),%ymm11 +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- vpxor %ymm8,%ymm7,%ymm7 +- addl %r12d,%eax +- xorl %edx,%ebp +- addl -96(%r13),%esi +- vpsrld $30,%ymm7,%ymm8 +- vpslld $2,%ymm7,%ymm7 +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- vpor %ymm8,%ymm7,%ymm7 +- addl -92(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- vpaddd %ymm11,%ymm7,%ymm9 +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- addl -88(%r13),%ecx +- vmovdqu %ymm9,480(%rsp) +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl -84(%r13),%ebx +- movl %esi,%edi 
+- xorl %eax,%edi +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- andl %edi,%ecx +- jmp L$align32_2 +-.p2align 5 +-L$align32_2: +- vpalignr $8,%ymm6,%ymm7,%ymm8 +- vpxor %ymm4,%ymm0,%ymm0 +- addl -64(%r13),%ebp +- xorl %esi,%ecx +- vpxor %ymm1,%ymm0,%ymm0 +- movl %edx,%edi +- xorl %esi,%edi +- leal (%rcx,%rbp,1),%ebp +- vpxor %ymm8,%ymm0,%ymm0 +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- vpsrld $30,%ymm0,%ymm8 +- vpslld $2,%ymm0,%ymm0 +- addl %r12d,%ebp +- andl %edi,%ebx +- addl -60(%r13),%eax +- xorl %edx,%ebx +- movl %ecx,%edi +- xorl %edx,%edi +- vpor %ymm8,%ymm0,%ymm0 +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- vpaddd %ymm11,%ymm0,%ymm9 +- addl %r12d,%eax +- andl %edi,%ebp +- addl -56(%r13),%esi +- xorl %ecx,%ebp +- vmovdqu %ymm9,512(%rsp) +- movl %ebx,%edi +- xorl %ecx,%edi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- andl %edi,%eax +- addl -52(%r13),%edx +- xorl %ebx,%eax +- movl %ebp,%edi +- xorl %ebx,%edi +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- andl %edi,%esi +- addl -32(%r13),%ecx +- xorl %ebp,%esi +- movl %eax,%edi +- xorl %ebp,%edi +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- andl %edi,%edx +- vpalignr $8,%ymm7,%ymm0,%ymm8 +- vpxor %ymm5,%ymm1,%ymm1 +- addl -28(%r13),%ebx +- xorl %eax,%edx +- vpxor %ymm2,%ymm1,%ymm1 +- movl %esi,%edi +- xorl %eax,%edi +- leal (%rbx,%rdx,1),%ebx +- vpxor %ymm8,%ymm1,%ymm1 +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- vpsrld $30,%ymm1,%ymm8 +- vpslld $2,%ymm1,%ymm1 +- addl %r12d,%ebx +- andl %edi,%ecx +- addl -24(%r13),%ebp +- xorl %esi,%ecx +- movl %edx,%edi +- xorl %esi,%edi +- vpor %ymm8,%ymm1,%ymm1 +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- vpaddd %ymm11,%ymm1,%ymm9 +- addl %r12d,%ebp +- andl %edi,%ebx +- addl -20(%r13),%eax +- xorl %edx,%ebx +- vmovdqu %ymm9,544(%rsp) +- movl %ecx,%edi +- xorl %edx,%edi +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- andl %edi,%ebp +- addl 0(%r13),%esi +- xorl %ecx,%ebp +- movl %ebx,%edi +- xorl %ecx,%edi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- andl %edi,%eax +- addl 4(%r13),%edx +- xorl %ebx,%eax +- movl %ebp,%edi +- xorl %ebx,%edi +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- andl %edi,%esi +- vpalignr $8,%ymm0,%ymm1,%ymm8 +- vpxor %ymm6,%ymm2,%ymm2 +- addl 8(%r13),%ecx +- xorl %ebp,%esi +- vpxor %ymm3,%ymm2,%ymm2 +- movl %eax,%edi +- xorl %ebp,%edi +- leal (%rcx,%rsi,1),%ecx +- vpxor %ymm8,%ymm2,%ymm2 +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- vpsrld $30,%ymm2,%ymm8 +- vpslld $2,%ymm2,%ymm2 +- addl %r12d,%ecx +- andl %edi,%edx +- addl 12(%r13),%ebx +- xorl %eax,%edx +- movl %esi,%edi +- xorl %eax,%edi +- vpor %ymm8,%ymm2,%ymm2 +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- vpaddd %ymm11,%ymm2,%ymm9 +- addl %r12d,%ebx +- andl %edi,%ecx +- addl 32(%r13),%ebp +- xorl %esi,%ecx +- vmovdqu %ymm9,576(%rsp) +- movl %edx,%edi +- xorl %esi,%edi +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx 
+- addl %r12d,%ebp +- andl %edi,%ebx +- addl 36(%r13),%eax +- xorl %edx,%ebx +- movl %ecx,%edi +- xorl %edx,%edi +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- andl %edi,%ebp +- addl 40(%r13),%esi +- xorl %ecx,%ebp +- movl %ebx,%edi +- xorl %ecx,%edi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- andl %edi,%eax +- vpalignr $8,%ymm1,%ymm2,%ymm8 +- vpxor %ymm7,%ymm3,%ymm3 +- addl 44(%r13),%edx +- xorl %ebx,%eax +- vpxor %ymm4,%ymm3,%ymm3 +- movl %ebp,%edi +- xorl %ebx,%edi +- leal (%rdx,%rax,1),%edx +- vpxor %ymm8,%ymm3,%ymm3 +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- vpsrld $30,%ymm3,%ymm8 +- vpslld $2,%ymm3,%ymm3 +- addl %r12d,%edx +- andl %edi,%esi +- addl 64(%r13),%ecx +- xorl %ebp,%esi +- movl %eax,%edi +- xorl %ebp,%edi +- vpor %ymm8,%ymm3,%ymm3 +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- vpaddd %ymm11,%ymm3,%ymm9 +- addl %r12d,%ecx +- andl %edi,%edx +- addl 68(%r13),%ebx +- xorl %eax,%edx +- vmovdqu %ymm9,608(%rsp) +- movl %esi,%edi +- xorl %eax,%edi +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- andl %edi,%ecx +- addl 72(%r13),%ebp +- xorl %esi,%ecx +- movl %edx,%edi +- xorl %esi,%edi +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- andl %edi,%ebx +- addl 76(%r13),%eax +- xorl %edx,%ebx +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- addl 96(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- addl 100(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- addl 104(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl 108(%r13),%ebx +- leaq 256(%r13),%r13 +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- addl -128(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- addl -124(%r13),%eax +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- addl -120(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- addl -116(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- addl -96(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl -92(%r13),%ebx +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- addl -88(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- addl -84(%r13),%eax +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- addl -64(%r13),%esi +- leal 
(%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- addl -60(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- addl -56(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl -52(%r13),%ebx +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- addl -32(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- addl -28(%r13),%eax +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- addl -24(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- addl -20(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- addl %r12d,%edx +- leaq 128(%r9),%r13 +- leaq 128(%r9),%rdi +- cmpq %r10,%r13 +- cmovaeq %r9,%r13 +- +- +- addl 0(%r8),%edx +- addl 4(%r8),%esi +- addl 8(%r8),%ebp +- movl %edx,0(%r8) +- addl 12(%r8),%ebx +- movl %esi,4(%r8) +- movl %edx,%eax +- addl 16(%r8),%ecx +- movl %ebp,%r12d +- movl %ebp,8(%r8) +- movl %ebx,%edx +- +- movl %ebx,12(%r8) +- movl %esi,%ebp +- movl %ecx,16(%r8) +- +- movl %ecx,%esi +- movl %r12d,%ecx +- +- +- cmpq %r10,%r9 +- je L$done_avx2 +- vmovdqu 64(%r14),%ymm6 +- cmpq %r10,%rdi +- ja L$ast_avx2 +- +- vmovdqu -64(%rdi),%xmm0 +- vmovdqu -48(%rdi),%xmm1 +- vmovdqu -32(%rdi),%xmm2 +- vmovdqu -16(%rdi),%xmm3 +- vinserti128 $1,0(%r13),%ymm0,%ymm0 +- vinserti128 $1,16(%r13),%ymm1,%ymm1 +- vinserti128 $1,32(%r13),%ymm2,%ymm2 +- vinserti128 $1,48(%r13),%ymm3,%ymm3 +- jmp L$ast_avx2 +- +-.p2align 5 +-L$ast_avx2: +- leaq 128+16(%rsp),%r13 +- rorxl $2,%ebp,%ebx +- andnl %edx,%ebp,%edi +- andl %ecx,%ebp +- xorl %edi,%ebp +- subq $-128,%r9 +- addl -128(%r13),%esi +- andnl %ecx,%eax,%edi +- addl %ebp,%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- andl %ebx,%eax +- addl %r12d,%esi +- xorl %edi,%eax +- addl -124(%r13),%edx +- andnl %ebx,%esi,%edi +- addl %eax,%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- andl %ebp,%esi +- addl %r12d,%edx +- xorl %edi,%esi +- addl -120(%r13),%ecx +- andnl %ebp,%edx,%edi +- addl %esi,%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- andl %eax,%edx +- addl %r12d,%ecx +- xorl %edi,%edx +- addl -116(%r13),%ebx +- andnl %eax,%ecx,%edi +- addl %edx,%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- andl %esi,%ecx +- addl %r12d,%ebx +- xorl %edi,%ecx +- addl -96(%r13),%ebp +- andnl %esi,%ebx,%edi +- addl %ecx,%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- andl %edx,%ebx +- addl %r12d,%ebp +- xorl %edi,%ebx +- addl -92(%r13),%eax +- andnl %edx,%ebp,%edi +- addl %ebx,%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- andl %ecx,%ebp +- addl %r12d,%eax +- xorl %edi,%ebp +- addl -88(%r13),%esi +- andnl %ecx,%eax,%edi +- addl %ebp,%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- andl %ebx,%eax +- addl %r12d,%esi +- xorl %edi,%eax +- addl -84(%r13),%edx +- andnl %ebx,%esi,%edi +- addl %eax,%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- andl %ebp,%esi +- addl %r12d,%edx +- xorl %edi,%esi +- addl -64(%r13),%ecx +- andnl %ebp,%edx,%edi +- addl %esi,%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- andl %eax,%edx +- addl %r12d,%ecx +- 
xorl %edi,%edx +- addl -60(%r13),%ebx +- andnl %eax,%ecx,%edi +- addl %edx,%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- andl %esi,%ecx +- addl %r12d,%ebx +- xorl %edi,%ecx +- addl -56(%r13),%ebp +- andnl %esi,%ebx,%edi +- addl %ecx,%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- andl %edx,%ebx +- addl %r12d,%ebp +- xorl %edi,%ebx +- addl -52(%r13),%eax +- andnl %edx,%ebp,%edi +- addl %ebx,%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- andl %ecx,%ebp +- addl %r12d,%eax +- xorl %edi,%ebp +- addl -32(%r13),%esi +- andnl %ecx,%eax,%edi +- addl %ebp,%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- andl %ebx,%eax +- addl %r12d,%esi +- xorl %edi,%eax +- addl -28(%r13),%edx +- andnl %ebx,%esi,%edi +- addl %eax,%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- andl %ebp,%esi +- addl %r12d,%edx +- xorl %edi,%esi +- addl -24(%r13),%ecx +- andnl %ebp,%edx,%edi +- addl %esi,%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- andl %eax,%edx +- addl %r12d,%ecx +- xorl %edi,%edx +- addl -20(%r13),%ebx +- andnl %eax,%ecx,%edi +- addl %edx,%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- andl %esi,%ecx +- addl %r12d,%ebx +- xorl %edi,%ecx +- addl 0(%r13),%ebp +- andnl %esi,%ebx,%edi +- addl %ecx,%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- andl %edx,%ebx +- addl %r12d,%ebp +- xorl %edi,%ebx +- addl 4(%r13),%eax +- andnl %edx,%ebp,%edi +- addl %ebx,%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- andl %ecx,%ebp +- addl %r12d,%eax +- xorl %edi,%ebp +- addl 8(%r13),%esi +- andnl %ecx,%eax,%edi +- addl %ebp,%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- andl %ebx,%eax +- addl %r12d,%esi +- xorl %edi,%eax +- addl 12(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- addl 32(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl 36(%r13),%ebx +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- addl 40(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- addl 44(%r13),%eax +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- addl 64(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- vmovdqu -64(%r14),%ymm11 +- vpshufb %ymm6,%ymm0,%ymm0 +- addl 68(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- addl 72(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl 76(%r13),%ebx +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- addl 96(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- addl 100(%r13),%eax +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- vpshufb %ymm6,%ymm1,%ymm1 +- vpaddd %ymm11,%ymm0,%ymm8 +- addl 104(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- 
addl %r12d,%esi +- xorl %ecx,%eax +- addl 108(%r13),%edx +- leaq 256(%r13),%r13 +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- addl -128(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl -124(%r13),%ebx +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- addl -120(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- vmovdqu %ymm8,0(%rsp) +- vpshufb %ymm6,%ymm2,%ymm2 +- vpaddd %ymm11,%ymm1,%ymm9 +- addl -116(%r13),%eax +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- addl -96(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- addl -92(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- addl -88(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl -84(%r13),%ebx +- movl %esi,%edi +- xorl %eax,%edi +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- andl %edi,%ecx +- vmovdqu %ymm9,32(%rsp) +- vpshufb %ymm6,%ymm3,%ymm3 +- vpaddd %ymm11,%ymm2,%ymm6 +- addl -64(%r13),%ebp +- xorl %esi,%ecx +- movl %edx,%edi +- xorl %esi,%edi +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- andl %edi,%ebx +- addl -60(%r13),%eax +- xorl %edx,%ebx +- movl %ecx,%edi +- xorl %edx,%edi +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- andl %edi,%ebp +- addl -56(%r13),%esi +- xorl %ecx,%ebp +- movl %ebx,%edi +- xorl %ecx,%edi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- andl %edi,%eax +- addl -52(%r13),%edx +- xorl %ebx,%eax +- movl %ebp,%edi +- xorl %ebx,%edi +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- andl %edi,%esi +- addl -32(%r13),%ecx +- xorl %ebp,%esi +- movl %eax,%edi +- xorl %ebp,%edi +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- andl %edi,%edx +- jmp L$align32_3 +-.p2align 5 +-L$align32_3: +- vmovdqu %ymm6,64(%rsp) +- vpaddd %ymm11,%ymm3,%ymm7 +- addl -28(%r13),%ebx +- xorl %eax,%edx +- movl %esi,%edi +- xorl %eax,%edi +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- andl %edi,%ecx +- addl -24(%r13),%ebp +- xorl %esi,%ecx +- movl %edx,%edi +- xorl %esi,%edi +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- andl %edi,%ebx +- addl -20(%r13),%eax +- xorl %edx,%ebx +- movl %ecx,%edi +- xorl %edx,%edi +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- andl %edi,%ebp +- addl 0(%r13),%esi +- xorl %ecx,%ebp +- movl %ebx,%edi +- xorl %ecx,%edi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- 
andl %edi,%eax +- addl 4(%r13),%edx +- xorl %ebx,%eax +- movl %ebp,%edi +- xorl %ebx,%edi +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- andl %edi,%esi +- vmovdqu %ymm7,96(%rsp) +- addl 8(%r13),%ecx +- xorl %ebp,%esi +- movl %eax,%edi +- xorl %ebp,%edi +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- andl %edi,%edx +- addl 12(%r13),%ebx +- xorl %eax,%edx +- movl %esi,%edi +- xorl %eax,%edi +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- andl %edi,%ecx +- addl 32(%r13),%ebp +- xorl %esi,%ecx +- movl %edx,%edi +- xorl %esi,%edi +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- andl %edi,%ebx +- addl 36(%r13),%eax +- xorl %edx,%ebx +- movl %ecx,%edi +- xorl %edx,%edi +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- andl %edi,%ebp +- addl 40(%r13),%esi +- xorl %ecx,%ebp +- movl %ebx,%edi +- xorl %ecx,%edi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- andl %edi,%eax +- vpalignr $8,%ymm0,%ymm1,%ymm4 +- addl 44(%r13),%edx +- xorl %ebx,%eax +- movl %ebp,%edi +- xorl %ebx,%edi +- vpsrldq $4,%ymm3,%ymm8 +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- vpxor %ymm0,%ymm4,%ymm4 +- vpxor %ymm2,%ymm8,%ymm8 +- xorl %ebp,%esi +- addl %r12d,%edx +- vpxor %ymm8,%ymm4,%ymm4 +- andl %edi,%esi +- addl 64(%r13),%ecx +- xorl %ebp,%esi +- movl %eax,%edi +- vpsrld $31,%ymm4,%ymm8 +- xorl %ebp,%edi +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- vpslldq $12,%ymm4,%ymm10 +- vpaddd %ymm4,%ymm4,%ymm4 +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- vpsrld $30,%ymm10,%ymm9 +- vpor %ymm8,%ymm4,%ymm4 +- addl %r12d,%ecx +- andl %edi,%edx +- vpslld $2,%ymm10,%ymm10 +- vpxor %ymm9,%ymm4,%ymm4 +- addl 68(%r13),%ebx +- xorl %eax,%edx +- vpxor %ymm10,%ymm4,%ymm4 +- movl %esi,%edi +- xorl %eax,%edi +- leal (%rbx,%rdx,1),%ebx +- vpaddd %ymm11,%ymm4,%ymm9 +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- vmovdqu %ymm9,128(%rsp) +- addl %r12d,%ebx +- andl %edi,%ecx +- addl 72(%r13),%ebp +- xorl %esi,%ecx +- movl %edx,%edi +- xorl %esi,%edi +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- andl %edi,%ebx +- addl 76(%r13),%eax +- xorl %edx,%ebx +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- vpalignr $8,%ymm1,%ymm2,%ymm5 +- addl 96(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- vpsrldq $4,%ymm4,%ymm8 +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- vpxor %ymm1,%ymm5,%ymm5 +- vpxor %ymm3,%ymm8,%ymm8 +- addl 100(%r13),%edx +- leal (%rdx,%rax,1),%edx +- vpxor %ymm8,%ymm5,%ymm5 +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- xorl %ebp,%esi +- addl %r12d,%edx +- vpsrld $31,%ymm5,%ymm8 +- vmovdqu -32(%r14),%ymm11 +- xorl %ebx,%esi +- addl 104(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- vpslldq $12,%ymm5,%ymm10 +- vpaddd %ymm5,%ymm5,%ymm5 +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- vpsrld $30,%ymm10,%ymm9 +- vpor %ymm8,%ymm5,%ymm5 +- xorl %eax,%edx +- addl %r12d,%ecx +- vpslld $2,%ymm10,%ymm10 +- vpxor %ymm9,%ymm5,%ymm5 +- xorl %ebp,%edx +- addl 108(%r13),%ebx +- leaq 256(%r13),%r13 +- vpxor %ymm10,%ymm5,%ymm5 +- leal 
(%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- vpaddd %ymm11,%ymm5,%ymm9 +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- vmovdqu %ymm9,160(%rsp) +- addl -128(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- vpalignr $8,%ymm2,%ymm3,%ymm6 +- addl -124(%r13),%eax +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- vpsrldq $4,%ymm5,%ymm8 +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- vpxor %ymm2,%ymm6,%ymm6 +- vpxor %ymm4,%ymm8,%ymm8 +- addl -120(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- vpxor %ymm8,%ymm6,%ymm6 +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- vpsrld $31,%ymm6,%ymm8 +- xorl %ecx,%eax +- addl -116(%r13),%edx +- leal (%rdx,%rax,1),%edx +- vpslldq $12,%ymm6,%ymm10 +- vpaddd %ymm6,%ymm6,%ymm6 +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- vpsrld $30,%ymm10,%ymm9 +- vpor %ymm8,%ymm6,%ymm6 +- xorl %ebp,%esi +- addl %r12d,%edx +- vpslld $2,%ymm10,%ymm10 +- vpxor %ymm9,%ymm6,%ymm6 +- xorl %ebx,%esi +- addl -96(%r13),%ecx +- vpxor %ymm10,%ymm6,%ymm6 +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- vpaddd %ymm11,%ymm6,%ymm9 +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- vmovdqu %ymm9,192(%rsp) +- addl -92(%r13),%ebx +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- vpalignr $8,%ymm3,%ymm4,%ymm7 +- addl -88(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- vpsrldq $4,%ymm6,%ymm8 +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- vpxor %ymm3,%ymm7,%ymm7 +- vpxor %ymm5,%ymm8,%ymm8 +- addl -84(%r13),%eax +- leal (%rax,%rbx,1),%eax +- vpxor %ymm8,%ymm7,%ymm7 +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- vpsrld $31,%ymm7,%ymm8 +- xorl %edx,%ebp +- addl -64(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- vpslldq $12,%ymm7,%ymm10 +- vpaddd %ymm7,%ymm7,%ymm7 +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- vpsrld $30,%ymm10,%ymm9 +- vpor %ymm8,%ymm7,%ymm7 +- xorl %ebx,%eax +- addl %r12d,%esi +- vpslld $2,%ymm10,%ymm10 +- vpxor %ymm9,%ymm7,%ymm7 +- xorl %ecx,%eax +- addl -60(%r13),%edx +- vpxor %ymm10,%ymm7,%ymm7 +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- rorxl $2,%esi,%eax +- vpaddd %ymm11,%ymm7,%ymm9 +- xorl %ebp,%esi +- addl %r12d,%edx +- xorl %ebx,%esi +- vmovdqu %ymm9,224(%rsp) +- addl -56(%r13),%ecx +- leal (%rcx,%rsi,1),%ecx +- rorxl $27,%edx,%r12d +- rorxl $2,%edx,%esi +- xorl %eax,%edx +- addl %r12d,%ecx +- xorl %ebp,%edx +- addl -52(%r13),%ebx +- leal (%rbx,%rdx,1),%ebx +- rorxl $27,%ecx,%r12d +- rorxl $2,%ecx,%edx +- xorl %esi,%ecx +- addl %r12d,%ebx +- xorl %eax,%ecx +- addl -32(%r13),%ebp +- leal (%rcx,%rbp,1),%ebp +- rorxl $27,%ebx,%r12d +- rorxl $2,%ebx,%ecx +- xorl %edx,%ebx +- addl %r12d,%ebp +- xorl %esi,%ebx +- addl -28(%r13),%eax +- leal (%rax,%rbx,1),%eax +- rorxl $27,%ebp,%r12d +- rorxl $2,%ebp,%ebx +- xorl %ecx,%ebp +- addl %r12d,%eax +- xorl %edx,%ebp +- addl -24(%r13),%esi +- leal (%rsi,%rbp,1),%esi +- rorxl $27,%eax,%r12d +- rorxl $2,%eax,%ebp +- xorl %ebx,%eax +- addl %r12d,%esi +- xorl %ecx,%eax +- addl -20(%r13),%edx +- leal (%rdx,%rax,1),%edx +- rorxl $27,%esi,%r12d +- addl %r12d,%edx +- leaq 128(%rsp),%r13 +- +- +- addl 0(%r8),%edx +- addl 4(%r8),%esi +- addl 8(%r8),%ebp +- movl %edx,0(%r8) +- addl 12(%r8),%ebx +- movl %esi,4(%r8) +- movl %edx,%eax +- addl 16(%r8),%ecx 
+- movl %ebp,%r12d +- movl %ebp,8(%r8) +- movl %ebx,%edx +- +- movl %ebx,12(%r8) +- movl %esi,%ebp +- movl %ecx,16(%r8) +- +- movl %ecx,%esi +- movl %r12d,%ecx +- +- +- cmpq %r10,%r9 +- jbe L$oop_avx2 +- +-L$done_avx2: +- vzeroupper +- movq -40(%r11),%r14 +- +- movq -32(%r11),%r13 +- +- movq -24(%r11),%r12 +- +- movq -16(%r11),%rbp +- +- movq -8(%r11),%rbx +- +- leaq (%r11),%rsp +- +-L$epilogue_avx2: +- .byte 0xf3,0xc3 +- +- +-.p2align 6 +-K_XX_XX: +-.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +-.long 0x5a827999,0x5a827999,0x5a827999,0x5a827999 +-.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +-.long 0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1 +-.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +-.long 0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc +-.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +-.long 0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6 +-.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +-.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +-.byte 0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0 +-.byte 83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +-.p2align 6 +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S b/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S +deleted file mode 100644 +index d94268d..0000000 +--- a/mac-x86_64/crypto/fipsmodule/sha256-x86_64.S ++++ /dev/null +@@ -1,3971 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +-.globl _sha256_block_data_order +-.private_extern _sha256_block_data_order +- +-.p2align 4 +-_sha256_block_data_order: +- +- leaq _OPENSSL_ia32cap_P(%rip),%r11 +- movl 0(%r11),%r9d +- movl 4(%r11),%r10d +- movl 8(%r11),%r11d +- andl $1073741824,%r9d +- andl $268435968,%r10d +- orl %r9d,%r10d +- cmpl $1342177792,%r10d +- je L$avx_shortcut +- testl $512,%r10d +- jnz L$ssse3_shortcut +- movq %rsp,%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- shlq $4,%rdx +- subq $64+32,%rsp +- leaq (%rsi,%rdx,4),%rdx +- andq $-64,%rsp +- movq %rdi,64+0(%rsp) +- movq %rsi,64+8(%rsp) +- movq %rdx,64+16(%rsp) +- movq %rax,88(%rsp) +- +-L$prologue: +- +- movl 0(%rdi),%eax +- movl 4(%rdi),%ebx +- movl 8(%rdi),%ecx +- movl 12(%rdi),%edx +- movl 16(%rdi),%r8d +- movl 20(%rdi),%r9d +- movl 24(%rdi),%r10d +- movl 28(%rdi),%r11d +- jmp L$loop +- +-.p2align 4 +-L$loop: +- movl %ebx,%edi +- leaq K256(%rip),%rbp +- xorl %ecx,%edi +- movl 0(%rsi),%r12d +- movl %r8d,%r13d +- movl %eax,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %r9d,%r15d +- +- xorl %r8d,%r13d +- rorl $9,%r14d +- xorl %r10d,%r15d +- +- movl %r12d,0(%rsp) +- xorl %eax,%r14d +- andl %r8d,%r15d +- +- rorl $5,%r13d +- addl %r11d,%r12d +- xorl %r10d,%r15d +- +- rorl $11,%r14d +- xorl %r8d,%r13d +- addl %r15d,%r12d +- +- movl %eax,%r15d +- addl (%rbp),%r12d +- xorl %eax,%r14d +- +- xorl %ebx,%r15d +- rorl $6,%r13d +- movl %ebx,%r11d +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%r11d +- addl %r12d,%edx +- addl %r12d,%r11d +- +- leaq 4(%rbp),%rbp +- addl %r14d,%r11d +- 
movl 4(%rsi),%r12d +- movl %edx,%r13d +- movl %r11d,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %r8d,%edi +- +- xorl %edx,%r13d +- rorl $9,%r14d +- xorl %r9d,%edi +- +- movl %r12d,4(%rsp) +- xorl %r11d,%r14d +- andl %edx,%edi +- +- rorl $5,%r13d +- addl %r10d,%r12d +- xorl %r9d,%edi +- +- rorl $11,%r14d +- xorl %edx,%r13d +- addl %edi,%r12d +- +- movl %r11d,%edi +- addl (%rbp),%r12d +- xorl %r11d,%r14d +- +- xorl %eax,%edi +- rorl $6,%r13d +- movl %eax,%r10d +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%r10d +- addl %r12d,%ecx +- addl %r12d,%r10d +- +- leaq 4(%rbp),%rbp +- addl %r14d,%r10d +- movl 8(%rsi),%r12d +- movl %ecx,%r13d +- movl %r10d,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %edx,%r15d +- +- xorl %ecx,%r13d +- rorl $9,%r14d +- xorl %r8d,%r15d +- +- movl %r12d,8(%rsp) +- xorl %r10d,%r14d +- andl %ecx,%r15d +- +- rorl $5,%r13d +- addl %r9d,%r12d +- xorl %r8d,%r15d +- +- rorl $11,%r14d +- xorl %ecx,%r13d +- addl %r15d,%r12d +- +- movl %r10d,%r15d +- addl (%rbp),%r12d +- xorl %r10d,%r14d +- +- xorl %r11d,%r15d +- rorl $6,%r13d +- movl %r11d,%r9d +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%r9d +- addl %r12d,%ebx +- addl %r12d,%r9d +- +- leaq 4(%rbp),%rbp +- addl %r14d,%r9d +- movl 12(%rsi),%r12d +- movl %ebx,%r13d +- movl %r9d,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %ecx,%edi +- +- xorl %ebx,%r13d +- rorl $9,%r14d +- xorl %edx,%edi +- +- movl %r12d,12(%rsp) +- xorl %r9d,%r14d +- andl %ebx,%edi +- +- rorl $5,%r13d +- addl %r8d,%r12d +- xorl %edx,%edi +- +- rorl $11,%r14d +- xorl %ebx,%r13d +- addl %edi,%r12d +- +- movl %r9d,%edi +- addl (%rbp),%r12d +- xorl %r9d,%r14d +- +- xorl %r10d,%edi +- rorl $6,%r13d +- movl %r10d,%r8d +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%r8d +- addl %r12d,%eax +- addl %r12d,%r8d +- +- leaq 20(%rbp),%rbp +- addl %r14d,%r8d +- movl 16(%rsi),%r12d +- movl %eax,%r13d +- movl %r8d,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %ebx,%r15d +- +- xorl %eax,%r13d +- rorl $9,%r14d +- xorl %ecx,%r15d +- +- movl %r12d,16(%rsp) +- xorl %r8d,%r14d +- andl %eax,%r15d +- +- rorl $5,%r13d +- addl %edx,%r12d +- xorl %ecx,%r15d +- +- rorl $11,%r14d +- xorl %eax,%r13d +- addl %r15d,%r12d +- +- movl %r8d,%r15d +- addl (%rbp),%r12d +- xorl %r8d,%r14d +- +- xorl %r9d,%r15d +- rorl $6,%r13d +- movl %r9d,%edx +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%edx +- addl %r12d,%r11d +- addl %r12d,%edx +- +- leaq 4(%rbp),%rbp +- addl %r14d,%edx +- movl 20(%rsi),%r12d +- movl %r11d,%r13d +- movl %edx,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %eax,%edi +- +- xorl %r11d,%r13d +- rorl $9,%r14d +- xorl %ebx,%edi +- +- movl %r12d,20(%rsp) +- xorl %edx,%r14d +- andl %r11d,%edi +- +- rorl $5,%r13d +- addl %ecx,%r12d +- xorl %ebx,%edi +- +- rorl $11,%r14d +- xorl %r11d,%r13d +- addl %edi,%r12d +- +- movl %edx,%edi +- addl (%rbp),%r12d +- xorl %edx,%r14d +- +- xorl %r8d,%edi +- rorl $6,%r13d +- movl %r8d,%ecx +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%ecx +- addl %r12d,%r10d +- addl %r12d,%ecx +- +- leaq 4(%rbp),%rbp +- addl %r14d,%ecx +- movl 24(%rsi),%r12d +- movl %r10d,%r13d +- movl %ecx,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %r11d,%r15d +- +- xorl %r10d,%r13d +- rorl $9,%r14d +- xorl %eax,%r15d +- +- movl %r12d,24(%rsp) +- xorl %ecx,%r14d +- andl %r10d,%r15d +- +- rorl $5,%r13d +- addl %ebx,%r12d +- xorl %eax,%r15d +- +- rorl $11,%r14d +- xorl %r10d,%r13d +- addl %r15d,%r12d +- +- movl %ecx,%r15d +- 
addl (%rbp),%r12d +- xorl %ecx,%r14d +- +- xorl %edx,%r15d +- rorl $6,%r13d +- movl %edx,%ebx +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%ebx +- addl %r12d,%r9d +- addl %r12d,%ebx +- +- leaq 4(%rbp),%rbp +- addl %r14d,%ebx +- movl 28(%rsi),%r12d +- movl %r9d,%r13d +- movl %ebx,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %r10d,%edi +- +- xorl %r9d,%r13d +- rorl $9,%r14d +- xorl %r11d,%edi +- +- movl %r12d,28(%rsp) +- xorl %ebx,%r14d +- andl %r9d,%edi +- +- rorl $5,%r13d +- addl %eax,%r12d +- xorl %r11d,%edi +- +- rorl $11,%r14d +- xorl %r9d,%r13d +- addl %edi,%r12d +- +- movl %ebx,%edi +- addl (%rbp),%r12d +- xorl %ebx,%r14d +- +- xorl %ecx,%edi +- rorl $6,%r13d +- movl %ecx,%eax +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%eax +- addl %r12d,%r8d +- addl %r12d,%eax +- +- leaq 20(%rbp),%rbp +- addl %r14d,%eax +- movl 32(%rsi),%r12d +- movl %r8d,%r13d +- movl %eax,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %r9d,%r15d +- +- xorl %r8d,%r13d +- rorl $9,%r14d +- xorl %r10d,%r15d +- +- movl %r12d,32(%rsp) +- xorl %eax,%r14d +- andl %r8d,%r15d +- +- rorl $5,%r13d +- addl %r11d,%r12d +- xorl %r10d,%r15d +- +- rorl $11,%r14d +- xorl %r8d,%r13d +- addl %r15d,%r12d +- +- movl %eax,%r15d +- addl (%rbp),%r12d +- xorl %eax,%r14d +- +- xorl %ebx,%r15d +- rorl $6,%r13d +- movl %ebx,%r11d +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%r11d +- addl %r12d,%edx +- addl %r12d,%r11d +- +- leaq 4(%rbp),%rbp +- addl %r14d,%r11d +- movl 36(%rsi),%r12d +- movl %edx,%r13d +- movl %r11d,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %r8d,%edi +- +- xorl %edx,%r13d +- rorl $9,%r14d +- xorl %r9d,%edi +- +- movl %r12d,36(%rsp) +- xorl %r11d,%r14d +- andl %edx,%edi +- +- rorl $5,%r13d +- addl %r10d,%r12d +- xorl %r9d,%edi +- +- rorl $11,%r14d +- xorl %edx,%r13d +- addl %edi,%r12d +- +- movl %r11d,%edi +- addl (%rbp),%r12d +- xorl %r11d,%r14d +- +- xorl %eax,%edi +- rorl $6,%r13d +- movl %eax,%r10d +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%r10d +- addl %r12d,%ecx +- addl %r12d,%r10d +- +- leaq 4(%rbp),%rbp +- addl %r14d,%r10d +- movl 40(%rsi),%r12d +- movl %ecx,%r13d +- movl %r10d,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %edx,%r15d +- +- xorl %ecx,%r13d +- rorl $9,%r14d +- xorl %r8d,%r15d +- +- movl %r12d,40(%rsp) +- xorl %r10d,%r14d +- andl %ecx,%r15d +- +- rorl $5,%r13d +- addl %r9d,%r12d +- xorl %r8d,%r15d +- +- rorl $11,%r14d +- xorl %ecx,%r13d +- addl %r15d,%r12d +- +- movl %r10d,%r15d +- addl (%rbp),%r12d +- xorl %r10d,%r14d +- +- xorl %r11d,%r15d +- rorl $6,%r13d +- movl %r11d,%r9d +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%r9d +- addl %r12d,%ebx +- addl %r12d,%r9d +- +- leaq 4(%rbp),%rbp +- addl %r14d,%r9d +- movl 44(%rsi),%r12d +- movl %ebx,%r13d +- movl %r9d,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %ecx,%edi +- +- xorl %ebx,%r13d +- rorl $9,%r14d +- xorl %edx,%edi +- +- movl %r12d,44(%rsp) +- xorl %r9d,%r14d +- andl %ebx,%edi +- +- rorl $5,%r13d +- addl %r8d,%r12d +- xorl %edx,%edi +- +- rorl $11,%r14d +- xorl %ebx,%r13d +- addl %edi,%r12d +- +- movl %r9d,%edi +- addl (%rbp),%r12d +- xorl %r9d,%r14d +- +- xorl %r10d,%edi +- rorl $6,%r13d +- movl %r10d,%r8d +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%r8d +- addl %r12d,%eax +- addl %r12d,%r8d +- +- leaq 20(%rbp),%rbp +- addl %r14d,%r8d +- movl 48(%rsi),%r12d +- movl %eax,%r13d +- movl %r8d,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %ebx,%r15d +- 
+- xorl %eax,%r13d +- rorl $9,%r14d +- xorl %ecx,%r15d +- +- movl %r12d,48(%rsp) +- xorl %r8d,%r14d +- andl %eax,%r15d +- +- rorl $5,%r13d +- addl %edx,%r12d +- xorl %ecx,%r15d +- +- rorl $11,%r14d +- xorl %eax,%r13d +- addl %r15d,%r12d +- +- movl %r8d,%r15d +- addl (%rbp),%r12d +- xorl %r8d,%r14d +- +- xorl %r9d,%r15d +- rorl $6,%r13d +- movl %r9d,%edx +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%edx +- addl %r12d,%r11d +- addl %r12d,%edx +- +- leaq 4(%rbp),%rbp +- addl %r14d,%edx +- movl 52(%rsi),%r12d +- movl %r11d,%r13d +- movl %edx,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %eax,%edi +- +- xorl %r11d,%r13d +- rorl $9,%r14d +- xorl %ebx,%edi +- +- movl %r12d,52(%rsp) +- xorl %edx,%r14d +- andl %r11d,%edi +- +- rorl $5,%r13d +- addl %ecx,%r12d +- xorl %ebx,%edi +- +- rorl $11,%r14d +- xorl %r11d,%r13d +- addl %edi,%r12d +- +- movl %edx,%edi +- addl (%rbp),%r12d +- xorl %edx,%r14d +- +- xorl %r8d,%edi +- rorl $6,%r13d +- movl %r8d,%ecx +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%ecx +- addl %r12d,%r10d +- addl %r12d,%ecx +- +- leaq 4(%rbp),%rbp +- addl %r14d,%ecx +- movl 56(%rsi),%r12d +- movl %r10d,%r13d +- movl %ecx,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %r11d,%r15d +- +- xorl %r10d,%r13d +- rorl $9,%r14d +- xorl %eax,%r15d +- +- movl %r12d,56(%rsp) +- xorl %ecx,%r14d +- andl %r10d,%r15d +- +- rorl $5,%r13d +- addl %ebx,%r12d +- xorl %eax,%r15d +- +- rorl $11,%r14d +- xorl %r10d,%r13d +- addl %r15d,%r12d +- +- movl %ecx,%r15d +- addl (%rbp),%r12d +- xorl %ecx,%r14d +- +- xorl %edx,%r15d +- rorl $6,%r13d +- movl %edx,%ebx +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%ebx +- addl %r12d,%r9d +- addl %r12d,%ebx +- +- leaq 4(%rbp),%rbp +- addl %r14d,%ebx +- movl 60(%rsi),%r12d +- movl %r9d,%r13d +- movl %ebx,%r14d +- bswapl %r12d +- rorl $14,%r13d +- movl %r10d,%edi +- +- xorl %r9d,%r13d +- rorl $9,%r14d +- xorl %r11d,%edi +- +- movl %r12d,60(%rsp) +- xorl %ebx,%r14d +- andl %r9d,%edi +- +- rorl $5,%r13d +- addl %eax,%r12d +- xorl %r11d,%edi +- +- rorl $11,%r14d +- xorl %r9d,%r13d +- addl %edi,%r12d +- +- movl %ebx,%edi +- addl (%rbp),%r12d +- xorl %ebx,%r14d +- +- xorl %ecx,%edi +- rorl $6,%r13d +- movl %ecx,%eax +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%eax +- addl %r12d,%r8d +- addl %r12d,%eax +- +- leaq 20(%rbp),%rbp +- jmp L$rounds_16_xx +-.p2align 4 +-L$rounds_16_xx: +- movl 4(%rsp),%r13d +- movl 56(%rsp),%r15d +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%eax +- movl %r15d,%r14d +- rorl $2,%r15d +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%r15d +- shrl $10,%r14d +- +- rorl $17,%r15d +- xorl %r13d,%r12d +- xorl %r14d,%r15d +- addl 36(%rsp),%r12d +- +- addl 0(%rsp),%r12d +- movl %r8d,%r13d +- addl %r15d,%r12d +- movl %eax,%r14d +- rorl $14,%r13d +- movl %r9d,%r15d +- +- xorl %r8d,%r13d +- rorl $9,%r14d +- xorl %r10d,%r15d +- +- movl %r12d,0(%rsp) +- xorl %eax,%r14d +- andl %r8d,%r15d +- +- rorl $5,%r13d +- addl %r11d,%r12d +- xorl %r10d,%r15d +- +- rorl $11,%r14d +- xorl %r8d,%r13d +- addl %r15d,%r12d +- +- movl %eax,%r15d +- addl (%rbp),%r12d +- xorl %eax,%r14d +- +- xorl %ebx,%r15d +- rorl $6,%r13d +- movl %ebx,%r11d +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%r11d +- addl %r12d,%edx +- addl %r12d,%r11d +- +- leaq 4(%rbp),%rbp +- movl 8(%rsp),%r13d +- movl 60(%rsp),%edi +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%r11d +- movl %edi,%r14d +- rorl $2,%edi +- +- xorl 
%r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%edi +- shrl $10,%r14d +- +- rorl $17,%edi +- xorl %r13d,%r12d +- xorl %r14d,%edi +- addl 40(%rsp),%r12d +- +- addl 4(%rsp),%r12d +- movl %edx,%r13d +- addl %edi,%r12d +- movl %r11d,%r14d +- rorl $14,%r13d +- movl %r8d,%edi +- +- xorl %edx,%r13d +- rorl $9,%r14d +- xorl %r9d,%edi +- +- movl %r12d,4(%rsp) +- xorl %r11d,%r14d +- andl %edx,%edi +- +- rorl $5,%r13d +- addl %r10d,%r12d +- xorl %r9d,%edi +- +- rorl $11,%r14d +- xorl %edx,%r13d +- addl %edi,%r12d +- +- movl %r11d,%edi +- addl (%rbp),%r12d +- xorl %r11d,%r14d +- +- xorl %eax,%edi +- rorl $6,%r13d +- movl %eax,%r10d +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%r10d +- addl %r12d,%ecx +- addl %r12d,%r10d +- +- leaq 4(%rbp),%rbp +- movl 12(%rsp),%r13d +- movl 0(%rsp),%r15d +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%r10d +- movl %r15d,%r14d +- rorl $2,%r15d +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%r15d +- shrl $10,%r14d +- +- rorl $17,%r15d +- xorl %r13d,%r12d +- xorl %r14d,%r15d +- addl 44(%rsp),%r12d +- +- addl 8(%rsp),%r12d +- movl %ecx,%r13d +- addl %r15d,%r12d +- movl %r10d,%r14d +- rorl $14,%r13d +- movl %edx,%r15d +- +- xorl %ecx,%r13d +- rorl $9,%r14d +- xorl %r8d,%r15d +- +- movl %r12d,8(%rsp) +- xorl %r10d,%r14d +- andl %ecx,%r15d +- +- rorl $5,%r13d +- addl %r9d,%r12d +- xorl %r8d,%r15d +- +- rorl $11,%r14d +- xorl %ecx,%r13d +- addl %r15d,%r12d +- +- movl %r10d,%r15d +- addl (%rbp),%r12d +- xorl %r10d,%r14d +- +- xorl %r11d,%r15d +- rorl $6,%r13d +- movl %r11d,%r9d +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%r9d +- addl %r12d,%ebx +- addl %r12d,%r9d +- +- leaq 4(%rbp),%rbp +- movl 16(%rsp),%r13d +- movl 4(%rsp),%edi +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%r9d +- movl %edi,%r14d +- rorl $2,%edi +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%edi +- shrl $10,%r14d +- +- rorl $17,%edi +- xorl %r13d,%r12d +- xorl %r14d,%edi +- addl 48(%rsp),%r12d +- +- addl 12(%rsp),%r12d +- movl %ebx,%r13d +- addl %edi,%r12d +- movl %r9d,%r14d +- rorl $14,%r13d +- movl %ecx,%edi +- +- xorl %ebx,%r13d +- rorl $9,%r14d +- xorl %edx,%edi +- +- movl %r12d,12(%rsp) +- xorl %r9d,%r14d +- andl %ebx,%edi +- +- rorl $5,%r13d +- addl %r8d,%r12d +- xorl %edx,%edi +- +- rorl $11,%r14d +- xorl %ebx,%r13d +- addl %edi,%r12d +- +- movl %r9d,%edi +- addl (%rbp),%r12d +- xorl %r9d,%r14d +- +- xorl %r10d,%edi +- rorl $6,%r13d +- movl %r10d,%r8d +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%r8d +- addl %r12d,%eax +- addl %r12d,%r8d +- +- leaq 20(%rbp),%rbp +- movl 20(%rsp),%r13d +- movl 8(%rsp),%r15d +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%r8d +- movl %r15d,%r14d +- rorl $2,%r15d +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%r15d +- shrl $10,%r14d +- +- rorl $17,%r15d +- xorl %r13d,%r12d +- xorl %r14d,%r15d +- addl 52(%rsp),%r12d +- +- addl 16(%rsp),%r12d +- movl %eax,%r13d +- addl %r15d,%r12d +- movl %r8d,%r14d +- rorl $14,%r13d +- movl %ebx,%r15d +- +- xorl %eax,%r13d +- rorl $9,%r14d +- xorl %ecx,%r15d +- +- movl %r12d,16(%rsp) +- xorl %r8d,%r14d +- andl %eax,%r15d +- +- rorl $5,%r13d +- addl %edx,%r12d +- xorl %ecx,%r15d +- +- rorl $11,%r14d +- xorl %eax,%r13d +- addl %r15d,%r12d +- +- movl %r8d,%r15d +- addl (%rbp),%r12d +- xorl %r8d,%r14d +- +- xorl %r9d,%r15d +- rorl $6,%r13d +- movl %r9d,%edx +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%edx +- 
addl %r12d,%r11d +- addl %r12d,%edx +- +- leaq 4(%rbp),%rbp +- movl 24(%rsp),%r13d +- movl 12(%rsp),%edi +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%edx +- movl %edi,%r14d +- rorl $2,%edi +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%edi +- shrl $10,%r14d +- +- rorl $17,%edi +- xorl %r13d,%r12d +- xorl %r14d,%edi +- addl 56(%rsp),%r12d +- +- addl 20(%rsp),%r12d +- movl %r11d,%r13d +- addl %edi,%r12d +- movl %edx,%r14d +- rorl $14,%r13d +- movl %eax,%edi +- +- xorl %r11d,%r13d +- rorl $9,%r14d +- xorl %ebx,%edi +- +- movl %r12d,20(%rsp) +- xorl %edx,%r14d +- andl %r11d,%edi +- +- rorl $5,%r13d +- addl %ecx,%r12d +- xorl %ebx,%edi +- +- rorl $11,%r14d +- xorl %r11d,%r13d +- addl %edi,%r12d +- +- movl %edx,%edi +- addl (%rbp),%r12d +- xorl %edx,%r14d +- +- xorl %r8d,%edi +- rorl $6,%r13d +- movl %r8d,%ecx +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%ecx +- addl %r12d,%r10d +- addl %r12d,%ecx +- +- leaq 4(%rbp),%rbp +- movl 28(%rsp),%r13d +- movl 16(%rsp),%r15d +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%ecx +- movl %r15d,%r14d +- rorl $2,%r15d +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%r15d +- shrl $10,%r14d +- +- rorl $17,%r15d +- xorl %r13d,%r12d +- xorl %r14d,%r15d +- addl 60(%rsp),%r12d +- +- addl 24(%rsp),%r12d +- movl %r10d,%r13d +- addl %r15d,%r12d +- movl %ecx,%r14d +- rorl $14,%r13d +- movl %r11d,%r15d +- +- xorl %r10d,%r13d +- rorl $9,%r14d +- xorl %eax,%r15d +- +- movl %r12d,24(%rsp) +- xorl %ecx,%r14d +- andl %r10d,%r15d +- +- rorl $5,%r13d +- addl %ebx,%r12d +- xorl %eax,%r15d +- +- rorl $11,%r14d +- xorl %r10d,%r13d +- addl %r15d,%r12d +- +- movl %ecx,%r15d +- addl (%rbp),%r12d +- xorl %ecx,%r14d +- +- xorl %edx,%r15d +- rorl $6,%r13d +- movl %edx,%ebx +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%ebx +- addl %r12d,%r9d +- addl %r12d,%ebx +- +- leaq 4(%rbp),%rbp +- movl 32(%rsp),%r13d +- movl 20(%rsp),%edi +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%ebx +- movl %edi,%r14d +- rorl $2,%edi +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%edi +- shrl $10,%r14d +- +- rorl $17,%edi +- xorl %r13d,%r12d +- xorl %r14d,%edi +- addl 0(%rsp),%r12d +- +- addl 28(%rsp),%r12d +- movl %r9d,%r13d +- addl %edi,%r12d +- movl %ebx,%r14d +- rorl $14,%r13d +- movl %r10d,%edi +- +- xorl %r9d,%r13d +- rorl $9,%r14d +- xorl %r11d,%edi +- +- movl %r12d,28(%rsp) +- xorl %ebx,%r14d +- andl %r9d,%edi +- +- rorl $5,%r13d +- addl %eax,%r12d +- xorl %r11d,%edi +- +- rorl $11,%r14d +- xorl %r9d,%r13d +- addl %edi,%r12d +- +- movl %ebx,%edi +- addl (%rbp),%r12d +- xorl %ebx,%r14d +- +- xorl %ecx,%edi +- rorl $6,%r13d +- movl %ecx,%eax +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%eax +- addl %r12d,%r8d +- addl %r12d,%eax +- +- leaq 20(%rbp),%rbp +- movl 36(%rsp),%r13d +- movl 24(%rsp),%r15d +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%eax +- movl %r15d,%r14d +- rorl $2,%r15d +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%r15d +- shrl $10,%r14d +- +- rorl $17,%r15d +- xorl %r13d,%r12d +- xorl %r14d,%r15d +- addl 4(%rsp),%r12d +- +- addl 32(%rsp),%r12d +- movl %r8d,%r13d +- addl %r15d,%r12d +- movl %eax,%r14d +- rorl $14,%r13d +- movl %r9d,%r15d +- +- xorl %r8d,%r13d +- rorl $9,%r14d +- xorl %r10d,%r15d +- +- movl %r12d,32(%rsp) +- xorl %eax,%r14d +- andl %r8d,%r15d +- +- rorl $5,%r13d +- addl %r11d,%r12d +- xorl %r10d,%r15d +- +- rorl $11,%r14d +- xorl %r8d,%r13d +- addl 
%r15d,%r12d +- +- movl %eax,%r15d +- addl (%rbp),%r12d +- xorl %eax,%r14d +- +- xorl %ebx,%r15d +- rorl $6,%r13d +- movl %ebx,%r11d +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%r11d +- addl %r12d,%edx +- addl %r12d,%r11d +- +- leaq 4(%rbp),%rbp +- movl 40(%rsp),%r13d +- movl 28(%rsp),%edi +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%r11d +- movl %edi,%r14d +- rorl $2,%edi +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%edi +- shrl $10,%r14d +- +- rorl $17,%edi +- xorl %r13d,%r12d +- xorl %r14d,%edi +- addl 8(%rsp),%r12d +- +- addl 36(%rsp),%r12d +- movl %edx,%r13d +- addl %edi,%r12d +- movl %r11d,%r14d +- rorl $14,%r13d +- movl %r8d,%edi +- +- xorl %edx,%r13d +- rorl $9,%r14d +- xorl %r9d,%edi +- +- movl %r12d,36(%rsp) +- xorl %r11d,%r14d +- andl %edx,%edi +- +- rorl $5,%r13d +- addl %r10d,%r12d +- xorl %r9d,%edi +- +- rorl $11,%r14d +- xorl %edx,%r13d +- addl %edi,%r12d +- +- movl %r11d,%edi +- addl (%rbp),%r12d +- xorl %r11d,%r14d +- +- xorl %eax,%edi +- rorl $6,%r13d +- movl %eax,%r10d +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%r10d +- addl %r12d,%ecx +- addl %r12d,%r10d +- +- leaq 4(%rbp),%rbp +- movl 44(%rsp),%r13d +- movl 32(%rsp),%r15d +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%r10d +- movl %r15d,%r14d +- rorl $2,%r15d +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%r15d +- shrl $10,%r14d +- +- rorl $17,%r15d +- xorl %r13d,%r12d +- xorl %r14d,%r15d +- addl 12(%rsp),%r12d +- +- addl 40(%rsp),%r12d +- movl %ecx,%r13d +- addl %r15d,%r12d +- movl %r10d,%r14d +- rorl $14,%r13d +- movl %edx,%r15d +- +- xorl %ecx,%r13d +- rorl $9,%r14d +- xorl %r8d,%r15d +- +- movl %r12d,40(%rsp) +- xorl %r10d,%r14d +- andl %ecx,%r15d +- +- rorl $5,%r13d +- addl %r9d,%r12d +- xorl %r8d,%r15d +- +- rorl $11,%r14d +- xorl %ecx,%r13d +- addl %r15d,%r12d +- +- movl %r10d,%r15d +- addl (%rbp),%r12d +- xorl %r10d,%r14d +- +- xorl %r11d,%r15d +- rorl $6,%r13d +- movl %r11d,%r9d +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%r9d +- addl %r12d,%ebx +- addl %r12d,%r9d +- +- leaq 4(%rbp),%rbp +- movl 48(%rsp),%r13d +- movl 36(%rsp),%edi +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%r9d +- movl %edi,%r14d +- rorl $2,%edi +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%edi +- shrl $10,%r14d +- +- rorl $17,%edi +- xorl %r13d,%r12d +- xorl %r14d,%edi +- addl 16(%rsp),%r12d +- +- addl 44(%rsp),%r12d +- movl %ebx,%r13d +- addl %edi,%r12d +- movl %r9d,%r14d +- rorl $14,%r13d +- movl %ecx,%edi +- +- xorl %ebx,%r13d +- rorl $9,%r14d +- xorl %edx,%edi +- +- movl %r12d,44(%rsp) +- xorl %r9d,%r14d +- andl %ebx,%edi +- +- rorl $5,%r13d +- addl %r8d,%r12d +- xorl %edx,%edi +- +- rorl $11,%r14d +- xorl %ebx,%r13d +- addl %edi,%r12d +- +- movl %r9d,%edi +- addl (%rbp),%r12d +- xorl %r9d,%r14d +- +- xorl %r10d,%edi +- rorl $6,%r13d +- movl %r10d,%r8d +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%r8d +- addl %r12d,%eax +- addl %r12d,%r8d +- +- leaq 20(%rbp),%rbp +- movl 52(%rsp),%r13d +- movl 40(%rsp),%r15d +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%r8d +- movl %r15d,%r14d +- rorl $2,%r15d +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%r15d +- shrl $10,%r14d +- +- rorl $17,%r15d +- xorl %r13d,%r12d +- xorl %r14d,%r15d +- addl 20(%rsp),%r12d +- +- addl 48(%rsp),%r12d +- movl %eax,%r13d +- addl %r15d,%r12d +- movl %r8d,%r14d +- rorl $14,%r13d +- movl %ebx,%r15d +- +- xorl 
%eax,%r13d +- rorl $9,%r14d +- xorl %ecx,%r15d +- +- movl %r12d,48(%rsp) +- xorl %r8d,%r14d +- andl %eax,%r15d +- +- rorl $5,%r13d +- addl %edx,%r12d +- xorl %ecx,%r15d +- +- rorl $11,%r14d +- xorl %eax,%r13d +- addl %r15d,%r12d +- +- movl %r8d,%r15d +- addl (%rbp),%r12d +- xorl %r8d,%r14d +- +- xorl %r9d,%r15d +- rorl $6,%r13d +- movl %r9d,%edx +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%edx +- addl %r12d,%r11d +- addl %r12d,%edx +- +- leaq 4(%rbp),%rbp +- movl 56(%rsp),%r13d +- movl 44(%rsp),%edi +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%edx +- movl %edi,%r14d +- rorl $2,%edi +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%edi +- shrl $10,%r14d +- +- rorl $17,%edi +- xorl %r13d,%r12d +- xorl %r14d,%edi +- addl 24(%rsp),%r12d +- +- addl 52(%rsp),%r12d +- movl %r11d,%r13d +- addl %edi,%r12d +- movl %edx,%r14d +- rorl $14,%r13d +- movl %eax,%edi +- +- xorl %r11d,%r13d +- rorl $9,%r14d +- xorl %ebx,%edi +- +- movl %r12d,52(%rsp) +- xorl %edx,%r14d +- andl %r11d,%edi +- +- rorl $5,%r13d +- addl %ecx,%r12d +- xorl %ebx,%edi +- +- rorl $11,%r14d +- xorl %r11d,%r13d +- addl %edi,%r12d +- +- movl %edx,%edi +- addl (%rbp),%r12d +- xorl %edx,%r14d +- +- xorl %r8d,%edi +- rorl $6,%r13d +- movl %r8d,%ecx +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%ecx +- addl %r12d,%r10d +- addl %r12d,%ecx +- +- leaq 4(%rbp),%rbp +- movl 60(%rsp),%r13d +- movl 48(%rsp),%r15d +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%ecx +- movl %r15d,%r14d +- rorl $2,%r15d +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%r15d +- shrl $10,%r14d +- +- rorl $17,%r15d +- xorl %r13d,%r12d +- xorl %r14d,%r15d +- addl 28(%rsp),%r12d +- +- addl 56(%rsp),%r12d +- movl %r10d,%r13d +- addl %r15d,%r12d +- movl %ecx,%r14d +- rorl $14,%r13d +- movl %r11d,%r15d +- +- xorl %r10d,%r13d +- rorl $9,%r14d +- xorl %eax,%r15d +- +- movl %r12d,56(%rsp) +- xorl %ecx,%r14d +- andl %r10d,%r15d +- +- rorl $5,%r13d +- addl %ebx,%r12d +- xorl %eax,%r15d +- +- rorl $11,%r14d +- xorl %r10d,%r13d +- addl %r15d,%r12d +- +- movl %ecx,%r15d +- addl (%rbp),%r12d +- xorl %ecx,%r14d +- +- xorl %edx,%r15d +- rorl $6,%r13d +- movl %edx,%ebx +- +- andl %r15d,%edi +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %edi,%ebx +- addl %r12d,%r9d +- addl %r12d,%ebx +- +- leaq 4(%rbp),%rbp +- movl 0(%rsp),%r13d +- movl 52(%rsp),%edi +- +- movl %r13d,%r12d +- rorl $11,%r13d +- addl %r14d,%ebx +- movl %edi,%r14d +- rorl $2,%edi +- +- xorl %r12d,%r13d +- shrl $3,%r12d +- rorl $7,%r13d +- xorl %r14d,%edi +- shrl $10,%r14d +- +- rorl $17,%edi +- xorl %r13d,%r12d +- xorl %r14d,%edi +- addl 32(%rsp),%r12d +- +- addl 60(%rsp),%r12d +- movl %r9d,%r13d +- addl %edi,%r12d +- movl %ebx,%r14d +- rorl $14,%r13d +- movl %r10d,%edi +- +- xorl %r9d,%r13d +- rorl $9,%r14d +- xorl %r11d,%edi +- +- movl %r12d,60(%rsp) +- xorl %ebx,%r14d +- andl %r9d,%edi +- +- rorl $5,%r13d +- addl %eax,%r12d +- xorl %r11d,%edi +- +- rorl $11,%r14d +- xorl %r9d,%r13d +- addl %edi,%r12d +- +- movl %ebx,%edi +- addl (%rbp),%r12d +- xorl %ebx,%r14d +- +- xorl %ecx,%edi +- rorl $6,%r13d +- movl %ecx,%eax +- +- andl %edi,%r15d +- rorl $2,%r14d +- addl %r13d,%r12d +- +- xorl %r15d,%eax +- addl %r12d,%r8d +- addl %r12d,%eax +- +- leaq 20(%rbp),%rbp +- cmpb $0,3(%rbp) +- jnz L$rounds_16_xx +- +- movq 64+0(%rsp),%rdi +- addl %r14d,%eax +- leaq 64(%rsi),%rsi +- +- addl 0(%rdi),%eax +- addl 4(%rdi),%ebx +- addl 8(%rdi),%ecx +- addl 12(%rdi),%edx +- addl 16(%rdi),%r8d +- addl 20(%rdi),%r9d +- 
addl 24(%rdi),%r10d +- addl 28(%rdi),%r11d +- +- cmpq 64+16(%rsp),%rsi +- +- movl %eax,0(%rdi) +- movl %ebx,4(%rdi) +- movl %ecx,8(%rdi) +- movl %edx,12(%rdi) +- movl %r8d,16(%rdi) +- movl %r9d,20(%rdi) +- movl %r10d,24(%rdi) +- movl %r11d,28(%rdi) +- jb L$loop +- +- movq 88(%rsp),%rsi +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$epilogue: +- .byte 0xf3,0xc3 +- +- +-.p2align 6 +- +-K256: +-.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +-.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +-.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +-.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +-.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +-.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +-.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +-.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +-.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +-.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +-.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +-.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +-.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +-.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +-.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +-.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +-.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +-.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +-.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +-.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +-.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +-.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +-.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +-.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +-.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +-.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +-.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +-.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +-.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +-.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +-.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +-.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +- +-.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +-.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +-.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +-.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +-.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +-.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +-.byte 83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +- +-.p2align 6 +-sha256_block_data_order_ssse3: +- +-L$ssse3_shortcut: +- movq %rsp,%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- shlq $4,%rdx +- subq $96,%rsp +- leaq (%rsi,%rdx,4),%rdx +- andq $-64,%rsp +- movq %rdi,64+0(%rsp) +- movq %rsi,64+8(%rsp) +- movq %rdx,64+16(%rsp) +- movq %rax,88(%rsp) +- +-L$prologue_ssse3: +- +- movl 0(%rdi),%eax +- movl 4(%rdi),%ebx +- movl 8(%rdi),%ecx +- movl 12(%rdi),%edx +- movl 16(%rdi),%r8d +- movl 20(%rdi),%r9d +- movl 24(%rdi),%r10d +- movl 28(%rdi),%r11d +- +- +- jmp L$loop_ssse3 +-.p2align 4 +-L$loop_ssse3: +- movdqa K256+512(%rip),%xmm7 +- movdqu 0(%rsi),%xmm0 +- movdqu 16(%rsi),%xmm1 +- movdqu 32(%rsi),%xmm2 +-.byte 102,15,56,0,199 +- movdqu 48(%rsi),%xmm3 +- leaq 
K256(%rip),%rbp +-.byte 102,15,56,0,207 +- movdqa 0(%rbp),%xmm4 +- movdqa 32(%rbp),%xmm5 +-.byte 102,15,56,0,215 +- paddd %xmm0,%xmm4 +- movdqa 64(%rbp),%xmm6 +-.byte 102,15,56,0,223 +- movdqa 96(%rbp),%xmm7 +- paddd %xmm1,%xmm5 +- paddd %xmm2,%xmm6 +- paddd %xmm3,%xmm7 +- movdqa %xmm4,0(%rsp) +- movl %eax,%r14d +- movdqa %xmm5,16(%rsp) +- movl %ebx,%edi +- movdqa %xmm6,32(%rsp) +- xorl %ecx,%edi +- movdqa %xmm7,48(%rsp) +- movl %r8d,%r13d +- jmp L$ssse3_00_47 +- +-.p2align 4 +-L$ssse3_00_47: +- subq $-128,%rbp +- rorl $14,%r13d +- movdqa %xmm1,%xmm4 +- movl %r14d,%eax +- movl %r9d,%r12d +- movdqa %xmm3,%xmm7 +- rorl $9,%r14d +- xorl %r8d,%r13d +- xorl %r10d,%r12d +- rorl $5,%r13d +- xorl %eax,%r14d +-.byte 102,15,58,15,224,4 +- andl %r8d,%r12d +- xorl %r8d,%r13d +-.byte 102,15,58,15,250,4 +- addl 0(%rsp),%r11d +- movl %eax,%r15d +- xorl %r10d,%r12d +- rorl $11,%r14d +- movdqa %xmm4,%xmm5 +- xorl %ebx,%r15d +- addl %r12d,%r11d +- movdqa %xmm4,%xmm6 +- rorl $6,%r13d +- andl %r15d,%edi +- psrld $3,%xmm4 +- xorl %eax,%r14d +- addl %r13d,%r11d +- xorl %ebx,%edi +- paddd %xmm7,%xmm0 +- rorl $2,%r14d +- addl %r11d,%edx +- psrld $7,%xmm6 +- addl %edi,%r11d +- movl %edx,%r13d +- pshufd $250,%xmm3,%xmm7 +- addl %r11d,%r14d +- rorl $14,%r13d +- pslld $14,%xmm5 +- movl %r14d,%r11d +- movl %r8d,%r12d +- pxor %xmm6,%xmm4 +- rorl $9,%r14d +- xorl %edx,%r13d +- xorl %r9d,%r12d +- rorl $5,%r13d +- psrld $11,%xmm6 +- xorl %r11d,%r14d +- pxor %xmm5,%xmm4 +- andl %edx,%r12d +- xorl %edx,%r13d +- pslld $11,%xmm5 +- addl 4(%rsp),%r10d +- movl %r11d,%edi +- pxor %xmm6,%xmm4 +- xorl %r9d,%r12d +- rorl $11,%r14d +- movdqa %xmm7,%xmm6 +- xorl %eax,%edi +- addl %r12d,%r10d +- pxor %xmm5,%xmm4 +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %r11d,%r14d +- psrld $10,%xmm7 +- addl %r13d,%r10d +- xorl %eax,%r15d +- paddd %xmm4,%xmm0 +- rorl $2,%r14d +- addl %r10d,%ecx +- psrlq $17,%xmm6 +- addl %r15d,%r10d +- movl %ecx,%r13d +- addl %r10d,%r14d +- pxor %xmm6,%xmm7 +- rorl $14,%r13d +- movl %r14d,%r10d +- movl %edx,%r12d +- rorl $9,%r14d +- psrlq $2,%xmm6 +- xorl %ecx,%r13d +- xorl %r8d,%r12d +- pxor %xmm6,%xmm7 +- rorl $5,%r13d +- xorl %r10d,%r14d +- andl %ecx,%r12d +- pshufd $128,%xmm7,%xmm7 +- xorl %ecx,%r13d +- addl 8(%rsp),%r9d +- movl %r10d,%r15d +- psrldq $8,%xmm7 +- xorl %r8d,%r12d +- rorl $11,%r14d +- xorl %r11d,%r15d +- addl %r12d,%r9d +- rorl $6,%r13d +- paddd %xmm7,%xmm0 +- andl %r15d,%edi +- xorl %r10d,%r14d +- addl %r13d,%r9d +- pshufd $80,%xmm0,%xmm7 +- xorl %r11d,%edi +- rorl $2,%r14d +- addl %r9d,%ebx +- movdqa %xmm7,%xmm6 +- addl %edi,%r9d +- movl %ebx,%r13d +- psrld $10,%xmm7 +- addl %r9d,%r14d +- rorl $14,%r13d +- psrlq $17,%xmm6 +- movl %r14d,%r9d +- movl %ecx,%r12d +- pxor %xmm6,%xmm7 +- rorl $9,%r14d +- xorl %ebx,%r13d +- xorl %edx,%r12d +- rorl $5,%r13d +- xorl %r9d,%r14d +- psrlq $2,%xmm6 +- andl %ebx,%r12d +- xorl %ebx,%r13d +- addl 12(%rsp),%r8d +- pxor %xmm6,%xmm7 +- movl %r9d,%edi +- xorl %edx,%r12d +- rorl $11,%r14d +- pshufd $8,%xmm7,%xmm7 +- xorl %r10d,%edi +- addl %r12d,%r8d +- movdqa 0(%rbp),%xmm6 +- rorl $6,%r13d +- andl %edi,%r15d +- pslldq $8,%xmm7 +- xorl %r9d,%r14d +- addl %r13d,%r8d +- xorl %r10d,%r15d +- paddd %xmm7,%xmm0 +- rorl $2,%r14d +- addl %r8d,%eax +- addl %r15d,%r8d +- paddd %xmm0,%xmm6 +- movl %eax,%r13d +- addl %r8d,%r14d +- movdqa %xmm6,0(%rsp) +- rorl $14,%r13d +- movdqa %xmm2,%xmm4 +- movl %r14d,%r8d +- movl %ebx,%r12d +- movdqa %xmm0,%xmm7 +- rorl $9,%r14d +- xorl %eax,%r13d +- xorl %ecx,%r12d +- rorl $5,%r13d +- xorl %r8d,%r14d +-.byte 102,15,58,15,225,4 +- andl 
%eax,%r12d +- xorl %eax,%r13d +-.byte 102,15,58,15,251,4 +- addl 16(%rsp),%edx +- movl %r8d,%r15d +- xorl %ecx,%r12d +- rorl $11,%r14d +- movdqa %xmm4,%xmm5 +- xorl %r9d,%r15d +- addl %r12d,%edx +- movdqa %xmm4,%xmm6 +- rorl $6,%r13d +- andl %r15d,%edi +- psrld $3,%xmm4 +- xorl %r8d,%r14d +- addl %r13d,%edx +- xorl %r9d,%edi +- paddd %xmm7,%xmm1 +- rorl $2,%r14d +- addl %edx,%r11d +- psrld $7,%xmm6 +- addl %edi,%edx +- movl %r11d,%r13d +- pshufd $250,%xmm0,%xmm7 +- addl %edx,%r14d +- rorl $14,%r13d +- pslld $14,%xmm5 +- movl %r14d,%edx +- movl %eax,%r12d +- pxor %xmm6,%xmm4 +- rorl $9,%r14d +- xorl %r11d,%r13d +- xorl %ebx,%r12d +- rorl $5,%r13d +- psrld $11,%xmm6 +- xorl %edx,%r14d +- pxor %xmm5,%xmm4 +- andl %r11d,%r12d +- xorl %r11d,%r13d +- pslld $11,%xmm5 +- addl 20(%rsp),%ecx +- movl %edx,%edi +- pxor %xmm6,%xmm4 +- xorl %ebx,%r12d +- rorl $11,%r14d +- movdqa %xmm7,%xmm6 +- xorl %r8d,%edi +- addl %r12d,%ecx +- pxor %xmm5,%xmm4 +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %edx,%r14d +- psrld $10,%xmm7 +- addl %r13d,%ecx +- xorl %r8d,%r15d +- paddd %xmm4,%xmm1 +- rorl $2,%r14d +- addl %ecx,%r10d +- psrlq $17,%xmm6 +- addl %r15d,%ecx +- movl %r10d,%r13d +- addl %ecx,%r14d +- pxor %xmm6,%xmm7 +- rorl $14,%r13d +- movl %r14d,%ecx +- movl %r11d,%r12d +- rorl $9,%r14d +- psrlq $2,%xmm6 +- xorl %r10d,%r13d +- xorl %eax,%r12d +- pxor %xmm6,%xmm7 +- rorl $5,%r13d +- xorl %ecx,%r14d +- andl %r10d,%r12d +- pshufd $128,%xmm7,%xmm7 +- xorl %r10d,%r13d +- addl 24(%rsp),%ebx +- movl %ecx,%r15d +- psrldq $8,%xmm7 +- xorl %eax,%r12d +- rorl $11,%r14d +- xorl %edx,%r15d +- addl %r12d,%ebx +- rorl $6,%r13d +- paddd %xmm7,%xmm1 +- andl %r15d,%edi +- xorl %ecx,%r14d +- addl %r13d,%ebx +- pshufd $80,%xmm1,%xmm7 +- xorl %edx,%edi +- rorl $2,%r14d +- addl %ebx,%r9d +- movdqa %xmm7,%xmm6 +- addl %edi,%ebx +- movl %r9d,%r13d +- psrld $10,%xmm7 +- addl %ebx,%r14d +- rorl $14,%r13d +- psrlq $17,%xmm6 +- movl %r14d,%ebx +- movl %r10d,%r12d +- pxor %xmm6,%xmm7 +- rorl $9,%r14d +- xorl %r9d,%r13d +- xorl %r11d,%r12d +- rorl $5,%r13d +- xorl %ebx,%r14d +- psrlq $2,%xmm6 +- andl %r9d,%r12d +- xorl %r9d,%r13d +- addl 28(%rsp),%eax +- pxor %xmm6,%xmm7 +- movl %ebx,%edi +- xorl %r11d,%r12d +- rorl $11,%r14d +- pshufd $8,%xmm7,%xmm7 +- xorl %ecx,%edi +- addl %r12d,%eax +- movdqa 32(%rbp),%xmm6 +- rorl $6,%r13d +- andl %edi,%r15d +- pslldq $8,%xmm7 +- xorl %ebx,%r14d +- addl %r13d,%eax +- xorl %ecx,%r15d +- paddd %xmm7,%xmm1 +- rorl $2,%r14d +- addl %eax,%r8d +- addl %r15d,%eax +- paddd %xmm1,%xmm6 +- movl %r8d,%r13d +- addl %eax,%r14d +- movdqa %xmm6,16(%rsp) +- rorl $14,%r13d +- movdqa %xmm3,%xmm4 +- movl %r14d,%eax +- movl %r9d,%r12d +- movdqa %xmm1,%xmm7 +- rorl $9,%r14d +- xorl %r8d,%r13d +- xorl %r10d,%r12d +- rorl $5,%r13d +- xorl %eax,%r14d +-.byte 102,15,58,15,226,4 +- andl %r8d,%r12d +- xorl %r8d,%r13d +-.byte 102,15,58,15,248,4 +- addl 32(%rsp),%r11d +- movl %eax,%r15d +- xorl %r10d,%r12d +- rorl $11,%r14d +- movdqa %xmm4,%xmm5 +- xorl %ebx,%r15d +- addl %r12d,%r11d +- movdqa %xmm4,%xmm6 +- rorl $6,%r13d +- andl %r15d,%edi +- psrld $3,%xmm4 +- xorl %eax,%r14d +- addl %r13d,%r11d +- xorl %ebx,%edi +- paddd %xmm7,%xmm2 +- rorl $2,%r14d +- addl %r11d,%edx +- psrld $7,%xmm6 +- addl %edi,%r11d +- movl %edx,%r13d +- pshufd $250,%xmm1,%xmm7 +- addl %r11d,%r14d +- rorl $14,%r13d +- pslld $14,%xmm5 +- movl %r14d,%r11d +- movl %r8d,%r12d +- pxor %xmm6,%xmm4 +- rorl $9,%r14d +- xorl %edx,%r13d +- xorl %r9d,%r12d +- rorl $5,%r13d +- psrld $11,%xmm6 +- xorl %r11d,%r14d +- pxor %xmm5,%xmm4 +- andl %edx,%r12d +- xorl 
%edx,%r13d +- pslld $11,%xmm5 +- addl 36(%rsp),%r10d +- movl %r11d,%edi +- pxor %xmm6,%xmm4 +- xorl %r9d,%r12d +- rorl $11,%r14d +- movdqa %xmm7,%xmm6 +- xorl %eax,%edi +- addl %r12d,%r10d +- pxor %xmm5,%xmm4 +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %r11d,%r14d +- psrld $10,%xmm7 +- addl %r13d,%r10d +- xorl %eax,%r15d +- paddd %xmm4,%xmm2 +- rorl $2,%r14d +- addl %r10d,%ecx +- psrlq $17,%xmm6 +- addl %r15d,%r10d +- movl %ecx,%r13d +- addl %r10d,%r14d +- pxor %xmm6,%xmm7 +- rorl $14,%r13d +- movl %r14d,%r10d +- movl %edx,%r12d +- rorl $9,%r14d +- psrlq $2,%xmm6 +- xorl %ecx,%r13d +- xorl %r8d,%r12d +- pxor %xmm6,%xmm7 +- rorl $5,%r13d +- xorl %r10d,%r14d +- andl %ecx,%r12d +- pshufd $128,%xmm7,%xmm7 +- xorl %ecx,%r13d +- addl 40(%rsp),%r9d +- movl %r10d,%r15d +- psrldq $8,%xmm7 +- xorl %r8d,%r12d +- rorl $11,%r14d +- xorl %r11d,%r15d +- addl %r12d,%r9d +- rorl $6,%r13d +- paddd %xmm7,%xmm2 +- andl %r15d,%edi +- xorl %r10d,%r14d +- addl %r13d,%r9d +- pshufd $80,%xmm2,%xmm7 +- xorl %r11d,%edi +- rorl $2,%r14d +- addl %r9d,%ebx +- movdqa %xmm7,%xmm6 +- addl %edi,%r9d +- movl %ebx,%r13d +- psrld $10,%xmm7 +- addl %r9d,%r14d +- rorl $14,%r13d +- psrlq $17,%xmm6 +- movl %r14d,%r9d +- movl %ecx,%r12d +- pxor %xmm6,%xmm7 +- rorl $9,%r14d +- xorl %ebx,%r13d +- xorl %edx,%r12d +- rorl $5,%r13d +- xorl %r9d,%r14d +- psrlq $2,%xmm6 +- andl %ebx,%r12d +- xorl %ebx,%r13d +- addl 44(%rsp),%r8d +- pxor %xmm6,%xmm7 +- movl %r9d,%edi +- xorl %edx,%r12d +- rorl $11,%r14d +- pshufd $8,%xmm7,%xmm7 +- xorl %r10d,%edi +- addl %r12d,%r8d +- movdqa 64(%rbp),%xmm6 +- rorl $6,%r13d +- andl %edi,%r15d +- pslldq $8,%xmm7 +- xorl %r9d,%r14d +- addl %r13d,%r8d +- xorl %r10d,%r15d +- paddd %xmm7,%xmm2 +- rorl $2,%r14d +- addl %r8d,%eax +- addl %r15d,%r8d +- paddd %xmm2,%xmm6 +- movl %eax,%r13d +- addl %r8d,%r14d +- movdqa %xmm6,32(%rsp) +- rorl $14,%r13d +- movdqa %xmm0,%xmm4 +- movl %r14d,%r8d +- movl %ebx,%r12d +- movdqa %xmm2,%xmm7 +- rorl $9,%r14d +- xorl %eax,%r13d +- xorl %ecx,%r12d +- rorl $5,%r13d +- xorl %r8d,%r14d +-.byte 102,15,58,15,227,4 +- andl %eax,%r12d +- xorl %eax,%r13d +-.byte 102,15,58,15,249,4 +- addl 48(%rsp),%edx +- movl %r8d,%r15d +- xorl %ecx,%r12d +- rorl $11,%r14d +- movdqa %xmm4,%xmm5 +- xorl %r9d,%r15d +- addl %r12d,%edx +- movdqa %xmm4,%xmm6 +- rorl $6,%r13d +- andl %r15d,%edi +- psrld $3,%xmm4 +- xorl %r8d,%r14d +- addl %r13d,%edx +- xorl %r9d,%edi +- paddd %xmm7,%xmm3 +- rorl $2,%r14d +- addl %edx,%r11d +- psrld $7,%xmm6 +- addl %edi,%edx +- movl %r11d,%r13d +- pshufd $250,%xmm2,%xmm7 +- addl %edx,%r14d +- rorl $14,%r13d +- pslld $14,%xmm5 +- movl %r14d,%edx +- movl %eax,%r12d +- pxor %xmm6,%xmm4 +- rorl $9,%r14d +- xorl %r11d,%r13d +- xorl %ebx,%r12d +- rorl $5,%r13d +- psrld $11,%xmm6 +- xorl %edx,%r14d +- pxor %xmm5,%xmm4 +- andl %r11d,%r12d +- xorl %r11d,%r13d +- pslld $11,%xmm5 +- addl 52(%rsp),%ecx +- movl %edx,%edi +- pxor %xmm6,%xmm4 +- xorl %ebx,%r12d +- rorl $11,%r14d +- movdqa %xmm7,%xmm6 +- xorl %r8d,%edi +- addl %r12d,%ecx +- pxor %xmm5,%xmm4 +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %edx,%r14d +- psrld $10,%xmm7 +- addl %r13d,%ecx +- xorl %r8d,%r15d +- paddd %xmm4,%xmm3 +- rorl $2,%r14d +- addl %ecx,%r10d +- psrlq $17,%xmm6 +- addl %r15d,%ecx +- movl %r10d,%r13d +- addl %ecx,%r14d +- pxor %xmm6,%xmm7 +- rorl $14,%r13d +- movl %r14d,%ecx +- movl %r11d,%r12d +- rorl $9,%r14d +- psrlq $2,%xmm6 +- xorl %r10d,%r13d +- xorl %eax,%r12d +- pxor %xmm6,%xmm7 +- rorl $5,%r13d +- xorl %ecx,%r14d +- andl %r10d,%r12d +- pshufd $128,%xmm7,%xmm7 +- xorl %r10d,%r13d +- addl 56(%rsp),%ebx 
+- movl %ecx,%r15d +- psrldq $8,%xmm7 +- xorl %eax,%r12d +- rorl $11,%r14d +- xorl %edx,%r15d +- addl %r12d,%ebx +- rorl $6,%r13d +- paddd %xmm7,%xmm3 +- andl %r15d,%edi +- xorl %ecx,%r14d +- addl %r13d,%ebx +- pshufd $80,%xmm3,%xmm7 +- xorl %edx,%edi +- rorl $2,%r14d +- addl %ebx,%r9d +- movdqa %xmm7,%xmm6 +- addl %edi,%ebx +- movl %r9d,%r13d +- psrld $10,%xmm7 +- addl %ebx,%r14d +- rorl $14,%r13d +- psrlq $17,%xmm6 +- movl %r14d,%ebx +- movl %r10d,%r12d +- pxor %xmm6,%xmm7 +- rorl $9,%r14d +- xorl %r9d,%r13d +- xorl %r11d,%r12d +- rorl $5,%r13d +- xorl %ebx,%r14d +- psrlq $2,%xmm6 +- andl %r9d,%r12d +- xorl %r9d,%r13d +- addl 60(%rsp),%eax +- pxor %xmm6,%xmm7 +- movl %ebx,%edi +- xorl %r11d,%r12d +- rorl $11,%r14d +- pshufd $8,%xmm7,%xmm7 +- xorl %ecx,%edi +- addl %r12d,%eax +- movdqa 96(%rbp),%xmm6 +- rorl $6,%r13d +- andl %edi,%r15d +- pslldq $8,%xmm7 +- xorl %ebx,%r14d +- addl %r13d,%eax +- xorl %ecx,%r15d +- paddd %xmm7,%xmm3 +- rorl $2,%r14d +- addl %eax,%r8d +- addl %r15d,%eax +- paddd %xmm3,%xmm6 +- movl %r8d,%r13d +- addl %eax,%r14d +- movdqa %xmm6,48(%rsp) +- cmpb $0,131(%rbp) +- jne L$ssse3_00_47 +- rorl $14,%r13d +- movl %r14d,%eax +- movl %r9d,%r12d +- rorl $9,%r14d +- xorl %r8d,%r13d +- xorl %r10d,%r12d +- rorl $5,%r13d +- xorl %eax,%r14d +- andl %r8d,%r12d +- xorl %r8d,%r13d +- addl 0(%rsp),%r11d +- movl %eax,%r15d +- xorl %r10d,%r12d +- rorl $11,%r14d +- xorl %ebx,%r15d +- addl %r12d,%r11d +- rorl $6,%r13d +- andl %r15d,%edi +- xorl %eax,%r14d +- addl %r13d,%r11d +- xorl %ebx,%edi +- rorl $2,%r14d +- addl %r11d,%edx +- addl %edi,%r11d +- movl %edx,%r13d +- addl %r11d,%r14d +- rorl $14,%r13d +- movl %r14d,%r11d +- movl %r8d,%r12d +- rorl $9,%r14d +- xorl %edx,%r13d +- xorl %r9d,%r12d +- rorl $5,%r13d +- xorl %r11d,%r14d +- andl %edx,%r12d +- xorl %edx,%r13d +- addl 4(%rsp),%r10d +- movl %r11d,%edi +- xorl %r9d,%r12d +- rorl $11,%r14d +- xorl %eax,%edi +- addl %r12d,%r10d +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %r11d,%r14d +- addl %r13d,%r10d +- xorl %eax,%r15d +- rorl $2,%r14d +- addl %r10d,%ecx +- addl %r15d,%r10d +- movl %ecx,%r13d +- addl %r10d,%r14d +- rorl $14,%r13d +- movl %r14d,%r10d +- movl %edx,%r12d +- rorl $9,%r14d +- xorl %ecx,%r13d +- xorl %r8d,%r12d +- rorl $5,%r13d +- xorl %r10d,%r14d +- andl %ecx,%r12d +- xorl %ecx,%r13d +- addl 8(%rsp),%r9d +- movl %r10d,%r15d +- xorl %r8d,%r12d +- rorl $11,%r14d +- xorl %r11d,%r15d +- addl %r12d,%r9d +- rorl $6,%r13d +- andl %r15d,%edi +- xorl %r10d,%r14d +- addl %r13d,%r9d +- xorl %r11d,%edi +- rorl $2,%r14d +- addl %r9d,%ebx +- addl %edi,%r9d +- movl %ebx,%r13d +- addl %r9d,%r14d +- rorl $14,%r13d +- movl %r14d,%r9d +- movl %ecx,%r12d +- rorl $9,%r14d +- xorl %ebx,%r13d +- xorl %edx,%r12d +- rorl $5,%r13d +- xorl %r9d,%r14d +- andl %ebx,%r12d +- xorl %ebx,%r13d +- addl 12(%rsp),%r8d +- movl %r9d,%edi +- xorl %edx,%r12d +- rorl $11,%r14d +- xorl %r10d,%edi +- addl %r12d,%r8d +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %r9d,%r14d +- addl %r13d,%r8d +- xorl %r10d,%r15d +- rorl $2,%r14d +- addl %r8d,%eax +- addl %r15d,%r8d +- movl %eax,%r13d +- addl %r8d,%r14d +- rorl $14,%r13d +- movl %r14d,%r8d +- movl %ebx,%r12d +- rorl $9,%r14d +- xorl %eax,%r13d +- xorl %ecx,%r12d +- rorl $5,%r13d +- xorl %r8d,%r14d +- andl %eax,%r12d +- xorl %eax,%r13d +- addl 16(%rsp),%edx +- movl %r8d,%r15d +- xorl %ecx,%r12d +- rorl $11,%r14d +- xorl %r9d,%r15d +- addl %r12d,%edx +- rorl $6,%r13d +- andl %r15d,%edi +- xorl %r8d,%r14d +- addl %r13d,%edx +- xorl %r9d,%edi +- rorl $2,%r14d +- addl %edx,%r11d +- addl %edi,%edx +- movl %r11d,%r13d +- 
addl %edx,%r14d +- rorl $14,%r13d +- movl %r14d,%edx +- movl %eax,%r12d +- rorl $9,%r14d +- xorl %r11d,%r13d +- xorl %ebx,%r12d +- rorl $5,%r13d +- xorl %edx,%r14d +- andl %r11d,%r12d +- xorl %r11d,%r13d +- addl 20(%rsp),%ecx +- movl %edx,%edi +- xorl %ebx,%r12d +- rorl $11,%r14d +- xorl %r8d,%edi +- addl %r12d,%ecx +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %edx,%r14d +- addl %r13d,%ecx +- xorl %r8d,%r15d +- rorl $2,%r14d +- addl %ecx,%r10d +- addl %r15d,%ecx +- movl %r10d,%r13d +- addl %ecx,%r14d +- rorl $14,%r13d +- movl %r14d,%ecx +- movl %r11d,%r12d +- rorl $9,%r14d +- xorl %r10d,%r13d +- xorl %eax,%r12d +- rorl $5,%r13d +- xorl %ecx,%r14d +- andl %r10d,%r12d +- xorl %r10d,%r13d +- addl 24(%rsp),%ebx +- movl %ecx,%r15d +- xorl %eax,%r12d +- rorl $11,%r14d +- xorl %edx,%r15d +- addl %r12d,%ebx +- rorl $6,%r13d +- andl %r15d,%edi +- xorl %ecx,%r14d +- addl %r13d,%ebx +- xorl %edx,%edi +- rorl $2,%r14d +- addl %ebx,%r9d +- addl %edi,%ebx +- movl %r9d,%r13d +- addl %ebx,%r14d +- rorl $14,%r13d +- movl %r14d,%ebx +- movl %r10d,%r12d +- rorl $9,%r14d +- xorl %r9d,%r13d +- xorl %r11d,%r12d +- rorl $5,%r13d +- xorl %ebx,%r14d +- andl %r9d,%r12d +- xorl %r9d,%r13d +- addl 28(%rsp),%eax +- movl %ebx,%edi +- xorl %r11d,%r12d +- rorl $11,%r14d +- xorl %ecx,%edi +- addl %r12d,%eax +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %ebx,%r14d +- addl %r13d,%eax +- xorl %ecx,%r15d +- rorl $2,%r14d +- addl %eax,%r8d +- addl %r15d,%eax +- movl %r8d,%r13d +- addl %eax,%r14d +- rorl $14,%r13d +- movl %r14d,%eax +- movl %r9d,%r12d +- rorl $9,%r14d +- xorl %r8d,%r13d +- xorl %r10d,%r12d +- rorl $5,%r13d +- xorl %eax,%r14d +- andl %r8d,%r12d +- xorl %r8d,%r13d +- addl 32(%rsp),%r11d +- movl %eax,%r15d +- xorl %r10d,%r12d +- rorl $11,%r14d +- xorl %ebx,%r15d +- addl %r12d,%r11d +- rorl $6,%r13d +- andl %r15d,%edi +- xorl %eax,%r14d +- addl %r13d,%r11d +- xorl %ebx,%edi +- rorl $2,%r14d +- addl %r11d,%edx +- addl %edi,%r11d +- movl %edx,%r13d +- addl %r11d,%r14d +- rorl $14,%r13d +- movl %r14d,%r11d +- movl %r8d,%r12d +- rorl $9,%r14d +- xorl %edx,%r13d +- xorl %r9d,%r12d +- rorl $5,%r13d +- xorl %r11d,%r14d +- andl %edx,%r12d +- xorl %edx,%r13d +- addl 36(%rsp),%r10d +- movl %r11d,%edi +- xorl %r9d,%r12d +- rorl $11,%r14d +- xorl %eax,%edi +- addl %r12d,%r10d +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %r11d,%r14d +- addl %r13d,%r10d +- xorl %eax,%r15d +- rorl $2,%r14d +- addl %r10d,%ecx +- addl %r15d,%r10d +- movl %ecx,%r13d +- addl %r10d,%r14d +- rorl $14,%r13d +- movl %r14d,%r10d +- movl %edx,%r12d +- rorl $9,%r14d +- xorl %ecx,%r13d +- xorl %r8d,%r12d +- rorl $5,%r13d +- xorl %r10d,%r14d +- andl %ecx,%r12d +- xorl %ecx,%r13d +- addl 40(%rsp),%r9d +- movl %r10d,%r15d +- xorl %r8d,%r12d +- rorl $11,%r14d +- xorl %r11d,%r15d +- addl %r12d,%r9d +- rorl $6,%r13d +- andl %r15d,%edi +- xorl %r10d,%r14d +- addl %r13d,%r9d +- xorl %r11d,%edi +- rorl $2,%r14d +- addl %r9d,%ebx +- addl %edi,%r9d +- movl %ebx,%r13d +- addl %r9d,%r14d +- rorl $14,%r13d +- movl %r14d,%r9d +- movl %ecx,%r12d +- rorl $9,%r14d +- xorl %ebx,%r13d +- xorl %edx,%r12d +- rorl $5,%r13d +- xorl %r9d,%r14d +- andl %ebx,%r12d +- xorl %ebx,%r13d +- addl 44(%rsp),%r8d +- movl %r9d,%edi +- xorl %edx,%r12d +- rorl $11,%r14d +- xorl %r10d,%edi +- addl %r12d,%r8d +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %r9d,%r14d +- addl %r13d,%r8d +- xorl %r10d,%r15d +- rorl $2,%r14d +- addl %r8d,%eax +- addl %r15d,%r8d +- movl %eax,%r13d +- addl %r8d,%r14d +- rorl $14,%r13d +- movl %r14d,%r8d +- movl %ebx,%r12d +- rorl $9,%r14d +- xorl %eax,%r13d +- xorl %ecx,%r12d 
+- rorl $5,%r13d +- xorl %r8d,%r14d +- andl %eax,%r12d +- xorl %eax,%r13d +- addl 48(%rsp),%edx +- movl %r8d,%r15d +- xorl %ecx,%r12d +- rorl $11,%r14d +- xorl %r9d,%r15d +- addl %r12d,%edx +- rorl $6,%r13d +- andl %r15d,%edi +- xorl %r8d,%r14d +- addl %r13d,%edx +- xorl %r9d,%edi +- rorl $2,%r14d +- addl %edx,%r11d +- addl %edi,%edx +- movl %r11d,%r13d +- addl %edx,%r14d +- rorl $14,%r13d +- movl %r14d,%edx +- movl %eax,%r12d +- rorl $9,%r14d +- xorl %r11d,%r13d +- xorl %ebx,%r12d +- rorl $5,%r13d +- xorl %edx,%r14d +- andl %r11d,%r12d +- xorl %r11d,%r13d +- addl 52(%rsp),%ecx +- movl %edx,%edi +- xorl %ebx,%r12d +- rorl $11,%r14d +- xorl %r8d,%edi +- addl %r12d,%ecx +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %edx,%r14d +- addl %r13d,%ecx +- xorl %r8d,%r15d +- rorl $2,%r14d +- addl %ecx,%r10d +- addl %r15d,%ecx +- movl %r10d,%r13d +- addl %ecx,%r14d +- rorl $14,%r13d +- movl %r14d,%ecx +- movl %r11d,%r12d +- rorl $9,%r14d +- xorl %r10d,%r13d +- xorl %eax,%r12d +- rorl $5,%r13d +- xorl %ecx,%r14d +- andl %r10d,%r12d +- xorl %r10d,%r13d +- addl 56(%rsp),%ebx +- movl %ecx,%r15d +- xorl %eax,%r12d +- rorl $11,%r14d +- xorl %edx,%r15d +- addl %r12d,%ebx +- rorl $6,%r13d +- andl %r15d,%edi +- xorl %ecx,%r14d +- addl %r13d,%ebx +- xorl %edx,%edi +- rorl $2,%r14d +- addl %ebx,%r9d +- addl %edi,%ebx +- movl %r9d,%r13d +- addl %ebx,%r14d +- rorl $14,%r13d +- movl %r14d,%ebx +- movl %r10d,%r12d +- rorl $9,%r14d +- xorl %r9d,%r13d +- xorl %r11d,%r12d +- rorl $5,%r13d +- xorl %ebx,%r14d +- andl %r9d,%r12d +- xorl %r9d,%r13d +- addl 60(%rsp),%eax +- movl %ebx,%edi +- xorl %r11d,%r12d +- rorl $11,%r14d +- xorl %ecx,%edi +- addl %r12d,%eax +- rorl $6,%r13d +- andl %edi,%r15d +- xorl %ebx,%r14d +- addl %r13d,%eax +- xorl %ecx,%r15d +- rorl $2,%r14d +- addl %eax,%r8d +- addl %r15d,%eax +- movl %r8d,%r13d +- addl %eax,%r14d +- movq 64+0(%rsp),%rdi +- movl %r14d,%eax +- +- addl 0(%rdi),%eax +- leaq 64(%rsi),%rsi +- addl 4(%rdi),%ebx +- addl 8(%rdi),%ecx +- addl 12(%rdi),%edx +- addl 16(%rdi),%r8d +- addl 20(%rdi),%r9d +- addl 24(%rdi),%r10d +- addl 28(%rdi),%r11d +- +- cmpq 64+16(%rsp),%rsi +- +- movl %eax,0(%rdi) +- movl %ebx,4(%rdi) +- movl %ecx,8(%rdi) +- movl %edx,12(%rdi) +- movl %r8d,16(%rdi) +- movl %r9d,20(%rdi) +- movl %r10d,24(%rdi) +- movl %r11d,28(%rdi) +- jb L$loop_ssse3 +- +- movq 88(%rsp),%rsi +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$epilogue_ssse3: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 6 +-sha256_block_data_order_avx: +- +-L$avx_shortcut: +- movq %rsp,%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- shlq $4,%rdx +- subq $96,%rsp +- leaq (%rsi,%rdx,4),%rdx +- andq $-64,%rsp +- movq %rdi,64+0(%rsp) +- movq %rsi,64+8(%rsp) +- movq %rdx,64+16(%rsp) +- movq %rax,88(%rsp) +- +-L$prologue_avx: +- +- vzeroupper +- movl 0(%rdi),%eax +- movl 4(%rdi),%ebx +- movl 8(%rdi),%ecx +- movl 12(%rdi),%edx +- movl 16(%rdi),%r8d +- movl 20(%rdi),%r9d +- movl 24(%rdi),%r10d +- movl 28(%rdi),%r11d +- vmovdqa K256+512+32(%rip),%xmm8 +- vmovdqa K256+512+64(%rip),%xmm9 +- jmp L$loop_avx +-.p2align 4 +-L$loop_avx: +- vmovdqa K256+512(%rip),%xmm7 +- vmovdqu 0(%rsi),%xmm0 +- vmovdqu 16(%rsi),%xmm1 +- vmovdqu 32(%rsi),%xmm2 +- vmovdqu 48(%rsi),%xmm3 +- vpshufb %xmm7,%xmm0,%xmm0 +- leaq K256(%rip),%rbp +- vpshufb %xmm7,%xmm1,%xmm1 +- vpshufb %xmm7,%xmm2,%xmm2 +- vpaddd 0(%rbp),%xmm0,%xmm4 +- vpshufb %xmm7,%xmm3,%xmm3 
+- vpaddd 32(%rbp),%xmm1,%xmm5 +- vpaddd 64(%rbp),%xmm2,%xmm6 +- vpaddd 96(%rbp),%xmm3,%xmm7 +- vmovdqa %xmm4,0(%rsp) +- movl %eax,%r14d +- vmovdqa %xmm5,16(%rsp) +- movl %ebx,%edi +- vmovdqa %xmm6,32(%rsp) +- xorl %ecx,%edi +- vmovdqa %xmm7,48(%rsp) +- movl %r8d,%r13d +- jmp L$avx_00_47 +- +-.p2align 4 +-L$avx_00_47: +- subq $-128,%rbp +- vpalignr $4,%xmm0,%xmm1,%xmm4 +- shrdl $14,%r13d,%r13d +- movl %r14d,%eax +- movl %r9d,%r12d +- vpalignr $4,%xmm2,%xmm3,%xmm7 +- shrdl $9,%r14d,%r14d +- xorl %r8d,%r13d +- xorl %r10d,%r12d +- vpsrld $7,%xmm4,%xmm6 +- shrdl $5,%r13d,%r13d +- xorl %eax,%r14d +- andl %r8d,%r12d +- vpaddd %xmm7,%xmm0,%xmm0 +- xorl %r8d,%r13d +- addl 0(%rsp),%r11d +- movl %eax,%r15d +- vpsrld $3,%xmm4,%xmm7 +- xorl %r10d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %ebx,%r15d +- vpslld $14,%xmm4,%xmm5 +- addl %r12d,%r11d +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- vpxor %xmm6,%xmm7,%xmm4 +- xorl %eax,%r14d +- addl %r13d,%r11d +- xorl %ebx,%edi +- vpshufd $250,%xmm3,%xmm7 +- shrdl $2,%r14d,%r14d +- addl %r11d,%edx +- addl %edi,%r11d +- vpsrld $11,%xmm6,%xmm6 +- movl %edx,%r13d +- addl %r11d,%r14d +- shrdl $14,%r13d,%r13d +- vpxor %xmm5,%xmm4,%xmm4 +- movl %r14d,%r11d +- movl %r8d,%r12d +- shrdl $9,%r14d,%r14d +- vpslld $11,%xmm5,%xmm5 +- xorl %edx,%r13d +- xorl %r9d,%r12d +- shrdl $5,%r13d,%r13d +- vpxor %xmm6,%xmm4,%xmm4 +- xorl %r11d,%r14d +- andl %edx,%r12d +- xorl %edx,%r13d +- vpsrld $10,%xmm7,%xmm6 +- addl 4(%rsp),%r10d +- movl %r11d,%edi +- xorl %r9d,%r12d +- vpxor %xmm5,%xmm4,%xmm4 +- shrdl $11,%r14d,%r14d +- xorl %eax,%edi +- addl %r12d,%r10d +- vpsrlq $17,%xmm7,%xmm7 +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %r11d,%r14d +- vpaddd %xmm4,%xmm0,%xmm0 +- addl %r13d,%r10d +- xorl %eax,%r15d +- shrdl $2,%r14d,%r14d +- vpxor %xmm7,%xmm6,%xmm6 +- addl %r10d,%ecx +- addl %r15d,%r10d +- movl %ecx,%r13d +- vpsrlq $2,%xmm7,%xmm7 +- addl %r10d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r10d +- vpxor %xmm7,%xmm6,%xmm6 +- movl %edx,%r12d +- shrdl $9,%r14d,%r14d +- xorl %ecx,%r13d +- vpshufb %xmm8,%xmm6,%xmm6 +- xorl %r8d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r10d,%r14d +- vpaddd %xmm6,%xmm0,%xmm0 +- andl %ecx,%r12d +- xorl %ecx,%r13d +- addl 8(%rsp),%r9d +- vpshufd $80,%xmm0,%xmm7 +- movl %r10d,%r15d +- xorl %r8d,%r12d +- shrdl $11,%r14d,%r14d +- vpsrld $10,%xmm7,%xmm6 +- xorl %r11d,%r15d +- addl %r12d,%r9d +- shrdl $6,%r13d,%r13d +- vpsrlq $17,%xmm7,%xmm7 +- andl %r15d,%edi +- xorl %r10d,%r14d +- addl %r13d,%r9d +- vpxor %xmm7,%xmm6,%xmm6 +- xorl %r11d,%edi +- shrdl $2,%r14d,%r14d +- addl %r9d,%ebx +- vpsrlq $2,%xmm7,%xmm7 +- addl %edi,%r9d +- movl %ebx,%r13d +- addl %r9d,%r14d +- vpxor %xmm7,%xmm6,%xmm6 +- shrdl $14,%r13d,%r13d +- movl %r14d,%r9d +- movl %ecx,%r12d +- vpshufb %xmm9,%xmm6,%xmm6 +- shrdl $9,%r14d,%r14d +- xorl %ebx,%r13d +- xorl %edx,%r12d +- vpaddd %xmm6,%xmm0,%xmm0 +- shrdl $5,%r13d,%r13d +- xorl %r9d,%r14d +- andl %ebx,%r12d +- vpaddd 0(%rbp),%xmm0,%xmm6 +- xorl %ebx,%r13d +- addl 12(%rsp),%r8d +- movl %r9d,%edi +- xorl %edx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r10d,%edi +- addl %r12d,%r8d +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %r9d,%r14d +- addl %r13d,%r8d +- xorl %r10d,%r15d +- shrdl $2,%r14d,%r14d +- addl %r8d,%eax +- addl %r15d,%r8d +- movl %eax,%r13d +- addl %r8d,%r14d +- vmovdqa %xmm6,0(%rsp) +- vpalignr $4,%xmm1,%xmm2,%xmm4 +- shrdl $14,%r13d,%r13d +- movl %r14d,%r8d +- movl %ebx,%r12d +- vpalignr $4,%xmm3,%xmm0,%xmm7 +- shrdl $9,%r14d,%r14d +- xorl %eax,%r13d +- xorl %ecx,%r12d +- vpsrld $7,%xmm4,%xmm6 +- shrdl $5,%r13d,%r13d +- 
xorl %r8d,%r14d +- andl %eax,%r12d +- vpaddd %xmm7,%xmm1,%xmm1 +- xorl %eax,%r13d +- addl 16(%rsp),%edx +- movl %r8d,%r15d +- vpsrld $3,%xmm4,%xmm7 +- xorl %ecx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r9d,%r15d +- vpslld $14,%xmm4,%xmm5 +- addl %r12d,%edx +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- vpxor %xmm6,%xmm7,%xmm4 +- xorl %r8d,%r14d +- addl %r13d,%edx +- xorl %r9d,%edi +- vpshufd $250,%xmm0,%xmm7 +- shrdl $2,%r14d,%r14d +- addl %edx,%r11d +- addl %edi,%edx +- vpsrld $11,%xmm6,%xmm6 +- movl %r11d,%r13d +- addl %edx,%r14d +- shrdl $14,%r13d,%r13d +- vpxor %xmm5,%xmm4,%xmm4 +- movl %r14d,%edx +- movl %eax,%r12d +- shrdl $9,%r14d,%r14d +- vpslld $11,%xmm5,%xmm5 +- xorl %r11d,%r13d +- xorl %ebx,%r12d +- shrdl $5,%r13d,%r13d +- vpxor %xmm6,%xmm4,%xmm4 +- xorl %edx,%r14d +- andl %r11d,%r12d +- xorl %r11d,%r13d +- vpsrld $10,%xmm7,%xmm6 +- addl 20(%rsp),%ecx +- movl %edx,%edi +- xorl %ebx,%r12d +- vpxor %xmm5,%xmm4,%xmm4 +- shrdl $11,%r14d,%r14d +- xorl %r8d,%edi +- addl %r12d,%ecx +- vpsrlq $17,%xmm7,%xmm7 +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %edx,%r14d +- vpaddd %xmm4,%xmm1,%xmm1 +- addl %r13d,%ecx +- xorl %r8d,%r15d +- shrdl $2,%r14d,%r14d +- vpxor %xmm7,%xmm6,%xmm6 +- addl %ecx,%r10d +- addl %r15d,%ecx +- movl %r10d,%r13d +- vpsrlq $2,%xmm7,%xmm7 +- addl %ecx,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%ecx +- vpxor %xmm7,%xmm6,%xmm6 +- movl %r11d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r10d,%r13d +- vpshufb %xmm8,%xmm6,%xmm6 +- xorl %eax,%r12d +- shrdl $5,%r13d,%r13d +- xorl %ecx,%r14d +- vpaddd %xmm6,%xmm1,%xmm1 +- andl %r10d,%r12d +- xorl %r10d,%r13d +- addl 24(%rsp),%ebx +- vpshufd $80,%xmm1,%xmm7 +- movl %ecx,%r15d +- xorl %eax,%r12d +- shrdl $11,%r14d,%r14d +- vpsrld $10,%xmm7,%xmm6 +- xorl %edx,%r15d +- addl %r12d,%ebx +- shrdl $6,%r13d,%r13d +- vpsrlq $17,%xmm7,%xmm7 +- andl %r15d,%edi +- xorl %ecx,%r14d +- addl %r13d,%ebx +- vpxor %xmm7,%xmm6,%xmm6 +- xorl %edx,%edi +- shrdl $2,%r14d,%r14d +- addl %ebx,%r9d +- vpsrlq $2,%xmm7,%xmm7 +- addl %edi,%ebx +- movl %r9d,%r13d +- addl %ebx,%r14d +- vpxor %xmm7,%xmm6,%xmm6 +- shrdl $14,%r13d,%r13d +- movl %r14d,%ebx +- movl %r10d,%r12d +- vpshufb %xmm9,%xmm6,%xmm6 +- shrdl $9,%r14d,%r14d +- xorl %r9d,%r13d +- xorl %r11d,%r12d +- vpaddd %xmm6,%xmm1,%xmm1 +- shrdl $5,%r13d,%r13d +- xorl %ebx,%r14d +- andl %r9d,%r12d +- vpaddd 32(%rbp),%xmm1,%xmm6 +- xorl %r9d,%r13d +- addl 28(%rsp),%eax +- movl %ebx,%edi +- xorl %r11d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %ecx,%edi +- addl %r12d,%eax +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %ebx,%r14d +- addl %r13d,%eax +- xorl %ecx,%r15d +- shrdl $2,%r14d,%r14d +- addl %eax,%r8d +- addl %r15d,%eax +- movl %r8d,%r13d +- addl %eax,%r14d +- vmovdqa %xmm6,16(%rsp) +- vpalignr $4,%xmm2,%xmm3,%xmm4 +- shrdl $14,%r13d,%r13d +- movl %r14d,%eax +- movl %r9d,%r12d +- vpalignr $4,%xmm0,%xmm1,%xmm7 +- shrdl $9,%r14d,%r14d +- xorl %r8d,%r13d +- xorl %r10d,%r12d +- vpsrld $7,%xmm4,%xmm6 +- shrdl $5,%r13d,%r13d +- xorl %eax,%r14d +- andl %r8d,%r12d +- vpaddd %xmm7,%xmm2,%xmm2 +- xorl %r8d,%r13d +- addl 32(%rsp),%r11d +- movl %eax,%r15d +- vpsrld $3,%xmm4,%xmm7 +- xorl %r10d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %ebx,%r15d +- vpslld $14,%xmm4,%xmm5 +- addl %r12d,%r11d +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- vpxor %xmm6,%xmm7,%xmm4 +- xorl %eax,%r14d +- addl %r13d,%r11d +- xorl %ebx,%edi +- vpshufd $250,%xmm1,%xmm7 +- shrdl $2,%r14d,%r14d +- addl %r11d,%edx +- addl %edi,%r11d +- vpsrld $11,%xmm6,%xmm6 +- movl %edx,%r13d +- addl %r11d,%r14d +- shrdl $14,%r13d,%r13d +- vpxor 
%xmm5,%xmm4,%xmm4 +- movl %r14d,%r11d +- movl %r8d,%r12d +- shrdl $9,%r14d,%r14d +- vpslld $11,%xmm5,%xmm5 +- xorl %edx,%r13d +- xorl %r9d,%r12d +- shrdl $5,%r13d,%r13d +- vpxor %xmm6,%xmm4,%xmm4 +- xorl %r11d,%r14d +- andl %edx,%r12d +- xorl %edx,%r13d +- vpsrld $10,%xmm7,%xmm6 +- addl 36(%rsp),%r10d +- movl %r11d,%edi +- xorl %r9d,%r12d +- vpxor %xmm5,%xmm4,%xmm4 +- shrdl $11,%r14d,%r14d +- xorl %eax,%edi +- addl %r12d,%r10d +- vpsrlq $17,%xmm7,%xmm7 +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %r11d,%r14d +- vpaddd %xmm4,%xmm2,%xmm2 +- addl %r13d,%r10d +- xorl %eax,%r15d +- shrdl $2,%r14d,%r14d +- vpxor %xmm7,%xmm6,%xmm6 +- addl %r10d,%ecx +- addl %r15d,%r10d +- movl %ecx,%r13d +- vpsrlq $2,%xmm7,%xmm7 +- addl %r10d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r10d +- vpxor %xmm7,%xmm6,%xmm6 +- movl %edx,%r12d +- shrdl $9,%r14d,%r14d +- xorl %ecx,%r13d +- vpshufb %xmm8,%xmm6,%xmm6 +- xorl %r8d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r10d,%r14d +- vpaddd %xmm6,%xmm2,%xmm2 +- andl %ecx,%r12d +- xorl %ecx,%r13d +- addl 40(%rsp),%r9d +- vpshufd $80,%xmm2,%xmm7 +- movl %r10d,%r15d +- xorl %r8d,%r12d +- shrdl $11,%r14d,%r14d +- vpsrld $10,%xmm7,%xmm6 +- xorl %r11d,%r15d +- addl %r12d,%r9d +- shrdl $6,%r13d,%r13d +- vpsrlq $17,%xmm7,%xmm7 +- andl %r15d,%edi +- xorl %r10d,%r14d +- addl %r13d,%r9d +- vpxor %xmm7,%xmm6,%xmm6 +- xorl %r11d,%edi +- shrdl $2,%r14d,%r14d +- addl %r9d,%ebx +- vpsrlq $2,%xmm7,%xmm7 +- addl %edi,%r9d +- movl %ebx,%r13d +- addl %r9d,%r14d +- vpxor %xmm7,%xmm6,%xmm6 +- shrdl $14,%r13d,%r13d +- movl %r14d,%r9d +- movl %ecx,%r12d +- vpshufb %xmm9,%xmm6,%xmm6 +- shrdl $9,%r14d,%r14d +- xorl %ebx,%r13d +- xorl %edx,%r12d +- vpaddd %xmm6,%xmm2,%xmm2 +- shrdl $5,%r13d,%r13d +- xorl %r9d,%r14d +- andl %ebx,%r12d +- vpaddd 64(%rbp),%xmm2,%xmm6 +- xorl %ebx,%r13d +- addl 44(%rsp),%r8d +- movl %r9d,%edi +- xorl %edx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r10d,%edi +- addl %r12d,%r8d +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %r9d,%r14d +- addl %r13d,%r8d +- xorl %r10d,%r15d +- shrdl $2,%r14d,%r14d +- addl %r8d,%eax +- addl %r15d,%r8d +- movl %eax,%r13d +- addl %r8d,%r14d +- vmovdqa %xmm6,32(%rsp) +- vpalignr $4,%xmm3,%xmm0,%xmm4 +- shrdl $14,%r13d,%r13d +- movl %r14d,%r8d +- movl %ebx,%r12d +- vpalignr $4,%xmm1,%xmm2,%xmm7 +- shrdl $9,%r14d,%r14d +- xorl %eax,%r13d +- xorl %ecx,%r12d +- vpsrld $7,%xmm4,%xmm6 +- shrdl $5,%r13d,%r13d +- xorl %r8d,%r14d +- andl %eax,%r12d +- vpaddd %xmm7,%xmm3,%xmm3 +- xorl %eax,%r13d +- addl 48(%rsp),%edx +- movl %r8d,%r15d +- vpsrld $3,%xmm4,%xmm7 +- xorl %ecx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r9d,%r15d +- vpslld $14,%xmm4,%xmm5 +- addl %r12d,%edx +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- vpxor %xmm6,%xmm7,%xmm4 +- xorl %r8d,%r14d +- addl %r13d,%edx +- xorl %r9d,%edi +- vpshufd $250,%xmm2,%xmm7 +- shrdl $2,%r14d,%r14d +- addl %edx,%r11d +- addl %edi,%edx +- vpsrld $11,%xmm6,%xmm6 +- movl %r11d,%r13d +- addl %edx,%r14d +- shrdl $14,%r13d,%r13d +- vpxor %xmm5,%xmm4,%xmm4 +- movl %r14d,%edx +- movl %eax,%r12d +- shrdl $9,%r14d,%r14d +- vpslld $11,%xmm5,%xmm5 +- xorl %r11d,%r13d +- xorl %ebx,%r12d +- shrdl $5,%r13d,%r13d +- vpxor %xmm6,%xmm4,%xmm4 +- xorl %edx,%r14d +- andl %r11d,%r12d +- xorl %r11d,%r13d +- vpsrld $10,%xmm7,%xmm6 +- addl 52(%rsp),%ecx +- movl %edx,%edi +- xorl %ebx,%r12d +- vpxor %xmm5,%xmm4,%xmm4 +- shrdl $11,%r14d,%r14d +- xorl %r8d,%edi +- addl %r12d,%ecx +- vpsrlq $17,%xmm7,%xmm7 +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %edx,%r14d +- vpaddd %xmm4,%xmm3,%xmm3 +- addl %r13d,%ecx +- xorl %r8d,%r15d 
+- shrdl $2,%r14d,%r14d +- vpxor %xmm7,%xmm6,%xmm6 +- addl %ecx,%r10d +- addl %r15d,%ecx +- movl %r10d,%r13d +- vpsrlq $2,%xmm7,%xmm7 +- addl %ecx,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%ecx +- vpxor %xmm7,%xmm6,%xmm6 +- movl %r11d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r10d,%r13d +- vpshufb %xmm8,%xmm6,%xmm6 +- xorl %eax,%r12d +- shrdl $5,%r13d,%r13d +- xorl %ecx,%r14d +- vpaddd %xmm6,%xmm3,%xmm3 +- andl %r10d,%r12d +- xorl %r10d,%r13d +- addl 56(%rsp),%ebx +- vpshufd $80,%xmm3,%xmm7 +- movl %ecx,%r15d +- xorl %eax,%r12d +- shrdl $11,%r14d,%r14d +- vpsrld $10,%xmm7,%xmm6 +- xorl %edx,%r15d +- addl %r12d,%ebx +- shrdl $6,%r13d,%r13d +- vpsrlq $17,%xmm7,%xmm7 +- andl %r15d,%edi +- xorl %ecx,%r14d +- addl %r13d,%ebx +- vpxor %xmm7,%xmm6,%xmm6 +- xorl %edx,%edi +- shrdl $2,%r14d,%r14d +- addl %ebx,%r9d +- vpsrlq $2,%xmm7,%xmm7 +- addl %edi,%ebx +- movl %r9d,%r13d +- addl %ebx,%r14d +- vpxor %xmm7,%xmm6,%xmm6 +- shrdl $14,%r13d,%r13d +- movl %r14d,%ebx +- movl %r10d,%r12d +- vpshufb %xmm9,%xmm6,%xmm6 +- shrdl $9,%r14d,%r14d +- xorl %r9d,%r13d +- xorl %r11d,%r12d +- vpaddd %xmm6,%xmm3,%xmm3 +- shrdl $5,%r13d,%r13d +- xorl %ebx,%r14d +- andl %r9d,%r12d +- vpaddd 96(%rbp),%xmm3,%xmm6 +- xorl %r9d,%r13d +- addl 60(%rsp),%eax +- movl %ebx,%edi +- xorl %r11d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %ecx,%edi +- addl %r12d,%eax +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %ebx,%r14d +- addl %r13d,%eax +- xorl %ecx,%r15d +- shrdl $2,%r14d,%r14d +- addl %eax,%r8d +- addl %r15d,%eax +- movl %r8d,%r13d +- addl %eax,%r14d +- vmovdqa %xmm6,48(%rsp) +- cmpb $0,131(%rbp) +- jne L$avx_00_47 +- shrdl $14,%r13d,%r13d +- movl %r14d,%eax +- movl %r9d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r8d,%r13d +- xorl %r10d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %eax,%r14d +- andl %r8d,%r12d +- xorl %r8d,%r13d +- addl 0(%rsp),%r11d +- movl %eax,%r15d +- xorl %r10d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %ebx,%r15d +- addl %r12d,%r11d +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- xorl %eax,%r14d +- addl %r13d,%r11d +- xorl %ebx,%edi +- shrdl $2,%r14d,%r14d +- addl %r11d,%edx +- addl %edi,%r11d +- movl %edx,%r13d +- addl %r11d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r11d +- movl %r8d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %edx,%r13d +- xorl %r9d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r11d,%r14d +- andl %edx,%r12d +- xorl %edx,%r13d +- addl 4(%rsp),%r10d +- movl %r11d,%edi +- xorl %r9d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %eax,%edi +- addl %r12d,%r10d +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %r11d,%r14d +- addl %r13d,%r10d +- xorl %eax,%r15d +- shrdl $2,%r14d,%r14d +- addl %r10d,%ecx +- addl %r15d,%r10d +- movl %ecx,%r13d +- addl %r10d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r10d +- movl %edx,%r12d +- shrdl $9,%r14d,%r14d +- xorl %ecx,%r13d +- xorl %r8d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r10d,%r14d +- andl %ecx,%r12d +- xorl %ecx,%r13d +- addl 8(%rsp),%r9d +- movl %r10d,%r15d +- xorl %r8d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r11d,%r15d +- addl %r12d,%r9d +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- xorl %r10d,%r14d +- addl %r13d,%r9d +- xorl %r11d,%edi +- shrdl $2,%r14d,%r14d +- addl %r9d,%ebx +- addl %edi,%r9d +- movl %ebx,%r13d +- addl %r9d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r9d +- movl %ecx,%r12d +- shrdl $9,%r14d,%r14d +- xorl %ebx,%r13d +- xorl %edx,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r9d,%r14d +- andl %ebx,%r12d +- xorl %ebx,%r13d +- addl 12(%rsp),%r8d +- movl %r9d,%edi +- xorl %edx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r10d,%edi +- addl %r12d,%r8d +- shrdl 
$6,%r13d,%r13d +- andl %edi,%r15d +- xorl %r9d,%r14d +- addl %r13d,%r8d +- xorl %r10d,%r15d +- shrdl $2,%r14d,%r14d +- addl %r8d,%eax +- addl %r15d,%r8d +- movl %eax,%r13d +- addl %r8d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r8d +- movl %ebx,%r12d +- shrdl $9,%r14d,%r14d +- xorl %eax,%r13d +- xorl %ecx,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r8d,%r14d +- andl %eax,%r12d +- xorl %eax,%r13d +- addl 16(%rsp),%edx +- movl %r8d,%r15d +- xorl %ecx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r9d,%r15d +- addl %r12d,%edx +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- xorl %r8d,%r14d +- addl %r13d,%edx +- xorl %r9d,%edi +- shrdl $2,%r14d,%r14d +- addl %edx,%r11d +- addl %edi,%edx +- movl %r11d,%r13d +- addl %edx,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%edx +- movl %eax,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r11d,%r13d +- xorl %ebx,%r12d +- shrdl $5,%r13d,%r13d +- xorl %edx,%r14d +- andl %r11d,%r12d +- xorl %r11d,%r13d +- addl 20(%rsp),%ecx +- movl %edx,%edi +- xorl %ebx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r8d,%edi +- addl %r12d,%ecx +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %edx,%r14d +- addl %r13d,%ecx +- xorl %r8d,%r15d +- shrdl $2,%r14d,%r14d +- addl %ecx,%r10d +- addl %r15d,%ecx +- movl %r10d,%r13d +- addl %ecx,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%ecx +- movl %r11d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r10d,%r13d +- xorl %eax,%r12d +- shrdl $5,%r13d,%r13d +- xorl %ecx,%r14d +- andl %r10d,%r12d +- xorl %r10d,%r13d +- addl 24(%rsp),%ebx +- movl %ecx,%r15d +- xorl %eax,%r12d +- shrdl $11,%r14d,%r14d +- xorl %edx,%r15d +- addl %r12d,%ebx +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- xorl %ecx,%r14d +- addl %r13d,%ebx +- xorl %edx,%edi +- shrdl $2,%r14d,%r14d +- addl %ebx,%r9d +- addl %edi,%ebx +- movl %r9d,%r13d +- addl %ebx,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%ebx +- movl %r10d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r9d,%r13d +- xorl %r11d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %ebx,%r14d +- andl %r9d,%r12d +- xorl %r9d,%r13d +- addl 28(%rsp),%eax +- movl %ebx,%edi +- xorl %r11d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %ecx,%edi +- addl %r12d,%eax +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %ebx,%r14d +- addl %r13d,%eax +- xorl %ecx,%r15d +- shrdl $2,%r14d,%r14d +- addl %eax,%r8d +- addl %r15d,%eax +- movl %r8d,%r13d +- addl %eax,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%eax +- movl %r9d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r8d,%r13d +- xorl %r10d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %eax,%r14d +- andl %r8d,%r12d +- xorl %r8d,%r13d +- addl 32(%rsp),%r11d +- movl %eax,%r15d +- xorl %r10d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %ebx,%r15d +- addl %r12d,%r11d +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- xorl %eax,%r14d +- addl %r13d,%r11d +- xorl %ebx,%edi +- shrdl $2,%r14d,%r14d +- addl %r11d,%edx +- addl %edi,%r11d +- movl %edx,%r13d +- addl %r11d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r11d +- movl %r8d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %edx,%r13d +- xorl %r9d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r11d,%r14d +- andl %edx,%r12d +- xorl %edx,%r13d +- addl 36(%rsp),%r10d +- movl %r11d,%edi +- xorl %r9d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %eax,%edi +- addl %r12d,%r10d +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %r11d,%r14d +- addl %r13d,%r10d +- xorl %eax,%r15d +- shrdl $2,%r14d,%r14d +- addl %r10d,%ecx +- addl %r15d,%r10d +- movl %ecx,%r13d +- addl %r10d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r10d +- movl %edx,%r12d +- shrdl $9,%r14d,%r14d +- xorl %ecx,%r13d +- xorl %r8d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r10d,%r14d +- 
andl %ecx,%r12d +- xorl %ecx,%r13d +- addl 40(%rsp),%r9d +- movl %r10d,%r15d +- xorl %r8d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r11d,%r15d +- addl %r12d,%r9d +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- xorl %r10d,%r14d +- addl %r13d,%r9d +- xorl %r11d,%edi +- shrdl $2,%r14d,%r14d +- addl %r9d,%ebx +- addl %edi,%r9d +- movl %ebx,%r13d +- addl %r9d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r9d +- movl %ecx,%r12d +- shrdl $9,%r14d,%r14d +- xorl %ebx,%r13d +- xorl %edx,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r9d,%r14d +- andl %ebx,%r12d +- xorl %ebx,%r13d +- addl 44(%rsp),%r8d +- movl %r9d,%edi +- xorl %edx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r10d,%edi +- addl %r12d,%r8d +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %r9d,%r14d +- addl %r13d,%r8d +- xorl %r10d,%r15d +- shrdl $2,%r14d,%r14d +- addl %r8d,%eax +- addl %r15d,%r8d +- movl %eax,%r13d +- addl %r8d,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%r8d +- movl %ebx,%r12d +- shrdl $9,%r14d,%r14d +- xorl %eax,%r13d +- xorl %ecx,%r12d +- shrdl $5,%r13d,%r13d +- xorl %r8d,%r14d +- andl %eax,%r12d +- xorl %eax,%r13d +- addl 48(%rsp),%edx +- movl %r8d,%r15d +- xorl %ecx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r9d,%r15d +- addl %r12d,%edx +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- xorl %r8d,%r14d +- addl %r13d,%edx +- xorl %r9d,%edi +- shrdl $2,%r14d,%r14d +- addl %edx,%r11d +- addl %edi,%edx +- movl %r11d,%r13d +- addl %edx,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%edx +- movl %eax,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r11d,%r13d +- xorl %ebx,%r12d +- shrdl $5,%r13d,%r13d +- xorl %edx,%r14d +- andl %r11d,%r12d +- xorl %r11d,%r13d +- addl 52(%rsp),%ecx +- movl %edx,%edi +- xorl %ebx,%r12d +- shrdl $11,%r14d,%r14d +- xorl %r8d,%edi +- addl %r12d,%ecx +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %edx,%r14d +- addl %r13d,%ecx +- xorl %r8d,%r15d +- shrdl $2,%r14d,%r14d +- addl %ecx,%r10d +- addl %r15d,%ecx +- movl %r10d,%r13d +- addl %ecx,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%ecx +- movl %r11d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r10d,%r13d +- xorl %eax,%r12d +- shrdl $5,%r13d,%r13d +- xorl %ecx,%r14d +- andl %r10d,%r12d +- xorl %r10d,%r13d +- addl 56(%rsp),%ebx +- movl %ecx,%r15d +- xorl %eax,%r12d +- shrdl $11,%r14d,%r14d +- xorl %edx,%r15d +- addl %r12d,%ebx +- shrdl $6,%r13d,%r13d +- andl %r15d,%edi +- xorl %ecx,%r14d +- addl %r13d,%ebx +- xorl %edx,%edi +- shrdl $2,%r14d,%r14d +- addl %ebx,%r9d +- addl %edi,%ebx +- movl %r9d,%r13d +- addl %ebx,%r14d +- shrdl $14,%r13d,%r13d +- movl %r14d,%ebx +- movl %r10d,%r12d +- shrdl $9,%r14d,%r14d +- xorl %r9d,%r13d +- xorl %r11d,%r12d +- shrdl $5,%r13d,%r13d +- xorl %ebx,%r14d +- andl %r9d,%r12d +- xorl %r9d,%r13d +- addl 60(%rsp),%eax +- movl %ebx,%edi +- xorl %r11d,%r12d +- shrdl $11,%r14d,%r14d +- xorl %ecx,%edi +- addl %r12d,%eax +- shrdl $6,%r13d,%r13d +- andl %edi,%r15d +- xorl %ebx,%r14d +- addl %r13d,%eax +- xorl %ecx,%r15d +- shrdl $2,%r14d,%r14d +- addl %eax,%r8d +- addl %r15d,%eax +- movl %r8d,%r13d +- addl %eax,%r14d +- movq 64+0(%rsp),%rdi +- movl %r14d,%eax +- +- addl 0(%rdi),%eax +- leaq 64(%rsi),%rsi +- addl 4(%rdi),%ebx +- addl 8(%rdi),%ecx +- addl 12(%rdi),%edx +- addl 16(%rdi),%r8d +- addl 20(%rdi),%r9d +- addl 24(%rdi),%r10d +- addl 28(%rdi),%r11d +- +- cmpq 64+16(%rsp),%rsi +- +- movl %eax,0(%rdi) +- movl %ebx,4(%rdi) +- movl %ecx,8(%rdi) +- movl %edx,12(%rdi) +- movl %r8d,16(%rdi) +- movl %r9d,20(%rdi) +- movl %r10d,24(%rdi) +- movl %r11d,28(%rdi) +- jb L$loop_avx +- +- movq 88(%rsp),%rsi +- +- vzeroupper +- movq -48(%rsi),%r15 +- +- movq 
-40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$epilogue_avx: +- .byte 0xf3,0xc3 +- +- +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S b/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S +deleted file mode 100644 +index 5732f43..0000000 +--- a/mac-x86_64/crypto/fipsmodule/sha512-x86_64.S ++++ /dev/null +@@ -1,2990 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +-.globl _sha512_block_data_order +-.private_extern _sha512_block_data_order +- +-.p2align 4 +-_sha512_block_data_order: +- +- leaq _OPENSSL_ia32cap_P(%rip),%r11 +- movl 0(%r11),%r9d +- movl 4(%r11),%r10d +- movl 8(%r11),%r11d +- andl $1073741824,%r9d +- andl $268435968,%r10d +- orl %r9d,%r10d +- cmpl $1342177792,%r10d +- je L$avx_shortcut +- movq %rsp,%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- shlq $4,%rdx +- subq $128+32,%rsp +- leaq (%rsi,%rdx,8),%rdx +- andq $-64,%rsp +- movq %rdi,128+0(%rsp) +- movq %rsi,128+8(%rsp) +- movq %rdx,128+16(%rsp) +- movq %rax,152(%rsp) +- +-L$prologue: +- +- movq 0(%rdi),%rax +- movq 8(%rdi),%rbx +- movq 16(%rdi),%rcx +- movq 24(%rdi),%rdx +- movq 32(%rdi),%r8 +- movq 40(%rdi),%r9 +- movq 48(%rdi),%r10 +- movq 56(%rdi),%r11 +- jmp L$loop +- +-.p2align 4 +-L$loop: +- movq %rbx,%rdi +- leaq K512(%rip),%rbp +- xorq %rcx,%rdi +- movq 0(%rsi),%r12 +- movq %r8,%r13 +- movq %rax,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %r9,%r15 +- +- xorq %r8,%r13 +- rorq $5,%r14 +- xorq %r10,%r15 +- +- movq %r12,0(%rsp) +- xorq %rax,%r14 +- andq %r8,%r15 +- +- rorq $4,%r13 +- addq %r11,%r12 +- xorq %r10,%r15 +- +- rorq $6,%r14 +- xorq %r8,%r13 +- addq %r15,%r12 +- +- movq %rax,%r15 +- addq (%rbp),%r12 +- xorq %rax,%r14 +- +- xorq %rbx,%r15 +- rorq $14,%r13 +- movq %rbx,%r11 +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%r11 +- addq %r12,%rdx +- addq %r12,%r11 +- +- leaq 8(%rbp),%rbp +- addq %r14,%r11 +- movq 8(%rsi),%r12 +- movq %rdx,%r13 +- movq %r11,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %r8,%rdi +- +- xorq %rdx,%r13 +- rorq $5,%r14 +- xorq %r9,%rdi +- +- movq %r12,8(%rsp) +- xorq %r11,%r14 +- andq %rdx,%rdi +- +- rorq $4,%r13 +- addq %r10,%r12 +- xorq %r9,%rdi +- +- rorq $6,%r14 +- xorq %rdx,%r13 +- addq %rdi,%r12 +- +- movq %r11,%rdi +- addq (%rbp),%r12 +- xorq %r11,%r14 +- +- xorq %rax,%rdi +- rorq $14,%r13 +- movq %rax,%r10 +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%r10 +- addq %r12,%rcx +- addq %r12,%r10 +- +- leaq 24(%rbp),%rbp +- addq %r14,%r10 +- movq 16(%rsi),%r12 +- movq %rcx,%r13 +- movq %r10,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %rdx,%r15 +- +- xorq %rcx,%r13 +- rorq $5,%r14 +- xorq %r8,%r15 +- +- movq %r12,16(%rsp) +- xorq %r10,%r14 +- andq %rcx,%r15 +- +- rorq $4,%r13 +- addq %r9,%r12 +- xorq %r8,%r15 +- +- rorq $6,%r14 +- xorq %rcx,%r13 +- addq %r15,%r12 +- +- movq %r10,%r15 +- addq (%rbp),%r12 +- xorq %r10,%r14 +- +- xorq %r11,%r15 +- rorq $14,%r13 +- movq %r11,%r9 +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%r9 +- addq %r12,%rbx +- addq %r12,%r9 +- +- leaq 8(%rbp),%rbp +- addq %r14,%r9 
+- movq 24(%rsi),%r12 +- movq %rbx,%r13 +- movq %r9,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %rcx,%rdi +- +- xorq %rbx,%r13 +- rorq $5,%r14 +- xorq %rdx,%rdi +- +- movq %r12,24(%rsp) +- xorq %r9,%r14 +- andq %rbx,%rdi +- +- rorq $4,%r13 +- addq %r8,%r12 +- xorq %rdx,%rdi +- +- rorq $6,%r14 +- xorq %rbx,%r13 +- addq %rdi,%r12 +- +- movq %r9,%rdi +- addq (%rbp),%r12 +- xorq %r9,%r14 +- +- xorq %r10,%rdi +- rorq $14,%r13 +- movq %r10,%r8 +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%r8 +- addq %r12,%rax +- addq %r12,%r8 +- +- leaq 24(%rbp),%rbp +- addq %r14,%r8 +- movq 32(%rsi),%r12 +- movq %rax,%r13 +- movq %r8,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %rbx,%r15 +- +- xorq %rax,%r13 +- rorq $5,%r14 +- xorq %rcx,%r15 +- +- movq %r12,32(%rsp) +- xorq %r8,%r14 +- andq %rax,%r15 +- +- rorq $4,%r13 +- addq %rdx,%r12 +- xorq %rcx,%r15 +- +- rorq $6,%r14 +- xorq %rax,%r13 +- addq %r15,%r12 +- +- movq %r8,%r15 +- addq (%rbp),%r12 +- xorq %r8,%r14 +- +- xorq %r9,%r15 +- rorq $14,%r13 +- movq %r9,%rdx +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%rdx +- addq %r12,%r11 +- addq %r12,%rdx +- +- leaq 8(%rbp),%rbp +- addq %r14,%rdx +- movq 40(%rsi),%r12 +- movq %r11,%r13 +- movq %rdx,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %rax,%rdi +- +- xorq %r11,%r13 +- rorq $5,%r14 +- xorq %rbx,%rdi +- +- movq %r12,40(%rsp) +- xorq %rdx,%r14 +- andq %r11,%rdi +- +- rorq $4,%r13 +- addq %rcx,%r12 +- xorq %rbx,%rdi +- +- rorq $6,%r14 +- xorq %r11,%r13 +- addq %rdi,%r12 +- +- movq %rdx,%rdi +- addq (%rbp),%r12 +- xorq %rdx,%r14 +- +- xorq %r8,%rdi +- rorq $14,%r13 +- movq %r8,%rcx +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%rcx +- addq %r12,%r10 +- addq %r12,%rcx +- +- leaq 24(%rbp),%rbp +- addq %r14,%rcx +- movq 48(%rsi),%r12 +- movq %r10,%r13 +- movq %rcx,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %r11,%r15 +- +- xorq %r10,%r13 +- rorq $5,%r14 +- xorq %rax,%r15 +- +- movq %r12,48(%rsp) +- xorq %rcx,%r14 +- andq %r10,%r15 +- +- rorq $4,%r13 +- addq %rbx,%r12 +- xorq %rax,%r15 +- +- rorq $6,%r14 +- xorq %r10,%r13 +- addq %r15,%r12 +- +- movq %rcx,%r15 +- addq (%rbp),%r12 +- xorq %rcx,%r14 +- +- xorq %rdx,%r15 +- rorq $14,%r13 +- movq %rdx,%rbx +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%rbx +- addq %r12,%r9 +- addq %r12,%rbx +- +- leaq 8(%rbp),%rbp +- addq %r14,%rbx +- movq 56(%rsi),%r12 +- movq %r9,%r13 +- movq %rbx,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %r10,%rdi +- +- xorq %r9,%r13 +- rorq $5,%r14 +- xorq %r11,%rdi +- +- movq %r12,56(%rsp) +- xorq %rbx,%r14 +- andq %r9,%rdi +- +- rorq $4,%r13 +- addq %rax,%r12 +- xorq %r11,%rdi +- +- rorq $6,%r14 +- xorq %r9,%r13 +- addq %rdi,%r12 +- +- movq %rbx,%rdi +- addq (%rbp),%r12 +- xorq %rbx,%r14 +- +- xorq %rcx,%rdi +- rorq $14,%r13 +- movq %rcx,%rax +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%rax +- addq %r12,%r8 +- addq %r12,%rax +- +- leaq 24(%rbp),%rbp +- addq %r14,%rax +- movq 64(%rsi),%r12 +- movq %r8,%r13 +- movq %rax,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %r9,%r15 +- +- xorq %r8,%r13 +- rorq $5,%r14 +- xorq %r10,%r15 +- +- movq %r12,64(%rsp) +- xorq %rax,%r14 +- andq %r8,%r15 +- +- rorq $4,%r13 +- addq %r11,%r12 +- xorq %r10,%r15 +- +- rorq $6,%r14 +- xorq %r8,%r13 +- addq %r15,%r12 +- +- movq %rax,%r15 +- addq (%rbp),%r12 +- xorq %rax,%r14 +- +- xorq %rbx,%r15 +- rorq $14,%r13 +- movq %rbx,%r11 +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%r11 +- addq %r12,%rdx +- addq 
%r12,%r11 +- +- leaq 8(%rbp),%rbp +- addq %r14,%r11 +- movq 72(%rsi),%r12 +- movq %rdx,%r13 +- movq %r11,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %r8,%rdi +- +- xorq %rdx,%r13 +- rorq $5,%r14 +- xorq %r9,%rdi +- +- movq %r12,72(%rsp) +- xorq %r11,%r14 +- andq %rdx,%rdi +- +- rorq $4,%r13 +- addq %r10,%r12 +- xorq %r9,%rdi +- +- rorq $6,%r14 +- xorq %rdx,%r13 +- addq %rdi,%r12 +- +- movq %r11,%rdi +- addq (%rbp),%r12 +- xorq %r11,%r14 +- +- xorq %rax,%rdi +- rorq $14,%r13 +- movq %rax,%r10 +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%r10 +- addq %r12,%rcx +- addq %r12,%r10 +- +- leaq 24(%rbp),%rbp +- addq %r14,%r10 +- movq 80(%rsi),%r12 +- movq %rcx,%r13 +- movq %r10,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %rdx,%r15 +- +- xorq %rcx,%r13 +- rorq $5,%r14 +- xorq %r8,%r15 +- +- movq %r12,80(%rsp) +- xorq %r10,%r14 +- andq %rcx,%r15 +- +- rorq $4,%r13 +- addq %r9,%r12 +- xorq %r8,%r15 +- +- rorq $6,%r14 +- xorq %rcx,%r13 +- addq %r15,%r12 +- +- movq %r10,%r15 +- addq (%rbp),%r12 +- xorq %r10,%r14 +- +- xorq %r11,%r15 +- rorq $14,%r13 +- movq %r11,%r9 +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%r9 +- addq %r12,%rbx +- addq %r12,%r9 +- +- leaq 8(%rbp),%rbp +- addq %r14,%r9 +- movq 88(%rsi),%r12 +- movq %rbx,%r13 +- movq %r9,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %rcx,%rdi +- +- xorq %rbx,%r13 +- rorq $5,%r14 +- xorq %rdx,%rdi +- +- movq %r12,88(%rsp) +- xorq %r9,%r14 +- andq %rbx,%rdi +- +- rorq $4,%r13 +- addq %r8,%r12 +- xorq %rdx,%rdi +- +- rorq $6,%r14 +- xorq %rbx,%r13 +- addq %rdi,%r12 +- +- movq %r9,%rdi +- addq (%rbp),%r12 +- xorq %r9,%r14 +- +- xorq %r10,%rdi +- rorq $14,%r13 +- movq %r10,%r8 +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%r8 +- addq %r12,%rax +- addq %r12,%r8 +- +- leaq 24(%rbp),%rbp +- addq %r14,%r8 +- movq 96(%rsi),%r12 +- movq %rax,%r13 +- movq %r8,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %rbx,%r15 +- +- xorq %rax,%r13 +- rorq $5,%r14 +- xorq %rcx,%r15 +- +- movq %r12,96(%rsp) +- xorq %r8,%r14 +- andq %rax,%r15 +- +- rorq $4,%r13 +- addq %rdx,%r12 +- xorq %rcx,%r15 +- +- rorq $6,%r14 +- xorq %rax,%r13 +- addq %r15,%r12 +- +- movq %r8,%r15 +- addq (%rbp),%r12 +- xorq %r8,%r14 +- +- xorq %r9,%r15 +- rorq $14,%r13 +- movq %r9,%rdx +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%rdx +- addq %r12,%r11 +- addq %r12,%rdx +- +- leaq 8(%rbp),%rbp +- addq %r14,%rdx +- movq 104(%rsi),%r12 +- movq %r11,%r13 +- movq %rdx,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %rax,%rdi +- +- xorq %r11,%r13 +- rorq $5,%r14 +- xorq %rbx,%rdi +- +- movq %r12,104(%rsp) +- xorq %rdx,%r14 +- andq %r11,%rdi +- +- rorq $4,%r13 +- addq %rcx,%r12 +- xorq %rbx,%rdi +- +- rorq $6,%r14 +- xorq %r11,%r13 +- addq %rdi,%r12 +- +- movq %rdx,%rdi +- addq (%rbp),%r12 +- xorq %rdx,%r14 +- +- xorq %r8,%rdi +- rorq $14,%r13 +- movq %r8,%rcx +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%rcx +- addq %r12,%r10 +- addq %r12,%rcx +- +- leaq 24(%rbp),%rbp +- addq %r14,%rcx +- movq 112(%rsi),%r12 +- movq %r10,%r13 +- movq %rcx,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %r11,%r15 +- +- xorq %r10,%r13 +- rorq $5,%r14 +- xorq %rax,%r15 +- +- movq %r12,112(%rsp) +- xorq %rcx,%r14 +- andq %r10,%r15 +- +- rorq $4,%r13 +- addq %rbx,%r12 +- xorq %rax,%r15 +- +- rorq $6,%r14 +- xorq %r10,%r13 +- addq %r15,%r12 +- +- movq %rcx,%r15 +- addq (%rbp),%r12 +- xorq %rcx,%r14 +- +- xorq %rdx,%r15 +- rorq $14,%r13 +- movq %rdx,%rbx +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq 
%r13,%r12 +- +- xorq %rdi,%rbx +- addq %r12,%r9 +- addq %r12,%rbx +- +- leaq 8(%rbp),%rbp +- addq %r14,%rbx +- movq 120(%rsi),%r12 +- movq %r9,%r13 +- movq %rbx,%r14 +- bswapq %r12 +- rorq $23,%r13 +- movq %r10,%rdi +- +- xorq %r9,%r13 +- rorq $5,%r14 +- xorq %r11,%rdi +- +- movq %r12,120(%rsp) +- xorq %rbx,%r14 +- andq %r9,%rdi +- +- rorq $4,%r13 +- addq %rax,%r12 +- xorq %r11,%rdi +- +- rorq $6,%r14 +- xorq %r9,%r13 +- addq %rdi,%r12 +- +- movq %rbx,%rdi +- addq (%rbp),%r12 +- xorq %rbx,%r14 +- +- xorq %rcx,%rdi +- rorq $14,%r13 +- movq %rcx,%rax +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%rax +- addq %r12,%r8 +- addq %r12,%rax +- +- leaq 24(%rbp),%rbp +- jmp L$rounds_16_xx +-.p2align 4 +-L$rounds_16_xx: +- movq 8(%rsp),%r13 +- movq 112(%rsp),%r15 +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%rax +- movq %r15,%r14 +- rorq $42,%r15 +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%r15 +- shrq $6,%r14 +- +- rorq $19,%r15 +- xorq %r13,%r12 +- xorq %r14,%r15 +- addq 72(%rsp),%r12 +- +- addq 0(%rsp),%r12 +- movq %r8,%r13 +- addq %r15,%r12 +- movq %rax,%r14 +- rorq $23,%r13 +- movq %r9,%r15 +- +- xorq %r8,%r13 +- rorq $5,%r14 +- xorq %r10,%r15 +- +- movq %r12,0(%rsp) +- xorq %rax,%r14 +- andq %r8,%r15 +- +- rorq $4,%r13 +- addq %r11,%r12 +- xorq %r10,%r15 +- +- rorq $6,%r14 +- xorq %r8,%r13 +- addq %r15,%r12 +- +- movq %rax,%r15 +- addq (%rbp),%r12 +- xorq %rax,%r14 +- +- xorq %rbx,%r15 +- rorq $14,%r13 +- movq %rbx,%r11 +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%r11 +- addq %r12,%rdx +- addq %r12,%r11 +- +- leaq 8(%rbp),%rbp +- movq 16(%rsp),%r13 +- movq 120(%rsp),%rdi +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%r11 +- movq %rdi,%r14 +- rorq $42,%rdi +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%rdi +- shrq $6,%r14 +- +- rorq $19,%rdi +- xorq %r13,%r12 +- xorq %r14,%rdi +- addq 80(%rsp),%r12 +- +- addq 8(%rsp),%r12 +- movq %rdx,%r13 +- addq %rdi,%r12 +- movq %r11,%r14 +- rorq $23,%r13 +- movq %r8,%rdi +- +- xorq %rdx,%r13 +- rorq $5,%r14 +- xorq %r9,%rdi +- +- movq %r12,8(%rsp) +- xorq %r11,%r14 +- andq %rdx,%rdi +- +- rorq $4,%r13 +- addq %r10,%r12 +- xorq %r9,%rdi +- +- rorq $6,%r14 +- xorq %rdx,%r13 +- addq %rdi,%r12 +- +- movq %r11,%rdi +- addq (%rbp),%r12 +- xorq %r11,%r14 +- +- xorq %rax,%rdi +- rorq $14,%r13 +- movq %rax,%r10 +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%r10 +- addq %r12,%rcx +- addq %r12,%r10 +- +- leaq 24(%rbp),%rbp +- movq 24(%rsp),%r13 +- movq 0(%rsp),%r15 +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%r10 +- movq %r15,%r14 +- rorq $42,%r15 +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%r15 +- shrq $6,%r14 +- +- rorq $19,%r15 +- xorq %r13,%r12 +- xorq %r14,%r15 +- addq 88(%rsp),%r12 +- +- addq 16(%rsp),%r12 +- movq %rcx,%r13 +- addq %r15,%r12 +- movq %r10,%r14 +- rorq $23,%r13 +- movq %rdx,%r15 +- +- xorq %rcx,%r13 +- rorq $5,%r14 +- xorq %r8,%r15 +- +- movq %r12,16(%rsp) +- xorq %r10,%r14 +- andq %rcx,%r15 +- +- rorq $4,%r13 +- addq %r9,%r12 +- xorq %r8,%r15 +- +- rorq $6,%r14 +- xorq %rcx,%r13 +- addq %r15,%r12 +- +- movq %r10,%r15 +- addq (%rbp),%r12 +- xorq %r10,%r14 +- +- xorq %r11,%r15 +- rorq $14,%r13 +- movq %r11,%r9 +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%r9 +- addq %r12,%rbx +- addq %r12,%r9 +- +- leaq 8(%rbp),%rbp +- movq 32(%rsp),%r13 +- movq 8(%rsp),%rdi +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%r9 +- movq %rdi,%r14 +- rorq $42,%rdi +- +- xorq %r12,%r13 
+- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%rdi +- shrq $6,%r14 +- +- rorq $19,%rdi +- xorq %r13,%r12 +- xorq %r14,%rdi +- addq 96(%rsp),%r12 +- +- addq 24(%rsp),%r12 +- movq %rbx,%r13 +- addq %rdi,%r12 +- movq %r9,%r14 +- rorq $23,%r13 +- movq %rcx,%rdi +- +- xorq %rbx,%r13 +- rorq $5,%r14 +- xorq %rdx,%rdi +- +- movq %r12,24(%rsp) +- xorq %r9,%r14 +- andq %rbx,%rdi +- +- rorq $4,%r13 +- addq %r8,%r12 +- xorq %rdx,%rdi +- +- rorq $6,%r14 +- xorq %rbx,%r13 +- addq %rdi,%r12 +- +- movq %r9,%rdi +- addq (%rbp),%r12 +- xorq %r9,%r14 +- +- xorq %r10,%rdi +- rorq $14,%r13 +- movq %r10,%r8 +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%r8 +- addq %r12,%rax +- addq %r12,%r8 +- +- leaq 24(%rbp),%rbp +- movq 40(%rsp),%r13 +- movq 16(%rsp),%r15 +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%r8 +- movq %r15,%r14 +- rorq $42,%r15 +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%r15 +- shrq $6,%r14 +- +- rorq $19,%r15 +- xorq %r13,%r12 +- xorq %r14,%r15 +- addq 104(%rsp),%r12 +- +- addq 32(%rsp),%r12 +- movq %rax,%r13 +- addq %r15,%r12 +- movq %r8,%r14 +- rorq $23,%r13 +- movq %rbx,%r15 +- +- xorq %rax,%r13 +- rorq $5,%r14 +- xorq %rcx,%r15 +- +- movq %r12,32(%rsp) +- xorq %r8,%r14 +- andq %rax,%r15 +- +- rorq $4,%r13 +- addq %rdx,%r12 +- xorq %rcx,%r15 +- +- rorq $6,%r14 +- xorq %rax,%r13 +- addq %r15,%r12 +- +- movq %r8,%r15 +- addq (%rbp),%r12 +- xorq %r8,%r14 +- +- xorq %r9,%r15 +- rorq $14,%r13 +- movq %r9,%rdx +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%rdx +- addq %r12,%r11 +- addq %r12,%rdx +- +- leaq 8(%rbp),%rbp +- movq 48(%rsp),%r13 +- movq 24(%rsp),%rdi +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%rdx +- movq %rdi,%r14 +- rorq $42,%rdi +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%rdi +- shrq $6,%r14 +- +- rorq $19,%rdi +- xorq %r13,%r12 +- xorq %r14,%rdi +- addq 112(%rsp),%r12 +- +- addq 40(%rsp),%r12 +- movq %r11,%r13 +- addq %rdi,%r12 +- movq %rdx,%r14 +- rorq $23,%r13 +- movq %rax,%rdi +- +- xorq %r11,%r13 +- rorq $5,%r14 +- xorq %rbx,%rdi +- +- movq %r12,40(%rsp) +- xorq %rdx,%r14 +- andq %r11,%rdi +- +- rorq $4,%r13 +- addq %rcx,%r12 +- xorq %rbx,%rdi +- +- rorq $6,%r14 +- xorq %r11,%r13 +- addq %rdi,%r12 +- +- movq %rdx,%rdi +- addq (%rbp),%r12 +- xorq %rdx,%r14 +- +- xorq %r8,%rdi +- rorq $14,%r13 +- movq %r8,%rcx +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%rcx +- addq %r12,%r10 +- addq %r12,%rcx +- +- leaq 24(%rbp),%rbp +- movq 56(%rsp),%r13 +- movq 32(%rsp),%r15 +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%rcx +- movq %r15,%r14 +- rorq $42,%r15 +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%r15 +- shrq $6,%r14 +- +- rorq $19,%r15 +- xorq %r13,%r12 +- xorq %r14,%r15 +- addq 120(%rsp),%r12 +- +- addq 48(%rsp),%r12 +- movq %r10,%r13 +- addq %r15,%r12 +- movq %rcx,%r14 +- rorq $23,%r13 +- movq %r11,%r15 +- +- xorq %r10,%r13 +- rorq $5,%r14 +- xorq %rax,%r15 +- +- movq %r12,48(%rsp) +- xorq %rcx,%r14 +- andq %r10,%r15 +- +- rorq $4,%r13 +- addq %rbx,%r12 +- xorq %rax,%r15 +- +- rorq $6,%r14 +- xorq %r10,%r13 +- addq %r15,%r12 +- +- movq %rcx,%r15 +- addq (%rbp),%r12 +- xorq %rcx,%r14 +- +- xorq %rdx,%r15 +- rorq $14,%r13 +- movq %rdx,%rbx +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%rbx +- addq %r12,%r9 +- addq %r12,%rbx +- +- leaq 8(%rbp),%rbp +- movq 64(%rsp),%r13 +- movq 40(%rsp),%rdi +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%rbx +- movq %rdi,%r14 +- rorq $42,%rdi +- +- xorq %r12,%r13 +- 
shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%rdi +- shrq $6,%r14 +- +- rorq $19,%rdi +- xorq %r13,%r12 +- xorq %r14,%rdi +- addq 0(%rsp),%r12 +- +- addq 56(%rsp),%r12 +- movq %r9,%r13 +- addq %rdi,%r12 +- movq %rbx,%r14 +- rorq $23,%r13 +- movq %r10,%rdi +- +- xorq %r9,%r13 +- rorq $5,%r14 +- xorq %r11,%rdi +- +- movq %r12,56(%rsp) +- xorq %rbx,%r14 +- andq %r9,%rdi +- +- rorq $4,%r13 +- addq %rax,%r12 +- xorq %r11,%rdi +- +- rorq $6,%r14 +- xorq %r9,%r13 +- addq %rdi,%r12 +- +- movq %rbx,%rdi +- addq (%rbp),%r12 +- xorq %rbx,%r14 +- +- xorq %rcx,%rdi +- rorq $14,%r13 +- movq %rcx,%rax +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%rax +- addq %r12,%r8 +- addq %r12,%rax +- +- leaq 24(%rbp),%rbp +- movq 72(%rsp),%r13 +- movq 48(%rsp),%r15 +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%rax +- movq %r15,%r14 +- rorq $42,%r15 +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%r15 +- shrq $6,%r14 +- +- rorq $19,%r15 +- xorq %r13,%r12 +- xorq %r14,%r15 +- addq 8(%rsp),%r12 +- +- addq 64(%rsp),%r12 +- movq %r8,%r13 +- addq %r15,%r12 +- movq %rax,%r14 +- rorq $23,%r13 +- movq %r9,%r15 +- +- xorq %r8,%r13 +- rorq $5,%r14 +- xorq %r10,%r15 +- +- movq %r12,64(%rsp) +- xorq %rax,%r14 +- andq %r8,%r15 +- +- rorq $4,%r13 +- addq %r11,%r12 +- xorq %r10,%r15 +- +- rorq $6,%r14 +- xorq %r8,%r13 +- addq %r15,%r12 +- +- movq %rax,%r15 +- addq (%rbp),%r12 +- xorq %rax,%r14 +- +- xorq %rbx,%r15 +- rorq $14,%r13 +- movq %rbx,%r11 +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%r11 +- addq %r12,%rdx +- addq %r12,%r11 +- +- leaq 8(%rbp),%rbp +- movq 80(%rsp),%r13 +- movq 56(%rsp),%rdi +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%r11 +- movq %rdi,%r14 +- rorq $42,%rdi +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%rdi +- shrq $6,%r14 +- +- rorq $19,%rdi +- xorq %r13,%r12 +- xorq %r14,%rdi +- addq 16(%rsp),%r12 +- +- addq 72(%rsp),%r12 +- movq %rdx,%r13 +- addq %rdi,%r12 +- movq %r11,%r14 +- rorq $23,%r13 +- movq %r8,%rdi +- +- xorq %rdx,%r13 +- rorq $5,%r14 +- xorq %r9,%rdi +- +- movq %r12,72(%rsp) +- xorq %r11,%r14 +- andq %rdx,%rdi +- +- rorq $4,%r13 +- addq %r10,%r12 +- xorq %r9,%rdi +- +- rorq $6,%r14 +- xorq %rdx,%r13 +- addq %rdi,%r12 +- +- movq %r11,%rdi +- addq (%rbp),%r12 +- xorq %r11,%r14 +- +- xorq %rax,%rdi +- rorq $14,%r13 +- movq %rax,%r10 +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%r10 +- addq %r12,%rcx +- addq %r12,%r10 +- +- leaq 24(%rbp),%rbp +- movq 88(%rsp),%r13 +- movq 64(%rsp),%r15 +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%r10 +- movq %r15,%r14 +- rorq $42,%r15 +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%r15 +- shrq $6,%r14 +- +- rorq $19,%r15 +- xorq %r13,%r12 +- xorq %r14,%r15 +- addq 24(%rsp),%r12 +- +- addq 80(%rsp),%r12 +- movq %rcx,%r13 +- addq %r15,%r12 +- movq %r10,%r14 +- rorq $23,%r13 +- movq %rdx,%r15 +- +- xorq %rcx,%r13 +- rorq $5,%r14 +- xorq %r8,%r15 +- +- movq %r12,80(%rsp) +- xorq %r10,%r14 +- andq %rcx,%r15 +- +- rorq $4,%r13 +- addq %r9,%r12 +- xorq %r8,%r15 +- +- rorq $6,%r14 +- xorq %rcx,%r13 +- addq %r15,%r12 +- +- movq %r10,%r15 +- addq (%rbp),%r12 +- xorq %r10,%r14 +- +- xorq %r11,%r15 +- rorq $14,%r13 +- movq %r11,%r9 +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%r9 +- addq %r12,%rbx +- addq %r12,%r9 +- +- leaq 8(%rbp),%rbp +- movq 96(%rsp),%r13 +- movq 72(%rsp),%rdi +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%r9 +- movq %rdi,%r14 +- rorq $42,%rdi +- +- xorq %r12,%r13 +- shrq 
$7,%r12 +- rorq $1,%r13 +- xorq %r14,%rdi +- shrq $6,%r14 +- +- rorq $19,%rdi +- xorq %r13,%r12 +- xorq %r14,%rdi +- addq 32(%rsp),%r12 +- +- addq 88(%rsp),%r12 +- movq %rbx,%r13 +- addq %rdi,%r12 +- movq %r9,%r14 +- rorq $23,%r13 +- movq %rcx,%rdi +- +- xorq %rbx,%r13 +- rorq $5,%r14 +- xorq %rdx,%rdi +- +- movq %r12,88(%rsp) +- xorq %r9,%r14 +- andq %rbx,%rdi +- +- rorq $4,%r13 +- addq %r8,%r12 +- xorq %rdx,%rdi +- +- rorq $6,%r14 +- xorq %rbx,%r13 +- addq %rdi,%r12 +- +- movq %r9,%rdi +- addq (%rbp),%r12 +- xorq %r9,%r14 +- +- xorq %r10,%rdi +- rorq $14,%r13 +- movq %r10,%r8 +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%r8 +- addq %r12,%rax +- addq %r12,%r8 +- +- leaq 24(%rbp),%rbp +- movq 104(%rsp),%r13 +- movq 80(%rsp),%r15 +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%r8 +- movq %r15,%r14 +- rorq $42,%r15 +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%r15 +- shrq $6,%r14 +- +- rorq $19,%r15 +- xorq %r13,%r12 +- xorq %r14,%r15 +- addq 40(%rsp),%r12 +- +- addq 96(%rsp),%r12 +- movq %rax,%r13 +- addq %r15,%r12 +- movq %r8,%r14 +- rorq $23,%r13 +- movq %rbx,%r15 +- +- xorq %rax,%r13 +- rorq $5,%r14 +- xorq %rcx,%r15 +- +- movq %r12,96(%rsp) +- xorq %r8,%r14 +- andq %rax,%r15 +- +- rorq $4,%r13 +- addq %rdx,%r12 +- xorq %rcx,%r15 +- +- rorq $6,%r14 +- xorq %rax,%r13 +- addq %r15,%r12 +- +- movq %r8,%r15 +- addq (%rbp),%r12 +- xorq %r8,%r14 +- +- xorq %r9,%r15 +- rorq $14,%r13 +- movq %r9,%rdx +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%rdx +- addq %r12,%r11 +- addq %r12,%rdx +- +- leaq 8(%rbp),%rbp +- movq 112(%rsp),%r13 +- movq 88(%rsp),%rdi +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%rdx +- movq %rdi,%r14 +- rorq $42,%rdi +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%rdi +- shrq $6,%r14 +- +- rorq $19,%rdi +- xorq %r13,%r12 +- xorq %r14,%rdi +- addq 48(%rsp),%r12 +- +- addq 104(%rsp),%r12 +- movq %r11,%r13 +- addq %rdi,%r12 +- movq %rdx,%r14 +- rorq $23,%r13 +- movq %rax,%rdi +- +- xorq %r11,%r13 +- rorq $5,%r14 +- xorq %rbx,%rdi +- +- movq %r12,104(%rsp) +- xorq %rdx,%r14 +- andq %r11,%rdi +- +- rorq $4,%r13 +- addq %rcx,%r12 +- xorq %rbx,%rdi +- +- rorq $6,%r14 +- xorq %r11,%r13 +- addq %rdi,%r12 +- +- movq %rdx,%rdi +- addq (%rbp),%r12 +- xorq %rdx,%r14 +- +- xorq %r8,%rdi +- rorq $14,%r13 +- movq %r8,%rcx +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%rcx +- addq %r12,%r10 +- addq %r12,%rcx +- +- leaq 24(%rbp),%rbp +- movq 120(%rsp),%r13 +- movq 96(%rsp),%r15 +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%rcx +- movq %r15,%r14 +- rorq $42,%r15 +- +- xorq %r12,%r13 +- shrq $7,%r12 +- rorq $1,%r13 +- xorq %r14,%r15 +- shrq $6,%r14 +- +- rorq $19,%r15 +- xorq %r13,%r12 +- xorq %r14,%r15 +- addq 56(%rsp),%r12 +- +- addq 112(%rsp),%r12 +- movq %r10,%r13 +- addq %r15,%r12 +- movq %rcx,%r14 +- rorq $23,%r13 +- movq %r11,%r15 +- +- xorq %r10,%r13 +- rorq $5,%r14 +- xorq %rax,%r15 +- +- movq %r12,112(%rsp) +- xorq %rcx,%r14 +- andq %r10,%r15 +- +- rorq $4,%r13 +- addq %rbx,%r12 +- xorq %rax,%r15 +- +- rorq $6,%r14 +- xorq %r10,%r13 +- addq %r15,%r12 +- +- movq %rcx,%r15 +- addq (%rbp),%r12 +- xorq %rcx,%r14 +- +- xorq %rdx,%r15 +- rorq $14,%r13 +- movq %rdx,%rbx +- +- andq %r15,%rdi +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %rdi,%rbx +- addq %r12,%r9 +- addq %r12,%rbx +- +- leaq 8(%rbp),%rbp +- movq 0(%rsp),%r13 +- movq 104(%rsp),%rdi +- +- movq %r13,%r12 +- rorq $7,%r13 +- addq %r14,%rbx +- movq %rdi,%r14 +- rorq $42,%rdi +- +- xorq %r12,%r13 +- shrq 
$7,%r12 +- rorq $1,%r13 +- xorq %r14,%rdi +- shrq $6,%r14 +- +- rorq $19,%rdi +- xorq %r13,%r12 +- xorq %r14,%rdi +- addq 64(%rsp),%r12 +- +- addq 120(%rsp),%r12 +- movq %r9,%r13 +- addq %rdi,%r12 +- movq %rbx,%r14 +- rorq $23,%r13 +- movq %r10,%rdi +- +- xorq %r9,%r13 +- rorq $5,%r14 +- xorq %r11,%rdi +- +- movq %r12,120(%rsp) +- xorq %rbx,%r14 +- andq %r9,%rdi +- +- rorq $4,%r13 +- addq %rax,%r12 +- xorq %r11,%rdi +- +- rorq $6,%r14 +- xorq %r9,%r13 +- addq %rdi,%r12 +- +- movq %rbx,%rdi +- addq (%rbp),%r12 +- xorq %rbx,%r14 +- +- xorq %rcx,%rdi +- rorq $14,%r13 +- movq %rcx,%rax +- +- andq %rdi,%r15 +- rorq $28,%r14 +- addq %r13,%r12 +- +- xorq %r15,%rax +- addq %r12,%r8 +- addq %r12,%rax +- +- leaq 24(%rbp),%rbp +- cmpb $0,7(%rbp) +- jnz L$rounds_16_xx +- +- movq 128+0(%rsp),%rdi +- addq %r14,%rax +- leaq 128(%rsi),%rsi +- +- addq 0(%rdi),%rax +- addq 8(%rdi),%rbx +- addq 16(%rdi),%rcx +- addq 24(%rdi),%rdx +- addq 32(%rdi),%r8 +- addq 40(%rdi),%r9 +- addq 48(%rdi),%r10 +- addq 56(%rdi),%r11 +- +- cmpq 128+16(%rsp),%rsi +- +- movq %rax,0(%rdi) +- movq %rbx,8(%rdi) +- movq %rcx,16(%rdi) +- movq %rdx,24(%rdi) +- movq %r8,32(%rdi) +- movq %r9,40(%rdi) +- movq %r10,48(%rdi) +- movq %r11,56(%rdi) +- jb L$loop +- +- movq 152(%rsp),%rsi +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$epilogue: +- .byte 0xf3,0xc3 +- +- +-.p2align 6 +- +-K512: +-.quad 0x428a2f98d728ae22,0x7137449123ef65cd +-.quad 0x428a2f98d728ae22,0x7137449123ef65cd +-.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +-.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +-.quad 0x3956c25bf348b538,0x59f111f1b605d019 +-.quad 0x3956c25bf348b538,0x59f111f1b605d019 +-.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +-.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +-.quad 0xd807aa98a3030242,0x12835b0145706fbe +-.quad 0xd807aa98a3030242,0x12835b0145706fbe +-.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +-.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +-.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +-.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +-.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +-.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +-.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +-.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +-.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +-.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +-.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +-.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +-.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +-.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +-.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +-.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +-.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +-.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +-.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +-.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +-.quad 0x06ca6351e003826f,0x142929670a0e6e70 +-.quad 0x06ca6351e003826f,0x142929670a0e6e70 +-.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +-.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +-.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +-.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +-.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +-.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +-.quad 0x81c2c92e47edaee6,0x92722c851482353b +-.quad 0x81c2c92e47edaee6,0x92722c851482353b +-.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +-.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +-.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +-.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +-.quad 
0xd192e819d6ef5218,0xd69906245565a910 +-.quad 0xd192e819d6ef5218,0xd69906245565a910 +-.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +-.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +-.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +-.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +-.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +-.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +-.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +-.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +-.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +-.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +-.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +-.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +-.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +-.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +-.quad 0x90befffa23631e28,0xa4506cebde82bde9 +-.quad 0x90befffa23631e28,0xa4506cebde82bde9 +-.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +-.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +-.quad 0xca273eceea26619c,0xd186b8c721c0c207 +-.quad 0xca273eceea26619c,0xd186b8c721c0c207 +-.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +-.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +-.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +-.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +-.quad 0x113f9804bef90dae,0x1b710b35131c471b +-.quad 0x113f9804bef90dae,0x1b710b35131c471b +-.quad 0x28db77f523047d84,0x32caab7b40c72493 +-.quad 0x28db77f523047d84,0x32caab7b40c72493 +-.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +-.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +-.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +-.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +-.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +-.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +- +-.quad 0x0001020304050607,0x08090a0b0c0d0e0f +-.quad 0x0001020304050607,0x08090a0b0c0d0e0f +-.byte 83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +- +-.p2align 6 +-sha512_block_data_order_avx: +- +-L$avx_shortcut: +- movq %rsp,%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- shlq $4,%rdx +- subq $160,%rsp +- leaq (%rsi,%rdx,8),%rdx +- andq $-64,%rsp +- movq %rdi,128+0(%rsp) +- movq %rsi,128+8(%rsp) +- movq %rdx,128+16(%rsp) +- movq %rax,152(%rsp) +- +-L$prologue_avx: +- +- vzeroupper +- movq 0(%rdi),%rax +- movq 8(%rdi),%rbx +- movq 16(%rdi),%rcx +- movq 24(%rdi),%rdx +- movq 32(%rdi),%r8 +- movq 40(%rdi),%r9 +- movq 48(%rdi),%r10 +- movq 56(%rdi),%r11 +- jmp L$loop_avx +-.p2align 4 +-L$loop_avx: +- vmovdqa K512+1280(%rip),%xmm11 +- vmovdqu 0(%rsi),%xmm0 +- leaq K512+128(%rip),%rbp +- vmovdqu 16(%rsi),%xmm1 +- vmovdqu 32(%rsi),%xmm2 +- vpshufb %xmm11,%xmm0,%xmm0 +- vmovdqu 48(%rsi),%xmm3 +- vpshufb %xmm11,%xmm1,%xmm1 +- vmovdqu 64(%rsi),%xmm4 +- vpshufb %xmm11,%xmm2,%xmm2 +- vmovdqu 80(%rsi),%xmm5 +- vpshufb %xmm11,%xmm3,%xmm3 +- vmovdqu 96(%rsi),%xmm6 +- vpshufb %xmm11,%xmm4,%xmm4 +- vmovdqu 112(%rsi),%xmm7 +- vpshufb %xmm11,%xmm5,%xmm5 +- vpaddq -128(%rbp),%xmm0,%xmm8 +- vpshufb %xmm11,%xmm6,%xmm6 +- vpaddq -96(%rbp),%xmm1,%xmm9 +- vpshufb %xmm11,%xmm7,%xmm7 +- vpaddq -64(%rbp),%xmm2,%xmm10 +- vpaddq -32(%rbp),%xmm3,%xmm11 +- vmovdqa %xmm8,0(%rsp) +- vpaddq 0(%rbp),%xmm4,%xmm8 +- vmovdqa %xmm9,16(%rsp) +- vpaddq 32(%rbp),%xmm5,%xmm9 +- vmovdqa %xmm10,32(%rsp) +- vpaddq 64(%rbp),%xmm6,%xmm10 +- vmovdqa %xmm11,48(%rsp) +- vpaddq 96(%rbp),%xmm7,%xmm11 +- vmovdqa %xmm8,64(%rsp) +- movq %rax,%r14 +- vmovdqa %xmm9,80(%rsp) +- 
movq %rbx,%rdi +- vmovdqa %xmm10,96(%rsp) +- xorq %rcx,%rdi +- vmovdqa %xmm11,112(%rsp) +- movq %r8,%r13 +- jmp L$avx_00_47 +- +-.p2align 4 +-L$avx_00_47: +- addq $256,%rbp +- vpalignr $8,%xmm0,%xmm1,%xmm8 +- shrdq $23,%r13,%r13 +- movq %r14,%rax +- vpalignr $8,%xmm4,%xmm5,%xmm11 +- movq %r9,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $1,%xmm8,%xmm10 +- xorq %r8,%r13 +- xorq %r10,%r12 +- vpaddq %xmm11,%xmm0,%xmm0 +- shrdq $4,%r13,%r13 +- xorq %rax,%r14 +- vpsrlq $7,%xmm8,%xmm11 +- andq %r8,%r12 +- xorq %r8,%r13 +- vpsllq $56,%xmm8,%xmm9 +- addq 0(%rsp),%r11 +- movq %rax,%r15 +- vpxor %xmm10,%xmm11,%xmm8 +- xorq %r10,%r12 +- shrdq $6,%r14,%r14 +- vpsrlq $7,%xmm10,%xmm10 +- xorq %rbx,%r15 +- addq %r12,%r11 +- vpxor %xmm9,%xmm8,%xmm8 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- vpsllq $7,%xmm9,%xmm9 +- xorq %rax,%r14 +- addq %r13,%r11 +- vpxor %xmm10,%xmm8,%xmm8 +- xorq %rbx,%rdi +- shrdq $28,%r14,%r14 +- vpsrlq $6,%xmm7,%xmm11 +- addq %r11,%rdx +- addq %rdi,%r11 +- vpxor %xmm9,%xmm8,%xmm8 +- movq %rdx,%r13 +- addq %r11,%r14 +- vpsllq $3,%xmm7,%xmm10 +- shrdq $23,%r13,%r13 +- movq %r14,%r11 +- vpaddq %xmm8,%xmm0,%xmm0 +- movq %r8,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $19,%xmm7,%xmm9 +- xorq %rdx,%r13 +- xorq %r9,%r12 +- vpxor %xmm10,%xmm11,%xmm11 +- shrdq $4,%r13,%r13 +- xorq %r11,%r14 +- vpsllq $42,%xmm10,%xmm10 +- andq %rdx,%r12 +- xorq %rdx,%r13 +- vpxor %xmm9,%xmm11,%xmm11 +- addq 8(%rsp),%r10 +- movq %r11,%rdi +- vpsrlq $42,%xmm9,%xmm9 +- xorq %r9,%r12 +- shrdq $6,%r14,%r14 +- vpxor %xmm10,%xmm11,%xmm11 +- xorq %rax,%rdi +- addq %r12,%r10 +- vpxor %xmm9,%xmm11,%xmm11 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- vpaddq %xmm11,%xmm0,%xmm0 +- xorq %r11,%r14 +- addq %r13,%r10 +- vpaddq -128(%rbp),%xmm0,%xmm10 +- xorq %rax,%r15 +- shrdq $28,%r14,%r14 +- addq %r10,%rcx +- addq %r15,%r10 +- movq %rcx,%r13 +- addq %r10,%r14 +- vmovdqa %xmm10,0(%rsp) +- vpalignr $8,%xmm1,%xmm2,%xmm8 +- shrdq $23,%r13,%r13 +- movq %r14,%r10 +- vpalignr $8,%xmm5,%xmm6,%xmm11 +- movq %rdx,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $1,%xmm8,%xmm10 +- xorq %rcx,%r13 +- xorq %r8,%r12 +- vpaddq %xmm11,%xmm1,%xmm1 +- shrdq $4,%r13,%r13 +- xorq %r10,%r14 +- vpsrlq $7,%xmm8,%xmm11 +- andq %rcx,%r12 +- xorq %rcx,%r13 +- vpsllq $56,%xmm8,%xmm9 +- addq 16(%rsp),%r9 +- movq %r10,%r15 +- vpxor %xmm10,%xmm11,%xmm8 +- xorq %r8,%r12 +- shrdq $6,%r14,%r14 +- vpsrlq $7,%xmm10,%xmm10 +- xorq %r11,%r15 +- addq %r12,%r9 +- vpxor %xmm9,%xmm8,%xmm8 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- vpsllq $7,%xmm9,%xmm9 +- xorq %r10,%r14 +- addq %r13,%r9 +- vpxor %xmm10,%xmm8,%xmm8 +- xorq %r11,%rdi +- shrdq $28,%r14,%r14 +- vpsrlq $6,%xmm0,%xmm11 +- addq %r9,%rbx +- addq %rdi,%r9 +- vpxor %xmm9,%xmm8,%xmm8 +- movq %rbx,%r13 +- addq %r9,%r14 +- vpsllq $3,%xmm0,%xmm10 +- shrdq $23,%r13,%r13 +- movq %r14,%r9 +- vpaddq %xmm8,%xmm1,%xmm1 +- movq %rcx,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $19,%xmm0,%xmm9 +- xorq %rbx,%r13 +- xorq %rdx,%r12 +- vpxor %xmm10,%xmm11,%xmm11 +- shrdq $4,%r13,%r13 +- xorq %r9,%r14 +- vpsllq $42,%xmm10,%xmm10 +- andq %rbx,%r12 +- xorq %rbx,%r13 +- vpxor %xmm9,%xmm11,%xmm11 +- addq 24(%rsp),%r8 +- movq %r9,%rdi +- vpsrlq $42,%xmm9,%xmm9 +- xorq %rdx,%r12 +- shrdq $6,%r14,%r14 +- vpxor %xmm10,%xmm11,%xmm11 +- xorq %r10,%rdi +- addq %r12,%r8 +- vpxor %xmm9,%xmm11,%xmm11 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- vpaddq %xmm11,%xmm1,%xmm1 +- xorq %r9,%r14 +- addq %r13,%r8 +- vpaddq -96(%rbp),%xmm1,%xmm10 +- xorq %r10,%r15 +- shrdq $28,%r14,%r14 +- addq %r8,%rax +- addq %r15,%r8 +- movq %rax,%r13 +- addq %r8,%r14 +- vmovdqa %xmm10,16(%rsp) +- 
vpalignr $8,%xmm2,%xmm3,%xmm8 +- shrdq $23,%r13,%r13 +- movq %r14,%r8 +- vpalignr $8,%xmm6,%xmm7,%xmm11 +- movq %rbx,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $1,%xmm8,%xmm10 +- xorq %rax,%r13 +- xorq %rcx,%r12 +- vpaddq %xmm11,%xmm2,%xmm2 +- shrdq $4,%r13,%r13 +- xorq %r8,%r14 +- vpsrlq $7,%xmm8,%xmm11 +- andq %rax,%r12 +- xorq %rax,%r13 +- vpsllq $56,%xmm8,%xmm9 +- addq 32(%rsp),%rdx +- movq %r8,%r15 +- vpxor %xmm10,%xmm11,%xmm8 +- xorq %rcx,%r12 +- shrdq $6,%r14,%r14 +- vpsrlq $7,%xmm10,%xmm10 +- xorq %r9,%r15 +- addq %r12,%rdx +- vpxor %xmm9,%xmm8,%xmm8 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- vpsllq $7,%xmm9,%xmm9 +- xorq %r8,%r14 +- addq %r13,%rdx +- vpxor %xmm10,%xmm8,%xmm8 +- xorq %r9,%rdi +- shrdq $28,%r14,%r14 +- vpsrlq $6,%xmm1,%xmm11 +- addq %rdx,%r11 +- addq %rdi,%rdx +- vpxor %xmm9,%xmm8,%xmm8 +- movq %r11,%r13 +- addq %rdx,%r14 +- vpsllq $3,%xmm1,%xmm10 +- shrdq $23,%r13,%r13 +- movq %r14,%rdx +- vpaddq %xmm8,%xmm2,%xmm2 +- movq %rax,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $19,%xmm1,%xmm9 +- xorq %r11,%r13 +- xorq %rbx,%r12 +- vpxor %xmm10,%xmm11,%xmm11 +- shrdq $4,%r13,%r13 +- xorq %rdx,%r14 +- vpsllq $42,%xmm10,%xmm10 +- andq %r11,%r12 +- xorq %r11,%r13 +- vpxor %xmm9,%xmm11,%xmm11 +- addq 40(%rsp),%rcx +- movq %rdx,%rdi +- vpsrlq $42,%xmm9,%xmm9 +- xorq %rbx,%r12 +- shrdq $6,%r14,%r14 +- vpxor %xmm10,%xmm11,%xmm11 +- xorq %r8,%rdi +- addq %r12,%rcx +- vpxor %xmm9,%xmm11,%xmm11 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- vpaddq %xmm11,%xmm2,%xmm2 +- xorq %rdx,%r14 +- addq %r13,%rcx +- vpaddq -64(%rbp),%xmm2,%xmm10 +- xorq %r8,%r15 +- shrdq $28,%r14,%r14 +- addq %rcx,%r10 +- addq %r15,%rcx +- movq %r10,%r13 +- addq %rcx,%r14 +- vmovdqa %xmm10,32(%rsp) +- vpalignr $8,%xmm3,%xmm4,%xmm8 +- shrdq $23,%r13,%r13 +- movq %r14,%rcx +- vpalignr $8,%xmm7,%xmm0,%xmm11 +- movq %r11,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $1,%xmm8,%xmm10 +- xorq %r10,%r13 +- xorq %rax,%r12 +- vpaddq %xmm11,%xmm3,%xmm3 +- shrdq $4,%r13,%r13 +- xorq %rcx,%r14 +- vpsrlq $7,%xmm8,%xmm11 +- andq %r10,%r12 +- xorq %r10,%r13 +- vpsllq $56,%xmm8,%xmm9 +- addq 48(%rsp),%rbx +- movq %rcx,%r15 +- vpxor %xmm10,%xmm11,%xmm8 +- xorq %rax,%r12 +- shrdq $6,%r14,%r14 +- vpsrlq $7,%xmm10,%xmm10 +- xorq %rdx,%r15 +- addq %r12,%rbx +- vpxor %xmm9,%xmm8,%xmm8 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- vpsllq $7,%xmm9,%xmm9 +- xorq %rcx,%r14 +- addq %r13,%rbx +- vpxor %xmm10,%xmm8,%xmm8 +- xorq %rdx,%rdi +- shrdq $28,%r14,%r14 +- vpsrlq $6,%xmm2,%xmm11 +- addq %rbx,%r9 +- addq %rdi,%rbx +- vpxor %xmm9,%xmm8,%xmm8 +- movq %r9,%r13 +- addq %rbx,%r14 +- vpsllq $3,%xmm2,%xmm10 +- shrdq $23,%r13,%r13 +- movq %r14,%rbx +- vpaddq %xmm8,%xmm3,%xmm3 +- movq %r10,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $19,%xmm2,%xmm9 +- xorq %r9,%r13 +- xorq %r11,%r12 +- vpxor %xmm10,%xmm11,%xmm11 +- shrdq $4,%r13,%r13 +- xorq %rbx,%r14 +- vpsllq $42,%xmm10,%xmm10 +- andq %r9,%r12 +- xorq %r9,%r13 +- vpxor %xmm9,%xmm11,%xmm11 +- addq 56(%rsp),%rax +- movq %rbx,%rdi +- vpsrlq $42,%xmm9,%xmm9 +- xorq %r11,%r12 +- shrdq $6,%r14,%r14 +- vpxor %xmm10,%xmm11,%xmm11 +- xorq %rcx,%rdi +- addq %r12,%rax +- vpxor %xmm9,%xmm11,%xmm11 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- vpaddq %xmm11,%xmm3,%xmm3 +- xorq %rbx,%r14 +- addq %r13,%rax +- vpaddq -32(%rbp),%xmm3,%xmm10 +- xorq %rcx,%r15 +- shrdq $28,%r14,%r14 +- addq %rax,%r8 +- addq %r15,%rax +- movq %r8,%r13 +- addq %rax,%r14 +- vmovdqa %xmm10,48(%rsp) +- vpalignr $8,%xmm4,%xmm5,%xmm8 +- shrdq $23,%r13,%r13 +- movq %r14,%rax +- vpalignr $8,%xmm0,%xmm1,%xmm11 +- movq %r9,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq 
$1,%xmm8,%xmm10 +- xorq %r8,%r13 +- xorq %r10,%r12 +- vpaddq %xmm11,%xmm4,%xmm4 +- shrdq $4,%r13,%r13 +- xorq %rax,%r14 +- vpsrlq $7,%xmm8,%xmm11 +- andq %r8,%r12 +- xorq %r8,%r13 +- vpsllq $56,%xmm8,%xmm9 +- addq 64(%rsp),%r11 +- movq %rax,%r15 +- vpxor %xmm10,%xmm11,%xmm8 +- xorq %r10,%r12 +- shrdq $6,%r14,%r14 +- vpsrlq $7,%xmm10,%xmm10 +- xorq %rbx,%r15 +- addq %r12,%r11 +- vpxor %xmm9,%xmm8,%xmm8 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- vpsllq $7,%xmm9,%xmm9 +- xorq %rax,%r14 +- addq %r13,%r11 +- vpxor %xmm10,%xmm8,%xmm8 +- xorq %rbx,%rdi +- shrdq $28,%r14,%r14 +- vpsrlq $6,%xmm3,%xmm11 +- addq %r11,%rdx +- addq %rdi,%r11 +- vpxor %xmm9,%xmm8,%xmm8 +- movq %rdx,%r13 +- addq %r11,%r14 +- vpsllq $3,%xmm3,%xmm10 +- shrdq $23,%r13,%r13 +- movq %r14,%r11 +- vpaddq %xmm8,%xmm4,%xmm4 +- movq %r8,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $19,%xmm3,%xmm9 +- xorq %rdx,%r13 +- xorq %r9,%r12 +- vpxor %xmm10,%xmm11,%xmm11 +- shrdq $4,%r13,%r13 +- xorq %r11,%r14 +- vpsllq $42,%xmm10,%xmm10 +- andq %rdx,%r12 +- xorq %rdx,%r13 +- vpxor %xmm9,%xmm11,%xmm11 +- addq 72(%rsp),%r10 +- movq %r11,%rdi +- vpsrlq $42,%xmm9,%xmm9 +- xorq %r9,%r12 +- shrdq $6,%r14,%r14 +- vpxor %xmm10,%xmm11,%xmm11 +- xorq %rax,%rdi +- addq %r12,%r10 +- vpxor %xmm9,%xmm11,%xmm11 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- vpaddq %xmm11,%xmm4,%xmm4 +- xorq %r11,%r14 +- addq %r13,%r10 +- vpaddq 0(%rbp),%xmm4,%xmm10 +- xorq %rax,%r15 +- shrdq $28,%r14,%r14 +- addq %r10,%rcx +- addq %r15,%r10 +- movq %rcx,%r13 +- addq %r10,%r14 +- vmovdqa %xmm10,64(%rsp) +- vpalignr $8,%xmm5,%xmm6,%xmm8 +- shrdq $23,%r13,%r13 +- movq %r14,%r10 +- vpalignr $8,%xmm1,%xmm2,%xmm11 +- movq %rdx,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $1,%xmm8,%xmm10 +- xorq %rcx,%r13 +- xorq %r8,%r12 +- vpaddq %xmm11,%xmm5,%xmm5 +- shrdq $4,%r13,%r13 +- xorq %r10,%r14 +- vpsrlq $7,%xmm8,%xmm11 +- andq %rcx,%r12 +- xorq %rcx,%r13 +- vpsllq $56,%xmm8,%xmm9 +- addq 80(%rsp),%r9 +- movq %r10,%r15 +- vpxor %xmm10,%xmm11,%xmm8 +- xorq %r8,%r12 +- shrdq $6,%r14,%r14 +- vpsrlq $7,%xmm10,%xmm10 +- xorq %r11,%r15 +- addq %r12,%r9 +- vpxor %xmm9,%xmm8,%xmm8 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- vpsllq $7,%xmm9,%xmm9 +- xorq %r10,%r14 +- addq %r13,%r9 +- vpxor %xmm10,%xmm8,%xmm8 +- xorq %r11,%rdi +- shrdq $28,%r14,%r14 +- vpsrlq $6,%xmm4,%xmm11 +- addq %r9,%rbx +- addq %rdi,%r9 +- vpxor %xmm9,%xmm8,%xmm8 +- movq %rbx,%r13 +- addq %r9,%r14 +- vpsllq $3,%xmm4,%xmm10 +- shrdq $23,%r13,%r13 +- movq %r14,%r9 +- vpaddq %xmm8,%xmm5,%xmm5 +- movq %rcx,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $19,%xmm4,%xmm9 +- xorq %rbx,%r13 +- xorq %rdx,%r12 +- vpxor %xmm10,%xmm11,%xmm11 +- shrdq $4,%r13,%r13 +- xorq %r9,%r14 +- vpsllq $42,%xmm10,%xmm10 +- andq %rbx,%r12 +- xorq %rbx,%r13 +- vpxor %xmm9,%xmm11,%xmm11 +- addq 88(%rsp),%r8 +- movq %r9,%rdi +- vpsrlq $42,%xmm9,%xmm9 +- xorq %rdx,%r12 +- shrdq $6,%r14,%r14 +- vpxor %xmm10,%xmm11,%xmm11 +- xorq %r10,%rdi +- addq %r12,%r8 +- vpxor %xmm9,%xmm11,%xmm11 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- vpaddq %xmm11,%xmm5,%xmm5 +- xorq %r9,%r14 +- addq %r13,%r8 +- vpaddq 32(%rbp),%xmm5,%xmm10 +- xorq %r10,%r15 +- shrdq $28,%r14,%r14 +- addq %r8,%rax +- addq %r15,%r8 +- movq %rax,%r13 +- addq %r8,%r14 +- vmovdqa %xmm10,80(%rsp) +- vpalignr $8,%xmm6,%xmm7,%xmm8 +- shrdq $23,%r13,%r13 +- movq %r14,%r8 +- vpalignr $8,%xmm2,%xmm3,%xmm11 +- movq %rbx,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $1,%xmm8,%xmm10 +- xorq %rax,%r13 +- xorq %rcx,%r12 +- vpaddq %xmm11,%xmm6,%xmm6 +- shrdq $4,%r13,%r13 +- xorq %r8,%r14 +- vpsrlq $7,%xmm8,%xmm11 +- andq %rax,%r12 +- xorq 
%rax,%r13 +- vpsllq $56,%xmm8,%xmm9 +- addq 96(%rsp),%rdx +- movq %r8,%r15 +- vpxor %xmm10,%xmm11,%xmm8 +- xorq %rcx,%r12 +- shrdq $6,%r14,%r14 +- vpsrlq $7,%xmm10,%xmm10 +- xorq %r9,%r15 +- addq %r12,%rdx +- vpxor %xmm9,%xmm8,%xmm8 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- vpsllq $7,%xmm9,%xmm9 +- xorq %r8,%r14 +- addq %r13,%rdx +- vpxor %xmm10,%xmm8,%xmm8 +- xorq %r9,%rdi +- shrdq $28,%r14,%r14 +- vpsrlq $6,%xmm5,%xmm11 +- addq %rdx,%r11 +- addq %rdi,%rdx +- vpxor %xmm9,%xmm8,%xmm8 +- movq %r11,%r13 +- addq %rdx,%r14 +- vpsllq $3,%xmm5,%xmm10 +- shrdq $23,%r13,%r13 +- movq %r14,%rdx +- vpaddq %xmm8,%xmm6,%xmm6 +- movq %rax,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $19,%xmm5,%xmm9 +- xorq %r11,%r13 +- xorq %rbx,%r12 +- vpxor %xmm10,%xmm11,%xmm11 +- shrdq $4,%r13,%r13 +- xorq %rdx,%r14 +- vpsllq $42,%xmm10,%xmm10 +- andq %r11,%r12 +- xorq %r11,%r13 +- vpxor %xmm9,%xmm11,%xmm11 +- addq 104(%rsp),%rcx +- movq %rdx,%rdi +- vpsrlq $42,%xmm9,%xmm9 +- xorq %rbx,%r12 +- shrdq $6,%r14,%r14 +- vpxor %xmm10,%xmm11,%xmm11 +- xorq %r8,%rdi +- addq %r12,%rcx +- vpxor %xmm9,%xmm11,%xmm11 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- vpaddq %xmm11,%xmm6,%xmm6 +- xorq %rdx,%r14 +- addq %r13,%rcx +- vpaddq 64(%rbp),%xmm6,%xmm10 +- xorq %r8,%r15 +- shrdq $28,%r14,%r14 +- addq %rcx,%r10 +- addq %r15,%rcx +- movq %r10,%r13 +- addq %rcx,%r14 +- vmovdqa %xmm10,96(%rsp) +- vpalignr $8,%xmm7,%xmm0,%xmm8 +- shrdq $23,%r13,%r13 +- movq %r14,%rcx +- vpalignr $8,%xmm3,%xmm4,%xmm11 +- movq %r11,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $1,%xmm8,%xmm10 +- xorq %r10,%r13 +- xorq %rax,%r12 +- vpaddq %xmm11,%xmm7,%xmm7 +- shrdq $4,%r13,%r13 +- xorq %rcx,%r14 +- vpsrlq $7,%xmm8,%xmm11 +- andq %r10,%r12 +- xorq %r10,%r13 +- vpsllq $56,%xmm8,%xmm9 +- addq 112(%rsp),%rbx +- movq %rcx,%r15 +- vpxor %xmm10,%xmm11,%xmm8 +- xorq %rax,%r12 +- shrdq $6,%r14,%r14 +- vpsrlq $7,%xmm10,%xmm10 +- xorq %rdx,%r15 +- addq %r12,%rbx +- vpxor %xmm9,%xmm8,%xmm8 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- vpsllq $7,%xmm9,%xmm9 +- xorq %rcx,%r14 +- addq %r13,%rbx +- vpxor %xmm10,%xmm8,%xmm8 +- xorq %rdx,%rdi +- shrdq $28,%r14,%r14 +- vpsrlq $6,%xmm6,%xmm11 +- addq %rbx,%r9 +- addq %rdi,%rbx +- vpxor %xmm9,%xmm8,%xmm8 +- movq %r9,%r13 +- addq %rbx,%r14 +- vpsllq $3,%xmm6,%xmm10 +- shrdq $23,%r13,%r13 +- movq %r14,%rbx +- vpaddq %xmm8,%xmm7,%xmm7 +- movq %r10,%r12 +- shrdq $5,%r14,%r14 +- vpsrlq $19,%xmm6,%xmm9 +- xorq %r9,%r13 +- xorq %r11,%r12 +- vpxor %xmm10,%xmm11,%xmm11 +- shrdq $4,%r13,%r13 +- xorq %rbx,%r14 +- vpsllq $42,%xmm10,%xmm10 +- andq %r9,%r12 +- xorq %r9,%r13 +- vpxor %xmm9,%xmm11,%xmm11 +- addq 120(%rsp),%rax +- movq %rbx,%rdi +- vpsrlq $42,%xmm9,%xmm9 +- xorq %r11,%r12 +- shrdq $6,%r14,%r14 +- vpxor %xmm10,%xmm11,%xmm11 +- xorq %rcx,%rdi +- addq %r12,%rax +- vpxor %xmm9,%xmm11,%xmm11 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- vpaddq %xmm11,%xmm7,%xmm7 +- xorq %rbx,%r14 +- addq %r13,%rax +- vpaddq 96(%rbp),%xmm7,%xmm10 +- xorq %rcx,%r15 +- shrdq $28,%r14,%r14 +- addq %rax,%r8 +- addq %r15,%rax +- movq %r8,%r13 +- addq %rax,%r14 +- vmovdqa %xmm10,112(%rsp) +- cmpb $0,135(%rbp) +- jne L$avx_00_47 +- shrdq $23,%r13,%r13 +- movq %r14,%rax +- movq %r9,%r12 +- shrdq $5,%r14,%r14 +- xorq %r8,%r13 +- xorq %r10,%r12 +- shrdq $4,%r13,%r13 +- xorq %rax,%r14 +- andq %r8,%r12 +- xorq %r8,%r13 +- addq 0(%rsp),%r11 +- movq %rax,%r15 +- xorq %r10,%r12 +- shrdq $6,%r14,%r14 +- xorq %rbx,%r15 +- addq %r12,%r11 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- xorq %rax,%r14 +- addq %r13,%r11 +- xorq %rbx,%rdi +- shrdq $28,%r14,%r14 +- addq %r11,%rdx +- addq 
%rdi,%r11 +- movq %rdx,%r13 +- addq %r11,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%r11 +- movq %r8,%r12 +- shrdq $5,%r14,%r14 +- xorq %rdx,%r13 +- xorq %r9,%r12 +- shrdq $4,%r13,%r13 +- xorq %r11,%r14 +- andq %rdx,%r12 +- xorq %rdx,%r13 +- addq 8(%rsp),%r10 +- movq %r11,%rdi +- xorq %r9,%r12 +- shrdq $6,%r14,%r14 +- xorq %rax,%rdi +- addq %r12,%r10 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- xorq %r11,%r14 +- addq %r13,%r10 +- xorq %rax,%r15 +- shrdq $28,%r14,%r14 +- addq %r10,%rcx +- addq %r15,%r10 +- movq %rcx,%r13 +- addq %r10,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%r10 +- movq %rdx,%r12 +- shrdq $5,%r14,%r14 +- xorq %rcx,%r13 +- xorq %r8,%r12 +- shrdq $4,%r13,%r13 +- xorq %r10,%r14 +- andq %rcx,%r12 +- xorq %rcx,%r13 +- addq 16(%rsp),%r9 +- movq %r10,%r15 +- xorq %r8,%r12 +- shrdq $6,%r14,%r14 +- xorq %r11,%r15 +- addq %r12,%r9 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- xorq %r10,%r14 +- addq %r13,%r9 +- xorq %r11,%rdi +- shrdq $28,%r14,%r14 +- addq %r9,%rbx +- addq %rdi,%r9 +- movq %rbx,%r13 +- addq %r9,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%r9 +- movq %rcx,%r12 +- shrdq $5,%r14,%r14 +- xorq %rbx,%r13 +- xorq %rdx,%r12 +- shrdq $4,%r13,%r13 +- xorq %r9,%r14 +- andq %rbx,%r12 +- xorq %rbx,%r13 +- addq 24(%rsp),%r8 +- movq %r9,%rdi +- xorq %rdx,%r12 +- shrdq $6,%r14,%r14 +- xorq %r10,%rdi +- addq %r12,%r8 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- xorq %r9,%r14 +- addq %r13,%r8 +- xorq %r10,%r15 +- shrdq $28,%r14,%r14 +- addq %r8,%rax +- addq %r15,%r8 +- movq %rax,%r13 +- addq %r8,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%r8 +- movq %rbx,%r12 +- shrdq $5,%r14,%r14 +- xorq %rax,%r13 +- xorq %rcx,%r12 +- shrdq $4,%r13,%r13 +- xorq %r8,%r14 +- andq %rax,%r12 +- xorq %rax,%r13 +- addq 32(%rsp),%rdx +- movq %r8,%r15 +- xorq %rcx,%r12 +- shrdq $6,%r14,%r14 +- xorq %r9,%r15 +- addq %r12,%rdx +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- xorq %r8,%r14 +- addq %r13,%rdx +- xorq %r9,%rdi +- shrdq $28,%r14,%r14 +- addq %rdx,%r11 +- addq %rdi,%rdx +- movq %r11,%r13 +- addq %rdx,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%rdx +- movq %rax,%r12 +- shrdq $5,%r14,%r14 +- xorq %r11,%r13 +- xorq %rbx,%r12 +- shrdq $4,%r13,%r13 +- xorq %rdx,%r14 +- andq %r11,%r12 +- xorq %r11,%r13 +- addq 40(%rsp),%rcx +- movq %rdx,%rdi +- xorq %rbx,%r12 +- shrdq $6,%r14,%r14 +- xorq %r8,%rdi +- addq %r12,%rcx +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- xorq %rdx,%r14 +- addq %r13,%rcx +- xorq %r8,%r15 +- shrdq $28,%r14,%r14 +- addq %rcx,%r10 +- addq %r15,%rcx +- movq %r10,%r13 +- addq %rcx,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%rcx +- movq %r11,%r12 +- shrdq $5,%r14,%r14 +- xorq %r10,%r13 +- xorq %rax,%r12 +- shrdq $4,%r13,%r13 +- xorq %rcx,%r14 +- andq %r10,%r12 +- xorq %r10,%r13 +- addq 48(%rsp),%rbx +- movq %rcx,%r15 +- xorq %rax,%r12 +- shrdq $6,%r14,%r14 +- xorq %rdx,%r15 +- addq %r12,%rbx +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- xorq %rcx,%r14 +- addq %r13,%rbx +- xorq %rdx,%rdi +- shrdq $28,%r14,%r14 +- addq %rbx,%r9 +- addq %rdi,%rbx +- movq %r9,%r13 +- addq %rbx,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%rbx +- movq %r10,%r12 +- shrdq $5,%r14,%r14 +- xorq %r9,%r13 +- xorq %r11,%r12 +- shrdq $4,%r13,%r13 +- xorq %rbx,%r14 +- andq %r9,%r12 +- xorq %r9,%r13 +- addq 56(%rsp),%rax +- movq %rbx,%rdi +- xorq %r11,%r12 +- shrdq $6,%r14,%r14 +- xorq %rcx,%rdi +- addq %r12,%rax +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- xorq %rbx,%r14 +- addq %r13,%rax +- xorq %rcx,%r15 +- shrdq $28,%r14,%r14 +- addq %rax,%r8 +- addq %r15,%rax +- movq %r8,%r13 +- addq %rax,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%rax +- movq 
%r9,%r12 +- shrdq $5,%r14,%r14 +- xorq %r8,%r13 +- xorq %r10,%r12 +- shrdq $4,%r13,%r13 +- xorq %rax,%r14 +- andq %r8,%r12 +- xorq %r8,%r13 +- addq 64(%rsp),%r11 +- movq %rax,%r15 +- xorq %r10,%r12 +- shrdq $6,%r14,%r14 +- xorq %rbx,%r15 +- addq %r12,%r11 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- xorq %rax,%r14 +- addq %r13,%r11 +- xorq %rbx,%rdi +- shrdq $28,%r14,%r14 +- addq %r11,%rdx +- addq %rdi,%r11 +- movq %rdx,%r13 +- addq %r11,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%r11 +- movq %r8,%r12 +- shrdq $5,%r14,%r14 +- xorq %rdx,%r13 +- xorq %r9,%r12 +- shrdq $4,%r13,%r13 +- xorq %r11,%r14 +- andq %rdx,%r12 +- xorq %rdx,%r13 +- addq 72(%rsp),%r10 +- movq %r11,%rdi +- xorq %r9,%r12 +- shrdq $6,%r14,%r14 +- xorq %rax,%rdi +- addq %r12,%r10 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- xorq %r11,%r14 +- addq %r13,%r10 +- xorq %rax,%r15 +- shrdq $28,%r14,%r14 +- addq %r10,%rcx +- addq %r15,%r10 +- movq %rcx,%r13 +- addq %r10,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%r10 +- movq %rdx,%r12 +- shrdq $5,%r14,%r14 +- xorq %rcx,%r13 +- xorq %r8,%r12 +- shrdq $4,%r13,%r13 +- xorq %r10,%r14 +- andq %rcx,%r12 +- xorq %rcx,%r13 +- addq 80(%rsp),%r9 +- movq %r10,%r15 +- xorq %r8,%r12 +- shrdq $6,%r14,%r14 +- xorq %r11,%r15 +- addq %r12,%r9 +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- xorq %r10,%r14 +- addq %r13,%r9 +- xorq %r11,%rdi +- shrdq $28,%r14,%r14 +- addq %r9,%rbx +- addq %rdi,%r9 +- movq %rbx,%r13 +- addq %r9,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%r9 +- movq %rcx,%r12 +- shrdq $5,%r14,%r14 +- xorq %rbx,%r13 +- xorq %rdx,%r12 +- shrdq $4,%r13,%r13 +- xorq %r9,%r14 +- andq %rbx,%r12 +- xorq %rbx,%r13 +- addq 88(%rsp),%r8 +- movq %r9,%rdi +- xorq %rdx,%r12 +- shrdq $6,%r14,%r14 +- xorq %r10,%rdi +- addq %r12,%r8 +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- xorq %r9,%r14 +- addq %r13,%r8 +- xorq %r10,%r15 +- shrdq $28,%r14,%r14 +- addq %r8,%rax +- addq %r15,%r8 +- movq %rax,%r13 +- addq %r8,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%r8 +- movq %rbx,%r12 +- shrdq $5,%r14,%r14 +- xorq %rax,%r13 +- xorq %rcx,%r12 +- shrdq $4,%r13,%r13 +- xorq %r8,%r14 +- andq %rax,%r12 +- xorq %rax,%r13 +- addq 96(%rsp),%rdx +- movq %r8,%r15 +- xorq %rcx,%r12 +- shrdq $6,%r14,%r14 +- xorq %r9,%r15 +- addq %r12,%rdx +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- xorq %r8,%r14 +- addq %r13,%rdx +- xorq %r9,%rdi +- shrdq $28,%r14,%r14 +- addq %rdx,%r11 +- addq %rdi,%rdx +- movq %r11,%r13 +- addq %rdx,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%rdx +- movq %rax,%r12 +- shrdq $5,%r14,%r14 +- xorq %r11,%r13 +- xorq %rbx,%r12 +- shrdq $4,%r13,%r13 +- xorq %rdx,%r14 +- andq %r11,%r12 +- xorq %r11,%r13 +- addq 104(%rsp),%rcx +- movq %rdx,%rdi +- xorq %rbx,%r12 +- shrdq $6,%r14,%r14 +- xorq %r8,%rdi +- addq %r12,%rcx +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- xorq %rdx,%r14 +- addq %r13,%rcx +- xorq %r8,%r15 +- shrdq $28,%r14,%r14 +- addq %rcx,%r10 +- addq %r15,%rcx +- movq %r10,%r13 +- addq %rcx,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%rcx +- movq %r11,%r12 +- shrdq $5,%r14,%r14 +- xorq %r10,%r13 +- xorq %rax,%r12 +- shrdq $4,%r13,%r13 +- xorq %rcx,%r14 +- andq %r10,%r12 +- xorq %r10,%r13 +- addq 112(%rsp),%rbx +- movq %rcx,%r15 +- xorq %rax,%r12 +- shrdq $6,%r14,%r14 +- xorq %rdx,%r15 +- addq %r12,%rbx +- shrdq $14,%r13,%r13 +- andq %r15,%rdi +- xorq %rcx,%r14 +- addq %r13,%rbx +- xorq %rdx,%rdi +- shrdq $28,%r14,%r14 +- addq %rbx,%r9 +- addq %rdi,%rbx +- movq %r9,%r13 +- addq %rbx,%r14 +- shrdq $23,%r13,%r13 +- movq %r14,%rbx +- movq %r10,%r12 +- shrdq $5,%r14,%r14 +- xorq %r9,%r13 +- xorq %r11,%r12 +- shrdq $4,%r13,%r13 +- 
xorq %rbx,%r14 +- andq %r9,%r12 +- xorq %r9,%r13 +- addq 120(%rsp),%rax +- movq %rbx,%rdi +- xorq %r11,%r12 +- shrdq $6,%r14,%r14 +- xorq %rcx,%rdi +- addq %r12,%rax +- shrdq $14,%r13,%r13 +- andq %rdi,%r15 +- xorq %rbx,%r14 +- addq %r13,%rax +- xorq %rcx,%r15 +- shrdq $28,%r14,%r14 +- addq %rax,%r8 +- addq %r15,%rax +- movq %r8,%r13 +- addq %rax,%r14 +- movq 128+0(%rsp),%rdi +- movq %r14,%rax +- +- addq 0(%rdi),%rax +- leaq 128(%rsi),%rsi +- addq 8(%rdi),%rbx +- addq 16(%rdi),%rcx +- addq 24(%rdi),%rdx +- addq 32(%rdi),%r8 +- addq 40(%rdi),%r9 +- addq 48(%rdi),%r10 +- addq 56(%rdi),%r11 +- +- cmpq 128+16(%rsp),%rsi +- +- movq %rax,0(%rdi) +- movq %rbx,8(%rdi) +- movq %rcx,16(%rdi) +- movq %rdx,24(%rdi) +- movq %r8,32(%rdi) +- movq %r9,40(%rdi) +- movq %r10,48(%rdi) +- movq %r11,56(%rdi) +- jb L$loop_avx +- +- movq 152(%rsp),%rsi +- +- vzeroupper +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$epilogue_avx: +- .byte 0xf3,0xc3 +- +- +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S b/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S +deleted file mode 100644 +index 31cf329..0000000 +--- a/mac-x86_64/crypto/fipsmodule/vpaes-x86_64.S ++++ /dev/null +@@ -1,1130 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +-.p2align 4 +-_vpaes_encrypt_core: +- +- movq %rdx,%r9 +- movq $16,%r11 +- movl 240(%rdx),%eax +- movdqa %xmm9,%xmm1 +- movdqa L$k_ipt(%rip),%xmm2 +- pandn %xmm0,%xmm1 +- movdqu (%r9),%xmm5 +- psrld $4,%xmm1 +- pand %xmm9,%xmm0 +-.byte 102,15,56,0,208 +- movdqa L$k_ipt+16(%rip),%xmm0 +-.byte 102,15,56,0,193 +- pxor %xmm5,%xmm2 +- addq $16,%r9 +- pxor %xmm2,%xmm0 +- leaq L$k_mc_backward(%rip),%r10 +- jmp L$enc_entry +- +-.p2align 4 +-L$enc_loop: +- +- movdqa %xmm13,%xmm4 +- movdqa %xmm12,%xmm0 +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,195 +- pxor %xmm5,%xmm4 +- movdqa %xmm15,%xmm5 +- pxor %xmm4,%xmm0 +- movdqa -64(%r11,%r10,1),%xmm1 +-.byte 102,15,56,0,234 +- movdqa (%r11,%r10,1),%xmm4 +- movdqa %xmm14,%xmm2 +-.byte 102,15,56,0,211 +- movdqa %xmm0,%xmm3 +- pxor %xmm5,%xmm2 +-.byte 102,15,56,0,193 +- addq $16,%r9 +- pxor %xmm2,%xmm0 +-.byte 102,15,56,0,220 +- addq $16,%r11 +- pxor %xmm0,%xmm3 +-.byte 102,15,56,0,193 +- andq $0x30,%r11 +- subq $1,%rax +- pxor %xmm3,%xmm0 +- +-L$enc_entry: +- +- movdqa %xmm9,%xmm1 +- movdqa %xmm11,%xmm5 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm9,%xmm0 +-.byte 102,15,56,0,232 +- movdqa %xmm10,%xmm3 +- pxor %xmm1,%xmm0 +-.byte 102,15,56,0,217 +- movdqa %xmm10,%xmm4 +- pxor %xmm5,%xmm3 +-.byte 102,15,56,0,224 +- movdqa %xmm10,%xmm2 +- pxor %xmm5,%xmm4 +-.byte 102,15,56,0,211 +- movdqa %xmm10,%xmm3 +- pxor %xmm0,%xmm2 +-.byte 102,15,56,0,220 +- movdqu (%r9),%xmm5 +- pxor %xmm1,%xmm3 +- jnz L$enc_loop +- +- +- movdqa -96(%r10),%xmm4 +- movdqa -80(%r10),%xmm0 +-.byte 102,15,56,0,226 +- pxor %xmm5,%xmm4 +-.byte 102,15,56,0,195 +- movdqa 64(%r11,%r10,1),%xmm1 +- pxor %xmm4,%xmm0 +-.byte 102,15,56,0,193 +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +-.p2align 4 
+-_vpaes_encrypt_core_2x: +- +- movq %rdx,%r9 +- movq $16,%r11 +- movl 240(%rdx),%eax +- movdqa %xmm9,%xmm1 +- movdqa %xmm9,%xmm7 +- movdqa L$k_ipt(%rip),%xmm2 +- movdqa %xmm2,%xmm8 +- pandn %xmm0,%xmm1 +- pandn %xmm6,%xmm7 +- movdqu (%r9),%xmm5 +- +- psrld $4,%xmm1 +- psrld $4,%xmm7 +- pand %xmm9,%xmm0 +- pand %xmm9,%xmm6 +-.byte 102,15,56,0,208 +-.byte 102,68,15,56,0,198 +- movdqa L$k_ipt+16(%rip),%xmm0 +- movdqa %xmm0,%xmm6 +-.byte 102,15,56,0,193 +-.byte 102,15,56,0,247 +- pxor %xmm5,%xmm2 +- pxor %xmm5,%xmm8 +- addq $16,%r9 +- pxor %xmm2,%xmm0 +- pxor %xmm8,%xmm6 +- leaq L$k_mc_backward(%rip),%r10 +- jmp L$enc2x_entry +- +-.p2align 4 +-L$enc2x_loop: +- +- movdqa L$k_sb1(%rip),%xmm4 +- movdqa L$k_sb1+16(%rip),%xmm0 +- movdqa %xmm4,%xmm12 +- movdqa %xmm0,%xmm6 +-.byte 102,15,56,0,226 +-.byte 102,69,15,56,0,224 +-.byte 102,15,56,0,195 +-.byte 102,65,15,56,0,243 +- pxor %xmm5,%xmm4 +- pxor %xmm5,%xmm12 +- movdqa L$k_sb2(%rip),%xmm5 +- movdqa %xmm5,%xmm13 +- pxor %xmm4,%xmm0 +- pxor %xmm12,%xmm6 +- movdqa -64(%r11,%r10,1),%xmm1 +- +-.byte 102,15,56,0,234 +-.byte 102,69,15,56,0,232 +- movdqa (%r11,%r10,1),%xmm4 +- +- movdqa L$k_sb2+16(%rip),%xmm2 +- movdqa %xmm2,%xmm8 +-.byte 102,15,56,0,211 +-.byte 102,69,15,56,0,195 +- movdqa %xmm0,%xmm3 +- movdqa %xmm6,%xmm11 +- pxor %xmm5,%xmm2 +- pxor %xmm13,%xmm8 +-.byte 102,15,56,0,193 +-.byte 102,15,56,0,241 +- addq $16,%r9 +- pxor %xmm2,%xmm0 +- pxor %xmm8,%xmm6 +-.byte 102,15,56,0,220 +-.byte 102,68,15,56,0,220 +- addq $16,%r11 +- pxor %xmm0,%xmm3 +- pxor %xmm6,%xmm11 +-.byte 102,15,56,0,193 +-.byte 102,15,56,0,241 +- andq $0x30,%r11 +- subq $1,%rax +- pxor %xmm3,%xmm0 +- pxor %xmm11,%xmm6 +- +-L$enc2x_entry: +- +- movdqa %xmm9,%xmm1 +- movdqa %xmm9,%xmm7 +- movdqa L$k_inv+16(%rip),%xmm5 +- movdqa %xmm5,%xmm13 +- pandn %xmm0,%xmm1 +- pandn %xmm6,%xmm7 +- psrld $4,%xmm1 +- psrld $4,%xmm7 +- pand %xmm9,%xmm0 +- pand %xmm9,%xmm6 +-.byte 102,15,56,0,232 +-.byte 102,68,15,56,0,238 +- movdqa %xmm10,%xmm3 +- movdqa %xmm10,%xmm11 +- pxor %xmm1,%xmm0 +- pxor %xmm7,%xmm6 +-.byte 102,15,56,0,217 +-.byte 102,68,15,56,0,223 +- movdqa %xmm10,%xmm4 +- movdqa %xmm10,%xmm12 +- pxor %xmm5,%xmm3 +- pxor %xmm13,%xmm11 +-.byte 102,15,56,0,224 +-.byte 102,68,15,56,0,230 +- movdqa %xmm10,%xmm2 +- movdqa %xmm10,%xmm8 +- pxor %xmm5,%xmm4 +- pxor %xmm13,%xmm12 +-.byte 102,15,56,0,211 +-.byte 102,69,15,56,0,195 +- movdqa %xmm10,%xmm3 +- movdqa %xmm10,%xmm11 +- pxor %xmm0,%xmm2 +- pxor %xmm6,%xmm8 +-.byte 102,15,56,0,220 +-.byte 102,69,15,56,0,220 +- movdqu (%r9),%xmm5 +- +- pxor %xmm1,%xmm3 +- pxor %xmm7,%xmm11 +- jnz L$enc2x_loop +- +- +- movdqa -96(%r10),%xmm4 +- movdqa -80(%r10),%xmm0 +- movdqa %xmm4,%xmm12 +- movdqa %xmm0,%xmm6 +-.byte 102,15,56,0,226 +-.byte 102,69,15,56,0,224 +- pxor %xmm5,%xmm4 +- pxor %xmm5,%xmm12 +-.byte 102,15,56,0,195 +-.byte 102,65,15,56,0,243 +- movdqa 64(%r11,%r10,1),%xmm1 +- +- pxor %xmm4,%xmm0 +- pxor %xmm12,%xmm6 +-.byte 102,15,56,0,193 +-.byte 102,15,56,0,241 +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +-.p2align 4 +-_vpaes_decrypt_core: +- +- movq %rdx,%r9 +- movl 240(%rdx),%eax +- movdqa %xmm9,%xmm1 +- movdqa L$k_dipt(%rip),%xmm2 +- pandn %xmm0,%xmm1 +- movq %rax,%r11 +- psrld $4,%xmm1 +- movdqu (%r9),%xmm5 +- shlq $4,%r11 +- pand %xmm9,%xmm0 +-.byte 102,15,56,0,208 +- movdqa L$k_dipt+16(%rip),%xmm0 +- xorq $0x30,%r11 +- leaq L$k_dsbd(%rip),%r10 +-.byte 102,15,56,0,193 +- andq $0x30,%r11 +- pxor %xmm5,%xmm2 +- movdqa L$k_mc_forward+48(%rip),%xmm5 +- pxor %xmm2,%xmm0 +- addq $16,%r9 +- addq %r10,%r11 +- jmp L$dec_entry +- +-.p2align 4 
+-L$dec_loop: +- +- +- +- movdqa -32(%r10),%xmm4 +- movdqa -16(%r10),%xmm1 +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,203 +- pxor %xmm4,%xmm0 +- movdqa 0(%r10),%xmm4 +- pxor %xmm1,%xmm0 +- movdqa 16(%r10),%xmm1 +- +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,197 +-.byte 102,15,56,0,203 +- pxor %xmm4,%xmm0 +- movdqa 32(%r10),%xmm4 +- pxor %xmm1,%xmm0 +- movdqa 48(%r10),%xmm1 +- +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,197 +-.byte 102,15,56,0,203 +- pxor %xmm4,%xmm0 +- movdqa 64(%r10),%xmm4 +- pxor %xmm1,%xmm0 +- movdqa 80(%r10),%xmm1 +- +-.byte 102,15,56,0,226 +-.byte 102,15,56,0,197 +-.byte 102,15,56,0,203 +- pxor %xmm4,%xmm0 +- addq $16,%r9 +-.byte 102,15,58,15,237,12 +- pxor %xmm1,%xmm0 +- subq $1,%rax +- +-L$dec_entry: +- +- movdqa %xmm9,%xmm1 +- pandn %xmm0,%xmm1 +- movdqa %xmm11,%xmm2 +- psrld $4,%xmm1 +- pand %xmm9,%xmm0 +-.byte 102,15,56,0,208 +- movdqa %xmm10,%xmm3 +- pxor %xmm1,%xmm0 +-.byte 102,15,56,0,217 +- movdqa %xmm10,%xmm4 +- pxor %xmm2,%xmm3 +-.byte 102,15,56,0,224 +- pxor %xmm2,%xmm4 +- movdqa %xmm10,%xmm2 +-.byte 102,15,56,0,211 +- movdqa %xmm10,%xmm3 +- pxor %xmm0,%xmm2 +-.byte 102,15,56,0,220 +- movdqu (%r9),%xmm0 +- pxor %xmm1,%xmm3 +- jnz L$dec_loop +- +- +- movdqa 96(%r10),%xmm4 +-.byte 102,15,56,0,226 +- pxor %xmm0,%xmm4 +- movdqa 112(%r10),%xmm0 +- movdqa -352(%r11),%xmm2 +-.byte 102,15,56,0,195 +- pxor %xmm4,%xmm0 +-.byte 102,15,56,0,194 +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +-.p2align 4 +-_vpaes_schedule_core: +- +- +- +- +- +- +- call _vpaes_preheat +- movdqa L$k_rcon(%rip),%xmm8 +- movdqu (%rdi),%xmm0 +- +- +- movdqa %xmm0,%xmm3 +- leaq L$k_ipt(%rip),%r11 +- call _vpaes_schedule_transform +- movdqa %xmm0,%xmm7 +- +- leaq L$k_sr(%rip),%r10 +- testq %rcx,%rcx +- jnz L$schedule_am_decrypting +- +- +- movdqu %xmm0,(%rdx) +- jmp L$schedule_go +- +-L$schedule_am_decrypting: +- +- movdqa (%r8,%r10,1),%xmm1 +-.byte 102,15,56,0,217 +- movdqu %xmm3,(%rdx) +- xorq $0x30,%r8 +- +-L$schedule_go: +- cmpl $192,%esi +- ja L$schedule_256 +- je L$schedule_192 +- +- +- +- +- +- +- +- +- +- +-L$schedule_128: +- movl $10,%esi +- +-L$oop_schedule_128: +- call _vpaes_schedule_round +- decq %rsi +- jz L$schedule_mangle_last +- call _vpaes_schedule_mangle +- jmp L$oop_schedule_128 +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +-.p2align 4 +-L$schedule_192: +- movdqu 8(%rdi),%xmm0 +- call _vpaes_schedule_transform +- movdqa %xmm0,%xmm6 +- pxor %xmm4,%xmm4 +- movhlps %xmm4,%xmm6 +- movl $4,%esi +- +-L$oop_schedule_192: +- call _vpaes_schedule_round +-.byte 102,15,58,15,198,8 +- call _vpaes_schedule_mangle +- call _vpaes_schedule_192_smear +- call _vpaes_schedule_mangle +- call _vpaes_schedule_round +- decq %rsi +- jz L$schedule_mangle_last +- call _vpaes_schedule_mangle +- call _vpaes_schedule_192_smear +- jmp L$oop_schedule_192 +- +- +- +- +- +- +- +- +- +- +- +-.p2align 4 +-L$schedule_256: +- movdqu 16(%rdi),%xmm0 +- call _vpaes_schedule_transform +- movl $7,%esi +- +-L$oop_schedule_256: +- call _vpaes_schedule_mangle +- movdqa %xmm0,%xmm6 +- +- +- call _vpaes_schedule_round +- decq %rsi +- jz L$schedule_mangle_last +- call _vpaes_schedule_mangle +- +- +- pshufd $0xFF,%xmm0,%xmm0 +- movdqa %xmm7,%xmm5 +- movdqa %xmm6,%xmm7 +- call _vpaes_schedule_low_round +- movdqa %xmm5,%xmm7 +- +- jmp L$oop_schedule_256 +- +- +- +- +- +- +- +- +- +- +- +- +-.p2align 4 +-L$schedule_mangle_last: +- +- leaq L$k_deskew(%rip),%r11 +- testq %rcx,%rcx +- jnz L$schedule_mangle_last_dec +- +- +- movdqa (%r8,%r10,1),%xmm1 +-.byte 102,15,56,0,193 +- leaq L$k_opt(%rip),%r11 +- addq $32,%rdx +- 
+-L$schedule_mangle_last_dec: +- addq $-16,%rdx +- pxor L$k_s63(%rip),%xmm0 +- call _vpaes_schedule_transform +- movdqu %xmm0,(%rdx) +- +- +- pxor %xmm0,%xmm0 +- pxor %xmm1,%xmm1 +- pxor %xmm2,%xmm2 +- pxor %xmm3,%xmm3 +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- pxor %xmm6,%xmm6 +- pxor %xmm7,%xmm7 +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +-.p2align 4 +-_vpaes_schedule_192_smear: +- +- pshufd $0x80,%xmm6,%xmm1 +- pshufd $0xFE,%xmm7,%xmm0 +- pxor %xmm1,%xmm6 +- pxor %xmm1,%xmm1 +- pxor %xmm0,%xmm6 +- movdqa %xmm6,%xmm0 +- movhlps %xmm1,%xmm6 +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +-.p2align 4 +-_vpaes_schedule_round: +- +- +- pxor %xmm1,%xmm1 +-.byte 102,65,15,58,15,200,15 +-.byte 102,69,15,58,15,192,15 +- pxor %xmm1,%xmm7 +- +- +- pshufd $0xFF,%xmm0,%xmm0 +-.byte 102,15,58,15,192,1 +- +- +- +- +-_vpaes_schedule_low_round: +- +- movdqa %xmm7,%xmm1 +- pslldq $4,%xmm7 +- pxor %xmm1,%xmm7 +- movdqa %xmm7,%xmm1 +- pslldq $8,%xmm7 +- pxor %xmm1,%xmm7 +- pxor L$k_s63(%rip),%xmm7 +- +- +- movdqa %xmm9,%xmm1 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm9,%xmm0 +- movdqa %xmm11,%xmm2 +-.byte 102,15,56,0,208 +- pxor %xmm1,%xmm0 +- movdqa %xmm10,%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +- movdqa %xmm10,%xmm4 +-.byte 102,15,56,0,224 +- pxor %xmm2,%xmm4 +- movdqa %xmm10,%xmm2 +-.byte 102,15,56,0,211 +- pxor %xmm0,%xmm2 +- movdqa %xmm10,%xmm3 +-.byte 102,15,56,0,220 +- pxor %xmm1,%xmm3 +- movdqa %xmm13,%xmm4 +-.byte 102,15,56,0,226 +- movdqa %xmm12,%xmm0 +-.byte 102,15,56,0,195 +- pxor %xmm4,%xmm0 +- +- +- pxor %xmm7,%xmm0 +- movdqa %xmm0,%xmm7 +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +- +- +- +- +-.p2align 4 +-_vpaes_schedule_transform: +- +- movdqa %xmm9,%xmm1 +- pandn %xmm0,%xmm1 +- psrld $4,%xmm1 +- pand %xmm9,%xmm0 +- movdqa (%r11),%xmm2 +-.byte 102,15,56,0,208 +- movdqa 16(%r11),%xmm0 +-.byte 102,15,56,0,193 +- pxor %xmm2,%xmm0 +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +-.p2align 4 +-_vpaes_schedule_mangle: +- +- movdqa %xmm0,%xmm4 +- movdqa L$k_mc_forward(%rip),%xmm5 +- testq %rcx,%rcx +- jnz L$schedule_mangle_dec +- +- +- addq $16,%rdx +- pxor L$k_s63(%rip),%xmm4 +-.byte 102,15,56,0,229 +- movdqa %xmm4,%xmm3 +-.byte 102,15,56,0,229 +- pxor %xmm4,%xmm3 +-.byte 102,15,56,0,229 +- pxor %xmm4,%xmm3 +- +- jmp L$schedule_mangle_both +-.p2align 4 +-L$schedule_mangle_dec: +- +- leaq L$k_dksd(%rip),%r11 +- movdqa %xmm9,%xmm1 +- pandn %xmm4,%xmm1 +- psrld $4,%xmm1 +- pand %xmm9,%xmm4 +- +- movdqa 0(%r11),%xmm2 +-.byte 102,15,56,0,212 +- movdqa 16(%r11),%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +-.byte 102,15,56,0,221 +- +- movdqa 32(%r11),%xmm2 +-.byte 102,15,56,0,212 +- pxor %xmm3,%xmm2 +- movdqa 48(%r11),%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +-.byte 102,15,56,0,221 +- +- movdqa 64(%r11),%xmm2 +-.byte 102,15,56,0,212 +- pxor %xmm3,%xmm2 +- movdqa 80(%r11),%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +-.byte 102,15,56,0,221 +- +- movdqa 96(%r11),%xmm2 +-.byte 102,15,56,0,212 +- pxor %xmm3,%xmm2 +- movdqa 112(%r11),%xmm3 +-.byte 102,15,56,0,217 +- pxor %xmm2,%xmm3 +- +- addq $-16,%rdx +- +-L$schedule_mangle_both: +- movdqa (%r8,%r10,1),%xmm1 +-.byte 102,15,56,0,217 +- addq $-16,%r8 +- andq $0x30,%r8 +- movdqu %xmm3,(%rdx) +- .byte 0xf3,0xc3 +- +- +- +- +- +- +-.globl _vpaes_set_encrypt_key +-.private_extern _vpaes_set_encrypt_key +- +-.p2align 4 +-_vpaes_set_encrypt_key: +- +-#ifdef BORINGSSL_DISPATCH_TEST +- +- movb 
$1,_BORINGSSL_function_hit+5(%rip) +-#endif +- +- movl %esi,%eax +- shrl $5,%eax +- addl $5,%eax +- movl %eax,240(%rdx) +- +- movl $0,%ecx +- movl $0x30,%r8d +- call _vpaes_schedule_core +- xorl %eax,%eax +- .byte 0xf3,0xc3 +- +- +- +-.globl _vpaes_set_decrypt_key +-.private_extern _vpaes_set_decrypt_key +- +-.p2align 4 +-_vpaes_set_decrypt_key: +- +- movl %esi,%eax +- shrl $5,%eax +- addl $5,%eax +- movl %eax,240(%rdx) +- shll $4,%eax +- leaq 16(%rdx,%rax,1),%rdx +- +- movl $1,%ecx +- movl %esi,%r8d +- shrl $1,%r8d +- andl $32,%r8d +- xorl $32,%r8d +- call _vpaes_schedule_core +- xorl %eax,%eax +- .byte 0xf3,0xc3 +- +- +- +-.globl _vpaes_encrypt +-.private_extern _vpaes_encrypt +- +-.p2align 4 +-_vpaes_encrypt: +- +-#ifdef BORINGSSL_DISPATCH_TEST +- +- movb $1,_BORINGSSL_function_hit+4(%rip) +-#endif +- movdqu (%rdi),%xmm0 +- call _vpaes_preheat +- call _vpaes_encrypt_core +- movdqu %xmm0,(%rsi) +- .byte 0xf3,0xc3 +- +- +- +-.globl _vpaes_decrypt +-.private_extern _vpaes_decrypt +- +-.p2align 4 +-_vpaes_decrypt: +- +- movdqu (%rdi),%xmm0 +- call _vpaes_preheat +- call _vpaes_decrypt_core +- movdqu %xmm0,(%rsi) +- .byte 0xf3,0xc3 +- +- +-.globl _vpaes_cbc_encrypt +-.private_extern _vpaes_cbc_encrypt +- +-.p2align 4 +-_vpaes_cbc_encrypt: +- +- xchgq %rcx,%rdx +- subq $16,%rcx +- jc L$cbc_abort +- movdqu (%r8),%xmm6 +- subq %rdi,%rsi +- call _vpaes_preheat +- cmpl $0,%r9d +- je L$cbc_dec_loop +- jmp L$cbc_enc_loop +-.p2align 4 +-L$cbc_enc_loop: +- movdqu (%rdi),%xmm0 +- pxor %xmm6,%xmm0 +- call _vpaes_encrypt_core +- movdqa %xmm0,%xmm6 +- movdqu %xmm0,(%rsi,%rdi,1) +- leaq 16(%rdi),%rdi +- subq $16,%rcx +- jnc L$cbc_enc_loop +- jmp L$cbc_done +-.p2align 4 +-L$cbc_dec_loop: +- movdqu (%rdi),%xmm0 +- movdqa %xmm0,%xmm7 +- call _vpaes_decrypt_core +- pxor %xmm6,%xmm0 +- movdqa %xmm7,%xmm6 +- movdqu %xmm0,(%rsi,%rdi,1) +- leaq 16(%rdi),%rdi +- subq $16,%rcx +- jnc L$cbc_dec_loop +-L$cbc_done: +- movdqu %xmm6,(%r8) +-L$cbc_abort: +- .byte 0xf3,0xc3 +- +- +-.globl _vpaes_ctr32_encrypt_blocks +-.private_extern _vpaes_ctr32_encrypt_blocks +- +-.p2align 4 +-_vpaes_ctr32_encrypt_blocks: +- +- +- xchgq %rcx,%rdx +- testq %rcx,%rcx +- jz L$ctr32_abort +- movdqu (%r8),%xmm0 +- movdqa L$ctr_add_one(%rip),%xmm8 +- subq %rdi,%rsi +- call _vpaes_preheat +- movdqa %xmm0,%xmm6 +- pshufb L$rev_ctr(%rip),%xmm6 +- +- testq $1,%rcx +- jz L$ctr32_prep_loop +- +- +- +- movdqu (%rdi),%xmm7 +- call _vpaes_encrypt_core +- pxor %xmm7,%xmm0 +- paddd %xmm8,%xmm6 +- movdqu %xmm0,(%rsi,%rdi,1) +- subq $1,%rcx +- leaq 16(%rdi),%rdi +- jz L$ctr32_done +- +-L$ctr32_prep_loop: +- +- +- movdqa %xmm6,%xmm14 +- movdqa %xmm6,%xmm15 +- paddd %xmm8,%xmm15 +- +-L$ctr32_loop: +- movdqa L$rev_ctr(%rip),%xmm1 +- movdqa %xmm14,%xmm0 +- movdqa %xmm15,%xmm6 +-.byte 102,15,56,0,193 +-.byte 102,15,56,0,241 +- call _vpaes_encrypt_core_2x +- movdqu (%rdi),%xmm1 +- movdqu 16(%rdi),%xmm2 +- movdqa L$ctr_add_two(%rip),%xmm3 +- pxor %xmm1,%xmm0 +- pxor %xmm2,%xmm6 +- paddd %xmm3,%xmm14 +- paddd %xmm3,%xmm15 +- movdqu %xmm0,(%rsi,%rdi,1) +- movdqu %xmm6,16(%rsi,%rdi,1) +- subq $2,%rcx +- leaq 32(%rdi),%rdi +- jnz L$ctr32_loop +- +-L$ctr32_done: +-L$ctr32_abort: +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +- +-.p2align 4 +-_vpaes_preheat: +- +- leaq L$k_s0F(%rip),%r10 +- movdqa -32(%r10),%xmm10 +- movdqa -16(%r10),%xmm11 +- movdqa 0(%r10),%xmm9 +- movdqa 48(%r10),%xmm13 +- movdqa 64(%r10),%xmm12 +- movdqa 80(%r10),%xmm15 +- movdqa 96(%r10),%xmm14 +- .byte 0xf3,0xc3 +- +- +- +- +- +- +- +- +-.p2align 6 +-_vpaes_consts: +-L$k_inv: +-.quad 
0x0E05060F0D080180, 0x040703090A0B0C02 +-.quad 0x01040A060F0B0780, 0x030D0E0C02050809 +- +-L$k_s0F: +-.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F +- +-L$k_ipt: +-.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808 +-.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81 +- +-L$k_sb1: +-.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544 +-.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF +-L$k_sb2: +-.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD +-.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A +-L$k_sbo: +-.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878 +-.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA +- +-L$k_mc_forward: +-.quad 0x0407060500030201, 0x0C0F0E0D080B0A09 +-.quad 0x080B0A0904070605, 0x000302010C0F0E0D +-.quad 0x0C0F0E0D080B0A09, 0x0407060500030201 +-.quad 0x000302010C0F0E0D, 0x080B0A0904070605 +- +-L$k_mc_backward: +-.quad 0x0605040702010003, 0x0E0D0C0F0A09080B +-.quad 0x020100030E0D0C0F, 0x0A09080B06050407 +-.quad 0x0E0D0C0F0A09080B, 0x0605040702010003 +-.quad 0x0A09080B06050407, 0x020100030E0D0C0F +- +-L$k_sr: +-.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908 +-.quad 0x030E09040F0A0500, 0x0B06010C07020D08 +-.quad 0x0F060D040B020900, 0x070E050C030A0108 +-.quad 0x0B0E0104070A0D00, 0x0306090C0F020508 +- +-L$k_rcon: +-.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81 +- +-L$k_s63: +-.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B +- +-L$k_opt: +-.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808 +-.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0 +- +-L$k_deskew: +-.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A +-.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77 +- +- +- +- +- +-L$k_dksd: +-.quad 0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9 +-.quad 0x41C277F4B5368300, 0x5FDC69EAAB289D1E +-L$k_dksb: +-.quad 0x9A4FCA1F8550D500, 0x03D653861CC94C99 +-.quad 0x115BEDA7B6FC4A00, 0xD993256F7E3482C8 +-L$k_dkse: +-.quad 0xD5031CCA1FC9D600, 0x53859A4C994F5086 +-.quad 0xA23196054FDC7BE8, 0xCD5EF96A20B31487 +-L$k_dks9: +-.quad 0xB6116FC87ED9A700, 0x4AED933482255BFC +-.quad 0x4576516227143300, 0x8BB89FACE9DAFDCE +- +- +- +- +- +-L$k_dipt: +-.quad 0x0F505B040B545F00, 0x154A411E114E451A +-.quad 0x86E383E660056500, 0x12771772F491F194 +- +-L$k_dsb9: +-.quad 0x851C03539A86D600, 0xCAD51F504F994CC9 +-.quad 0xC03B1789ECD74900, 0x725E2C9EB2FBA565 +-L$k_dsbd: +-.quad 0x7D57CCDFE6B1A200, 0xF56E9B13882A4439 +-.quad 0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3 +-L$k_dsbb: +-.quad 0xD022649296B44200, 0x602646F6B0F2D404 +-.quad 0xC19498A6CD596700, 0xF3FF0C3E3255AA6B +-L$k_dsbe: +-.quad 0x46F2929626D4D000, 0x2242600464B4F6B0 +-.quad 0x0C55A6CDFFAAC100, 0x9467F36B98593E32 +-L$k_dsbo: +-.quad 0x1387EA537EF94000, 0xC7AA6DB9D4943E2D +-.quad 0x12D7560F93441D00, 0xCA4B8159D8C58E9C +- +- +-L$rev_ctr: +-.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908 +- +- +-L$ctr_add_one: +-.quad 0x0000000000000000, 0x0000000100000000 +-L$ctr_add_two: +-.quad 0x0000000000000000, 0x0000000200000000 +- +-.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0 +-.p2align 6 +- +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/x86_64-mont.S b/mac-x86_64/crypto/fipsmodule/x86_64-mont.S +deleted file mode 100644 +index d354b2d..0000000 +--- a/mac-x86_64/crypto/fipsmodule/x86_64-mont.S ++++ /dev/null +@@ -1,1256 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +- +-.globl _bn_mul_mont +-.private_extern _bn_mul_mont +- +-.p2align 4 +-_bn_mul_mont: +- +- movl %r9d,%r9d +- movq %rsp,%rax +- +- testl $3,%r9d +- jnz L$mul_enter +- cmpl $8,%r9d +- jb L$mul_enter +- leaq _OPENSSL_ia32cap_P(%rip),%r11 +- movl 8(%r11),%r11d +- cmpq %rsi,%rdx +- jne L$mul4x_enter +- testl $7,%r9d +- jz L$sqr8x_enter +- jmp L$mul4x_enter +- +-.p2align 4 +-L$mul_enter: +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- +- negq %r9 +- movq %rsp,%r11 +- leaq -16(%rsp,%r9,8),%r10 +- negq %r9 +- andq $-1024,%r10 +- +- +- +- +- +- +- +- +- +- subq %r10,%r11 +- andq $-4096,%r11 +- leaq (%r10,%r11,1),%rsp +- movq (%rsp),%r11 +- cmpq %r10,%rsp +- ja L$mul_page_walk +- jmp L$mul_page_walk_done +- +-.p2align 4 +-L$mul_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r11 +- cmpq %r10,%rsp +- ja L$mul_page_walk +-L$mul_page_walk_done: +- +- movq %rax,8(%rsp,%r9,8) +- +-L$mul_body: +- movq %rdx,%r12 +- movq (%r8),%r8 +- movq (%r12),%rbx +- movq (%rsi),%rax +- +- xorq %r14,%r14 +- xorq %r15,%r15 +- +- movq %r8,%rbp +- mulq %rbx +- movq %rax,%r10 +- movq (%rcx),%rax +- +- imulq %r10,%rbp +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r10 +- movq 8(%rsi),%rax +- adcq $0,%rdx +- movq %rdx,%r13 +- +- leaq 1(%r15),%r15 +- jmp L$1st_enter +- +-.p2align 4 +-L$1st: +- addq %rax,%r13 +- movq (%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r11,%r13 +- movq %r10,%r11 +- adcq $0,%rdx +- movq %r13,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- +-L$1st_enter: +- mulq %rbx +- addq %rax,%r11 +- movq (%rcx,%r15,8),%rax +- adcq $0,%rdx +- leaq 1(%r15),%r15 +- movq %rdx,%r10 +- +- mulq %rbp +- cmpq %r9,%r15 +- jne L$1st +- +- addq %rax,%r13 +- movq (%rsi),%rax +- adcq $0,%rdx +- addq %r11,%r13 +- adcq $0,%rdx +- movq %r13,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- movq %r10,%r11 +- +- xorq %rdx,%rdx +- addq %r11,%r13 +- adcq $0,%rdx +- movq %r13,-8(%rsp,%r9,8) +- movq %rdx,(%rsp,%r9,8) +- +- leaq 1(%r14),%r14 +- jmp L$outer +-.p2align 4 +-L$outer: +- movq (%r12,%r14,8),%rbx +- xorq %r15,%r15 +- movq %r8,%rbp +- movq (%rsp),%r10 +- mulq %rbx +- addq %rax,%r10 +- movq (%rcx),%rax +- adcq $0,%rdx +- +- imulq %r10,%rbp +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r10 +- movq 8(%rsi),%rax +- adcq $0,%rdx +- movq 8(%rsp),%r10 +- movq %rdx,%r13 +- +- leaq 1(%r15),%r15 +- jmp L$inner_enter +- +-.p2align 4 +-L$inner: +- addq %rax,%r13 +- movq (%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- movq (%rsp,%r15,8),%r10 +- adcq $0,%rdx +- movq %r13,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- +-L$inner_enter: +- mulq %rbx +- addq %rax,%r11 +- movq (%rcx,%r15,8),%rax +- adcq $0,%rdx +- addq %r11,%r10 +- movq %rdx,%r11 +- adcq $0,%r11 +- leaq 1(%r15),%r15 +- +- mulq %rbp +- cmpq %r9,%r15 +- jne L$inner +- +- addq %rax,%r13 +- movq (%rsi),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- movq (%rsp,%r15,8),%r10 +- adcq $0,%rdx +- movq %r13,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- +- xorq %rdx,%rdx +- addq %r11,%r13 +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-8(%rsp,%r9,8) +- movq %rdx,(%rsp,%r9,8) +- +- leaq 1(%r14),%r14 +- cmpq %r9,%r14 +- jb L$outer +- +- xorq %r14,%r14 +- movq (%rsp),%rax +- movq %r9,%r15 +- +-.p2align 4 +-L$sub: sbbq (%rcx,%r14,8),%rax +- movq %rax,(%rdi,%r14,8) +- movq 8(%rsp,%r14,8),%rax +- 
leaq 1(%r14),%r14 +- decq %r15 +- jnz L$sub +- +- sbbq $0,%rax +- movq $-1,%rbx +- xorq %rax,%rbx +- xorq %r14,%r14 +- movq %r9,%r15 +- +-L$copy: +- movq (%rdi,%r14,8),%rcx +- movq (%rsp,%r14,8),%rdx +- andq %rbx,%rcx +- andq %rax,%rdx +- movq %r9,(%rsp,%r14,8) +- orq %rcx,%rdx +- movq %rdx,(%rdi,%r14,8) +- leaq 1(%r14),%r14 +- subq $1,%r15 +- jnz L$copy +- +- movq 8(%rsp,%r9,8),%rsi +- +- movq $1,%rax +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$mul_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 4 +-bn_mul4x_mont: +- +- movl %r9d,%r9d +- movq %rsp,%rax +- +-L$mul4x_enter: +- andl $0x80100,%r11d +- cmpl $0x80100,%r11d +- je L$mulx4x_enter +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- +- negq %r9 +- movq %rsp,%r11 +- leaq -32(%rsp,%r9,8),%r10 +- negq %r9 +- andq $-1024,%r10 +- +- subq %r10,%r11 +- andq $-4096,%r11 +- leaq (%r10,%r11,1),%rsp +- movq (%rsp),%r11 +- cmpq %r10,%rsp +- ja L$mul4x_page_walk +- jmp L$mul4x_page_walk_done +- +-L$mul4x_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r11 +- cmpq %r10,%rsp +- ja L$mul4x_page_walk +-L$mul4x_page_walk_done: +- +- movq %rax,8(%rsp,%r9,8) +- +-L$mul4x_body: +- movq %rdi,16(%rsp,%r9,8) +- movq %rdx,%r12 +- movq (%r8),%r8 +- movq (%r12),%rbx +- movq (%rsi),%rax +- +- xorq %r14,%r14 +- xorq %r15,%r15 +- +- movq %r8,%rbp +- mulq %rbx +- movq %rax,%r10 +- movq (%rcx),%rax +- +- imulq %r10,%rbp +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r10 +- movq 8(%rsi),%rax +- adcq $0,%rdx +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq 8(%rcx),%rax +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq 16(%rsi),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- leaq 4(%r15),%r15 +- adcq $0,%rdx +- movq %rdi,(%rsp) +- movq %rdx,%r13 +- jmp L$1st4x +-.p2align 4 +-L$1st4x: +- mulq %rbx +- addq %rax,%r10 +- movq -16(%rcx,%r15,8),%rax +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq -8(%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-24(%rsp,%r15,8) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq -8(%rcx,%r15,8),%rax +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq (%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq %rdi,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- +- mulq %rbx +- addq %rax,%r10 +- movq (%rcx,%r15,8),%rax +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq 8(%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-8(%rsp,%r15,8) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq 8(%rcx,%r15,8),%rax +- adcq $0,%rdx +- leaq 4(%r15),%r15 +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq -16(%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq %rdi,-32(%rsp,%r15,8) +- movq %rdx,%r13 +- cmpq %r9,%r15 +- jb L$1st4x +- +- mulq %rbx +- addq %rax,%r10 +- movq -16(%rcx,%r15,8),%rax +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq -8(%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-24(%rsp,%r15,8) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq -8(%rcx,%r15,8),%rax +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq (%rsi),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq %rdi,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- +- 
xorq %rdi,%rdi +- addq %r10,%r13 +- adcq $0,%rdi +- movq %r13,-8(%rsp,%r15,8) +- movq %rdi,(%rsp,%r15,8) +- +- leaq 1(%r14),%r14 +-.p2align 2 +-L$outer4x: +- movq (%r12,%r14,8),%rbx +- xorq %r15,%r15 +- movq (%rsp),%r10 +- movq %r8,%rbp +- mulq %rbx +- addq %rax,%r10 +- movq (%rcx),%rax +- adcq $0,%rdx +- +- imulq %r10,%rbp +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r10 +- movq 8(%rsi),%rax +- adcq $0,%rdx +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq 8(%rcx),%rax +- adcq $0,%rdx +- addq 8(%rsp),%r11 +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq 16(%rsi),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- leaq 4(%r15),%r15 +- adcq $0,%rdx +- movq %rdi,(%rsp) +- movq %rdx,%r13 +- jmp L$inner4x +-.p2align 4 +-L$inner4x: +- mulq %rbx +- addq %rax,%r10 +- movq -16(%rcx,%r15,8),%rax +- adcq $0,%rdx +- addq -16(%rsp,%r15,8),%r10 +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq -8(%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-24(%rsp,%r15,8) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq -8(%rcx,%r15,8),%rax +- adcq $0,%rdx +- addq -8(%rsp,%r15,8),%r11 +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq (%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq %rdi,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- +- mulq %rbx +- addq %rax,%r10 +- movq (%rcx,%r15,8),%rax +- adcq $0,%rdx +- addq (%rsp,%r15,8),%r10 +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq 8(%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-8(%rsp,%r15,8) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq 8(%rcx,%r15,8),%rax +- adcq $0,%rdx +- addq 8(%rsp,%r15,8),%r11 +- adcq $0,%rdx +- leaq 4(%r15),%r15 +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq -16(%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq %rdi,-32(%rsp,%r15,8) +- movq %rdx,%r13 +- cmpq %r9,%r15 +- jb L$inner4x +- +- mulq %rbx +- addq %rax,%r10 +- movq -16(%rcx,%r15,8),%rax +- adcq $0,%rdx +- addq -16(%rsp,%r15,8),%r10 +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq -8(%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-24(%rsp,%r15,8) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq -8(%rcx,%r15,8),%rax +- adcq $0,%rdx +- addq -8(%rsp,%r15,8),%r11 +- adcq $0,%rdx +- leaq 1(%r14),%r14 +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq (%rsi),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq %rdi,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- +- xorq %rdi,%rdi +- addq %r10,%r13 +- adcq $0,%rdi +- addq (%rsp,%r9,8),%r13 +- adcq $0,%rdi +- movq %r13,-8(%rsp,%r15,8) +- movq %rdi,(%rsp,%r15,8) +- +- cmpq %r9,%r14 +- jb L$outer4x +- movq 16(%rsp,%r9,8),%rdi +- leaq -4(%r9),%r15 +- movq 0(%rsp),%rax +- movq 8(%rsp),%rdx +- shrq $2,%r15 +- leaq (%rsp),%rsi +- xorq %r14,%r14 +- +- subq 0(%rcx),%rax +- movq 16(%rsi),%rbx +- movq 24(%rsi),%rbp +- sbbq 8(%rcx),%rdx +- +-L$sub4x: +- movq %rax,0(%rdi,%r14,8) +- movq %rdx,8(%rdi,%r14,8) +- sbbq 16(%rcx,%r14,8),%rbx +- movq 32(%rsi,%r14,8),%rax +- movq 40(%rsi,%r14,8),%rdx +- sbbq 24(%rcx,%r14,8),%rbp +- movq %rbx,16(%rdi,%r14,8) +- movq %rbp,24(%rdi,%r14,8) +- sbbq 32(%rcx,%r14,8),%rax +- movq 48(%rsi,%r14,8),%rbx +- movq 56(%rsi,%r14,8),%rbp +- sbbq 40(%rcx,%r14,8),%rdx +- leaq 4(%r14),%r14 +- decq %r15 +- jnz L$sub4x +- +- movq %rax,0(%rdi,%r14,8) +- movq 32(%rsi,%r14,8),%rax +- sbbq 
16(%rcx,%r14,8),%rbx +- movq %rdx,8(%rdi,%r14,8) +- sbbq 24(%rcx,%r14,8),%rbp +- movq %rbx,16(%rdi,%r14,8) +- +- sbbq $0,%rax +- movq %rbp,24(%rdi,%r14,8) +- pxor %xmm0,%xmm0 +-.byte 102,72,15,110,224 +- pcmpeqd %xmm5,%xmm5 +- pshufd $0,%xmm4,%xmm4 +- movq %r9,%r15 +- pxor %xmm4,%xmm5 +- shrq $2,%r15 +- xorl %eax,%eax +- +- jmp L$copy4x +-.p2align 4 +-L$copy4x: +- movdqa (%rsp,%rax,1),%xmm1 +- movdqu (%rdi,%rax,1),%xmm2 +- pand %xmm4,%xmm1 +- pand %xmm5,%xmm2 +- movdqa 16(%rsp,%rax,1),%xmm3 +- movdqa %xmm0,(%rsp,%rax,1) +- por %xmm2,%xmm1 +- movdqu 16(%rdi,%rax,1),%xmm2 +- movdqu %xmm1,(%rdi,%rax,1) +- pand %xmm4,%xmm3 +- pand %xmm5,%xmm2 +- movdqa %xmm0,16(%rsp,%rax,1) +- por %xmm2,%xmm3 +- movdqu %xmm3,16(%rdi,%rax,1) +- leaq 32(%rax),%rax +- decq %r15 +- jnz L$copy4x +- movq 8(%rsp,%r9,8),%rsi +- +- movq $1,%rax +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$mul4x_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +- +- +-.p2align 5 +-bn_sqr8x_mont: +- +- movq %rsp,%rax +- +-L$sqr8x_enter: +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$sqr8x_prologue: +- +- movl %r9d,%r10d +- shll $3,%r9d +- shlq $3+2,%r10 +- negq %r9 +- +- +- +- +- +- +- leaq -64(%rsp,%r9,2),%r11 +- movq %rsp,%rbp +- movq (%r8),%r8 +- subq %rsi,%r11 +- andq $4095,%r11 +- cmpq %r11,%r10 +- jb L$sqr8x_sp_alt +- subq %r11,%rbp +- leaq -64(%rbp,%r9,2),%rbp +- jmp L$sqr8x_sp_done +- +-.p2align 5 +-L$sqr8x_sp_alt: +- leaq 4096-64(,%r9,2),%r10 +- leaq -64(%rbp,%r9,2),%rbp +- subq %r10,%r11 +- movq $0,%r10 +- cmovcq %r10,%r11 +- subq %r11,%rbp +-L$sqr8x_sp_done: +- andq $-64,%rbp +- movq %rsp,%r11 +- subq %rbp,%r11 +- andq $-4096,%r11 +- leaq (%r11,%rbp,1),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$sqr8x_page_walk +- jmp L$sqr8x_page_walk_done +- +-.p2align 4 +-L$sqr8x_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$sqr8x_page_walk +-L$sqr8x_page_walk_done: +- +- movq %r9,%r10 +- negq %r9 +- +- movq %r8,32(%rsp) +- movq %rax,40(%rsp) +- +-L$sqr8x_body: +- +-.byte 102,72,15,110,209 +- pxor %xmm0,%xmm0 +-.byte 102,72,15,110,207 +-.byte 102,73,15,110,218 +- leaq _OPENSSL_ia32cap_P(%rip),%rax +- movl 8(%rax),%eax +- andl $0x80100,%eax +- cmpl $0x80100,%eax +- jne L$sqr8x_nox +- +- call _bn_sqrx8x_internal +- +- +- +- +- leaq (%r8,%rcx,1),%rbx +- movq %rcx,%r9 +- movq %rcx,%rdx +-.byte 102,72,15,126,207 +- sarq $3+2,%rcx +- jmp L$sqr8x_sub +- +-.p2align 5 +-L$sqr8x_nox: +- call _bn_sqr8x_internal +- +- +- +- +- leaq (%rdi,%r9,1),%rbx +- movq %r9,%rcx +- movq %r9,%rdx +-.byte 102,72,15,126,207 +- sarq $3+2,%rcx +- jmp L$sqr8x_sub +- +-.p2align 5 +-L$sqr8x_sub: +- movq 0(%rbx),%r12 +- movq 8(%rbx),%r13 +- movq 16(%rbx),%r14 +- movq 24(%rbx),%r15 +- leaq 32(%rbx),%rbx +- sbbq 0(%rbp),%r12 +- sbbq 8(%rbp),%r13 +- sbbq 16(%rbp),%r14 +- sbbq 24(%rbp),%r15 +- leaq 32(%rbp),%rbp +- movq %r12,0(%rdi) +- movq %r13,8(%rdi) +- movq %r14,16(%rdi) +- movq %r15,24(%rdi) +- leaq 32(%rdi),%rdi +- incq %rcx +- jnz L$sqr8x_sub +- +- sbbq $0,%rax +- leaq (%rbx,%r9,1),%rbx +- leaq (%rdi,%r9,1),%rdi +- +-.byte 102,72,15,110,200 +- pxor %xmm0,%xmm0 +- pshufd $0,%xmm1,%xmm1 +- movq 40(%rsp),%rsi +- +- jmp L$sqr8x_cond_copy +- +-.p2align 5 +-L$sqr8x_cond_copy: +- movdqa 0(%rbx),%xmm2 +- movdqa 16(%rbx),%xmm3 +- leaq 32(%rbx),%rbx +- movdqu 0(%rdi),%xmm4 +- movdqu 16(%rdi),%xmm5 +- leaq 32(%rdi),%rdi +- movdqa %xmm0,-32(%rbx) +- movdqa 
%xmm0,-16(%rbx) +- movdqa %xmm0,-32(%rbx,%rdx,1) +- movdqa %xmm0,-16(%rbx,%rdx,1) +- pcmpeqd %xmm1,%xmm0 +- pand %xmm1,%xmm2 +- pand %xmm1,%xmm3 +- pand %xmm0,%xmm4 +- pand %xmm0,%xmm5 +- pxor %xmm0,%xmm0 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqu %xmm4,-32(%rdi) +- movdqu %xmm5,-16(%rdi) +- addq $32,%r9 +- jnz L$sqr8x_cond_copy +- +- movq $1,%rax +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$sqr8x_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-bn_mulx4x_mont: +- +- movq %rsp,%rax +- +-L$mulx4x_enter: +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$mulx4x_prologue: +- +- shll $3,%r9d +- xorq %r10,%r10 +- subq %r9,%r10 +- movq (%r8),%r8 +- leaq -72(%rsp,%r10,1),%rbp +- andq $-128,%rbp +- movq %rsp,%r11 +- subq %rbp,%r11 +- andq $-4096,%r11 +- leaq (%r11,%rbp,1),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$mulx4x_page_walk +- jmp L$mulx4x_page_walk_done +- +-.p2align 4 +-L$mulx4x_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$mulx4x_page_walk +-L$mulx4x_page_walk_done: +- +- leaq (%rdx,%r9,1),%r10 +- +- +- +- +- +- +- +- +- +- +- +- +- movq %r9,0(%rsp) +- shrq $5,%r9 +- movq %r10,16(%rsp) +- subq $1,%r9 +- movq %r8,24(%rsp) +- movq %rdi,32(%rsp) +- movq %rax,40(%rsp) +- +- movq %r9,48(%rsp) +- jmp L$mulx4x_body +- +-.p2align 5 +-L$mulx4x_body: +- leaq 8(%rdx),%rdi +- movq (%rdx),%rdx +- leaq 64+32(%rsp),%rbx +- movq %rdx,%r9 +- +- mulxq 0(%rsi),%r8,%rax +- mulxq 8(%rsi),%r11,%r14 +- addq %rax,%r11 +- movq %rdi,8(%rsp) +- mulxq 16(%rsi),%r12,%r13 +- adcq %r14,%r12 +- adcq $0,%r13 +- +- movq %r8,%rdi +- imulq 24(%rsp),%r8 +- xorq %rbp,%rbp +- +- mulxq 24(%rsi),%rax,%r14 +- movq %r8,%rdx +- leaq 32(%rsi),%rsi +- adcxq %rax,%r13 +- adcxq %rbp,%r14 +- +- mulxq 0(%rcx),%rax,%r10 +- adcxq %rax,%rdi +- adoxq %r11,%r10 +- mulxq 8(%rcx),%rax,%r11 +- adcxq %rax,%r10 +- adoxq %r12,%r11 +-.byte 0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00 +- movq 48(%rsp),%rdi +- movq %r10,-32(%rbx) +- adcxq %rax,%r11 +- adoxq %r13,%r12 +- mulxq 24(%rcx),%rax,%r15 +- movq %r9,%rdx +- movq %r11,-24(%rbx) +- adcxq %rax,%r12 +- adoxq %rbp,%r15 +- leaq 32(%rcx),%rcx +- movq %r12,-16(%rbx) +- +- jmp L$mulx4x_1st +- +-.p2align 5 +-L$mulx4x_1st: +- adcxq %rbp,%r15 +- mulxq 0(%rsi),%r10,%rax +- adcxq %r14,%r10 +- mulxq 8(%rsi),%r11,%r14 +- adcxq %rax,%r11 +- mulxq 16(%rsi),%r12,%rax +- adcxq %r14,%r12 +- mulxq 24(%rsi),%r13,%r14 +-.byte 0x67,0x67 +- movq %r8,%rdx +- adcxq %rax,%r13 +- adcxq %rbp,%r14 +- leaq 32(%rsi),%rsi +- leaq 32(%rbx),%rbx +- +- adoxq %r15,%r10 +- mulxq 0(%rcx),%rax,%r15 +- adcxq %rax,%r10 +- adoxq %r15,%r11 +- mulxq 8(%rcx),%rax,%r15 +- adcxq %rax,%r11 +- adoxq %r15,%r12 +- mulxq 16(%rcx),%rax,%r15 +- movq %r10,-40(%rbx) +- adcxq %rax,%r12 +- movq %r11,-32(%rbx) +- adoxq %r15,%r13 +- mulxq 24(%rcx),%rax,%r15 +- movq %r9,%rdx +- movq %r12,-24(%rbx) +- adcxq %rax,%r13 +- adoxq %rbp,%r15 +- leaq 32(%rcx),%rcx +- movq %r13,-16(%rbx) +- +- decq %rdi +- jnz L$mulx4x_1st +- +- movq 0(%rsp),%rax +- movq 8(%rsp),%rdi +- adcq %rbp,%r15 +- addq %r15,%r14 +- sbbq %r15,%r15 +- movq %r14,-8(%rbx) +- jmp L$mulx4x_outer +- +-.p2align 5 +-L$mulx4x_outer: +- movq (%rdi),%rdx +- leaq 8(%rdi),%rdi +- subq %rax,%rsi +- movq %r15,(%rbx) +- leaq 64+32(%rsp),%rbx +- subq %rax,%rcx +- +- mulxq 0(%rsi),%r8,%r11 +- xorl %ebp,%ebp +- movq %rdx,%r9 +- mulxq 8(%rsi),%r14,%r12 +- adoxq 
-32(%rbx),%r8 +- adcxq %r14,%r11 +- mulxq 16(%rsi),%r15,%r13 +- adoxq -24(%rbx),%r11 +- adcxq %r15,%r12 +- adoxq -16(%rbx),%r12 +- adcxq %rbp,%r13 +- adoxq %rbp,%r13 +- +- movq %rdi,8(%rsp) +- movq %r8,%r15 +- imulq 24(%rsp),%r8 +- xorl %ebp,%ebp +- +- mulxq 24(%rsi),%rax,%r14 +- movq %r8,%rdx +- adcxq %rax,%r13 +- adoxq -8(%rbx),%r13 +- adcxq %rbp,%r14 +- leaq 32(%rsi),%rsi +- adoxq %rbp,%r14 +- +- mulxq 0(%rcx),%rax,%r10 +- adcxq %rax,%r15 +- adoxq %r11,%r10 +- mulxq 8(%rcx),%rax,%r11 +- adcxq %rax,%r10 +- adoxq %r12,%r11 +- mulxq 16(%rcx),%rax,%r12 +- movq %r10,-32(%rbx) +- adcxq %rax,%r11 +- adoxq %r13,%r12 +- mulxq 24(%rcx),%rax,%r15 +- movq %r9,%rdx +- movq %r11,-24(%rbx) +- leaq 32(%rcx),%rcx +- adcxq %rax,%r12 +- adoxq %rbp,%r15 +- movq 48(%rsp),%rdi +- movq %r12,-16(%rbx) +- +- jmp L$mulx4x_inner +- +-.p2align 5 +-L$mulx4x_inner: +- mulxq 0(%rsi),%r10,%rax +- adcxq %rbp,%r15 +- adoxq %r14,%r10 +- mulxq 8(%rsi),%r11,%r14 +- adcxq 0(%rbx),%r10 +- adoxq %rax,%r11 +- mulxq 16(%rsi),%r12,%rax +- adcxq 8(%rbx),%r11 +- adoxq %r14,%r12 +- mulxq 24(%rsi),%r13,%r14 +- movq %r8,%rdx +- adcxq 16(%rbx),%r12 +- adoxq %rax,%r13 +- adcxq 24(%rbx),%r13 +- adoxq %rbp,%r14 +- leaq 32(%rsi),%rsi +- leaq 32(%rbx),%rbx +- adcxq %rbp,%r14 +- +- adoxq %r15,%r10 +- mulxq 0(%rcx),%rax,%r15 +- adcxq %rax,%r10 +- adoxq %r15,%r11 +- mulxq 8(%rcx),%rax,%r15 +- adcxq %rax,%r11 +- adoxq %r15,%r12 +- mulxq 16(%rcx),%rax,%r15 +- movq %r10,-40(%rbx) +- adcxq %rax,%r12 +- adoxq %r15,%r13 +- mulxq 24(%rcx),%rax,%r15 +- movq %r9,%rdx +- movq %r11,-32(%rbx) +- movq %r12,-24(%rbx) +- adcxq %rax,%r13 +- adoxq %rbp,%r15 +- leaq 32(%rcx),%rcx +- movq %r13,-16(%rbx) +- +- decq %rdi +- jnz L$mulx4x_inner +- +- movq 0(%rsp),%rax +- movq 8(%rsp),%rdi +- adcq %rbp,%r15 +- subq 0(%rbx),%rbp +- adcq %r15,%r14 +- sbbq %r15,%r15 +- movq %r14,-8(%rbx) +- +- cmpq 16(%rsp),%rdi +- jne L$mulx4x_outer +- +- leaq 64(%rsp),%rbx +- subq %rax,%rcx +- negq %r15 +- movq %rax,%rdx +- shrq $3+2,%rax +- movq 32(%rsp),%rdi +- jmp L$mulx4x_sub +- +-.p2align 5 +-L$mulx4x_sub: +- movq 0(%rbx),%r11 +- movq 8(%rbx),%r12 +- movq 16(%rbx),%r13 +- movq 24(%rbx),%r14 +- leaq 32(%rbx),%rbx +- sbbq 0(%rcx),%r11 +- sbbq 8(%rcx),%r12 +- sbbq 16(%rcx),%r13 +- sbbq 24(%rcx),%r14 +- leaq 32(%rcx),%rcx +- movq %r11,0(%rdi) +- movq %r12,8(%rdi) +- movq %r13,16(%rdi) +- movq %r14,24(%rdi) +- leaq 32(%rdi),%rdi +- decq %rax +- jnz L$mulx4x_sub +- +- sbbq $0,%r15 +- leaq 64(%rsp),%rbx +- subq %rdx,%rdi +- +-.byte 102,73,15,110,207 +- pxor %xmm0,%xmm0 +- pshufd $0,%xmm1,%xmm1 +- movq 40(%rsp),%rsi +- +- jmp L$mulx4x_cond_copy +- +-.p2align 5 +-L$mulx4x_cond_copy: +- movdqa 0(%rbx),%xmm2 +- movdqa 16(%rbx),%xmm3 +- leaq 32(%rbx),%rbx +- movdqu 0(%rdi),%xmm4 +- movdqu 16(%rdi),%xmm5 +- leaq 32(%rdi),%rdi +- movdqa %xmm0,-32(%rbx) +- movdqa %xmm0,-16(%rbx) +- pcmpeqd %xmm1,%xmm0 +- pand %xmm1,%xmm2 +- pand %xmm1,%xmm3 +- pand %xmm0,%xmm4 +- pand %xmm0,%xmm5 +- pxor %xmm0,%xmm0 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqu %xmm4,-32(%rdi) +- movdqu %xmm5,-16(%rdi) +- subq $32,%rdx +- jnz L$mulx4x_cond_copy +- +- movq %rdx,(%rbx) +- +- movq $1,%rax +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$mulx4x_epilogue: +- .byte 0xf3,0xc3 +- +- +-.byte 
77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +-.p2align 4 +-#endif +diff --git a/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S b/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S +deleted file mode 100644 +index e1fd9c9..0000000 +--- a/mac-x86_64/crypto/fipsmodule/x86_64-mont5.S ++++ /dev/null +@@ -1,3788 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. +- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +- +-.globl _bn_mul_mont_gather5 +-.private_extern _bn_mul_mont_gather5 +- +-.p2align 6 +-_bn_mul_mont_gather5: +- +- movl %r9d,%r9d +- movq %rsp,%rax +- +- testl $7,%r9d +- jnz L$mul_enter +- leaq _OPENSSL_ia32cap_P(%rip),%r11 +- movl 8(%r11),%r11d +- jmp L$mul4x_enter +- +-.p2align 4 +-L$mul_enter: +- movd 8(%rsp),%xmm5 +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +- +- negq %r9 +- movq %rsp,%r11 +- leaq -280(%rsp,%r9,8),%r10 +- negq %r9 +- andq $-1024,%r10 +- +- +- +- +- +- +- +- +- +- subq %r10,%r11 +- andq $-4096,%r11 +- leaq (%r10,%r11,1),%rsp +- movq (%rsp),%r11 +- cmpq %r10,%rsp +- ja L$mul_page_walk +- jmp L$mul_page_walk_done +- +-L$mul_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r11 +- cmpq %r10,%rsp +- ja L$mul_page_walk +-L$mul_page_walk_done: +- +- leaq L$inc(%rip),%r10 +- movq %rax,8(%rsp,%r9,8) +- +-L$mul_body: +- +- leaq 128(%rdx),%r12 +- movdqa 0(%r10),%xmm0 +- movdqa 16(%r10),%xmm1 +- leaq 24-112(%rsp,%r9,8),%r10 +- andq $-16,%r10 +- +- pshufd $0,%xmm5,%xmm5 +- movdqa %xmm1,%xmm4 +- movdqa %xmm1,%xmm2 +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +-.byte 0x67 +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,112(%r10) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,128(%r10) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,144(%r10) +- movdqa %xmm4,%xmm2 +- +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,160(%r10) +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,176(%r10) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,192(%r10) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,208(%r10) +- movdqa %xmm4,%xmm2 +- +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,224(%r10) +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,240(%r10) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,256(%r10) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,272(%r10) +- movdqa %xmm4,%xmm2 +- +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,288(%r10) +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,304(%r10) +- +- paddd %xmm2,%xmm3 +-.byte 0x67 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,320(%r10) +- +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,336(%r10) +- pand 64(%r12),%xmm0 +- +- pand 80(%r12),%xmm1 +- pand 96(%r12),%xmm2 +- movdqa %xmm3,352(%r10) +- pand 112(%r12),%xmm3 +- por 
%xmm2,%xmm0 +- por %xmm3,%xmm1 +- movdqa -128(%r12),%xmm4 +- movdqa -112(%r12),%xmm5 +- movdqa -96(%r12),%xmm2 +- pand 112(%r10),%xmm4 +- movdqa -80(%r12),%xmm3 +- pand 128(%r10),%xmm5 +- por %xmm4,%xmm0 +- pand 144(%r10),%xmm2 +- por %xmm5,%xmm1 +- pand 160(%r10),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- movdqa -64(%r12),%xmm4 +- movdqa -48(%r12),%xmm5 +- movdqa -32(%r12),%xmm2 +- pand 176(%r10),%xmm4 +- movdqa -16(%r12),%xmm3 +- pand 192(%r10),%xmm5 +- por %xmm4,%xmm0 +- pand 208(%r10),%xmm2 +- por %xmm5,%xmm1 +- pand 224(%r10),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- movdqa 0(%r12),%xmm4 +- movdqa 16(%r12),%xmm5 +- movdqa 32(%r12),%xmm2 +- pand 240(%r10),%xmm4 +- movdqa 48(%r12),%xmm3 +- pand 256(%r10),%xmm5 +- por %xmm4,%xmm0 +- pand 272(%r10),%xmm2 +- por %xmm5,%xmm1 +- pand 288(%r10),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- por %xmm1,%xmm0 +- pshufd $0x4e,%xmm0,%xmm1 +- por %xmm1,%xmm0 +- leaq 256(%r12),%r12 +-.byte 102,72,15,126,195 +- +- movq (%r8),%r8 +- movq (%rsi),%rax +- +- xorq %r14,%r14 +- xorq %r15,%r15 +- +- movq %r8,%rbp +- mulq %rbx +- movq %rax,%r10 +- movq (%rcx),%rax +- +- imulq %r10,%rbp +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r10 +- movq 8(%rsi),%rax +- adcq $0,%rdx +- movq %rdx,%r13 +- +- leaq 1(%r15),%r15 +- jmp L$1st_enter +- +-.p2align 4 +-L$1st: +- addq %rax,%r13 +- movq (%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r11,%r13 +- movq %r10,%r11 +- adcq $0,%rdx +- movq %r13,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- +-L$1st_enter: +- mulq %rbx +- addq %rax,%r11 +- movq (%rcx,%r15,8),%rax +- adcq $0,%rdx +- leaq 1(%r15),%r15 +- movq %rdx,%r10 +- +- mulq %rbp +- cmpq %r9,%r15 +- jne L$1st +- +- +- addq %rax,%r13 +- adcq $0,%rdx +- addq %r11,%r13 +- adcq $0,%rdx +- movq %r13,-16(%rsp,%r9,8) +- movq %rdx,%r13 +- movq %r10,%r11 +- +- xorq %rdx,%rdx +- addq %r11,%r13 +- adcq $0,%rdx +- movq %r13,-8(%rsp,%r9,8) +- movq %rdx,(%rsp,%r9,8) +- +- leaq 1(%r14),%r14 +- jmp L$outer +-.p2align 4 +-L$outer: +- leaq 24+128(%rsp,%r9,8),%rdx +- andq $-16,%rdx +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- movdqa -128(%r12),%xmm0 +- movdqa -112(%r12),%xmm1 +- movdqa -96(%r12),%xmm2 +- movdqa -80(%r12),%xmm3 +- pand -128(%rdx),%xmm0 +- pand -112(%rdx),%xmm1 +- por %xmm0,%xmm4 +- pand -96(%rdx),%xmm2 +- por %xmm1,%xmm5 +- pand -80(%rdx),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa -64(%r12),%xmm0 +- movdqa -48(%r12),%xmm1 +- movdqa -32(%r12),%xmm2 +- movdqa -16(%r12),%xmm3 +- pand -64(%rdx),%xmm0 +- pand -48(%rdx),%xmm1 +- por %xmm0,%xmm4 +- pand -32(%rdx),%xmm2 +- por %xmm1,%xmm5 +- pand -16(%rdx),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa 0(%r12),%xmm0 +- movdqa 16(%r12),%xmm1 +- movdqa 32(%r12),%xmm2 +- movdqa 48(%r12),%xmm3 +- pand 0(%rdx),%xmm0 +- pand 16(%rdx),%xmm1 +- por %xmm0,%xmm4 +- pand 32(%rdx),%xmm2 +- por %xmm1,%xmm5 +- pand 48(%rdx),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa 64(%r12),%xmm0 +- movdqa 80(%r12),%xmm1 +- movdqa 96(%r12),%xmm2 +- movdqa 112(%r12),%xmm3 +- pand 64(%rdx),%xmm0 +- pand 80(%rdx),%xmm1 +- por %xmm0,%xmm4 +- pand 96(%rdx),%xmm2 +- por %xmm1,%xmm5 +- pand 112(%rdx),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- por %xmm5,%xmm4 +- pshufd $0x4e,%xmm4,%xmm0 +- por %xmm4,%xmm0 +- leaq 256(%r12),%r12 +- +- movq (%rsi),%rax +-.byte 102,72,15,126,195 +- +- xorq %r15,%r15 +- movq %r8,%rbp +- movq (%rsp),%r10 +- +- mulq %rbx +- addq %rax,%r10 +- movq (%rcx),%rax +- adcq $0,%rdx +- +- imulq %r10,%rbp +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r10 +- movq 8(%rsi),%rax +- adcq $0,%rdx +- movq 8(%rsp),%r10 +- 
movq %rdx,%r13 +- +- leaq 1(%r15),%r15 +- jmp L$inner_enter +- +-.p2align 4 +-L$inner: +- addq %rax,%r13 +- movq (%rsi,%r15,8),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- movq (%rsp,%r15,8),%r10 +- adcq $0,%rdx +- movq %r13,-16(%rsp,%r15,8) +- movq %rdx,%r13 +- +-L$inner_enter: +- mulq %rbx +- addq %rax,%r11 +- movq (%rcx,%r15,8),%rax +- adcq $0,%rdx +- addq %r11,%r10 +- movq %rdx,%r11 +- adcq $0,%r11 +- leaq 1(%r15),%r15 +- +- mulq %rbp +- cmpq %r9,%r15 +- jne L$inner +- +- addq %rax,%r13 +- adcq $0,%rdx +- addq %r10,%r13 +- movq (%rsp,%r9,8),%r10 +- adcq $0,%rdx +- movq %r13,-16(%rsp,%r9,8) +- movq %rdx,%r13 +- +- xorq %rdx,%rdx +- addq %r11,%r13 +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-8(%rsp,%r9,8) +- movq %rdx,(%rsp,%r9,8) +- +- leaq 1(%r14),%r14 +- cmpq %r9,%r14 +- jb L$outer +- +- xorq %r14,%r14 +- movq (%rsp),%rax +- leaq (%rsp),%rsi +- movq %r9,%r15 +- jmp L$sub +-.p2align 4 +-L$sub: sbbq (%rcx,%r14,8),%rax +- movq %rax,(%rdi,%r14,8) +- movq 8(%rsi,%r14,8),%rax +- leaq 1(%r14),%r14 +- decq %r15 +- jnz L$sub +- +- sbbq $0,%rax +- movq $-1,%rbx +- xorq %rax,%rbx +- xorq %r14,%r14 +- movq %r9,%r15 +- +-L$copy: +- movq (%rdi,%r14,8),%rcx +- movq (%rsp,%r14,8),%rdx +- andq %rbx,%rcx +- andq %rax,%rdx +- movq %r14,(%rsp,%r14,8) +- orq %rcx,%rdx +- movq %rdx,(%rdi,%r14,8) +- leaq 1(%r14),%r14 +- subq $1,%r15 +- jnz L$copy +- +- movq 8(%rsp,%r9,8),%rsi +- +- movq $1,%rax +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$mul_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-bn_mul4x_mont_gather5: +- +-.byte 0x67 +- movq %rsp,%rax +- +-L$mul4x_enter: +- andl $0x80108,%r11d +- cmpl $0x80108,%r11d +- je L$mulx4x_enter +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$mul4x_prologue: +- +-.byte 0x67 +- shll $3,%r9d +- leaq (%r9,%r9,2),%r10 +- negq %r9 +- +- +- +- +- +- +- +- +- +- +- leaq -320(%rsp,%r9,2),%r11 +- movq %rsp,%rbp +- subq %rdi,%r11 +- andq $4095,%r11 +- cmpq %r11,%r10 +- jb L$mul4xsp_alt +- subq %r11,%rbp +- leaq -320(%rbp,%r9,2),%rbp +- jmp L$mul4xsp_done +- +-.p2align 5 +-L$mul4xsp_alt: +- leaq 4096-320(,%r9,2),%r10 +- leaq -320(%rbp,%r9,2),%rbp +- subq %r10,%r11 +- movq $0,%r10 +- cmovcq %r10,%r11 +- subq %r11,%rbp +-L$mul4xsp_done: +- andq $-64,%rbp +- movq %rsp,%r11 +- subq %rbp,%r11 +- andq $-4096,%r11 +- leaq (%r11,%rbp,1),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$mul4x_page_walk +- jmp L$mul4x_page_walk_done +- +-L$mul4x_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$mul4x_page_walk +-L$mul4x_page_walk_done: +- +- negq %r9 +- +- movq %rax,40(%rsp) +- +-L$mul4x_body: +- +- call mul4x_internal +- +- movq 40(%rsp),%rsi +- +- movq $1,%rax +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$mul4x_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-mul4x_internal: +- +- shlq $5,%r9 +- movd 8(%rax),%xmm5 +- leaq L$inc(%rip),%rax +- leaq 128(%rdx,%r9,1),%r13 +- shrq $5,%r9 +- movdqa 0(%rax),%xmm0 +- movdqa 16(%rax),%xmm1 +- leaq 88-112(%rsp,%r9,1),%r10 +- leaq 128(%rdx),%r12 +- +- pshufd $0,%xmm5,%xmm5 +- movdqa %xmm1,%xmm4 +-.byte 0x67,0x67 +- movdqa %xmm1,%xmm2 +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +-.byte 0x67 +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- 
movdqa %xmm0,112(%r10) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,128(%r10) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,144(%r10) +- movdqa %xmm4,%xmm2 +- +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,160(%r10) +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,176(%r10) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,192(%r10) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,208(%r10) +- movdqa %xmm4,%xmm2 +- +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,224(%r10) +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,240(%r10) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,256(%r10) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,272(%r10) +- movdqa %xmm4,%xmm2 +- +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,288(%r10) +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,304(%r10) +- +- paddd %xmm2,%xmm3 +-.byte 0x67 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,320(%r10) +- +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,336(%r10) +- pand 64(%r12),%xmm0 +- +- pand 80(%r12),%xmm1 +- pand 96(%r12),%xmm2 +- movdqa %xmm3,352(%r10) +- pand 112(%r12),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- movdqa -128(%r12),%xmm4 +- movdqa -112(%r12),%xmm5 +- movdqa -96(%r12),%xmm2 +- pand 112(%r10),%xmm4 +- movdqa -80(%r12),%xmm3 +- pand 128(%r10),%xmm5 +- por %xmm4,%xmm0 +- pand 144(%r10),%xmm2 +- por %xmm5,%xmm1 +- pand 160(%r10),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- movdqa -64(%r12),%xmm4 +- movdqa -48(%r12),%xmm5 +- movdqa -32(%r12),%xmm2 +- pand 176(%r10),%xmm4 +- movdqa -16(%r12),%xmm3 +- pand 192(%r10),%xmm5 +- por %xmm4,%xmm0 +- pand 208(%r10),%xmm2 +- por %xmm5,%xmm1 +- pand 224(%r10),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- movdqa 0(%r12),%xmm4 +- movdqa 16(%r12),%xmm5 +- movdqa 32(%r12),%xmm2 +- pand 240(%r10),%xmm4 +- movdqa 48(%r12),%xmm3 +- pand 256(%r10),%xmm5 +- por %xmm4,%xmm0 +- pand 272(%r10),%xmm2 +- por %xmm5,%xmm1 +- pand 288(%r10),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- por %xmm1,%xmm0 +- pshufd $0x4e,%xmm0,%xmm1 +- por %xmm1,%xmm0 +- leaq 256(%r12),%r12 +-.byte 102,72,15,126,195 +- +- movq %r13,16+8(%rsp) +- movq %rdi,56+8(%rsp) +- +- movq (%r8),%r8 +- movq (%rsi),%rax +- leaq (%rsi,%r9,1),%rsi +- negq %r9 +- +- movq %r8,%rbp +- mulq %rbx +- movq %rax,%r10 +- movq (%rcx),%rax +- +- imulq %r10,%rbp +- leaq 64+8(%rsp),%r14 +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r10 +- movq 8(%rsi,%r9,1),%rax +- adcq $0,%rdx +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq 8(%rcx),%rax +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq 16(%rsi,%r9,1),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- leaq 32(%r9),%r15 +- leaq 32(%rcx),%rcx +- adcq $0,%rdx +- movq %rdi,(%r14) +- movq %rdx,%r13 +- jmp L$1st4x +- +-.p2align 5 +-L$1st4x: +- mulq %rbx +- addq %rax,%r10 +- movq -16(%rcx),%rax +- leaq 32(%r14),%r14 +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq -8(%rsi,%r15,1),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-24(%r14) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq -8(%rcx),%rax +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq (%rsi,%r15,1),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq 
%rdi,-16(%r14) +- movq %rdx,%r13 +- +- mulq %rbx +- addq %rax,%r10 +- movq 0(%rcx),%rax +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq 8(%rsi,%r15,1),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-8(%r14) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq 8(%rcx),%rax +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq 16(%rsi,%r15,1),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- leaq 32(%rcx),%rcx +- adcq $0,%rdx +- movq %rdi,(%r14) +- movq %rdx,%r13 +- +- addq $32,%r15 +- jnz L$1st4x +- +- mulq %rbx +- addq %rax,%r10 +- movq -16(%rcx),%rax +- leaq 32(%r14),%r14 +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq -8(%rsi),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %r13,-24(%r14) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq -8(%rcx),%rax +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq (%rsi,%r9,1),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq %rdi,-16(%r14) +- movq %rdx,%r13 +- +- leaq (%rcx,%r9,1),%rcx +- +- xorq %rdi,%rdi +- addq %r10,%r13 +- adcq $0,%rdi +- movq %r13,-8(%r14) +- +- jmp L$outer4x +- +-.p2align 5 +-L$outer4x: +- leaq 16+128(%r14),%rdx +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- movdqa -128(%r12),%xmm0 +- movdqa -112(%r12),%xmm1 +- movdqa -96(%r12),%xmm2 +- movdqa -80(%r12),%xmm3 +- pand -128(%rdx),%xmm0 +- pand -112(%rdx),%xmm1 +- por %xmm0,%xmm4 +- pand -96(%rdx),%xmm2 +- por %xmm1,%xmm5 +- pand -80(%rdx),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa -64(%r12),%xmm0 +- movdqa -48(%r12),%xmm1 +- movdqa -32(%r12),%xmm2 +- movdqa -16(%r12),%xmm3 +- pand -64(%rdx),%xmm0 +- pand -48(%rdx),%xmm1 +- por %xmm0,%xmm4 +- pand -32(%rdx),%xmm2 +- por %xmm1,%xmm5 +- pand -16(%rdx),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa 0(%r12),%xmm0 +- movdqa 16(%r12),%xmm1 +- movdqa 32(%r12),%xmm2 +- movdqa 48(%r12),%xmm3 +- pand 0(%rdx),%xmm0 +- pand 16(%rdx),%xmm1 +- por %xmm0,%xmm4 +- pand 32(%rdx),%xmm2 +- por %xmm1,%xmm5 +- pand 48(%rdx),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa 64(%r12),%xmm0 +- movdqa 80(%r12),%xmm1 +- movdqa 96(%r12),%xmm2 +- movdqa 112(%r12),%xmm3 +- pand 64(%rdx),%xmm0 +- pand 80(%rdx),%xmm1 +- por %xmm0,%xmm4 +- pand 96(%rdx),%xmm2 +- por %xmm1,%xmm5 +- pand 112(%rdx),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- por %xmm5,%xmm4 +- pshufd $0x4e,%xmm4,%xmm0 +- por %xmm4,%xmm0 +- leaq 256(%r12),%r12 +-.byte 102,72,15,126,195 +- +- movq (%r14,%r9,1),%r10 +- movq %r8,%rbp +- mulq %rbx +- addq %rax,%r10 +- movq (%rcx),%rax +- adcq $0,%rdx +- +- imulq %r10,%rbp +- movq %rdx,%r11 +- movq %rdi,(%r14) +- +- leaq (%r14,%r9,1),%r14 +- +- mulq %rbp +- addq %rax,%r10 +- movq 8(%rsi,%r9,1),%rax +- adcq $0,%rdx +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq 8(%rcx),%rax +- adcq $0,%rdx +- addq 8(%r14),%r11 +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq 16(%rsi,%r9,1),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- leaq 32(%r9),%r15 +- leaq 32(%rcx),%rcx +- adcq $0,%rdx +- movq %rdx,%r13 +- jmp L$inner4x +- +-.p2align 5 +-L$inner4x: +- mulq %rbx +- addq %rax,%r10 +- movq -16(%rcx),%rax +- adcq $0,%rdx +- addq 16(%r14),%r10 +- leaq 32(%r14),%r14 +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq -8(%rsi,%r15,1),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %rdi,-32(%r14) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq -8(%rcx),%rax +- adcq $0,%rdx +- addq -8(%r14),%r11 +- adcq $0,%rdx 
+- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq (%rsi,%r15,1),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq %r13,-24(%r14) +- movq %rdx,%r13 +- +- mulq %rbx +- addq %rax,%r10 +- movq 0(%rcx),%rax +- adcq $0,%rdx +- addq (%r14),%r10 +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq 8(%rsi,%r15,1),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %rdi,-16(%r14) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq 8(%rcx),%rax +- adcq $0,%rdx +- addq 8(%r14),%r11 +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq 16(%rsi,%r15,1),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- leaq 32(%rcx),%rcx +- adcq $0,%rdx +- movq %r13,-8(%r14) +- movq %rdx,%r13 +- +- addq $32,%r15 +- jnz L$inner4x +- +- mulq %rbx +- addq %rax,%r10 +- movq -16(%rcx),%rax +- adcq $0,%rdx +- addq 16(%r14),%r10 +- leaq 32(%r14),%r14 +- adcq $0,%rdx +- movq %rdx,%r11 +- +- mulq %rbp +- addq %rax,%r13 +- movq -8(%rsi),%rax +- adcq $0,%rdx +- addq %r10,%r13 +- adcq $0,%rdx +- movq %rdi,-32(%r14) +- movq %rdx,%rdi +- +- mulq %rbx +- addq %rax,%r11 +- movq %rbp,%rax +- movq -8(%rcx),%rbp +- adcq $0,%rdx +- addq -8(%r14),%r11 +- adcq $0,%rdx +- movq %rdx,%r10 +- +- mulq %rbp +- addq %rax,%rdi +- movq (%rsi,%r9,1),%rax +- adcq $0,%rdx +- addq %r11,%rdi +- adcq $0,%rdx +- movq %r13,-24(%r14) +- movq %rdx,%r13 +- +- movq %rdi,-16(%r14) +- leaq (%rcx,%r9,1),%rcx +- +- xorq %rdi,%rdi +- addq %r10,%r13 +- adcq $0,%rdi +- addq (%r14),%r13 +- adcq $0,%rdi +- movq %r13,-8(%r14) +- +- cmpq 16+8(%rsp),%r12 +- jb L$outer4x +- xorq %rax,%rax +- subq %r13,%rbp +- adcq %r15,%r15 +- orq %r15,%rdi +- subq %rdi,%rax +- leaq (%r14,%r9,1),%rbx +- movq (%rcx),%r12 +- leaq (%rcx),%rbp +- movq %r9,%rcx +- sarq $3+2,%rcx +- movq 56+8(%rsp),%rdi +- decq %r12 +- xorq %r10,%r10 +- movq 8(%rbp),%r13 +- movq 16(%rbp),%r14 +- movq 24(%rbp),%r15 +- jmp L$sqr4x_sub_entry +- +- +-.globl _bn_power5 +-.private_extern _bn_power5 +- +-.p2align 5 +-_bn_power5: +- +- movq %rsp,%rax +- +- leaq _OPENSSL_ia32cap_P(%rip),%r11 +- movl 8(%r11),%r11d +- andl $0x80108,%r11d +- cmpl $0x80108,%r11d +- je L$powerx5_enter +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$power5_prologue: +- +- shll $3,%r9d +- leal (%r9,%r9,2),%r10d +- negq %r9 +- movq (%r8),%r8 +- +- +- +- +- +- +- +- +- leaq -320(%rsp,%r9,2),%r11 +- movq %rsp,%rbp +- subq %rdi,%r11 +- andq $4095,%r11 +- cmpq %r11,%r10 +- jb L$pwr_sp_alt +- subq %r11,%rbp +- leaq -320(%rbp,%r9,2),%rbp +- jmp L$pwr_sp_done +- +-.p2align 5 +-L$pwr_sp_alt: +- leaq 4096-320(,%r9,2),%r10 +- leaq -320(%rbp,%r9,2),%rbp +- subq %r10,%r11 +- movq $0,%r10 +- cmovcq %r10,%r11 +- subq %r11,%rbp +-L$pwr_sp_done: +- andq $-64,%rbp +- movq %rsp,%r11 +- subq %rbp,%r11 +- andq $-4096,%r11 +- leaq (%r11,%rbp,1),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$pwr_page_walk +- jmp L$pwr_page_walk_done +- +-L$pwr_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$pwr_page_walk +-L$pwr_page_walk_done: +- +- movq %r9,%r10 +- negq %r9 +- +- +- +- +- +- +- +- +- +- +- movq %r8,32(%rsp) +- movq %rax,40(%rsp) +- +-L$power5_body: +-.byte 102,72,15,110,207 +-.byte 102,72,15,110,209 +-.byte 102,73,15,110,218 +-.byte 102,72,15,110,226 +- +- call __bn_sqr8x_internal +- call __bn_post4x_internal +- call __bn_sqr8x_internal +- call __bn_post4x_internal +- call __bn_sqr8x_internal +- call __bn_post4x_internal +- call __bn_sqr8x_internal +- call __bn_post4x_internal +- call 
__bn_sqr8x_internal +- call __bn_post4x_internal +- +-.byte 102,72,15,126,209 +-.byte 102,72,15,126,226 +- movq %rsi,%rdi +- movq 40(%rsp),%rax +- leaq 32(%rsp),%r8 +- +- call mul4x_internal +- +- movq 40(%rsp),%rsi +- +- movq $1,%rax +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$power5_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.globl _bn_sqr8x_internal +-.private_extern _bn_sqr8x_internal +-.private_extern _bn_sqr8x_internal +- +-.p2align 5 +-_bn_sqr8x_internal: +-__bn_sqr8x_internal: +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- leaq 32(%r10),%rbp +- leaq (%rsi,%r9,1),%rsi +- +- movq %r9,%rcx +- +- +- movq -32(%rsi,%rbp,1),%r14 +- leaq 48+8(%rsp,%r9,2),%rdi +- movq -24(%rsi,%rbp,1),%rax +- leaq -32(%rdi,%rbp,1),%rdi +- movq -16(%rsi,%rbp,1),%rbx +- movq %rax,%r15 +- +- mulq %r14 +- movq %rax,%r10 +- movq %rbx,%rax +- movq %rdx,%r11 +- movq %r10,-24(%rdi,%rbp,1) +- +- mulq %r14 +- addq %rax,%r11 +- movq %rbx,%rax +- adcq $0,%rdx +- movq %r11,-16(%rdi,%rbp,1) +- movq %rdx,%r10 +- +- +- movq -8(%rsi,%rbp,1),%rbx +- mulq %r15 +- movq %rax,%r12 +- movq %rbx,%rax +- movq %rdx,%r13 +- +- leaq (%rbp),%rcx +- mulq %r14 +- addq %rax,%r10 +- movq %rbx,%rax +- movq %rdx,%r11 +- adcq $0,%r11 +- addq %r12,%r10 +- adcq $0,%r11 +- movq %r10,-8(%rdi,%rcx,1) +- jmp L$sqr4x_1st +- +-.p2align 5 +-L$sqr4x_1st: +- movq (%rsi,%rcx,1),%rbx +- mulq %r15 +- addq %rax,%r13 +- movq %rbx,%rax +- movq %rdx,%r12 +- adcq $0,%r12 +- +- mulq %r14 +- addq %rax,%r11 +- movq %rbx,%rax +- movq 8(%rsi,%rcx,1),%rbx +- movq %rdx,%r10 +- adcq $0,%r10 +- addq %r13,%r11 +- adcq $0,%r10 +- +- +- mulq %r15 +- addq %rax,%r12 +- movq %rbx,%rax +- movq %r11,(%rdi,%rcx,1) +- movq %rdx,%r13 +- adcq $0,%r13 +- +- mulq %r14 +- addq %rax,%r10 +- movq %rbx,%rax +- movq 16(%rsi,%rcx,1),%rbx +- movq %rdx,%r11 +- adcq $0,%r11 +- addq %r12,%r10 +- adcq $0,%r11 +- +- mulq %r15 +- addq %rax,%r13 +- movq %rbx,%rax +- movq %r10,8(%rdi,%rcx,1) +- movq %rdx,%r12 +- adcq $0,%r12 +- +- mulq %r14 +- addq %rax,%r11 +- movq %rbx,%rax +- movq 24(%rsi,%rcx,1),%rbx +- movq %rdx,%r10 +- adcq $0,%r10 +- addq %r13,%r11 +- adcq $0,%r10 +- +- +- mulq %r15 +- addq %rax,%r12 +- movq %rbx,%rax +- movq %r11,16(%rdi,%rcx,1) +- movq %rdx,%r13 +- adcq $0,%r13 +- leaq 32(%rcx),%rcx +- +- mulq %r14 +- addq %rax,%r10 +- movq %rbx,%rax +- movq %rdx,%r11 +- adcq $0,%r11 +- addq %r12,%r10 +- adcq $0,%r11 +- movq %r10,-8(%rdi,%rcx,1) +- +- cmpq $0,%rcx +- jne L$sqr4x_1st +- +- mulq %r15 +- addq %rax,%r13 +- leaq 16(%rbp),%rbp +- adcq $0,%rdx +- addq %r11,%r13 +- adcq $0,%rdx +- +- movq %r13,(%rdi) +- movq %rdx,%r12 +- movq %rdx,8(%rdi) +- jmp L$sqr4x_outer +- +-.p2align 5 +-L$sqr4x_outer: +- movq -32(%rsi,%rbp,1),%r14 +- leaq 48+8(%rsp,%r9,2),%rdi +- movq -24(%rsi,%rbp,1),%rax +- leaq -32(%rdi,%rbp,1),%rdi +- movq -16(%rsi,%rbp,1),%rbx +- movq %rax,%r15 +- +- mulq %r14 +- movq -24(%rdi,%rbp,1),%r10 +- addq %rax,%r10 +- movq %rbx,%rax +- adcq $0,%rdx +- movq %r10,-24(%rdi,%rbp,1) +- movq %rdx,%r11 +- +- mulq %r14 +- addq %rax,%r11 +- movq %rbx,%rax +- adcq $0,%rdx +- addq -16(%rdi,%rbp,1),%r11 +- movq %rdx,%r10 +- adcq $0,%r10 +- movq %r11,-16(%rdi,%rbp,1) +- +- xorq %r12,%r12 +- +- movq -8(%rsi,%rbp,1),%rbx +- mulq %r15 +- addq %rax,%r12 +- movq %rbx,%rax +- adcq $0,%rdx 
+- addq -8(%rdi,%rbp,1),%r12 +- movq %rdx,%r13 +- adcq $0,%r13 +- +- mulq %r14 +- addq %rax,%r10 +- movq %rbx,%rax +- adcq $0,%rdx +- addq %r12,%r10 +- movq %rdx,%r11 +- adcq $0,%r11 +- movq %r10,-8(%rdi,%rbp,1) +- +- leaq (%rbp),%rcx +- jmp L$sqr4x_inner +- +-.p2align 5 +-L$sqr4x_inner: +- movq (%rsi,%rcx,1),%rbx +- mulq %r15 +- addq %rax,%r13 +- movq %rbx,%rax +- movq %rdx,%r12 +- adcq $0,%r12 +- addq (%rdi,%rcx,1),%r13 +- adcq $0,%r12 +- +-.byte 0x67 +- mulq %r14 +- addq %rax,%r11 +- movq %rbx,%rax +- movq 8(%rsi,%rcx,1),%rbx +- movq %rdx,%r10 +- adcq $0,%r10 +- addq %r13,%r11 +- adcq $0,%r10 +- +- mulq %r15 +- addq %rax,%r12 +- movq %r11,(%rdi,%rcx,1) +- movq %rbx,%rax +- movq %rdx,%r13 +- adcq $0,%r13 +- addq 8(%rdi,%rcx,1),%r12 +- leaq 16(%rcx),%rcx +- adcq $0,%r13 +- +- mulq %r14 +- addq %rax,%r10 +- movq %rbx,%rax +- adcq $0,%rdx +- addq %r12,%r10 +- movq %rdx,%r11 +- adcq $0,%r11 +- movq %r10,-8(%rdi,%rcx,1) +- +- cmpq $0,%rcx +- jne L$sqr4x_inner +- +-.byte 0x67 +- mulq %r15 +- addq %rax,%r13 +- adcq $0,%rdx +- addq %r11,%r13 +- adcq $0,%rdx +- +- movq %r13,(%rdi) +- movq %rdx,%r12 +- movq %rdx,8(%rdi) +- +- addq $16,%rbp +- jnz L$sqr4x_outer +- +- +- movq -32(%rsi),%r14 +- leaq 48+8(%rsp,%r9,2),%rdi +- movq -24(%rsi),%rax +- leaq -32(%rdi,%rbp,1),%rdi +- movq -16(%rsi),%rbx +- movq %rax,%r15 +- +- mulq %r14 +- addq %rax,%r10 +- movq %rbx,%rax +- movq %rdx,%r11 +- adcq $0,%r11 +- +- mulq %r14 +- addq %rax,%r11 +- movq %rbx,%rax +- movq %r10,-24(%rdi) +- movq %rdx,%r10 +- adcq $0,%r10 +- addq %r13,%r11 +- movq -8(%rsi),%rbx +- adcq $0,%r10 +- +- mulq %r15 +- addq %rax,%r12 +- movq %rbx,%rax +- movq %r11,-16(%rdi) +- movq %rdx,%r13 +- adcq $0,%r13 +- +- mulq %r14 +- addq %rax,%r10 +- movq %rbx,%rax +- movq %rdx,%r11 +- adcq $0,%r11 +- addq %r12,%r10 +- adcq $0,%r11 +- movq %r10,-8(%rdi) +- +- mulq %r15 +- addq %rax,%r13 +- movq -16(%rsi),%rax +- adcq $0,%rdx +- addq %r11,%r13 +- adcq $0,%rdx +- +- movq %r13,(%rdi) +- movq %rdx,%r12 +- movq %rdx,8(%rdi) +- +- mulq %rbx +- addq $16,%rbp +- xorq %r14,%r14 +- subq %r9,%rbp +- xorq %r15,%r15 +- +- addq %r12,%rax +- adcq $0,%rdx +- movq %rax,8(%rdi) +- movq %rdx,16(%rdi) +- movq %r15,24(%rdi) +- +- movq -16(%rsi,%rbp,1),%rax +- leaq 48+8(%rsp),%rdi +- xorq %r10,%r10 +- movq 8(%rdi),%r11 +- +- leaq (%r14,%r10,2),%r12 +- shrq $63,%r10 +- leaq (%rcx,%r11,2),%r13 +- shrq $63,%r11 +- orq %r10,%r13 +- movq 16(%rdi),%r10 +- movq %r11,%r14 +- mulq %rax +- negq %r15 +- movq 24(%rdi),%r11 +- adcq %rax,%r12 +- movq -8(%rsi,%rbp,1),%rax +- movq %r12,(%rdi) +- adcq %rdx,%r13 +- +- leaq (%r14,%r10,2),%rbx +- movq %r13,8(%rdi) +- sbbq %r15,%r15 +- shrq $63,%r10 +- leaq (%rcx,%r11,2),%r8 +- shrq $63,%r11 +- orq %r10,%r8 +- movq 32(%rdi),%r10 +- movq %r11,%r14 +- mulq %rax +- negq %r15 +- movq 40(%rdi),%r11 +- adcq %rax,%rbx +- movq 0(%rsi,%rbp,1),%rax +- movq %rbx,16(%rdi) +- adcq %rdx,%r8 +- leaq 16(%rbp),%rbp +- movq %r8,24(%rdi) +- sbbq %r15,%r15 +- leaq 64(%rdi),%rdi +- jmp L$sqr4x_shift_n_add +- +-.p2align 5 +-L$sqr4x_shift_n_add: +- leaq (%r14,%r10,2),%r12 +- shrq $63,%r10 +- leaq (%rcx,%r11,2),%r13 +- shrq $63,%r11 +- orq %r10,%r13 +- movq -16(%rdi),%r10 +- movq %r11,%r14 +- mulq %rax +- negq %r15 +- movq -8(%rdi),%r11 +- adcq %rax,%r12 +- movq -8(%rsi,%rbp,1),%rax +- movq %r12,-32(%rdi) +- adcq %rdx,%r13 +- +- leaq (%r14,%r10,2),%rbx +- movq %r13,-24(%rdi) +- sbbq %r15,%r15 +- shrq $63,%r10 +- leaq (%rcx,%r11,2),%r8 +- shrq $63,%r11 +- orq %r10,%r8 +- movq 0(%rdi),%r10 +- movq %r11,%r14 +- mulq %rax +- negq %r15 +- movq 8(%rdi),%r11 +- adcq 
%rax,%rbx +- movq 0(%rsi,%rbp,1),%rax +- movq %rbx,-16(%rdi) +- adcq %rdx,%r8 +- +- leaq (%r14,%r10,2),%r12 +- movq %r8,-8(%rdi) +- sbbq %r15,%r15 +- shrq $63,%r10 +- leaq (%rcx,%r11,2),%r13 +- shrq $63,%r11 +- orq %r10,%r13 +- movq 16(%rdi),%r10 +- movq %r11,%r14 +- mulq %rax +- negq %r15 +- movq 24(%rdi),%r11 +- adcq %rax,%r12 +- movq 8(%rsi,%rbp,1),%rax +- movq %r12,0(%rdi) +- adcq %rdx,%r13 +- +- leaq (%r14,%r10,2),%rbx +- movq %r13,8(%rdi) +- sbbq %r15,%r15 +- shrq $63,%r10 +- leaq (%rcx,%r11,2),%r8 +- shrq $63,%r11 +- orq %r10,%r8 +- movq 32(%rdi),%r10 +- movq %r11,%r14 +- mulq %rax +- negq %r15 +- movq 40(%rdi),%r11 +- adcq %rax,%rbx +- movq 16(%rsi,%rbp,1),%rax +- movq %rbx,16(%rdi) +- adcq %rdx,%r8 +- movq %r8,24(%rdi) +- sbbq %r15,%r15 +- leaq 64(%rdi),%rdi +- addq $32,%rbp +- jnz L$sqr4x_shift_n_add +- +- leaq (%r14,%r10,2),%r12 +-.byte 0x67 +- shrq $63,%r10 +- leaq (%rcx,%r11,2),%r13 +- shrq $63,%r11 +- orq %r10,%r13 +- movq -16(%rdi),%r10 +- movq %r11,%r14 +- mulq %rax +- negq %r15 +- movq -8(%rdi),%r11 +- adcq %rax,%r12 +- movq -8(%rsi),%rax +- movq %r12,-32(%rdi) +- adcq %rdx,%r13 +- +- leaq (%r14,%r10,2),%rbx +- movq %r13,-24(%rdi) +- sbbq %r15,%r15 +- shrq $63,%r10 +- leaq (%rcx,%r11,2),%r8 +- shrq $63,%r11 +- orq %r10,%r8 +- mulq %rax +- negq %r15 +- adcq %rax,%rbx +- adcq %rdx,%r8 +- movq %rbx,-16(%rdi) +- movq %r8,-8(%rdi) +-.byte 102,72,15,126,213 +-__bn_sqr8x_reduction: +- xorq %rax,%rax +- leaq (%r9,%rbp,1),%rcx +- leaq 48+8(%rsp,%r9,2),%rdx +- movq %rcx,0+8(%rsp) +- leaq 48+8(%rsp,%r9,1),%rdi +- movq %rdx,8+8(%rsp) +- negq %r9 +- jmp L$8x_reduction_loop +- +-.p2align 5 +-L$8x_reduction_loop: +- leaq (%rdi,%r9,1),%rdi +-.byte 0x66 +- movq 0(%rdi),%rbx +- movq 8(%rdi),%r9 +- movq 16(%rdi),%r10 +- movq 24(%rdi),%r11 +- movq 32(%rdi),%r12 +- movq 40(%rdi),%r13 +- movq 48(%rdi),%r14 +- movq 56(%rdi),%r15 +- movq %rax,(%rdx) +- leaq 64(%rdi),%rdi +- +-.byte 0x67 +- movq %rbx,%r8 +- imulq 32+8(%rsp),%rbx +- movq 0(%rbp),%rax +- movl $8,%ecx +- jmp L$8x_reduce +- +-.p2align 5 +-L$8x_reduce: +- mulq %rbx +- movq 8(%rbp),%rax +- negq %r8 +- movq %rdx,%r8 +- adcq $0,%r8 +- +- mulq %rbx +- addq %rax,%r9 +- movq 16(%rbp),%rax +- adcq $0,%rdx +- addq %r9,%r8 +- movq %rbx,48-8+8(%rsp,%rcx,8) +- movq %rdx,%r9 +- adcq $0,%r9 +- +- mulq %rbx +- addq %rax,%r10 +- movq 24(%rbp),%rax +- adcq $0,%rdx +- addq %r10,%r9 +- movq 32+8(%rsp),%rsi +- movq %rdx,%r10 +- adcq $0,%r10 +- +- mulq %rbx +- addq %rax,%r11 +- movq 32(%rbp),%rax +- adcq $0,%rdx +- imulq %r8,%rsi +- addq %r11,%r10 +- movq %rdx,%r11 +- adcq $0,%r11 +- +- mulq %rbx +- addq %rax,%r12 +- movq 40(%rbp),%rax +- adcq $0,%rdx +- addq %r12,%r11 +- movq %rdx,%r12 +- adcq $0,%r12 +- +- mulq %rbx +- addq %rax,%r13 +- movq 48(%rbp),%rax +- adcq $0,%rdx +- addq %r13,%r12 +- movq %rdx,%r13 +- adcq $0,%r13 +- +- mulq %rbx +- addq %rax,%r14 +- movq 56(%rbp),%rax +- adcq $0,%rdx +- addq %r14,%r13 +- movq %rdx,%r14 +- adcq $0,%r14 +- +- mulq %rbx +- movq %rsi,%rbx +- addq %rax,%r15 +- movq 0(%rbp),%rax +- adcq $0,%rdx +- addq %r15,%r14 +- movq %rdx,%r15 +- adcq $0,%r15 +- +- decl %ecx +- jnz L$8x_reduce +- +- leaq 64(%rbp),%rbp +- xorq %rax,%rax +- movq 8+8(%rsp),%rdx +- cmpq 0+8(%rsp),%rbp +- jae L$8x_no_tail +- +-.byte 0x66 +- addq 0(%rdi),%r8 +- adcq 8(%rdi),%r9 +- adcq 16(%rdi),%r10 +- adcq 24(%rdi),%r11 +- adcq 32(%rdi),%r12 +- adcq 40(%rdi),%r13 +- adcq 48(%rdi),%r14 +- adcq 56(%rdi),%r15 +- sbbq %rsi,%rsi +- +- movq 48+56+8(%rsp),%rbx +- movl $8,%ecx +- movq 0(%rbp),%rax +- jmp L$8x_tail +- +-.p2align 5 +-L$8x_tail: +- mulq %rbx +- 
addq %rax,%r8 +- movq 8(%rbp),%rax +- movq %r8,(%rdi) +- movq %rdx,%r8 +- adcq $0,%r8 +- +- mulq %rbx +- addq %rax,%r9 +- movq 16(%rbp),%rax +- adcq $0,%rdx +- addq %r9,%r8 +- leaq 8(%rdi),%rdi +- movq %rdx,%r9 +- adcq $0,%r9 +- +- mulq %rbx +- addq %rax,%r10 +- movq 24(%rbp),%rax +- adcq $0,%rdx +- addq %r10,%r9 +- movq %rdx,%r10 +- adcq $0,%r10 +- +- mulq %rbx +- addq %rax,%r11 +- movq 32(%rbp),%rax +- adcq $0,%rdx +- addq %r11,%r10 +- movq %rdx,%r11 +- adcq $0,%r11 +- +- mulq %rbx +- addq %rax,%r12 +- movq 40(%rbp),%rax +- adcq $0,%rdx +- addq %r12,%r11 +- movq %rdx,%r12 +- adcq $0,%r12 +- +- mulq %rbx +- addq %rax,%r13 +- movq 48(%rbp),%rax +- adcq $0,%rdx +- addq %r13,%r12 +- movq %rdx,%r13 +- adcq $0,%r13 +- +- mulq %rbx +- addq %rax,%r14 +- movq 56(%rbp),%rax +- adcq $0,%rdx +- addq %r14,%r13 +- movq %rdx,%r14 +- adcq $0,%r14 +- +- mulq %rbx +- movq 48-16+8(%rsp,%rcx,8),%rbx +- addq %rax,%r15 +- adcq $0,%rdx +- addq %r15,%r14 +- movq 0(%rbp),%rax +- movq %rdx,%r15 +- adcq $0,%r15 +- +- decl %ecx +- jnz L$8x_tail +- +- leaq 64(%rbp),%rbp +- movq 8+8(%rsp),%rdx +- cmpq 0+8(%rsp),%rbp +- jae L$8x_tail_done +- +- movq 48+56+8(%rsp),%rbx +- negq %rsi +- movq 0(%rbp),%rax +- adcq 0(%rdi),%r8 +- adcq 8(%rdi),%r9 +- adcq 16(%rdi),%r10 +- adcq 24(%rdi),%r11 +- adcq 32(%rdi),%r12 +- adcq 40(%rdi),%r13 +- adcq 48(%rdi),%r14 +- adcq 56(%rdi),%r15 +- sbbq %rsi,%rsi +- +- movl $8,%ecx +- jmp L$8x_tail +- +-.p2align 5 +-L$8x_tail_done: +- xorq %rax,%rax +- addq (%rdx),%r8 +- adcq $0,%r9 +- adcq $0,%r10 +- adcq $0,%r11 +- adcq $0,%r12 +- adcq $0,%r13 +- adcq $0,%r14 +- adcq $0,%r15 +- adcq $0,%rax +- +- negq %rsi +-L$8x_no_tail: +- adcq 0(%rdi),%r8 +- adcq 8(%rdi),%r9 +- adcq 16(%rdi),%r10 +- adcq 24(%rdi),%r11 +- adcq 32(%rdi),%r12 +- adcq 40(%rdi),%r13 +- adcq 48(%rdi),%r14 +- adcq 56(%rdi),%r15 +- adcq $0,%rax +- movq -8(%rbp),%rcx +- xorq %rsi,%rsi +- +-.byte 102,72,15,126,213 +- +- movq %r8,0(%rdi) +- movq %r9,8(%rdi) +-.byte 102,73,15,126,217 +- movq %r10,16(%rdi) +- movq %r11,24(%rdi) +- movq %r12,32(%rdi) +- movq %r13,40(%rdi) +- movq %r14,48(%rdi) +- movq %r15,56(%rdi) +- leaq 64(%rdi),%rdi +- +- cmpq %rdx,%rdi +- jb L$8x_reduction_loop +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-__bn_post4x_internal: +- +- movq 0(%rbp),%r12 +- leaq (%rdi,%r9,1),%rbx +- movq %r9,%rcx +-.byte 102,72,15,126,207 +- negq %rax +-.byte 102,72,15,126,206 +- sarq $3+2,%rcx +- decq %r12 +- xorq %r10,%r10 +- movq 8(%rbp),%r13 +- movq 16(%rbp),%r14 +- movq 24(%rbp),%r15 +- jmp L$sqr4x_sub_entry +- +-.p2align 4 +-L$sqr4x_sub: +- movq 0(%rbp),%r12 +- movq 8(%rbp),%r13 +- movq 16(%rbp),%r14 +- movq 24(%rbp),%r15 +-L$sqr4x_sub_entry: +- leaq 32(%rbp),%rbp +- notq %r12 +- notq %r13 +- notq %r14 +- notq %r15 +- andq %rax,%r12 +- andq %rax,%r13 +- andq %rax,%r14 +- andq %rax,%r15 +- +- negq %r10 +- adcq 0(%rbx),%r12 +- adcq 8(%rbx),%r13 +- adcq 16(%rbx),%r14 +- adcq 24(%rbx),%r15 +- movq %r12,0(%rdi) +- leaq 32(%rbx),%rbx +- movq %r13,8(%rdi) +- sbbq %r10,%r10 +- movq %r14,16(%rdi) +- movq %r15,24(%rdi) +- leaq 32(%rdi),%rdi +- +- incq %rcx +- jnz L$sqr4x_sub +- +- movq %r9,%r10 +- negq %r9 +- .byte 0xf3,0xc3 +- +- +-.globl _bn_from_montgomery +-.private_extern _bn_from_montgomery +- +-.p2align 5 +-_bn_from_montgomery: +- +- testl $7,%r9d +- jz bn_from_mont8x +- xorl %eax,%eax +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-bn_from_mont8x: +- +-.byte 0x67 +- movq %rsp,%rax +- +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$from_prologue: +- +- shll $3,%r9d +- leaq 
(%r9,%r9,2),%r10 +- negq %r9 +- movq (%r8),%r8 +- +- +- +- +- +- +- +- +- leaq -320(%rsp,%r9,2),%r11 +- movq %rsp,%rbp +- subq %rdi,%r11 +- andq $4095,%r11 +- cmpq %r11,%r10 +- jb L$from_sp_alt +- subq %r11,%rbp +- leaq -320(%rbp,%r9,2),%rbp +- jmp L$from_sp_done +- +-.p2align 5 +-L$from_sp_alt: +- leaq 4096-320(,%r9,2),%r10 +- leaq -320(%rbp,%r9,2),%rbp +- subq %r10,%r11 +- movq $0,%r10 +- cmovcq %r10,%r11 +- subq %r11,%rbp +-L$from_sp_done: +- andq $-64,%rbp +- movq %rsp,%r11 +- subq %rbp,%r11 +- andq $-4096,%r11 +- leaq (%r11,%rbp,1),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$from_page_walk +- jmp L$from_page_walk_done +- +-L$from_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$from_page_walk +-L$from_page_walk_done: +- +- movq %r9,%r10 +- negq %r9 +- +- +- +- +- +- +- +- +- +- +- movq %r8,32(%rsp) +- movq %rax,40(%rsp) +- +-L$from_body: +- movq %r9,%r11 +- leaq 48(%rsp),%rax +- pxor %xmm0,%xmm0 +- jmp L$mul_by_1 +- +-.p2align 5 +-L$mul_by_1: +- movdqu (%rsi),%xmm1 +- movdqu 16(%rsi),%xmm2 +- movdqu 32(%rsi),%xmm3 +- movdqa %xmm0,(%rax,%r9,1) +- movdqu 48(%rsi),%xmm4 +- movdqa %xmm0,16(%rax,%r9,1) +-.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 +- movdqa %xmm1,(%rax) +- movdqa %xmm0,32(%rax,%r9,1) +- movdqa %xmm2,16(%rax) +- movdqa %xmm0,48(%rax,%r9,1) +- movdqa %xmm3,32(%rax) +- movdqa %xmm4,48(%rax) +- leaq 64(%rax),%rax +- subq $64,%r11 +- jnz L$mul_by_1 +- +-.byte 102,72,15,110,207 +-.byte 102,72,15,110,209 +-.byte 0x67 +- movq %rcx,%rbp +-.byte 102,73,15,110,218 +- leaq _OPENSSL_ia32cap_P(%rip),%r11 +- movl 8(%r11),%r11d +- andl $0x80108,%r11d +- cmpl $0x80108,%r11d +- jne L$from_mont_nox +- +- leaq (%rax,%r9,1),%rdi +- call __bn_sqrx8x_reduction +- call __bn_postx4x_internal +- +- pxor %xmm0,%xmm0 +- leaq 48(%rsp),%rax +- jmp L$from_mont_zero +- +-.p2align 5 +-L$from_mont_nox: +- call __bn_sqr8x_reduction +- call __bn_post4x_internal +- +- pxor %xmm0,%xmm0 +- leaq 48(%rsp),%rax +- jmp L$from_mont_zero +- +-.p2align 5 +-L$from_mont_zero: +- movq 40(%rsp),%rsi +- +- movdqa %xmm0,0(%rax) +- movdqa %xmm0,16(%rax) +- movdqa %xmm0,32(%rax) +- movdqa %xmm0,48(%rax) +- leaq 64(%rax),%rax +- subq $32,%r9 +- jnz L$from_mont_zero +- +- movq $1,%rax +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$from_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.p2align 5 +-bn_mulx4x_mont_gather5: +- +- movq %rsp,%rax +- +-L$mulx4x_enter: +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$mulx4x_prologue: +- +- shll $3,%r9d +- leaq (%r9,%r9,2),%r10 +- negq %r9 +- movq (%r8),%r8 +- +- +- +- +- +- +- +- +- +- +- leaq -320(%rsp,%r9,2),%r11 +- movq %rsp,%rbp +- subq %rdi,%r11 +- andq $4095,%r11 +- cmpq %r11,%r10 +- jb L$mulx4xsp_alt +- subq %r11,%rbp +- leaq -320(%rbp,%r9,2),%rbp +- jmp L$mulx4xsp_done +- +-L$mulx4xsp_alt: +- leaq 4096-320(,%r9,2),%r10 +- leaq -320(%rbp,%r9,2),%rbp +- subq %r10,%r11 +- movq $0,%r10 +- cmovcq %r10,%r11 +- subq %r11,%rbp +-L$mulx4xsp_done: +- andq $-64,%rbp +- movq %rsp,%r11 +- subq %rbp,%r11 +- andq $-4096,%r11 +- leaq (%r11,%rbp,1),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$mulx4x_page_walk +- jmp L$mulx4x_page_walk_done +- +-L$mulx4x_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$mulx4x_page_walk +-L$mulx4x_page_walk_done: +- +- +- +- +- +- +- +- +- +- +- +- +- +- movq %r8,32(%rsp) +- movq %rax,40(%rsp) +- 
+-L$mulx4x_body: +- call mulx4x_internal +- +- movq 40(%rsp),%rsi +- +- movq $1,%rax +- +- movq -48(%rsi),%r15 +- +- movq -40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$mulx4x_epilogue: +- .byte 0xf3,0xc3 +- +- +- +- +-.p2align 5 +-mulx4x_internal: +- +- movq %r9,8(%rsp) +- movq %r9,%r10 +- negq %r9 +- shlq $5,%r9 +- negq %r10 +- leaq 128(%rdx,%r9,1),%r13 +- shrq $5+5,%r9 +- movd 8(%rax),%xmm5 +- subq $1,%r9 +- leaq L$inc(%rip),%rax +- movq %r13,16+8(%rsp) +- movq %r9,24+8(%rsp) +- movq %rdi,56+8(%rsp) +- movdqa 0(%rax),%xmm0 +- movdqa 16(%rax),%xmm1 +- leaq 88-112(%rsp,%r10,1),%r10 +- leaq 128(%rdx),%rdi +- +- pshufd $0,%xmm5,%xmm5 +- movdqa %xmm1,%xmm4 +-.byte 0x67 +- movdqa %xmm1,%xmm2 +-.byte 0x67 +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,112(%r10) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,128(%r10) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,144(%r10) +- movdqa %xmm4,%xmm2 +- +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,160(%r10) +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,176(%r10) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,192(%r10) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,208(%r10) +- movdqa %xmm4,%xmm2 +- +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,224(%r10) +- movdqa %xmm4,%xmm3 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,240(%r10) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,256(%r10) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,272(%r10) +- movdqa %xmm4,%xmm2 +- +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,288(%r10) +- movdqa %xmm4,%xmm3 +-.byte 0x67 +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,304(%r10) +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,320(%r10) +- +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,336(%r10) +- +- pand 64(%rdi),%xmm0 +- pand 80(%rdi),%xmm1 +- pand 96(%rdi),%xmm2 +- movdqa %xmm3,352(%r10) +- pand 112(%rdi),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- movdqa -128(%rdi),%xmm4 +- movdqa -112(%rdi),%xmm5 +- movdqa -96(%rdi),%xmm2 +- pand 112(%r10),%xmm4 +- movdqa -80(%rdi),%xmm3 +- pand 128(%r10),%xmm5 +- por %xmm4,%xmm0 +- pand 144(%r10),%xmm2 +- por %xmm5,%xmm1 +- pand 160(%r10),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- movdqa -64(%rdi),%xmm4 +- movdqa -48(%rdi),%xmm5 +- movdqa -32(%rdi),%xmm2 +- pand 176(%r10),%xmm4 +- movdqa -16(%rdi),%xmm3 +- pand 192(%r10),%xmm5 +- por %xmm4,%xmm0 +- pand 208(%r10),%xmm2 +- por %xmm5,%xmm1 +- pand 224(%r10),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- movdqa 0(%rdi),%xmm4 +- movdqa 16(%rdi),%xmm5 +- movdqa 32(%rdi),%xmm2 +- pand 240(%r10),%xmm4 +- movdqa 48(%rdi),%xmm3 +- pand 256(%r10),%xmm5 +- por %xmm4,%xmm0 +- pand 272(%r10),%xmm2 +- por %xmm5,%xmm1 +- pand 288(%r10),%xmm3 +- por %xmm2,%xmm0 +- por %xmm3,%xmm1 +- pxor %xmm1,%xmm0 +- pshufd $0x4e,%xmm0,%xmm1 +- por %xmm1,%xmm0 +- leaq 256(%rdi),%rdi +-.byte 102,72,15,126,194 +- leaq 64+32+8(%rsp),%rbx +- +- movq %rdx,%r9 +- mulxq 0(%rsi),%r8,%rax +- mulxq 8(%rsi),%r11,%r12 +- addq %rax,%r11 +- mulxq 16(%rsi),%rax,%r13 +- adcq %rax,%r12 +- adcq $0,%r13 +- mulxq 24(%rsi),%rax,%r14 +- +- movq %r8,%r15 +- imulq 
32+8(%rsp),%r8 +- xorq %rbp,%rbp +- movq %r8,%rdx +- +- movq %rdi,8+8(%rsp) +- +- leaq 32(%rsi),%rsi +- adcxq %rax,%r13 +- adcxq %rbp,%r14 +- +- mulxq 0(%rcx),%rax,%r10 +- adcxq %rax,%r15 +- adoxq %r11,%r10 +- mulxq 8(%rcx),%rax,%r11 +- adcxq %rax,%r10 +- adoxq %r12,%r11 +- mulxq 16(%rcx),%rax,%r12 +- movq 24+8(%rsp),%rdi +- movq %r10,-32(%rbx) +- adcxq %rax,%r11 +- adoxq %r13,%r12 +- mulxq 24(%rcx),%rax,%r15 +- movq %r9,%rdx +- movq %r11,-24(%rbx) +- adcxq %rax,%r12 +- adoxq %rbp,%r15 +- leaq 32(%rcx),%rcx +- movq %r12,-16(%rbx) +- jmp L$mulx4x_1st +- +-.p2align 5 +-L$mulx4x_1st: +- adcxq %rbp,%r15 +- mulxq 0(%rsi),%r10,%rax +- adcxq %r14,%r10 +- mulxq 8(%rsi),%r11,%r14 +- adcxq %rax,%r11 +- mulxq 16(%rsi),%r12,%rax +- adcxq %r14,%r12 +- mulxq 24(%rsi),%r13,%r14 +-.byte 0x67,0x67 +- movq %r8,%rdx +- adcxq %rax,%r13 +- adcxq %rbp,%r14 +- leaq 32(%rsi),%rsi +- leaq 32(%rbx),%rbx +- +- adoxq %r15,%r10 +- mulxq 0(%rcx),%rax,%r15 +- adcxq %rax,%r10 +- adoxq %r15,%r11 +- mulxq 8(%rcx),%rax,%r15 +- adcxq %rax,%r11 +- adoxq %r15,%r12 +- mulxq 16(%rcx),%rax,%r15 +- movq %r10,-40(%rbx) +- adcxq %rax,%r12 +- movq %r11,-32(%rbx) +- adoxq %r15,%r13 +- mulxq 24(%rcx),%rax,%r15 +- movq %r9,%rdx +- movq %r12,-24(%rbx) +- adcxq %rax,%r13 +- adoxq %rbp,%r15 +- leaq 32(%rcx),%rcx +- movq %r13,-16(%rbx) +- +- decq %rdi +- jnz L$mulx4x_1st +- +- movq 8(%rsp),%rax +- adcq %rbp,%r15 +- leaq (%rsi,%rax,1),%rsi +- addq %r15,%r14 +- movq 8+8(%rsp),%rdi +- adcq %rbp,%rbp +- movq %r14,-8(%rbx) +- jmp L$mulx4x_outer +- +-.p2align 5 +-L$mulx4x_outer: +- leaq 16-256(%rbx),%r10 +- pxor %xmm4,%xmm4 +-.byte 0x67,0x67 +- pxor %xmm5,%xmm5 +- movdqa -128(%rdi),%xmm0 +- movdqa -112(%rdi),%xmm1 +- movdqa -96(%rdi),%xmm2 +- pand 256(%r10),%xmm0 +- movdqa -80(%rdi),%xmm3 +- pand 272(%r10),%xmm1 +- por %xmm0,%xmm4 +- pand 288(%r10),%xmm2 +- por %xmm1,%xmm5 +- pand 304(%r10),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa -64(%rdi),%xmm0 +- movdqa -48(%rdi),%xmm1 +- movdqa -32(%rdi),%xmm2 +- pand 320(%r10),%xmm0 +- movdqa -16(%rdi),%xmm3 +- pand 336(%r10),%xmm1 +- por %xmm0,%xmm4 +- pand 352(%r10),%xmm2 +- por %xmm1,%xmm5 +- pand 368(%r10),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa 0(%rdi),%xmm0 +- movdqa 16(%rdi),%xmm1 +- movdqa 32(%rdi),%xmm2 +- pand 384(%r10),%xmm0 +- movdqa 48(%rdi),%xmm3 +- pand 400(%r10),%xmm1 +- por %xmm0,%xmm4 +- pand 416(%r10),%xmm2 +- por %xmm1,%xmm5 +- pand 432(%r10),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa 64(%rdi),%xmm0 +- movdqa 80(%rdi),%xmm1 +- movdqa 96(%rdi),%xmm2 +- pand 448(%r10),%xmm0 +- movdqa 112(%rdi),%xmm3 +- pand 464(%r10),%xmm1 +- por %xmm0,%xmm4 +- pand 480(%r10),%xmm2 +- por %xmm1,%xmm5 +- pand 496(%r10),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- por %xmm5,%xmm4 +- pshufd $0x4e,%xmm4,%xmm0 +- por %xmm4,%xmm0 +- leaq 256(%rdi),%rdi +-.byte 102,72,15,126,194 +- +- movq %rbp,(%rbx) +- leaq 32(%rbx,%rax,1),%rbx +- mulxq 0(%rsi),%r8,%r11 +- xorq %rbp,%rbp +- movq %rdx,%r9 +- mulxq 8(%rsi),%r14,%r12 +- adoxq -32(%rbx),%r8 +- adcxq %r14,%r11 +- mulxq 16(%rsi),%r15,%r13 +- adoxq -24(%rbx),%r11 +- adcxq %r15,%r12 +- mulxq 24(%rsi),%rdx,%r14 +- adoxq -16(%rbx),%r12 +- adcxq %rdx,%r13 +- leaq (%rcx,%rax,1),%rcx +- leaq 32(%rsi),%rsi +- adoxq -8(%rbx),%r13 +- adcxq %rbp,%r14 +- adoxq %rbp,%r14 +- +- movq %r8,%r15 +- imulq 32+8(%rsp),%r8 +- +- movq %r8,%rdx +- xorq %rbp,%rbp +- movq %rdi,8+8(%rsp) +- +- mulxq 0(%rcx),%rax,%r10 +- adcxq %rax,%r15 +- adoxq %r11,%r10 +- mulxq 8(%rcx),%rax,%r11 +- adcxq %rax,%r10 +- adoxq %r12,%r11 +- mulxq 16(%rcx),%rax,%r12 +- adcxq 
%rax,%r11 +- adoxq %r13,%r12 +- mulxq 24(%rcx),%rax,%r15 +- movq %r9,%rdx +- movq 24+8(%rsp),%rdi +- movq %r10,-32(%rbx) +- adcxq %rax,%r12 +- movq %r11,-24(%rbx) +- adoxq %rbp,%r15 +- movq %r12,-16(%rbx) +- leaq 32(%rcx),%rcx +- jmp L$mulx4x_inner +- +-.p2align 5 +-L$mulx4x_inner: +- mulxq 0(%rsi),%r10,%rax +- adcxq %rbp,%r15 +- adoxq %r14,%r10 +- mulxq 8(%rsi),%r11,%r14 +- adcxq 0(%rbx),%r10 +- adoxq %rax,%r11 +- mulxq 16(%rsi),%r12,%rax +- adcxq 8(%rbx),%r11 +- adoxq %r14,%r12 +- mulxq 24(%rsi),%r13,%r14 +- movq %r8,%rdx +- adcxq 16(%rbx),%r12 +- adoxq %rax,%r13 +- adcxq 24(%rbx),%r13 +- adoxq %rbp,%r14 +- leaq 32(%rsi),%rsi +- leaq 32(%rbx),%rbx +- adcxq %rbp,%r14 +- +- adoxq %r15,%r10 +- mulxq 0(%rcx),%rax,%r15 +- adcxq %rax,%r10 +- adoxq %r15,%r11 +- mulxq 8(%rcx),%rax,%r15 +- adcxq %rax,%r11 +- adoxq %r15,%r12 +- mulxq 16(%rcx),%rax,%r15 +- movq %r10,-40(%rbx) +- adcxq %rax,%r12 +- adoxq %r15,%r13 +- movq %r11,-32(%rbx) +- mulxq 24(%rcx),%rax,%r15 +- movq %r9,%rdx +- leaq 32(%rcx),%rcx +- movq %r12,-24(%rbx) +- adcxq %rax,%r13 +- adoxq %rbp,%r15 +- movq %r13,-16(%rbx) +- +- decq %rdi +- jnz L$mulx4x_inner +- +- movq 0+8(%rsp),%rax +- adcq %rbp,%r15 +- subq 0(%rbx),%rdi +- movq 8+8(%rsp),%rdi +- movq 16+8(%rsp),%r10 +- adcq %r15,%r14 +- leaq (%rsi,%rax,1),%rsi +- adcq %rbp,%rbp +- movq %r14,-8(%rbx) +- +- cmpq %r10,%rdi +- jb L$mulx4x_outer +- +- movq -8(%rcx),%r10 +- movq %rbp,%r8 +- movq (%rcx,%rax,1),%r12 +- leaq (%rcx,%rax,1),%rbp +- movq %rax,%rcx +- leaq (%rbx,%rax,1),%rdi +- xorl %eax,%eax +- xorq %r15,%r15 +- subq %r14,%r10 +- adcq %r15,%r15 +- orq %r15,%r8 +- sarq $3+2,%rcx +- subq %r8,%rax +- movq 56+8(%rsp),%rdx +- decq %r12 +- movq 8(%rbp),%r13 +- xorq %r8,%r8 +- movq 16(%rbp),%r14 +- movq 24(%rbp),%r15 +- jmp L$sqrx4x_sub_entry +- +- +- +-.p2align 5 +-bn_powerx5: +- +- movq %rsp,%rax +- +-L$powerx5_enter: +- pushq %rbx +- +- pushq %rbp +- +- pushq %r12 +- +- pushq %r13 +- +- pushq %r14 +- +- pushq %r15 +- +-L$powerx5_prologue: +- +- shll $3,%r9d +- leaq (%r9,%r9,2),%r10 +- negq %r9 +- movq (%r8),%r8 +- +- +- +- +- +- +- +- +- leaq -320(%rsp,%r9,2),%r11 +- movq %rsp,%rbp +- subq %rdi,%r11 +- andq $4095,%r11 +- cmpq %r11,%r10 +- jb L$pwrx_sp_alt +- subq %r11,%rbp +- leaq -320(%rbp,%r9,2),%rbp +- jmp L$pwrx_sp_done +- +-.p2align 5 +-L$pwrx_sp_alt: +- leaq 4096-320(,%r9,2),%r10 +- leaq -320(%rbp,%r9,2),%rbp +- subq %r10,%r11 +- movq $0,%r10 +- cmovcq %r10,%r11 +- subq %r11,%rbp +-L$pwrx_sp_done: +- andq $-64,%rbp +- movq %rsp,%r11 +- subq %rbp,%r11 +- andq $-4096,%r11 +- leaq (%r11,%rbp,1),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$pwrx_page_walk +- jmp L$pwrx_page_walk_done +- +-L$pwrx_page_walk: +- leaq -4096(%rsp),%rsp +- movq (%rsp),%r10 +- cmpq %rbp,%rsp +- ja L$pwrx_page_walk +-L$pwrx_page_walk_done: +- +- movq %r9,%r10 +- negq %r9 +- +- +- +- +- +- +- +- +- +- +- +- +- pxor %xmm0,%xmm0 +-.byte 102,72,15,110,207 +-.byte 102,72,15,110,209 +-.byte 102,73,15,110,218 +-.byte 102,72,15,110,226 +- movq %r8,32(%rsp) +- movq %rax,40(%rsp) +- +-L$powerx5_body: +- +- call __bn_sqrx8x_internal +- call __bn_postx4x_internal +- call __bn_sqrx8x_internal +- call __bn_postx4x_internal +- call __bn_sqrx8x_internal +- call __bn_postx4x_internal +- call __bn_sqrx8x_internal +- call __bn_postx4x_internal +- call __bn_sqrx8x_internal +- call __bn_postx4x_internal +- +- movq %r10,%r9 +- movq %rsi,%rdi +-.byte 102,72,15,126,209 +-.byte 102,72,15,126,226 +- movq 40(%rsp),%rax +- +- call mulx4x_internal +- +- movq 40(%rsp),%rsi +- +- movq $1,%rax +- +- movq -48(%rsi),%r15 +- +- movq 
-40(%rsi),%r14 +- +- movq -32(%rsi),%r13 +- +- movq -24(%rsi),%r12 +- +- movq -16(%rsi),%rbp +- +- movq -8(%rsi),%rbx +- +- leaq (%rsi),%rsp +- +-L$powerx5_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.globl _bn_sqrx8x_internal +-.private_extern _bn_sqrx8x_internal +-.private_extern _bn_sqrx8x_internal +- +-.p2align 5 +-_bn_sqrx8x_internal: +-__bn_sqrx8x_internal: +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- +- leaq 48+8(%rsp),%rdi +- leaq (%rsi,%r9,1),%rbp +- movq %r9,0+8(%rsp) +- movq %rbp,8+8(%rsp) +- jmp L$sqr8x_zero_start +- +-.p2align 5 +-.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 +-L$sqrx8x_zero: +-.byte 0x3e +- movdqa %xmm0,0(%rdi) +- movdqa %xmm0,16(%rdi) +- movdqa %xmm0,32(%rdi) +- movdqa %xmm0,48(%rdi) +-L$sqr8x_zero_start: +- movdqa %xmm0,64(%rdi) +- movdqa %xmm0,80(%rdi) +- movdqa %xmm0,96(%rdi) +- movdqa %xmm0,112(%rdi) +- leaq 128(%rdi),%rdi +- subq $64,%r9 +- jnz L$sqrx8x_zero +- +- movq 0(%rsi),%rdx +- +- xorq %r10,%r10 +- xorq %r11,%r11 +- xorq %r12,%r12 +- xorq %r13,%r13 +- xorq %r14,%r14 +- xorq %r15,%r15 +- leaq 48+8(%rsp),%rdi +- xorq %rbp,%rbp +- jmp L$sqrx8x_outer_loop +- +-.p2align 5 +-L$sqrx8x_outer_loop: +- mulxq 8(%rsi),%r8,%rax +- adcxq %r9,%r8 +- adoxq %rax,%r10 +- mulxq 16(%rsi),%r9,%rax +- adcxq %r10,%r9 +- adoxq %rax,%r11 +-.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 +- adcxq %r11,%r10 +- adoxq %rax,%r12 +-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 +- adcxq %r12,%r11 +- adoxq %rax,%r13 +- mulxq 40(%rsi),%r12,%rax +- adcxq %r13,%r12 +- adoxq %rax,%r14 +- mulxq 48(%rsi),%r13,%rax +- adcxq %r14,%r13 +- adoxq %r15,%rax +- mulxq 56(%rsi),%r14,%r15 +- movq 8(%rsi),%rdx +- adcxq %rax,%r14 +- adoxq %rbp,%r15 +- adcq 64(%rdi),%r15 +- movq %r8,8(%rdi) +- movq %r9,16(%rdi) +- sbbq %rcx,%rcx +- xorq %rbp,%rbp +- +- +- mulxq 16(%rsi),%r8,%rbx +- mulxq 24(%rsi),%r9,%rax +- adcxq %r10,%r8 +- adoxq %rbx,%r9 +- mulxq 32(%rsi),%r10,%rbx +- adcxq %r11,%r9 +- adoxq %rax,%r10 +-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 +- adcxq %r12,%r10 +- adoxq %rbx,%r11 +-.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 +- adcxq %r13,%r11 +- adoxq %r14,%r12 +-.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 +- movq 16(%rsi),%rdx +- adcxq %rax,%r12 +- adoxq %rbx,%r13 +- adcxq %r15,%r13 +- adoxq %rbp,%r14 +- adcxq %rbp,%r14 +- +- movq %r8,24(%rdi) +- movq %r9,32(%rdi) +- +- mulxq 24(%rsi),%r8,%rbx +- mulxq 32(%rsi),%r9,%rax +- adcxq %r10,%r8 +- adoxq %rbx,%r9 +- mulxq 40(%rsi),%r10,%rbx +- adcxq %r11,%r9 +- adoxq %rax,%r10 +-.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 +- adcxq %r12,%r10 +- adoxq %r13,%r11 +-.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 +-.byte 0x3e +- movq 24(%rsi),%rdx +- adcxq %rbx,%r11 +- adoxq %rax,%r12 +- adcxq %r14,%r12 +- movq %r8,40(%rdi) +- movq %r9,48(%rdi) +- mulxq 32(%rsi),%r8,%rax +- adoxq %rbp,%r13 +- adcxq %rbp,%r13 +- +- mulxq 40(%rsi),%r9,%rbx +- adcxq %r10,%r8 +- adoxq %rax,%r9 +- mulxq 48(%rsi),%r10,%rax +- adcxq %r11,%r9 +- adoxq %r12,%r10 +- mulxq 56(%rsi),%r11,%r12 +- movq 32(%rsi),%rdx +- movq 40(%rsi),%r14 +- adcxq %rbx,%r10 +- adoxq %rax,%r11 +- movq 48(%rsi),%r15 +- adcxq %r13,%r11 +- adoxq %rbp,%r12 +- adcxq %rbp,%r12 +- +- movq %r8,56(%rdi) +- movq %r9,64(%rdi) +- +- mulxq %r14,%r9,%rax +- movq 56(%rsi),%r8 +- adcxq %r10,%r9 +- mulxq %r15,%r10,%rbx +- adoxq %rax,%r10 +- adcxq %r11,%r10 +- mulxq %r8,%r11,%rax +- movq %r14,%rdx +- adoxq %rbx,%r11 +- adcxq %r12,%r11 +- +- adcxq %rbp,%rax +- +- mulxq %r15,%r14,%rbx +- 
mulxq %r8,%r12,%r13 +- movq %r15,%rdx +- leaq 64(%rsi),%rsi +- adcxq %r14,%r11 +- adoxq %rbx,%r12 +- adcxq %rax,%r12 +- adoxq %rbp,%r13 +- +-.byte 0x67,0x67 +- mulxq %r8,%r8,%r14 +- adcxq %r8,%r13 +- adcxq %rbp,%r14 +- +- cmpq 8+8(%rsp),%rsi +- je L$sqrx8x_outer_break +- +- negq %rcx +- movq $-8,%rcx +- movq %rbp,%r15 +- movq 64(%rdi),%r8 +- adcxq 72(%rdi),%r9 +- adcxq 80(%rdi),%r10 +- adcxq 88(%rdi),%r11 +- adcq 96(%rdi),%r12 +- adcq 104(%rdi),%r13 +- adcq 112(%rdi),%r14 +- adcq 120(%rdi),%r15 +- leaq (%rsi),%rbp +- leaq 128(%rdi),%rdi +- sbbq %rax,%rax +- +- movq -64(%rsi),%rdx +- movq %rax,16+8(%rsp) +- movq %rdi,24+8(%rsp) +- +- +- xorl %eax,%eax +- jmp L$sqrx8x_loop +- +-.p2align 5 +-L$sqrx8x_loop: +- movq %r8,%rbx +- mulxq 0(%rbp),%rax,%r8 +- adcxq %rax,%rbx +- adoxq %r9,%r8 +- +- mulxq 8(%rbp),%rax,%r9 +- adcxq %rax,%r8 +- adoxq %r10,%r9 +- +- mulxq 16(%rbp),%rax,%r10 +- adcxq %rax,%r9 +- adoxq %r11,%r10 +- +- mulxq 24(%rbp),%rax,%r11 +- adcxq %rax,%r10 +- adoxq %r12,%r11 +- +-.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 +- adcxq %rax,%r11 +- adoxq %r13,%r12 +- +- mulxq 40(%rbp),%rax,%r13 +- adcxq %rax,%r12 +- adoxq %r14,%r13 +- +- mulxq 48(%rbp),%rax,%r14 +- movq %rbx,(%rdi,%rcx,8) +- movl $0,%ebx +- adcxq %rax,%r13 +- adoxq %r15,%r14 +- +-.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 +- movq 8(%rsi,%rcx,8),%rdx +- adcxq %rax,%r14 +- adoxq %rbx,%r15 +- adcxq %rbx,%r15 +- +-.byte 0x67 +- incq %rcx +- jnz L$sqrx8x_loop +- +- leaq 64(%rbp),%rbp +- movq $-8,%rcx +- cmpq 8+8(%rsp),%rbp +- je L$sqrx8x_break +- +- subq 16+8(%rsp),%rbx +-.byte 0x66 +- movq -64(%rsi),%rdx +- adcxq 0(%rdi),%r8 +- adcxq 8(%rdi),%r9 +- adcq 16(%rdi),%r10 +- adcq 24(%rdi),%r11 +- adcq 32(%rdi),%r12 +- adcq 40(%rdi),%r13 +- adcq 48(%rdi),%r14 +- adcq 56(%rdi),%r15 +- leaq 64(%rdi),%rdi +-.byte 0x67 +- sbbq %rax,%rax +- xorl %ebx,%ebx +- movq %rax,16+8(%rsp) +- jmp L$sqrx8x_loop +- +-.p2align 5 +-L$sqrx8x_break: +- xorq %rbp,%rbp +- subq 16+8(%rsp),%rbx +- adcxq %rbp,%r8 +- movq 24+8(%rsp),%rcx +- adcxq %rbp,%r9 +- movq 0(%rsi),%rdx +- adcq $0,%r10 +- movq %r8,0(%rdi) +- adcq $0,%r11 +- adcq $0,%r12 +- adcq $0,%r13 +- adcq $0,%r14 +- adcq $0,%r15 +- cmpq %rcx,%rdi +- je L$sqrx8x_outer_loop +- +- movq %r9,8(%rdi) +- movq 8(%rcx),%r9 +- movq %r10,16(%rdi) +- movq 16(%rcx),%r10 +- movq %r11,24(%rdi) +- movq 24(%rcx),%r11 +- movq %r12,32(%rdi) +- movq 32(%rcx),%r12 +- movq %r13,40(%rdi) +- movq 40(%rcx),%r13 +- movq %r14,48(%rdi) +- movq 48(%rcx),%r14 +- movq %r15,56(%rdi) +- movq 56(%rcx),%r15 +- movq %rcx,%rdi +- jmp L$sqrx8x_outer_loop +- +-.p2align 5 +-L$sqrx8x_outer_break: +- movq %r9,72(%rdi) +-.byte 102,72,15,126,217 +- movq %r10,80(%rdi) +- movq %r11,88(%rdi) +- movq %r12,96(%rdi) +- movq %r13,104(%rdi) +- movq %r14,112(%rdi) +- leaq 48+8(%rsp),%rdi +- movq (%rsi,%rcx,1),%rdx +- +- movq 8(%rdi),%r11 +- xorq %r10,%r10 +- movq 0+8(%rsp),%r9 +- adoxq %r11,%r11 +- movq 16(%rdi),%r12 +- movq 24(%rdi),%r13 +- +- +-.p2align 5 +-L$sqrx4x_shift_n_add: +- mulxq %rdx,%rax,%rbx +- adoxq %r12,%r12 +- adcxq %r10,%rax +-.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 +-.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 +- adoxq %r13,%r13 +- adcxq %r11,%rbx +- movq 40(%rdi),%r11 +- movq %rax,0(%rdi) +- movq %rbx,8(%rdi) +- +- mulxq %rdx,%rax,%rbx +- adoxq %r10,%r10 +- adcxq %r12,%rax +- movq 16(%rsi,%rcx,1),%rdx +- movq 48(%rdi),%r12 +- adoxq %r11,%r11 +- adcxq %r13,%rbx +- movq 56(%rdi),%r13 +- movq %rax,16(%rdi) +- movq %rbx,24(%rdi) +- +- mulxq %rdx,%rax,%rbx +- adoxq %r12,%r12 +- adcxq %r10,%rax +- movq 
24(%rsi,%rcx,1),%rdx +- leaq 32(%rcx),%rcx +- movq 64(%rdi),%r10 +- adoxq %r13,%r13 +- adcxq %r11,%rbx +- movq 72(%rdi),%r11 +- movq %rax,32(%rdi) +- movq %rbx,40(%rdi) +- +- mulxq %rdx,%rax,%rbx +- adoxq %r10,%r10 +- adcxq %r12,%rax +- jrcxz L$sqrx4x_shift_n_add_break +-.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 +- adoxq %r11,%r11 +- adcxq %r13,%rbx +- movq 80(%rdi),%r12 +- movq 88(%rdi),%r13 +- movq %rax,48(%rdi) +- movq %rbx,56(%rdi) +- leaq 64(%rdi),%rdi +- nop +- jmp L$sqrx4x_shift_n_add +- +-.p2align 5 +-L$sqrx4x_shift_n_add_break: +- adcxq %r13,%rbx +- movq %rax,48(%rdi) +- movq %rbx,56(%rdi) +- leaq 64(%rdi),%rdi +-.byte 102,72,15,126,213 +-__bn_sqrx8x_reduction: +- xorl %eax,%eax +- movq 32+8(%rsp),%rbx +- movq 48+8(%rsp),%rdx +- leaq -64(%rbp,%r9,1),%rcx +- +- movq %rcx,0+8(%rsp) +- movq %rdi,8+8(%rsp) +- +- leaq 48+8(%rsp),%rdi +- jmp L$sqrx8x_reduction_loop +- +-.p2align 5 +-L$sqrx8x_reduction_loop: +- movq 8(%rdi),%r9 +- movq 16(%rdi),%r10 +- movq 24(%rdi),%r11 +- movq 32(%rdi),%r12 +- movq %rdx,%r8 +- imulq %rbx,%rdx +- movq 40(%rdi),%r13 +- movq 48(%rdi),%r14 +- movq 56(%rdi),%r15 +- movq %rax,24+8(%rsp) +- +- leaq 64(%rdi),%rdi +- xorq %rsi,%rsi +- movq $-8,%rcx +- jmp L$sqrx8x_reduce +- +-.p2align 5 +-L$sqrx8x_reduce: +- movq %r8,%rbx +- mulxq 0(%rbp),%rax,%r8 +- adcxq %rbx,%rax +- adoxq %r9,%r8 +- +- mulxq 8(%rbp),%rbx,%r9 +- adcxq %rbx,%r8 +- adoxq %r10,%r9 +- +- mulxq 16(%rbp),%rbx,%r10 +- adcxq %rbx,%r9 +- adoxq %r11,%r10 +- +- mulxq 24(%rbp),%rbx,%r11 +- adcxq %rbx,%r10 +- adoxq %r12,%r11 +- +-.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 +- movq %rdx,%rax +- movq %r8,%rdx +- adcxq %rbx,%r11 +- adoxq %r13,%r12 +- +- mulxq 32+8(%rsp),%rbx,%rdx +- movq %rax,%rdx +- movq %rax,64+48+8(%rsp,%rcx,8) +- +- mulxq 40(%rbp),%rax,%r13 +- adcxq %rax,%r12 +- adoxq %r14,%r13 +- +- mulxq 48(%rbp),%rax,%r14 +- adcxq %rax,%r13 +- adoxq %r15,%r14 +- +- mulxq 56(%rbp),%rax,%r15 +- movq %rbx,%rdx +- adcxq %rax,%r14 +- adoxq %rsi,%r15 +- adcxq %rsi,%r15 +- +-.byte 0x67,0x67,0x67 +- incq %rcx +- jnz L$sqrx8x_reduce +- +- movq %rsi,%rax +- cmpq 0+8(%rsp),%rbp +- jae L$sqrx8x_no_tail +- +- movq 48+8(%rsp),%rdx +- addq 0(%rdi),%r8 +- leaq 64(%rbp),%rbp +- movq $-8,%rcx +- adcxq 8(%rdi),%r9 +- adcxq 16(%rdi),%r10 +- adcq 24(%rdi),%r11 +- adcq 32(%rdi),%r12 +- adcq 40(%rdi),%r13 +- adcq 48(%rdi),%r14 +- adcq 56(%rdi),%r15 +- leaq 64(%rdi),%rdi +- sbbq %rax,%rax +- +- xorq %rsi,%rsi +- movq %rax,16+8(%rsp) +- jmp L$sqrx8x_tail +- +-.p2align 5 +-L$sqrx8x_tail: +- movq %r8,%rbx +- mulxq 0(%rbp),%rax,%r8 +- adcxq %rax,%rbx +- adoxq %r9,%r8 +- +- mulxq 8(%rbp),%rax,%r9 +- adcxq %rax,%r8 +- adoxq %r10,%r9 +- +- mulxq 16(%rbp),%rax,%r10 +- adcxq %rax,%r9 +- adoxq %r11,%r10 +- +- mulxq 24(%rbp),%rax,%r11 +- adcxq %rax,%r10 +- adoxq %r12,%r11 +- +-.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 +- adcxq %rax,%r11 +- adoxq %r13,%r12 +- +- mulxq 40(%rbp),%rax,%r13 +- adcxq %rax,%r12 +- adoxq %r14,%r13 +- +- mulxq 48(%rbp),%rax,%r14 +- adcxq %rax,%r13 +- adoxq %r15,%r14 +- +- mulxq 56(%rbp),%rax,%r15 +- movq 72+48+8(%rsp,%rcx,8),%rdx +- adcxq %rax,%r14 +- adoxq %rsi,%r15 +- movq %rbx,(%rdi,%rcx,8) +- movq %r8,%rbx +- adcxq %rsi,%r15 +- +- incq %rcx +- jnz L$sqrx8x_tail +- +- cmpq 0+8(%rsp),%rbp +- jae L$sqrx8x_tail_done +- +- subq 16+8(%rsp),%rsi +- movq 48+8(%rsp),%rdx +- leaq 64(%rbp),%rbp +- adcq 0(%rdi),%r8 +- adcq 8(%rdi),%r9 +- adcq 16(%rdi),%r10 +- adcq 24(%rdi),%r11 +- adcq 32(%rdi),%r12 +- adcq 40(%rdi),%r13 +- adcq 48(%rdi),%r14 +- adcq 56(%rdi),%r15 +- leaq 64(%rdi),%rdi +- sbbq 
%rax,%rax +- subq $8,%rcx +- +- xorq %rsi,%rsi +- movq %rax,16+8(%rsp) +- jmp L$sqrx8x_tail +- +-.p2align 5 +-L$sqrx8x_tail_done: +- xorq %rax,%rax +- addq 24+8(%rsp),%r8 +- adcq $0,%r9 +- adcq $0,%r10 +- adcq $0,%r11 +- adcq $0,%r12 +- adcq $0,%r13 +- adcq $0,%r14 +- adcq $0,%r15 +- adcq $0,%rax +- +- subq 16+8(%rsp),%rsi +-L$sqrx8x_no_tail: +- adcq 0(%rdi),%r8 +-.byte 102,72,15,126,217 +- adcq 8(%rdi),%r9 +- movq 56(%rbp),%rsi +-.byte 102,72,15,126,213 +- adcq 16(%rdi),%r10 +- adcq 24(%rdi),%r11 +- adcq 32(%rdi),%r12 +- adcq 40(%rdi),%r13 +- adcq 48(%rdi),%r14 +- adcq 56(%rdi),%r15 +- adcq $0,%rax +- +- movq 32+8(%rsp),%rbx +- movq 64(%rdi,%rcx,1),%rdx +- +- movq %r8,0(%rdi) +- leaq 64(%rdi),%r8 +- movq %r9,8(%rdi) +- movq %r10,16(%rdi) +- movq %r11,24(%rdi) +- movq %r12,32(%rdi) +- movq %r13,40(%rdi) +- movq %r14,48(%rdi) +- movq %r15,56(%rdi) +- +- leaq 64(%rdi,%rcx,1),%rdi +- cmpq 8+8(%rsp),%r8 +- jb L$sqrx8x_reduction_loop +- .byte 0xf3,0xc3 +- +- +-.p2align 5 +- +-__bn_postx4x_internal: +- +- movq 0(%rbp),%r12 +- movq %rcx,%r10 +- movq %rcx,%r9 +- negq %rax +- sarq $3+2,%rcx +- +-.byte 102,72,15,126,202 +-.byte 102,72,15,126,206 +- decq %r12 +- movq 8(%rbp),%r13 +- xorq %r8,%r8 +- movq 16(%rbp),%r14 +- movq 24(%rbp),%r15 +- jmp L$sqrx4x_sub_entry +- +-.p2align 4 +-L$sqrx4x_sub: +- movq 0(%rbp),%r12 +- movq 8(%rbp),%r13 +- movq 16(%rbp),%r14 +- movq 24(%rbp),%r15 +-L$sqrx4x_sub_entry: +- andnq %rax,%r12,%r12 +- leaq 32(%rbp),%rbp +- andnq %rax,%r13,%r13 +- andnq %rax,%r14,%r14 +- andnq %rax,%r15,%r15 +- +- negq %r8 +- adcq 0(%rdi),%r12 +- adcq 8(%rdi),%r13 +- adcq 16(%rdi),%r14 +- adcq 24(%rdi),%r15 +- movq %r12,0(%rdx) +- leaq 32(%rdi),%rdi +- movq %r13,8(%rdx) +- sbbq %r8,%r8 +- movq %r14,16(%rdx) +- movq %r15,24(%rdx) +- leaq 32(%rdx),%rdx +- +- incq %rcx +- jnz L$sqrx4x_sub +- +- negq %r9 +- +- .byte 0xf3,0xc3 +- +- +-.globl _bn_scatter5 +-.private_extern _bn_scatter5 +- +-.p2align 4 +-_bn_scatter5: +- +- cmpl $0,%esi +- jz L$scatter_epilogue +- leaq (%rdx,%rcx,8),%rdx +-L$scatter: +- movq (%rdi),%rax +- leaq 8(%rdi),%rdi +- movq %rax,(%rdx) +- leaq 256(%rdx),%rdx +- subl $1,%esi +- jnz L$scatter +-L$scatter_epilogue: +- .byte 0xf3,0xc3 +- +- +- +-.globl _bn_gather5 +-.private_extern _bn_gather5 +- +-.p2align 5 +-_bn_gather5: +- +-L$SEH_begin_bn_gather5: +- +-.byte 0x4c,0x8d,0x14,0x24 +- +-.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 +- leaq L$inc(%rip),%rax +- andq $-16,%rsp +- +- movd %ecx,%xmm5 +- movdqa 0(%rax),%xmm0 +- movdqa 16(%rax),%xmm1 +- leaq 128(%rdx),%r11 +- leaq 128(%rsp),%rax +- +- pshufd $0,%xmm5,%xmm5 +- movdqa %xmm1,%xmm4 +- movdqa %xmm1,%xmm2 +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm4,%xmm3 +- +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,-128(%rax) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,-112(%rax) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,-96(%rax) +- movdqa %xmm4,%xmm2 +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,-80(%rax) +- movdqa %xmm4,%xmm3 +- +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,-64(%rax) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,-48(%rax) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,-32(%rax) +- movdqa %xmm4,%xmm2 +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,-16(%rax) +- movdqa %xmm4,%xmm3 +- +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,0(%rax) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 
+- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,16(%rax) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,32(%rax) +- movdqa %xmm4,%xmm2 +- paddd %xmm0,%xmm1 +- pcmpeqd %xmm5,%xmm0 +- movdqa %xmm3,48(%rax) +- movdqa %xmm4,%xmm3 +- +- paddd %xmm1,%xmm2 +- pcmpeqd %xmm5,%xmm1 +- movdqa %xmm0,64(%rax) +- movdqa %xmm4,%xmm0 +- +- paddd %xmm2,%xmm3 +- pcmpeqd %xmm5,%xmm2 +- movdqa %xmm1,80(%rax) +- movdqa %xmm4,%xmm1 +- +- paddd %xmm3,%xmm0 +- pcmpeqd %xmm5,%xmm3 +- movdqa %xmm2,96(%rax) +- movdqa %xmm4,%xmm2 +- movdqa %xmm3,112(%rax) +- jmp L$gather +- +-.p2align 5 +-L$gather: +- pxor %xmm4,%xmm4 +- pxor %xmm5,%xmm5 +- movdqa -128(%r11),%xmm0 +- movdqa -112(%r11),%xmm1 +- movdqa -96(%r11),%xmm2 +- pand -128(%rax),%xmm0 +- movdqa -80(%r11),%xmm3 +- pand -112(%rax),%xmm1 +- por %xmm0,%xmm4 +- pand -96(%rax),%xmm2 +- por %xmm1,%xmm5 +- pand -80(%rax),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa -64(%r11),%xmm0 +- movdqa -48(%r11),%xmm1 +- movdqa -32(%r11),%xmm2 +- pand -64(%rax),%xmm0 +- movdqa -16(%r11),%xmm3 +- pand -48(%rax),%xmm1 +- por %xmm0,%xmm4 +- pand -32(%rax),%xmm2 +- por %xmm1,%xmm5 +- pand -16(%rax),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa 0(%r11),%xmm0 +- movdqa 16(%r11),%xmm1 +- movdqa 32(%r11),%xmm2 +- pand 0(%rax),%xmm0 +- movdqa 48(%r11),%xmm3 +- pand 16(%rax),%xmm1 +- por %xmm0,%xmm4 +- pand 32(%rax),%xmm2 +- por %xmm1,%xmm5 +- pand 48(%rax),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- movdqa 64(%r11),%xmm0 +- movdqa 80(%r11),%xmm1 +- movdqa 96(%r11),%xmm2 +- pand 64(%rax),%xmm0 +- movdqa 112(%r11),%xmm3 +- pand 80(%rax),%xmm1 +- por %xmm0,%xmm4 +- pand 96(%rax),%xmm2 +- por %xmm1,%xmm5 +- pand 112(%rax),%xmm3 +- por %xmm2,%xmm4 +- por %xmm3,%xmm5 +- por %xmm5,%xmm4 +- leaq 256(%r11),%r11 +- pshufd $0x4e,%xmm4,%xmm0 +- por %xmm4,%xmm0 +- movq %xmm0,(%rdi) +- leaq 8(%rdi),%rdi +- subl $1,%esi +- jnz L$gather +- +- leaq (%r10),%rsp +- +- .byte 0xf3,0xc3 +-L$SEH_end_bn_gather5: +- +- +-.p2align 6 +-L$inc: +-.long 0,0, 1,1 +-.long 2,2, 2,2 +-.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 +-#endif +diff --git a/mac-x86_64/crypto/test/trampoline-x86_64.S b/mac-x86_64/crypto/test/trampoline-x86_64.S +deleted file mode 100644 +index 5f20aa7..0000000 +--- a/mac-x86_64/crypto/test/trampoline-x86_64.S ++++ /dev/null +@@ -1,513 +0,0 @@ +-// This file is generated from a similarly-named Perl script in the BoringSSL +-// source tree. Do not edit by hand. 
+- +-#if defined(__has_feature) +-#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) +-#define OPENSSL_NO_ASM +-#endif +-#endif +- +-#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) +-#if defined(BORINGSSL_PREFIX) +-#include +-#endif +-.text +- +- +- +- +- +- +- +- +- +-.globl _abi_test_trampoline +-.private_extern _abi_test_trampoline +-.p2align 4 +-_abi_test_trampoline: +-L$abi_test_trampoline_seh_begin: +- +- +- +- +- +- +- +- +- +- +- subq $120,%rsp +- +-L$abi_test_trampoline_seh_prolog_alloc: +- movq %r8,48(%rsp) +- movq %rbx,64(%rsp) +- +-L$abi_test_trampoline_seh_prolog_rbx: +- movq %rbp,72(%rsp) +- +-L$abi_test_trampoline_seh_prolog_rbp: +- movq %r12,80(%rsp) +- +-L$abi_test_trampoline_seh_prolog_r12: +- movq %r13,88(%rsp) +- +-L$abi_test_trampoline_seh_prolog_r13: +- movq %r14,96(%rsp) +- +-L$abi_test_trampoline_seh_prolog_r14: +- movq %r15,104(%rsp) +- +-L$abi_test_trampoline_seh_prolog_r15: +-L$abi_test_trampoline_seh_prolog_end: +- movq 0(%rsi),%rbx +- movq 8(%rsi),%rbp +- movq 16(%rsi),%r12 +- movq 24(%rsi),%r13 +- movq 32(%rsi),%r14 +- movq 40(%rsi),%r15 +- +- movq %rdi,32(%rsp) +- movq %rsi,40(%rsp) +- +- +- +- +- movq %rdx,%r10 +- movq %rcx,%r11 +- decq %r11 +- js L$args_done +- movq (%r10),%rdi +- addq $8,%r10 +- decq %r11 +- js L$args_done +- movq (%r10),%rsi +- addq $8,%r10 +- decq %r11 +- js L$args_done +- movq (%r10),%rdx +- addq $8,%r10 +- decq %r11 +- js L$args_done +- movq (%r10),%rcx +- addq $8,%r10 +- decq %r11 +- js L$args_done +- movq (%r10),%r8 +- addq $8,%r10 +- decq %r11 +- js L$args_done +- movq (%r10),%r9 +- addq $8,%r10 +- leaq 0(%rsp),%rax +-L$args_loop: +- decq %r11 +- js L$args_done +- +- +- +- +- +- +- movq %r11,56(%rsp) +- movq (%r10),%r11 +- movq %r11,(%rax) +- movq 56(%rsp),%r11 +- +- addq $8,%r10 +- addq $8,%rax +- jmp L$args_loop +- +-L$args_done: +- movq 32(%rsp),%rax +- movq 48(%rsp),%r10 +- testq %r10,%r10 +- jz L$no_unwind +- +- +- pushfq +- orq $0x100,0(%rsp) +- popfq +- +- +- +- nop +-.globl _abi_test_unwind_start +-.private_extern _abi_test_unwind_start +-_abi_test_unwind_start: +- +- call *%rax +-.globl _abi_test_unwind_return +-.private_extern _abi_test_unwind_return +-_abi_test_unwind_return: +- +- +- +- +- pushfq +- andq $-0x101,0(%rsp) +- popfq +-.globl _abi_test_unwind_stop +-.private_extern _abi_test_unwind_stop +-_abi_test_unwind_stop: +- +- jmp L$call_done +- +-L$no_unwind: +- call *%rax +- +-L$call_done: +- +- movq 40(%rsp),%rsi +- movq %rbx,0(%rsi) +- movq %rbp,8(%rsi) +- movq %r12,16(%rsi) +- movq %r13,24(%rsi) +- movq %r14,32(%rsi) +- movq %r15,40(%rsi) +- movq 64(%rsp),%rbx +- +- movq 72(%rsp),%rbp +- +- movq 80(%rsp),%r12 +- +- movq 88(%rsp),%r13 +- +- movq 96(%rsp),%r14 +- +- movq 104(%rsp),%r15 +- +- addq $120,%rsp +- +- +- +- .byte 0xf3,0xc3 +- +-L$abi_test_trampoline_seh_end: +- +- +-.globl _abi_test_clobber_rax +-.private_extern _abi_test_clobber_rax +-.p2align 4 +-_abi_test_clobber_rax: +- xorq %rax,%rax +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_rbx +-.private_extern _abi_test_clobber_rbx +-.p2align 4 +-_abi_test_clobber_rbx: +- xorq %rbx,%rbx +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_rcx +-.private_extern _abi_test_clobber_rcx +-.p2align 4 +-_abi_test_clobber_rcx: +- xorq %rcx,%rcx +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_rdx +-.private_extern _abi_test_clobber_rdx +-.p2align 4 +-_abi_test_clobber_rdx: +- xorq %rdx,%rdx +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_rdi +-.private_extern _abi_test_clobber_rdi +-.p2align 4 +-_abi_test_clobber_rdi: +- xorq %rdi,%rdi +- 
.byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_rsi +-.private_extern _abi_test_clobber_rsi +-.p2align 4 +-_abi_test_clobber_rsi: +- xorq %rsi,%rsi +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_rbp +-.private_extern _abi_test_clobber_rbp +-.p2align 4 +-_abi_test_clobber_rbp: +- xorq %rbp,%rbp +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_r8 +-.private_extern _abi_test_clobber_r8 +-.p2align 4 +-_abi_test_clobber_r8: +- xorq %r8,%r8 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_r9 +-.private_extern _abi_test_clobber_r9 +-.p2align 4 +-_abi_test_clobber_r9: +- xorq %r9,%r9 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_r10 +-.private_extern _abi_test_clobber_r10 +-.p2align 4 +-_abi_test_clobber_r10: +- xorq %r10,%r10 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_r11 +-.private_extern _abi_test_clobber_r11 +-.p2align 4 +-_abi_test_clobber_r11: +- xorq %r11,%r11 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_r12 +-.private_extern _abi_test_clobber_r12 +-.p2align 4 +-_abi_test_clobber_r12: +- xorq %r12,%r12 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_r13 +-.private_extern _abi_test_clobber_r13 +-.p2align 4 +-_abi_test_clobber_r13: +- xorq %r13,%r13 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_r14 +-.private_extern _abi_test_clobber_r14 +-.p2align 4 +-_abi_test_clobber_r14: +- xorq %r14,%r14 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_r15 +-.private_extern _abi_test_clobber_r15 +-.p2align 4 +-_abi_test_clobber_r15: +- xorq %r15,%r15 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm0 +-.private_extern _abi_test_clobber_xmm0 +-.p2align 4 +-_abi_test_clobber_xmm0: +- pxor %xmm0,%xmm0 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm1 +-.private_extern _abi_test_clobber_xmm1 +-.p2align 4 +-_abi_test_clobber_xmm1: +- pxor %xmm1,%xmm1 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm2 +-.private_extern _abi_test_clobber_xmm2 +-.p2align 4 +-_abi_test_clobber_xmm2: +- pxor %xmm2,%xmm2 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm3 +-.private_extern _abi_test_clobber_xmm3 +-.p2align 4 +-_abi_test_clobber_xmm3: +- pxor %xmm3,%xmm3 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm4 +-.private_extern _abi_test_clobber_xmm4 +-.p2align 4 +-_abi_test_clobber_xmm4: +- pxor %xmm4,%xmm4 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm5 +-.private_extern _abi_test_clobber_xmm5 +-.p2align 4 +-_abi_test_clobber_xmm5: +- pxor %xmm5,%xmm5 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm6 +-.private_extern _abi_test_clobber_xmm6 +-.p2align 4 +-_abi_test_clobber_xmm6: +- pxor %xmm6,%xmm6 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm7 +-.private_extern _abi_test_clobber_xmm7 +-.p2align 4 +-_abi_test_clobber_xmm7: +- pxor %xmm7,%xmm7 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm8 +-.private_extern _abi_test_clobber_xmm8 +-.p2align 4 +-_abi_test_clobber_xmm8: +- pxor %xmm8,%xmm8 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm9 +-.private_extern _abi_test_clobber_xmm9 +-.p2align 4 +-_abi_test_clobber_xmm9: +- pxor %xmm9,%xmm9 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm10 +-.private_extern _abi_test_clobber_xmm10 +-.p2align 4 +-_abi_test_clobber_xmm10: +- pxor %xmm10,%xmm10 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm11 +-.private_extern _abi_test_clobber_xmm11 +-.p2align 4 +-_abi_test_clobber_xmm11: +- pxor %xmm11,%xmm11 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm12 +-.private_extern _abi_test_clobber_xmm12 +-.p2align 4 +-_abi_test_clobber_xmm12: +- pxor 
%xmm12,%xmm12 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm13 +-.private_extern _abi_test_clobber_xmm13 +-.p2align 4 +-_abi_test_clobber_xmm13: +- pxor %xmm13,%xmm13 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm14 +-.private_extern _abi_test_clobber_xmm14 +-.p2align 4 +-_abi_test_clobber_xmm14: +- pxor %xmm14,%xmm14 +- .byte 0xf3,0xc3 +- +- +-.globl _abi_test_clobber_xmm15 +-.private_extern _abi_test_clobber_xmm15 +-.p2align 4 +-_abi_test_clobber_xmm15: +- pxor %xmm15,%xmm15 +- .byte 0xf3,0xc3 +- +- +- +- +- +-.globl _abi_test_bad_unwind_wrong_register +-.private_extern _abi_test_bad_unwind_wrong_register +-.p2align 4 +-_abi_test_bad_unwind_wrong_register: +- +-L$abi_test_bad_unwind_wrong_register_seh_begin: +- pushq %r12 +- +-L$abi_test_bad_unwind_wrong_register_seh_push_r13: +- +- +- +- nop +- popq %r12 +- +- .byte 0xf3,0xc3 +-L$abi_test_bad_unwind_wrong_register_seh_end: +- +- +- +- +- +- +- +-.globl _abi_test_bad_unwind_temporary +-.private_extern _abi_test_bad_unwind_temporary +-.p2align 4 +-_abi_test_bad_unwind_temporary: +- +-L$abi_test_bad_unwind_temporary_seh_begin: +- pushq %r12 +- +-L$abi_test_bad_unwind_temporary_seh_push_r12: +- +- movq %r12,%rax +- incq %rax +- movq %rax,(%rsp) +- +- +- +- movq %r12,(%rsp) +- +- +- popq %r12 +- +- .byte 0xf3,0xc3 +-L$abi_test_bad_unwind_temporary_seh_end: +- +- +- +- +- +- +- +-.globl _abi_test_get_and_clear_direction_flag +-.private_extern _abi_test_get_and_clear_direction_flag +-_abi_test_get_and_clear_direction_flag: +- pushfq +- popq %rax +- andq $0x400,%rax +- shrq $10,%rax +- cld +- .byte 0xf3,0xc3 +- +- +- +- +- +-.globl _abi_test_set_direction_flag +-.private_extern _abi_test_set_direction_flag +-_abi_test_set_direction_flag: +- std +- .byte 0xf3,0xc3 +- +-#endif +diff --git a/sources.bp b/sources.bp +index 704f514..2f3e684 100644 +--- a/sources.bp ++++ b/sources.bp +@@ -75,6 +75,7 @@ cc_defaults { + "src/crypto/cipher_extra/e_aesctrhmac.c", + "src/crypto/cipher_extra/e_aesgcmsiv.c", + "src/crypto/cipher_extra/e_chacha20poly1305.c", ++ "src/crypto/cipher_extra/e_des.c", + "src/crypto/cipher_extra/e_null.c", + "src/crypto/cipher_extra/e_rc2.c", + "src/crypto/cipher_extra/e_rc4.c", +@@ -93,6 +94,7 @@ cc_defaults { + "src/crypto/crypto.c", + "src/crypto/curve25519/curve25519.c", + "src/crypto/curve25519/spake25519.c", ++ "src/crypto/des/des.c", + "src/crypto/dh_extra/dh_asn1.c", + "src/crypto/dh_extra/params.c", + "src/crypto/digest_extra/digest_extra.c", +diff --git a/sources.mk b/sources.mk +index ebc49c7..397432e 100644 +--- a/sources.mk ++++ b/sources.mk +@@ -73,6 +73,7 @@ crypto_sources := \ + src/crypto/cipher_extra/e_aesctrhmac.c\ + src/crypto/cipher_extra/e_aesgcmsiv.c\ + src/crypto/cipher_extra/e_chacha20poly1305.c\ ++ src/crypto/cipher_extra/e_des.c\ + src/crypto/cipher_extra/e_null.c\ + src/crypto/cipher_extra/e_rc2.c\ + src/crypto/cipher_extra/e_rc4.c\ +@@ -91,6 +92,7 @@ crypto_sources := \ + src/crypto/crypto.c\ + src/crypto/curve25519/curve25519.c\ + src/crypto/curve25519/spake25519.c\ ++ src/crypto/des/des.c\ + src/crypto/dh_extra/dh_asn1.c\ + src/crypto/dh_extra/params.c\ + src/crypto/digest_extra/digest_extra.c\ +diff --git a/src/.gitignore b/src/.gitignore +index a8e3184..6cbc9d2 100644 +--- a/src/.gitignore ++++ b/src/.gitignore +@@ -24,7 +24,7 @@ util/bot/nasm-win32.exe + util/bot/perl-win32 + util/bot/perl-win32.zip + util/bot/sde-linux64 +-util/bot/sde-linux64.tar.bz2 ++util/bot/sde-linux64.tar.xz + util/bot/sde-win32 +-util/bot/sde-win32.tar.bz2 ++util/bot/sde-win32.tar.xz + 
util/bot/win_toolchain.json +diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt +index f74e233..35ff4c1 100644 +--- a/src/CMakeLists.txt ++++ b/src/CMakeLists.txt +@@ -257,10 +257,11 @@ if(CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11") + endif() + +-# pthread_rwlock_t on Linux requires a feature flag. However, it should not be +-# set on Apple platforms, where it instead disables APIs we use. See compat(5) +-# and sys/cdefs.h. +-if(NOT WIN32 AND NOT APPLE) ++# pthread_rwlock_t on Linux requires a feature flag. We limit this to Linux ++# because, on Apple platforms, it instead disables APIs we use. See compat(5) ++# and sys/cdefs.h. Reportedly, FreeBSD also breaks when this is set. See ++# https://crbug.com/boringssl/471. ++if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_XOPEN_SOURCE=700") + endif() + +@@ -592,7 +593,7 @@ add_subdirectory(crypto) + add_subdirectory(ssl) + add_subdirectory(ssl/test) + add_subdirectory(tool) +-add_subdirectory(util/fipstools/cavp) ++add_subdirectory(util/fipstools) + add_subdirectory(util/fipstools/acvp/modulewrapper) + add_subdirectory(decrepit) + +@@ -617,7 +618,7 @@ if(RUST_BINDINGS) + endif() + endif() + +-if(UNIX AND NOT APPLE AND NOT ANDROID) ++if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(HANDSHAKER_ARGS "-handshaker-path" $) + endif() + +diff --git a/src/crypto/CMakeLists.txt b/src/crypto/CMakeLists.txt +index d9cfa5c..6ab74b8 100644 +--- a/src/crypto/CMakeLists.txt ++++ b/src/crypto/CMakeLists.txt +@@ -256,6 +256,7 @@ add_library( + cipher_extra/e_aesctrhmac.c + cipher_extra/e_aesgcmsiv.c + cipher_extra/e_chacha20poly1305.c ++ cipher_extra/e_des.c + cipher_extra/e_null.c + cipher_extra/e_rc2.c + cipher_extra/e_rc4.c +@@ -274,6 +275,7 @@ add_library( + crypto.c + curve25519/curve25519.c + curve25519/spake25519.c ++ des/des.c + dh_extra/params.c + dh_extra/dh_asn1.c + digest_extra/digest_extra.c +diff --git a/src/crypto/cipher_extra/e_aesgcmsiv.c b/src/crypto/cipher_extra/e_aesgcmsiv.c +index 9e77375..387eaff 100644 +--- a/src/crypto/cipher_extra/e_aesgcmsiv.c ++++ b/src/crypto/cipher_extra/e_aesgcmsiv.c +@@ -857,22 +857,15 @@ static const EVP_AEAD aead_aes_256_gcm_siv = { + + #if defined(AES_GCM_SIV_ASM) + +-static char avx_aesni_capable(void) { +- const uint32_t ecx = OPENSSL_ia32cap_P[1]; +- +- return (ecx & (1 << (57 - 32))) != 0 /* AESNI */ && +- (ecx & (1 << 28)) != 0 /* AVX */; +-} +- + const EVP_AEAD *EVP_aead_aes_128_gcm_siv(void) { +- if (avx_aesni_capable()) { ++ if (CRYPTO_is_AVX_capable() && CRYPTO_is_AESNI_capable()) { + return &aead_aes_128_gcm_siv_asm; + } + return &aead_aes_128_gcm_siv; + } + + const EVP_AEAD *EVP_aead_aes_256_gcm_siv(void) { +- if (avx_aesni_capable()) { ++ if (CRYPTO_is_AVX_capable() && CRYPTO_is_AESNI_capable()) { + return &aead_aes_256_gcm_siv_asm; + } + return &aead_aes_256_gcm_siv; +diff --git a/src/crypto/cipher_extra/e_des.c b/src/crypto/cipher_extra/e_des.c +new file mode 100644 +index 0000000..087029b +--- /dev/null ++++ b/src/crypto/cipher_extra/e_des.c +@@ -0,0 +1,258 @@ ++/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. 
The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] 
*/ ++ ++#include ++#include ++#include ++ ++#include "internal.h" ++ ++ ++typedef struct { ++ union { ++ double align; ++ DES_key_schedule ks; ++ } ks; ++} EVP_DES_KEY; ++ ++static int des_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key, ++ const uint8_t *iv, int enc) { ++ DES_cblock *deskey = (DES_cblock *)key; ++ EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; ++ ++ DES_set_key(deskey, &dat->ks.ks); ++ return 1; ++} ++ ++static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in, ++ size_t in_len) { ++ EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; ++ ++ DES_ncbc_encrypt(in, out, in_len, &dat->ks.ks, (DES_cblock *)ctx->iv, ++ ctx->encrypt); ++ ++ return 1; ++} ++ ++static const EVP_CIPHER evp_des_cbc = { ++ /* nid = */ NID_des_cbc, ++ /* block_size = */ 8, ++ /* key_len = */ 8, ++ /* iv_len = */ 8, ++ /* ctx_size = */ sizeof(EVP_DES_KEY), ++ /* flags = */ EVP_CIPH_CBC_MODE, ++ /* app_data = */ NULL, ++ /* init = */ des_init_key, ++ /* cipher = */ des_cbc_cipher, ++ /* cleanup = */ NULL, ++ /* ctrl = */ NULL, ++}; ++ ++const EVP_CIPHER *EVP_des_cbc(void) { return &evp_des_cbc; } ++ ++static int des_ecb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in, ++ size_t in_len) { ++ if (in_len < ctx->cipher->block_size) { ++ return 1; ++ } ++ in_len -= ctx->cipher->block_size; ++ ++ EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; ++ for (size_t i = 0; i <= in_len; i += ctx->cipher->block_size) { ++ DES_ecb_encrypt((DES_cblock *)(in + i), (DES_cblock *)(out + i), ++ &dat->ks.ks, ctx->encrypt); ++ } ++ return 1; ++} ++ ++static const EVP_CIPHER evp_des_ecb = { ++ /* nid = */ NID_des_ecb, ++ /* block_size = */ 8, ++ /* key_len = */ 8, ++ /* iv_len = */ 0, ++ /* ctx_size = */ sizeof(EVP_DES_KEY), ++ /* flags = */ EVP_CIPH_ECB_MODE, ++ /* app_data = */ NULL, ++ /* init = */ des_init_key, ++ /* cipher = */ des_ecb_cipher, ++ /* cleanup = */ NULL, ++ /* ctrl = */ NULL, ++}; ++ ++const EVP_CIPHER *EVP_des_ecb(void) { return &evp_des_ecb; } ++ ++typedef struct { ++ union { ++ double align; ++ DES_key_schedule ks[3]; ++ } ks; ++} DES_EDE_KEY; ++ ++static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key, ++ const uint8_t *iv, int enc) { ++ DES_cblock *deskey = (DES_cblock *)key; ++ DES_EDE_KEY *dat = (DES_EDE_KEY *)ctx->cipher_data; ++ ++ DES_set_key(&deskey[0], &dat->ks.ks[0]); ++ DES_set_key(&deskey[1], &dat->ks.ks[1]); ++ DES_set_key(&deskey[2], &dat->ks.ks[2]); ++ ++ return 1; ++} ++ ++static int des_ede3_cbc_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, ++ const uint8_t *in, size_t in_len) { ++ DES_EDE_KEY *dat = (DES_EDE_KEY *)ctx->cipher_data; ++ ++ DES_ede3_cbc_encrypt(in, out, in_len, &dat->ks.ks[0], &dat->ks.ks[1], ++ &dat->ks.ks[2], (DES_cblock *)ctx->iv, ctx->encrypt); ++ ++ return 1; ++} ++ ++static const EVP_CIPHER evp_des_ede3_cbc = { ++ /* nid = */ NID_des_ede3_cbc, ++ /* block_size = */ 8, ++ /* key_len = */ 24, ++ /* iv_len = */ 8, ++ /* ctx_size = */ sizeof(DES_EDE_KEY), ++ /* flags = */ EVP_CIPH_CBC_MODE, ++ /* app_data = */ NULL, ++ /* init = */ des_ede3_init_key, ++ /* cipher = */ des_ede3_cbc_cipher, ++ /* cleanup = */ NULL, ++ /* ctrl = */ NULL, ++}; ++ ++const EVP_CIPHER *EVP_des_ede3_cbc(void) { return &evp_des_ede3_cbc; } ++ ++static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key, ++ const uint8_t *iv, int enc) { ++ DES_cblock *deskey = (DES_cblock *)key; ++ DES_EDE_KEY *dat = (DES_EDE_KEY *)ctx->cipher_data; ++ ++ DES_set_key(&deskey[0], &dat->ks.ks[0]); ++ DES_set_key(&deskey[1], &dat->ks.ks[1]); ++ 
DES_set_key(&deskey[0], &dat->ks.ks[2]); ++ ++ return 1; ++} ++ ++static const EVP_CIPHER evp_des_ede_cbc = { ++ /* nid = */ NID_des_ede_cbc, ++ /* block_size = */ 8, ++ /* key_len = */ 16, ++ /* iv_len = */ 8, ++ /* ctx_size = */ sizeof(DES_EDE_KEY), ++ /* flags = */ EVP_CIPH_CBC_MODE, ++ /* app_data = */ NULL, ++ /* init = */ des_ede_init_key, ++ /* cipher = */ des_ede3_cbc_cipher, ++ /* cleanup = */ NULL, ++ /* ctrl = */ NULL, ++}; ++ ++const EVP_CIPHER *EVP_des_ede_cbc(void) { return &evp_des_ede_cbc; } ++ ++static int des_ede_ecb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, ++ const uint8_t *in, size_t in_len) { ++ if (in_len < ctx->cipher->block_size) { ++ return 1; ++ } ++ in_len -= ctx->cipher->block_size; ++ ++ DES_EDE_KEY *dat = (DES_EDE_KEY *) ctx->cipher_data; ++ for (size_t i = 0; i <= in_len; i += ctx->cipher->block_size) { ++ DES_ecb3_encrypt((DES_cblock *) (in + i), (DES_cblock *) (out + i), ++ &dat->ks.ks[0], &dat->ks.ks[1], &dat->ks.ks[2], ++ ctx->encrypt); ++ } ++ return 1; ++} ++ ++static const EVP_CIPHER evp_des_ede = { ++ /* nid = */ NID_des_ede_ecb, ++ /* block_size = */ 8, ++ /* key_len = */ 16, ++ /* iv_len = */ 0, ++ /* ctx_size = */ sizeof(DES_EDE_KEY), ++ /* flags = */ EVP_CIPH_ECB_MODE, ++ /* app_data = */ NULL, ++ /* init = */ des_ede_init_key, ++ /* cipher = */ des_ede_ecb_cipher, ++ /* cleanup = */ NULL, ++ /* ctrl = */ NULL, ++}; ++ ++const EVP_CIPHER *EVP_des_ede(void) { return &evp_des_ede; } ++ ++static const EVP_CIPHER evp_des_ede3 = { ++ /* nid = */ NID_des_ede3_ecb, ++ /* block_size = */ 8, ++ /* key_len = */ 24, ++ /* iv_len = */ 0, ++ /* ctx_size = */ sizeof(DES_EDE_KEY), ++ /* flags = */ EVP_CIPH_ECB_MODE, ++ /* app_data = */ NULL, ++ /* init = */ des_ede3_init_key, ++ /* cipher = */ des_ede_ecb_cipher, ++ /* cleanup = */ NULL, ++ /* ctrl = */ NULL, ++}; ++ ++const EVP_CIPHER *EVP_des_ede3(void) { return &evp_des_ede3; } ++ ++const EVP_CIPHER *EVP_des_ede3_ecb(void) { return EVP_des_ede3(); } +diff --git a/src/crypto/cipher_extra/internal.h b/src/crypto/cipher_extra/internal.h +index 0f5f566..4e8fa46 100644 +--- a/src/crypto/cipher_extra/internal.h ++++ b/src/crypto/cipher_extra/internal.h +@@ -171,8 +171,7 @@ OPENSSL_STATIC_ASSERT(sizeof(union chacha20_poly1305_seal_data) == 48 + 8 + 8, + "wrong chacha20_poly1305_seal_data size"); + + OPENSSL_INLINE int chacha20_poly1305_asm_capable(void) { +- const int sse41_capable = (OPENSSL_ia32cap_P[1] & (1 << 19)) != 0; +- return sse41_capable; ++ return CRYPTO_is_SSE4_1_capable(); + } + + // chacha20_poly1305_open is defined in chacha20_poly1305_x86_64.pl. 
It decrypts +diff --git a/src/crypto/curve25519/curve25519.c b/src/crypto/curve25519/curve25519.c +index 64aa1e6..7cb0add 100644 +--- a/src/crypto/curve25519/curve25519.c ++++ b/src/crypto/curve25519/curve25519.c +@@ -502,27 +502,21 @@ static void ge_p3_tobytes(uint8_t s[32], const ge_p3 *h) { + int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t s[32]) { + fe u; + fe_loose v; +- fe v3; ++ fe w; + fe vxx; + fe_loose check; + + fe_frombytes(&h->Y, s); + fe_1(&h->Z); +- fe_sq_tt(&v3, &h->Y); +- fe_mul_ttt(&vxx, &v3, &d); +- fe_sub(&v, &v3, &h->Z); // u = y^2-1 ++ fe_sq_tt(&w, &h->Y); ++ fe_mul_ttt(&vxx, &w, &d); ++ fe_sub(&v, &w, &h->Z); // u = y^2-1 + fe_carry(&u, &v); + fe_add(&v, &vxx, &h->Z); // v = dy^2+1 + +- fe_sq_tl(&v3, &v); +- fe_mul_ttl(&v3, &v3, &v); // v3 = v^3 +- fe_sq_tt(&h->X, &v3); +- fe_mul_ttl(&h->X, &h->X, &v); +- fe_mul_ttt(&h->X, &h->X, &u); // x = uv^7 +- +- fe_pow22523(&h->X, &h->X); // x = (uv^7)^((q-5)/8) +- fe_mul_ttt(&h->X, &h->X, &v3); +- fe_mul_ttt(&h->X, &h->X, &u); // x = uv^3(uv^7)^((q-5)/8) ++ fe_mul_ttl(&w, &u, &v); // w = u*v ++ fe_pow22523(&h->X, &w); // x = w^((q-5)/8) ++ fe_mul_ttt(&h->X, &h->X, &u); // x = u*w^((q-5)/8) + + fe_sq_tt(&vxx, &h->X); + fe_mul_ttl(&vxx, &vxx, &v); +diff --git a/src/crypto/des/des.c b/src/crypto/des/des.c +new file mode 100644 +index 0000000..95c430c +--- /dev/null ++++ b/src/crypto/des/des.c +@@ -0,0 +1,784 @@ ++/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. 
If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. ++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] */ ++ ++#include ++ ++#include ++ ++#include "internal.h" ++ ++ ++static const uint32_t des_skb[8][64] = { ++ { // for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 ++ 0x00000000L, 0x00000010L, 0x20000000L, 0x20000010L, 0x00010000L, ++ 0x00010010L, 0x20010000L, 0x20010010L, 0x00000800L, 0x00000810L, ++ 0x20000800L, 0x20000810L, 0x00010800L, 0x00010810L, 0x20010800L, ++ 0x20010810L, 0x00000020L, 0x00000030L, 0x20000020L, 0x20000030L, ++ 0x00010020L, 0x00010030L, 0x20010020L, 0x20010030L, 0x00000820L, ++ 0x00000830L, 0x20000820L, 0x20000830L, 0x00010820L, 0x00010830L, ++ 0x20010820L, 0x20010830L, 0x00080000L, 0x00080010L, 0x20080000L, ++ 0x20080010L, 0x00090000L, 0x00090010L, 0x20090000L, 0x20090010L, ++ 0x00080800L, 0x00080810L, 0x20080800L, 0x20080810L, 0x00090800L, ++ 0x00090810L, 0x20090800L, 0x20090810L, 0x00080020L, 0x00080030L, ++ 0x20080020L, 0x20080030L, 0x00090020L, 0x00090030L, 0x20090020L, ++ 0x20090030L, 0x00080820L, 0x00080830L, 0x20080820L, 0x20080830L, ++ 0x00090820L, 0x00090830L, 0x20090820L, 0x20090830L, }, ++ { // for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 ++ 0x00000000L, 0x02000000L, 0x00002000L, 0x02002000L, 0x00200000L, ++ 0x02200000L, 0x00202000L, 0x02202000L, 0x00000004L, 0x02000004L, ++ 0x00002004L, 0x02002004L, 0x00200004L, 0x02200004L, 0x00202004L, ++ 0x02202004L, 0x00000400L, 0x02000400L, 0x00002400L, 0x02002400L, ++ 0x00200400L, 0x02200400L, 0x00202400L, 0x02202400L, 0x00000404L, ++ 0x02000404L, 0x00002404L, 0x02002404L, 0x00200404L, 0x02200404L, ++ 0x00202404L, 0x02202404L, 0x10000000L, 0x12000000L, 0x10002000L, ++ 0x12002000L, 0x10200000L, 0x12200000L, 0x10202000L, 0x12202000L, ++ 0x10000004L, 0x12000004L, 0x10002004L, 0x12002004L, 0x10200004L, ++ 0x12200004L, 0x10202004L, 0x12202004L, 0x10000400L, 0x12000400L, ++ 0x10002400L, 0x12002400L, 0x10200400L, 0x12200400L, 0x10202400L, ++ 0x12202400L, 0x10000404L, 0x12000404L, 0x10002404L, 0x12002404L, ++ 0x10200404L, 0x12200404L, 0x10202404L, 0x12202404L, }, ++ { // for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 ++ 0x00000000L, 0x00000001L, 0x00040000L, 0x00040001L, 0x01000000L, ++ 0x01000001L, 0x01040000L, 0x01040001L, 0x00000002L, 0x00000003L, ++ 0x00040002L, 0x00040003L, 0x01000002L, 0x01000003L, 0x01040002L, ++ 0x01040003L, 0x00000200L, 0x00000201L, 
0x00040200L, 0x00040201L, ++ 0x01000200L, 0x01000201L, 0x01040200L, 0x01040201L, 0x00000202L, ++ 0x00000203L, 0x00040202L, 0x00040203L, 0x01000202L, 0x01000203L, ++ 0x01040202L, 0x01040203L, 0x08000000L, 0x08000001L, 0x08040000L, ++ 0x08040001L, 0x09000000L, 0x09000001L, 0x09040000L, 0x09040001L, ++ 0x08000002L, 0x08000003L, 0x08040002L, 0x08040003L, 0x09000002L, ++ 0x09000003L, 0x09040002L, 0x09040003L, 0x08000200L, 0x08000201L, ++ 0x08040200L, 0x08040201L, 0x09000200L, 0x09000201L, 0x09040200L, ++ 0x09040201L, 0x08000202L, 0x08000203L, 0x08040202L, 0x08040203L, ++ 0x09000202L, 0x09000203L, 0x09040202L, 0x09040203L, }, ++ { // for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 ++ 0x00000000L, 0x00100000L, 0x00000100L, 0x00100100L, 0x00000008L, ++ 0x00100008L, 0x00000108L, 0x00100108L, 0x00001000L, 0x00101000L, ++ 0x00001100L, 0x00101100L, 0x00001008L, 0x00101008L, 0x00001108L, ++ 0x00101108L, 0x04000000L, 0x04100000L, 0x04000100L, 0x04100100L, ++ 0x04000008L, 0x04100008L, 0x04000108L, 0x04100108L, 0x04001000L, ++ 0x04101000L, 0x04001100L, 0x04101100L, 0x04001008L, 0x04101008L, ++ 0x04001108L, 0x04101108L, 0x00020000L, 0x00120000L, 0x00020100L, ++ 0x00120100L, 0x00020008L, 0x00120008L, 0x00020108L, 0x00120108L, ++ 0x00021000L, 0x00121000L, 0x00021100L, 0x00121100L, 0x00021008L, ++ 0x00121008L, 0x00021108L, 0x00121108L, 0x04020000L, 0x04120000L, ++ 0x04020100L, 0x04120100L, 0x04020008L, 0x04120008L, 0x04020108L, ++ 0x04120108L, 0x04021000L, 0x04121000L, 0x04021100L, 0x04121100L, ++ 0x04021008L, 0x04121008L, 0x04021108L, 0x04121108L, }, ++ { // for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 ++ 0x00000000L, 0x10000000L, 0x00010000L, 0x10010000L, 0x00000004L, ++ 0x10000004L, 0x00010004L, 0x10010004L, 0x20000000L, 0x30000000L, ++ 0x20010000L, 0x30010000L, 0x20000004L, 0x30000004L, 0x20010004L, ++ 0x30010004L, 0x00100000L, 0x10100000L, 0x00110000L, 0x10110000L, ++ 0x00100004L, 0x10100004L, 0x00110004L, 0x10110004L, 0x20100000L, ++ 0x30100000L, 0x20110000L, 0x30110000L, 0x20100004L, 0x30100004L, ++ 0x20110004L, 0x30110004L, 0x00001000L, 0x10001000L, 0x00011000L, ++ 0x10011000L, 0x00001004L, 0x10001004L, 0x00011004L, 0x10011004L, ++ 0x20001000L, 0x30001000L, 0x20011000L, 0x30011000L, 0x20001004L, ++ 0x30001004L, 0x20011004L, 0x30011004L, 0x00101000L, 0x10101000L, ++ 0x00111000L, 0x10111000L, 0x00101004L, 0x10101004L, 0x00111004L, ++ 0x10111004L, 0x20101000L, 0x30101000L, 0x20111000L, 0x30111000L, ++ 0x20101004L, 0x30101004L, 0x20111004L, 0x30111004L, }, ++ { // for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 ++ 0x00000000L, 0x08000000L, 0x00000008L, 0x08000008L, 0x00000400L, ++ 0x08000400L, 0x00000408L, 0x08000408L, 0x00020000L, 0x08020000L, ++ 0x00020008L, 0x08020008L, 0x00020400L, 0x08020400L, 0x00020408L, ++ 0x08020408L, 0x00000001L, 0x08000001L, 0x00000009L, 0x08000009L, ++ 0x00000401L, 0x08000401L, 0x00000409L, 0x08000409L, 0x00020001L, ++ 0x08020001L, 0x00020009L, 0x08020009L, 0x00020401L, 0x08020401L, ++ 0x00020409L, 0x08020409L, 0x02000000L, 0x0A000000L, 0x02000008L, ++ 0x0A000008L, 0x02000400L, 0x0A000400L, 0x02000408L, 0x0A000408L, ++ 0x02020000L, 0x0A020000L, 0x02020008L, 0x0A020008L, 0x02020400L, ++ 0x0A020400L, 0x02020408L, 0x0A020408L, 0x02000001L, 0x0A000001L, ++ 0x02000009L, 0x0A000009L, 0x02000401L, 0x0A000401L, 0x02000409L, ++ 0x0A000409L, 0x02020001L, 0x0A020001L, 0x02020009L, 0x0A020009L, ++ 0x02020401L, 0x0A020401L, 0x02020409L, 0x0A020409L, }, ++ { // for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 ++ 0x00000000L, 0x00000100L, 0x00080000L, 0x00080100L, 
0x01000000L, ++ 0x01000100L, 0x01080000L, 0x01080100L, 0x00000010L, 0x00000110L, ++ 0x00080010L, 0x00080110L, 0x01000010L, 0x01000110L, 0x01080010L, ++ 0x01080110L, 0x00200000L, 0x00200100L, 0x00280000L, 0x00280100L, ++ 0x01200000L, 0x01200100L, 0x01280000L, 0x01280100L, 0x00200010L, ++ 0x00200110L, 0x00280010L, 0x00280110L, 0x01200010L, 0x01200110L, ++ 0x01280010L, 0x01280110L, 0x00000200L, 0x00000300L, 0x00080200L, ++ 0x00080300L, 0x01000200L, 0x01000300L, 0x01080200L, 0x01080300L, ++ 0x00000210L, 0x00000310L, 0x00080210L, 0x00080310L, 0x01000210L, ++ 0x01000310L, 0x01080210L, 0x01080310L, 0x00200200L, 0x00200300L, ++ 0x00280200L, 0x00280300L, 0x01200200L, 0x01200300L, 0x01280200L, ++ 0x01280300L, 0x00200210L, 0x00200310L, 0x00280210L, 0x00280310L, ++ 0x01200210L, 0x01200310L, 0x01280210L, 0x01280310L, }, ++ { // for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 ++ 0x00000000L, 0x04000000L, 0x00040000L, 0x04040000L, 0x00000002L, ++ 0x04000002L, 0x00040002L, 0x04040002L, 0x00002000L, 0x04002000L, ++ 0x00042000L, 0x04042000L, 0x00002002L, 0x04002002L, 0x00042002L, ++ 0x04042002L, 0x00000020L, 0x04000020L, 0x00040020L, 0x04040020L, ++ 0x00000022L, 0x04000022L, 0x00040022L, 0x04040022L, 0x00002020L, ++ 0x04002020L, 0x00042020L, 0x04042020L, 0x00002022L, 0x04002022L, ++ 0x00042022L, 0x04042022L, 0x00000800L, 0x04000800L, 0x00040800L, ++ 0x04040800L, 0x00000802L, 0x04000802L, 0x00040802L, 0x04040802L, ++ 0x00002800L, 0x04002800L, 0x00042800L, 0x04042800L, 0x00002802L, ++ 0x04002802L, 0x00042802L, 0x04042802L, 0x00000820L, 0x04000820L, ++ 0x00040820L, 0x04040820L, 0x00000822L, 0x04000822L, 0x00040822L, ++ 0x04040822L, 0x00002820L, 0x04002820L, 0x00042820L, 0x04042820L, ++ 0x00002822L, 0x04002822L, 0x00042822L, 0x04042822L, }}; ++ ++static const uint32_t DES_SPtrans[8][64] = { ++ { // nibble 0 ++ 0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, 0x02000000L, ++ 0x00080802L, 0x00080002L, 0x02000002L, 0x00080802L, 0x02080800L, ++ 0x02080000L, 0x00000802L, 0x02000802L, 0x02000000L, 0x00000000L, ++ 0x00080002L, 0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L, ++ 0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L, 0x00000002L, ++ 0x00000800L, 0x00080800L, 0x02080002L, 0x00000800L, 0x02000802L, ++ 0x02080002L, 0x00000000L, 0x00000000L, 0x02080802L, 0x02000800L, ++ 0x00080002L, 0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L, ++ 0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L, 0x00080802L, ++ 0x00000002L, 0x02000002L, 0x02080000L, 0x02080802L, 0x00080800L, ++ 0x02080000L, 0x02000802L, 0x02000000L, 0x00000802L, 0x00080002L, ++ 0x00000000L, 0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L, ++ 0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L, }, ++ { // nibble 1 ++ 0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L, 0x40000010L, ++ 0x00008010L, 0x40008000L, 0x00108000L, 0x00008000L, 0x40100010L, ++ 0x00000010L, 0x40008000L, 0x00100010L, 0x40108000L, 0x40100000L, ++ 0x00000010L, 0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L, ++ 0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L, 0x40008010L, ++ 0x00108010L, 0x40108000L, 0x40000010L, 0x40000000L, 0x00100000L, ++ 0x00008010L, 0x40108010L, 0x00100010L, 0x40108000L, 0x40008000L, ++ 0x00108010L, 0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L, ++ 0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L, 0x00008000L, ++ 0x40000000L, 0x00108010L, 0x40008010L, 0x40108000L, 0x00008000L, ++ 0x00000000L, 0x40000010L, 0x00000010L, 0x40108010L, 0x00108000L, ++ 0x40100000L, 0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L, ++ 0x40008010L, 
0x00000010L, 0x40100000L, 0x00108000L, }, ++ { // nibble 2 ++ 0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L, 0x00040001L, ++ 0x04000000L, 0x04000101L, 0x00040100L, 0x04000100L, 0x00040000L, ++ 0x04040000L, 0x00000001L, 0x04040101L, 0x00000101L, 0x00000001L, ++ 0x04040001L, 0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L, ++ 0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L, 0x04040001L, ++ 0x04000100L, 0x00040101L, 0x04040000L, 0x00040100L, 0x00000000L, ++ 0x04000000L, 0x00040101L, 0x04040100L, 0x00000100L, 0x00000001L, ++ 0x00040000L, 0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L, ++ 0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L, 0x00040001L, ++ 0x04000000L, 0x04040101L, 0x00000001L, 0x00040101L, 0x04000001L, ++ 0x04000000L, 0x04040101L, 0x00040000L, 0x04000100L, 0x04000101L, ++ 0x00040100L, 0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L, ++ 0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L, }, ++ { // nibble 3 ++ 0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L, 0x00000000L, ++ 0x10400000L, 0x10001008L, 0x00400008L, 0x10401000L, 0x10000008L, ++ 0x10000000L, 0x00001008L, 0x10000008L, 0x00401008L, 0x00400000L, ++ 0x10000000L, 0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L, ++ 0x00401000L, 0x10001008L, 0x10400000L, 0x00001000L, 0x00001008L, ++ 0x00000000L, 0x00400008L, 0x10401000L, 0x10001000L, 0x10400008L, ++ 0x10401008L, 0x00400000L, 0x10400008L, 0x00001008L, 0x00400000L, ++ 0x10000008L, 0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L, ++ 0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L, 0x00000000L, ++ 0x10400008L, 0x10401000L, 0x00001000L, 0x10000000L, 0x10401008L, ++ 0x00401008L, 0x00400000L, 0x10401008L, 0x00000008L, 0x10001000L, ++ 0x00401008L, 0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L, ++ 0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L, }, ++ { // nibble 4 ++ 0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L, 0x08010020L, ++ 0x08000400L, 0x00010420L, 0x08010000L, 0x00010000L, 0x00000020L, ++ 0x08000020L, 0x00010400L, 0x08000420L, 0x08010020L, 0x08010400L, ++ 0x00000000L, 0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L, ++ 0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L, 0x00000020L, ++ 0x08000420L, 0x08010420L, 0x00010020L, 0x08010000L, 0x00000400L, ++ 0x00000420L, 0x08010400L, 0x08010400L, 0x08000420L, 0x00010020L, ++ 0x08010000L, 0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L, ++ 0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L, 0x00010420L, ++ 0x08000000L, 0x00000400L, 0x00010020L, 0x08000420L, 0x00000400L, ++ 0x00000000L, 0x08010420L, 0x08010020L, 0x08010400L, 0x00000420L, ++ 0x00010000L, 0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L, ++ 0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L, }, ++ { // nibble 5 ++ 0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L, 0x00200040L, ++ 0x00002000L, 0x80002040L, 0x00200000L, 0x00002040L, 0x80202040L, ++ 0x00202000L, 0x80000000L, 0x80002000L, 0x80000040L, 0x80200000L, ++ 0x00202040L, 0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L, ++ 0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L, 0x80202040L, ++ 0x80200000L, 0x80000000L, 0x00002040L, 0x00000040L, 0x00202000L, ++ 0x00202040L, 0x80002000L, 0x00002040L, 0x80000000L, 0x80002000L, ++ 0x00202040L, 0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L, ++ 0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L, 0x00200040L, ++ 0x80202040L, 0x00202000L, 0x00000040L, 0x80202040L, 0x00202000L, ++ 0x00200000L, 0x80002040L, 0x80000040L, 0x80200000L, 0x00202040L, ++ 0x00000000L, 0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L, ++ 
0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L, }, ++ { // nibble 6 ++ 0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L, 0x01004204L, ++ 0x00004004L, 0x00004200L, 0x00000000L, 0x01000000L, 0x01000204L, ++ 0x00000204L, 0x01004000L, 0x00000004L, 0x01004200L, 0x01004000L, ++ 0x00000204L, 0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L, ++ 0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L, 0x01004004L, ++ 0x00004204L, 0x01004200L, 0x00000004L, 0x00004204L, 0x01004004L, ++ 0x00000200L, 0x01000000L, 0x00004204L, 0x01004000L, 0x01004004L, ++ 0x00000204L, 0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L, ++ 0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L, 0x00000200L, ++ 0x01000004L, 0x00000004L, 0x01000200L, 0x00000000L, 0x01000204L, ++ 0x01000200L, 0x00004200L, 0x00000204L, 0x00004000L, 0x01004204L, ++ 0x01000000L, 0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L, ++ 0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L, }, ++ { // nibble 7 ++ 0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L, 0x20020000L, ++ 0x00800080L, 0x20800000L, 0x20820080L, 0x00000080L, 0x20000000L, ++ 0x00820000L, 0x00020080L, 0x00820080L, 0x20020080L, 0x20000080L, ++ 0x20800000L, 0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L, ++ 0x20820080L, 0x20000080L, 0x00000000L, 0x00820000L, 0x20000000L, ++ 0x00800000L, 0x20020080L, 0x20800080L, 0x00800000L, 0x00020000L, ++ 0x20820000L, 0x00000080L, 0x00800000L, 0x00020000L, 0x20000080L, ++ 0x20820080L, 0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L, ++ 0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L, 0x20820000L, ++ 0x00000080L, 0x00800080L, 0x20020000L, 0x20820080L, 0x00800000L, ++ 0x20800000L, 0x20000080L, 0x00820000L, 0x00020080L, 0x20020080L, ++ 0x20800000L, 0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L, ++ 0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L, }}; ++ ++#define HPERM_OP(a, t, n, m) \ ++ ((t) = ((((a) << (16 - (n))) ^ (a)) & (m)), \ ++ (a) = (a) ^ (t) ^ ((t) >> (16 - (n)))) ++ ++void DES_set_key(const DES_cblock *key, DES_key_schedule *schedule) { ++ static const int shifts2[16] = {0, 0, 1, 1, 1, 1, 1, 1, ++ 0, 1, 1, 1, 1, 1, 1, 0}; ++ uint32_t c, d, t, s, t2; ++ const uint8_t *in; ++ int i; ++ ++ in = key->bytes; ++ ++ c2l(in, c); ++ c2l(in, d); ++ ++ // do PC1 in 47 simple operations :-) ++ // Thanks to John Fletcher (john_fletcher@lccmail.ocf.llnl.gov) ++ // for the inspiration. 
:-) ++ PERM_OP(d, c, t, 4, 0x0f0f0f0fL); ++ HPERM_OP(c, t, -2, 0xcccc0000L); ++ HPERM_OP(d, t, -2, 0xcccc0000L); ++ PERM_OP(d, c, t, 1, 0x55555555L); ++ PERM_OP(c, d, t, 8, 0x00ff00ffL); ++ PERM_OP(d, c, t, 1, 0x55555555L); ++ d = (((d & 0x000000ffL) << 16L) | (d & 0x0000ff00L) | ++ ((d & 0x00ff0000L) >> 16L) | ((c & 0xf0000000L) >> 4L)); ++ c &= 0x0fffffffL; ++ ++ for (i = 0; i < ITERATIONS; i++) { ++ if (shifts2[i]) { ++ c = ((c >> 2L) | (c << 26L)); ++ d = ((d >> 2L) | (d << 26L)); ++ } else { ++ c = ((c >> 1L) | (c << 27L)); ++ d = ((d >> 1L) | (d << 27L)); ++ } ++ c &= 0x0fffffffL; ++ d &= 0x0fffffffL; ++ // could be a few less shifts but I am to lazy at this ++ // point in time to investigate ++ s = des_skb[0][(c) & 0x3f] | ++ des_skb[1][((c >> 6L) & 0x03) | ((c >> 7L) & 0x3c)] | ++ des_skb[2][((c >> 13L) & 0x0f) | ((c >> 14L) & 0x30)] | ++ des_skb[3][((c >> 20L) & 0x01) | ((c >> 21L) & 0x06) | ++ ((c >> 22L) & 0x38)]; ++ t = des_skb[4][(d) & 0x3f] | ++ des_skb[5][((d >> 7L) & 0x03) | ((d >> 8L) & 0x3c)] | ++ des_skb[6][(d >> 15L) & 0x3f] | ++ des_skb[7][((d >> 21L) & 0x0f) | ((d >> 22L) & 0x30)]; ++ ++ // table contained 0213 4657 ++ t2 = ((t << 16L) | (s & 0x0000ffffL)) & 0xffffffffL; ++ schedule->subkeys[i][0] = CRYPTO_rotr_u32(t2, 30); ++ ++ t2 = ((s >> 16L) | (t & 0xffff0000L)); ++ schedule->subkeys[i][1] = CRYPTO_rotr_u32(t2, 26); ++ } ++} ++ ++static const uint8_t kOddParity[256] = { ++ 1, 1, 2, 2, 4, 4, 7, 7, 8, 8, 11, 11, 13, 13, 14, ++ 14, 16, 16, 19, 19, 21, 21, 22, 22, 25, 25, 26, 26, 28, 28, ++ 31, 31, 32, 32, 35, 35, 37, 37, 38, 38, 41, 41, 42, 42, 44, ++ 44, 47, 47, 49, 49, 50, 50, 52, 52, 55, 55, 56, 56, 59, 59, ++ 61, 61, 62, 62, 64, 64, 67, 67, 69, 69, 70, 70, 73, 73, 74, ++ 74, 76, 76, 79, 79, 81, 81, 82, 82, 84, 84, 87, 87, 88, 88, ++ 91, 91, 93, 93, 94, 94, 97, 97, 98, 98, 100, 100, 103, 103, 104, ++ 104, 107, 107, 109, 109, 110, 110, 112, 112, 115, 115, 117, 117, 118, 118, ++ 121, 121, 122, 122, 124, 124, 127, 127, 128, 128, 131, 131, 133, 133, 134, ++ 134, 137, 137, 138, 138, 140, 140, 143, 143, 145, 145, 146, 146, 148, 148, ++ 151, 151, 152, 152, 155, 155, 157, 157, 158, 158, 161, 161, 162, 162, 164, ++ 164, 167, 167, 168, 168, 171, 171, 173, 173, 174, 174, 176, 176, 179, 179, ++ 181, 181, 182, 182, 185, 185, 186, 186, 188, 188, 191, 191, 193, 193, 194, ++ 194, 196, 196, 199, 199, 200, 200, 203, 203, 205, 205, 206, 206, 208, 208, ++ 211, 211, 213, 213, 214, 214, 217, 217, 218, 218, 220, 220, 223, 223, 224, ++ 224, 227, 227, 229, 229, 230, 230, 233, 233, 234, 234, 236, 236, 239, 239, ++ 241, 241, 242, 242, 244, 244, 247, 247, 248, 248, 251, 251, 253, 253, 254, ++ 254 ++}; ++ ++void DES_set_odd_parity(DES_cblock *key) { ++ unsigned i; ++ ++ for (i = 0; i < DES_KEY_SZ; i++) { ++ key->bytes[i] = kOddParity[key->bytes[i]]; ++ } ++} ++ ++static void DES_encrypt1(uint32_t *data, const DES_key_schedule *ks, int enc) { ++ uint32_t l, r, t, u; ++ ++ r = data[0]; ++ l = data[1]; ++ ++ IP(r, l); ++ // Things have been modified so that the initial rotate is done outside ++ // the loop. This required the DES_SPtrans values in sp.h to be ++ // rotated 1 bit to the right. One perl script later and things have a ++ // 5% speed up on a sparc2. Thanks to Richard Outerbridge ++ // <71755.204@CompuServe.COM> for pointing this out. 
++ // clear the top bits on machines with 8byte longs ++ // shift left by 2 ++ r = CRYPTO_rotr_u32(r, 29); ++ l = CRYPTO_rotr_u32(l, 29); ++ ++ // I don't know if it is worth the effort of loop unrolling the ++ // inner loop ++ if (enc) { ++ D_ENCRYPT(ks, l, r, 0); ++ D_ENCRYPT(ks, r, l, 1); ++ D_ENCRYPT(ks, l, r, 2); ++ D_ENCRYPT(ks, r, l, 3); ++ D_ENCRYPT(ks, l, r, 4); ++ D_ENCRYPT(ks, r, l, 5); ++ D_ENCRYPT(ks, l, r, 6); ++ D_ENCRYPT(ks, r, l, 7); ++ D_ENCRYPT(ks, l, r, 8); ++ D_ENCRYPT(ks, r, l, 9); ++ D_ENCRYPT(ks, l, r, 10); ++ D_ENCRYPT(ks, r, l, 11); ++ D_ENCRYPT(ks, l, r, 12); ++ D_ENCRYPT(ks, r, l, 13); ++ D_ENCRYPT(ks, l, r, 14); ++ D_ENCRYPT(ks, r, l, 15); ++ } else { ++ D_ENCRYPT(ks, l, r, 15); ++ D_ENCRYPT(ks, r, l, 14); ++ D_ENCRYPT(ks, l, r, 13); ++ D_ENCRYPT(ks, r, l, 12); ++ D_ENCRYPT(ks, l, r, 11); ++ D_ENCRYPT(ks, r, l, 10); ++ D_ENCRYPT(ks, l, r, 9); ++ D_ENCRYPT(ks, r, l, 8); ++ D_ENCRYPT(ks, l, r, 7); ++ D_ENCRYPT(ks, r, l, 6); ++ D_ENCRYPT(ks, l, r, 5); ++ D_ENCRYPT(ks, r, l, 4); ++ D_ENCRYPT(ks, l, r, 3); ++ D_ENCRYPT(ks, r, l, 2); ++ D_ENCRYPT(ks, l, r, 1); ++ D_ENCRYPT(ks, r, l, 0); ++ } ++ ++ // rotate and clear the top bits on machines with 8byte longs ++ l = CRYPTO_rotr_u32(l, 3); ++ r = CRYPTO_rotr_u32(r, 3); ++ ++ FP(r, l); ++ data[0] = l; ++ data[1] = r; ++} ++ ++static void DES_encrypt2(uint32_t *data, const DES_key_schedule *ks, int enc) { ++ uint32_t l, r, t, u; ++ ++ r = data[0]; ++ l = data[1]; ++ ++ // Things have been modified so that the initial rotate is done outside the ++ // loop. This required the DES_SPtrans values in sp.h to be rotated 1 bit to ++ // the right. One perl script later and things have a 5% speed up on a ++ // sparc2. Thanks to Richard Outerbridge <71755.204@CompuServe.COM> for ++ // pointing this out. 
++ // clear the top bits on machines with 8byte longs ++ r = CRYPTO_rotr_u32(r, 29); ++ l = CRYPTO_rotr_u32(l, 29); ++ ++ // I don't know if it is worth the effort of loop unrolling the ++ // inner loop ++ if (enc) { ++ D_ENCRYPT(ks, l, r, 0); ++ D_ENCRYPT(ks, r, l, 1); ++ D_ENCRYPT(ks, l, r, 2); ++ D_ENCRYPT(ks, r, l, 3); ++ D_ENCRYPT(ks, l, r, 4); ++ D_ENCRYPT(ks, r, l, 5); ++ D_ENCRYPT(ks, l, r, 6); ++ D_ENCRYPT(ks, r, l, 7); ++ D_ENCRYPT(ks, l, r, 8); ++ D_ENCRYPT(ks, r, l, 9); ++ D_ENCRYPT(ks, l, r, 10); ++ D_ENCRYPT(ks, r, l, 11); ++ D_ENCRYPT(ks, l, r, 12); ++ D_ENCRYPT(ks, r, l, 13); ++ D_ENCRYPT(ks, l, r, 14); ++ D_ENCRYPT(ks, r, l, 15); ++ } else { ++ D_ENCRYPT(ks, l, r, 15); ++ D_ENCRYPT(ks, r, l, 14); ++ D_ENCRYPT(ks, l, r, 13); ++ D_ENCRYPT(ks, r, l, 12); ++ D_ENCRYPT(ks, l, r, 11); ++ D_ENCRYPT(ks, r, l, 10); ++ D_ENCRYPT(ks, l, r, 9); ++ D_ENCRYPT(ks, r, l, 8); ++ D_ENCRYPT(ks, l, r, 7); ++ D_ENCRYPT(ks, r, l, 6); ++ D_ENCRYPT(ks, l, r, 5); ++ D_ENCRYPT(ks, r, l, 4); ++ D_ENCRYPT(ks, l, r, 3); ++ D_ENCRYPT(ks, r, l, 2); ++ D_ENCRYPT(ks, l, r, 1); ++ D_ENCRYPT(ks, r, l, 0); ++ } ++ // rotate and clear the top bits on machines with 8byte longs ++ data[0] = CRYPTO_rotr_u32(l, 3); ++ data[1] = CRYPTO_rotr_u32(r, 3); ++} ++ ++void DES_encrypt3(uint32_t *data, const DES_key_schedule *ks1, ++ const DES_key_schedule *ks2, const DES_key_schedule *ks3) { ++ uint32_t l, r; ++ ++ l = data[0]; ++ r = data[1]; ++ IP(l, r); ++ data[0] = l; ++ data[1] = r; ++ DES_encrypt2((uint32_t *)data, ks1, DES_ENCRYPT); ++ DES_encrypt2((uint32_t *)data, ks2, DES_DECRYPT); ++ DES_encrypt2((uint32_t *)data, ks3, DES_ENCRYPT); ++ l = data[0]; ++ r = data[1]; ++ FP(r, l); ++ data[0] = l; ++ data[1] = r; ++} ++ ++void DES_decrypt3(uint32_t *data, const DES_key_schedule *ks1, ++ const DES_key_schedule *ks2, const DES_key_schedule *ks3) { ++ uint32_t l, r; ++ ++ l = data[0]; ++ r = data[1]; ++ IP(l, r); ++ data[0] = l; ++ data[1] = r; ++ DES_encrypt2((uint32_t *)data, ks3, DES_DECRYPT); ++ DES_encrypt2((uint32_t *)data, ks2, DES_ENCRYPT); ++ DES_encrypt2((uint32_t *)data, ks1, DES_DECRYPT); ++ l = data[0]; ++ r = data[1]; ++ FP(r, l); ++ data[0] = l; ++ data[1] = r; ++} ++ ++void DES_ecb_encrypt(const DES_cblock *in_block, DES_cblock *out_block, ++ const DES_key_schedule *schedule, int is_encrypt) { ++ uint32_t l; ++ uint32_t ll[2]; ++ const uint8_t *in = in_block->bytes; ++ uint8_t *out = out_block->bytes; ++ ++ c2l(in, l); ++ ll[0] = l; ++ c2l(in, l); ++ ll[1] = l; ++ DES_encrypt1(ll, schedule, is_encrypt); ++ l = ll[0]; ++ l2c(l, out); ++ l = ll[1]; ++ l2c(l, out); ++ ll[0] = ll[1] = 0; ++} ++ ++void DES_ncbc_encrypt(const uint8_t *in, uint8_t *out, size_t len, ++ const DES_key_schedule *schedule, DES_cblock *ivec, ++ int enc) { ++ uint32_t tin0, tin1; ++ uint32_t tout0, tout1, xor0, xor1; ++ uint32_t tin[2]; ++ unsigned char *iv; ++ ++ iv = ivec->bytes; ++ ++ if (enc) { ++ c2l(iv, tout0); ++ c2l(iv, tout1); ++ for (; len >= 8; len -= 8) { ++ c2l(in, tin0); ++ c2l(in, tin1); ++ tin0 ^= tout0; ++ tin[0] = tin0; ++ tin1 ^= tout1; ++ tin[1] = tin1; ++ DES_encrypt1((uint32_t *)tin, schedule, DES_ENCRYPT); ++ tout0 = tin[0]; ++ l2c(tout0, out); ++ tout1 = tin[1]; ++ l2c(tout1, out); ++ } ++ if (len != 0) { ++ c2ln(in, tin0, tin1, len); ++ tin0 ^= tout0; ++ tin[0] = tin0; ++ tin1 ^= tout1; ++ tin[1] = tin1; ++ DES_encrypt1((uint32_t *)tin, schedule, DES_ENCRYPT); ++ tout0 = tin[0]; ++ l2c(tout0, out); ++ tout1 = tin[1]; ++ l2c(tout1, out); ++ } ++ iv = ivec->bytes; ++ l2c(tout0, iv); ++ l2c(tout1, iv); ++ } else 
{ ++ c2l(iv, xor0); ++ c2l(iv, xor1); ++ for (; len >= 8; len -= 8) { ++ c2l(in, tin0); ++ tin[0] = tin0; ++ c2l(in, tin1); ++ tin[1] = tin1; ++ DES_encrypt1((uint32_t *)tin, schedule, DES_DECRYPT); ++ tout0 = tin[0] ^ xor0; ++ tout1 = tin[1] ^ xor1; ++ l2c(tout0, out); ++ l2c(tout1, out); ++ xor0 = tin0; ++ xor1 = tin1; ++ } ++ if (len != 0) { ++ c2l(in, tin0); ++ tin[0] = tin0; ++ c2l(in, tin1); ++ tin[1] = tin1; ++ DES_encrypt1((uint32_t *)tin, schedule, DES_DECRYPT); ++ tout0 = tin[0] ^ xor0; ++ tout1 = tin[1] ^ xor1; ++ l2cn(tout0, tout1, out, len); ++ xor0 = tin0; ++ xor1 = tin1; ++ } ++ iv = ivec->bytes; ++ l2c(xor0, iv); ++ l2c(xor1, iv); ++ } ++ tin[0] = tin[1] = 0; ++} ++ ++void DES_ecb3_encrypt(const DES_cblock *input, DES_cblock *output, ++ const DES_key_schedule *ks1, const DES_key_schedule *ks2, ++ const DES_key_schedule *ks3, int enc) { ++ uint32_t l0, l1; ++ uint32_t ll[2]; ++ const uint8_t *in = input->bytes; ++ uint8_t *out = output->bytes; ++ ++ c2l(in, l0); ++ c2l(in, l1); ++ ll[0] = l0; ++ ll[1] = l1; ++ if (enc) { ++ DES_encrypt3(ll, ks1, ks2, ks3); ++ } else { ++ DES_decrypt3(ll, ks1, ks2, ks3); ++ } ++ l0 = ll[0]; ++ l1 = ll[1]; ++ l2c(l0, out); ++ l2c(l1, out); ++} ++ ++void DES_ede3_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len, ++ const DES_key_schedule *ks1, ++ const DES_key_schedule *ks2, ++ const DES_key_schedule *ks3, DES_cblock *ivec, ++ int enc) { ++ uint32_t tin0, tin1; ++ uint32_t tout0, tout1, xor0, xor1; ++ uint32_t tin[2]; ++ uint8_t *iv; ++ ++ iv = ivec->bytes; ++ ++ if (enc) { ++ c2l(iv, tout0); ++ c2l(iv, tout1); ++ for (; len >= 8; len -= 8) { ++ c2l(in, tin0); ++ c2l(in, tin1); ++ tin0 ^= tout0; ++ tin1 ^= tout1; ++ ++ tin[0] = tin0; ++ tin[1] = tin1; ++ DES_encrypt3((uint32_t *)tin, ks1, ks2, ks3); ++ tout0 = tin[0]; ++ tout1 = tin[1]; ++ ++ l2c(tout0, out); ++ l2c(tout1, out); ++ } ++ if (len != 0) { ++ c2ln(in, tin0, tin1, len); ++ tin0 ^= tout0; ++ tin1 ^= tout1; ++ ++ tin[0] = tin0; ++ tin[1] = tin1; ++ DES_encrypt3((uint32_t *)tin, ks1, ks2, ks3); ++ tout0 = tin[0]; ++ tout1 = tin[1]; ++ ++ l2c(tout0, out); ++ l2c(tout1, out); ++ } ++ iv = ivec->bytes; ++ l2c(tout0, iv); ++ l2c(tout1, iv); ++ } else { ++ uint32_t t0, t1; ++ ++ c2l(iv, xor0); ++ c2l(iv, xor1); ++ for (; len >= 8; len -= 8) { ++ c2l(in, tin0); ++ c2l(in, tin1); ++ ++ t0 = tin0; ++ t1 = tin1; ++ ++ tin[0] = tin0; ++ tin[1] = tin1; ++ DES_decrypt3((uint32_t *)tin, ks1, ks2, ks3); ++ tout0 = tin[0]; ++ tout1 = tin[1]; ++ ++ tout0 ^= xor0; ++ tout1 ^= xor1; ++ l2c(tout0, out); ++ l2c(tout1, out); ++ xor0 = t0; ++ xor1 = t1; ++ } ++ if (len != 0) { ++ c2l(in, tin0); ++ c2l(in, tin1); ++ ++ t0 = tin0; ++ t1 = tin1; ++ ++ tin[0] = tin0; ++ tin[1] = tin1; ++ DES_decrypt3((uint32_t *)tin, ks1, ks2, ks3); ++ tout0 = tin[0]; ++ tout1 = tin[1]; ++ ++ tout0 ^= xor0; ++ tout1 ^= xor1; ++ l2cn(tout0, tout1, out, len); ++ xor0 = t0; ++ xor1 = t1; ++ } ++ ++ iv = ivec->bytes; ++ l2c(xor0, iv); ++ l2c(xor1, iv); ++ } ++ ++ tin[0] = tin[1] = 0; ++} ++ ++void DES_ede2_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len, ++ const DES_key_schedule *ks1, ++ const DES_key_schedule *ks2, ++ DES_cblock *ivec, ++ int enc) { ++ DES_ede3_cbc_encrypt(in, out, len, ks1, ks2, ks1, ivec, enc); ++} ++ ++ ++// Deprecated functions. 
++ ++void DES_set_key_unchecked(const DES_cblock *key, DES_key_schedule *schedule) { ++ DES_set_key(key, schedule); ++} ++ ++#undef HPERM_OP ++#undef c2l ++#undef l2c ++#undef c2ln ++#undef l2cn ++#undef PERM_OP ++#undef IP ++#undef FP ++#undef LOAD_DATA ++#undef D_ENCRYPT ++#undef ITERATIONS ++#undef HALF_ITERATIONS +diff --git a/src/crypto/des/internal.h b/src/crypto/des/internal.h +new file mode 100644 +index 0000000..2124fd5 +--- /dev/null ++++ b/src/crypto/des/internal.h +@@ -0,0 +1,238 @@ ++/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) ++ * All rights reserved. ++ * ++ * This package is an SSL implementation written ++ * by Eric Young (eay@cryptsoft.com). ++ * The implementation was written so as to conform with Netscapes SSL. ++ * ++ * This library is free for commercial and non-commercial use as long as ++ * the following conditions are aheared to. The following conditions ++ * apply to all code found in this distribution, be it the RC4, RSA, ++ * lhash, DES, etc., code; not just the SSL code. The SSL documentation ++ * included with this distribution is covered by the same copyright terms ++ * except that the holder is Tim Hudson (tjh@cryptsoft.com). ++ * ++ * Copyright remains Eric Young's, and as such any Copyright notices in ++ * the code are not to be removed. ++ * If this package is used in a product, Eric Young should be given attribution ++ * as the author of the parts of the library used. ++ * This can be in the form of a textual message at program startup or ++ * in documentation (online or textual) provided with the package. ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * 1. Redistributions of source code must retain the copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. All advertising materials mentioning features or use of this software ++ * must display the following acknowledgement: ++ * "This product includes cryptographic software written by ++ * Eric Young (eay@cryptsoft.com)" ++ * The word 'cryptographic' can be left out if the rouines from the library ++ * being used are not cryptographic related :-). ++ * 4. If you include any Windows specific code (or a derivative thereof) from ++ * the apps directory (application code) you must include an acknowledgement: ++ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" ++ * ++ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND ++ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ++ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ++ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ++ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS ++ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) ++ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT ++ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY ++ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ++ * SUCH DAMAGE. 
++ * ++ * The licence and distribution terms for any publically available version or ++ * derivative of this code cannot be changed. i.e. this code cannot simply be ++ * copied and put under another distribution licence ++ * [including the GNU Public Licence.] */ ++ ++#ifndef OPENSSL_HEADER_DES_INTERNAL_H ++#define OPENSSL_HEADER_DES_INTERNAL_H ++ ++#include ++ ++#include "../internal.h" ++ ++#if defined(__cplusplus) ++extern "C" { ++#endif ++ ++ ++#define c2l(c, l) \ ++ do { \ ++ (l) = ((uint32_t)(*((c)++))); \ ++ (l) |= ((uint32_t)(*((c)++))) << 8L; \ ++ (l) |= ((uint32_t)(*((c)++))) << 16L; \ ++ (l) |= ((uint32_t)(*((c)++))) << 24L; \ ++ } while (0) ++ ++#define l2c(l, c) \ ++ do { \ ++ *((c)++) = (unsigned char)(((l)) & 0xff); \ ++ *((c)++) = (unsigned char)(((l) >> 8L) & 0xff); \ ++ *((c)++) = (unsigned char)(((l) >> 16L) & 0xff); \ ++ *((c)++) = (unsigned char)(((l) >> 24L) & 0xff); \ ++ } while (0) ++ ++// NOTE - c is not incremented as per c2l ++#define c2ln(c, l1, l2, n) \ ++ do { \ ++ (c) += (n); \ ++ (l1) = (l2) = 0; \ ++ switch (n) { \ ++ case 8: \ ++ (l2) = ((uint32_t)(*(--(c)))) << 24L; \ ++ OPENSSL_FALLTHROUGH; \ ++ case 7: \ ++ (l2) |= ((uint32_t)(*(--(c)))) << 16L; \ ++ OPENSSL_FALLTHROUGH; \ ++ case 6: \ ++ (l2) |= ((uint32_t)(*(--(c)))) << 8L; \ ++ OPENSSL_FALLTHROUGH; \ ++ case 5: \ ++ (l2) |= ((uint32_t)(*(--(c)))); \ ++ OPENSSL_FALLTHROUGH; \ ++ case 4: \ ++ (l1) = ((uint32_t)(*(--(c)))) << 24L; \ ++ OPENSSL_FALLTHROUGH; \ ++ case 3: \ ++ (l1) |= ((uint32_t)(*(--(c)))) << 16L; \ ++ OPENSSL_FALLTHROUGH; \ ++ case 2: \ ++ (l1) |= ((uint32_t)(*(--(c)))) << 8L; \ ++ OPENSSL_FALLTHROUGH; \ ++ case 1: \ ++ (l1) |= ((uint32_t)(*(--(c)))); \ ++ } \ ++ } while (0) ++ ++// NOTE - c is not incremented as per l2c ++#define l2cn(l1, l2, c, n) \ ++ do { \ ++ (c) += (n); \ ++ switch (n) { \ ++ case 8: \ ++ *(--(c)) = (unsigned char)(((l2) >> 24L) & 0xff); \ ++ OPENSSL_FALLTHROUGH; \ ++ case 7: \ ++ *(--(c)) = (unsigned char)(((l2) >> 16L) & 0xff); \ ++ OPENSSL_FALLTHROUGH; \ ++ case 6: \ ++ *(--(c)) = (unsigned char)(((l2) >> 8L) & 0xff); \ ++ OPENSSL_FALLTHROUGH; \ ++ case 5: \ ++ *(--(c)) = (unsigned char)(((l2)) & 0xff); \ ++ OPENSSL_FALLTHROUGH; \ ++ case 4: \ ++ *(--(c)) = (unsigned char)(((l1) >> 24L) & 0xff); \ ++ OPENSSL_FALLTHROUGH; \ ++ case 3: \ ++ *(--(c)) = (unsigned char)(((l1) >> 16L) & 0xff); \ ++ OPENSSL_FALLTHROUGH; \ ++ case 2: \ ++ *(--(c)) = (unsigned char)(((l1) >> 8L) & 0xff); \ ++ OPENSSL_FALLTHROUGH; \ ++ case 1: \ ++ *(--(c)) = (unsigned char)(((l1)) & 0xff); \ ++ } \ ++ } while (0) ++ ++/* IP and FP ++ * The problem is more of a geometric problem that random bit fiddling. ++ 0 1 2 3 4 5 6 7 62 54 46 38 30 22 14 6 ++ 8 9 10 11 12 13 14 15 60 52 44 36 28 20 12 4 ++16 17 18 19 20 21 22 23 58 50 42 34 26 18 10 2 ++24 25 26 27 28 29 30 31 to 56 48 40 32 24 16 8 0 ++ ++32 33 34 35 36 37 38 39 63 55 47 39 31 23 15 7 ++40 41 42 43 44 45 46 47 61 53 45 37 29 21 13 5 ++48 49 50 51 52 53 54 55 59 51 43 35 27 19 11 3 ++56 57 58 59 60 61 62 63 57 49 41 33 25 17 9 1 ++ ++The output has been subject to swaps of the form ++0 1 -> 3 1 but the odd and even bits have been put into ++2 3 2 0 ++different words. 
The main trick is to remember that ++t=((l>>size)^r)&(mask); ++r^=t; ++l^=(t<<size); ++ ++can be used to swap and move bits between words. */ ++ ++#define PERM_OP(a, b, t, n, m) \ ++ do { \ ++ (t) = ((((a) >> (n)) ^ (b)) & (m)); \ ++ (b) ^= (t); \ ++ (a) ^= ((t) << (n)); \ ++ } while (0) ++ ++#define IP(l, r) \ ++ do { \ ++ uint32_t tt; \ ++ PERM_OP(r, l, tt, 4, 0x0f0f0f0fL); \ ++ PERM_OP(l, r, tt, 16, 0x0000ffffL); \ ++ PERM_OP(r, l, tt, 2, 0x33333333L); \ ++ PERM_OP(l, r, tt, 8, 0x00ff00ffL); \ ++ PERM_OP(r, l, tt, 1, 0x55555555L); \ ++ } while (0) ++ ++#define FP(l, r) \ ++ do { \ ++ uint32_t tt; \ ++ PERM_OP(l, r, tt, 1, 0x55555555L); \ ++ PERM_OP(r, l, tt, 8, 0x00ff00ffL); \ ++ PERM_OP(l, r, tt, 2, 0x33333333L); \ ++ PERM_OP(r, l, tt, 16, 0x0000ffffL); \ ++ PERM_OP(l, r, tt, 4, 0x0f0f0f0fL); \ ++ } while (0) ++ ++#define LOAD_DATA(ks, R, S, u, t, E0, E1) \ ++ do { \ ++ (u) = (R) ^ (ks)->subkeys[S][0]; \ ++ (t) = (R) ^ (ks)->subkeys[S][1]; \ ++ } while (0) ++ ++#define D_ENCRYPT(ks, LL, R, S) \ ++ do { \ ++ LOAD_DATA(ks, R, S, u, t, E0, E1); \ ++ t = CRYPTO_rotr_u32(t, 4); \ ++ (LL) ^= \ ++ DES_SPtrans[0][(u >> 2L) & 0x3f] ^ DES_SPtrans[2][(u >> 10L) & 0x3f] ^ \ ++ DES_SPtrans[4][(u >> 18L) & 0x3f] ^ \ ++ DES_SPtrans[6][(u >> 26L) & 0x3f] ^ DES_SPtrans[1][(t >> 2L) & 0x3f] ^ \ ++ DES_SPtrans[3][(t >> 10L) & 0x3f] ^ \ ++ DES_SPtrans[5][(t >> 18L) & 0x3f] ^ DES_SPtrans[7][(t >> 26L) & 0x3f]; \ ++ } while (0) ++ ++#define ITERATIONS 16 ++#define HALF_ITERATIONS 8 ++ ++ ++#if defined(__cplusplus) ++} // extern C ++#endif ++ ++#endif // OPENSSL_HEADER_DES_INTERNAL_H +diff --git a/src/crypto/err/ssl.errordata b/src/crypto/err/ssl.errordata +index 6879134..4205402 100644 +--- a/src/crypto/err/ssl.errordata ++++ b/src/crypto/err/ssl.errordata +@@ -90,6 +90,7 @@ SSL,301,INVALID_DELEGATED_CREDENTIAL + SSL,318,INVALID_ECH_CONFIG_LIST + SSL,317,INVALID_ECH_PUBLIC_NAME + SSL,159,INVALID_MESSAGE ++SSL,320,INVALID_OUTER_EXTENSION + SSL,251,INVALID_OUTER_RECORD_TYPE + SSL,269,INVALID_SCT_LIST + SSL,295,INVALID_SIGNATURE_ALGORITHM +@@ -133,7 +134,6 @@ SSL,289,OCSP_CB_ERROR + SSL,187,OLD_SESSION_CIPHER_NOT_RETURNED + SSL,268,OLD_SESSION_PRF_HASH_MISMATCH + SSL,188,OLD_SESSION_VERSION_NOT_RETURNED +-SSL,320,OUTER_EXTENSION_NOT_FOUND + SSL,189,OUTPUT_ALIASES_INPUT + SSL,190,PARSE_TLSEXT + SSL,191,PATH_TOO_LONG +diff --git a/src/crypto/fipsmodule/FIPS.md b/src/crypto/fipsmodule/FIPS.md +index d3b3890..bc5708f 100644 +--- a/src/crypto/fipsmodule/FIPS.md ++++ b/src/crypto/fipsmodule/FIPS.md +@@ -12,30 +12,21 @@ BoringCrypto has undergone the following validations: + 1. 2018-07-30: certificate [#3318](https://csrc.nist.gov/Projects/Cryptographic-Module-Validation-Program/Certificate/3318), [security policy](/crypto/fipsmodule/policydocs/BoringCrypto-Security-Policy-20180730.docx) (in docx format). + 1. 2019-08-08: certificate [#3678](https://csrc.nist.gov/Projects/Cryptographic-Module-Validation-Program/Certificate/3678), [security policy](/crypto/fipsmodule/policydocs/BoringCrypto-Security-Policy-20190808.docx) (in docx format). + 1. 2019-10-20: certificate [#3753](https://csrc.nist.gov/Projects/Cryptographic-Module-Validation-Program/Certificate/3753), [security policy](/crypto/fipsmodule/policydocs/BoringCrypto-Android-Security-Policy-20191020.docx) (in docx format). ++1. 2021-01-28: certificate [#4156](https://csrc.nist.gov/Projects/Cryptographic-Module-Validation-Program/Certificate/4156), [security policy](/crypto/fipsmodule/policydocs/BoringCrypto-Android-Security-Policy-20210319.docx) (in docx format).
+ +-## Running CAVP tests ++## Running ACVP tests + +-CAVP results are calculated by `util/fipstools/cavp`, but that binary is almost always run by `util/fipstools/run_cavp.go`. The latter knows the set of tests to be processed and the flags needed to configure `cavp` for each one. It must be run from the top of a CAVP directory and needs the following options: ++See `util/fipstools/acvp/ACVP.md` for details of how ACVP testing is done. + +-1. `-oracle-bin`: points to the location of `util/fipstools/cavp` +-2. `-no-fax`: this is needed to suppress checking of the FAX files, which are only included in sample sets. ++## Breaking known-answer and continuous tests + +-## Breaking power-on and continuous tests ++Each known-answer test (KAT) uses a unique, random input value. `util/fipstools/break-kat.go` contains a listing of those values and can be used to corrupt a given test in a binary. Since changes to the KAT input values will invalidate the integrity test, `BORINGSSL_FIPS_BREAK_TESTS` can be defined in `fips_break_tests.h` to disable it for the purposes of testing. + +-In order to demonstrate failures of the various FIPS 140 tests, BoringSSL can be built in ways that will trigger such failures. This is controlled by passing `-DFIPS_BREAK_TEST=`(test to break) to CMake, where the following tests can be specified: ++Some FIPS tests cannot be broken by replacing a known string in the binary. For those, when `BORINGSSL_FIPS_BREAK_TESTS` is defined, the environment variable `BORINGSSL_FIPS_BREAK_TEST` can be set to one of a number of values in order to break the corresponding test: + +-1. AES\_CBC +-1. AES\_GCM +-1. DES +-1. SHA\_1 +-1. SHA\_256 +-1. SHA\_512 +-1. RSA\_SIG +-1. ECDSA\_SIG +-1. DRBG +-1. RSA\_PWCT +-1. ECDSA\_PWCT +-1. TLS\_KDF ++1. `RSA_PWCT` ++1. `ECDSA_PWCT` ++1. `CRNG` + + ## Breaking the integrity test + +@@ -61,12 +52,6 @@ There is a second interface to the RNG which allows the caller to supply bytes t + + FIPS requires that RNG state be zeroed when the process exits. In order to implement this, all per-thread RNG states are tracked in a linked list and a destructor function is included which clears them. In order for this to be safe in the presence of threads, a lock is used to stop all other threads from using the RNG once this process has begun. Thus the main thread exiting may cause other threads to deadlock, and drawing on entropy in a destructor function may also deadlock. + +-## Self-test optimisation +- +-On Android, the self-tests are optimised in line with [IG](https://csrc.nist.gov/csrc/media/projects/cryptographic-module-validation-program/documents/fips140-2/fips1402ig.pdf) section 9.11. The module will always perform the integrity test at power-on, but the self-tests will test for the presence of a file named after the hex encoded, HMAC-SHA-256 hash of the module in `/dev/boringssl/selftest/`. If such a file is found then the self-tests are skipped. Otherwise, after the self-tests complete successfully, that file will be written. Any I/O errors are ignored and, if they occur when testing for the presence of the file, the module acts as if it's not present. +- +-It is intended that a `tmpfs` be mounted at that location in order to skip running the self tests for every process once they have already passed in a given instance of the operating system. +- + ## Integrity Test + + FIPS-140 mandates that a module calculate an HMAC of its own code in a constructor function and compare the result to a known-good value. 
Typical code produced by a C compiler includes large numbers of relocations: places in the machine code where the linker needs to resolve and inject the final value of a symbolic expression. These relocations mean that the bytes that make up any specific bit of code generally aren't known until the final link has completed. +diff --git a/src/crypto/fipsmodule/aes/internal.h b/src/crypto/fipsmodule/aes/internal.h +index 9f7dd47..0685bc4 100644 +--- a/src/crypto/fipsmodule/aes/internal.h ++++ b/src/crypto/fipsmodule/aes/internal.h +@@ -30,18 +30,14 @@ extern "C" { + #define HWAES + #define HWAES_ECB + +-OPENSSL_INLINE int hwaes_capable(void) { +- return (OPENSSL_ia32cap_get()[1] & (1 << (57 - 32))) != 0; +-} ++OPENSSL_INLINE int hwaes_capable(void) { return CRYPTO_is_AESNI_capable(); } + + #define VPAES + #if defined(OPENSSL_X86_64) + #define VPAES_CTR32 + #endif + #define VPAES_CBC +-OPENSSL_INLINE int vpaes_capable(void) { +- return (OPENSSL_ia32cap_get()[1] & (1 << (41 - 32))) != 0; +-} ++OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_SSSE3_capable(); } + + #elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) + #define HWAES +diff --git a/src/crypto/fipsmodule/bcm.c b/src/crypto/fipsmodule/bcm.c +index 639235e..1219bc7 100644 +--- a/src/crypto/fipsmodule/bcm.c ++++ b/src/crypto/fipsmodule/bcm.c +@@ -58,8 +58,6 @@ + #include "cipher/aead.c" + #include "cipher/cipher.c" + #include "cipher/e_aes.c" +-#include "cipher/e_des.c" +-#include "des/des.c" + #include "dh/check.c" + #include "dh/dh.c" + #include "digest/digest.c" +@@ -192,16 +190,23 @@ BORINGSSL_bcm_power_on_self_test(void) { + #endif + + assert_within(rodata_start, kPrimes, rodata_end); +- assert_within(rodata_start, des_skb, rodata_end); + assert_within(rodata_start, kP256Params, rodata_end); + assert_within(rodata_start, kPKCS1SigPrefixes, rodata_end); + + #if defined(OPENSSL_AARCH64) || defined(OPENSSL_ANDROID) + uint8_t result[SHA256_DIGEST_LENGTH]; + const EVP_MD *const kHashFunction = EVP_sha256(); ++ if (!boringssl_self_test_sha256() || ++ !boringssl_self_test_hmac_sha256()) { ++ goto err; ++ } + #else + uint8_t result[SHA512_DIGEST_LENGTH]; + const EVP_MD *const kHashFunction = EVP_sha512(); ++ if (!boringssl_self_test_sha512() || ++ !boringssl_self_test_hmac_sha256()) { ++ goto err; ++ } + #endif + + static const uint8_t kHMACKey[64] = {0}; +@@ -238,19 +243,17 @@ BORINGSSL_bcm_power_on_self_test(void) { + const uint8_t *expected = BORINGSSL_bcm_text_hash; + + if (!check_test(expected, result, sizeof(result), "FIPS integrity test")) { ++#if !defined(BORINGSSL_FIPS_BREAK_TESTS) + goto err; ++#endif + } + + OPENSSL_cleanse(result, sizeof(result)); // FIPS 140-3, AS05.10. 
++#endif // OPENSSL_ASAN + +- if (!boringssl_fips_self_test(BORINGSSL_bcm_text_hash, sizeof(result))) { ++ if (!boringssl_self_test_startup()) { + goto err; + } +-#else +- if (!BORINGSSL_self_test()) { +- goto err; +- } +-#endif // OPENSSL_ASAN + + return; + +diff --git a/src/crypto/fipsmodule/bn/rsaz_exp.h b/src/crypto/fipsmodule/bn/rsaz_exp.h +index 2f0c2c0..104bb7a 100644 +--- a/src/crypto/fipsmodule/bn/rsaz_exp.h ++++ b/src/crypto/fipsmodule/bn/rsaz_exp.h +@@ -41,18 +41,17 @@ void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16], const BN_ULONG base_norm[16], + BN_ULONG storage_words[MOD_EXP_CTIME_STORAGE_LEN]); + + OPENSSL_INLINE int rsaz_avx2_capable(void) { +- const uint32_t *cap = OPENSSL_ia32cap_get(); +- return (cap[2] & (1 << 5)) != 0; // AVX2 ++ return CRYPTO_is_AVX2_capable(); + } + + OPENSSL_INLINE int rsaz_avx2_preferred(void) { +- const uint32_t *cap = OPENSSL_ia32cap_get(); +- static const uint32_t kBMI2AndADX = (1 << 8) | (1 << 19); +- if ((cap[2] & kBMI2AndADX) == kBMI2AndADX) { +- // If BMI2 and ADX are available, x86_64-mont5.pl is faster. ++ if (CRYPTO_is_BMI1_capable() && CRYPTO_is_BMI2_capable() && ++ CRYPTO_is_ADX_capable()) { ++ // If BMI1, BMI2, and ADX are available, x86_64-mont5.pl is faster. See the ++ // .Lmulx4x_enter and .Lpowerx5_enter branches. + return 0; + } +- return (cap[2] & (1 << 5)) != 0; // AVX2 ++ return CRYPTO_is_AVX2_capable(); + } + + +diff --git a/src/crypto/fipsmodule/cipher/e_des.c b/src/crypto/fipsmodule/cipher/e_des.c +deleted file mode 100644 +index e77363b..0000000 +--- a/src/crypto/fipsmodule/cipher/e_des.c ++++ /dev/null +@@ -1,237 +0,0 @@ +-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) +- * All rights reserved. +- * +- * This package is an SSL implementation written +- * by Eric Young (eay@cryptsoft.com). +- * The implementation was written so as to conform with Netscapes SSL. +- * +- * This library is free for commercial and non-commercial use as long as +- * the following conditions are aheared to. The following conditions +- * apply to all code found in this distribution, be it the RC4, RSA, +- * lhash, DES, etc., code; not just the SSL code. The SSL documentation +- * included with this distribution is covered by the same copyright terms +- * except that the holder is Tim Hudson (tjh@cryptsoft.com). +- * +- * Copyright remains Eric Young's, and as such any Copyright notices in +- * the code are not to be removed. +- * If this package is used in a product, Eric Young should be given attribution +- * as the author of the parts of the library used. +- * This can be in the form of a textual message at program startup or +- * in documentation (online or textual) provided with the package. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the copyright +- * notice, this list of conditions and the following disclaimer. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. 
All advertising materials mentioning features or use of this software +- * must display the following acknowledgement: +- * "This product includes cryptographic software written by +- * Eric Young (eay@cryptsoft.com)" +- * The word 'cryptographic' can be left out if the rouines from the library +- * being used are not cryptographic related :-). +- * 4. If you include any Windows specific code (or a derivative thereof) from +- * the apps directory (application code) you must include an acknowledgement: +- * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" +- * +- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND +- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +- * SUCH DAMAGE. +- * +- * The licence and distribution terms for any publically available version or +- * derivative of this code cannot be changed. i.e. this code cannot simply be +- * copied and put under another distribution licence +- * [including the GNU Public Licence.] */ +- +-#include +-#include +-#include +- +-#include "internal.h" +-#include "../delocate.h" +- +- +-typedef struct { +- union { +- double align; +- DES_key_schedule ks; +- } ks; +-} EVP_DES_KEY; +- +-static int des_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key, +- const uint8_t *iv, int enc) { +- DES_cblock *deskey = (DES_cblock *)key; +- EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; +- +- DES_set_key(deskey, &dat->ks.ks); +- return 1; +-} +- +-static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in, +- size_t in_len) { +- EVP_DES_KEY *dat = (EVP_DES_KEY *)ctx->cipher_data; +- +- DES_ncbc_encrypt(in, out, in_len, &dat->ks.ks, (DES_cblock *)ctx->iv, +- ctx->encrypt); +- +- return 1; +-} +- +-DEFINE_METHOD_FUNCTION(EVP_CIPHER, EVP_des_cbc) { +- memset(out, 0, sizeof(EVP_CIPHER)); +- out->nid = NID_des_cbc; +- out->block_size = 8; +- out->key_len = 8; +- out->iv_len = 8; +- out->ctx_size = sizeof(EVP_DES_KEY); +- out->flags = EVP_CIPH_CBC_MODE; +- out->init = des_init_key; +- out->cipher = des_cbc_cipher; +-} +- +-static int des_ecb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in, +- size_t in_len) { +- if (in_len < ctx->cipher->block_size) { +- return 1; +- } +- in_len -= ctx->cipher->block_size; +- +- EVP_DES_KEY *dat = (EVP_DES_KEY *) ctx->cipher_data; +- for (size_t i = 0; i <= in_len; i += ctx->cipher->block_size) { +- DES_ecb_encrypt((DES_cblock *) (in + i), (DES_cblock *) (out + i), +- &dat->ks.ks, ctx->encrypt); +- } +- return 1; +-} +- +-DEFINE_METHOD_FUNCTION(EVP_CIPHER, EVP_des_ecb) { +- memset(out, 0, sizeof(EVP_CIPHER)); +- out->nid = NID_des_ecb; +- out->block_size = 8; +- out->key_len = 8; +- out->iv_len = 0; +- out->ctx_size = sizeof(EVP_DES_KEY); +- out->flags = EVP_CIPH_ECB_MODE; +- out->init = des_init_key; +- out->cipher = des_ecb_cipher; +-} +- +-typedef struct { +- union { +- double align; +- DES_key_schedule ks[3]; +- } ks; +-} 
DES_EDE_KEY; +- +-static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key, +- const uint8_t *iv, int enc) { +- DES_cblock *deskey = (DES_cblock *)key; +- DES_EDE_KEY *dat = (DES_EDE_KEY*) ctx->cipher_data; +- +- DES_set_key(&deskey[0], &dat->ks.ks[0]); +- DES_set_key(&deskey[1], &dat->ks.ks[1]); +- DES_set_key(&deskey[2], &dat->ks.ks[2]); +- +- return 1; +-} +- +-static int des_ede3_cbc_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, +- const uint8_t *in, size_t in_len) { +- DES_EDE_KEY *dat = (DES_EDE_KEY*) ctx->cipher_data; +- +- DES_ede3_cbc_encrypt(in, out, in_len, &dat->ks.ks[0], &dat->ks.ks[1], +- &dat->ks.ks[2], (DES_cblock *)ctx->iv, ctx->encrypt); +- +- return 1; +-} +- +-DEFINE_METHOD_FUNCTION(EVP_CIPHER, EVP_des_ede3_cbc) { +- memset(out, 0, sizeof(EVP_CIPHER)); +- out->nid = NID_des_ede3_cbc; +- out->block_size = 8; +- out->key_len = 24; +- out->iv_len = 8; +- out->ctx_size = sizeof(DES_EDE_KEY); +- out->flags = EVP_CIPH_CBC_MODE; +- out->init = des_ede3_init_key; +- out->cipher = des_ede3_cbc_cipher; +-} +- +-static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const uint8_t *key, +- const uint8_t *iv, int enc) { +- DES_cblock *deskey = (DES_cblock *) key; +- DES_EDE_KEY *dat = (DES_EDE_KEY *) ctx->cipher_data; +- +- DES_set_key(&deskey[0], &dat->ks.ks[0]); +- DES_set_key(&deskey[1], &dat->ks.ks[1]); +- DES_set_key(&deskey[0], &dat->ks.ks[2]); +- +- return 1; +-} +- +-DEFINE_METHOD_FUNCTION(EVP_CIPHER, EVP_des_ede_cbc) { +- memset(out, 0, sizeof(EVP_CIPHER)); +- out->nid = NID_des_ede_cbc; +- out->block_size = 8; +- out->key_len = 16; +- out->iv_len = 8; +- out->ctx_size = sizeof(DES_EDE_KEY); +- out->flags = EVP_CIPH_CBC_MODE; +- out->init = des_ede_init_key; +- out->cipher = des_ede3_cbc_cipher; +-} +- +-static int des_ede_ecb_cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, +- const uint8_t *in, size_t in_len) { +- if (in_len < ctx->cipher->block_size) { +- return 1; +- } +- in_len -= ctx->cipher->block_size; +- +- DES_EDE_KEY *dat = (DES_EDE_KEY *) ctx->cipher_data; +- for (size_t i = 0; i <= in_len; i += ctx->cipher->block_size) { +- DES_ecb3_encrypt((DES_cblock *) (in + i), (DES_cblock *) (out + i), +- &dat->ks.ks[0], &dat->ks.ks[1], &dat->ks.ks[2], +- ctx->encrypt); +- } +- return 1; +-} +- +-DEFINE_METHOD_FUNCTION(EVP_CIPHER, EVP_des_ede) { +- memset(out, 0, sizeof(EVP_CIPHER)); +- out->nid = NID_des_ede_ecb; +- out->block_size = 8; +- out->key_len = 16; +- out->iv_len = 0; +- out->ctx_size = sizeof(DES_EDE_KEY); +- out->flags = EVP_CIPH_ECB_MODE; +- out->init = des_ede_init_key; +- out->cipher = des_ede_ecb_cipher; +-} +- +-DEFINE_METHOD_FUNCTION(EVP_CIPHER, EVP_des_ede3) { +- memset(out, 0, sizeof(EVP_CIPHER)); +- out->nid = NID_des_ede3_ecb; +- out->block_size = 8; +- out->key_len = 24; +- out->iv_len = 0; +- out->ctx_size = sizeof(DES_EDE_KEY); +- out->flags = EVP_CIPH_ECB_MODE; +- out->init = des_ede3_init_key; +- out->cipher = des_ede_ecb_cipher; +-} +- +-const EVP_CIPHER* EVP_des_ede3_ecb(void) { +- return EVP_des_ede3(); +-} +diff --git a/src/crypto/fipsmodule/des/des.c b/src/crypto/fipsmodule/des/des.c +deleted file mode 100644 +index 95c430c..0000000 +--- a/src/crypto/fipsmodule/des/des.c ++++ /dev/null +@@ -1,784 +0,0 @@ +-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) +- * All rights reserved. +- * +- * This package is an SSL implementation written +- * by Eric Young (eay@cryptsoft.com). +- * The implementation was written so as to conform with Netscapes SSL. 
+- * +- * This library is free for commercial and non-commercial use as long as +- * the following conditions are aheared to. The following conditions +- * apply to all code found in this distribution, be it the RC4, RSA, +- * lhash, DES, etc., code; not just the SSL code. The SSL documentation +- * included with this distribution is covered by the same copyright terms +- * except that the holder is Tim Hudson (tjh@cryptsoft.com). +- * +- * Copyright remains Eric Young's, and as such any Copyright notices in +- * the code are not to be removed. +- * If this package is used in a product, Eric Young should be given attribution +- * as the author of the parts of the library used. +- * This can be in the form of a textual message at program startup or +- * in documentation (online or textual) provided with the package. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the copyright +- * notice, this list of conditions and the following disclaimer. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. All advertising materials mentioning features or use of this software +- * must display the following acknowledgement: +- * "This product includes cryptographic software written by +- * Eric Young (eay@cryptsoft.com)" +- * The word 'cryptographic' can be left out if the rouines from the library +- * being used are not cryptographic related :-). +- * 4. If you include any Windows specific code (or a derivative thereof) from +- * the apps directory (application code) you must include an acknowledgement: +- * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" +- * +- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND +- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +- * SUCH DAMAGE. +- * +- * The licence and distribution terms for any publically available version or +- * derivative of this code cannot be changed. i.e. this code cannot simply be +- * copied and put under another distribution licence +- * [including the GNU Public Licence.] 
*/ +- +-#include +- +-#include +- +-#include "internal.h" +- +- +-static const uint32_t des_skb[8][64] = { +- { // for C bits (numbered as per FIPS 46) 1 2 3 4 5 6 +- 0x00000000L, 0x00000010L, 0x20000000L, 0x20000010L, 0x00010000L, +- 0x00010010L, 0x20010000L, 0x20010010L, 0x00000800L, 0x00000810L, +- 0x20000800L, 0x20000810L, 0x00010800L, 0x00010810L, 0x20010800L, +- 0x20010810L, 0x00000020L, 0x00000030L, 0x20000020L, 0x20000030L, +- 0x00010020L, 0x00010030L, 0x20010020L, 0x20010030L, 0x00000820L, +- 0x00000830L, 0x20000820L, 0x20000830L, 0x00010820L, 0x00010830L, +- 0x20010820L, 0x20010830L, 0x00080000L, 0x00080010L, 0x20080000L, +- 0x20080010L, 0x00090000L, 0x00090010L, 0x20090000L, 0x20090010L, +- 0x00080800L, 0x00080810L, 0x20080800L, 0x20080810L, 0x00090800L, +- 0x00090810L, 0x20090800L, 0x20090810L, 0x00080020L, 0x00080030L, +- 0x20080020L, 0x20080030L, 0x00090020L, 0x00090030L, 0x20090020L, +- 0x20090030L, 0x00080820L, 0x00080830L, 0x20080820L, 0x20080830L, +- 0x00090820L, 0x00090830L, 0x20090820L, 0x20090830L, }, +- { // for C bits (numbered as per FIPS 46) 7 8 10 11 12 13 +- 0x00000000L, 0x02000000L, 0x00002000L, 0x02002000L, 0x00200000L, +- 0x02200000L, 0x00202000L, 0x02202000L, 0x00000004L, 0x02000004L, +- 0x00002004L, 0x02002004L, 0x00200004L, 0x02200004L, 0x00202004L, +- 0x02202004L, 0x00000400L, 0x02000400L, 0x00002400L, 0x02002400L, +- 0x00200400L, 0x02200400L, 0x00202400L, 0x02202400L, 0x00000404L, +- 0x02000404L, 0x00002404L, 0x02002404L, 0x00200404L, 0x02200404L, +- 0x00202404L, 0x02202404L, 0x10000000L, 0x12000000L, 0x10002000L, +- 0x12002000L, 0x10200000L, 0x12200000L, 0x10202000L, 0x12202000L, +- 0x10000004L, 0x12000004L, 0x10002004L, 0x12002004L, 0x10200004L, +- 0x12200004L, 0x10202004L, 0x12202004L, 0x10000400L, 0x12000400L, +- 0x10002400L, 0x12002400L, 0x10200400L, 0x12200400L, 0x10202400L, +- 0x12202400L, 0x10000404L, 0x12000404L, 0x10002404L, 0x12002404L, +- 0x10200404L, 0x12200404L, 0x10202404L, 0x12202404L, }, +- { // for C bits (numbered as per FIPS 46) 14 15 16 17 19 20 +- 0x00000000L, 0x00000001L, 0x00040000L, 0x00040001L, 0x01000000L, +- 0x01000001L, 0x01040000L, 0x01040001L, 0x00000002L, 0x00000003L, +- 0x00040002L, 0x00040003L, 0x01000002L, 0x01000003L, 0x01040002L, +- 0x01040003L, 0x00000200L, 0x00000201L, 0x00040200L, 0x00040201L, +- 0x01000200L, 0x01000201L, 0x01040200L, 0x01040201L, 0x00000202L, +- 0x00000203L, 0x00040202L, 0x00040203L, 0x01000202L, 0x01000203L, +- 0x01040202L, 0x01040203L, 0x08000000L, 0x08000001L, 0x08040000L, +- 0x08040001L, 0x09000000L, 0x09000001L, 0x09040000L, 0x09040001L, +- 0x08000002L, 0x08000003L, 0x08040002L, 0x08040003L, 0x09000002L, +- 0x09000003L, 0x09040002L, 0x09040003L, 0x08000200L, 0x08000201L, +- 0x08040200L, 0x08040201L, 0x09000200L, 0x09000201L, 0x09040200L, +- 0x09040201L, 0x08000202L, 0x08000203L, 0x08040202L, 0x08040203L, +- 0x09000202L, 0x09000203L, 0x09040202L, 0x09040203L, }, +- { // for C bits (numbered as per FIPS 46) 21 23 24 26 27 28 +- 0x00000000L, 0x00100000L, 0x00000100L, 0x00100100L, 0x00000008L, +- 0x00100008L, 0x00000108L, 0x00100108L, 0x00001000L, 0x00101000L, +- 0x00001100L, 0x00101100L, 0x00001008L, 0x00101008L, 0x00001108L, +- 0x00101108L, 0x04000000L, 0x04100000L, 0x04000100L, 0x04100100L, +- 0x04000008L, 0x04100008L, 0x04000108L, 0x04100108L, 0x04001000L, +- 0x04101000L, 0x04001100L, 0x04101100L, 0x04001008L, 0x04101008L, +- 0x04001108L, 0x04101108L, 0x00020000L, 0x00120000L, 0x00020100L, +- 0x00120100L, 0x00020008L, 0x00120008L, 0x00020108L, 0x00120108L, +- 0x00021000L, 0x00121000L, 
0x00021100L, 0x00121100L, 0x00021008L, +- 0x00121008L, 0x00021108L, 0x00121108L, 0x04020000L, 0x04120000L, +- 0x04020100L, 0x04120100L, 0x04020008L, 0x04120008L, 0x04020108L, +- 0x04120108L, 0x04021000L, 0x04121000L, 0x04021100L, 0x04121100L, +- 0x04021008L, 0x04121008L, 0x04021108L, 0x04121108L, }, +- { // for D bits (numbered as per FIPS 46) 1 2 3 4 5 6 +- 0x00000000L, 0x10000000L, 0x00010000L, 0x10010000L, 0x00000004L, +- 0x10000004L, 0x00010004L, 0x10010004L, 0x20000000L, 0x30000000L, +- 0x20010000L, 0x30010000L, 0x20000004L, 0x30000004L, 0x20010004L, +- 0x30010004L, 0x00100000L, 0x10100000L, 0x00110000L, 0x10110000L, +- 0x00100004L, 0x10100004L, 0x00110004L, 0x10110004L, 0x20100000L, +- 0x30100000L, 0x20110000L, 0x30110000L, 0x20100004L, 0x30100004L, +- 0x20110004L, 0x30110004L, 0x00001000L, 0x10001000L, 0x00011000L, +- 0x10011000L, 0x00001004L, 0x10001004L, 0x00011004L, 0x10011004L, +- 0x20001000L, 0x30001000L, 0x20011000L, 0x30011000L, 0x20001004L, +- 0x30001004L, 0x20011004L, 0x30011004L, 0x00101000L, 0x10101000L, +- 0x00111000L, 0x10111000L, 0x00101004L, 0x10101004L, 0x00111004L, +- 0x10111004L, 0x20101000L, 0x30101000L, 0x20111000L, 0x30111000L, +- 0x20101004L, 0x30101004L, 0x20111004L, 0x30111004L, }, +- { // for D bits (numbered as per FIPS 46) 8 9 11 12 13 14 +- 0x00000000L, 0x08000000L, 0x00000008L, 0x08000008L, 0x00000400L, +- 0x08000400L, 0x00000408L, 0x08000408L, 0x00020000L, 0x08020000L, +- 0x00020008L, 0x08020008L, 0x00020400L, 0x08020400L, 0x00020408L, +- 0x08020408L, 0x00000001L, 0x08000001L, 0x00000009L, 0x08000009L, +- 0x00000401L, 0x08000401L, 0x00000409L, 0x08000409L, 0x00020001L, +- 0x08020001L, 0x00020009L, 0x08020009L, 0x00020401L, 0x08020401L, +- 0x00020409L, 0x08020409L, 0x02000000L, 0x0A000000L, 0x02000008L, +- 0x0A000008L, 0x02000400L, 0x0A000400L, 0x02000408L, 0x0A000408L, +- 0x02020000L, 0x0A020000L, 0x02020008L, 0x0A020008L, 0x02020400L, +- 0x0A020400L, 0x02020408L, 0x0A020408L, 0x02000001L, 0x0A000001L, +- 0x02000009L, 0x0A000009L, 0x02000401L, 0x0A000401L, 0x02000409L, +- 0x0A000409L, 0x02020001L, 0x0A020001L, 0x02020009L, 0x0A020009L, +- 0x02020401L, 0x0A020401L, 0x02020409L, 0x0A020409L, }, +- { // for D bits (numbered as per FIPS 46) 16 17 18 19 20 21 +- 0x00000000L, 0x00000100L, 0x00080000L, 0x00080100L, 0x01000000L, +- 0x01000100L, 0x01080000L, 0x01080100L, 0x00000010L, 0x00000110L, +- 0x00080010L, 0x00080110L, 0x01000010L, 0x01000110L, 0x01080010L, +- 0x01080110L, 0x00200000L, 0x00200100L, 0x00280000L, 0x00280100L, +- 0x01200000L, 0x01200100L, 0x01280000L, 0x01280100L, 0x00200010L, +- 0x00200110L, 0x00280010L, 0x00280110L, 0x01200010L, 0x01200110L, +- 0x01280010L, 0x01280110L, 0x00000200L, 0x00000300L, 0x00080200L, +- 0x00080300L, 0x01000200L, 0x01000300L, 0x01080200L, 0x01080300L, +- 0x00000210L, 0x00000310L, 0x00080210L, 0x00080310L, 0x01000210L, +- 0x01000310L, 0x01080210L, 0x01080310L, 0x00200200L, 0x00200300L, +- 0x00280200L, 0x00280300L, 0x01200200L, 0x01200300L, 0x01280200L, +- 0x01280300L, 0x00200210L, 0x00200310L, 0x00280210L, 0x00280310L, +- 0x01200210L, 0x01200310L, 0x01280210L, 0x01280310L, }, +- { // for D bits (numbered as per FIPS 46) 22 23 24 25 27 28 +- 0x00000000L, 0x04000000L, 0x00040000L, 0x04040000L, 0x00000002L, +- 0x04000002L, 0x00040002L, 0x04040002L, 0x00002000L, 0x04002000L, +- 0x00042000L, 0x04042000L, 0x00002002L, 0x04002002L, 0x00042002L, +- 0x04042002L, 0x00000020L, 0x04000020L, 0x00040020L, 0x04040020L, +- 0x00000022L, 0x04000022L, 0x00040022L, 0x04040022L, 0x00002020L, +- 0x04002020L, 0x00042020L, 0x04042020L, 
0x00002022L, 0x04002022L, +- 0x00042022L, 0x04042022L, 0x00000800L, 0x04000800L, 0x00040800L, +- 0x04040800L, 0x00000802L, 0x04000802L, 0x00040802L, 0x04040802L, +- 0x00002800L, 0x04002800L, 0x00042800L, 0x04042800L, 0x00002802L, +- 0x04002802L, 0x00042802L, 0x04042802L, 0x00000820L, 0x04000820L, +- 0x00040820L, 0x04040820L, 0x00000822L, 0x04000822L, 0x00040822L, +- 0x04040822L, 0x00002820L, 0x04002820L, 0x00042820L, 0x04042820L, +- 0x00002822L, 0x04002822L, 0x00042822L, 0x04042822L, }}; +- +-static const uint32_t DES_SPtrans[8][64] = { +- { // nibble 0 +- 0x02080800L, 0x00080000L, 0x02000002L, 0x02080802L, 0x02000000L, +- 0x00080802L, 0x00080002L, 0x02000002L, 0x00080802L, 0x02080800L, +- 0x02080000L, 0x00000802L, 0x02000802L, 0x02000000L, 0x00000000L, +- 0x00080002L, 0x00080000L, 0x00000002L, 0x02000800L, 0x00080800L, +- 0x02080802L, 0x02080000L, 0x00000802L, 0x02000800L, 0x00000002L, +- 0x00000800L, 0x00080800L, 0x02080002L, 0x00000800L, 0x02000802L, +- 0x02080002L, 0x00000000L, 0x00000000L, 0x02080802L, 0x02000800L, +- 0x00080002L, 0x02080800L, 0x00080000L, 0x00000802L, 0x02000800L, +- 0x02080002L, 0x00000800L, 0x00080800L, 0x02000002L, 0x00080802L, +- 0x00000002L, 0x02000002L, 0x02080000L, 0x02080802L, 0x00080800L, +- 0x02080000L, 0x02000802L, 0x02000000L, 0x00000802L, 0x00080002L, +- 0x00000000L, 0x00080000L, 0x02000000L, 0x02000802L, 0x02080800L, +- 0x00000002L, 0x02080002L, 0x00000800L, 0x00080802L, }, +- { // nibble 1 +- 0x40108010L, 0x00000000L, 0x00108000L, 0x40100000L, 0x40000010L, +- 0x00008010L, 0x40008000L, 0x00108000L, 0x00008000L, 0x40100010L, +- 0x00000010L, 0x40008000L, 0x00100010L, 0x40108000L, 0x40100000L, +- 0x00000010L, 0x00100000L, 0x40008010L, 0x40100010L, 0x00008000L, +- 0x00108010L, 0x40000000L, 0x00000000L, 0x00100010L, 0x40008010L, +- 0x00108010L, 0x40108000L, 0x40000010L, 0x40000000L, 0x00100000L, +- 0x00008010L, 0x40108010L, 0x00100010L, 0x40108000L, 0x40008000L, +- 0x00108010L, 0x40108010L, 0x00100010L, 0x40000010L, 0x00000000L, +- 0x40000000L, 0x00008010L, 0x00100000L, 0x40100010L, 0x00008000L, +- 0x40000000L, 0x00108010L, 0x40008010L, 0x40108000L, 0x00008000L, +- 0x00000000L, 0x40000010L, 0x00000010L, 0x40108010L, 0x00108000L, +- 0x40100000L, 0x40100010L, 0x00100000L, 0x00008010L, 0x40008000L, +- 0x40008010L, 0x00000010L, 0x40100000L, 0x00108000L, }, +- { // nibble 2 +- 0x04000001L, 0x04040100L, 0x00000100L, 0x04000101L, 0x00040001L, +- 0x04000000L, 0x04000101L, 0x00040100L, 0x04000100L, 0x00040000L, +- 0x04040000L, 0x00000001L, 0x04040101L, 0x00000101L, 0x00000001L, +- 0x04040001L, 0x00000000L, 0x00040001L, 0x04040100L, 0x00000100L, +- 0x00000101L, 0x04040101L, 0x00040000L, 0x04000001L, 0x04040001L, +- 0x04000100L, 0x00040101L, 0x04040000L, 0x00040100L, 0x00000000L, +- 0x04000000L, 0x00040101L, 0x04040100L, 0x00000100L, 0x00000001L, +- 0x00040000L, 0x00000101L, 0x00040001L, 0x04040000L, 0x04000101L, +- 0x00000000L, 0x04040100L, 0x00040100L, 0x04040001L, 0x00040001L, +- 0x04000000L, 0x04040101L, 0x00000001L, 0x00040101L, 0x04000001L, +- 0x04000000L, 0x04040101L, 0x00040000L, 0x04000100L, 0x04000101L, +- 0x00040100L, 0x04000100L, 0x00000000L, 0x04040001L, 0x00000101L, +- 0x04000001L, 0x00040101L, 0x00000100L, 0x04040000L, }, +- { // nibble 3 +- 0x00401008L, 0x10001000L, 0x00000008L, 0x10401008L, 0x00000000L, +- 0x10400000L, 0x10001008L, 0x00400008L, 0x10401000L, 0x10000008L, +- 0x10000000L, 0x00001008L, 0x10000008L, 0x00401008L, 0x00400000L, +- 0x10000000L, 0x10400008L, 0x00401000L, 0x00001000L, 0x00000008L, +- 0x00401000L, 0x10001008L, 0x10400000L, 
0x00001000L, 0x00001008L, +- 0x00000000L, 0x00400008L, 0x10401000L, 0x10001000L, 0x10400008L, +- 0x10401008L, 0x00400000L, 0x10400008L, 0x00001008L, 0x00400000L, +- 0x10000008L, 0x00401000L, 0x10001000L, 0x00000008L, 0x10400000L, +- 0x10001008L, 0x00000000L, 0x00001000L, 0x00400008L, 0x00000000L, +- 0x10400008L, 0x10401000L, 0x00001000L, 0x10000000L, 0x10401008L, +- 0x00401008L, 0x00400000L, 0x10401008L, 0x00000008L, 0x10001000L, +- 0x00401008L, 0x00400008L, 0x00401000L, 0x10400000L, 0x10001008L, +- 0x00001008L, 0x10000000L, 0x10000008L, 0x10401000L, }, +- { // nibble 4 +- 0x08000000L, 0x00010000L, 0x00000400L, 0x08010420L, 0x08010020L, +- 0x08000400L, 0x00010420L, 0x08010000L, 0x00010000L, 0x00000020L, +- 0x08000020L, 0x00010400L, 0x08000420L, 0x08010020L, 0x08010400L, +- 0x00000000L, 0x00010400L, 0x08000000L, 0x00010020L, 0x00000420L, +- 0x08000400L, 0x00010420L, 0x00000000L, 0x08000020L, 0x00000020L, +- 0x08000420L, 0x08010420L, 0x00010020L, 0x08010000L, 0x00000400L, +- 0x00000420L, 0x08010400L, 0x08010400L, 0x08000420L, 0x00010020L, +- 0x08010000L, 0x00010000L, 0x00000020L, 0x08000020L, 0x08000400L, +- 0x08000000L, 0x00010400L, 0x08010420L, 0x00000000L, 0x00010420L, +- 0x08000000L, 0x00000400L, 0x00010020L, 0x08000420L, 0x00000400L, +- 0x00000000L, 0x08010420L, 0x08010020L, 0x08010400L, 0x00000420L, +- 0x00010000L, 0x00010400L, 0x08010020L, 0x08000400L, 0x00000420L, +- 0x00000020L, 0x00010420L, 0x08010000L, 0x08000020L, }, +- { // nibble 5 +- 0x80000040L, 0x00200040L, 0x00000000L, 0x80202000L, 0x00200040L, +- 0x00002000L, 0x80002040L, 0x00200000L, 0x00002040L, 0x80202040L, +- 0x00202000L, 0x80000000L, 0x80002000L, 0x80000040L, 0x80200000L, +- 0x00202040L, 0x00200000L, 0x80002040L, 0x80200040L, 0x00000000L, +- 0x00002000L, 0x00000040L, 0x80202000L, 0x80200040L, 0x80202040L, +- 0x80200000L, 0x80000000L, 0x00002040L, 0x00000040L, 0x00202000L, +- 0x00202040L, 0x80002000L, 0x00002040L, 0x80000000L, 0x80002000L, +- 0x00202040L, 0x80202000L, 0x00200040L, 0x00000000L, 0x80002000L, +- 0x80000000L, 0x00002000L, 0x80200040L, 0x00200000L, 0x00200040L, +- 0x80202040L, 0x00202000L, 0x00000040L, 0x80202040L, 0x00202000L, +- 0x00200000L, 0x80002040L, 0x80000040L, 0x80200000L, 0x00202040L, +- 0x00000000L, 0x00002000L, 0x80000040L, 0x80002040L, 0x80202000L, +- 0x80200000L, 0x00002040L, 0x00000040L, 0x80200040L, }, +- { // nibble 6 +- 0x00004000L, 0x00000200L, 0x01000200L, 0x01000004L, 0x01004204L, +- 0x00004004L, 0x00004200L, 0x00000000L, 0x01000000L, 0x01000204L, +- 0x00000204L, 0x01004000L, 0x00000004L, 0x01004200L, 0x01004000L, +- 0x00000204L, 0x01000204L, 0x00004000L, 0x00004004L, 0x01004204L, +- 0x00000000L, 0x01000200L, 0x01000004L, 0x00004200L, 0x01004004L, +- 0x00004204L, 0x01004200L, 0x00000004L, 0x00004204L, 0x01004004L, +- 0x00000200L, 0x01000000L, 0x00004204L, 0x01004000L, 0x01004004L, +- 0x00000204L, 0x00004000L, 0x00000200L, 0x01000000L, 0x01004004L, +- 0x01000204L, 0x00004204L, 0x00004200L, 0x00000000L, 0x00000200L, +- 0x01000004L, 0x00000004L, 0x01000200L, 0x00000000L, 0x01000204L, +- 0x01000200L, 0x00004200L, 0x00000204L, 0x00004000L, 0x01004204L, +- 0x01000000L, 0x01004200L, 0x00000004L, 0x00004004L, 0x01004204L, +- 0x01000004L, 0x01004200L, 0x01004000L, 0x00004004L, }, +- { // nibble 7 +- 0x20800080L, 0x20820000L, 0x00020080L, 0x00000000L, 0x20020000L, +- 0x00800080L, 0x20800000L, 0x20820080L, 0x00000080L, 0x20000000L, +- 0x00820000L, 0x00020080L, 0x00820080L, 0x20020080L, 0x20000080L, +- 0x20800000L, 0x00020000L, 0x00820080L, 0x00800080L, 0x20020000L, +- 0x20820080L, 0x20000080L, 
0x00000000L, 0x00820000L, 0x20000000L, +- 0x00800000L, 0x20020080L, 0x20800080L, 0x00800000L, 0x00020000L, +- 0x20820000L, 0x00000080L, 0x00800000L, 0x00020000L, 0x20000080L, +- 0x20820080L, 0x00020080L, 0x20000000L, 0x00000000L, 0x00820000L, +- 0x20800080L, 0x20020080L, 0x20020000L, 0x00800080L, 0x20820000L, +- 0x00000080L, 0x00800080L, 0x20020000L, 0x20820080L, 0x00800000L, +- 0x20800000L, 0x20000080L, 0x00820000L, 0x00020080L, 0x20020080L, +- 0x20800000L, 0x00000080L, 0x20820000L, 0x00820080L, 0x00000000L, +- 0x20000000L, 0x20800080L, 0x00020000L, 0x00820080L, }}; +- +-#define HPERM_OP(a, t, n, m) \ +- ((t) = ((((a) << (16 - (n))) ^ (a)) & (m)), \ +- (a) = (a) ^ (t) ^ ((t) >> (16 - (n)))) +- +-void DES_set_key(const DES_cblock *key, DES_key_schedule *schedule) { +- static const int shifts2[16] = {0, 0, 1, 1, 1, 1, 1, 1, +- 0, 1, 1, 1, 1, 1, 1, 0}; +- uint32_t c, d, t, s, t2; +- const uint8_t *in; +- int i; +- +- in = key->bytes; +- +- c2l(in, c); +- c2l(in, d); +- +- // do PC1 in 47 simple operations :-) +- // Thanks to John Fletcher (john_fletcher@lccmail.ocf.llnl.gov) +- // for the inspiration. :-) +- PERM_OP(d, c, t, 4, 0x0f0f0f0fL); +- HPERM_OP(c, t, -2, 0xcccc0000L); +- HPERM_OP(d, t, -2, 0xcccc0000L); +- PERM_OP(d, c, t, 1, 0x55555555L); +- PERM_OP(c, d, t, 8, 0x00ff00ffL); +- PERM_OP(d, c, t, 1, 0x55555555L); +- d = (((d & 0x000000ffL) << 16L) | (d & 0x0000ff00L) | +- ((d & 0x00ff0000L) >> 16L) | ((c & 0xf0000000L) >> 4L)); +- c &= 0x0fffffffL; +- +- for (i = 0; i < ITERATIONS; i++) { +- if (shifts2[i]) { +- c = ((c >> 2L) | (c << 26L)); +- d = ((d >> 2L) | (d << 26L)); +- } else { +- c = ((c >> 1L) | (c << 27L)); +- d = ((d >> 1L) | (d << 27L)); +- } +- c &= 0x0fffffffL; +- d &= 0x0fffffffL; +- // could be a few less shifts but I am to lazy at this +- // point in time to investigate +- s = des_skb[0][(c) & 0x3f] | +- des_skb[1][((c >> 6L) & 0x03) | ((c >> 7L) & 0x3c)] | +- des_skb[2][((c >> 13L) & 0x0f) | ((c >> 14L) & 0x30)] | +- des_skb[3][((c >> 20L) & 0x01) | ((c >> 21L) & 0x06) | +- ((c >> 22L) & 0x38)]; +- t = des_skb[4][(d) & 0x3f] | +- des_skb[5][((d >> 7L) & 0x03) | ((d >> 8L) & 0x3c)] | +- des_skb[6][(d >> 15L) & 0x3f] | +- des_skb[7][((d >> 21L) & 0x0f) | ((d >> 22L) & 0x30)]; +- +- // table contained 0213 4657 +- t2 = ((t << 16L) | (s & 0x0000ffffL)) & 0xffffffffL; +- schedule->subkeys[i][0] = CRYPTO_rotr_u32(t2, 30); +- +- t2 = ((s >> 16L) | (t & 0xffff0000L)); +- schedule->subkeys[i][1] = CRYPTO_rotr_u32(t2, 26); +- } +-} +- +-static const uint8_t kOddParity[256] = { +- 1, 1, 2, 2, 4, 4, 7, 7, 8, 8, 11, 11, 13, 13, 14, +- 14, 16, 16, 19, 19, 21, 21, 22, 22, 25, 25, 26, 26, 28, 28, +- 31, 31, 32, 32, 35, 35, 37, 37, 38, 38, 41, 41, 42, 42, 44, +- 44, 47, 47, 49, 49, 50, 50, 52, 52, 55, 55, 56, 56, 59, 59, +- 61, 61, 62, 62, 64, 64, 67, 67, 69, 69, 70, 70, 73, 73, 74, +- 74, 76, 76, 79, 79, 81, 81, 82, 82, 84, 84, 87, 87, 88, 88, +- 91, 91, 93, 93, 94, 94, 97, 97, 98, 98, 100, 100, 103, 103, 104, +- 104, 107, 107, 109, 109, 110, 110, 112, 112, 115, 115, 117, 117, 118, 118, +- 121, 121, 122, 122, 124, 124, 127, 127, 128, 128, 131, 131, 133, 133, 134, +- 134, 137, 137, 138, 138, 140, 140, 143, 143, 145, 145, 146, 146, 148, 148, +- 151, 151, 152, 152, 155, 155, 157, 157, 158, 158, 161, 161, 162, 162, 164, +- 164, 167, 167, 168, 168, 171, 171, 173, 173, 174, 174, 176, 176, 179, 179, +- 181, 181, 182, 182, 185, 185, 186, 186, 188, 188, 191, 191, 193, 193, 194, +- 194, 196, 196, 199, 199, 200, 200, 203, 203, 205, 205, 206, 206, 208, 208, +- 211, 211, 213, 213, 214, 214, 
217, 217, 218, 218, 220, 220, 223, 223, 224, +- 224, 227, 227, 229, 229, 230, 230, 233, 233, 234, 234, 236, 236, 239, 239, +- 241, 241, 242, 242, 244, 244, 247, 247, 248, 248, 251, 251, 253, 253, 254, +- 254 +-}; +- +-void DES_set_odd_parity(DES_cblock *key) { +- unsigned i; +- +- for (i = 0; i < DES_KEY_SZ; i++) { +- key->bytes[i] = kOddParity[key->bytes[i]]; +- } +-} +- +-static void DES_encrypt1(uint32_t *data, const DES_key_schedule *ks, int enc) { +- uint32_t l, r, t, u; +- +- r = data[0]; +- l = data[1]; +- +- IP(r, l); +- // Things have been modified so that the initial rotate is done outside +- // the loop. This required the DES_SPtrans values in sp.h to be +- // rotated 1 bit to the right. One perl script later and things have a +- // 5% speed up on a sparc2. Thanks to Richard Outerbridge +- // <71755.204@CompuServe.COM> for pointing this out. +- // clear the top bits on machines with 8byte longs +- // shift left by 2 +- r = CRYPTO_rotr_u32(r, 29); +- l = CRYPTO_rotr_u32(l, 29); +- +- // I don't know if it is worth the effort of loop unrolling the +- // inner loop +- if (enc) { +- D_ENCRYPT(ks, l, r, 0); +- D_ENCRYPT(ks, r, l, 1); +- D_ENCRYPT(ks, l, r, 2); +- D_ENCRYPT(ks, r, l, 3); +- D_ENCRYPT(ks, l, r, 4); +- D_ENCRYPT(ks, r, l, 5); +- D_ENCRYPT(ks, l, r, 6); +- D_ENCRYPT(ks, r, l, 7); +- D_ENCRYPT(ks, l, r, 8); +- D_ENCRYPT(ks, r, l, 9); +- D_ENCRYPT(ks, l, r, 10); +- D_ENCRYPT(ks, r, l, 11); +- D_ENCRYPT(ks, l, r, 12); +- D_ENCRYPT(ks, r, l, 13); +- D_ENCRYPT(ks, l, r, 14); +- D_ENCRYPT(ks, r, l, 15); +- } else { +- D_ENCRYPT(ks, l, r, 15); +- D_ENCRYPT(ks, r, l, 14); +- D_ENCRYPT(ks, l, r, 13); +- D_ENCRYPT(ks, r, l, 12); +- D_ENCRYPT(ks, l, r, 11); +- D_ENCRYPT(ks, r, l, 10); +- D_ENCRYPT(ks, l, r, 9); +- D_ENCRYPT(ks, r, l, 8); +- D_ENCRYPT(ks, l, r, 7); +- D_ENCRYPT(ks, r, l, 6); +- D_ENCRYPT(ks, l, r, 5); +- D_ENCRYPT(ks, r, l, 4); +- D_ENCRYPT(ks, l, r, 3); +- D_ENCRYPT(ks, r, l, 2); +- D_ENCRYPT(ks, l, r, 1); +- D_ENCRYPT(ks, r, l, 0); +- } +- +- // rotate and clear the top bits on machines with 8byte longs +- l = CRYPTO_rotr_u32(l, 3); +- r = CRYPTO_rotr_u32(r, 3); +- +- FP(r, l); +- data[0] = l; +- data[1] = r; +-} +- +-static void DES_encrypt2(uint32_t *data, const DES_key_schedule *ks, int enc) { +- uint32_t l, r, t, u; +- +- r = data[0]; +- l = data[1]; +- +- // Things have been modified so that the initial rotate is done outside the +- // loop. This required the DES_SPtrans values in sp.h to be rotated 1 bit to +- // the right. One perl script later and things have a 5% speed up on a +- // sparc2. Thanks to Richard Outerbridge <71755.204@CompuServe.COM> for +- // pointing this out. 
+- // clear the top bits on machines with 8byte longs +- r = CRYPTO_rotr_u32(r, 29); +- l = CRYPTO_rotr_u32(l, 29); +- +- // I don't know if it is worth the effort of loop unrolling the +- // inner loop +- if (enc) { +- D_ENCRYPT(ks, l, r, 0); +- D_ENCRYPT(ks, r, l, 1); +- D_ENCRYPT(ks, l, r, 2); +- D_ENCRYPT(ks, r, l, 3); +- D_ENCRYPT(ks, l, r, 4); +- D_ENCRYPT(ks, r, l, 5); +- D_ENCRYPT(ks, l, r, 6); +- D_ENCRYPT(ks, r, l, 7); +- D_ENCRYPT(ks, l, r, 8); +- D_ENCRYPT(ks, r, l, 9); +- D_ENCRYPT(ks, l, r, 10); +- D_ENCRYPT(ks, r, l, 11); +- D_ENCRYPT(ks, l, r, 12); +- D_ENCRYPT(ks, r, l, 13); +- D_ENCRYPT(ks, l, r, 14); +- D_ENCRYPT(ks, r, l, 15); +- } else { +- D_ENCRYPT(ks, l, r, 15); +- D_ENCRYPT(ks, r, l, 14); +- D_ENCRYPT(ks, l, r, 13); +- D_ENCRYPT(ks, r, l, 12); +- D_ENCRYPT(ks, l, r, 11); +- D_ENCRYPT(ks, r, l, 10); +- D_ENCRYPT(ks, l, r, 9); +- D_ENCRYPT(ks, r, l, 8); +- D_ENCRYPT(ks, l, r, 7); +- D_ENCRYPT(ks, r, l, 6); +- D_ENCRYPT(ks, l, r, 5); +- D_ENCRYPT(ks, r, l, 4); +- D_ENCRYPT(ks, l, r, 3); +- D_ENCRYPT(ks, r, l, 2); +- D_ENCRYPT(ks, l, r, 1); +- D_ENCRYPT(ks, r, l, 0); +- } +- // rotate and clear the top bits on machines with 8byte longs +- data[0] = CRYPTO_rotr_u32(l, 3); +- data[1] = CRYPTO_rotr_u32(r, 3); +-} +- +-void DES_encrypt3(uint32_t *data, const DES_key_schedule *ks1, +- const DES_key_schedule *ks2, const DES_key_schedule *ks3) { +- uint32_t l, r; +- +- l = data[0]; +- r = data[1]; +- IP(l, r); +- data[0] = l; +- data[1] = r; +- DES_encrypt2((uint32_t *)data, ks1, DES_ENCRYPT); +- DES_encrypt2((uint32_t *)data, ks2, DES_DECRYPT); +- DES_encrypt2((uint32_t *)data, ks3, DES_ENCRYPT); +- l = data[0]; +- r = data[1]; +- FP(r, l); +- data[0] = l; +- data[1] = r; +-} +- +-void DES_decrypt3(uint32_t *data, const DES_key_schedule *ks1, +- const DES_key_schedule *ks2, const DES_key_schedule *ks3) { +- uint32_t l, r; +- +- l = data[0]; +- r = data[1]; +- IP(l, r); +- data[0] = l; +- data[1] = r; +- DES_encrypt2((uint32_t *)data, ks3, DES_DECRYPT); +- DES_encrypt2((uint32_t *)data, ks2, DES_ENCRYPT); +- DES_encrypt2((uint32_t *)data, ks1, DES_DECRYPT); +- l = data[0]; +- r = data[1]; +- FP(r, l); +- data[0] = l; +- data[1] = r; +-} +- +-void DES_ecb_encrypt(const DES_cblock *in_block, DES_cblock *out_block, +- const DES_key_schedule *schedule, int is_encrypt) { +- uint32_t l; +- uint32_t ll[2]; +- const uint8_t *in = in_block->bytes; +- uint8_t *out = out_block->bytes; +- +- c2l(in, l); +- ll[0] = l; +- c2l(in, l); +- ll[1] = l; +- DES_encrypt1(ll, schedule, is_encrypt); +- l = ll[0]; +- l2c(l, out); +- l = ll[1]; +- l2c(l, out); +- ll[0] = ll[1] = 0; +-} +- +-void DES_ncbc_encrypt(const uint8_t *in, uint8_t *out, size_t len, +- const DES_key_schedule *schedule, DES_cblock *ivec, +- int enc) { +- uint32_t tin0, tin1; +- uint32_t tout0, tout1, xor0, xor1; +- uint32_t tin[2]; +- unsigned char *iv; +- +- iv = ivec->bytes; +- +- if (enc) { +- c2l(iv, tout0); +- c2l(iv, tout1); +- for (; len >= 8; len -= 8) { +- c2l(in, tin0); +- c2l(in, tin1); +- tin0 ^= tout0; +- tin[0] = tin0; +- tin1 ^= tout1; +- tin[1] = tin1; +- DES_encrypt1((uint32_t *)tin, schedule, DES_ENCRYPT); +- tout0 = tin[0]; +- l2c(tout0, out); +- tout1 = tin[1]; +- l2c(tout1, out); +- } +- if (len != 0) { +- c2ln(in, tin0, tin1, len); +- tin0 ^= tout0; +- tin[0] = tin0; +- tin1 ^= tout1; +- tin[1] = tin1; +- DES_encrypt1((uint32_t *)tin, schedule, DES_ENCRYPT); +- tout0 = tin[0]; +- l2c(tout0, out); +- tout1 = tin[1]; +- l2c(tout1, out); +- } +- iv = ivec->bytes; +- l2c(tout0, iv); +- l2c(tout1, iv); +- } else 
{ +- c2l(iv, xor0); +- c2l(iv, xor1); +- for (; len >= 8; len -= 8) { +- c2l(in, tin0); +- tin[0] = tin0; +- c2l(in, tin1); +- tin[1] = tin1; +- DES_encrypt1((uint32_t *)tin, schedule, DES_DECRYPT); +- tout0 = tin[0] ^ xor0; +- tout1 = tin[1] ^ xor1; +- l2c(tout0, out); +- l2c(tout1, out); +- xor0 = tin0; +- xor1 = tin1; +- } +- if (len != 0) { +- c2l(in, tin0); +- tin[0] = tin0; +- c2l(in, tin1); +- tin[1] = tin1; +- DES_encrypt1((uint32_t *)tin, schedule, DES_DECRYPT); +- tout0 = tin[0] ^ xor0; +- tout1 = tin[1] ^ xor1; +- l2cn(tout0, tout1, out, len); +- xor0 = tin0; +- xor1 = tin1; +- } +- iv = ivec->bytes; +- l2c(xor0, iv); +- l2c(xor1, iv); +- } +- tin[0] = tin[1] = 0; +-} +- +-void DES_ecb3_encrypt(const DES_cblock *input, DES_cblock *output, +- const DES_key_schedule *ks1, const DES_key_schedule *ks2, +- const DES_key_schedule *ks3, int enc) { +- uint32_t l0, l1; +- uint32_t ll[2]; +- const uint8_t *in = input->bytes; +- uint8_t *out = output->bytes; +- +- c2l(in, l0); +- c2l(in, l1); +- ll[0] = l0; +- ll[1] = l1; +- if (enc) { +- DES_encrypt3(ll, ks1, ks2, ks3); +- } else { +- DES_decrypt3(ll, ks1, ks2, ks3); +- } +- l0 = ll[0]; +- l1 = ll[1]; +- l2c(l0, out); +- l2c(l1, out); +-} +- +-void DES_ede3_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len, +- const DES_key_schedule *ks1, +- const DES_key_schedule *ks2, +- const DES_key_schedule *ks3, DES_cblock *ivec, +- int enc) { +- uint32_t tin0, tin1; +- uint32_t tout0, tout1, xor0, xor1; +- uint32_t tin[2]; +- uint8_t *iv; +- +- iv = ivec->bytes; +- +- if (enc) { +- c2l(iv, tout0); +- c2l(iv, tout1); +- for (; len >= 8; len -= 8) { +- c2l(in, tin0); +- c2l(in, tin1); +- tin0 ^= tout0; +- tin1 ^= tout1; +- +- tin[0] = tin0; +- tin[1] = tin1; +- DES_encrypt3((uint32_t *)tin, ks1, ks2, ks3); +- tout0 = tin[0]; +- tout1 = tin[1]; +- +- l2c(tout0, out); +- l2c(tout1, out); +- } +- if (len != 0) { +- c2ln(in, tin0, tin1, len); +- tin0 ^= tout0; +- tin1 ^= tout1; +- +- tin[0] = tin0; +- tin[1] = tin1; +- DES_encrypt3((uint32_t *)tin, ks1, ks2, ks3); +- tout0 = tin[0]; +- tout1 = tin[1]; +- +- l2c(tout0, out); +- l2c(tout1, out); +- } +- iv = ivec->bytes; +- l2c(tout0, iv); +- l2c(tout1, iv); +- } else { +- uint32_t t0, t1; +- +- c2l(iv, xor0); +- c2l(iv, xor1); +- for (; len >= 8; len -= 8) { +- c2l(in, tin0); +- c2l(in, tin1); +- +- t0 = tin0; +- t1 = tin1; +- +- tin[0] = tin0; +- tin[1] = tin1; +- DES_decrypt3((uint32_t *)tin, ks1, ks2, ks3); +- tout0 = tin[0]; +- tout1 = tin[1]; +- +- tout0 ^= xor0; +- tout1 ^= xor1; +- l2c(tout0, out); +- l2c(tout1, out); +- xor0 = t0; +- xor1 = t1; +- } +- if (len != 0) { +- c2l(in, tin0); +- c2l(in, tin1); +- +- t0 = tin0; +- t1 = tin1; +- +- tin[0] = tin0; +- tin[1] = tin1; +- DES_decrypt3((uint32_t *)tin, ks1, ks2, ks3); +- tout0 = tin[0]; +- tout1 = tin[1]; +- +- tout0 ^= xor0; +- tout1 ^= xor1; +- l2cn(tout0, tout1, out, len); +- xor0 = t0; +- xor1 = t1; +- } +- +- iv = ivec->bytes; +- l2c(xor0, iv); +- l2c(xor1, iv); +- } +- +- tin[0] = tin[1] = 0; +-} +- +-void DES_ede2_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len, +- const DES_key_schedule *ks1, +- const DES_key_schedule *ks2, +- DES_cblock *ivec, +- int enc) { +- DES_ede3_cbc_encrypt(in, out, len, ks1, ks2, ks1, ivec, enc); +-} +- +- +-// Deprecated functions. 
+- +-void DES_set_key_unchecked(const DES_cblock *key, DES_key_schedule *schedule) { +- DES_set_key(key, schedule); +-} +- +-#undef HPERM_OP +-#undef c2l +-#undef l2c +-#undef c2ln +-#undef l2cn +-#undef PERM_OP +-#undef IP +-#undef FP +-#undef LOAD_DATA +-#undef D_ENCRYPT +-#undef ITERATIONS +-#undef HALF_ITERATIONS +diff --git a/src/crypto/fipsmodule/des/internal.h b/src/crypto/fipsmodule/des/internal.h +deleted file mode 100644 +index 3e3992e..0000000 +--- a/src/crypto/fipsmodule/des/internal.h ++++ /dev/null +@@ -1,238 +0,0 @@ +-/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) +- * All rights reserved. +- * +- * This package is an SSL implementation written +- * by Eric Young (eay@cryptsoft.com). +- * The implementation was written so as to conform with Netscapes SSL. +- * +- * This library is free for commercial and non-commercial use as long as +- * the following conditions are aheared to. The following conditions +- * apply to all code found in this distribution, be it the RC4, RSA, +- * lhash, DES, etc., code; not just the SSL code. The SSL documentation +- * included with this distribution is covered by the same copyright terms +- * except that the holder is Tim Hudson (tjh@cryptsoft.com). +- * +- * Copyright remains Eric Young's, and as such any Copyright notices in +- * the code are not to be removed. +- * If this package is used in a product, Eric Young should be given attribution +- * as the author of the parts of the library used. +- * This can be in the form of a textual message at program startup or +- * in documentation (online or textual) provided with the package. +- * +- * Redistribution and use in source and binary forms, with or without +- * modification, are permitted provided that the following conditions +- * are met: +- * 1. Redistributions of source code must retain the copyright +- * notice, this list of conditions and the following disclaimer. +- * 2. Redistributions in binary form must reproduce the above copyright +- * notice, this list of conditions and the following disclaimer in the +- * documentation and/or other materials provided with the distribution. +- * 3. All advertising materials mentioning features or use of this software +- * must display the following acknowledgement: +- * "This product includes cryptographic software written by +- * Eric Young (eay@cryptsoft.com)" +- * The word 'cryptographic' can be left out if the rouines from the library +- * being used are not cryptographic related :-). +- * 4. If you include any Windows specific code (or a derivative thereof) from +- * the apps directory (application code) you must include an acknowledgement: +- * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" +- * +- * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND +- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +- * SUCH DAMAGE. 
+- * +- * The licence and distribution terms for any publically available version or +- * derivative of this code cannot be changed. i.e. this code cannot simply be +- * copied and put under another distribution licence +- * [including the GNU Public Licence.] */ +- +-#ifndef OPENSSL_HEADER_DES_INTERNAL_H +-#define OPENSSL_HEADER_DES_INTERNAL_H +- +-#include +- +-#include "../../internal.h" +- +-#if defined(__cplusplus) +-extern "C" { +-#endif +- +- +-#define c2l(c, l) \ +- do { \ +- (l) = ((uint32_t)(*((c)++))); \ +- (l) |= ((uint32_t)(*((c)++))) << 8L; \ +- (l) |= ((uint32_t)(*((c)++))) << 16L; \ +- (l) |= ((uint32_t)(*((c)++))) << 24L; \ +- } while (0) +- +-#define l2c(l, c) \ +- do { \ +- *((c)++) = (unsigned char)(((l)) & 0xff); \ +- *((c)++) = (unsigned char)(((l) >> 8L) & 0xff); \ +- *((c)++) = (unsigned char)(((l) >> 16L) & 0xff); \ +- *((c)++) = (unsigned char)(((l) >> 24L) & 0xff); \ +- } while (0) +- +-// NOTE - c is not incremented as per c2l +-#define c2ln(c, l1, l2, n) \ +- do { \ +- (c) += (n); \ +- (l1) = (l2) = 0; \ +- switch (n) { \ +- case 8: \ +- (l2) = ((uint32_t)(*(--(c)))) << 24L; \ +- OPENSSL_FALLTHROUGH; \ +- case 7: \ +- (l2) |= ((uint32_t)(*(--(c)))) << 16L; \ +- OPENSSL_FALLTHROUGH; \ +- case 6: \ +- (l2) |= ((uint32_t)(*(--(c)))) << 8L; \ +- OPENSSL_FALLTHROUGH; \ +- case 5: \ +- (l2) |= ((uint32_t)(*(--(c)))); \ +- OPENSSL_FALLTHROUGH; \ +- case 4: \ +- (l1) = ((uint32_t)(*(--(c)))) << 24L; \ +- OPENSSL_FALLTHROUGH; \ +- case 3: \ +- (l1) |= ((uint32_t)(*(--(c)))) << 16L; \ +- OPENSSL_FALLTHROUGH; \ +- case 2: \ +- (l1) |= ((uint32_t)(*(--(c)))) << 8L; \ +- OPENSSL_FALLTHROUGH; \ +- case 1: \ +- (l1) |= ((uint32_t)(*(--(c)))); \ +- } \ +- } while (0) +- +-// NOTE - c is not incremented as per l2c +-#define l2cn(l1, l2, c, n) \ +- do { \ +- (c) += (n); \ +- switch (n) { \ +- case 8: \ +- *(--(c)) = (unsigned char)(((l2) >> 24L) & 0xff); \ +- OPENSSL_FALLTHROUGH; \ +- case 7: \ +- *(--(c)) = (unsigned char)(((l2) >> 16L) & 0xff); \ +- OPENSSL_FALLTHROUGH; \ +- case 6: \ +- *(--(c)) = (unsigned char)(((l2) >> 8L) & 0xff); \ +- OPENSSL_FALLTHROUGH; \ +- case 5: \ +- *(--(c)) = (unsigned char)(((l2)) & 0xff); \ +- OPENSSL_FALLTHROUGH; \ +- case 4: \ +- *(--(c)) = (unsigned char)(((l1) >> 24L) & 0xff); \ +- OPENSSL_FALLTHROUGH; \ +- case 3: \ +- *(--(c)) = (unsigned char)(((l1) >> 16L) & 0xff); \ +- OPENSSL_FALLTHROUGH; \ +- case 2: \ +- *(--(c)) = (unsigned char)(((l1) >> 8L) & 0xff); \ +- OPENSSL_FALLTHROUGH; \ +- case 1: \ +- *(--(c)) = (unsigned char)(((l1)) & 0xff); \ +- } \ +- } while (0) +- +-/* IP and FP +- * The problem is more of a geometric problem that random bit fiddling. +- 0 1 2 3 4 5 6 7 62 54 46 38 30 22 14 6 +- 8 9 10 11 12 13 14 15 60 52 44 36 28 20 12 4 +-16 17 18 19 20 21 22 23 58 50 42 34 26 18 10 2 +-24 25 26 27 28 29 30 31 to 56 48 40 32 24 16 8 0 +- +-32 33 34 35 36 37 38 39 63 55 47 39 31 23 15 7 +-40 41 42 43 44 45 46 47 61 53 45 37 29 21 13 5 +-48 49 50 51 52 53 54 55 59 51 43 35 27 19 11 3 +-56 57 58 59 60 61 62 63 57 49 41 33 25 17 9 1 +- +-The output has been subject to swaps of the form +-0 1 -> 3 1 but the odd and even bits have been put into +-2 3 2 0 +-different words. 
The main trick is to remember that +-t=((l>>size)^r)&(mask); +-r^=t; +-l^=(t<> (n)) ^ (b)) & (m)); \ +- (b) ^= (t); \ +- (a) ^= ((t) << (n)); \ +- } while (0) +- +-#define IP(l, r) \ +- do { \ +- uint32_t tt; \ +- PERM_OP(r, l, tt, 4, 0x0f0f0f0fL); \ +- PERM_OP(l, r, tt, 16, 0x0000ffffL); \ +- PERM_OP(r, l, tt, 2, 0x33333333L); \ +- PERM_OP(l, r, tt, 8, 0x00ff00ffL); \ +- PERM_OP(r, l, tt, 1, 0x55555555L); \ +- } while (0) +- +-#define FP(l, r) \ +- do { \ +- uint32_t tt; \ +- PERM_OP(l, r, tt, 1, 0x55555555L); \ +- PERM_OP(r, l, tt, 8, 0x00ff00ffL); \ +- PERM_OP(l, r, tt, 2, 0x33333333L); \ +- PERM_OP(r, l, tt, 16, 0x0000ffffL); \ +- PERM_OP(l, r, tt, 4, 0x0f0f0f0fL); \ +- } while (0) +- +-#define LOAD_DATA(ks, R, S, u, t, E0, E1) \ +- do { \ +- (u) = (R) ^ (ks)->subkeys[S][0]; \ +- (t) = (R) ^ (ks)->subkeys[S][1]; \ +- } while (0) +- +-#define D_ENCRYPT(ks, LL, R, S) \ +- do { \ +- LOAD_DATA(ks, R, S, u, t, E0, E1); \ +- t = CRYPTO_rotr_u32(t, 4); \ +- (LL) ^= \ +- DES_SPtrans[0][(u >> 2L) & 0x3f] ^ DES_SPtrans[2][(u >> 10L) & 0x3f] ^ \ +- DES_SPtrans[4][(u >> 18L) & 0x3f] ^ \ +- DES_SPtrans[6][(u >> 26L) & 0x3f] ^ DES_SPtrans[1][(t >> 2L) & 0x3f] ^ \ +- DES_SPtrans[3][(t >> 10L) & 0x3f] ^ \ +- DES_SPtrans[5][(t >> 18L) & 0x3f] ^ DES_SPtrans[7][(t >> 26L) & 0x3f]; \ +- } while (0) +- +-#define ITERATIONS 16 +-#define HALF_ITERATIONS 8 +- +- +-#if defined(__cplusplus) +-} // extern C +-#endif +- +-#endif // OPENSSL_HEADER_DES_INTERNAL_H +diff --git a/src/crypto/fipsmodule/dh/dh.c b/src/crypto/fipsmodule/dh/dh.c +index ab596e9..b59afc6 100644 +--- a/src/crypto/fipsmodule/dh/dh.c ++++ b/src/crypto/fipsmodule/dh/dh.c +@@ -64,6 +64,7 @@ + #include + #include + ++#include "internal.h" + #include "../../internal.h" + #include "../bn/internal.h" + +@@ -186,6 +187,8 @@ int DH_set_length(DH *dh, unsigned priv_length) { + } + + int DH_generate_key(DH *dh) { ++ boringssl_ensure_ffdh_self_test(); ++ + int ok = 0; + int generate_new_key = 0; + BN_CTX *ctx = NULL; +@@ -322,7 +325,8 @@ static int dh_compute_key(DH *dh, BIGNUM *out_shared_key, + return ret; + } + +-int DH_compute_key_padded(unsigned char *out, const BIGNUM *peers_key, DH *dh) { ++int dh_compute_key_padded_no_self_test(unsigned char *out, ++ const BIGNUM *peers_key, DH *dh) { + BN_CTX *ctx = BN_CTX_new(); + if (ctx == NULL) { + return -1; +@@ -343,7 +347,15 @@ int DH_compute_key_padded(unsigned char *out, const BIGNUM *peers_key, DH *dh) { + return ret; + } + ++int DH_compute_key_padded(unsigned char *out, const BIGNUM *peers_key, DH *dh) { ++ boringssl_ensure_ffdh_self_test(); ++ ++ return dh_compute_key_padded_no_self_test(out, peers_key, dh); ++} ++ + int DH_compute_key(unsigned char *out, const BIGNUM *peers_key, DH *dh) { ++ boringssl_ensure_ffdh_self_test(); ++ + BN_CTX *ctx = BN_CTX_new(); + if (ctx == NULL) { + return -1; +diff --git a/src/crypto/fipsmodule/dh/internal.h b/src/crypto/fipsmodule/dh/internal.h +new file mode 100644 +index 0000000..c40172d +--- /dev/null ++++ b/src/crypto/fipsmodule/dh/internal.h +@@ -0,0 +1,36 @@ ++/* Copyright (c) 2022, Google Inc. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY ++ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION ++ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN ++ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ ++ ++#ifndef OPENSSL_HEADER_CRYPTO_FIPSMODULE_DH_INTERNAL_H ++#define OPENSSL_HEADER_CRYPTO_FIPSMODULE_DH_INTERNAL_H ++ ++#include ++ ++#if defined(__cplusplus) ++extern "C" { ++#endif ++ ++ ++// dh_compute_key_padded_no_self_test does the same as |DH_compute_key_padded|, ++// but doesn't try to run the self-test first. This is for use in the self tests ++// themselves, to prevent an infinite loop. ++int dh_compute_key_padded_no_self_test(unsigned char *out, ++ const BIGNUM *peers_key, DH *dh); ++ ++ ++#if defined(__cplusplus) ++} ++#endif ++ ++#endif // OPENSSL_HEADER_CRYPTO_FIPSMODULE_DH_INTERNAL_H +diff --git a/src/crypto/fipsmodule/ec/ec.c b/src/crypto/fipsmodule/ec/ec.c +index 1f03e15..93fdcfc 100644 +--- a/src/crypto/fipsmodule/ec/ec.c ++++ b/src/crypto/fipsmodule/ec/ec.c +@@ -943,8 +943,9 @@ static int arbitrary_bignum_to_scalar(const EC_GROUP *group, EC_SCALAR *out, + return ok; + } + +-int EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar, +- const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx) { ++int ec_point_mul_no_self_test(const EC_GROUP *group, EC_POINT *r, ++ const BIGNUM *g_scalar, const EC_POINT *p, ++ const BIGNUM *p_scalar, BN_CTX *ctx) { + // Previously, this function set |r| to the point at infinity if there was + // nothing to multiply. But, nobody should be calling this function with + // nothing to multiply in the first place. +@@ -1010,6 +1011,13 @@ err: + return ret; + } + ++int EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar, ++ const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx) { ++ boringssl_ensure_ecc_self_test(); ++ ++ return ec_point_mul_no_self_test(group, r, g_scalar, p, p_scalar, ctx); ++} ++ + int ec_point_mul_scalar_public(const EC_GROUP *group, EC_RAW_POINT *r, + const EC_SCALAR *g_scalar, const EC_RAW_POINT *p, + const EC_SCALAR *p_scalar) { +diff --git a/src/crypto/fipsmodule/ec/ec_key.c b/src/crypto/fipsmodule/ec/ec_key.c +index 7a6daab..d7acf96 100644 +--- a/src/crypto/fipsmodule/ec/ec_key.c ++++ b/src/crypto/fipsmodule/ec/ec_key.c +@@ -339,9 +339,9 @@ int EC_KEY_check_fips(const EC_KEY *key) { + if (key->priv_key) { + uint8_t data[16] = {0}; + ECDSA_SIG *sig = ECDSA_do_sign(data, sizeof(data), key); +-#if defined(BORINGSSL_FIPS_BREAK_ECDSA_PWCT) +- data[0] = ~data[0]; +-#endif ++ if (boringssl_fips_break_test("ECDSA_PWCT")) { ++ data[0] = ~data[0]; ++ } + int ok = sig != NULL && + ECDSA_do_verify(data, sizeof(data), sig, key); + ECDSA_SIG_free(sig); +@@ -439,6 +439,8 @@ int EC_KEY_generate_key(EC_KEY *key) { + } + + int EC_KEY_generate_key_fips(EC_KEY *eckey) { ++ boringssl_ensure_ecc_self_test(); ++ + if (EC_KEY_generate_key(eckey) && EC_KEY_check_fips(eckey)) { + return 1; + } +diff --git a/src/crypto/fipsmodule/ec/internal.h b/src/crypto/fipsmodule/ec/internal.h +index 289c3aa..488adb8 100644 +--- a/src/crypto/fipsmodule/ec/internal.h ++++ b/src/crypto/fipsmodule/ec/internal.h +@@ -301,6 +301,13 @@ int ec_jacobian_to_affine_batch(const EC_GROUP *group, EC_AFFINE *out, + int ec_point_set_affine_coordinates(const EC_GROUP *group, EC_AFFINE *out, + const EC_FELEM *x, const EC_FELEM *y); + ++// ec_point_mul_no_self_test does the same as |EC_POINT_mul|, but 
doesn't try to ++// run the self-test first. This is for use in the self tests themselves, to ++// prevent an infinite loop. ++int ec_point_mul_no_self_test(const EC_GROUP *group, EC_POINT *r, ++ const BIGNUM *g_scalar, const EC_POINT *p, ++ const BIGNUM *p_scalar, BN_CTX *ctx); ++ + // ec_point_mul_scalar sets |r| to |p| * |scalar|. Both inputs are considered + // secret. + int ec_point_mul_scalar(const EC_GROUP *group, EC_RAW_POINT *r, +diff --git a/src/crypto/fipsmodule/ec/p256-x86_64.c b/src/crypto/fipsmodule/ec/p256-x86_64.c +index 99deb36..506b7d2 100644 +--- a/src/crypto/fipsmodule/ec/p256-x86_64.c ++++ b/src/crypto/fipsmodule/ec/p256-x86_64.c +@@ -554,7 +554,7 @@ static void ecp_nistz256_inv0_mod_ord(const EC_GROUP *group, EC_SCALAR *out, + static int ecp_nistz256_scalar_to_montgomery_inv_vartime(const EC_GROUP *group, + EC_SCALAR *out, + const EC_SCALAR *in) { +- if ((OPENSSL_ia32cap_get()[1] & (1 << 28)) == 0) { ++ if (!CRYPTO_is_AVX_capable()) { + // No AVX support; fallback to generic code. + return ec_simple_scalar_to_montgomery_inv_vartime(group, out, in); + } +diff --git a/src/crypto/fipsmodule/ec/p256-x86_64_test.cc b/src/crypto/fipsmodule/ec/p256-x86_64_test.cc +index a083f3d..f6f070a 100644 +--- a/src/crypto/fipsmodule/ec/p256-x86_64_test.cc ++++ b/src/crypto/fipsmodule/ec/p256-x86_64_test.cc +@@ -98,7 +98,7 @@ TEST(P256_X86_64Test, SelectW7) { + } + + TEST(P256_X86_64Test, BEEU) { +- if ((OPENSSL_ia32cap_P[1] & (1 << 28)) == 0) { ++ if (!CRYPTO_is_AVX_capable()) { + // No AVX support; cannot run the BEEU code. + return; + } +diff --git a/src/crypto/fipsmodule/ecdh/ecdh.c b/src/crypto/fipsmodule/ecdh/ecdh.c +index 4e6d0bf..36fbadc 100644 +--- a/src/crypto/fipsmodule/ecdh/ecdh.c ++++ b/src/crypto/fipsmodule/ecdh/ecdh.c +@@ -75,10 +75,13 @@ + #include + + #include "../ec/internal.h" ++#include "../../internal.h" + + + int ECDH_compute_key_fips(uint8_t *out, size_t out_len, const EC_POINT *pub_key, + const EC_KEY *priv_key) { ++ boringssl_ensure_ecc_self_test(); ++ + if (priv_key->priv_key == NULL) { + OPENSSL_PUT_ERROR(ECDH, ECDH_R_NO_PRIVATE_VALUE); + return 0; +diff --git a/src/crypto/fipsmodule/ecdsa/ecdsa.c b/src/crypto/fipsmodule/ecdsa/ecdsa.c +index 5d99903..db0c6e5 100644 +--- a/src/crypto/fipsmodule/ecdsa/ecdsa.c ++++ b/src/crypto/fipsmodule/ecdsa/ecdsa.c +@@ -151,8 +151,8 @@ int ECDSA_SIG_set0(ECDSA_SIG *sig, BIGNUM *r, BIGNUM *s) { + return 1; + } + +-int ECDSA_do_verify(const uint8_t *digest, size_t digest_len, +- const ECDSA_SIG *sig, const EC_KEY *eckey) { ++int ecdsa_do_verify_no_self_test(const uint8_t *digest, size_t digest_len, ++ const ECDSA_SIG *sig, const EC_KEY *eckey) { + const EC_GROUP *group = EC_KEY_get0_group(eckey); + const EC_POINT *pub_key = EC_KEY_get0_public_key(eckey); + if (group == NULL || pub_key == NULL || sig == NULL) { +@@ -198,6 +198,13 @@ int ECDSA_do_verify(const uint8_t *digest, size_t digest_len, + return 1; + } + ++int ECDSA_do_verify(const uint8_t *digest, size_t digest_len, ++ const ECDSA_SIG *sig, const EC_KEY *eckey) { ++ boringssl_ensure_ecc_self_test(); ++ ++ return ecdsa_do_verify_no_self_test(digest, digest_len, sig, eckey); ++} ++ + static ECDSA_SIG *ecdsa_sign_impl(const EC_GROUP *group, int *out_retry, + const EC_SCALAR *priv_key, const EC_SCALAR *k, + const uint8_t *digest, size_t digest_len) { +@@ -292,12 +299,16 @@ ECDSA_SIG *ecdsa_sign_with_nonce_for_known_answer_test(const uint8_t *digest, + ECDSA_SIG *ECDSA_sign_with_nonce_and_leak_private_key_for_testing( + const uint8_t *digest, size_t digest_len, const EC_KEY 
*eckey, + const uint8_t *nonce, size_t nonce_len) { ++ boringssl_ensure_ecc_self_test(); ++ + return ecdsa_sign_with_nonce_for_known_answer_test(digest, digest_len, eckey, + nonce, nonce_len); + } + + ECDSA_SIG *ECDSA_do_sign(const uint8_t *digest, size_t digest_len, + const EC_KEY *eckey) { ++ boringssl_ensure_ecc_self_test(); ++ + if (eckey->ecdsa_meth && eckey->ecdsa_meth->sign) { + OPENSSL_PUT_ERROR(ECDSA, ECDSA_R_NOT_IMPLEMENTED); + return NULL; +diff --git a/src/crypto/fipsmodule/ecdsa/internal.h b/src/crypto/fipsmodule/ecdsa/internal.h +index 5115dfa..645959f 100644 +--- a/src/crypto/fipsmodule/ecdsa/internal.h ++++ b/src/crypto/fipsmodule/ecdsa/internal.h +@@ -31,6 +31,12 @@ ECDSA_SIG *ecdsa_sign_with_nonce_for_known_answer_test(const uint8_t *digest, + const uint8_t *nonce, + size_t nonce_len); + ++// ecdsa_do_verify_no_self_test does the same as |ECDSA_do_verify|, but doesn't ++// try to run the self-test first. This is for use in the self tests themselves, ++// to prevent an infinite loop. ++int ecdsa_do_verify_no_self_test(const uint8_t *digest, size_t digest_len, ++ const ECDSA_SIG *sig, const EC_KEY *eckey); ++ + + #if defined(__cplusplus) + } +diff --git a/src/crypto/fipsmodule/modes/gcm.c b/src/crypto/fipsmodule/modes/gcm.c +index 28218b4..5b909aa 100644 +--- a/src/crypto/fipsmodule/modes/gcm.c ++++ b/src/crypto/fipsmodule/modes/gcm.c +@@ -152,7 +152,7 @@ void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash, + + #if defined(GHASH_ASM_X86_64) + if (crypto_gcm_clmul_enabled()) { +- if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { // AVX+MOVBE ++ if (CRYPTO_is_AVX_capable() && CRYPTO_is_MOVBE_capable()) { + gcm_init_avx(out_table, H.u); + *out_mult = gcm_gmult_avx; + *out_hash = gcm_ghash_avx; +@@ -164,7 +164,7 @@ void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash, + *out_hash = gcm_ghash_clmul; + return; + } +- if (gcm_ssse3_capable()) { ++ if (CRYPTO_is_SSSE3_capable()) { + gcm_init_ssse3(out_table, H.u); + *out_mult = gcm_gmult_ssse3; + *out_hash = gcm_ghash_ssse3; +@@ -177,7 +177,7 @@ void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash, + *out_hash = gcm_ghash_clmul; + return; + } +- if (gcm_ssse3_capable()) { ++ if (CRYPTO_is_SSSE3_capable()) { + gcm_init_ssse3(out_table, H.u); + *out_mult = gcm_gmult_ssse3; + *out_hash = gcm_ghash_ssse3; +@@ -722,9 +722,7 @@ void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) { + #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) + int crypto_gcm_clmul_enabled(void) { + #if defined(GHASH_ASM_X86) || defined(GHASH_ASM_X86_64) +- const uint32_t *ia32cap = OPENSSL_ia32cap_get(); +- return (ia32cap[0] & (1 << 24)) && // check FXSR bit +- (ia32cap[1] & (1 << 1)); // check PCLMULQDQ bit ++ return CRYPTO_is_FXSR_capable() && CRYPTO_is_PCLMUL_capable(); + #else + return 0; + #endif +diff --git a/src/crypto/fipsmodule/modes/gcm_test.cc b/src/crypto/fipsmodule/modes/gcm_test.cc +index 539b764..d66d8ae 100644 +--- a/src/crypto/fipsmodule/modes/gcm_test.cc ++++ b/src/crypto/fipsmodule/modes/gcm_test.cc +@@ -136,7 +136,7 @@ TEST(GCMTest, ABI) { + + alignas(16) u128 Htable[16]; + #if defined(GHASH_ASM_X86) || defined(GHASH_ASM_X86_64) +- if (gcm_ssse3_capable()) { ++ if (CRYPTO_is_SSSE3_capable()) { + CHECK_ABI_SEH(gcm_init_ssse3, Htable, kH); + CHECK_ABI_SEH(gcm_gmult_ssse3, X, Htable); + for (size_t blocks : kBlockCounts) { +@@ -152,7 +152,7 @@ TEST(GCMTest, ABI) { + } + + #if defined(GHASH_ASM_X86_64) +- if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { // AVX+MOVBE 
++ if (CRYPTO_is_AVX_capable() && CRYPTO_is_MOVBE_capable()) {
+ CHECK_ABI_SEH(gcm_init_avx, Htable, kH);
+ CHECK_ABI_SEH(gcm_gmult_avx, X, Htable);
+ for (size_t blocks : kBlockCounts) {
+diff --git a/src/crypto/fipsmodule/modes/internal.h b/src/crypto/fipsmodule/modes/internal.h
+index f022f9b..0164aac 100644
+--- a/src/crypto/fipsmodule/modes/internal.h
++++ b/src/crypto/fipsmodule/modes/internal.h
+@@ -253,10 +253,6 @@ void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
+ void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
+ size_t len);
+ 
+-OPENSSL_INLINE char gcm_ssse3_capable(void) {
+- return (OPENSSL_ia32cap_get()[1] & (1 << (41 - 32))) != 0;
+-}
+-
+ // |gcm_gmult_ssse3| and |gcm_ghash_ssse3| require |Htable| to be
+ // 16-byte-aligned, but |gcm_init_ssse3| does not.
+ void gcm_init_ssse3(u128 Htable[16], const uint64_t Xi[2]);
+diff --git a/src/crypto/fipsmodule/policydocs/BoringCrypto-Android-Security-Policy-20210319.docx b/src/crypto/fipsmodule/policydocs/BoringCrypto-Android-Security-Policy-20210319.docx
+new file mode 100644
+index 0000000..17fcd25
+--- /dev/null
++++ b/src/crypto/fipsmodule/policydocs/BoringCrypto-Android-Security-Policy-20210319.docx
+@@ -0,0 +1,540 @@
[binary zip/OOXML payload of the embedded BoringCrypto-Android-Security-Policy-20210319.docx omitted; the .docx content is not representable as text]
++&‚ö½š;Ð*¾`1”Stf£¢âžZ7¯J_\ é½SÇ?MxÂs>“¦:¨åâ _Ñu%ì'èb0™€D½¿ƒšlÒ 7*n \ cªÈÎ-^ii?åÛI˜2ßÈÎÖ'J&×,ÅWŽÎÀŸ|öoäÅ7>íI¦–Äá?i {A¤~´Ñ¹HÅoçm¬å+w§¿ÈŸŸx ”´@ýL=%BTÙ×(<½šïøiš“Šß¡û©zâb¨$WORÉY&¡´í¯êz=±šóÛ%ÑF“ÉÑËý\ºA¥›ï ÷.cóùÌWÔ>+Ì'\ c¸‡%.þA1Cc˜°¾ÛçÒÕݯñ֊Š *ÁÁLs18Ÿ‰ã-?íI¦j+pfi {ù¹ä¾/í6(„¡ßj±øì߈/˜«Y+¡V+r.ç›|¢xÏnV :'³‘DÓMMùò¢ë«ªþ¸¢ð'ã ÒqOdw<&TseÕê^¼I¸³…ÞóU½¿ƒÏú_*ø\ cJhlî0´ö5ø0ÆÐ¾’Ë×ÊåÖT«ú[…+'£…>l™#ðøRÝ×ø O2é]/f¹û˜Àú_eÝ3ZÁ©¤·xô֏±ãVx~£êkš€*T©ÄWéiõ/ŠÒôqsU~hôÂÅPKù¢ Õã¯m£ˆWª:*tWõâ¸]¤n“Û±ÅçÍVuTÿz\ ªÉ®WV?|£RÁÅ> ++ ++…R*—k,D¤ßª§ý§P(ˆ½qÜ7úˆ×¹ûO^*•ª©½>Ÿ/J¾ý$$öFiEÝЋåՍ'|£Iá_jlêjum]«]ÀæWumÓÓ¤ã™!Éå*ÕšO–½>2xÃÇN©R5ÐS5Üä\…RɈ ++‹kîdæçä—UÖ6‘kªTêqê,“74u<|òòöƒ¼Êš&‘Dª½ÕôT]}/ŠªRï=ÏÉ/¯khÓ^yr¼T¦­ùäô±ªG~×Õ3ð$·$í~nIyÝ_4Ñ99}.†Z¥¾ÿ´p釴0hƒ³ã˜7i*EöÀV)sf–‘áðÙ]¯Oõ}\-éEü4®îR 3ÎÊ ¯ ÿ ÝqPä^7©äpÉ- Y¯}áx2l£Z*øèÝ¥.Æ@«Ðoɤ÷gñysdBÕºÜH­V6æ \ÿ>ùµNvÿ­l~©ËúšOÄÅ`¼ø/U »àb…€¨ëK-¸«v;Ž(k¬œvÚx„&<+¬èNg•úû–¼à…&¬ovØ\¸ö`j] "WCãR LX‹MÙ?ñxU>hd<Ë/'/’²~¯KYeÐ~Y\ÃD00a¿*{===`·t'ŒÂ‡<ò_Ué~âó‚ò{¬Üå°l§½_dòP+È™/6ìs=ÐLùnŸË.V`àù”üâê¾~Á$¼ 2LåՍŽ6ìu^¾“·Ì‚Gþ]ié`iñ(Ÿ^rÉÑWŠ$)wžîf­°´'§0gm;æ•x+«·¯±d‹Ê^Ûxž_·ÛqèFkv;žtxþ²R,–jüÄzýA®){¬æ¯³rŠ»v_“…¡ÎÊ)>æ¾|ç`õVì´ß¸Ï%äÂ͖önÝ ¦és1B1ûTìc»êb0FFnA¹æ©ÐR,pýÇL[ŽA´¥(xZ2ðiºÑÅ7ÇIfÁ›#I¶VKßʕm%ã(yÞlQðjµ¨OWõÞV&ôž7Žävü“¼èêG_ˆ¡/.†ª½\èýͤŸ#>g–ðôjµ¸_ëS)¹¼ÿ½.Öo”EW~u.oŽ<7.ðQð ¿:×ІRé¦l¢}† ++ùïBÖÌÝ`åLªM®3ßȎü0ψúـªŒsð%¡PúkØ¥ôo¶[Õü¥fœø«#5‹B¡¼xýáʝ›Yk1ˆxpý»¢îù§÷&®VÊ%×mÆÙÀ›£hxþËx¥ŒŠÃª}ç3EÍ#]m”ê,ó_´Inò+¿Åj¹h&ô˜^¸d® ++\ÿùnà>ô¼¸7*å8ts‘Àã+wö{Dϝ%»ç£Ë»¡ž¹<ºhÙ¶¼^ÙQ˜~NE\#òŒ¨¢ÖÌlüöò‚IÙÃ=ýýA7"z™Dóï8éûº®uzªôA] ¢œy~ÐÊy‰)ûnvÁ t1ì|bˆZ¦nJ‹ý£gEªñ>ªTª'¹¥Ë-xŒ€% ô>{e¸‹‘v?—i)+,Öíu^ÖXQë;ƒœ»ÞÊ))íñh;`¬ÏƒyEUö:“.%þv¯3Ë;Úçì§ÀøŸ{0Ù"VX|ªX"%0l‰ÛÜÎß=ä²WX¢7h¹—¼¸ÀÈî˜G„X,~£Gy%kv90Û^HRú“Âò×éY/ؾ±KÌ8Ԙšs“3žŽˆØ@Näø]˜OCkv;Žhûj+§Í#S$ßyºÜœKŗ0eÿxÈÝ)ð¢÷Ù$[¯ó«w9É@ŠŸŽxÔ7é”aú\Œ˜¤; YÚÂ`\ svPC“f±!½ã)pøÃŒK¹ÊùLöÀW­”}boâjq¯È™¶ ++D4žZ¬z7¦<7ŽRÈZ£JӜtÓîJy^¼@k’W>g–$é°z,ÄУ*“6†Æ ÞçBï¹|-ãN9GK´ç@U+dÒ~ÚêL*à3ŸŠÛª¥&œYÒ[¶ÉèàbˆV(Šo*jO¦Ô>Ñ1‹íø.w¶Ðgv‡Ïú4Ý .ð]Œ-G¼ŠJ_«Þ¢P* Šªyœ#ú“ÉKÀö‹ÑQÖÎdƒ|x)¯n:æab}å®@(f^Ÿ™.Ýól3ÿ¾þqö’÷ôX²¿ý~±×™¤Ñ.ãq\KϦ¶B«TC¥§;ó…oôZ+GªÌ8+,¸IôaãÖV u , "lmʪÉUj5)흽V¼`¦c—›qê†iÞ{م+wRkC–™s½Î&õóEÌ) ++…òòÌ´C~~)}hUˆB¡8á9Ÿ^GcëÕÙÕ7T=‰Dêž´„¦VN-í=#}'ˆôɪݎ¤JªwÛN&˜rÔ“’Šº-G<) ƘEfKgw¿Šn”R©ÌzV´ù;³ÉÈ> N*_O“‹!•ÊÍYAÌ; .ÆR3΃gEš³îj¡Ç3ÎÅ éôՇþšwº= •²üö¸ÊY·S­xg·•ª«JÀþ6ijÿ;Qèú7Œ:™HvÛEûª¢÷ä/âßÀÅÐÝÅ(NÕ>:|»ÿ‘?¸ü]뮇ß) ++¯h«pG¥Àý_cވ¼îøgÙwQàJ¾VÃKe8%.s5¾Ýóíþýï {Ö¥\§ã¹|Î,ñ éÕãÚ¼7:uW \ à#ºÅå#Uzwï€çô"ú«ïŸù´´u뻋1ø¡[&ïë{ǘ™.éj©‚9÷nv¡öSRîvÊJ¿óש}&,#¿!G*“3—ZfÁ{>*&CcSǏG¼Èo¿Üzâyá;±ùUjõú=NävF'}dškŒætìÍy;lÈՌmüjëG®Bºzûé—[O’ß~·Ï57¿l¦¸¤Ùkv;MÃv¦Ì5´qKÃèRKâwϸM%ösøœÏåw>)ƒÈÂóÛ´Ë]ò[éýS#FJ-ˆB6Œ„•(ؼ¸ñëÀï\0ÇÅ`ÿVQóx†tš¸J…üqØ8ÑRíþGQûTö8d+*ÚH-ìóoá?>ë´Bÿeêþ&Qè-.Ÿ ½Ïü)s1Þ§8þyê\ŒßŠãw«û›þK´Ìm>w¶(â'µL˜9.†B®L¹ûŒ{É^·Û±¬ºaôär9_(fŠP$ÖÅtP*•¡Xðö¬ëÿÇu1d2¹TJ•_~Ëµ{Ìaä¾Ì݇NJŸñ>.†T*ãÓÍ!…Š.ùÞß6¹[{PÛ+LX¬À¦ÖαŽ¯ol'Ð[38÷:3.†Æ%㺤s<-ZüÖ ++9â10^l׋ÉVZÚ/·àíµ‘Êd£¦‡b»õ)rÓyFvQ‰ƒú±¤ª~ýnjÑÇSvÆ£üÑ×$3m•#™‹ÍØÏ_VR‹4ÔꢒÚe¼Efœå;y•5²@³ɍ¾Þn}˜éC†¸¬²~©—üÊÞ7F®ƒ‹AN1:黐^Ó‘1:ÇJs{÷¶#^TÚzE^cæÕGv1Ès{ÀZÙƒÜkËQ/ïDÔW&M2q㇍Žñ¹8áà§ô®j+8üQ»Žz~¥¨½V­”És΍g=üFråˆZ©ÏÅh—Dj­Ã¡××ʖb¸:ºj _z“­-VoŽÐã+es‘ª¿Ne2¶cèòwEÙm «`Ès+ì¬Ð2p|û9ò*2ê|mAOæ<¾ uþä\Œ]Ԉǡ|ά1ý>*ð_巇‚›ÂÅ>º‹A)äÖNZ±×X:”T¾ó——(ö’š°Ki¶ÞQ֞çIáù_HHÏ®­oÕâeT½nNL{lçmãEeë}.)£¸¢nh〃èÉê×Í!q©Ñ7BboÝÉ|qöRz@TJh|jK[÷h1E^ilé ‰»r.ñnO/•  ¶±íTdr@ô˜«÷»ºûÞÇÅÄÏ ++Êý£RHCHsl¼Î{ž¹ò$¿¬¿ÿ½’ Ò.5(AçSö8„.¢2Ä^0Öññ)çڐÃö9†Úûƒ®›ôZ fXI—2^WZð ++Jj´ï+ij鼛ù"ñffu]ËhÁO†oÛ».yåÒͬy†¶äúÛOøÖ¼n}ÍÞ>ÁN.µ„Ì=ï°$¡ˆÚøÓÝ=@5^‹Q4*_ŒH$ÙaãGnôÕ¶“™Ã6:Û]»õh‰9—”Ä›YŠñ2É 3QB–˜qž½(}ŒH, ŒN¡ƒ’ÚYÙ÷ó…ßÅèèìÙÉ¢δÒAi÷žktqTݯ…>3-S U|æ«Z>‘÷oµZš4(#ÇV˜¢Ðd@4¹mT~M-R™üÊ{®J*оD'ÃãKeË+¸:º*~‡Xûò2¬!ëU]µÔHsÒ~GjM\:j÷|ö¿[Æ.:½–ñ&$‰‡Úcg¸þCÕ>þ²4}t1T½ âðïµF™- ++ÿIÙß ++˜!.†B¡¼ý(ŸJ`aÌúÏ÷Úú¶aÚµƒã»ÚÒ~¾‘UŒÉ¿¶t±ÛtÀíbʃÑI. 
++EÌÕ{ÿ9èNŸb;ŸJc1xÖ·{œ|"¯ ++b-.ùÜQUݸí„©ð7;l8¾1mí=[{ә#lÃbn’ÚŽü(¨V‡Dß ×™kh{Ü#‚ù$þ¸ ì‹ŸOW¶õ®©k™„‹¡¦+ÓÕ3`í}~ÑçtCæÒY9HM–šq¬½Î÷O*_éãæ½ç×Ò³©|%fœÍ‡Ü[Û{FInuwïÀÖcÞ¤»(Eù0/òòmÆÅ˜ÜZ †WuLMÒ¨ 7åòÉÇC©ªiÚxÀÜô›íÖ7ïR "d2¹Gh`õ„w”L¦ù‹^×ÐËt ³¾Œý$—+v²™ü2—oe ï^òó퇹k˜ÙÖ5·ŸÃ®ñ‹M9Ë,¸µu­•UM™O^ÞÍʯ®i¢ã¨F ™o¯=XbFÅõ\bÎ!£¬±zÏ +++È@“c6pkïìùø.F^QÕjs¦ÓÅ ór¯}°ÆÐ j™Hšæ<] §?ɞ„"&† Sµ}¼˜¦³¤·4Ÿ.‰Î|?ÎÆÞEA­Ë1Æw1ˆä^§êz=CúM\ŒžQðZí9_đ[T}Ԛ4uw­Ð×@Kçóÿ¤l.aE©Äý¢3ZmrgË_Ä«éªJn9Žãb8ÿUYžñIºoÔJEY†€¬e_ {–ìi\ `†¸]Ýýf6~LÖ}NaŒË@ ‚]œ *¦ k•¥ý›SGÝÂ÷8„~È}!Î`•%/òRúpJ~Ž¿þ`… ++iÂÚ¸ßu¯SË7ʄ¸v·#íD‚æWkq1ZÚº¸…/¤×q9ÓÙÝO^ŒJÌ`Vþÿxij¿äWâ|xË1o¢ìÈõÓæ1/>),'W&ÙqÂwÒ.¹»©Í)z¯ kó![ï(§Àx٘Žx.2¡2}ì³ééãOnP†\ŒäŒ§=½ümG½h÷í–8bC„\¡ð ¿²ÄŒÊ²í¸÷_xöbÚû»½&vþÌuŽ8ŸK&™ÕÜ7úʽ¥t´ŽU;yM­TæT‰TvÐ1l!ÛÕåLâXç†_L£Oä¬0å´wõі*™ÚßDuï–Þwtv÷‘&“~~YVchí·Ð˜µÈ”å.™Y»1äbXØø1ƒ»a¿ëZ+§U–+-í×îq"=v6>µ±¥sø…Réxq±)åP¬µr”Œ¼³¨ü5cô)Ý8öfŸir1$©sÐÅéÉN2"Æçêݎe•š?‘SÙ7þ2ӌ ê«éóÛÔ2á'ðö­¨¸§-¸ã`ô„ÿV”Ž‘W¥’¦9òÙ¿Ñîbƒ×«¥Â÷r1ìGmÀŽ] e{¥Àí_Z¡Ù’ÄýjñàbBqòIzרƒ(Ž5S+†½—©”ŠÒtZ™Ï{ ÏUÏà£-¡ýú§?Éóâ?Mƒ ++"#Eþ¬}8„ÿ?{ïáÖÆ•¿Þ¿á>ÏýÝ»»IìMv÷»»ß훸ÆÝq‰—¸Ò›M1ÅÓÔ@ ½÷ދ1`zǦSMï½wPï÷Œd ÒH4#Ûç}ÎãiæÌi3:ï;Ÿâ{Z’ ªTÅ0wë|_ŝ˜sJtú¸îâ‡ÂïôÑ? ÐK„óë3 ++kèô·‘­úÇIÁ©—y@ÛúÖ±¾p°$o¥µ{̛öþwiÑËÚÖ'®Q¡)…’ðRU :“MðI8¢†¼‡7´^KBˆñÅ9d2¯¬~Cïž¾:£KUÝ6sY\¦î–ŠFÀ="T{TÝVçŽ_3<éé3s ++;‚X¦Ø&ÊöQPÅÈ*®Ýolí;‹ôsÉÀ¡gp|ý‘½C $\ºòu;hFXRÁ=JĤ˜ûÈ%bPó-S“½½ŽLÍ,Ü{â‰Ô#δÊ«!l÷>Æ|VBTj‘¬s3 ªÏêځÖþýæãéwÆ4#0!÷Â}û£`é Ï83Çð zvÈ'ºvŸDÊû<ÀÿžÒ!H⤧’EŠ$ÿ«ø5m|Öû§ð[·h C7%ˊÚ?<ùÓC'Ibڑ©¹}V1–©çôì>°„ñÖ©D›^ =™ˆ±Èð:¶‡Œe»™JèNLu~ìÏn‘€ÏÉ·—|ä ÝþkáҘL¤5SŽ‚ä¹8 «Gk u†£‚îQBwý¯`ª ++«“m4›ÿƒN†9Åä5G!Át %o®ØV‚?òë$ ++éó̰2F¤ž¯¹/|~mOo ÚJÇÛÃyé÷©ªÈFey-™‹Øt…õÌT$ aJ Šñ¡TŒîÛ[»D:$ƒâlî~ÕÄYbV𭊵wÔóµSºzG/8JaFAõ?*¡ƒEÌþ«´÷MZ8ÈA©ßª"IT­½Ç'6æ:⨪¾­WÌi1׌û†~M]þ¾¬Oíüù¡Óàºd™;T1ÀWñY/Žˆ5sçˆÅM:ð9hÿ SrË&c*¸bu}Ç%qÔš˜†wiDÁÄN «ê;$mÞ[ B`ŠDÅ8¥ß†ŠÁçñ£ÓJNˆ“Å^3!W7t¬5FAãÅ&ÔÐ=0öÄ%ò¬žèà9]»[ÝÔ¬}îZxüpß´ö´AÓÆ§îMïze ÄÏd/Qh €?ÀÕy<>“É^¡Ð£ÒŠOJȸ&.81_âW¢¸Šñ‹’¨\.ßÈ1ä˜f_T ÄYH‡Ð¸)ÅîÛÁ_¢;ý] 3• ‘™Ë³†!ôÓä*D€[¦?âÕ'ð¥—7éŒÀ‹rGŒþ‹ˆ:-{ 2Ñ(´„ãa~ë‹Þv¬ ÏMÅàÕÇ &@ELlxM)ëOÌöz,;VëA†×1Á´Ø‰ÏaŪ£FÄøŠ[æ¹>ŽÒ/ÌoÐWÂùå)µŠ!˜éføžFÉ8KÇ}ÉζEfªRÅ[ÝK ++øä°š­{ôØûv€þIœ)Ͷt‹–¼ÿ߀꺎ËŽ ÂïT¬[º‡À~¡£gø}qfMm|U³â5ðCçp—ˆŒ³ºv€£]1r*xÑÀãó7«Ä`±êa•’[)Ñ8MÉ­8¬j}Tcë·¾;T18.Æ+ 8Xã˜\šV¼¾$hØú‚öü gÿâUëÎUŒUqpÇÀT‰É‰99‚ÃAöi€Ïûûý“Ùï:¸+*ñ(‰zëQb¶eȘg¥§u4'5q9¥¯ßûñ(ñ—$¾ MʗUɳÜÊ3:„õ*Æ ++•¡‡õ“Œ¼šwnY]ßÀøôÌâðètyM‹±cÈ÷bʋˆ-]ƒŠ´“Fgâ½âŋÙV@¡Ò7¨ú?.Wú–¸whâŠI)TŒæ¶>ñý¶?¶ |§j—#«y¬x-ö %s*{I T~Ô"#ìšb1G¾AR±¢ŒehößpÛ²P2•ðZ2ÐÙ;B𞙉DBe=åW18Åd9¶Ä?ò»Šß_BVªjÊßqK\ÀÊ TÐ]þfSàuLH{/؏ˆ>Ïð:‚ugõ]œŽí«’KÿˆÄìØF!ýmïT D¯«¦~vß!7ÔñÁTŒf®¯ºæ—)Ãã3æÎáï¢!’꛺׿?›[XyDB¾4ÛÎ?I*ÇÌÿª ’–â°ºmáËFpz^iÝ=;„Ójãßt *¨bHHé5Œ„ VÑd¤´ŒzZ|B wD‰©É7‰Î`jYy‡çÈ/«[ðU ‹cêˆD¾8þÖbEJGaÀ×µ+«~³+*@ÿÐäqñEÏß·/,¯¯|Ýz^¢JhâZ?²+%Ó³‹ª–^’¨®BÙ[‰îÉãó£ÓŠ%¾Hgtð›}Ž8®‰}°Äӄœ*«Б“ZÈ0žÕÂÍ-®€.ÇæœÐDÒbœÖ%´u m’$XÚ_‰‡Æ3NÁ—¾ ýÇÄ-¹óØ}naY¢b|%®`ñȌîÙ=t^Ï^œŒ?þÎYfT Ä4(³L’Íe¿T 0|šÖ><V+¼úx´·Êû§b°³l?Þ·`¦›îô7…T ±7œ"WÅÀü–“ƒEÑ øýâH®h,—îyT$R ++/åW1Øift9Ñ4ÿ,©Ýð,ÎtÑÉÿD;Ëýˆ6ÇzfJ“˜°wN¹§è}+}‘|ENΔ ‹¢•±ª4ÂfÈO¢¥‹²½²ªÈ£»*†¾Àð=žW¦ŸT1 ö@ÅXŸ£¤³g伞ÝqqŠ{O<%Ù@Þªó+ÆÄt£·ü†™«DÅ(x¨YE¯¶¡b¢«…ó·òŠt]œIõ´Hêû‡&.8€cNéà›ÛúÁk›ºUBØø‡ˆ«&bÔ`é÷¬4êi±Ôò4·Bz¸€m©,6'<¥ð¸X¡Ð'ßÇú#³ n띵~LvÅC’iü­Šupb¾,~*…ËðøÑϊÁ,ƒ¹;«KHÎ~¹ù\.Ú&±+±p‹‘%‘‚SÅ9G°Z–^K+4.Ç±àÐÂY¹DJÀ\HÆMÅÊ[Áˆ$ýӗô@K®’Ƨ·ðiöKÐq¦Uüü¢ô—‹µ’L«Øë&d‰ü±?*Æò ++ÍÐ.øˆØùgË]»šºŽÍٕ‘51ÕÁð8¤UþÀ…á^DýHÜÈ;a»¯?ܨÚéuL$”ù Î÷3<ËvgXûÿ‘ל ++U ET fÔmTƒ… Ò_…s½›Od¥è£™>ÿÀN7g¸þe¦žGãÕw›ÂNÑG›_ÂA†ç1átçÎU Vèϒd{ˆíª«B¡`øÝñÏÛ¿õ Š±g*_ (zÙxZ/‰”á•É`¼õ)X¡Ð ¾‰âˆ’6¶^qRÉd]s÷#’Ä'¥º¡ ì^ÕwJ(4 …uÍ= ++ªß"q1*™LvxJÁ)„Už×µ+Õ"uK‚ñˆ‘¤\uJ ++EŽþÉàrß©ÚÄfo8xç%8ï$Ö¦&0)ŸÃáîú¤HU1@ƒfæ—nš¹€ëžÔÆIb—Ü0u™˜ž_ÿîiç*“ÅöˆÈ8¢f+I}ÒÜ1  ]X q¥gtG°x’²^¬eÉ]ð᳂jI°Ò{O<‡'7³B¡éàý‘6h`<ÃÒLÖÔÌâeqNÐþ°éÂJI՛CªÈ¸]5!·´+‘¤­o䤢¹¬Ùb€ÅÓÙ3"騪¶±KêÄ结ØQhŒýQ1ÀÄ´÷#«Akÿ‹S@²@šŠ±Êe²³¬Åo/•LÈpø3¯)uU9"5l*ÓfY±jZ²ÿšßž 
n_éK‘Ë`ÿ('ÞøÖï,àÃPŐ«b /üQ™0ƒüwmFŠh8\CÃËNV‚~ŠÏbç`¥Í/“›o‡:¿éÎã×}â*†DÒI{(ö÷9U ¥R1V‘|Œ8÷øê¤¶àeãÚç‘饒pº_Ú»$ë‘Sòúì[*‹G8¶hfnù’Ñ[Ïý´ÜJ‘˜“ËU1Ör”p8<+·¨ãˆöš‰ËȄ”Nc[ÿaUĬþ®¥gã›Þ›æîH*M5›áMï<ºgTF)¨‰›€œš]ü0*ÆÛK§zGZ©cB’ 6¼ÿÞ¡Šj+®l>,žwP‰™S8UE_[1i%’$¦gt)9/¥Jt÷]z€ÄIù^STÕ¼Y%鿨ODŒk´pýâ9š›_ùñ¡“X¨ÂÄdK5É)xÑ(Q1n˜¹v½Ÿ6X*@%ñÏˑŬf«ƒõ_Y¡¯}¥úÄó¨Øî#üiÑæŽ€I¿kî†x¯¨Û'åɽÐ^©\ß#,íè>ÅõÜPý’¤«€Yuä"îâJfŽx;ù ßÝS[0Úhö‡V…E|é1rDB×=ÄàoößðZ3ws,xlcau‹á6ähˆÂõ'~gÞÖ[#LµÉU1ØéÀhµ0žGå¨n߉¤%Ù²(¬ŒÇ4™.¨Dℬ‚Yiï<^]ÿ%ºJÅï)û TŒUÁp-Ýí[ªJ§b MkX{/6›W±ôŸ|°¾µ÷œââqZ›ÐÔÖ·yw€õI8¬†ø ˜C–Vh’MC%Üø±K$ƒÉڒЁì8&fU-=‘4±v> ÌMñ&)šVÞࢗ ]ü’.é;NôOÚ¬³ì<ÓjM}¨‰‹¡fS\Õ,µ’œk RÝÃó +;Q1VÅ¡L5m}$ûÞωÉù ìPÅWüÙØIâ¸ñ³©½{hUWձْ%&ä}§bªºnBnïÞpbrvÅ·÷o2&wlŠÐñáTŒÅ%ê5S°4÷]Â8¡…sM“¥bˆ(“tgł8|ÐL%èîß ?2:! 9erF쉊¦ïkÁd‹ÌfM·ÓÉÿ[ #ð¢pi|×DŒÞ2P!¯% ÐcÅÏbÅiÈՀ¸•A[——¸¼šP¹¹Z8¹8tÙE8ÑLwý/º¢ÄŠº+ËÁ‡?TMwúßmˆ†4ü—œŒ¬ ÄüÁJ”hbæ·¼Æ¤U¡ð“W1V…|N‘Ó63­@bU €Ü²zÀ$ÅBæ‰k”äµ<…ÊÐÅùC¸VÃÊkhlPVð/ƒÉ~–_ýÃä-úI-\aeãÚ{lÀö¨"Ù%Àç€"Ò,ñIpÖäô‚[xºGdÆ ++….UŐl:^¼j½¤ïp ®‹Ë,Ûð6T•˜ýâ˜:𩋈'´°gt •õRò¡ì\Å`2ÙxŸ„câ@’w-<šÚúüâî€ÿÎ-,Ç>+1t m|'ñLÍ,à}oš¹ÙwŒíDÅ(ªh¼¤O¼l萚+%ǂ"Ñ=ŸæV2XlƒµVƧæ Ê ¾ çÅúâ¼£g—QX£ˆ/ ‡ÃK/;£G@L'´ñ‘O‹À@5µ4µõ¯/ -} ëbU”¿j=§g'I¾ë‘1¿HCª #>³ü˜ -ÜiBB֋µ.½hDòžˆ³‡$äR¨t.2êȢљÏò«.ê;ˆý˜l‚Þ¾Få 9¥u`m›‘ÂK*›Áú—àpx,6§½{,3$¾‰FÕÊsdì=ëìξÑÛænH|S5[ —ÈəE0Å …`êK«ßüüÐI5Æ%4M•g¯TŒ¼Ò×§tðÊ`ˆqVÏ®æuÛzèýM¿€“a©tùVÅD…ט(ËKB9!äq~çöCBX1áö2—<+V†“—÷%'ËzK¢ƒ *.OÐ=Ž Caÿv– ’ V1£ ö3St’OÃÅJÔ²¶ÖHĦ3C¯ËW1^ú£×ÃoË@d4ã÷ìT#p[IoŸËˆº³åصó߄‹#2%šñfšý7(ã¦Ɂ‚:³ŸˆŠ!Qf¶ª{¯b°X¿è,1½G¨cVÉk‰eDSKïÏÆÎ’ Wý£²žåUŧ—™9„ é9ÄY ŒC¨´__˜šjéuì]2W çð„ŒòôüêЄ¼«Žj"¾…b¿©*†˜ ƒr‹ù-àØ­Ý߁÷ Žÿbæ*y3$ã´öš[Ú ü}ÑÀQ’.ôü}{œG\JöËgùÕ¡Ï®Š«‰.’䝥µ‡T­A%`¨IòÃÛ¡«owð2ôt[ 唒ÎBó×$»àôK…5Šõ5'‰2$Õ^z@¼ð6sÊÆrLcì²þ\·´â³ÄŽBΎ~IäÀTU Ic@¬Ü¢yë´*ŸoåyT,«‰šüCâsÀŠyZlltJ'8n™'§ß& žTyì.Y3àß_LÈNþÉ®Áif¡kǟÖ!d—Jç¼òzIhPNép±®!i&öÁȉâù½cî:5£?ў¨<.ßÊ#昸“€»]ËÆWòh±L„¢¥šãÿ(ÿŠtIÄç|Dl~W>ø§ý1V¬ªˆ:#sM¶eÉ7ÄÏþN.AÄى/H¸4Æ »¾–f…†?À ¼ÈëÈUèÞy%‡ä‹•~wñÖ¦¦»X~ÎÇ¿p[2ä4¯2HÎÛÃ{é+K² …‚‰Fé¯[UôXi&".S¦|6ÓCwù/ªŠñ%'ÍXÄ¡}*†Ïoς*„rª«bã5+¯CjHȉ‹ˆû°¨©ïøÅÔå;k$·¨ºíQÄþy­}DsXÅë?;·´ÔÌͯ^úí=«c’¥à,$¾Æa5ÛÿÞ³Â{ÅKÂ+ÊR1V‘ü©,cÇI~m[Ÿé÷cR…B¼oâqq2ΓZ¸Ð¤|©]Þ\«¡µÐ„ÓJƒôHºsï±Ge]‡¤ž¢ª¦ˆppDÍÖÖ=fç* ++T160‰‡Õm¿U±>©‰%ø$vö ++„ ++½Ô\\¢¨Xz­Ef8.­òwâFÍÖwý¹ Û7æùqñ¥%m'©E–Ä1u Î;~òý ¦ à‚o"XB—¥#ï/<ð¹>!°ohbmÍ0˜¬¼ò†«F¤oÅ U¼ê°’?À¬epÕÈ©¼¦…Ç“òR“ËågÖü gÎ=¦±Ö<ðöŠµƯ»L¨XhÈ=Q1zÆ®›•AÅã¸fý‚fÄ/tT‹ôýI¹j÷5´þ£y` Ìþ€|¦ˆâ6ŠÜøÄ?ñû_ʤv”)Vô]š¼J$pK]…¬mEúòão˜¡×6¤Ì@Ì@œÿÁ.tÒæÐƒ¶ ++ÆêåòOö fØ !‹ªh£(“Hb¹™q|N ++§ÚЫbçèÔ¶ÿ$ì-A7˯^¯bÒ84:­OúÑØ˜·!EHqEã53ðÕ­'žÃ£Ò_6t \0pÇڏŒ¿=¦©}àŠ¸NkïþÁ‰5þßÙ3|ó±;è;(€µ¾·G§ƒâs5ߍø÷’é>Ö?æYéðØÌZË©4f@\Îy}âwªÖnáér'Å)è)¨çgrÁ‹†­Nh|F蘈€¸Üõ*FYõ›µŽl.wŸx–çµ`¶‡îò¹ù ·o™?0Îo© Ž*Ηãjý '€ÒB^{¨@$Wpö ös+!mfK±9E\&¯-›ŽduýJ–Ç +++ê6âH(ÓQHH_dxŸT$®+ÍT!CȦ²ÒÍirE:ÂAf¬Ú*ªíx¬°t…F›‡ÿÍ´£+5¼ÎþK…ŒÄö#¬TC9#Of¢§¡!`x}HHŸ‡Šñvµ£ûþ@BY! 
++GFgÚ:[;)ŠP;pJwïh[çP[×›ÅùF`pxŒèÑÒ²tsZŸšwXŦ¬¶®™Í+¬0†›õ/Y PèÈêjžWdß:04!>e°w`káÁ¹’û‡&·Ñ»ÝW1˜L¶‰Cˆ$‡Ðþ–cX[¯x ++….·Íü‘zºû!%ŒŽÁ ýy•±ðQÜ'Üæ§Šø,ðÞ¤Š˜K"ÆâÖ ++u†Sæ./`ªÇў†:b%¡ yÆdøœæV‰Ø ++Ýö‚áfœºX%9€ZíWtçr_x‹CxHùAñ9œ"G:ö·Š„aƪ£D‹@º<×njQ‘?/ˆÂò·2PžLCgEܔ^ÄñÏ"DúAû­2™ šòÍvÞ©üñF9£Ï¡²bUPUŒƒ4â„+SŸŠ!òYi¦[ AU ¥hözzÆ~г7!…Ï)–¦.¡-¢àYÛ^u;\®ÿ×®ÖÐèô)qÐeÈNò¬ Z¡fs™ì§•PÅ ;þ¯-û#K™K¬D9„ ß3"k{—Œ5ѝäe“!þQБ#Ó@$Ì´#r•"Æ}I'þ‰t™Wá/˜êXÄZYí‡&¤Î ++æûùoÒýÂå_ˆ½ƒ" þ+šý7œbW‘€']S®¥“þB—KòŚÃë8·ÜK0×+¤/¬r™HÛ¸L!}^0?È-ucx£+Ô*@òÿ(\cµ$¢/€Ñ@ӒðÞ'™M^{65çšõ ++;IOđ'D ++¬4t§0î á|ÿNUŒ+¢å±µ5°ýÂ碬Ò]Q1[f¢…†$>U ˆO ‹+÷苍­}Ÿ†j±Ÿ*†@ ˆÍ>®µÿÆqMì}‡ÁE T¸õñÈv!ñö4S æ·ìl[åñ)˜l4/Ñlÿ?N±óö2æ3äG¹B 3ⶈÏEãÏ5a ++ñçõ¯ñq¿£a~à ¼ÀyfÂ)râ–8s‹9ÙÖ̰ë4âhØßÒ [IlI8Àpù·`ªMqéÌx-…L$ö ñO¬8 N‘[BæäÛ³bU‘†!Fì#'O~LSáÒ(Ãë8š¼‚ÿ’zU¡ÙäÐ?șq°Uþ`ŪٖØyvâà)h>ù];S12Ü¿ã䐰“RLæµeÉô*Ú=ôˆ[鿅¥U ˆ­†vAåu23QB@Cqp¹üÛîǕ ÁêqM¬™C(‡ËS”‡S¦—Ìû%ô÷èi5énß ++e§ÞPpKÝiØßÉÑ_ó‡_o_Å ++9ÅdD€@Oìâü7áL'ç2X™–Û‘«¶pT3ç6$¡kÆüñ&ºÃÿl¿y[ó#øŠá} ¬ÿUy2¶8ôÉ¿Ð*Ç}¡»Ϧ@0T)/ˆéï˜Qw„…i… ++­0 Š±;(­j>£Kø^ TŒS:øªºv¡PÑèŒ"‡ýÌLÙl1$ì‹ß’¶¥0“#ð":s¦á°´ÐÃ+ʅpa€îö­Ü¨Üúx9í¥Í2¯ïƒb…4ïN %ièÉç9 ÿÀ­ )°À-ˆ¶‚¢`ËÉÅ*¼l¸Ì°t”X¡Žå·e)úK֘Bwü3Úý‹ÿŠ“o¿ccWÊ×ìgæFÅÓÊ­‹QTƒ*Äç¦bp¹<§à§G• Áê1 ÌíGn[ç„ÁD3ÍþJ'd¾bFÜ Ê+btæÑI•›5–Û˜´ók1ã4äðLüV¼¦=#X”)Fø ªÂîKÄ?²óí[V„~ ++KÌÈ[ˆ±—-¤á¾`¥¯ò˜Š >  ++=À*ÍöÿãV(:—"·6 ++qœÁxô½r€}ܖŠ.¾tç¿¡«ìdƒÏMÅ@À¦ ^éT1 >7c`dêSeP1¾U±öOÈUÜã-}àsåäkÜãôWáx³².!+^[NöÀ-]þ%˜íÙùÅxíÏi6ÿ\s ÁD‹Üf VÆ¡?Ó>Ìt‹#q²‹œdEô”Êf Ãt÷C{ØBü†ïYÁ²bY‘ø\^Múݦ†÷&c úešSêÊÉxÌÉ´ØXÒ Fë¶ BÎt3\ÿCCU1˜?|Ž*˜º¡Jö·Pŀ€€€€€€€€€*Æû,B(Ì{ÑpXÕfß³“€œÓ%T7vn™Ž …œW9ñöAÅ2T(ç.3<ÒpèA72cTD|ÞÎ/'˜ëCòn š'ÂÆ-!+$P§YOÒíÿ°§ÜÉ]Bþ·*XnøL)-ªføŸ£)×`K÷%3ôgÁⰂÎJ"“Oã7D8Àë*ÜÒ-'B²½ÐÁÈl*ôU! 됾Àp?Œ¢b€¶1¼¿Gñåù„U !›Âζ‘ï U ˆÏJÅàòxÃ¨Û*C\OS§ð¥%ê6zÁï¯`¸ýWéò­°¢U4ŸÊ‘I™)×/ÃáOüμUán„öà³¹ÅÎròwà0.®*–ÒU*¬@Âmì…ïRáAFÐe~gþ¬06“mÌð뻚@ç ÷;QÔ,¹†B a.s’ï£3a†ëõûµÞ'QU $ÃJ²ÕOXÅ˽òs CâóQ1ÀÆ{lbî´rÄõ<®‰~V¼Ížð˜¬è»ÊãÓñ/¼êU% Ã+dQX)úò¨ÑW ßÓ»xQþ` ô¿r^,“þÊïÊS|ù ++—ǘASi„Ý Þw a~Ã- ®îØEĦ²“ è¸74I,qË‚³L1Qwø=E">kËþAHÞ楆hÖÿ7ژ°uV÷Ïï‰WîN³ýÌæq°ÿ†[¦b_¯ç{\²Ó¡ªµ4âPj aÃJÔÛνI£a¿'…‘V-¸k\þU ˆÏBŨªë8¯g¯ ø7§ä5Ÿ¿ý¼¤" ·ýWñ{gŽA8ÈïÈVªÕˆ1âÜŠ“ôB#ý…•¬'¢ÏíªŠ!Z¥ÏÐ=˺(RH¥;þ•ß[¶ý‹0—y•Œ ËtÒ_èÄ?Œh„ƒë‹ä½:N‡?ÑÉÿbgX+V…"'®p²…ýô!Ýõ?²Ûö5µ4ÌéïÜ\‚p¾w‡Wô•Ñìþ€2×à+vyW#¿åÍî´Uáð?œRw4#âÝñÏh5ìRa?·AS1Æêi䡜N#þ‰õÔx;«š:ËNз†ôšÿB÷>.Zƒ¿‹Ÿ¸ŠÁåòóŽ*A\Ï£˜›¦dsg6ü€^ ¿zQÙb|Æj¬ŠJ³xD">p?ÔÂB¼`v=œ`zr®+.;À%|#¢NñG^óÞ¤s_ErJܑR*.àÊ@^}"¿·D8Ý!â2EîîD0Ul•‚ˉ8tÁd ¯=›û:†SêñkÃÊ­Z{çKBÙUŒñ©ù;ænG50û®bRµq OðwÊóEl*Ãÿüž¤·Ü‰-þüOá\/\µPÅØá‰j»þyû‰2¤&9£/|Ù¸ £"p²mhØß*™-ÆAºÝ7ܺeËTñѨV.QG”Àä¸&V°¸´;ñ/ùytò?•-4ÿ+QWÄZ† âC‚ÃåUÕuPi åoêÀðäò ++NT1¤czvé²!I²“ÕÀÄåìÚÀp™H&eËTÚãô¿‚‰¸p! ” âU[jÖËÙ¹¥}o •ÆXZ¡ ¶ÇM$µv ýlHjêü/ƒÉ¦Pé*C^¡Ó,á&srP‹ÅYX¢Ì.,ƒ²´Le³¹ ++ ©T(9evaôˆÇão8à˜JôK×Ý^gAKÖ6¿Hõˆ¤™Ã+>4:K(|¯¡PH¥1eÏ`²v2×âöSßµE‘qx`——V¨àùâp—ß]h‰B£3iŸÇ_¡Ð$gÍ-®€³D[ô6Ø©Š•ZxB $ P«Ù¶u íâ}έ c§t™J°¿ãVŠ|ø«¡ÌÛ;,ýö#מþ}Îe¾°LÅyÆZ¹D­P¶i¡˜°©C¨{ •ʐ4ñÏJ;†Y:E  R¸oäó ø|ÁðĬ_t¶êc·;&Î7ŒHºÖ¾‘©Å³óË fYm`²Ø/_·Y:G\7"³À¿ú¸€g5K˔ 4¸¥sð„&®¢®}«Ý ++…Ó á)…jÝn;Ý|HºiLvôKnêäñ7R°ÄŒòGŽ¡ ++Œ@˜Gس #zJòK’z<Õàø\ѶÂð‚É酈”bµÇî7:.Ü5u!§ŽNÌn–{Öä$ñ¢¶í‰SUq!ß·õ-xÙ´´LEQµ¦fÃS‹´­½1"Ýzèt×ÌÕÎ'±£w„Ëã¡´p…JÏ*®5%ƒI¿mâtËØ™è“ØÞ;Âæl!zýŽT :eD >®‰U†«ªÝw׺I0Ó¥t¡1§’ ¿3". 
þ*@@@@@@@@@@ì²Jk³ÊwAÅàó}#2îYxôöïg.óÅeêCbÈ!U›‡¡Ë+´íURð¢á'CRckŸ$%`û¯»œSÉ!i(Å54í'#’™s8gGåóéÕ5žxÆd”5¶õ5´÷E>+¹mF¾jBÎ.}-µóóˏ]"/êÙY#Ë^µ6µ÷W4t€\гӴõZÏúYlŽ¥K”‰cèÂÂðx¼œòú«Æä[¦.a©E ­}õm½É¹•÷±þçõì¼"36( µMÝþÉè#@~zÅÈÑÐ!xƒŽSÛØuÉÀã»ùçà§Ï ++k¶¡:¦DxnµIDATbpy¼¸ô²+FN–ž©ùU`TÁØf—×éÚúžÕµ‹H-f²¤dóìšÐ·ùِ”˜×ØÞJ}koRNåõ‡Nš6>õÍ=R%ŒòW­7¹Þ2&ƒ³jßô4´õf–Ôš8†×ÀøDe"Ö+ÒNŸœÓÁù_Ñwp H©¨ooêè/®ycæúƒž1 …Fc*ØÓ©íÝ× •AÅ8¦‰ÍÑÀç礪‚ˆ¹ÂŠ×¤ã(9ñ‚Þøë±ë ÍÚ3ւþi¨½#º?]¬Ÿ¦µ·¾}ð6T ‘8"Æ—HÏX.÷××ì|¾€FgÒé,™…Á*(¯»fâœWV·ÆÉ…"QZ~Տ†Žþ19Ó³‹k‚?†Æ¦m=b2"å—×oàðTã)ìÖ#ׯZ––iï p ÎlhëSyìþ09³°~ß´õ_}蜻îÒrQÝÐyYßÁÎ7qbz~Í4@ÀÌÎ/‡$üpß>.½t½Ÿ…Ü`0XÅ/¯>tÊ)Ù(Í ™)­ý ËÒ-ZÕÒk}ÐSЌµmôìJ+›¥FÁhjë?©‰-¬lZN…Fgª˜»ƒ§×ÉFk`³¹]"ïX¸ÏÍ/oøÜ10åGǁáÉÍ<–Ëå¥äTžÒ•T7תּ1;¿¬òØý¨Æþ«‡Ump¾‰²ü|váÊÃçÿ•’©_1¼Ž †áo Ä®«Ã•V؜]P³ð¨oë—'vJ«Þ\4 |$~÷÷±~ÛS1Ãt Ïдòb±8[Fai͛ïîYVÖ¿œâ‰kä5Sr‡ì€†mÝÃ'50iÕküna‘ò˜ùëG•r ++Ÿ˜“uLqeó5›šÆNÅG@ãZ¾´¼qÌ'¦æo›¹Æ¤•·›2fèt¦­W¼ÆÏ Ñ7Þ¶x‚[îî‘ë¥.K—(B W†ŠA£³œSnš8Ï.üjXQXÑxÛÂc`xJօš[û2"…%ç¯ÿï“pÔ¼æH"UHŠI+¾làPø¢aU G¿¤Ãj¶û×S«jå53»¸GO1^k&ÃéoJã÷ëéC›ª„Ï}°þ„"txyo@„p@ ”€Rr¸¼Ééù»O<‘B'&ç‡Gg@f²8ë·s op¢ ¢1.ûetfyzQMmS7¶1a¤TC¥1׹­«sgµ°•ò¸tGÏpüó²õÙ¶­bôL}¯jSÓÔµ¥³òÊëNëà«ßW[^¼n=¢jS\‰æ5ÀãñŸæUºgÕÞ;²F¶§¦6d*}OøX¡©Ûx¾½á󅅕Çäœ{œÜüHˆ \å˜ÌÂW§´ ­]ƒ ++Ž@áˆZØÒš–Í_õ Ož×ÆÕ¾éÙ-ް´L}ä¡míÍDµ—y€ Àù'­[ÆBrHšÚOYJË+TS‡ÐGï…]Y¡Ï¢´¼¶¡ëGCÇÈÔ¢õÃ{û‘Î;%{+8fdlö¢ƒGd¦@ž‰Ê6UŒ¹¹åëf.Ç4ö?®çu[×Ðg«{ÆE|.#ð"Ý™c ;ÿC¸4ªl~ðÐééoní]Y¡¯Bˆ18<Õô¦§ûÝSB9‘–_yËÜýª©ËIüi]Âe#’¤\2t|^P³fÀfsÒ ÝR5wúEÛyÅ?!…ÿl䨇˜[X‘«bPiLsräm šú޽S16c{*†P(,¬h:£‰í™Tü,*•¡oü˜¹þ%?ø;()ÿŸ·,ƦçÑOï=®ŽIÊz©à寧çOh`sJë6ãøçå?ê,öΗÁ/ñš™ë†)– ++…þÀ.Ș"Õ~¤¾­ÿ¸ªÍÐØôn-]0àVî±÷Ìݘ²{Êd°¯™¹à’׏OE]û-\Kç T«©ÙÅK÷í£ÒK¶m4)³ì'#Ré;­ ++\¥xò¸&*£Ýö©ƒñ+Gn²’mª)9§´ñʐä;ëÚ¦î=}œq ++”2åêWÜêÑ.Ù í³óKÏKjÍÃ.8ž»oVÏîÜû»î^Q™íÝC{²ä½’ÅÖ¶ñѱõÕÁø¡pLTr,§¯]„€/hëM.0 ^2$!rßü ~ÿîcýC“ó[»‡ø|ÁÞ5 »oԐ¨m뫋ñC÷LÌ(ÓƒcëkëE£3÷¨=à±eDD™ '.‘É9£ã3{:/à‡32µPÓÚ[ÓÊ;2µˆ»+¡±½_ÃÊ }¾·&m}¼ÂÒ)Ô]“üÀï¥GØ3d¨m}BÀÈ˽wB’òµÅS£‡ñÝÞ^ ++â“ÁèølU]{á‹ú_Ì݌ìk»*ëڑòº}a‘"yù v¶Ä€äë&ä¼Ò×Ë“ÅÛ-@’G'fý“ÁÎjxl]Ŕû”Ã:>©´*صÆå¨YzM+lü˜jVqíO†ŽÕõëߨ¦MðIøIŸ¸´"Ç¢|x|梾ƒ_t–ÜkêGÆf̝"Þ ‹ËRªÍ+¯?¡…Ý©^QP£jáQTѤˆõ8&·¼þ’¾Ce]›´ñŦ—^DԁR·°g6n1V®Q6î1¾±9u-=¢BHE\fÙ]B—ìW§¹%¯¸o“V¼þù…+×h}BÀÔ¦PKËT;¿DUKÏé¹-ø=p8\K÷hU+¯åw³ ÖCñ‹†óº„ÂÊF¹§ÛzĀûe³Î.¨`÷oí« ÙIŽibï=r[X¢ìéãL0Ö ¡tæ̐ŸEîþ>ëÁ- ++n3§ðÃ*6‡ÕmÁŒWŽjb¿S±¾fìœSV‡b;´sЙìÃj¶GÔmj`P ++8ÆÉ/yY‹x$ÍÌ-ùÅçülD}?¢ŽY? 
àoÐÈïîYÿl萐;3»¸Gn&M­}gtí$ÓÁDž½"ҏi`h`¬P÷Ê|¦­gø¬.Œ†ì©±ùöž•®­_[÷°pÏ\oÀÈA©ßªZ«bM ++~Êރ•ð¢¾T޾ß븺í#ǰ¥åÝt +ÕrHÍÌ)p}B …BG¹yÁïúi<²,U­Ÿ¸)Áâ“øiV±ô²v‹–Bé‚ü²:Àϳ7½ö_¿NP·ööZw@ªŠAc°pÞñ:¶¾í}ÛT14?¤ŠÁ'xƙ’¶°c»J=|€icªÚÅ%Š©C¨¶¥—ÜmçäôÂu²£l•‡ËãSéÌþ¡‰ÂŠFs§0K·¨ ++Mê.²¶±ë´¡®¹g4°Œùশþˆ§ÅV^yeõ ++îÞLöC‡‡Äµ,¤TžÀØlÀêÕ,½Ì#lkF ++{ìžURÛÙ;J{÷ðËÚ6+×h=¬]K¯âG‚sÏèâ3ËeÓa³’+UïÇy• ++rpêՇÎr ^¶£b MýüÐù˜†RäXÍ-oØS†Œ¬3Ú+VqâP6s çÿŽÔîïS~dbæö#·£ê˜ãHŒÜ÷šØÚ¸Úøâ%â±çïÛ¿hî™Ùx¾*.± ×7@JRöTÅšÔ±ñ‘(߯“âYÌs×½hr/šµ`̏‹/®bøDfŠg ++wï‘ÛÞ©í=Ã?ˆÛƒ ˆö½yyLnš’G'g÷NÅp ++yzXÝö°š­sHÚ^¬„ ‡Õl6Åîùµ›¾Â<&…ﮊÁæpü#Ÿ×z«¥ËŽ|¼L¡ýbJ–<ÉõôÁè-ó Y*…ÆøÉÀÁ;&K*ÛÝäÌò{ÝC(*øÁåryŠ$ΐԹ`³tVû²¾cý‡qÙ ƒÇ㛃‰©LŜ2@cÊjZŽ«Ú4´ömؕ8L€ÃÔå19BUN-¨>¥C8«gwËÌ¥¬ºÅƶµsð´!¯¬~«Ë€Æ`]3s9ÿÀጮÎ=fjfQêN¡¨¬k?«oêzøÍáÒ,°;-# ++øƒÁd¾h¸þ¬báI¡nÙJ\7"¹ð„&Ö7ê9¨M´Ó³‹Fv`lë³aéJÚ&7,)ÿ=»sbr0°îÛ»§Í¾Ÿ-U.æWÔ,<Œí‚ç×ÉàŠañ¹·Ì\[:ä§w!§žÓµ›’gý±eƒÇ<+¨9¢†W ‡÷Ø ++]¼"¼æT$3ˆÒ™cdgÛ®ŠöÍ©„É⸇¥ƒÅ€Ð3M,Æ;¾¤¢irfðáæ¶>¿¸°%Tꮅ‡‚.d;Q1À#&¯¸1©¥»otzviïĔÁÑiKOÉPQ·½nLv‹È(©lîî}W/­jöŒÊ¼nì|TÝö8r FÝÒkÛ6cŠæèÒbM}ÇúIiëÊ.®5wސÈ^GÕ1.!O?^cf~¹èe#˜ôu¥ ëý½XS¸ ïðþWÍÅMÍmý»ÞP¡¡]è&rÑûö¯¤Åâ¢Pè&ޡߋÅ#Pò_Ô •Ã= B™UŒŠ×­`ÓRVýF»¾iæZð.ÉÂÎs”ħ—ªš»©?v_+*\Ïèào™’רfîf€óC¹»=°lU ÷À”BßvÞ«iíõŠßì3 ˜­>.ã ¼"*†¹s¸¬FÆg2 ªÃS í’Õžx‚]M÷À¸ÔÍ ØpNž+ÍvFíåóµIÍ©ð‹Ïyèz×Ü5&£”­@®Ù¹ùeÄ_Û3n}hUŕˆú–ž+†$çà§¼òitÆÄäüäÔ¯ü/ƒÁÞ° ŒzZ|Ä|û‘«G¬bžsÈ33‡#»À‡v?Ü·Û,-+Öµôµl}|c³²Š^”7ä•Õ&äêá ðù ++ºä/.Qm3øjjf!,9ÿ¼!½ ZV“5Pi ¢²š¥' AèèQÌÓâ+F¤šùnPä ÔËú3óK»¬btõ}«b}|¿% PNëà“2_|Ê*ð˜á7•MÅ@œ\ø½åûõˆñªõ°š B?;µI“Öè –¦A“ZXóTŒÖöþ}Q1¢ÒАûB{Tc彂²ˆÍáØ{Ç–ïÕÛo¥µÅ¸bà8='ý‘T^óæ*â‹qî¾}éËÆ½hƾ¨`ŠCã÷AÅ@n?+}½Hk瓶 kß6w œ–¿h`T-=÷.°+Ä'¦bD§—€½ÓŒŸ!>@V1± *g¬±sc3>p\ °…P{â˜\ È{rz^ÓÖ×Ô!„Ë•l‹FcØù&^3p\’׆‰©ùŸ:ùE=WpÛ#É´jF ++“¦bŒí\Őlê2 ++ª©ÙV¡RqÐr BàNöù©Ù'µ°/j[רÐÚã‘ý|­øEf¾¬n‘5&€Ïÿù<Á—H ßÅōÑ$y<¾¶­¯‰cX¨Rç—Ng9¦žÑÀŽ£æ ¥1XDßÄ«FŽ]}£R‡®¬¢é=»âª&¹}'$ßy쾸DÝeÃ%$í¨DÄåÆ#×¹ùåDYE"^s ++ÝáÏJû;ösk—µ/øòš–#ˆsëÐðÔæÀ‘”ýÒÚ#ÖÖ3®¤úÍ‘ö}W1¦gʼn‡‘ûˆ²B‘ÿÛ({gÀÿ1ª³ ËßÞ³xR™\¸Íø¬TŒUq`­'äHð¸#NÏ®¤ªü’»ci‰ª‹õ—x^6 ½jê†á0 T1bÒKNé:º‡‘8²±¶ÍÛ¹G‰tãæ(]À¸E[¹ÇÈõ;ÔÜʋ÷í[û¤nty<^@bþ15YÂ5ô M×ÀÄ>+Q¼A ¹WŒH]=Ã>ÝÜ}Z‡Pùº}ÆÒÊy<94msøÌµ_Z~Õ9Bݛž\¨¤²é”îÅë÷T ¡H$x?BŠäÑ«;^ÿ¸œ›f.›M$ÀÿÖÔw€ e¿B©¡³oäŒ.>!£LÖ‹KÇÀ”_L;ûFEÒ¬„BÑÀÐ ©yUr·º?]\€\k­©SÓ à–V†«€Šà¼ã?èóŒÃ {}/ŽŽ¡\¡1èä ++©3ûòˆÓ1xDl‹qî¾}Vq­TÃáP© PX,º‘ÛÇ«bĦ—œ{րZz÷ý§÷cT1æ—(ÿ½g)1ðŽÈ€*Æ.@$›œ»óØý¨8;Ì5còÐØ4ÇwLY˛V²Þt]ňÍ(=¥ChëV\øØU >_@J½äni…ª.Nº’r®¸êÍ!Uë²4o _SV÷ïÛOւ8ÁdqÐ7ùù¥uW:痼ÞðùËÚV°EÛcôÆ B‰¾/ÔÚÌ>ØØ>DjhvÐ<&“­d»e0wä3^RÑtJ »AÅØÖfPÔÖ3|J ‘R$õÛ蔫P5—ñɹk¦.ŽþIR¿¥Ò™)ªO<;zFP&H$]7v&ø%¢Œ08}fvñG#1(•Ç즊‘W^VÏNTŒÿܱ¬mîùÀO4v–†ý­Ò™còk#öå?·¸ò“¾âZ¯¹bHêèe³9^DØ_\ÎÚ+Å#ê ç6gÿã |Œ*ÆÄô‚Äã”6>þY)T1vkq>Í­<­—ÂxÆVÖ·ŸÑ%hhZyA_ˆ-©eÕ̀ˊîùiªA\zé-3׉éyô#“2ÊOh`^5w¡ìÆû'.< zEf¢Ôö'V.QwÌ\&gÞz1²íÅç£1ÛÂòúkÆÎy%_¬>ͯÛÝeŠœîøä¼¹C(ØN6¨4†¹cèCûà…%ŠTäivÅm|]KÏùH|zéYBÍÖ§x“®D»cêjê:/MB·%æÝ0!7£J<3³‹·»c½â6Å`²‰>‰?èÙ5µËÛiïŸ|ÓÌÜ(×ÌüêKúÏD°lúí’‘ô ++JDÕÂCnäÒ]¿ÿ%pPéÌ1ì2£îíË#žÇã'çTç(å´6ÎÐ>Ä'6;£ º£{˜ó¡øüz£»{XÖS|¼úÆÌܒºµ7 …ÿ½g•Y\« ‰>F#:­øˆº-¸»Ï? 
¾nè‚*Ænö:(õ8ÿëI-üÅûDI8 Ä4ctú’@@@@@@lI؞]:£ó‹Ëá)ö›þ ¨`·ÐÒ=tìže×À*×]ºûØãžìLCÔ¬¼GÇgeÓÒ9xNŸš[¹–¤#òiÉ9=»¹ÅVB ++~zݔ š±þs‹ãš¦cí͔—[Ìì/ÜìýQŽ›þ^Ý&,µHjԏٹe5+/[Ï8¹XÐÁ`°:„Þ³òš‘çwƒ&“mç—tAß¡£wDÖÌ–7œÒĠƤ냗³áóÅe*)(Ü¥ÕoÐ& ++*ÏhãÁeeÒãfJ ++ÓÅù¯P䳒-¨€~Ü0!ÕØC À£ÓK¥®ž½%$+“¬¨[4¥S1~Owý·pò;<åéL6àHguí$Vë€"U·=£C¸jB6 ‡'僻}¯é⚊ֆŠ•—®]Ðæ¢O .¯iً«ŒÏübêrL{XÕ¦¹kP~z•6GɏŽã“s`=¬/à9õòuÛ SI¾ kï=R‚>O`lbö®¹ÉƒÆœÕ%¤åUÁÔª2TŒ•{2T À–Ɂ©7Í\ӖõÓØãÚkY*`×Û¶ˆ›šÿIßáUs÷‡Q1ĺÉì=»ôÂW²Þ€^?˯:«g÷²V¾DKÇàE}¢OÌs©lPY[Ø›æ®ëÂIv Œ]1pt M£Ò˜›› ІF§®;á}6˜˜šÓÃø„gpåíÁkHÚ5òëÆ.©{$°“ ˆË9©…khë“:Yŵ€í—VÉ'eàZR#kˆÄÁ/Š*›.< F?-ÙÉc›QTsá>1.½”/ۉƒÁ`]3&[{ÄÊ2'g±Ø¾ÑY'Õ1¤á˜rňTVÝÌç+Äʗ–¨nQ¿˜ÁÖtó·|¾ ¬¦ìQŸæV(R›¢*¨·°¢Iò²tß 1.é;´÷ ïËC[ô6…r¥\=À)&ïãkÕ¬¢WÚ8?Àœ?<*&KÇÅ´íšÍ L̳2À÷®yëU YåŒ6>£°z/®Þ=0vQlhp^—Ð=0¶ªPRC{N×Î?&':­ø×ò´ø )ü¸:æ˜xÍUÇħ—íQ3>[ £oôœXHûaRV¡‚!‹KkoR( ÇÐé,æ»o &[õ‰§!hf~Y¸v„,§ª¾3"¹pmß+UśðäB¿äÑñ¹m4Åæ6·öËAD› ‡‘¨Ôõ*rEp­'nѦ!²ŽŸšYдõ1´ [¹5‚JBó.Â_ÓÂBŸ_1¿°â•qù1ç}Ç0bÁIÿ¹g’”þ^Š€/™T1wûù!ihlc¬ÀWoºÏjãÚ{†ä†çUŽÏ^7s½aæÒ;8¾afLV|FÙi-9ä©ÔA˜˜šÓ´öy€õWä-Ø«ûÅdÏÌ-ñxüõWa2Ùõ-=?8Z8‡£çs•Û—‘±™Ÿ IV®ÑrwªIY/Ïhã’s^Ò¬ †Åb•Ô¼9­‰óŽÌd¬30GÚû$^Ð'U5‹äaýåZ;/8Úù%¾¿!ve}ûˆXÏXCŠ(ªbЬûX¿cšÊ`ˆ}`ÈÜYĔíËZcMt§¿!)N•JÅÀ}É ¹"¤NïããžFc´uåW6:ø%Ý6u9¤jsT¬y!\Z gê>;¿¼×*(Žº¹|¯I˫ڋ«ŽN_3vB nÛÚ=¬ ?½Ê©bH& Ôæ"ѼÇöˆÈ`ìÙ­ý9«\ßÎ7á¸XŸô€8:9 w¨²Àç ý“Ïè|¢ž7·ö疾6v K̗üŽ#ƒzX¿_¹úÇeW4µ´¼¨~÷¬ÔÄ)ìcç´üª5{©*F÷À8؀U³uHÞÃ_.ïUCW^éëüÒ:¤”Õ©èÒ{ÿØl–4ϰ$¢ÒJNj`Þt *" ½jìtéŠ!ÉÎ?)5»B<¯3Ëcºø€èž>»³DœÃÀ.̗"/þYlnlzéyüC‡ÐÄÌò5­­í¯:Óó«ÌœÂÏèàÝBÓÖK*4Ó+úù1M¬!À/ú¹•w<(֛Šw|ÄÓ"ÆûÎ5`‚:{Gn›»ÞxäꛝUô ++ôýyaµ¥{ô¥€E.ÈK°ºe£«wô¤dW•ÿ‹zZ$Ü'»ŸÃþQÙT ±mÈ×üáW«ûîå.BÄZpãuôyFd\3%K^ÿ‚–R¸G¼qMÅ8¥ƒN-*©lÞ\J«ÞŒMÎíÅÕ'§À/¸“¿½gURÓ²GyX>㰚í!U›ÃâÔ6’;/ªÙ».€M _t–&ÖOç/)*ÖÞ h‰?ÿê`ýRó*¬mzzt381o3Ý`²8Æ¡NA©Ü­ìsØnMc—{ŒŠ¥çms÷[ÜÀPýÁ¾QV:UZýÆÜ9üŽ…ÇÇîw-d\ŒÓ:kïxbðS+¯¸ZXI˜†K鹕óó+«{·w ýb†$ÑüÏí'MŸ¨Š@ ++J9¤ŠÄz8­CˆÏ,g£2d@]úºº¾SV¶ä?¸yw-<$I4¬½)T†TZÛÕ7zÙÐ ÝQ LF~ÍÞ¥.–ªbÔ4v^¸O”ä²yLŽäòö–áCª;Áâï“Ðþ~ÒM%Î K)(®l–õ2âÓW1À~÷¦©‹2Å\+ )OÖ¢pq˜z¦dùVAaxNµ°qèš8)ŽªpLC HÙчÏç{G??ªŽÄÅøÙбohâSU1Ǧ¯9IÌ1Nêàý¢žóø|© a2ÙA ygtçõ잗¼v=ƒEB9‚Æà¾×ÄÆ¥— ++7É%‹Ë´G¤p‰OÇe}‡©Ù=Œ5#KÅ &0>wÍÈË/úùžÎT1 Š±Ã“ÅÙ£÷p»ÙÎÕU¯xPOˆOMÅkôå«¶ï• Á*Ø|Ÿ¿o_ÓØ¥$·§ÔM¬b(—A#ྊø`™Jè –£òQq`…SÚx§ ÔÖ®ÁÅE ++`§KËÔ¾Á‰ÐäÂ3ºvÇÄTÙ9ä)mo2P¬©gôìJ^6NK-Câ²G]ð”¬ké½fì,2Nhã´m}³ËêºúFgf—(úÜürgßhNy½ÆïmuÛ{O<¦föD> Ñ™*–’©9­w~ÚÜ1Úðÿ·wÎmdû¾è÷ð î}·êÝsöì}êÕ­:÷¾waãL†2™™Lx63Ç [¶%ƒldfffffff™-få-[‰"[’ãd’‰ìù}ªkʑZÝ«W·’ù}½zõþ>c~i£±kÄØ5L\Òtñ|Ç9¥?TŠñòð¡Y,},Uz…ühHhï“|´‡¼@Š)¸ø)—Ç7s ;ü®¤Oí)ïÖýw%škcºý?jvSɟØå$<ù-çø\\¥=²&£jùðQ£Oo™xZzF¹…d¼ Åübåû÷§Ž‡¿í×t¹eFœš]ùHm¥_ic~yáûÌÎÿ™EqyjëÿÔ֏Éþˆ—PkÏèmS/Žnåø\Ãåoî˜]Ãì|âM èç¿?qxõ®¦ËMSφöÁ4úm¶²©÷}WiªògN?èáÍ6ÞqÏí(—Ž*yô:jŒ&xƒöq/˜SR ÔΙ¹Õ‡7¶v‹.„¶½÷‘b H1 Å?ŝ\üÁ€ðÉ# ´|­ O*VŸñK.“Iþ»º¥Lüg üg"ÚÄoÖb±dv~퉽¿´Z–f¨6Fÿ½ôú•»^ý#3ãÖ ¹ãÕî.©^ކêBÿ˜Aª'f– °Ô¯uprrØ'Ò*ýÕ-':8§ÀéÙՏ×'/ÆÑU5õ^5ò?5òç}§Œñ¡³ïõô©÷H1¾{FɉN‹Í©¾t„¡äù±f¨=L1³þùü0Å Fdýf)FDRéWGWàM Ò'L1’‹¥OÒy)¸À)Fb^íWê0C‹ª åÕÍO2åòª@$´E2]ÿMݦÆ``þ…_îþ[Îñ‰ln–écC¾×sýÛ‡¿?uüÛÇË:¸gvJ|Áâ ++í£ž86‡÷ÄÆï‰­ßS;ÿS´Â“ñ܏ݯ¬®Û90ù•Ï7ZÔ‡òÔñk-Ì+oL`JimçáôŸÿZF%ôäÌ2:Ïì)ßéàþvԒE÷¥æ“ßKò§ŽéeÍê<ã‘€“¢ÃÀügµ{äªç¿ '*á*•P(ÚØÜÝÚٗH>@Ý ‰7·÷ç–7f—76¶vÑ¡‡:àó«´í½†ä”ÇŸ"ÅHÌ«ùëc‡/Õ`FŒŸÍICãsç %^kCý¦Æ`â?ã–àá;.$TÐuô?|ám„ ¡mîýÊòN(Õt>·óÿVà -Vná t2øäÄbqi}÷ÏfžÿD&‹#QòE8ç_ä_™b Òr¤|¡ùéo'¹¤…q¦$ñø‚óqa­²¿U»ç­â>c…^ï-Ã7\<"‘(.·úïO¿Ó÷tŽüÊ#6«ò'#BDZÙàøÜðÄüÔ܊@øÁ2€ê,:ƒUÛ2šWCŽÊñÊ¡Äæ¥äՎŒ/ˆÀÙªp¹ÀEök²¡PXXÕñùSǑ©ÅÜ*‰¤¤¶SÓÖ߯+¶¡}p_¡ÀÙ íÖµö[yDZ§çW`ÒðN ++êÎmŠÑ58}ÝÐméפkë[mȗ41êp;‰ox¶Ú?äñÁ:Óç/LWµ›ƒáñïbú:|ÃPŠÇD¦–¡jpa…ö7Ë“K4ì)¥u] û”5÷öIy5&8êüòà¬$’øüÚsœbH$“7 ïŸb oKmëÀeœ:ÌëyY7<1/Ÿ§/°„Ïæ×‘™¸?ªßM%Ÿñ›Cá;.rA§ðƒ<±X"9Z”Ç |AtZ¹Kð:mç,ûzµµSã>_˜WÑznjØÖ;v¶mŠ ªÚµÖ7wUÝá.¥Gq¶¤ãpue피{¯ªêåŸRý–Ò}œ¾¾ä}O¬ÇÎx—tå³÷Îësñq®ç34›ÅâàS±ð:‘bHÎöÀžÓW‘¨>䳟ÜÓ/W.—‘\ú«R ƒí’¡óz~þÜÙB?5­TOâÍI&ñÿU· ƒÿŒqG"8ý À[ 
¹âšöòº.¡ÜLœ]ýÅ5‡sfJ$´ÍÝÔü:LpšsP*ZòÊ[vwé²5áäÔÒðè\P\–=¥½{dt|-ÃcóK+4ùI7QYH£íf–4yFd9¥ M‘ãòš:†EG9bÃúGgYy÷ Ϝ’/ˆW›.ÕAa©¥'6ˆþˆê½ÒÚ.bTöáQ¦Râ ++Ú{FUÍ Š6;7¿›Uéš‰šŠ£¦ÇdTŒŒÍÉ ‘H\Ýԗ_Ù¦jVÈñ饤ܚ©ÙUù §¤¦stjmaÿ€‰ºQڥᩥc“ ÒÍ¢ÿNÍ,‡§•:¦¸¥'N.ŠUT°he&“SRÝá›ë”ꜚZÒÝ?)ŠN¬¹ÀÈ)kÞÛ?|Ȩ€/léñËw<ÚꇾÁ)¥ý€^ìéŸ$Ç壍£•}¢sr˚Lö‰hm“Çå7´ $º¥¢î ++H(¬iîçóøª‚_ÐÔ>œT|´~ªwtvNIÓÎî¾üú3 «‰9Õó‹J·€:0£¨¡¦©Oö(ßuÚNr~íæÎÚÈÊêVdZ¹Kp6$-­¸µYú‘¬Ò¦–®YW£Ž:8`¹P’Zz ϼºnGç6·[206›Yq@gg"ÔȶžqUë Ý•7vM>+Y]ÛNÌ­v9:éèdWw ½«:³<ž ¡ui´‹ ++j[P— ++d;:l¿H„.€„ÂÛÆÍCo¾w«´·Nvû&Ř^XýîhÄ'ˆqIÃ%*³â<ÎÓ+æ2Ø1˜ØQ·ƒIúï¢ÙføG\0[[{&náænaò7ŒÏ¬|jOÙÜÚ+¬j×u 2w‹ðÈòË´ D\5$˜»G¢’Xºæi€ ¹ÿÂ÷º‰Çe]ü}+Ÿ‡6d´Ü³òÁ¥²9\éj¨b¬iéjëwÏÊۙï•ëžeêz×ÒÛÑ7~nq]±töK$„¤‹D'krT¸îәý£³Ù%Í9¥-}ût–´š=,¼§t\‚gæVåkÑkbôs’•G¤oD6)4CÇ9ðŽ))µ°N±æçó9å-¬|PS]’QSсa©¿¼ðÏªbsxÒ6Ø{Åèa¨›[»J{µ¢®ûº±{]K¿|ÂbGŠñŠÈZ\¡Ù{Çá¨äˆlOjš†½ÿCkß®þ ´MTü?³£X{D¢z§=·÷¿nä^ÙÔ«Ø9¨[F'MÝÂÑg1ä¿È¿Èl'Ÿ¸/|¼Â³¸Üc ¬¬mé:e—4-,mC3Úú¹R’ü¢r)mÈ7L<²ª„ÇûY F§•£­™¹…“#³Q{p~‰·Í½Ðç×äûviu KI¾kéeKŠAG„6‹6þĎlMŠQ:ßêΝ@M»fâ¡ëäž…Zާ$=µóC—Y×À›<¥¢±ç{]|cÛ Òî]YßzbMvJ•¥#ó·L=Q‡£ à™=åŽ mPӁâ’Žz­°¸´ñØÆÏ+.ÉCcsòQElfÕemlïð´üoQõž”_÷ƒ¾kDjÙæö¾ü®+{݂ÓÖi»ÒÃÎ+FKÝTñ„ËŠúîë&õÇS {¯X;ï8Ÿ¸ÜòïUoïî2^¢ÍÝ#û†§ ±Ô’ÚNYÿ0X.þ‰·Íˆ#³'êäþ‘Ùk†nNä„%¹‰HÐ:# Xªgh†|»º¶¥ç’PôÂ3*8±H:Ö@úÖúƆ’tEßµ{èMýÈ ++ªÛo»×´ôÉî}@½4¿´–Z2:½$[sckOË)@<|8PE,Ÿ›8øÆ›àCOô!‹Í5!„=²&×·õóäæpÜÝc„§–FÅoR =|ƒŠcu}û© Ù#8M>Å@•^Poá™öêÜ¡&ɞ¼³¸L{(—b0ìÁ‘Ùîþ {Ÿø‡–^]}²ëb´ƒÉ¶öŠ}îHAmSÚt,ƸP¯ØÝ=ºª/WHbÑ#[ÔÎT{ß8´;±èÍHŠôÂúkFîè[Ã;þl uÚºòõ°Ô‘ñyùa/Ë«¨KŒ°!ÒCC5<>ß78å•{ë(/“µ~qãLc1ކ3‰õ0ÔKjðt’Kš.–žQŠ™åy!^`‘ÿ¡†O*aGÞ3·àß9páSŒ„ìªo´±ž¡éŠó\H^†¥•~«ƒëš–¢Ó+~ÔÅÏ,¬)îbbzéª!Á;"[(PòKÁ™…Õë&žÞY²ÒZ(¹$Ã2íV"™]X3÷ˆ4tnî¥mí¡Š„" 9Þ=2>§ô#MÃ—õð¹å-ÇSŒòõ”¥Ilnõ% çé…Uåÿû/Äܵðý”ÖÝ}Æm ¯¼²cõ!jž™{¸5 œ²*•‚ƸŒâFùbØ/6˜"x]®«š©‘ËáyEå<´òf±ßÜö‚ JýÙÂ{emKrêĜï™bxÅê9mÏbm}[Ç%è{%ÏËD-·óŽÕÁËnáA¯Ô´ þ ‡ŸUvÎajÃ`¡¹¥¼~*íêږ®s{”ÒY<ЋX¿Äç6~+ë[¯s¥µ›FîÉ%<þi)ÆÄÌò7šÎåM½JóÏcò ñ¡¨ö~Õ Úö—σŠx‡Sf¼å²|ã–ñ¡ yaIùTJS æ©)ƃõÐÊÇÞ'^q8—Ç'Çä?0'íìÑO9”ÐÄâ›&žóËʟwYÙÐý®º©OöÊøôÒemLuK¿Ò.Q¹Æø°Õ×gêMбú^)†otî—j0µá;]Üm÷\ÿM*Èf8ý'5ŽÁÍ·•yðO¸ð)Æm3âôœòÜatbኾkLVÅYRŒ•µ-CÕzʽúÍÃhwIÙÕ¯Êãé¥ Ý:zÇe+…ІM{ÿ…å ù ++¿ohú™ehtV~ké¥ÍvÞq|ÁÛoÎ®BÔÎëí÷Á RHú[?øÞ)Æá ++…Ù"Ð+Qh×,Ž’.ò‰È~lçO{=ªo݂R5¸\å… ê«Ì²æ§ÖdÙóbÇb¸¹Q’¤ÆC<~TZÙU}×¹¥WÅ6msÏÜ-\Ç)`aiý”N@ɏ†„éy•áTïàÔ] Òðë,,³¨ñ+mLÏЙ¦>xŸÔè™­jƒï‘b…¢¬’æ¯5\ºÚ<¿´~ל”œWË?~?È !‰Åw-½e¡Ã ¨P Ÿ]þ&­‹Ë¬¼fä1»¸®ªKяö+»7êUŠaô^)ºì4œ/©ÁÓI¾Ðp!†f°Þ6™‡šoÏ1=þ‰ÿLí‚ ¯ÿ!fmÃ?uRŒØìʳ¤£“ 7 Ý#’JN©÷6·ö¶òÆ¥JÝ74uY·²þæ¼é ö-c÷ìã£÷Q9š˜ScMŠ9ñ¢ot®üX eÕ D <•S…j(YŠÑÜ5ü½.>[ndÇo™bb‚¹<%cÂSJîZz-¯nÉJY]§Àgv~q‰YUŠKRV5),ãª~q•vÖ#µìšÛœÜ(††ö¡ïõ\Ÿ;Rò+ZéL6ª–G¸§þdä–P¨´h‰N)ý^×Ù?!]ß+< õɌŠ!9¿>ŸmJŒÍ¨ø€)²FÛ¹iì‰L9qE¤–Ý1õÜÚ98ý(ޚb|­Í­x3¦É5(嚱GXb‘ª.M/G]ÚÞ7~,Åx¿±ýcsèk¦c1¾ÒÂö OKÎù]>›_IR»›JÐâöo‚Ž8ø§@ŠqÆchb½žQÐpJ¬ ŠïÛø:ø'ɧ4¹‰![ºFP±Óü©«‡Q‚_¼_Tα„B"q&'PSJ' ++¤3X¨y½#35]ÔÄâà„"c·ˆ¯µq²£¾}ð²¶²¾[ÝRŒ{–Þò)†¦å±5Ù;"Ë72[éâ™E/”Íšñ®)†ähBk׈)úš¡û-cžQe ݇7ÚÈU𨀤«†îÔ4UÍ@‹LÞüë„àÔ[æ¤å³ÕÛj’b  ›™uǜ´q4å§ôE@ íè“'Ûû‡J10”¤ŸŒÝ=CNëRJL®¬Kß?Å@GNŽÊýBbhºh9m)Lr —º™žÿõðá jõÈUÜÙIšþµbœ%Åž˜¿ªïšYxZŠ!Šؒ)ÉÒc`dæ².nyíMíWPÙvYÏuuýذèՍmT"f•4۔X¢eOiè’qsk¯ ²ÝÆ;NËÎß ø\ ð¬˜œj ¯Ø¯uÞ¤ ƒßéâ*ë{Ô<Åxnïï䟸Öç9Ȫõ÷‹!+ò[»GãóëL áh-ǀŠÆ^٘,%éòÖöþ[›!m švÛÂkå\¥HWÿøm3RDZ™ìƒùå-ẅ­]£o=Š3¥åÇRŒGv~§<ôäę}ÿ;J–Ö6¯ha>ù@ ԀKΉEã/S1s›ú÷¯j6ÇçgLŸ¿—zá_;)ÆÙSŒôüúSî(ð…÷mÈN¯RŒÙ…µ«†„Öî1Ù ++EUí—õð‹+›²*­Y×>øÅs瞡¹pA”^Xoå³·÷f.ÌíÝlÈs[¿¤œZT»1˜l‡'mLôñ;JS ly]—š§ö{r‚ìY­oõÞ)† ‹ÅYZ¥ÙxF§‹+o|•òà’ï¿ ¯ŸyNFwjÚmsÒÒÊ9K1'rÂ{Úæ®ô#΁ö>g¡ÿ®c1pImýh›{gìÒ÷‹‘YÔø·'ê0¯çU#÷N¹)pÎ5tmq‹]ø?«Ý“J0ÿ™ßõR"‚ð¤oM1æןÛú9{ǝòØÎÞþÉÛf¤¨´Ri…Ig°¼ð͔›Ÿb`töKM—ÆÎaYŠÑ72c‚ ¹nâQZû&t™\x`áÕÜ5"_¦ZyDZ£WÖ¶„Bщ[¥#3¨ŠÍSóÚþÌÞÿôÇ ~ØCZ ¡=àBŒñ¡ÒW’rj¾×wš]>c3b2*¾ÓÎ͝=ÅhT£¾mðŠ.¾é躟ûZÓ¥¦eàƒ¥rc1sª¯fçWÏØ¥ïŸbX“b.©ÁŒ—41Ú.AÜ3_Êêcˆ–{YÞÿSýžTò¯ìèþÁb¼5Å`±¹¾Ñ¹?ê»®¬m©*“ójo™k_?r•¦8J1,S6£$ڈƒW¬.–šUÒTZۉÖáYÕÜ”PdMŒnéîšNɯ3u -®éßüÆqtrñ’†SI½²á’—‘YÇRŒí}|ès‡€ýÆ[S 
grÂsŠªYÊ뺮”uWUKÿemìÐøœÒçq~¤Cºkr\ÁckßW};µxé¹S^e›P(µ°á,ëW7÷ý çZTÙ®ô]t-=±öýmR ´6ƒÉ6À[ºGжöücóŸZûî읩|×cdjñ+M—‚êv¡ðL¿5ÿ;Jn›“>ý¤žÚØ¿=uLέ¹`¥²ã«ãŸ®ÑÆÎûªü)ÒØ1ø•&%¿NéÔ;»tM§@ HÙ+¨8oîznO’û½=‡Ã£&>²õ{lçO¤¦IuªPJLÞ;¿¶d߄^…´v~¡á,» âÄ~ü’¾’›ÝIͯûNW\Û‰Ú Q¨áYl®´¼Dï&åÔ\7rïUöh6‡çšñ½¾k½Üoì?TŠqXïoéc¨–„ˆ}:Sé B§R —¼ó잉H$V,wDb±1ښ-ýãéìÿÜÖoéõÃPN@}%ÿ[vÔZc\Èck²üãråƒ!“-ëÌщù»^>áY"¹¤Fr´ 5;‡ÿùԁHMÿµ)“ƒ H‘¥²)<ã›úÎá¿<²+¨n¿oáUPÕ®x%£s§…¼ë%¨K|ãŸÙø-žÒ¥r º¨Â’JäÇbœ±HýÃWj0¯ç—Z˜Ë:ØùeÚû+UÐÏpüj—bà?ã9KDBø7@ŠñÖƒÏúÅä^5pË)o=ñ*ð!é?º&ä_‹Å6Ähÿ¸|ùáÒ-¨D—Õ¢Ò9ÜÃÂ]!yxIÛÞûNK Ïârð¨öŠÈ|fã{E/Ÿbì3˜&®¡7M‰Ã'¶Ö7}ieS1 XÛØ¹÷Âû®…×+Ÿm…çi K2(¡ÐÔ-¼ohJþŠ}×±¨móKëw֤̉èE…êþ¨KËÑáËv.˜¸¬Êo´1#“ ÒWè öøÔâ[éüáŸÏÕáé$Î~ §ßÌs‰6ÆX>Q·'• …ø­DȇóÀH1Œ\ÃLñ!ÇRŒÌʛ¦žSs+ªRŒËºø˜Ì ++ù#*­ü{Üô‚òàU§þqùwͽˆa™Ù%Mmƒõ­ƒÑéeºê-3ÏÆŽ!Å_n·öŒ>±!£Õ„¢÷Ÿ“Ž/斷ýdHð‹ÎiêZllBµ®µgTNY * ¯è¹Ê§¨ü›[\3!„ß³ôöÊÉ/oml,ªj÷É{lëçLNØx]~£5sʚ¯ê»:’ãsJÑ Õ6„'—h;ÇeWµuÞ2#Ö5÷ɧv¤}—`%)ÆÎ™{¤KÒ#,©øg /ùCÚá…Uí¿XùáC#ÓJ+ë»Ñq¥Ô{Gf?³ó·%EÏ,¬ËâƒÕµ-§@Wÿ$¥)ÚidJÙU=üÜëçw®Ñv, áOlý’K«›z›:†k›ûcÒ+~yáë›Og²å» &çÀgö” øÂâêŽæÎaԁñ:.ÁúÎAm½cb¹Ç‘ŠD¢Âª¶{V^zXjdjieC7:¿©ùµÎ~‰¿Xz%ç×ÉO;9³üÈÚ[Ë)0&£¢¦¹m9½°þ)ÚÑ7~hlΚCJ}“bŒÏß4ñˆI+W™b,Ñ~±&“Â2äS Ôþ’º®oµq¾ñ5Í}…•í–žQèå N̊ZWpIÃÙ7:Wñé0;{ôoupŸ?wrö‰“¿$$¡èŽ9IUŠÑ38õ¥¦KŽ\Šq´#qk÷¨¶Sàs‡€`Y—–4Æv©žS`{߄ì¨Ñ±ŒOC&naÕM}åuÝ.”¤žÑtÆ[’?|¡ñ‰Çb|©ýZS«bâ“sMÂ9àØ3ðŸ©]Šá÷¤àØÛ£»‡fÃ2ä+·‚Ê6sÈÅåc½gæWuq!ù•oV‚œ²C|ÈҚÊûóQu‡Ê3¬ÿáSîZy£EÛ%È?&wqeCéÍÚ±$¯¼õ¶‰GgÿÄ)Ï79ö‰„É䜨ÚoAE›¦s0Úã½ÞOìý=¨i³ k¨‚š]6óŒÚ?8YòñùÂÌ¢|Èý¾èS¬ÉÎ~ eµJ&×hj2%„ßµô¹kåsÏÚýCS¨Èœ_\7'FuõË‡”˜<·à4ÅI7ö÷Ĉ,Bp*OÙaf7ZyÅllî*ìKt‚È‘9OîZzß³òA‡† H.­îÞy!CÛÚŦ„'+í4»´ÉØ5lE®Øæñùå-F®¡è¸텏 !<¯´Eq¾†Ãñ {Œ°Ä"mlðÑiõAç×Ö;6-¿–§â¬­oîy†¤?s @›ýÙÒû™S9"gbfYñ*ØÝ§»SÓXûJ{XKÍ*nd±¹,Ç/® "¥TVÏ£kÒÌ=2ÿx"pl§ëÛ¶>q1Y'.t®sªÚ kÒíÈ+,sEiè y™’Wû³…Wïà”’$ˆËwñOºgå[ZÝ)’Km2 ++,Iћ[Êã2:¹¨áT-7`çõ®^îvi±6&XÚÿ¨Kí¼cÓ êû_$—Ôt½øÖoÊþùÜéÏ멅ylë·¦"à9 ÷ÿ¢nÃ1X!?BŠ.TÔ E¨òŸøµâ=ä?"ŠägŽ@¯~Dél ++'ö%mo,,­/.Ó8<žøø~OÖaauÇÏfİÔÒý}æéÂåðZ,<£·f^D;A¥/ÚéÂÒÆÞ>C¶Si{”6­‚ÞZ[ßFŸ¢mîŠDŠe¼é=´Y´ÚÚÆ6úYՖ¥“MõDù)‰U´Då¹8ܦøp:‰£lì0›)V²}‘Â)~yê¹;zQÂãñ—Vh‹K«kÛ<ž@Õ} ++Ò-p8<´2jÆÖö¾HÚÕ×zŸÎ`¡kõ›Í=¥mè­õÃ fiµGºÙW)~Ódi‡Ÿr'…lÅ¡¾AmF-_§íªjÉþKâ엨ªè,¬®oK[÷Žß#%oK»”ËE]º‰|{Gu—J^¢WÑUÚ ‰Î2ã®è¹~êI1\¼"³Ï8?í¹#Þ_e]fªÑpŒÃwªmí¿‡ó\PÊ% —/?ÅÃVÑN?×p~î°¼²yqÿÞ ‡‹˜¾eàþøÉGd0🡅¤)á1ᛠÀoƒÍáeÔ[‘¢¶ðÒµ÷·óŒ¶%Æ 㛠mïÿÔÖOKMÌ©^ߨ¾“ɱ!Fë9mîì]ã€ÎL)¬¿oNú¿Øþ÷6dùÿØüÏGvyìpÊò¿Ùýí±Æ?inqí‚?õS"Nղî3þO†Ód8ý_ʖÿÈpü ûÿaÿ|´åcúÁ¯ö3·_ÂcVøí ++‚C\.o~i£wdººu °¶«°®«¥g´tfaiƒÉâHׁ¾\ÏàÔuCBUSâ/î×ZÝØž^ZŸ^\ûPKkÏXECOec¯Ò½ÕØ>4³´~@gý.¾«b‘ø`M´1&.(Y† ++„Sµ¢õÑúØG[FÅ»‹¾Æð;–T¢å´¶±}aŽèpR€ ibzinaý"€¸˜.ÞÍJbà|€ç¤8 ÅÀù)ÎH1p>@Š€óR œbà|€ç¤€ú‰Dt&›ÃåK$è H15%‘Hº&ôœ‚âòù’~g) ¦8\ž[hú_Ÿ8Ü0õœ˜^’k|f)6«’ÍážÓCÛÝ£Gg–ïìÑßéSbjJ,·õŽ¿ðŒ¢&s¹<ù‘ùÕíwM<ö˜çôЦV¯ëãçW6ÞéSbçOBAý}sҝuNÛß54}ݐ°´ºùNŸ‚8’ŠÎuŠÑ;:{R :}ÖÃ3>î ++Ô›ÍuÍx`áu"Å@W칸h¹<~\fÕaб)àBc²8]ƒ“»û ±XI¹Æš»G¶w•OÈã z'WÖ¶…"qßÈÌìâšH$R\M"‘ôÎ O.(݈X,žž_™8|w~y£t†Ï*]s•¶ÓÜ3ªê@vö:û'8þ)‹Z"Ǧ+šûòj:ÐR×14¿´®ôØ¥6·÷;&J{r«;Šë»;ú'V×·å?9»<<1ºBéÇi[»Ýƒ“›;û²W„BÑèäÂÆæ.*ÙîÐø|Amg~MgKÏÈúƎlµ­Ý¶Þ1´ÓüÚÎæžtìh_ªÉáòG¦ê;‡òÐú5­½c‹+hG'kuwplŽÉæH›1»°V×>ˆvú¡¦mpi•¦jû‹Ë´ê¶´q´rMëàÀè,GÙ,˜¨o§çV›zFС•;G&f—Ñ‹ª6+ˆfV›_¯Ú€¶Ì`°å×ÙÞ=@¸£â ++ds¹è›œ]‘Á«ghŠÁ:ÜÈî½¥guÚ>º®xüÃkƒÃá ŒÎÌ-­K£7ô_ÔûL %ùK¯Å卵õ´ ³ŒZ‚Þ]£m£“ÎfsO‰êЩA\X¡©Zín|z ­†VØÝc æt´ŒL̟2¥¨@ ˜š[iê}Õ¥]£èzãóò—´H$f29! 
E7 ƒ£³²öïíÑO¹f ŜK¨l{òÂ×/2‡ËSRÿ×· |©éBŽÎa²8Šï¶tþdH¨lèE5¼1FC]ZQò«àµõíûV>Ï( KJ¦d0ÙºÎA¤°,ôsfaÃ=s¯š¦^eÄKïÈ쯵0MíƒJC‡€Øü'¶~[ry¢•ÕM÷´ïõ]ZzÙ£¬Ü#¯¸Ý1'U5õ)]¿¥k䉝ÿ7ÚX# ÕÑ+ÖÚ#ê¶±Ç+ŸÒÚNÁQF€ ++H'r¼!.tskOé*ê»o›ëZ䞣wœoT.ÁÆR’ìˆÑæ®a?¸j9NL-¢uFfŸÚû?µöA;5LJý¨‹¿káÕ;4­tÛ»t'râO†nN¤{bŒŽ=冩gDZÙÉhmËCÍ/o¥ÓY!‰EW ݍ\¨Ž¤,õ=ü=KïâªvÅíV´þlNºoNDíDí1pBâ䗰&—¹ ,6×?&ï'#Âs2ZµÄ|ÓØÝ-8Ëå)‹”^†$ÿhH¸aèfã…Ö7r ¾ªïj恮YPÕÜ{ÍЭ¹}Hy°µ¾­açO IG'BúÊÈÄÂ]sRsÇðìš6䒆óOúø+ÚXsb4j!Zai™ö̎BŽÌ‘îbxr^Ë%è±åC·ouq·Ì‰·-Hh¹fìAM*Fg¹¶µÿ¦±GFaƒªDm§²¾÷–±G]K¿ª#4±H‚¾bõ­ÏЙ}ácGжvüÙÄ㊾«gh¦òŒ†sإ׌Ü5lßté cw5M2ÙBćÔà¯u°7Í<¥í¿nêéBIf)ûÚBŠ8ÇØ!4~ %3£s¯èáXx­È @BE]dFÅnËk›¨†¬møFù½\q#]ƒ“׍ÜQ™]V×¥XÎ-m|£…)kìA\^¡Ý·òñŠÌ–¥òkj;~§‹wö‰;ñ.z‹Ãåi8¸$óx*Çbìәz˜`·°ê澝=:Á: ³Öh;>‘97Œ<Æg–äÑPTVß}ÃØÃš6:µ¸À¤3Øhý͝ýÄÜZ[¯˜ÕmiŠaë­ƒ >%ŸvTßʧö¤gÿ$"5-2µ mGºåéù5=—`gÿÄ©Ù3|X\VåÖÎÁ«·V]CŸÚùÍ̯¨“§çWےѻ]“{û ´>Z¶÷è5Íý”ˆÔRùßÛ¯®mé:E¥–a)Éî!éӋҏ £Ÿ3%„Ý4%ŽÅ(¯:A$jlºmê™VX¿µ³z ­¼»ÏèìŸp Jé›}Ó·Lk¯è'v~UÍ}Û»téšhã=CSÆøP%ùĝ|”ú³9)µ ny}ëh}ÖÞcbvٝšNM(’ $)oì¹¢‹ohT•b<±!{§É¥ó·Í6˜~|ˆ '°&F»…mlö¨‰EOìüC‹¬ˆÑ•½´í}é™E{N,¼jàz˜˜´²»G·"F¡“Ž.×í½×]zÀèœBíÁú'I¿°¨'k[úQ³ Ai7ŒÝ³Jš¤í/¯ïîšQ)àÜ+«ïº¬‹ëì›8ñ:ƒÁ2À‡()× -Ý#'Þ¥mí™"]|⥃8ö˜×ÜÃRJO¬&¢²*PÝn„¥zEŒ'x|~TZùC+Ÿù×Ã4É šÎÁûÇx)‹'¦5(8¿Ä«F„.P©ÙÞ=vۜ¤t( ªÃRJ¶wN¼>·¸~Ûē’!_ðOÏ­Ü0r÷ Í`³OŽöG‡Àfó¤50úÙÎ+FK=%ŸnâQ<ÅpôŽû^Ÿ[sâ(:‡n™xØzF‡§” ++×´Ãc³·L‰qùò)*kÝCÒY“§çVwªß›&3Kò)†žsðckŸ¨ôr†BM>29E”PÄá¼:±½½oåq8JBöŠRèHÊ­¹mæÙÚ­ä~Ÿñ饯EÕír—„° ªýª¾[©BªõòèV¡àMí]ÑØó½Þi)ÆS%)ѓšîž©4ÒZ\¦=´ñó ++ϔïIt€¸ÀÔ_æÅJ+¨û^ß­­gLiGg¿Õç֟ÒE¡ÉÅWtñ6^1S³+ ++ß2660ùG·Åš|—&æÔ ËRi—ŽM-þ¨ïZRÛ)¿~xré CÂòúÖ;}÷!Ŝ?3ók¨ÒK.V(ƒ{ïZzL,º†»¦œxwh|îš!¡¾m@úû^TG9ø'ê8*­3«[’òëï[yŸ¸o…ÁâhØø#²eCßó+Ú.ë`痏Ý{Â]ý1A©½ƒS_h¸Ôµ œ¨ÿC’ŠڐWÖN+áÐjJo ++@»&„fhØùÉ·Í;*ûš‰ÇŒ²tàD¢ñ)†ƒW¬Žc âÍ/Ë«›šŽ?fæOîW,–ØxE£í0d/v L|¯‹Z”(»ÅfŸÎ´"DÃ2¯GÙ¦.A–náteãnЋvÄhǀuÚ«[E¦V¯¸Å¤Wpy§Í6²¸ºyY “_Õ.)i/ðË0#DÈ6»¹{ð¦‹Ghæ[ïwxïãgKҘŠyX”¦Lç”cï€yÓØÃšÎPè7‘HšVvÃÀMÕÜ1¯RŒ¤âëÆ¨?•ÞrR\Ýñ®¾uP¾K¿×ÁÕv¨êRRHš¹{„l".÷UŠÏ(\|¨¶·&FécC6·ÞÔՁÐ/.ßÊ= ++Õω¹ÕŽ{ûtY †>“Yußқ)7T!¯²íš‘{ÿð´|©ÖÞ7~ÇÔsckÛ÷-HSs+òQBïðô×ÚØ:¹ua…†*çÄÜù_¤oïÜ4õÈ*mf²9†.ÁaiÇn”Ø?`jc¨îÔ´÷;|ÔÚȬJԌ½WÁìÑmXJâ['G|ïÃCÝÙ=9*d{çÀÌ#òŽI:ç ¤ÐŒ§öþ²¡óâ‘ýÔÎOÕÄ¢B¡09¿N˞²±¹#Ÿb¸ù'³•­àry¨¾fà6·´.}emcGÏ9ØÌUå¬Ré…õWô]§U$>¨[»GXyËb…ªöKš.m½cg9;ï‘bÜ2õtNSuîÞ#Å@߅茊+:8Åj—×¶ېC‹x§=¡‰Åw-¼Vn˒ê˜üJ›[Ñ*{%­ þªÛì⺪.m;êÒщH1¿Gùå-׌ýon*ÙÚÙ¿gAJ-j@?Ï/®?´!U¶É ++?¡Pdˆ Á§Ê?bbzé±­P\¾ü“J…ÊרœjY:¿B»ià69»,­Öœ|ã½£²e…ßäÌòUƒÃy äKAô3>8õ©C‹õf€*›“sk%GÐú¨¨¦3_ÍȰ¿Ï0!„ې¢åç D«µõýó™ÓÈԛ ¢³+1TTÛ£wÙ\Þ]ÏΡ)Y¥”_wÛØc}k÷]ò‹Ãb~q½¬¶³¼¦CI–O1êÛ.ë`+êÞ‹|ŒÃ£<ÅO)¹gé½¼º%K1´(¨b×u ¨Zt(–®aë´ÝwL16äPTRÕ~ÃÈýìã>¤àô¹…“c.pÉWô\u)§´Ä<úz ƒ[pê-sÒòʙêí÷¹£Ä”›QñaS ‡çèŸtϒÄfóä¯U#ìÉ,JÒ}W‡·t):Øc)†¤€ß.—çä›h„ åÝ©Á?œË Ó#,C6sAvY˃¾›Û¯jõ°Ä¢§vÅérK›0$ ¿.®*ê:µƒV×^UV“³Ë¨Z“=©¤³o⮩¼®ëÄàÿ¹Åõ릞Ñé¯ ++ÑùÅu]§À–®aéPôÂ+UÂÒw·wPÙïž%¨‚jÔ¦®aðLç`# ÕÈ5ÔÎ7Þ;:Oß5ìk7)FCÇàwº¸Êú5O1žÛû;ø&°9<´¾ª…Çã˺÷=RŒ—GãnhÛ{ùm¤¨\ ÊO®–‘]ƒ“â×Á–’tßÚ•è§7C4¨i·-¼VVÏSŠqxU´ Þ4ñLÉ«“}°®©ÿ–©gMKß[â=RŒ‡¶~kÛoéÒ×gR Àï‹X,©h꽪XÁ¢"Åæ>±ö­–+ÏW7ï™Çç‡f wŸÚù¹§*>ÇqecU¹¥M¨ªDÛôIÇ&ËÕÉ}—à€„‚—GC3bskîZLΉ)ù<1ÚÌ=B:×@sÏèc+ožÜDiōšöiá::µø•†Ks÷¨Ò.eÐ.èL¶GhÆs[?|pZycÏØäÂÊÚæþ>ã¥X—_û•–\ŠÑ>x.ÆbhØû;ä{ætï—böÞëfæW+›û4îZxÉî?ÂR’X“OyÎè îÔ´Ûæ$فœ—ƒÃæZzFêc©ûtæÑyä›ÂÍÜ# ևI1*Þ¤XJâ#[?کӑȃ;J¿;Ss+_ø†'—…¢Ê†žÛ¤¹™ è ¶£o!0…Éä ŽÎ~wø G%Éd±¹¤XS|*ÀV7¶¶ðÊ.m’ËJÄéùõŽÓs+h/Æ®a„à4¥‰CJN ªØÛžmI ¦ÙûÆËϵ182ûØÖ¯´º­^ñ³¥÷ÊÛ0‰êvB`êá„ÊfLŒ>>/Fÿ𠪄ӊÕ;Ő¸¦h;²O} êI1=³÷·ôˆ”þ—YyÕÈ}úló\ Qéå߸¼ž™ò¼¤HVqÓ÷ú®ý£³G;ZøNŸSÖr–£x×µÿº‰çœŠÙ=!ŀûHÜC3´œ‚8<>%.ßÑ'N~¨ª™Ë{o6·÷³Ê[~Ðů({à‚X,n깬…›YFÅÞ]OùÕPÝÈdqXxÕ´ líüdàVÖØ­4ÅØÝ¥ß³ðŠÉ®BûÕv Õvȯ ü1þ‰"¡ÈÌ=žÿÖñ“s+_iºäUµ)}ÔetNµ|б¶±£éd€ a±¹oM1|â4×TÌïXÕØsÝø£¤è@Šj;¯èàf×O‡òî)Æ[Šg±Dâ“÷ÈÚ÷Uè3:ûϧŽ•M}²(át íCydWPÝ!>Ãú•‡)†k™²ÈLšb<³õûÍRŒ½Æ32†’´¿ÏˆÊ¨üÙÄãŒÃ%BßñŽ’¾‘™KϜjZÄgëRH1¿Gùm?{TÔuëb©¹¥Í¢ãÓULϯ¢z2·¬ÙÞ7çŸÄæ(¯ðQ]w͈›Y靋£$ ¿—gCŠö ËHΩyjç¯8³†Œ½WŒ1ª ¢MÓ)`|zñÄ»é… ·ÌI%Õ¿X>Å8Œ R˾Óãz[1õ@¯p¹<騱X™ZvËÔsxlNY. 
Œ/øÁÀ­¾eàƒ§ÈÂòúc2!0…Åæ*1¡Pn Ë»ŽÅ@Gªôy¥b±ÄÉ?ÑÌ5LúÇÝ}†1.ÄÔõpŽDEÖ#.vöèO¬} °!›;%£KKÖí}ÃÓ·L‰AqŠ-A¯tôO|«…ñ¤¦ÿÚƒÉÁ¤¼5Å@½™_Õþùǚ–þG/|rªs‹úœ¥ð ÜwcoŸa‚—v©òûtÐñÊ?l˜Ëå‡%•À¼€ß—…¥›fD3\È3[?Åê•Öz¸#—àû^M#ª~ñŽ*|[Ÿ8Ê3kß¡©Å²Ë[±ðÖsðÏા"¿ºý–‰‡¡SOtŽâ»óË´ËÚX3lÈ;¿­í“¿ßÞ£{§e7ÊîCYYß¾¤á™Q!:þ<Tÿ'ç×]Õçï*Ÿb ŸXyKg0=Ñ´í½ˆ”’…£§Ã¢·†'æ¿ÒÆøÇå£Jòx8ÂJɯ×w øÉˆð1ÆbHUw|¥å’VX/ ++OD-ûŒâêNTÕËÚÿ®)ƃÕÙ?ypÀ<ñ$ší½ƒ»æ¤˜¬*ًÓKW Ý|£rPi}be6›×Ò=Z^×-{ýÐ=4õ6––yâ*‰ÅCcs±ù²Ng¹…ß¶ -¯ÏÑûFf ]‚îZ~ýX ‹‹J»kê)»PofC ++߂ùåÆîZ7LßÑ3š˜W£á@1Ä/¿®H…BQTzù·ZrLncÛ Zmht.«¤IC¥Äæ×·ôß2#}¼´±Y7Œäø‚Š6T£ãªiêÉªÔr ++Ôs ++™\==÷]SŒåµMmŠ–sPvIÓÀðÌøäâÐèl~Eë;JÒÎ]öYÔ ¥u÷,HøÐô¢úŽÞ±ñ©Å¦¶¡”‚:S÷ˆG–ÞUÍ}òµ@(ŒË®F§ÃÚ+&¿¼µohjd|¾º±×;*玩gPB‘ü‡Þ¡™«zx3ÈâêŽÁÑ´åšæ^l`й[xk׈5)æ×§¨m¥MŸ?sò‰ÊFGÚÖ=Š J!…e*>B))<ë‹çÎnÔtÅÙm·w®èáѵ‡Aß ¹wß#Å@]T\{Ø¥FøÐŒ¢i—¢ LÚ¥-¼ªåºýÐ;<ó•&ÆÉ/±hºg`’ûÂ3êàm3BŠ8ÇP¹[V×}ǜÔÜ3ªtþˆ½}††JŒÌæŸ:Åìܪ.ŽJŽÉS,¥Õr@bÑ}²ÒŒCæ€Îô‰Ê}î@ÙÚÙW¸ˆ;&o™“ªš”<ç²®mð–)QÇ%xrö͔“L7,¥ô±­ßsÇ}|ˆ.–êäWß6Èbs‡'4ƒÐѝ¬xWhqùmÉÒO¡êÝÔ5,*½«ò59ÚBzQÃS{ÿ_¬}ÑjŽfnáù•m,gvaMÜú:L9J1øž!>q» ++»C @Ź£oœÒ#9·F²¶±­ØŸm½cöÞ±mÈOíüÑÞQKÌ á‘©eë´ùvnÐvì¼b)Q¹)FJ~†½¿ì®¡P¸°B I,Òv ++|fOÑp ++|î ïB¥&ÉG¯ªnptjüÄÎu—¦c ê.c|¨_TÎ̪â•ÀfsÑ´òˆ|„šmOA[FÿuôŽ«négŸŽD( MÌ[xDÞ·ôFÍÐt ++Ôv ++ˆÍŸ[Ú`2ÙègYŠ19³ŒÞÍ(¬Wu]­¬nÂC’ŠO\áût9*÷Ž9íµÜ†Ý50©t,CZ~Ý-SϦŽaÅ·PËM á×L åñ›.õ3ƇøGçÎ-­ŸèR_›YyÏÊ뉽?º ÌÝ«[ú”~!Å\’WÞv|€‰$ 1¥Ÿ:ºÑctnƒ¶+QX…Ngv÷MôöO®¿žvTº}ñ©í˜ï雜™]=½h³hŸ\P<ÒcGqô¥û“ÎÓ©l2•&;ÆÝ]FoÿTßÀÔúƎŠþ|yz‡JTô8›ÍëšF›ãry§Ÿ ƒs´òäÂÙ±±½}Ð78…N ++“Á–?ES‹Ò>v#Ò)PÕN±òþG>¿°ŽNôäô²ª S9®%1ÚÂ3ò”Ã9>IÊ«swú“€E*Îûљ̳v)ºªQûQÎ8á+¤À…55¿ré¹sauûÅ8H1€‹‰Íá‚Ó[û®©¸7ä܁¸˜fÖî˜z¦Ô+>`õœ‚¸˜‚ ++ošç–Ö/ÌAŠ\LÝý“݃“"e.9§ ÅÀù)ÎH1p>@Š€óR œbà|€ç¤8 ÅÀù)ÎH1p>@Š€óR œbà|€çÃÿɵ5ÁôDFIEND®B`‚PK ++!|Z<??word/media/image2.png‰PNG ++ ++ IHDR-›QÑ pHYsÄÄ•+tEXtSoftwareAdobe ImageReadyqÉe<>•IDATxÚì} xTUºmðz_¿$´eH„w&/PéN‚ ʔ0$Q $2ˆˆBâ€Hp 2I˜ ÍhˆBDƒ$H!Ð ´ ++C+ ’ Ð}¿~÷òVê×ýާ*•ÊTuNÕZ_8ìÚgÿÖþÿ½ÏÙg×ó!­HLL¼ÿ-Z´¨k×®¬‚p=¦M›†k9AÂÐÐPÖA¸~~~¸ÞÁŠ ·ƒ<$ò ò ÈC‚ ÈC‚ðxÖ«W/33³® €,‘ë+Î5¥c)ÈÃ* M›6õ¬èÞ½û®]»”ÿµkתšTBBq>¼mˆ.Â4iÒ$**êܹsuTwÕ($''CrÈ,’ÇÅŹ¦hKNŠ@¿~ý´MéF9rí¨j岚IKK3Ñד’’Ξ=‰ö«I×oݺuß¾}kØl"̦M› šÐàc!Ú•†&ÍÉÉÉáv‹$8|øp·nÝjؔµŒž7n¬j}Ì5YcTrÙhXŽÜÜÜÛ5IMMUnT–8Й„T¸¢Ñ´¨D †j…?ÚøÃ+ün óã?"´|$¢üTvp¨ð¶Âh ¹F'­Nòøøx‘J)HîBÉ!U²XŠ c·ÈºÚ“$®-‹¤%ŒˆOm=8‰`IÕ³®øº´•)¢mÛZ’Ä¥ÈÚúQ-ˆâ‹'îªzж¸]ɹØVKE­©­IV[K€ˆ*w‰h·€â)â8®Þjs'$$$11±Öx(*HÄRRBnTº ®¨/„§ÜEapé²2Ô麢 ¢ „¸Jí#5 #•h—‡" ¢ qiE“1ё¬dm;‚ˆÛVrEB‰”µ<‘d”‘"~"°Ý"ẠԀê£: %¶*‹ˆI\u&»!…R"|_+ðPô¡ yÜ–œª–T┓U‘GêDyJ1u-nWríÈ¥h¦•³¢ÖÔÖ r×ՒȬ‹XQUDTB¥ÕkŠLRéé%”GyªÎ'†¨*‰ª¤cW{È(«Fe»<ÔZ¹ÂmKÊBìŠxh«Qµ)h ¢¡QÚÞ¬ ©ŠŒt”’Ôe§†¥«Le<®Èú¨(¤-՜ñPšRH堛jWí«4$!˜L_+* ðS:žó áæõRÛ%#­§vIJÔDKTÕ$^R„h—à¤+Ã]J£n±KVØÔ¨©]“y‘DWžÈB-‚Á¡fv%—i *°ˆ$s? i+’¶ÈòS­”(1dñF+¹vZ%žÊ°]ã­4¤¬4ˆðJ<]ÃÙJ®]Ñ\Wt¬¶~´ë¥ª~춸]ɵ€æT²Tîv[³¢šÑÎ¥óèZíPV ++¥\•V¯!ÖiœªÌÁò GŠ\тŠÑ`»ºævÔò:““Ý~ýú‰òñ’÷•¼°È„Ñç‡è”²œ][«L¦à¡7ÙD¥3¦¨õÄ.å÷iÂ-µ‚ûžÂËìR‚ ÈC‚  ‚  ‚<$‚<$ò ò ÈC‚ ÈC‚  ‚  ‚<$‚<$ò ò ÈC‚ ÈC‚  ‚pwÖ$ò´iÓ ++ Y‰ÄXáž={îÞ{ï êÈ6 ¼Ÿdo·X,îчHÎf ¼'Oq~HæyHä!Aä!A‡A‡AAyH„—ððࡂ. ñÚÝ͛7ë.ñ+W®¾6kJº;7¯vëwÆzBÖ¿çóPºWµ+QEÌÌÌDRUЍútUãÚ"#cMfææªÆ*:qò\±Å™ééï7hØ0gWnÏÁµ^?*_+!¢#Ãv$Ô?Z<¬ÐÆYY[ýýj>$ÿî:w®’¢xþ¹)Ջk«¬þ²%32rdU#ŽŠŽròÕáuk? 
îѳY³¦ 4pMýØJˆèHİ} õVpr\#…#‡ ÂÇ xô±‚Cµ ™a¡` ÃW?‘~[ ­ßÖ¯«ÎÒC\d'YDF=®ÍbDd< ¨RP(,üªY³fÊÌFD%yöŽí_ˆ!­¡®KA$DH­¨(KI‰EÔ;ÆвœHƒ""Äè¶/¯ƒjŒŒB°˜˜1Ê_ÕRǎ$®ójZm GFFÚÖPb±ÜÓ¬)""Œ¼1‡¤™*KöWA¢K¨š ­€¶0fo¿Ó°<Züî;pÿi܄N»4lØÐ,õ{ù»oó÷~‘øÚ+½{ÿ±_ÿýû÷«ÿËfe‚0¡³²²’Ìë×oÀæ¿dõíêëÛȌµ|þÂÅuë7¦­X¾jeêì×ßèÞí?½°«a<½úý÷®Ì1°C{ò°šmñ»)ǏK_óç˜Ñ£LÝ[µl‘0ó…‰qgÏ7vô³SŸ?~œ·ñpß¾}° \™ã¤ÉSãâbMQ9]§))9?6fôÿúÍoòöì1;  ÌS½ ÅþQÖÖÕ«ß÷B•ˆiÅmWÙ-_¶øð‘¿’‡Õ7G&'õìõકiÐ$ÖG º#;;kÛ_V­ZÍyQbL̟f¿öª|„<¬2V¯^}éÒEÐ¤³ÁJØáíEï.Y¼È,£µI1zôèöí;,Y¼˜<¬2Ð5W­L]·~ƒ§’PiÅÉo™e´6)êׯ¿(eѺuÞùégäa•áÊ´Wg%öêìñ½db܄;ï¼ó³œ]$LšK–­ˆŸ1½¤ä€‰¨E̞XTt|ýúõä!ñ+4ºë.V‚ËØáµÙ¯'-˜wòÔ×ä!A¸ 1£G=7mú̧ê1ƝlÂS1n¼ýÍÿýßÿ}óæ%‹kOì"ë–’ó˖-ëÍd¸{lPRrrçNAº0õêÕÛ»oï‡z‰ãâÅKd¤ggog6;î»ï÷K–9zAâßþíß®_¿N}Xç$пÿ¨§F=VúåyÀ–„pO9zõ þ駟؉=­Z¶˜2)ÎA€¥ËS÷¹<¬[@††õymÖËòϙXþ­&Œ‹a'&\Ï\§ù(k[XŸGtž¯Ï_ϊ•«3ìÆ‚ÚÄ]åˆ{f®ÆOx;~":z”òO‚ +ę3§m=û<ÒçZi)恱ãǂTŽmT\»÷@ø Žç¿ñ~FŽÑ!0>ð×&jÝd˜“ÐnÕ IšŸ™ô‡‡„ÙÇG°Qý|}»uþä“hup{êÔÉð¡íZGÈÞùZG†?ü½üê«…Ä èaã gò06nbîîϵ>–’óhãðaÃåË%„ñüsSß^´X†?ü99«'‹É“'§¥¾‡ !èWZVm& ++°WÏ` ºÕH°cP‡¶mÛ½ŸþÜM/‰š3XDžWuVö13AÂ`tÔô¾]»öä¡;ѹSÐÑcE—.]¼?À¿±Ÿ&xÚ¿8#?³¶mC€F~~›¬E‹æÊ¡u«¤ÄáyuzFê{ËÐÀ‡ é ++ëÖ­¯ÞÎw(l¾z^é¬>>þ¥ëׯ[J ø„øx1Y5j„&3K½yìs|P1uÅrü)Ÿ7&áå©lT‘òQîFwÝõégŸùùùfnÞ ++e Y×E@×ür_~¿~ýMôMþúõë2pðð>Ö½Û:ÿñr×`¿ØR³ú ÖI…Ø>F[¨«`tÌX\_yå%ŒžÔ‡ž†]Ÿïíº{W†çºkã[·n}±7?.vBôã#ñÓ\cDD„÷êõàö³žü̃½z¬ZµÊÉ7ªe6˜³kw5fõë7nÖ}¹TLÕß/uÓ§=‹¿:ÍâêÕïsrrþüaÆ¥Kå–UóæÍÃÃ#ê(/X7«þ¢ó™Ó§+ Ü£çûŽdq ++þÀÌa#"ÿõ¯9Ž:ÉÐ9r®E'N‰¿ãYýœ×ç}‘u ++&ƒ·×¯;õFTÛ¶ÿ‘">>ž<$~e‚æ|öéû«Wj=[¶lµ2-µÒ¸ùù_o Ð`c»öŽŒù'¢Føøl©0,ÿùēëׯ•Y}lÜD»³zàµY/¿>wþ¸±1òÐ8mUz'޵0Úv3òÐ=`·ûÖQŽ`…¿@UcÜߺÒ)ß·ßþ}Å{ËÔOhõá#"¡Ø÷íÛWRbqT´²Ñ§J³zPQ½Ã¨»E»Ô¥Èÿò€< È.Ц¹aÓ—m¿Øàæ„[·dj‰7xHø¨§ž®4nÓ»ïnÚônã´f¶Šê0Gÿø‡Þùñ‡v}žg(ýl ++xìs µ%Âñ. ++KÉyõb1 Q ‰ÙšŸ¥eeꭎ3ðSÒW¯qÔú6¢ç͛÷ފ•ÆüÎ_ExòÉh’ÐÛyXïhUÇ»(&MœÔ¨Q#ø[JÐW,[èCL UÄyóœ:yðwñÂüÿëׯ#"ǎk©›ãüý[™ëù!Aúèžäڅn®òâ¬ÐŠžü¾õfòÓcÆ þàoma}AYß;p°€©¶€AMÙ0LŽ?¡6ŠE#ÖM»víµ¯•’‡^'5!á€þýﺫÑÑcER_~õUÌÕâã[´l)û¡d‡7¬•3gN#Œ”F¾¾ æÏ'ˆ¶m۝9}s¹#‡+ÔT½ê…`°3LvfØ öâŒø׬Aüemý ~’-uõMy•Tž?Ál #û¡ta¤„GDÈ ¨ä¡›a»ûauzÆÚ?ÐØÏÏnåÎܼS>»?Àç§åG¾½hqôã#a©00\1#þ:ÉcåŠ$j»ß4‰›˜8땕«3쎕b¤dïüL,U˜©ä¡{ Ý!¡|NŸþž ƒ]tׯ_+³Jh§O{Vb©0ƒ%@½··zU@ë&j»ß4IN^04<"vüXŒ˜ºwM13„‘¢zlÀ‹3ГŸ}–<$ˆÚ‡ …˜bR¾)­í×¥sGX+/½”*¾µ0éõ¹ó[4¿<$ˆÁö›&:6ú”¿µ³KY"b¼`–˜ºbùœ¹ódó·Åاp“‡„ÑaûM\13”Ÿï¤,A˜GôµˆðGt¸/^ú;yHՇí7MΟ¿¸{WŽü,8x@6aëÆ@Q„D˜©o/Zlºé:ß/u3.ýý[V‚-uß4Y¿~-þ´at+s`¦©&1œ>,+»îU}îÛo¿}rÔÓ䞗Ã@< ´n£>õõi¯j€Ëß}Û AvDòÐ@–·ç ¯j€ƒ´hђ‘<4:bFî=µâ䩜œOxàvDòÐ@x衇Ñ/½gï† z¨7w6Æâ!zdtô¨•+WzC՟¿pñ¹s¢G=Å^Hn½4úÉQï¯^™ñÁZ¯ú9s^:4Â\ŸŸ!¼…‡P‰sæÎ;æ)̝<¸Þg½6ÃÍø ±ì‚„1ß§‰úä“O <ØS©m‹tɲœƒ¾OóìÔ©?ýô¨øæÛ‹F ê1Õ]Vvý­·ßz¡Ezýúuƒ,Â}}ú yX9êׯÿƼy«V­9<ü¹iÓ§O¾UËfï…肯͚õõ×§V§àßöܜ¹Ô~¦á¡`üøq]x }õ*ÿV-_•ø‡?þ±ß>¦«âó.îÎݳcûÇè‚ã'ÄÅ'¼ä_ŒˆÇ)g>ÐÝ»-ÿboþþ}ù0çà32òñF™¥~sw^\|®eËVƏüèãlN SòP€©þ0i´”œwæè¯*áÖ­[ee¥¿ÿ}lâ~fҔV­ZvhÏ®F˜ž‡jÒ(Ÿ¯Ýd·mË:q¢hâÄ‰ì „»À}À>Ÿdoßþq–“ÇÖyXû8|ä¯rÔٗûòÙòÐ=ؗ¿WK§Üªú9ÕAÖW¯~¯= ++û‹½T‰yèrääüj¯ãÖ-™ìyèjüùà íOL¹ZC‡.¬ÐK—.5oÞ¼W¯ñ30°c¹†üìSö ‚B÷J@DDøÃ?Ì>A‡®ƒÝ÷rx¶;A»” ÈC‚ Lj—~’½ýä‰"SWA~~ù—‹×~ø¿«MÔ¤µoßÖ=<1b¸Åb1{ 8ð%®¿û] ö'oÀΝ;»vízï½÷Öbš ahh¨{xã­’——WRR‚²Ô¤ !%%¥}ûö³gÏæü ܃ŒŒŒ²²²5k֔––’‡á6êä!A¸………{öìQÖ)yHî™*wIII^^yH.&„Û¶m³k£’‡á"€„eeeZã¬Ö‡> 7>öž{Ϫð£tĈ¸vëÖ-$$Ä8³Ä;Ù<3g¼ÀJðxúùù[,–-[¶4lØ“ChHƒðÐú0--­^½z¸*Ÿ&MšÔ³¢M›6™™å³Øµk~jc!LBB‚k$DFȎ}ÝÈèÚµ+ˆ§{e*"" K5&à¡0M®‚k×®%%%ýøã}ûöŠŠÂÏsçÎébÅÆÆân ++väÈd 2Bvìë„ÇòºWéô‚ÆV€Âۈ¸U×<ìÞ½»ŒÈH$!Ïä¡tôøøxJ¬0SwYt¦±ø 7ø#aÞIYÒ®]ùnàcÇODGB\ᖻqqqbÃ!?ÅNNN–ôÅS’UV± ++&ŠZBŠ*.A˜Œ‡èâ­[·†Âöƒ[§*Ñéá_‘Þ;g…uq F,®úóüù‹ðO}oYÜÄÉå¼øøA×JKqY¾ž6÷y–““ƒˆ2 ++ "ÜðÁl·nß¾]žBjêÙ³g•U ¾áÖY+à)Q×ք&ˆräææÞ6*„pDFF­< ˆtquW…IµBù+7ˆ’²dï¾òÏé[JÄâÖ5ëÓ$ñAxm‚›6mBv’¯ÜR•2®¦Â+O•”m²„‹ÞŽV 1ˆ<$11ÑÐúPÖHe½T­‹j™pøðájLAéuëþ¼ûóݱq*>šV«Z¡ëdDp~fË!žð»6È Ã캪N+ètèÐÁÄ×^ ëó~ö~¨WÛ¶í/^VZV†+ÜðE1—“Qe߂óŽÃKÂcøNææ!º²zB­ž"¶ԆÔýTžZå†c„Xð퉨?¯mÞrê䉯~~¸Â ŸääÖ½{÷&Mš€Z` HÕ¦MícLÌ1„žT)CKÃÝÆ 
++±bšêij+*áå¨'óCnE'¼yyyaaa˜•ä ~¨|¿” ܾ_êsòÔéŸ~ú©}»ÿðõõemœº“&ÆõêüÕW_±*ò ÈC‚ ÈC‚ ½Ó¦M³ý&BJJJaa!»ábxïz)H >ê ú™‹ÅPŸ—%¨=_–••=ÿüóÄO8Ž=ê'ä¡iеk×.]ºÀñÏþSKNö ‚¿–Ž(ýúõ“[;?-M7ÿËåÁž™„ë;)KðG»víá¯#¿Š˜f…Ï.°Û•S+yHx6mÚ$T å Á.Xž>¿œÉb€¥‡†áªN‰•ÃÒAZhQ9ý±Gû[JÎ_¼x wÃú<²wßþžŸúíß/]+-õÔèùo¼¡Í$”ˆH$22șÓp#M[!u2T¯¤\/%Œ ðT÷äœIttL…ðÄàƒ[ I||¼Ž>šƒk/^ú»8Ô¡—ݺûùú¶m×.ñµW´± A*1†}¬ïœ` p°Ó­[·Šd >$<B*p/Ó ++éý¸Έ'f†ºˆrx» ÷C½ª—»¨GèF¤V‘xd  ÓC9äc=öÀOø‹(f*´~ê–.šS¢8éüŒ¼‚ú•óØå ++ÎÃSìa[8<$Lôoh˜6mÚ`r³“4±BA¡™õ) BYP©KWg¤#úâwßÅ­-škӗŸ:ONˆ…¸éÈUû8D{Æ»­ œeŽÂÞ³ËO™Šú¤Q{7É ++µÌ£½ÓT½I£ZO5ßÓå«[}±MD'yHx¸†„®ƒ²ò¼Ï…Ú.µ”œŸ1óç'3>vü„1å”S:Ï ›¶@f’§ëF ¨ÕN¤¬¬ìà¡Ã'O&%á€þýﺫÑÑcE0^~õÕΝ‚ª‘NiYÙësçש¨ò`J‡^=ƒ‡IòÔ"0'<{ö,ìÆš$òÕW_¡i&MŒ#Â²eËCÃú¼6ëe¡_µ×w~ºKû€Èeðo5a\ ÉC˜›‡em ëóˆ] 0î™IíڵǬVñŽ%,Tü% 4*þ¢/WJò*,[„„W±r¡-•é+oýì³bKÉõ²² ’ ”ÖoÜìc]ښ>íلøøA×JKqYþ^ży .^¸€ðøÓft¢¨hÇ'Ÿ"zŸGú üÞ}ûcǏUT].âùûûîCÈ¡áòž”2V'Mœq áЗ}Ž0Ù:MEx啗`¦FŽ,ÿ¼MöÎÏrví~ì±Ab¸>=f ȃ?8tßÿ„‚úä“S§NöóõÅ<‡˜>l¸DѪß)S§|´¿¸'>3é?ÇÉS_‹Ý\`…"ån݃‘‹.SŒ[ï§9^Š LÀÃØ¸‰¹»?·{Kz3®/ΈÏÚ¶m÷®µ"rýúOUͨQ£»l=%hZ0DÕ=bªF.aJNž<ª“.³8(\ua`‹" ŒÒ~}ûˆ¨+ÓÂ׬K}~ya>P˜mÛ¶[¼xÒÁnø€í)YØÒ^^îÕ3ZWëo›KELaý"}Ʌ}Ž0;w ++:z¬èÒ¥‹÷ø7öóä«´´L÷’ôrp &ŸòDxüµhÙÖ „AølØ´%sóL퐮pãîK/%4òõ•,´)KFˆ šá.´®NsjsÑJ¥ÜʁŒ0Eú¿óÓ]ìs„}äææÞ®{$&&>òóÝyø‹OxåOã&ÔMXŸGü|}Ÿˆýø¯>+ûÖ­_ ¿N¾£cÆÊ¾D0VމCDÝþ [8uæÌi04¥*…ú^Áùa%°»ÙÂ1 Qµï¸y!âââäȇîÝ»ËW=Õir2„îX2݉r…¡ŽR#]„Þõzì±A²ïóº2Òç¼>Om¶ÀŸÚlÑ«g°Ìu{,|l6OxÉüP‡¨¨(ùX“|åó±~ôº[·nðCTw,™îÄØ«p#Ì+ú8/yhnØn}P>IÉɲï¡K玽|èµY/«Íú÷oÞ¼…ƒ™úö¢ÅxøAPÔÇfÿ„vóÄÁC^Øcäãöò‰ëÔÔÔk×®Aéá +++>¶<ȉ)_ÆUÔ£˜<ôÀù¡ÖP´ýPlçNAëׯş ++Ê¥®XŽ?ù fŠcú´gñW>{ü%°JÍ6/„úla5Nü åcÛ ³ƒ'¨ Â`Xʱ K¨8э2Ù³òéNŒO3žJn^˜‘••6mÚ¨s#ä¬Oùªï¦M›ÒÒÒlÊ9Ú#L†ámCÒ.% „¿}söÛ¿—’#//ÏÅY‡††:°ö¶§©¨s&À7pL>õ«"êNŒ°=¬“[XÈC#â™Øqâ sãÛy@¹‰ý »×~r›<$ áÃGÞºukçÎ쐐S,jPÌËjŸ=FÆÂä)S`š‚‡®·K«íigD5Àu‚  #áȑ#òš¼³V»{ ¸©‚<$œåáµk×nß¾-ï¬EEEÕú4’•L΢qãÆ}ûö­Æë2yHÔ¦âÊÌÌT/m«ÝАòB©z%M¹Åò¸_LP(X¸e†„OKKÓW…‘ˆÐÄ ++¥„a׺B& Ó@‘$55ÕG³[âðáÃजPokjŠCv]àŠ`Wñ« ÁpäääÀôŒŒšIDy)'66Vm•‡mŸø“‡„·$9{ö,žhwK€]ñññŽß E±i«¢} ²ùPk÷" Äñà™i…¤Cދ֭[C_ÙnJªùŒQ´+x.ÊV;#•|åpÙ]Åù!Áùa& XnÈn ÐJL}u3=Ñ]%¢öa ˜Ú‡”Å¿¢‡Hiz•QêãÆ÷i®_¿~à`{¼_Ÿ>ãv5(óC™È‰ÊÂtj“&M„$â k3Î ++¨Mm\­[¦|J0ø#:|`šÚècÝW%;†½j†Ûx¸9s#þÈ=Cܳ}Ï|Ðî–Pó@ÝÖ{m\å†&¨­Æsé5+â½æ[xîäaÿþýðÇ~OèsÚÌTڒóC‚p5d™Ô wl¸TÞºuëoߜeos²ØÛ൛6\ÊÝ;³ñGŽ„ÛìÒÙ³góÀôE‹ç´tØAÉCoAJJʞ={, «‚ ݃mÛ¶•””ÙòÐ=ÈÈÈÐ9‚hÍsT¯àá¹s碢¢RSSå‹ÑÎD‘ƒSêZ*»n‚ðXúX?ïÕ¸qc'¿#Ô½{wÇߟ&ò°jïÏB%*µc{,‰|³ˆ‹‹sŽ6mÚ@+Ê(@ddÔ±ã'ò¿<÷Œ™åáÛµk¿aӖèèQp¯\¡#¿:e%Í ++Ý1)¶ÐÊÀFxAœœ9Š|ð±~ Sw,IrròáÇa¸Â-ߕ€{öìY9¨ÜêÜ¥kB|üŋ—p÷÷÷5¿VZ:ê©Ñя|z̘½ûöǎ[rþ‚ÊT²‚¸P¶ǤØNµ2°‡Î"77÷¶© ™nÀ±iÓ&9ÌDNÏnÅC»zY¿q³õL@ë†cæÍâå$#•µÀ¢|ßZåe+a( ·£¥ŒóE"H’˜˜hÊçøòu=Lütǒ€ŠPDr@ÌT],!d ?ÁTÑ1)Z¥í@‚ð„u9ÛWµT£=–DÎB"‚[·t)&«±çÏ_ÄüЙeMHNYA‚ru|LŠÂìRX‰rö®°E•¿Í'Ôñ]@,X0nQ˜Rä´Ué˜ ++*[Të†#ß~•8”›zF’j…¸eP¶«ḎB'A»´R»´žðмû  š4i‚ïm'"ÕC^^^XXz?F'Ô ++s¿ç PN~& Î݆Z9–déòTü±+n„¹ßó®•cIžüŒßö&¨ ‚<ôFX,»Ÿf›={6û„G¢°°Ðö#`¥¥¥iq/åa@@xصkWµh¶mÛ6xd ¨u ­§M›úÍ7߈h‰7ÐkM÷^[m·f[[·nå6OÅsÏ=§ÚÏÏÏ ßÄïµÕ ++bbbt>þþþTž ++èC­EŠk—.] òäÜ{yˆq̘1Ž™IxØdʧ"f’‡FQ‰ÆiÂ-îëëkóÇ«y›¶¨¸¡՜ð`ÊA&Hhœ÷öç‡JÒ(õ¶7”ùãí<úg¾N¸¦Å1QìÚµ«q¤ªÑ{mžñ´mÀ€Ú‰„Çãá‡îÕ«W]´x€Ռ\íG(lQ‚Ð"11±zÏkúž÷ú›;´gÄüyó8?$ƒ<$ò ò ÈC‚ ÈC‚  ‚  ‚<$‚<$ò ò°f¸yófFƚ¡CwéÜñµY³*¨‹\ŠNœ årfîþy‰'îêÂC‘‚Ÿbgв ++ …†+x«†Ûå,í¬ÀA"#GÂ3¸GO«;n%AÖ! 
=þqë–nfí¸ÍT¿ftГW¯–ßí×7 6ÛCöÔ2Vñ”›ðå—f"JUEÊÊÚêxö~þY³-Y cã&‚ H}(d(XÈ©ÌiEP…%j‚;YUÆþìÛ¡Á”OAA ?¨£ÝÖÞ ƒMzçß_i% ÌȆ¿tYŠƒ¶‰?6mUz|B‰)YUEBÁ.DY!ý×çÎ ƒzlذᓣžyÀUøÃâ…÷à`G]W„ºXš¢>$œ ³ ÎËt špǎl(±iÓ_T222à»0JaòÁÌ?ÉÞqÃ:»rU¿†qËJ!P÷ò•«ÚÙØ&;³æ1|D$†„ÄUw<gpÜñdÒ©0K ë#Á$ŒŽÞ"@¥E ÈCWª`Õêth’—fÂT+*:¾vý¦>a¡šÍƏ‹»˜³‰ÚLœ=çÆÃÂÃO^®væ&”x~ú h¡·ß\رc'uÜ“çÌNÔY€¶LÒZùûC˜è'oа¡-ÏA?ܝ6uÊ=÷Þ;eÊ¥ØÁ4™ ‚ÀbÏ_°P”¡VÛ"h-Ҋ܄“¨çcý^[õ¾ÞY¯^=~'J ¨X}G¹%w±‡¡Ÿ¡—-_^ià™3f@]k l¢&˜?o^ûöm«z b¨œz¬fð‰ŽAAŖ04,Ìqøòg}ﯶv‚Ö×i< î2É0aƒå U ;Æ0f°ŽÃ[,˜¯ñ ´!ÉCæZn1J1ë8wîësç:³Y턖p;¸NCä!Aä!A‡A‡AAyHÄ/¨ésü/÷åŸ9}šõH%%–öíÛº‡ýúõ+>wµU’üã§OݵëžÑ0‡‚ƒ{xFY ++ ¿ ++ ++êøïÿþïP–¢¢¢–-[øúúÕn²wß}w@@@õãçææÞ6Ë·Ø\»ví¶ù±uëVCÕmMP\\ìSþՙôÛ__ß1cÆD˜t{cÍ322ÔÕìHIIñ°²ÈÕì@‹”••mÛ¶­´´”ë4ö+¨¤¤Ä3ÚÛb±ìÙ³Ž5kÖª½k2>=z4//Ï3Ê"T$+¬ ë|·Äìí­JÌ®ExFYÔøTuîWðP[AžÑwírÒìe1»z×¶…¡†û; XAÒÞæý¢»V˜]½ëÆGS‘At§,w§ïVêcFbv•hk¼™·,˜jÇGC©÷; ÒqQAþþþ]ºtÁÏ1cƘ—‡………P ¾¾¾áááø9`À¸³²²Ì¨ÞÑGÑwe•?ƒ‚‚ÐFæUï2‚ EpEO 1N73ÑØ‹-BOõó+´S\\`¨-ç;11eéÚµ+~öêÕ nôc3+¨ÿˆˆŒ,hësj¸Q:3–’£w}õÕW ø 7F“ôôtƒ”åNƒ´·Î$4é kÛ®hr“êö+d–¨Êb¨eFçaQz”¶_©RAyHyHä!Aä!A‡A‡AAyH„ÑxXXXhëYZZjÆ= ++vËâÀßȨ¨þMºwÄî'ƒ´‹!x˜’’¡m݌ŒŒ€€3nýÞ¶m[hh¨öeb¸Q3ò2ëv½ P:3¾ê¾„²h7OÂ'&&fÚ´iFÑíßöƒ"‰¿¿¿uo˜\Íû‰AUŠvíÚáêëëkҏAj[¤yóæR4”ÑŒe‘¡÷Üsô44Š>)ßM4U{kaÞ¯eÊ®Y-ŒóµÌê}QV ôSQV P‘ß/ýtæ*–ªIçܶ[ÚLºg϶]ì–Î,@Ò ÷Æ)讓ȿ){óÍÌ ´í¯FŸ[w+Ð ++ZõŽñѼ<´VŒ39¼Ã˜ím ÙsÛÛÔW'¿'•ÅPãã¬#S+]YÌ®@têÝìã£v¸7TYî0T{˺œÙ;®¶½= ,ªËzÀø¨Z#‹¡ î0Z{{€Ñö]³+ÕwÑ.žQQïFìcÕ{nѯ_?Õ¾ã2ڮΐ֙‡±,‹‹FTû¹Eõ¿›ø›ßüïg§N{èáÞÆ¯ èÇGVfððQO=mü²¬ýó‡Î[¿q3ËâJ|¹/¿&òÖèû¥wßÝ4°C{@ƒúõMQÈéL0–ÅÅ8sútMxÈýá~‡AAyHyHä!Aä!A‡A‡áÕ<¼yóæŽٓ'MêÒ¹ãÐ!ƒ“““á)?Å'#c ÂÀ?*PᆏA*®èÄIU„e˖‰Àª€ðmÖ,mxžW®\U>»sóàƒªpeÍCT¬9WlñŒÞŒ"¥³m”JkÍáEåJÜãµU˺ó%%¸üÿa%o÷ç.–BÞ¸qoÍÚõD´xŽŠŽ2ã÷H+ê]%%i”TÛ(•øùç¦x³³³øá‡ø„„ž=‚›5kÚúþ€ŽArë·õëÃ?Cû<²n퇦hx)‚NࢢãOÓ»÷÷ìùÕ  @↠++QÃÇ]½zÕeÒBÈà=!°ªv±/ÐÅ¡&þ”¢Æ­ÌÌLø ³ByB‘Âê]2åc«UÛ#.leÂ@Èɠ W6 ++J*b·P²A0±¹„„"*(¯­EãQ<ÌËÍ8hpƒ loýãÖ-4-ZzKæ¦Ø¸‰&ƒ T£hþn݃CÃÂò4Ûç À÷¥ëP$$Lt܊þsË;Êk!* dsrCý·”[ ++Œ§5-ÇÃõ çù^£»Ö¯TnãÍAcÐXëÓ4ÁNS.y¬ßk÷úþ›¥M‹î~³_¯øœþú¾ë«× ´émá‡Ã ³a”6}‹MšµÀ3ð”6[øf¥Û÷š^ƒ"J’éºâ7êÁj·kȄÑËVxÛ÷†ÍÚž¡Ê¹èJåYk1ºÇøÚ¹H’Ä‘‹ž àDÉ!'Î. #¼J˜€áJ­2¬Ôá¿úxº¥-‚Îc”“N‡FbkHñqĈ“™ì¸WA«›ƒ¼zñâå£ç/ýþòñ㗏~]®½-w%a^îÍOßüóôKçïß~|óä[;^äñ¯ùêõþ—ziÐúîÙëçÏ^}ÿõ_??±À»æá$ÆÂ¹Ž[,† ZÀ‡üý$"DòÝ$(AJƂÈÈ@__ Š,¸6íx‡Cº°/Íï„÷#>—ļÅp1Úcܺ§kj­¼æIh_œÏó¸[ÙÖ6¼<˜Ï î‰MeaƒæM ++.G!N°tÔ›bl»Kˆa×=2âL°‰tˆÕ$äЈ¦Lè2‰Á/ Að·a›½;NQ›ú>>2‘p6µ©ÄÔ0ã%4—(¶2F1Í#w‘Œl$÷|d\Hðtˆ)sc,„Mæ_t¯Aš±»}.bÉ%™Ú»ˆ±<²Ï¦A„♕3I¢<öŠ˜Bˆ"ç&“VÌ5CfW]lW:š©”puhí$nˆØØ_¡Ö›2ÂJõ…=^Üðß»œ1¹÷2ø½e ±¿³m5ÈæA•aK· b¸?QÇI‹Í­róÐfn(o=1IÞZmÔ>þÇ«} ÂxõÃS ötê;ð$•NQ2Ù¬oŠp›UMÀø˜|úEM͓›î ô¬¦9«iþ÷5MÑy>«dÎ*™³JÆ.ò*™¬xя€Vz´–¸ð©Ï„Pº/ï ++]ö8ûã! êŽZ?dšEÐ\.gàBŽtÛáL~Ad´¡,SÕ+„b©:Ό (œô°U·š óxÓÑjuõ\ÌÆ¡ðZC™&ÓÑF3{€·V¯{¡~к" d߇Dn1“DÝB¢¹| ½³SaѶ°h)õ…,ô×Ò+p99H=÷½”„„ôXù)•_y÷Ô=]dLsÛ5ËöڊëéxÚ ‘ 7“D. 
#¸<6‡OÙ×íÌ¥=eŠmÍÖÇðµJ"¹&fÏ9†3W÷AÍÍ:î~2A3ž>¡2¢aÒqGrièÉ,3.d‰(…é©tÿ1‘˜;”Äëy7Ð$ãV­5Õ?Qríʧg9ý•w2žLðHŒd]˜K•XgOV6ÒûÑøØ9¤s~ ¡üfUpL„\[sLx.¸3+n¤«åQ4Þ·dGÑY„–7J>™§pÝ^ÓÉíC3ÝܕÙ_næ0TN:ñ­ûv!5‘Kšˆº5íùãã]ò9VYÞ7X¥©{3×µW¹®è–8ù…£–-fPSŒ-Ô²Q“Ú)¹åÖ¡YtGœöm°µê‚XՕº·õb›ރÈïCµ:§Rhªð«…£`õJ2Ízt•]îKgÎIÇ}Pñ»^PóƒR¥åJ^Ý«”Z~·^êú~½:ð«•~¯öŒ"£¸ê§káÇ>],ßÛëñ­w÷ñªÔ>7bq™é:¸¬…õ»ûj­øÝ½CÀ2µa»Þî5JízwXòú½V©4z¥~#hö‡ýÀoµ‡]çHƒ½n=ðƒV©Q ‚’ר(ú­v©éÕj]¯Ùm ¼îÃ¥­aç«ï•y5¯ÿÿPK!Îc À«òGword/settings.xml´\ën7þ¿À¾ƒáßë˜÷‹§àuÛ¢Ù.ê쌥q,DÒ#9®»Øwß3ºÔ‰ó±H[1Ñ|CòððÜə×ßü¼Zž}ìÇíbX_ŸóWìü¬_φùbýþúü?ïê…;?Ûîºõ¼[ëþúü©ßžóæï{ýxµíw;zl{F]¬·W«Ùõùýn·¹º¼ÜÎîûU·}5lú5wøêvôs|¹êÆ›‹Ù°Út»Åíb¹Ø=] ++ÆÌù±›áúüa\_»¸X-fã°îvS“«áîn1ëÿZŒ_3î¡If«~½Ûx9öK¢aXoï›í©·ÕíÀûS'kWËÓsœ}Åt‡qþk‹¯!oj°‡Y¿ÝÒ­–'ëçÕý:ö+û8Å}WԜ³ýÕ§”ëßׁø¢³í_úØÅåöiÕÿ|êh»ü– ·c7îȏÕìê»÷ëaìn—Dñ匦v¶§îü Iù/ð:{¼Úô㌖šT„±óË w7»n×¼ÝôËå^gf˾£n¯ÞÝФýtgßfÞßuËÝ»îöf7l衏QoűËÙ}7v³]?Þlºõ–†õn–§çæÃ¿†]"Íia-îçãÍ}·éó¡ãí›×ÃÕvºqi{öñªÿ™Èîç‹iòf1_u?_Ÿ ¦üÔÃ%êâñênvëa×ÿ{üôѱ˜_Ÿ_ðÃØ/nïçpù²m¿žñãE?Ÿß=uóYùx¾º9˜j²îV´lŸ™“·Ã¼Ÿàa\|½|M öLæú¸p L帘÷ï&q¹Ù=-ûJkt³ø¥ëù÷Û݂zÜ•?AÁoЯ§‘$÷´ékßíHþ¢ÁöW—‹ÍÛÅ8ãwë9©À_6Øâî®i€©Ô[’ÄÅ8<îùümßÍÉCýÉq/?#òwó½¤O?‘Äže,gc½8P:¡ÏcÒ¹ã^ \T]1"½Äˆà‚ï•ð DŠZF,·"бä0"ÃT+G)ïD4—sGkY DŒM ++,.cDH}Tŗˆ´ S`•®ÞtñG#û±Âˆ8™N¶þ%¢Œ°1êd@^ Þf‘ÀecœÈZRy”X¢ª Ӗ¬Ñ)BiÌÑ¢T‘1:5«^íJ¼ÆTW#–j ‡O+{›¼¦œ)ˈÖr‡sm8\Î-7-D6hÂX(\ò¡6r)8Ã<–Iܛ22áÞ4‹S­eÈxÄÃÜ1,hw) Óf¸+˜;Fd—0"¶½ÜÐLãXk Tѱ¯ID9Ôn sx²!sdz 垛Œió"h¸7¼4Ʊ•cŽFa ´‰<ΰñš&斐¤¶Ê<é‚íOÖULu%7m+^…bbÀº]¬Ö˜‚*„År0!xœ*½ÆãTE®»}=¯V%(ñ‚þé¢r…R%„HÎG2׸d^ã6Rsl)„´[K¡”Å"”Qò@(«-n£™Ž×B« 0w 9[(;„pì̈́>4ÚX‡%Q'Áò&¬ˆóÍjƒ=“°FŒ8f´9֐xátäPëɌ íŽðÜ)Ì/üö’B!Œ¨ÒkoÃm“ ++êœÆ,;Áø‚ljd­°ÄGAbŠ%±íÑÑèÍĆ.$¦ î-±˜±„d¥R±:aîs-y+ºp<Ÿb™ÅTÎ#”DɵÇñ$9®¶²`Ÿ%…⮩$p¦’¬öÛRj‹%QJÒm(ñR»!¤è‰D—ã8„t$DÈk2°Dx!sÙ@²Àh¢S`Œ’P륕獄DìKÈÿù&R´;2Xñ*D²UXv…½Pze–GO2kémZ>K’–TÈB–JÜÁ3­šÌ F,±!jZU¸ ++ŠÙÄ!mг†6*.ƒn Êá] N“ň®S-e#¯WÒ$ìëI(öèÊP|‹Ç±,`{­,/ ++ÏÇʚàú(k¼Ã¼¶–%ܛcÚ@=UŽ"8(!ʉ\ ž*ϋÆmüd ŒÄ BRÀT“^1hU”Ç**1ߢu ¯\fGø*«ª1m¤Y¸ŽDˆÃ>XUY–QÒ9 ­Ë„ÔFoÖáPSfRáL Éʎf‚'hã ñÎT3ɰ$ãïq¼£)ø7˜6Ás‚ú£9-L0 o¦%«®(Ž=†V\ÌmU6R+‰m•¦Ü˨ö–CyӞ'¥W– î-h–¡„âq}Tk(_ I;ìÍt&ÑÆ½eãp| )æs¸M¡¬Ï´(…kº*…-’®Æàz•¡¨ÏC ++ §h ÎÔpË3¤ÀP8Šs3#•ÃZBHÀ1Ÿ‘”•3Òx %„⠁+fF‘G…5ä³$恦S elP­u£`ŒvZ ++â5ö€Æsƒóã•-x¼iè!Û74x¦‘BH<ŸHÔzCùÞû0‰×„iKº̝LqæNVûS´ÊXz«äS]UÀ±‹©–æŠËxIo„T©¶”™àýŸ©Ú‹+SΖ¡\O5jE[APâ É8G·BD\·B:\´Bk…)œ¢ðN—•uQ,<ùL[pÄE™¦®P¬V±± ++ÚڂÛ8±?µŽ,6–’]\]±ž»ˆ{ó2àÚ õJcHHÁÖÅÖØ"ÄãÜÙ®pÇ]ò5уM´ ++X’Œ8>°‰æƒÛdŦ-«„kv¶Ç3-S}#¶Qs°•%<Ç%õh'TА'ŒÄ¹&! 
++×PœLÜ)n2Ô§D˜5m'aĚWۙ)B€ˆ%¹†ºà¼ ++j° ªbëâ‚VZ)¬Áó™ö\1w¢bX·]b×¸È f\‘¡T*âº2![$WD#ªqUXì™\mÅb~Já|<ã ++ë‚g2ã}ZOÑ ^9Bªƒï97ÎÇsJ20BYös^ˆ‚w†(©.8û ¡jd‡^ÉÆ>­W5Ú¨F]ÌSâx:äõáב<¥zØVy-3Î5½Ö G5^‡-‰”Æû۞\:¶.ÞªŒã7ï¦ýXŒƒ+%ž<>A‰s‰˜?6Ɖ–e<өꎥwÚ£l U@=%$àŸµl¬OáÇ|„‡çSÈmcé-–¼ ++ÃâØßSØ  ÚŠ­ÁBîšÖíÀ'ׄYp$Db±–® ŽÅ(à+¸¤ŒÊA´¤¸72;¸LH¶6ÖàH((žÔí lÅõø §€#¼`ÝÚr|Z+尅 FW왂1 [¾`(ƒò($Æu—à(ÛÅ´9Šú f¯'¯‚§\¼ÑÆ&l]B`kI"[Öi?#:b­'¤âs±Z‹÷ãï%&$Öú4ÕÃá*Rñ¹Dyh‚Z’oìÓ&!>˜„â¸ú•¤¦´ #ÆâÂD1ÎôɸFœ”Êx奀j}2"`+–Œ ++óÀh&0ÕäqœœœÇ§Q )8ÿIÖr‹y`mÆ'n’ãgɳ0w¼Ð¹ÑF*\SM;¼ƒ›‚ÔøtFŠ,ãª{"±ÂuKBU¶DvÇ@KY\S%$àŠs"ëҐël–œ„-R*¼±#ª ødfJàL/3Š ß2g{ŒÌ¹¨pµ3—ï¬f¡*^Óܬ©f9å@Ûª,M#—ÉŠqM¥4\¯Ê”Uc»CHc#kq¼CHµõl¸ÇU‚l¤Á{Ù(‰k„øSš…ÛXéq­&“Öã=Êì”Æ±v¦âý¹<Õ{±x-–Do§Q )ø,uö6àó9L„cD2œIä(¢ÀãD“ñî?¹?#°ìPýiž”ڐö»<¹(…£[BªÄ²S´ÅV9«ñùxB,Î(mõ8Þ)LfœMf ++Ž¢ çSㅋˆÏïPÀű )ÓILø=–"ŒÂÚ"¥Ã>¸hêS u±-‚ö1¼Që,VFœQ«2–øâÃçùŠ“ZZñJáó.…òSü¦S S#4!LuäAã™FeðH„dq ++oqþSOø´ !W#J’ŸT*É ++ì™JæEbîdÁpæJHc/´d¥:Wx#—)U5ޒ+U›m•X gZ§D§ˆ„wû*S.C ++*³šPå‚ã ™„#âÊUÆçž*7çšU0‰W°Œßo¬Rœ×WIó%$ᵪ„Ä ++!¼±*cq½·jUð‰(J Þ«F4|I5R⌿Z1mVJœoSñ~cuô‡Û8®q¥¾º&ß¼Ô¸NNH.X½öxŠO To9Ž„jïÏÕ  Þ©QE|Ê­N§…qBpFIH#7«”lâ³y5QŒ„9ši鰞"K9a-R-ŠR½¢±g"ÄâSã”F´æCÎûàJÙŽ©~OõåÚ¾y½ºš¾€1½¯¸š^Œ?[Z¤nu;.º³·Ó72.§'nÇq±>á·ýÝ0öŸ"7·'ðââlWÝrYÇnvöеºš/¶›Üßí¯—o»ñýs¿Ç'FxwÞß}ÿk_ÓÇúñŸãð°9 c·9¼ð~z„«Ã¡‚ÕÕb½ûa±:Ýß>ÜޜZ­»ñéèa=ÿñã¸çÓ3{¯v÷ýjÿ်ý ðûgûõÅiÓh¶o¦—Ûû·ÝfsxGþö=¿>_.ÞßïøôZûŽ~Í»ñÃþÇí{qÄÄlÿ£›M3£§Ï÷ÄéÞ'ÏÉÓ=ù|Oî©ç{útO?ß3§{fºwÿ´éÇåbýáúü×Ëéþݰ\ýüÛgü‹[&ì?cñG¿kq|zÙ= »Ïž°éáÍç=Ì»]wúPÀg÷"þ‚–éC³‰ãÍÓêöùû¯„/ÛÝM¿éÆn7Œ'ì{Œ««ù0ûŽ4‰®BEºìN癸Þd÷Ž„ü­ûOý]ì¶ýüˆšêCÓÿ’{3!Ewá=SJ}á<+2DK)Œ¯L‡ÿ•ôô±ž7ÿÿÿPK!ëU0Yݹword/styles.xmlì][sÛ6~ߙý=í>¤ÖÍr’©Û±e{“ÙÜ9íãEB6ŠÐ’Tlç×/n¤@‚äa7鶙i"’çpn8ç$~üù~{_HšE49M~<’4Œ’›Óѧë«gÏG^–ûIèÇ4!§£’~þéïûñîe–?Ä$ó@’½Ü§£Û<ß½<:ʂ[²õ³èŽ$ìæ†¦[?g?ӛ£­Ÿ~Þïžt»óóhÅQþp4#“öA¡›M ì·$ÉýQJb†H“ì6ÚeÚ]´;š†»”$ËØ ·±ÄÛúQRÂLæh)Íè&ÿ FõH@1òÉXükŽqS°ÈâXAe[r?ò¶ÁË×7 Mýu̐ؐ<Ö+O~bÒ ipA6þ>Î3þ3ýªŸê—øëŠ&yæÝ½ô³ Š®Y/Ô6b¨¯Î’,±;ÄÏò³,òoÞò4Þ ²\»|…Ñ舷˜}e7¿øñéh:-®,y*×b?¹)®‘äÙòLôiÅ/­îéÈOŸ­Î8ᑘü[î®þK4¼óƒH´ãorÂu²sÐ8âv1=~Qüø¸çö÷9Uùw {8Îô—ióJ»K6oh𙄫œÝ8‰¶ØÅO¯?¤M™áœŽ^ˆ6ÙÅÙF¯¢0$‰ö`r…ä·[’|ÊHx¸þ˕P~u! 
[unreadable binary payload: this stretch of the import diff carries a compressed OOXML (.docx) archive; only its zip member names survive, e.g. word/document.xml, word/styles.xml, word/numbering.xml, word/settings.xml, word/fontTable.xml, docProps/core.xml, docProps/app.xml and customXml/item1.xml]
À«òGÅword/settings.xmlPK-!ëU0YݹŸ*word/styles.xmlPK-!{„øòôÒ>word/numbering.xmlPK-!t?9zÂ(PcustomXml/_rels/item1.xml.relsPK-!œ‡[KâU RcustomXml/itemProps1.xmlPK-!¿WgCÇ2IScustomXml/item1.xmlPK-!œ€? ‹!iTdocProps/core.xmlPK-!Ī¿]6+Wword/fontTable.xmlPK-!»_ïkZword/webSettings.xmlPK-!‚Ïââ4\docProps/app.xmlPKL_ +\ No newline at end of file +diff --git a/src/crypto/fipsmodule/rand/internal.h b/src/crypto/fipsmodule/rand/internal.h +index bbeef76..eccf047 100644 +--- a/src/crypto/fipsmodule/rand/internal.h ++++ b/src/crypto/fipsmodule/rand/internal.h +@@ -143,15 +143,14 @@ OPENSSL_EXPORT void CTR_DRBG_clear(CTR_DRBG_STATE *drbg); + #if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) + + OPENSSL_INLINE int have_rdrand(void) { +- return (OPENSSL_ia32cap_get()[1] & (1u << 30)) != 0; ++ return CRYPTO_is_RDRAND_capable(); + } + + // have_fast_rdrand returns true if RDRAND is supported and it's reasonably + // fast. Concretely the latter is defined by whether the chip is Intel (fast) or + // not (assumed slow). + OPENSSL_INLINE int have_fast_rdrand(void) { +- const uint32_t *const ia32cap = OPENSSL_ia32cap_get(); +- return (ia32cap[1] & (1u << 30)) && (ia32cap[0] & (1u << 30)); ++ return CRYPTO_is_RDRAND_capable() && CRYPTO_is_intel_cpu(); + } + + // CRYPTO_rdrand writes eight bytes of random data from the hardware RNG to +diff --git a/src/crypto/fipsmodule/rand/rand.c b/src/crypto/fipsmodule/rand/rand.c +index 9c54fc5..357be39 100644 +--- a/src/crypto/fipsmodule/rand/rand.c ++++ b/src/crypto/fipsmodule/rand/rand.c +@@ -170,11 +170,11 @@ void CRYPTO_get_seed_entropy(uint8_t *out_entropy, size_t out_entropy_len, + CRYPTO_sysrand_for_seed(out_entropy, out_entropy_len); + } + +-#if defined(BORINGSSL_FIPS_BREAK_CRNG) +- // This breaks the "continuous random number generator test" defined in FIPS +- // 140-2, section 4.9.2, and implemented in |rand_get_seed|. +- OPENSSL_memset(out_entropy, 0, out_entropy_len); +-#endif ++ if (boringssl_fips_break_test("CRNG")) { ++ // This breaks the "continuous random number generator test" defined in FIPS ++ // 140-2, section 4.9.2, and implemented in |rand_get_seed|. ++ OPENSSL_memset(out_entropy, 0, out_entropy_len); ++ } + } + + // In passive entropy mode, entropy is supplied from outside of the module via +diff --git a/src/crypto/fipsmodule/rsa/internal.h b/src/crypto/fipsmodule/rsa/internal.h +index d9d6fac..1cb3b5f 100644 +--- a/src/crypto/fipsmodule/rsa/internal.h ++++ b/src/crypto/fipsmodule/rsa/internal.h +@@ -124,6 +124,28 @@ extern const BN_ULONG kBoringSSLRSASqrtTwo[]; + extern const size_t kBoringSSLRSASqrtTwoLen; + + ++// Functions that avoid self-tests. ++// ++// Self-tests need to call functions that don't try and ensure that the ++// self-tests have passed. These functions, in turn, need to limit themselves ++// to such functions too. ++// ++// These functions are the same as their public versions, but skip the self-test ++// check. 
++ ++int rsa_verify_no_self_test(int hash_nid, const uint8_t *digest, ++ size_t digest_len, const uint8_t *sig, ++ size_t sig_len, RSA *rsa); ++ ++int rsa_verify_raw_no_self_test(RSA *rsa, size_t *out_len, uint8_t *out, ++ size_t max_out, const uint8_t *in, ++ size_t in_len, int padding); ++ ++int rsa_sign_no_self_test(int hash_nid, const uint8_t *digest, ++ unsigned digest_len, uint8_t *out, unsigned *out_len, ++ RSA *rsa); ++ ++ + #if defined(__cplusplus) + } // extern C + #endif +diff --git a/src/crypto/fipsmodule/rsa/rsa.c b/src/crypto/fipsmodule/rsa/rsa.c +index 3205d7d..733e7fa 100644 +--- a/src/crypto/fipsmodule/rsa/rsa.c ++++ b/src/crypto/fipsmodule/rsa/rsa.c +@@ -303,8 +303,9 @@ int RSA_public_encrypt(size_t flen, const uint8_t *from, uint8_t *to, RSA *rsa, + return out_len; + } + +-int RSA_sign_raw(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out, +- const uint8_t *in, size_t in_len, int padding) { ++static int rsa_sign_raw_no_self_test(RSA *rsa, size_t *out_len, uint8_t *out, ++ size_t max_out, const uint8_t *in, ++ size_t in_len, int padding) { + if (rsa->meth->sign_raw) { + return rsa->meth->sign_raw(rsa, out_len, out, max_out, in, in_len, padding); + } +@@ -312,6 +313,13 @@ int RSA_sign_raw(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out, + return rsa_default_sign_raw(rsa, out_len, out, max_out, in, in_len, padding); + } + ++int RSA_sign_raw(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out, ++ const uint8_t *in, size_t in_len, int padding) { ++ boringssl_ensure_rsa_self_test(); ++ return rsa_sign_raw_no_self_test(rsa, out_len, out, max_out, in, in_len, ++ padding); ++} ++ + int RSA_private_encrypt(size_t flen, const uint8_t *from, uint8_t *to, RSA *rsa, + int padding) { + size_t out_len; +@@ -523,8 +531,9 @@ int RSA_add_pkcs1_prefix(uint8_t **out_msg, size_t *out_msg_len, + return 0; + } + +-int RSA_sign(int hash_nid, const uint8_t *digest, unsigned digest_len, +- uint8_t *out, unsigned *out_len, RSA *rsa) { ++int rsa_sign_no_self_test(int hash_nid, const uint8_t *digest, ++ unsigned digest_len, uint8_t *out, unsigned *out_len, ++ RSA *rsa) { + const unsigned rsa_size = RSA_size(rsa); + int ret = 0; + uint8_t *signed_msg = NULL; +@@ -539,8 +548,9 @@ int RSA_sign(int hash_nid, const uint8_t *digest, unsigned digest_len, + if (!RSA_add_pkcs1_prefix(&signed_msg, &signed_msg_len, + &signed_msg_is_alloced, hash_nid, digest, + digest_len) || +- !RSA_sign_raw(rsa, &size_t_out_len, out, rsa_size, signed_msg, +- signed_msg_len, RSA_PKCS1_PADDING)) { ++ !rsa_sign_raw_no_self_test(rsa, &size_t_out_len, out, rsa_size, ++ signed_msg, signed_msg_len, ++ RSA_PKCS1_PADDING)) { + goto err; + } + +@@ -554,6 +564,13 @@ err: + return ret; + } + ++int RSA_sign(int hash_nid, const uint8_t *digest, unsigned digest_len, ++ uint8_t *out, unsigned *out_len, RSA *rsa) { ++ boringssl_ensure_rsa_self_test(); ++ ++ return rsa_sign_no_self_test(hash_nid, digest, digest_len, out, out_len, rsa); ++} ++ + int RSA_sign_pss_mgf1(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out, + const uint8_t *digest, size_t digest_len, + const EVP_MD *md, const EVP_MD *mgf1_md, int salt_len) { +@@ -577,8 +594,9 @@ int RSA_sign_pss_mgf1(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out, + return ret; + } + +-int RSA_verify(int hash_nid, const uint8_t *digest, size_t digest_len, +- const uint8_t *sig, size_t sig_len, RSA *rsa) { ++int rsa_verify_no_self_test(int hash_nid, const uint8_t *digest, ++ size_t digest_len, const uint8_t *sig, ++ size_t sig_len, RSA *rsa) { + if (rsa->n == NULL || 
rsa->e == NULL) { + OPENSSL_PUT_ERROR(RSA, RSA_R_VALUE_MISSING); + return 0; +@@ -602,12 +620,9 @@ int RSA_verify(int hash_nid, const uint8_t *digest, size_t digest_len, + return 0; + } + +- if (!RSA_verify_raw(rsa, &len, buf, rsa_size, sig, sig_len, +- RSA_PKCS1_PADDING)) { +- goto out; +- } +- +- if (!RSA_add_pkcs1_prefix(&signed_msg, &signed_msg_len, ++ if (!rsa_verify_raw_no_self_test(rsa, &len, buf, rsa_size, sig, sig_len, ++ RSA_PKCS1_PADDING) || ++ !RSA_add_pkcs1_prefix(&signed_msg, &signed_msg_len, + &signed_msg_is_alloced, hash_nid, digest, + digest_len)) { + goto out; +@@ -630,6 +645,13 @@ out: + return ret; + } + ++int RSA_verify(int hash_nid, const uint8_t *digest, size_t digest_len, ++ const uint8_t *sig, size_t sig_len, RSA *rsa) { ++ boringssl_ensure_rsa_self_test(); ++ return rsa_verify_no_self_test(hash_nid, digest, digest_len, sig, sig_len, ++ rsa); ++} ++ + int RSA_verify_pss_mgf1(RSA *rsa, const uint8_t *digest, size_t digest_len, + const EVP_MD *md, const EVP_MD *mgf1_md, int salt_len, + const uint8_t *sig, size_t sig_len) { +@@ -905,9 +927,9 @@ int RSA_check_fips(RSA *key) { + ret = 0; + goto cleanup; + } +-#if defined(BORINGSSL_FIPS_BREAK_RSA_PWCT) +- data[0] = ~data[0]; +-#endif ++ if (boringssl_fips_break_test("RSA_PWCT")) { ++ data[0] = ~data[0]; ++ } + if (!RSA_verify(NID_sha256, data, sizeof(data), sig, sig_len, key)) { + OPENSSL_PUT_ERROR(RSA, ERR_R_INTERNAL_ERROR); + ret = 0; +diff --git a/src/crypto/fipsmodule/rsa/rsa_impl.c b/src/crypto/fipsmodule/rsa/rsa_impl.c +index a6865c0..1046f35 100644 +--- a/src/crypto/fipsmodule/rsa/rsa_impl.c ++++ b/src/crypto/fipsmodule/rsa/rsa_impl.c +@@ -261,6 +261,8 @@ size_t rsa_default_size(const RSA *rsa) { + + int RSA_encrypt(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out, + const uint8_t *in, size_t in_len, int padding) { ++ boringssl_ensure_rsa_self_test(); ++ + if (!rsa_check_public_key(rsa)) { + return 0; + } +@@ -528,6 +530,8 @@ err: + + int rsa_default_decrypt(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out, + const uint8_t *in, size_t in_len, int padding) { ++ boringssl_ensure_rsa_self_test(); ++ + const unsigned rsa_size = RSA_size(rsa); + uint8_t *buf = NULL; + int ret = 0; +@@ -593,8 +597,9 @@ err: + + static int mod_exp(BIGNUM *r0, const BIGNUM *I, RSA *rsa, BN_CTX *ctx); + +-int RSA_verify_raw(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out, +- const uint8_t *in, size_t in_len, int padding) { ++int rsa_verify_raw_no_self_test(RSA *rsa, size_t *out_len, uint8_t *out, ++ size_t max_out, const uint8_t *in, ++ size_t in_len, int padding) { + if (!rsa_check_public_key(rsa)) { + return 0; + } +@@ -686,6 +691,14 @@ err: + return ret; + } + ++int RSA_verify_raw(RSA *rsa, size_t *out_len, uint8_t *out, ++ size_t max_out, const uint8_t *in, ++ size_t in_len, int padding) { ++ boringssl_ensure_rsa_self_test(); ++ return rsa_verify_raw_no_self_test(rsa, out_len, out, max_out, in, in_len, ++ padding); ++} ++ + int rsa_default_private_transform(RSA *rsa, uint8_t *out, const uint8_t *in, + size_t len) { + if (rsa->n == NULL || rsa->d == NULL) { +@@ -1324,6 +1337,8 @@ static void replace_bn_mont_ctx(BN_MONT_CTX **out, BN_MONT_CTX **in) { + static int RSA_generate_key_ex_maybe_fips(RSA *rsa, int bits, + const BIGNUM *e_value, BN_GENCB *cb, + int check_fips) { ++ boringssl_ensure_rsa_self_test(); ++ + RSA *tmp = NULL; + uint32_t err; + int ret = 0; +diff --git a/src/crypto/fipsmodule/self_check/self_check.c b/src/crypto/fipsmodule/self_check/self_check.c +index 94f2da7..b7cd868 100644 +--- 
a/src/crypto/fipsmodule/self_check/self_check.c ++++ b/src/crypto/fipsmodule/self_check/self_check.c +@@ -20,17 +20,18 @@ + #include + #include + #include +-#include + #include + #include + #include + #include + #include ++#include + #include + #include + #include + + #include "../../internal.h" ++#include "../dh/internal.h" + #include "../ec/internal.h" + #include "../ecdsa/internal.h" + #include "../rand/internal.h" +@@ -47,21 +48,6 @@ int BORINGSSL_self_test(void) { + + #else + +-#if defined(BORINGSSL_FIPS) && defined(OPENSSL_ANDROID) +-// FIPS builds on Android will test for flag files, named after the module hash, +-// in /dev/boringssl/selftest/. If such a flag file exists, it's assumed that +-// self-tests have already passed and thus do not need to be repeated. (The +-// integrity tests always run, however.) +-// +-// If self-tests complete successfully and the environment variable named in +-// |kFlagWriteEnableEnvVar| is present, then the flag file will be created. The +-// flag file isn't written without the environment variable being set in order +-// to avoid SELinux violations on Android. +-#define BORINGSSL_FIPS_SELF_TEST_FLAG_FILE +-static const char kFlagPrefix[] = "/dev/boringssl/selftest/"; +-static const char kFlagWriteEnableEnvVar[] = "BORINGSSL_SELF_TEST_CREATE_FLAG"; +-#endif +- + static void hexdump(const uint8_t *in, size_t len) { + for (size_t i = 0; i < len; i++) { + fprintf(stderr, "%02x", in[i]); +@@ -71,7 +57,7 @@ static void hexdump(const uint8_t *in, size_t len) { + static int check_test(const void *expected, const void *actual, + size_t expected_len, const char *name) { + if (OPENSSL_memcmp(actual, expected, expected_len) != 0) { +- fprintf(stderr, "%s failed.\nExpected: ", name); ++ fprintf(stderr, "%s failed.\nExpected: ", name); + hexdump(expected, expected_len); + fprintf(stderr, "\nCalculated: "); + hexdump(actual, expected_len); +@@ -87,6 +73,28 @@ static int set_bignum(BIGNUM **out, const uint8_t *in, size_t len) { + return *out != NULL; + } + ++static int serialize_ecdsa_sig(uint8_t *out, size_t out_len, ++ const ECDSA_SIG *sig) { ++ if ((out_len & 1) || // ++ !BN_bn2bin_padded(out, out_len / 2, sig->r) || ++ !BN_bn2bin_padded(out + out_len / 2, out_len / 2, sig->s)) { ++ return 0; ++ } ++ return 1; ++} ++ ++static ECDSA_SIG *parse_ecdsa_sig(const uint8_t *in, size_t in_len) { ++ ECDSA_SIG *ret = ECDSA_SIG_new(); ++ if (!ret || // ++ (in_len & 1) || ++ BN_bin2bn(in, in_len/2, ret->r) == NULL || ++ BN_bin2bn(in + in_len/2, in_len/2, ret->s) == NULL) { ++ ECDSA_SIG_free(ret); ++ ret = NULL; ++ } ++ return ret; ++} ++ + static RSA *self_test_rsa_key(void) { + static const uint8_t kN[] = { + 0xd3, 0x3a, 0x62, 0x9f, 0x07, 0x77, 0xb0, 0x18, 0xf3, 0xff, 0xfe, 0xcc, +@@ -289,195 +297,185 @@ err: + return NULL; + } + +-#if defined(OPENSSL_ANDROID) +-#define MODULE_DIGEST_SIZE SHA256_DIGEST_LENGTH +-#else +-#define MODULE_DIGEST_SIZE SHA512_DIGEST_LENGTH +-#endif + +-int boringssl_fips_self_test( +- const uint8_t *module_hash, size_t module_hash_len) { +-#if defined(BORINGSSL_FIPS_SELF_TEST_FLAG_FILE) +- char flag_path[sizeof(kFlagPrefix) + 2 * MODULE_DIGEST_SIZE]; +- if (module_hash_len != 0) { +- if (module_hash_len != MODULE_DIGEST_SIZE) { +- fprintf(stderr, +- "module hash of length %zu does not match expected length %d\n", +- module_hash_len, MODULE_DIGEST_SIZE); +- BORINGSSL_FIPS_abort(); +- } +- +- // Test whether the flag file exists. 
+- memcpy(flag_path, kFlagPrefix, sizeof(kFlagPrefix) - 1); +- static const char kHexTable[17] = "0123456789abcdef"; +- for (size_t i = 0; i < MODULE_DIGEST_SIZE; i++) { +- flag_path[sizeof(kFlagPrefix) - 1 + 2 * i] = +- kHexTable[module_hash[i] >> 4]; +- flag_path[sizeof(kFlagPrefix) - 1 + 2 * i + 1] = +- kHexTable[module_hash[i] & 15]; +- } +- flag_path[sizeof(flag_path) - 1] = 0; +- +- if (access(flag_path, F_OK) == 0) { +- // Flag file found. Skip self-tests. +- return 1; +- } +- } +-#endif // BORINGSSL_FIPS_SELF_TEST_FLAG_FILE ++// Lazy self-tests ++// ++// Self tests that are slow are deferred until the corresponding algorithm is ++// actually exercised, in FIPS mode. (In non-FIPS mode these tests are only run ++// when requested by |BORINGSSL_self_test|.) + +- static const uint8_t kAESKey[16] = "BoringCrypto Key"; +- static const uint8_t kAESIV[16] = {0}; +- static const uint8_t kPlaintext[64] = +- "BoringCryptoModule FIPS KAT Encryption and Decryption Plaintext!"; +- static const uint8_t kAESCBCCiphertext[64] = { +- 0x87, 0x2d, 0x98, 0xc2, 0xcc, 0x31, 0x5b, 0x41, 0xe0, 0xfa, 0x7b, +- 0x0a, 0x71, 0xc0, 0x42, 0xbf, 0x4f, 0x61, 0xd0, 0x0d, 0x58, 0x8c, +- 0xf7, 0x05, 0xfb, 0x94, 0x89, 0xd3, 0xbc, 0xaa, 0x1a, 0x50, 0x45, +- 0x1f, 0xc3, 0x8c, 0xb8, 0x98, 0x86, 0xa3, 0xe3, 0x6c, 0xfc, 0xad, +- 0x3a, 0xb5, 0x59, 0x27, 0x7d, 0x21, 0x07, 0xca, 0x4c, 0x1d, 0x55, +- 0x34, 0xdd, 0x5a, 0x2d, 0xc4, 0xb4, 0xf5, 0xa8, +-#if !defined(BORINGSSL_FIPS_BREAK_AES_CBC) +- 0x35 +-#else +- 0x00 +-#endif ++static int boringssl_self_test_rsa(void) { ++ int ret = 0; ++ uint8_t output[256]; ++ ++ RSA *const rsa_key = self_test_rsa_key(); ++ if (rsa_key == NULL) { ++ fprintf(stderr, "RSA key construction failed\n"); ++ goto err; ++ } ++ ++ // RSA Sign KAT ++ ++ static const uint8_t kRSASignDigest[32] = { ++ 0xd2, 0xb5, 0x6e, 0x53, 0x30, 0x6f, 0x72, 0x0d, 0x79, 0x29, 0xd8, ++ 0x70, 0x8b, 0xf4, 0x6f, 0x1c, 0x22, 0x30, 0x03, 0x05, 0x58, 0x2b, ++ 0x11, 0x5b, 0xed, 0xca, 0xc7, 0x22, 0xd8, 0xaa, 0x5a, 0xb2, + }; +- static const uint8_t kAESGCMCiphertext[80] = { +- 0x4a, 0xd8, 0xe7, 0x7d, 0x78, 0xd7, 0x7d, 0x5e, 0xb2, 0x11, 0xb6, 0xc9, +- 0xa4, 0xbc, 0xb2, 0xae, 0xbe, 0x93, 0xd1, 0xb7, 0xfe, 0x65, 0xc1, 0x82, +- 0x2a, 0xb6, 0x71, 0x5f, 0x1a, 0x7c, 0xe0, 0x1b, 0x2b, 0xe2, 0x53, 0xfa, +- 0xa0, 0x47, 0xfa, 0xd7, 0x8f, 0xb1, 0x4a, 0xc4, 0xdc, 0x89, 0xf9, 0xb4, +- 0x14, 0x4d, 0xde, 0x95, 0xea, 0x29, 0x69, 0x76, 0x81, 0xa3, 0x5c, 0x33, +- 0xd8, 0x37, 0xd8, 0xfa, 0x47, 0x19, 0x46, 0x2f, 0xf1, 0x90, 0xb7, 0x61, +- 0x8f, 0x6f, 0xdd, 0x31, 0x3f, 0x6a, 0x64, +-#if !defined(BORINGSSL_FIPS_BREAK_AES_GCM) +- 0x0d +-#else +- 0x00 +-#endif ++ static const uint8_t kRSASignSignature[256] = { ++ 0x64, 0xce, 0xdd, 0x91, 0x27, 0xb0, 0x4f, 0xb9, 0x14, 0xea, 0xc0, 0xb4, ++ 0xa2, 0x06, 0xc5, 0xd8, 0x40, 0x0f, 0x6c, 0x54, 0xac, 0xf7, 0x02, 0xde, ++ 0x26, 0xbb, 0xfd, 0x33, 0xe5, 0x2f, 0x4d, 0xb1, 0x53, 0xc4, 0xff, 0xd0, ++ 0x5f, 0xea, 0x15, 0x89, 0x83, 0x4c, 0xe3, 0x80, 0x0b, 0xe9, 0x13, 0x82, ++ 0x1d, 0x71, 0x92, 0x1a, 0x03, 0x60, 0x2c, 0xaf, 0xe2, 0x16, 0xc7, 0x43, ++ 0x3f, 0xde, 0x6b, 0x94, 0xfd, 0x6e, 0x08, 0x7b, 0x11, 0xf1, 0x34, 0x52, ++ 0xe5, 0xc0, 0x97, 0x66, 0x4a, 0xe0, 0x91, 0x45, 0xc8, 0xb1, 0x3d, 0x6a, ++ 0x54, 0xc1, 0x32, 0x0f, 0x32, 0xad, 0x25, 0x11, 0x3e, 0x49, 0xad, 0x41, ++ 0xce, 0x7b, 0xca, 0x95, 0x6b, 0x54, 0x5e, 0x86, 0x1b, 0xce, 0xfa, 0x2a, ++ 0x60, 0xe8, 0xfa, 0xbb, 0x23, 0xb2, 0x41, 0xbc, 0x7c, 0x98, 0xec, 0x73, ++ 0x20, 0xed, 0xb3, 0xcf, 0xab, 0x07, 0x24, 0x85, 0x6a, 0x2a, 0x61, 0x76, ++ 0x28, 0xf8, 0x00, 0x80, 0xeb, 
0xd9, 0x3a, 0x63, 0xe2, 0x01, 0xb1, 0xee, ++ 0x6d, 0xe9, 0x73, 0xe9, 0xb6, 0x75, 0x2e, 0xf9, 0x81, 0xd9, 0xa8, 0x79, ++ 0xf6, 0x8f, 0xe3, 0x02, 0x7d, 0xf6, 0xea, 0xdc, 0x35, 0xe4, 0x62, 0x0d, ++ 0x91, 0xba, 0x3e, 0x7d, 0x8b, 0x82, 0xbf, 0x15, 0x74, 0x6a, 0x4e, 0x29, ++ 0xf8, 0x9b, 0x2c, 0x94, 0x8d, 0xa7, 0x00, 0x4d, 0x7b, 0xbf, 0x35, 0x07, ++ 0xeb, 0xdd, 0x10, 0xef, 0xd5, 0x2f, 0xe6, 0x98, 0x4b, 0x7e, 0x24, 0x80, ++ 0xe2, 0x01, 0xf2, 0x66, 0xb7, 0xd3, 0x93, 0xfe, 0x2a, 0xb3, 0x74, 0xed, ++ 0xec, 0x4b, 0xb1, 0x5f, 0x5f, 0xee, 0x85, 0x44, 0xa7, 0x26, 0xdf, 0xc1, ++ 0x2e, 0x7a, 0xf3, 0xa5, 0x8f, 0xf8, 0x64, 0xda, 0x65, 0xad, 0x91, 0xe2, ++ 0x90, 0x94, 0x20, 0x16, 0xb8, 0x61, 0xa5, 0x0a, 0x7d, 0xb4, 0xbf, 0xc0, ++ 0x10, 0xaf, 0x72, 0x67, + }; +- static const DES_cblock kDESKey1 = {"BCMDESK1"}; +- static const DES_cblock kDESKey2 = {"BCMDESK2"}; +- static const DES_cblock kDESKey3 = {"BCMDESK3"}; +- static const DES_cblock kDESIV = {"BCMDESIV"}; +- static const uint8_t kDESCiphertext[64] = { +- 0xa4, 0x30, 0x7a, 0x4c, 0x1f, 0x60, 0x16, 0xd7, 0x4f, 0x41, 0xe1, +- 0xbb, 0x27, 0xc4, 0x27, 0x37, 0xd4, 0x7f, 0xb9, 0x10, 0xf8, 0xbc, +- 0xaf, 0x93, 0x91, 0xb8, 0x88, 0x24, 0xb1, 0xf6, 0xf8, 0xbd, 0x31, +- 0x96, 0x06, 0x76, 0xde, 0x32, 0xcd, 0x29, 0x29, 0xba, 0x70, 0x5f, +- 0xea, 0xc0, 0xcb, 0xde, 0xc7, 0x75, 0x90, 0xe0, 0x0f, 0x5e, 0x2c, +- 0x0d, 0x49, 0x20, 0xd5, 0x30, 0x83, 0xf8, 0x08, +-#if !defined(BORINGSSL_FIPS_BREAK_DES) +- 0x5a +-#else +- 0x00 +-#endif ++ ++ unsigned sig_len; ++ if (!rsa_sign_no_self_test(NID_sha256, kRSASignDigest, sizeof(kRSASignDigest), ++ output, &sig_len, rsa_key) || ++ !check_test(kRSASignSignature, output, sizeof(kRSASignSignature), ++ "RSA-sign KAT")) { ++ fprintf(stderr, "RSA signing test failed.\n"); ++ goto err; ++ } ++ ++ // RSA Verify KAT ++ ++ static const uint8_t kRSAVerifyDigest[32] = { ++ 0x09, 0x65, 0x2f, 0xd8, 0xed, 0x9d, 0xc2, 0x6d, 0xbc, 0xbf, 0xf2, ++ 0xa7, 0xa5, 0xed, 0xe1, 0x37, 0x13, 0x78, 0x21, 0x36, 0xcf, 0x8d, ++ 0x22, 0x3d, 0xab, 0x93, 0xb4, 0x12, 0xa8, 0xb5, 0x15, 0x53, + }; +- static const uint8_t kPlaintextSHA1[20] = { +- 0xc6, 0xf8, 0xc9, 0x63, 0x1c, 0x14, 0x23, 0x62, 0x9b, 0xbd, +- 0x55, 0x82, 0xf4, 0xd6, 0x1d, 0xf2, 0xab, 0x7d, 0xc8, +-#if !defined(BORINGSSL_FIPS_BREAK_SHA_1) +- 0x28 +-#else +- 0x00 +-#endif ++ static const uint8_t kRSAVerifySignature[256] = { ++ 0xab, 0xe2, 0xcb, 0xc1, 0x3d, 0x6b, 0xd3, 0x9d, 0x48, 0xdb, 0x53, 0x34, ++ 0xdd, 0xbf, 0x8d, 0x07, 0x0a, 0x93, 0xbd, 0xcb, 0x10, 0x4e, 0x2c, 0xc5, ++ 0xd0, 0xee, 0x48, 0x6e, 0xe2, 0x95, 0xf6, 0xb3, 0x1b, 0xda, 0x12, 0x6c, ++ 0x41, 0x89, 0x0b, 0x98, 0xb7, 0x3e, 0x70, 0xe6, 0xb6, 0x5d, 0x82, 0xf9, ++ 0x5c, 0x66, 0x31, 0x21, 0x75, 0x5a, 0x90, 0x74, 0x4c, 0x8d, 0x1c, 0x21, ++ 0x14, 0x8a, 0x19, 0x60, 0xbe, 0x0e, 0xca, 0x44, 0x6e, 0x9f, 0xf4, 0x97, ++ 0xf1, 0x34, 0x5c, 0x53, 0x7e, 0xf8, 0x11, 0x9b, 0x9a, 0x43, 0x98, 0xe9, ++ 0x5c, 0x5c, 0x6d, 0xe2, 0xb1, 0xc9, 0x55, 0x90, 0x5c, 0x52, 0x99, 0xd8, ++ 0xce, 0x7a, 0x3b, 0x6a, 0xb7, 0x63, 0x80, 0xd9, 0xba, 0xbd, 0xd1, 0x5f, ++ 0x61, 0x02, 0x37, 0xe1, 0xf3, 0xf2, 0xaa, 0x1c, 0x1f, 0x1e, 0x77, 0x0b, ++ 0x62, 0xfb, 0xb5, 0x96, 0x38, 0x1b, 0x2e, 0xbd, 0xd7, 0x7e, 0xce, 0xf9, ++ 0xc9, 0x0d, 0x4c, 0x92, 0xf7, 0xb6, 0xb0, 0x5f, 0xed, 0x29, 0x36, 0x28, ++ 0x5f, 0xa9, 0x48, 0x26, 0xe6, 0x20, 0x55, 0x32, 0x2a, 0x33, 0xb6, 0xf0, ++ 0x4c, 0x74, 0xce, 0x69, 0xe5, 0xd8, 0xd7, 0x37, 0xfb, 0x83, 0x8b, 0x79, ++ 0xd2, 0xd4, 0x8e, 0x3d, 0xaf, 0x71, 0x38, 0x75, 0x31, 0x88, 0x25, 0x31, ++ 0xa9, 0x5a, 0xc9, 0x64, 0xd0, 0x2e, 0xa4, 0x13, 0xbf, 0x85, 0x95, 0x29, ++ 
0x82, 0xbb, 0xc0, 0x89, 0x52, 0x7d, 0xaf, 0xf5, 0xb8, 0x45, 0xc9, 0xa0, ++ 0xf4, 0xd1, 0x4e, 0xf1, 0x95, 0x6d, 0x9c, 0x3a, 0xca, 0xe8, 0x82, 0xd1, ++ 0x2d, 0xa6, 0x6d, 0xa0, 0xf3, 0x57, 0x94, 0xf5, 0xee, 0x32, 0x23, 0x23, ++ 0x33, 0x51, 0x7d, 0xb9, 0x31, 0x52, 0x32, 0xa1, 0x83, 0xb9, 0x91, 0x65, ++ 0x4d, 0xbe, 0xa4, 0x16, 0x15, 0x34, 0x5c, 0x88, 0x53, 0x25, 0x92, 0x67, ++ 0x44, 0xa5, 0x39, 0x15, + }; +- static const uint8_t kPlaintextSHA256[32] = { +- 0x37, 0xbd, 0x70, 0x53, 0x72, 0xfc, 0xd4, 0x03, 0x79, 0x70, 0xfb, +- 0x06, 0x95, 0xb1, 0x2a, 0x82, 0x48, 0xe1, 0x3e, 0xf2, 0x33, 0xfb, +- 0xef, 0x29, 0x81, 0x22, 0x45, 0x40, 0x43, 0x70, 0xce, +-#if !defined(BORINGSSL_FIPS_BREAK_SHA_256) +- 0x0f +-#else +- 0x00 +-#endif +- }; +- static const uint8_t kPlaintextSHA512[64] = { +- 0x08, 0x6a, 0x1c, 0x84, 0x61, 0x9d, 0x8e, 0xb3, 0xc0, 0x97, 0x4e, +- 0xa1, 0x9f, 0x9c, 0xdc, 0xaf, 0x3b, 0x5c, 0x31, 0xf0, 0xf2, 0x74, +- 0xc3, 0xbd, 0x6e, 0xd6, 0x1e, 0xb2, 0xbb, 0x34, 0x74, 0x72, 0x5c, +- 0x51, 0x29, 0x8b, 0x87, 0x3a, 0xa3, 0xf2, 0x25, 0x23, 0xd4, 0x1c, +- 0x82, 0x1b, 0xfe, 0xd3, 0xc6, 0xee, 0xb5, 0xd6, 0xaf, 0x07, 0x7b, +- 0x98, 0xca, 0xa7, 0x01, 0xf3, 0x94, 0xf3, 0x68, +-#if !defined(BORINGSSL_FIPS_BREAK_SHA_512) +- 0x14 +-#else +- 0x00 +-#endif ++ if (!rsa_verify_no_self_test(NID_sha256, kRSAVerifyDigest, ++ sizeof(kRSAVerifyDigest), kRSAVerifySignature, ++ sizeof(kRSAVerifySignature), rsa_key)) { ++ fprintf(stderr, "RSA-verify KAT failed.\n"); ++ goto err; ++ } ++ ++ ret = 1; ++ ++err: ++ RSA_free(rsa_key); ++ ++ return ret; ++} ++ ++static int boringssl_self_test_ecc(void) { ++ int ret = 0; ++ EC_KEY *ec_key = NULL; ++ EC_GROUP *ec_group = NULL; ++ EC_POINT *ec_point_in = NULL; ++ EC_POINT *ec_point_out = NULL; ++ BIGNUM *ec_scalar = NULL; ++ ECDSA_SIG *sig = NULL; ++ ++ ec_key = self_test_ecdsa_key(); ++ if (ec_key == NULL) { ++ fprintf(stderr, "ECDSA KeyGen failed\n"); ++ goto err; ++ } ++ ++ // ECDSA Sign/Verify KAT ++ ++ static const uint8_t kECDSASignDigest[32] = { ++ 0x1e, 0x35, 0x93, 0x0b, 0xe8, 0x60, 0xd0, 0x94, 0x2c, 0xa7, 0xbb, ++ 0xd6, 0xf6, 0xde, 0xd8, 0x7f, 0x15, 0x7e, 0x4d, 0xe2, 0x4f, 0x81, ++ 0xed, 0x4b, 0x87, 0x5c, 0x0e, 0x01, 0x8e, 0x89, 0xa8, 0x1f, + }; +- static const uint8_t kRSASignature[256] = { +- 0x62, 0x66, 0x4b, 0xe3, 0xb1, 0xd2, 0x83, 0xf1, 0xa8, 0x56, 0x2b, 0x33, +- 0x60, 0x1e, 0xdb, 0x1e, 0x06, 0xf7, 0xa7, 0x1e, 0xa8, 0xef, 0x03, 0x4d, +- 0x0c, 0xf6, 0x83, 0x75, 0x7a, 0xf0, 0x14, 0xc7, 0xe2, 0x94, 0x3a, 0xb5, +- 0x67, 0x56, 0xa5, 0x48, 0x7f, 0x3a, 0xa5, 0xbf, 0xf7, 0x1d, 0x44, 0xa6, +- 0x34, 0xed, 0x9b, 0xd6, 0x51, 0xaa, 0x2c, 0x4e, 0xce, 0x60, 0x5f, 0xe9, +- 0x0e, 0xd5, 0xcd, 0xeb, 0x23, 0x27, 0xf8, 0xfb, 0x45, 0xe5, 0x34, 0x63, +- 0x77, 0x7f, 0x2e, 0x80, 0xcf, 0x9d, 0x2e, 0xfc, 0xe2, 0x50, 0x75, 0x29, +- 0x46, 0xf4, 0xaf, 0x91, 0xed, 0x36, 0xe1, 0x5e, 0xef, 0x66, 0xa1, 0xff, +- 0x27, 0xfc, 0x87, 0x7e, 0x60, 0x84, 0x0f, 0x54, 0x51, 0x56, 0x0f, 0x68, +- 0x99, 0xc0, 0x3f, 0xeb, 0xa5, 0xa0, 0x46, 0xb0, 0x86, 0x02, 0xb0, 0xc8, +- 0xe8, 0x46, 0x13, 0x06, 0xcd, 0xb7, 0x8a, 0xd0, 0x3b, 0x46, 0xd0, 0x14, +- 0x64, 0x53, 0x9b, 0x5b, 0x5e, 0x02, 0x45, 0xba, 0x6e, 0x7e, 0x0a, 0xb9, +- 0x9e, 0x62, 0xb7, 0xd5, 0x7a, 0x87, 0xea, 0xd3, 0x24, 0xa5, 0xef, 0xb3, +- 0xdc, 0x05, 0x9c, 0x04, 0x60, 0x4b, 0xde, 0xa8, 0x90, 0x08, 0x7b, 0x6a, +- 0x5f, 0xb4, 0x3f, 0xda, 0xc5, 0x1f, 0x6e, 0xd6, 0x15, 0xde, 0x65, 0xa4, +- 0x6e, 0x62, 0x9d, 0x8f, 0xa8, 0xbe, 0x86, 0xf6, 0x09, 0x90, 0x40, 0xa5, +- 0xf4, 0x23, 0xc5, 0xf6, 0x38, 0x86, 0x0d, 0x1c, 0xed, 0x4a, 0x0a, 0xae, +- 0xa4, 0x26, 0xc2, 0x2e, 
0xd3, 0x13, 0x66, 0x61, 0xea, 0x35, 0x01, 0x0e, +- 0x13, 0xda, 0x78, 0x20, 0xae, 0x59, 0x5f, 0x9b, 0xa9, 0x6c, 0xf9, 0x1b, +- 0xdf, 0x76, 0x53, 0xc8, 0xa7, 0xf5, 0x63, 0x6d, 0xf3, 0xff, 0xfd, 0xaf, +- 0x75, 0x4b, 0xac, 0x67, 0xb1, 0x3c, 0xbf, 0x5e, 0xde, 0x73, 0x02, 0x6d, +- 0xd2, 0x0c, 0xb1, +-#if !defined(BORINGSSL_FIPS_BREAK_RSA_SIG) +- 0x64 +-#else +- 0x00 +-#endif ++ static const uint8_t kECDSASignSig[64] = { ++ 0x67, 0x80, 0xc5, 0xfc, 0x70, 0x27, 0x5e, 0x2c, 0x70, 0x61, 0xa0, ++ 0xe7, 0x87, 0x7b, 0xb1, 0x74, 0xde, 0xad, 0xeb, 0x98, 0x87, 0x02, ++ 0x7f, 0x3f, 0xa8, 0x36, 0x54, 0x15, 0x8b, 0xa7, 0xf5, 0x0c, 0x68, ++ 0x04, 0x73, 0x40, 0x94, 0xb2, 0xd1, 0x90, 0xac, 0x2d, 0x0c, 0xd7, ++ 0xa5, 0x7f, 0x2f, 0x2e, 0xb2, 0x62, 0xb0, 0x09, 0x16, 0xe1, 0xa6, ++ 0x70, 0xb5, 0xbb, 0x0d, 0xfd, 0x8e, 0x0c, 0x02, 0x3f, + }; +- const uint8_t kDRBGEntropy[48] = +- "BCM Known Answer Test DBRG Initial Entropy "; +- const uint8_t kDRBGPersonalization[18] = "BCMPersonalization"; +- const uint8_t kDRBGAD[16] = "BCM DRBG KAT AD "; +- const uint8_t kDRBGOutput[64] = { +- 0x1d, 0x63, 0xdf, 0x05, 0x51, 0x49, 0x22, 0x46, 0xcd, 0x9b, 0xc5, +- 0xbb, 0xf1, 0x5d, 0x44, 0xae, 0x13, 0x78, 0xb1, 0xe4, 0x7c, 0xf1, +- 0x96, 0x33, 0x3d, 0x60, 0xb6, 0x29, 0xd4, 0xbb, 0x6b, 0x44, 0xf9, +- 0xef, 0xd9, 0xf4, 0xa2, 0xba, 0x48, 0xea, 0x39, 0x75, 0x59, 0x32, +- 0xf7, 0x31, 0x2c, 0x98, 0x14, 0x2b, 0x49, 0xdf, 0x02, 0xb6, 0x5d, +- 0x71, 0x09, 0x50, 0xdb, 0x23, 0xdb, 0xe5, 0x22, +-#if !defined(BORINGSSL_FIPS_BREAK_DRBG) +- 0x95 +-#else +- 0x00 +-#endif ++ ++ // The 'k' value for ECDSA is fixed to avoid an entropy draw. ++ uint8_t ecdsa_k[32] = {0}; ++ ecdsa_k[31] = 42; ++ ++ sig = ecdsa_sign_with_nonce_for_known_answer_test( ++ kECDSASignDigest, sizeof(kECDSASignDigest), ec_key, ecdsa_k, ++ sizeof(ecdsa_k)); ++ ++ uint8_t ecdsa_sign_output[64]; ++ if (sig == NULL || ++ !serialize_ecdsa_sig(ecdsa_sign_output, sizeof(ecdsa_sign_output), sig) || ++ !check_test(kECDSASignSig, ecdsa_sign_output, sizeof(ecdsa_sign_output), ++ "ECDSA-sign signature")) { ++ fprintf(stderr, "ECDSA-sign KAT failed.\n"); ++ goto err; ++ } ++ ++ static const uint8_t kECDSAVerifyDigest[32] = { ++ 0x78, 0x7c, 0x50, 0x5c, 0x60, 0xc9, 0xe4, 0x13, 0x6c, 0xe4, 0x48, ++ 0xba, 0x93, 0xff, 0x71, 0xfa, 0x9c, 0x18, 0xf4, 0x17, 0x09, 0x4f, ++ 0xdf, 0x5a, 0xe2, 0x75, 0xc0, 0xcc, 0xd2, 0x67, 0x97, 0xad, + }; +- const uint8_t kDRBGEntropy2[48] = +- "BCM Known Answer Test DBRG Reseed Entropy "; +- const uint8_t kDRBGReseedOutput[64] = { +- 0xa4, 0x77, 0x05, 0xdb, 0x14, 0x11, 0x76, 0x71, 0x42, 0x5b, 0xd8, +- 0xd7, 0xa5, 0x4f, 0x8b, 0x39, 0xf2, 0x10, 0x4a, 0x50, 0x5b, 0xa2, +- 0xc8, 0xf0, 0xbb, 0x3e, 0xa1, 0xa5, 0x90, 0x7d, 0x54, 0xd9, 0xc6, +- 0xb0, 0x96, 0xc0, 0x2b, 0x7e, 0x9b, 0xc9, 0xa1, 0xdd, 0x78, 0x2e, +- 0xd5, 0xa8, 0x66, 0x16, 0xbd, 0x18, 0x3c, 0xf2, 0xaa, 0x7a, 0x2b, +- 0x37, 0xf9, 0xab, 0x35, 0x64, 0x15, 0x01, 0x3f, 0xc4, +- }; +- const uint8_t kECDSASigR[32] = { ++ static const uint8_t kECDSAVerifySig[64] = { + 0x67, 0x80, 0xc5, 0xfc, 0x70, 0x27, 0x5e, 0x2c, 0x70, 0x61, 0xa0, + 0xe7, 0x87, 0x7b, 0xb1, 0x74, 0xde, 0xad, 0xeb, 0x98, 0x87, 0x02, +- 0x7f, 0x3f, 0xa8, 0x36, 0x54, 0x15, 0x8b, 0xa7, 0xf5, +-#if !defined(BORINGSSL_FIPS_BREAK_ECDSA_SIG) +- 0x0c, +-#else +- 0x00, +-#endif +- }; +- const uint8_t kECDSASigS[32] = { +- 0xa5, 0x93, 0xe0, 0x23, 0x91, 0xe7, 0x4b, 0x8d, 0x77, 0x25, 0xa6, +- 0xba, 0x4d, 0xd9, 0x86, 0x77, 0xda, 0x7d, 0x8f, 0xef, 0xc4, 0x1a, +- 0xf0, 0xcc, 0x81, 0xe5, 0xea, 0x3f, 0xc2, 0x41, 0x7f, 0xd8, ++ 0x7f, 0x3f, 0xa8, 0x36, 0x54, 0x15, 0x8b, 
0xa7, 0xf5, 0x0c, 0x2d, ++ 0x36, 0xe5, 0x79, 0x97, 0x90, 0xbf, 0xbe, 0x21, 0x83, 0xd3, 0x3e, ++ 0x96, 0xf3, 0xc5, 0x1f, 0x6a, 0x23, 0x2f, 0x2a, 0x24, 0x48, 0x8c, ++ 0x8e, 0x5f, 0x64, 0xc3, 0x7e, 0xa2, 0xcf, 0x05, 0x29, + }; ++ ++ ECDSA_SIG_free(sig); ++ sig = parse_ecdsa_sig(kECDSAVerifySig, sizeof(kECDSAVerifySig)); ++ if (!sig || ++ !ecdsa_do_verify_no_self_test(kECDSAVerifyDigest, ++ sizeof(kECDSAVerifyDigest), sig, ec_key)) { ++ fprintf(stderr, "ECDSA-verify KAT failed.\n"); ++ goto err; ++ } ++ ++ // Primitive Z Computation KAT (IG 9.6). ++ + // kP256Point is SHA256("Primitive Z Computation KAT")×G within P-256. +- const uint8_t kP256Point[65] = { ++ static const uint8_t kP256Point[65] = { + 0x04, 0x4e, 0xc1, 0x94, 0x8c, 0x5c, 0xf4, 0x37, 0x35, 0x0d, 0xa3, + 0xf9, 0x55, 0xf9, 0x8b, 0x26, 0x23, 0x5c, 0x43, 0xe0, 0x83, 0x51, + 0x2b, 0x0d, 0x4b, 0x56, 0x24, 0xc3, 0xe4, 0xa5, 0xa8, 0xe2, 0xe9, +@@ -486,50 +484,64 @@ int boringssl_fips_self_test( + 0x79, 0x93, 0x7c, 0x0b, 0x92, 0x2b, 0x7f, 0x17, 0xa5, 0x80, + }; + // kP256Scalar is SHA256("Primitive Z Computation KAT scalar"). +- const uint8_t kP256Scalar[32] = { ++ static const uint8_t kP256Scalar[32] = { + 0xe7, 0x60, 0x44, 0x91, 0x26, 0x9a, 0xfb, 0x5b, 0x10, 0x2d, 0x6e, + 0xa5, 0x2c, 0xb5, 0x9f, 0xeb, 0x70, 0xae, 0xde, 0x6c, 0xe3, 0xbf, + 0xb3, 0xe0, 0x10, 0x54, 0x85, 0xab, 0xd8, 0x61, 0xd7, 0x7b, + }; + // kP256PointResult is |kP256Scalar|×|kP256Point|. +- const uint8_t kP256PointResult[65] = { ++ static const uint8_t kP256PointResult[65] = { + 0x04, 0xf1, 0x63, 0x00, 0x88, 0xc5, 0xd5, 0xe9, 0x05, 0x52, 0xac, + 0xb6, 0xec, 0x68, 0x76, 0xb8, 0x73, 0x7f, 0x0f, 0x72, 0x34, 0xe6, + 0xbb, 0x30, 0x32, 0x22, 0x37, 0xb6, 0x2a, 0x80, 0xe8, 0x9e, 0x6e, + 0x6f, 0x36, 0x02, 0xe7, 0x21, 0xd2, 0x31, 0xdb, 0x94, 0x63, 0xb7, + 0xd8, 0x19, 0x0e, 0xc2, 0xc0, 0xa7, 0x2f, 0x15, 0x49, 0x1a, 0xa2, +- 0x7c, 0x41, 0x8f, 0xaf, 0x9c, 0x40, 0xaf, 0x2e, 0x4a, +-#if !defined(BORINGSSL_FIPS_BREAK_Z_COMPUTATION) +- 0x0c, +-#else +- 0x00, +-#endif +- }; +- const uint8_t kTLSOutput[32] = { +- 0x67, 0x85, 0xde, 0x60, 0xfc, 0x0a, 0x83, 0xe9, 0xa2, 0x2a, 0xb3, +- 0xf0, 0x27, 0x0c, 0xba, 0xf7, 0xfa, 0x82, 0x3d, 0x14, 0x77, 0x1d, +- 0x86, 0x29, 0x79, 0x39, 0x77, 0x8a, 0xd5, 0x0e, 0x9d, +-#if !defined(BORINGSSL_FIPS_BREAK_TLS_KDF) +- 0x32, +-#else +- 0x00, +-#endif +- }; +- const uint8_t kTLSSecret[32] = { +- 0xbf, 0xe4, 0xb7, 0xe0, 0x26, 0x55, 0x5f, 0x6a, 0xdf, 0x5d, 0x27, +- 0xd6, 0x89, 0x99, 0x2a, 0xd6, 0xf7, 0x65, 0x66, 0x07, 0x4b, 0x55, +- 0x5f, 0x64, 0x55, 0xcd, 0xd5, 0x77, 0xa4, 0xc7, 0x09, 0x61, +- }; +- const char kTLSLabel[] = "FIPS self test"; +- const uint8_t kTLSSeed1[16] = { +- 0x8f, 0x0d, 0xe8, 0xb6, 0x90, 0x8f, 0xb1, 0xd2, +- 0x6d, 0x51, 0xf4, 0x79, 0x18, 0x63, 0x51, 0x65, +- }; +- const uint8_t kTLSSeed2[16] = { +- 0x7d, 0x24, 0x1a, 0x9d, 0x3c, 0x59, 0xbf, 0x3c, +- 0x31, 0x1e, 0x2b, 0x21, 0x41, 0x8d, 0x32, 0x81, ++ 0x7c, 0x41, 0x8f, 0xaf, 0x9c, 0x40, 0xaf, 0x2e, 0x4a, 0x0c, + }; + ++ ec_group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); ++ if (ec_group == NULL) { ++ fprintf(stderr, "Failed to create P-256 group.\n"); ++ goto err; ++ } ++ ec_point_in = EC_POINT_new(ec_group); ++ ec_point_out = EC_POINT_new(ec_group); ++ ec_scalar = BN_new(); ++ uint8_t z_comp_result[65]; ++ if (ec_point_in == NULL || ec_point_out == NULL || ec_scalar == NULL || ++ !EC_POINT_oct2point(ec_group, ec_point_in, kP256Point, sizeof(kP256Point), ++ NULL) || ++ !BN_bin2bn(kP256Scalar, sizeof(kP256Scalar), ec_scalar) || ++ !ec_point_mul_no_self_test(ec_group, ec_point_out, 
NULL, ec_point_in, ++ ec_scalar, NULL) || ++ !EC_POINT_point2oct(ec_group, ec_point_out, POINT_CONVERSION_UNCOMPRESSED, ++ z_comp_result, sizeof(z_comp_result), NULL) || ++ !check_test(kP256PointResult, z_comp_result, sizeof(z_comp_result), ++ "Z Computation Result")) { ++ fprintf(stderr, "Z-computation KAT failed.\n"); ++ goto err; ++ } ++ ++ ret = 1; ++ ++err: ++ EC_KEY_free(ec_key); ++ EC_POINT_free(ec_point_in); ++ EC_POINT_free(ec_point_out); ++ EC_GROUP_free(ec_group); ++ BN_free(ec_scalar); ++ ECDSA_SIG_free(sig); ++ ++ return ret; ++} ++ ++static int boringssl_self_test_ffdh(void) { ++ int ret = 0; ++ DH *dh = NULL; ++ BIGNUM *ffdhe2048_value = NULL; ++ ++ // FFC Diffie-Hellman KAT ++ + // kFFDHE2048PublicValueData is an arbitrary public value, mod + // kFFDHE2048Data. (The private key happens to be 4096.) + static const BN_ULONG kFFDHE2048PublicValueData[] = { +@@ -550,8 +562,7 @@ int boringssl_fips_self_test( + TOBN(0xbae7b0b3, 0x6e362dc0), TOBN(0xa57c73bd, 0xdc70fb82), + TOBN(0xfaff50d2, 0x9d573457), TOBN(0x352bd399, 0xbe84058e), + }; +- +- const uint8_t kDHOutput[2048 / 8] = { ++ static const uint8_t kDHOutput[2048 / 8] = { + 0x2a, 0xe6, 0xd3, 0xa6, 0x13, 0x58, 0x8e, 0xce, 0x53, 0xaa, 0xf6, 0x5d, + 0x9a, 0xae, 0x02, 0x12, 0xf5, 0x80, 0x3d, 0x06, 0x09, 0x76, 0xac, 0x57, + 0x37, 0x9e, 0xab, 0x38, 0x62, 0x25, 0x05, 0x1d, 0xf3, 0xa9, 0x39, 0x60, +@@ -573,23 +584,144 @@ int boringssl_fips_self_test( + 0x06, 0x80, 0x2a, 0x4e, 0x5a, 0xf0, 0x1e, 0xaa, 0xcb, 0xab, 0x06, 0x0e, + 0x27, 0x0f, 0xd9, 0x88, 0xd9, 0x01, 0xe3, 0x07, 0xeb, 0xdf, 0xc3, 0x12, + 0xe3, 0x40, 0x88, 0x7b, 0x5f, 0x59, 0x78, 0x6e, 0x26, 0x20, 0xc3, 0xdf, +- 0xc8, 0xe4, 0x5e, +-#if !defined(BORINGSSL_FIPS_BREAK_FFC_DH) +- 0xb8, +-#else +- 0x00, +-#endif ++ 0xc8, 0xe4, 0x5e, 0xb8, ++ }; ++ ++ ffdhe2048_value = BN_new(); ++ if (ffdhe2048_value) { ++ bn_set_static_words(ffdhe2048_value, kFFDHE2048PublicValueData, ++ OPENSSL_ARRAY_SIZE(kFFDHE2048PublicValueData)); ++ } ++ ++ dh = self_test_dh(); ++ uint8_t dh_out[sizeof(kDHOutput)]; ++ if (dh == NULL || ffdhe2048_value == NULL || sizeof(dh_out) != DH_size(dh) || ++ dh_compute_key_padded_no_self_test(dh_out, ffdhe2048_value, dh) != ++ sizeof(dh_out) || ++ !check_test(kDHOutput, dh_out, sizeof(dh_out), "FFC DH")) { ++ fprintf(stderr, "FFDH failed.\n"); ++ goto err; ++ } ++ ++ ret = 1; ++ ++err: ++ DH_free(dh); ++ BN_free(ffdhe2048_value); ++ ++ return ret; ++} ++ ++#if defined(BORINGSSL_FIPS) ++ ++static void run_self_test_rsa(void) { ++ if (!boringssl_self_test_rsa()) { ++ BORINGSSL_FIPS_abort(); ++ } ++} ++ ++DEFINE_STATIC_ONCE(g_self_test_once_rsa); ++ ++void boringssl_ensure_rsa_self_test(void) { ++ CRYPTO_once(g_self_test_once_rsa_bss_get(), run_self_test_rsa); ++} ++ ++static void run_self_test_ecc(void) { ++ if (!boringssl_self_test_ecc()) { ++ BORINGSSL_FIPS_abort(); ++ } ++} ++ ++DEFINE_STATIC_ONCE(g_self_test_once_ecc); ++ ++void boringssl_ensure_ecc_self_test(void) { ++ CRYPTO_once(g_self_test_once_ecc_bss_get(), run_self_test_ecc); ++} ++ ++static void run_self_test_ffdh(void) { ++ if (!boringssl_self_test_ffdh()) { ++ BORINGSSL_FIPS_abort(); ++ } ++} ++ ++DEFINE_STATIC_ONCE(g_self_test_once_ffdh); ++ ++void boringssl_ensure_ffdh_self_test(void) { ++ CRYPTO_once(g_self_test_once_ffdh_bss_get(), run_self_test_ffdh); ++} ++ ++#endif // BORINGSSL_FIPS ++ ++ ++// Startup self tests. ++// ++// These tests are run at process start when in FIPS mode. 
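(Aside, not part of the patch above: the DEFINE_STATIC_ONCE/CRYPTO_once guards just added make each slow KAT run at most once per address space — the first RSA, ECC or FFDH caller pays for the test, later callers hit an already-latched flag, and a failed test aborts the process. The following is a minimal stand-alone sketch of that gating idea using plain pthread_once instead of BoringSSL's internal helpers; slow_rsa_kat() is a hypothetical stand-in for the real known-answer test.)

/* Illustrative sketch only -- mirrors the lazy self-test gating, not BoringSSL code. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_once_t g_rsa_kat_once = PTHREAD_ONCE_INIT;

/* Hypothetical stand-in for the real KAT; returns 1 on success, 0 on failure. */
static int slow_rsa_kat(void) { return 1; }

static void run_rsa_kat_once(void) {
  if (!slow_rsa_kat()) {
    fprintf(stderr, "RSA KAT failed\n");
    abort();  /* analogous to BORINGSSL_FIPS_abort() in the patch */
  }
}

/* Called at the top of every RSA entry point, mirroring
 * boringssl_ensure_rsa_self_test() above. */
static void ensure_rsa_kat(void) {
  pthread_once(&g_rsa_kat_once, run_rsa_kat_once);
}

int main(void) {
  ensure_rsa_kat();  /* first caller runs the KAT */
  ensure_rsa_kat();  /* subsequent callers are a no-op */
  return 0;
}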
++ ++int boringssl_self_test_sha256(void) { ++ static const uint8_t kInput[16] = { ++ 0xff, 0x3b, 0x85, 0x7d, 0xa7, 0x23, 0x6a, 0x2b, ++ 0xaa, 0x0f, 0x39, 0x6b, 0x51, 0x52, 0x22, 0x17, ++ }; ++ static const uint8_t kPlaintextSHA256[32] = { ++ 0x7f, 0xe4, 0xd5, 0xf1, 0xa1, 0xe3, 0x82, 0x87, 0xd9, 0x58, 0xf5, ++ 0x11, 0xc7, 0x1d, 0x5e, 0x27, 0x5e, 0xcc, 0xd2, 0x66, 0xcf, 0xb9, ++ 0xc8, 0xc6, 0x60, 0xd8, 0x92, 0x1e, 0x57, 0xfd, 0x46, 0x75, ++ }; ++ uint8_t output[SHA256_DIGEST_LENGTH]; ++ ++ // SHA-256 KAT ++ SHA256(kInput, sizeof(kInput), output); ++ return check_test(kPlaintextSHA256, output, sizeof(kPlaintextSHA256), ++ "SHA-256 KAT"); ++} ++ ++int boringssl_self_test_sha512(void) { ++ static const uint8_t kInput[16] = { ++ 0x21, 0x25, 0x12, 0xf8, 0xd2, 0xad, 0x83, 0x22, ++ 0x78, 0x1c, 0x6c, 0x4d, 0x69, 0xa9, 0xda, 0xa1, ++ }; ++ static const uint8_t kPlaintextSHA512[64] = { ++ 0x29, 0x3c, 0x94, 0x35, 0x4e, 0x98, 0x83, 0xe5, 0xc2, 0x78, 0x36, ++ 0x7a, 0xe5, 0x18, 0x90, 0xbf, 0x35, 0x41, 0x01, 0x64, 0x19, 0x8d, ++ 0x26, 0xeb, 0xe1, 0xf8, 0x2f, 0x04, 0x8e, 0xfa, 0x8b, 0x2b, 0xc6, ++ 0xb2, 0x9d, 0x5d, 0x46, 0x76, 0x5a, 0xc8, 0xb5, 0x25, 0xa3, 0xea, ++ 0x52, 0x84, 0x47, 0x6d, 0x6d, 0xf4, 0xc9, 0x71, 0xf3, 0x3d, 0x89, ++ 0x4c, 0x3b, 0x20, 0x8c, 0x5b, 0x75, 0xe8, 0xf8, 0x7c, ++ }; ++ uint8_t output[SHA512_DIGEST_LENGTH]; ++ ++ // SHA-512 KAT ++ SHA512(kInput, sizeof(kInput), output); ++ return check_test(kPlaintextSHA512, output, sizeof(kPlaintextSHA512), ++ "SHA-512 KAT"); ++} ++ ++int boringssl_self_test_hmac_sha256(void) { ++ static const uint8_t kInput[16] = { ++ 0xda, 0xd9, 0x12, 0x93, 0xdf, 0xcf, 0x2a, 0x7c, ++ 0x8e, 0xcd, 0x13, 0xfe, 0x35, 0x3f, 0xa7, 0x5b, + }; ++ static const uint8_t kPlaintextHMACSHA256[32] = { ++ 0x36, 0x5f, 0x5b, 0xd5, 0xf5, 0xeb, 0xfd, 0xc7, 0x6e, 0x53, 0xa5, ++ 0x73, 0x6d, 0x73, 0x20, 0x13, 0xaa, 0xd3, 0xbc, 0x86, 0x4b, 0xb8, ++ 0x84, 0x94, 0x16, 0x46, 0x88, 0x9c, 0x48, 0xee, 0xa9, 0x0e, ++ }; ++ uint8_t output[EVP_MAX_MD_SIZE]; ++ ++ unsigned output_len; ++ HMAC(EVP_sha256(), kInput, sizeof(kInput), kInput, sizeof(kInput), output, ++ &output_len); ++ return output_len == sizeof(kPlaintextHMACSHA256) && ++ check_test(kPlaintextHMACSHA256, output, sizeof(kPlaintextHMACSHA256), ++ "HMAC-SHA-256 KAT"); ++} ++ ++static int boringssl_self_test_fast(void) { ++ static const uint8_t kAESKey[16] = "BoringCrypto Key"; ++ static const uint8_t kAESIV[16] = {0}; + + EVP_AEAD_CTX aead_ctx; + EVP_AEAD_CTX_zero(&aead_ctx); +- RSA *rsa_key = NULL; +- EC_KEY *ec_key = NULL; +- EC_GROUP *ec_group = NULL; +- EC_POINT *ec_point_in = NULL; +- EC_POINT *ec_point_out = NULL; +- BIGNUM *ec_scalar = NULL; +- ECDSA_SIG *sig = NULL; + int ret = 0; + + AES_KEY aes_key; +@@ -597,28 +729,48 @@ int boringssl_fips_self_test( + uint8_t output[256]; + + // AES-CBC Encryption KAT ++ static const uint8_t kAESCBCEncPlaintext[32] = { ++ 0x07, 0x86, 0x09, 0xa6, 0xc5, 0xac, 0x25, 0x44, 0x69, 0x9a, 0xdf, ++ 0x68, 0x2f, 0xa3, 0x77, 0xf9, 0xbe, 0x8a, 0xb6, 0xae, 0xf5, 0x63, ++ 0xe8, 0xc5, 0x6a, 0x36, 0xb8, 0x4f, 0x55, 0x7f, 0xad, 0xd3, ++ }; ++ static const uint8_t kAESCBCEncCiphertext[sizeof(kAESCBCEncPlaintext)] = { ++ 0x56, 0x46, 0xc1, 0x41, 0xf4, 0x13, 0xd6, 0xff, 0x62, 0x92, 0x41, ++ 0x7a, 0x26, 0xc6, 0x86, 0xbd, 0x30, 0x5f, 0xb6, 0x57, 0xa7, 0xd2, ++ 0x50, 0x3a, 0xc5, 0x5e, 0x8e, 0x93, 0x40, 0xf2, 0x10, 0xd8, ++ }; + memcpy(aes_iv, kAESIV, sizeof(kAESIV)); + if (AES_set_encrypt_key(kAESKey, 8 * sizeof(kAESKey), &aes_key) != 0) { + fprintf(stderr, "AES_set_encrypt_key failed.\n"); + goto err; + } +- 
AES_cbc_encrypt(kPlaintext, output, sizeof(kPlaintext), &aes_key, aes_iv, +- AES_ENCRYPT); +- if (!check_test(kAESCBCCiphertext, output, sizeof(kAESCBCCiphertext), +- "AES-CBC Encryption KAT")) { ++ AES_cbc_encrypt(kAESCBCEncPlaintext, output, sizeof(kAESCBCEncPlaintext), ++ &aes_key, aes_iv, AES_ENCRYPT); ++ if (!check_test(kAESCBCEncCiphertext, output, sizeof(kAESCBCEncCiphertext), ++ "AES-CBC-encrypt KAT")) { + goto err; + } + + // AES-CBC Decryption KAT ++ static const uint8_t kAESCBCDecCiphertext[32] = { ++ 0x34, 0x7a, 0xa5, 0xa0, 0x24, 0xb2, 0x82, 0x57, 0xb3, 0x65, 0x10, ++ 0xbe, 0x58, 0x3d, 0x4f, 0x47, 0xad, 0xb7, 0xbb, 0xee, 0xdc, 0x60, ++ 0x05, 0xbb, 0xbd, 0x0d, 0x0a, 0x9f, 0x06, 0xbb, 0x7b, 0x10, ++ }; ++ static const uint8_t kAESCBCDecPlaintext[sizeof(kAESCBCDecCiphertext)] = { ++ 0x51, 0xa7, 0xa0, 0x1f, 0x6b, 0x79, 0x6c, 0xcd, 0x48, 0x03, 0xa1, ++ 0x41, 0xdc, 0x56, 0xa6, 0xc2, 0x16, 0xb5, 0xd1, 0xd3, 0xb7, 0x06, ++ 0xb2, 0x25, 0x6f, 0xa6, 0xd0, 0xd2, 0x0e, 0x6f, 0x19, 0xb5, ++ }; + memcpy(aes_iv, kAESIV, sizeof(kAESIV)); + if (AES_set_decrypt_key(kAESKey, 8 * sizeof(kAESKey), &aes_key) != 0) { + fprintf(stderr, "AES_set_decrypt_key failed.\n"); + goto err; + } +- AES_cbc_encrypt(kAESCBCCiphertext, output, sizeof(kAESCBCCiphertext), ++ AES_cbc_encrypt(kAESCBCDecCiphertext, output, sizeof(kAESCBCDecCiphertext), + &aes_key, aes_iv, AES_DECRYPT); +- if (!check_test(kPlaintext, output, sizeof(kPlaintext), +- "AES-CBC Decryption KAT")) { ++ if (!check_test(kAESCBCDecPlaintext, output, sizeof(kAESCBCDecPlaintext), ++ "AES-CBC-decrypt KAT")) { + goto err; + } + +@@ -632,194 +784,115 @@ int boringssl_fips_self_test( + } + + // AES-GCM Encryption KAT ++ static const uint8_t kAESGCMEncPlaintext[32] = { ++ 0x8f, 0xcc, 0x40, 0x99, 0x80, 0x8e, 0x75, 0xca, 0xaf, 0xf5, 0x82, ++ 0x89, 0x88, 0x48, 0xa8, 0x8d, 0x80, 0x8b, 0x55, 0xab, 0x4e, 0x93, ++ 0x70, 0x79, 0x7d, 0x94, 0x0b, 0xe8, 0xcc, 0x1d, 0x78, 0x84, ++ }; ++ static const uint8_t kAESGCMCiphertext[sizeof(kAESGCMEncPlaintext) + 16] = { ++ 0x87, 0x7b, 0xd5, 0x8d, 0x96, 0x3e, 0x4b, 0xe6, 0x64, 0x94, 0x40, 0x2f, ++ 0x61, 0x9b, 0x7e, 0x56, 0x52, 0x7d, 0xa4, 0x5a, 0xf9, 0xa6, 0xe2, 0xdb, ++ 0x1c, 0x63, 0x2e, 0x97, 0x93, 0x0f, 0xfb, 0xed, 0xb5, 0x9e, 0x1c, 0x20, ++ 0xb2, 0xb0, 0x58, 0xda, 0x48, 0x07, 0x2d, 0xbd, 0x96, 0x0d, 0x34, 0xc6, ++ }; + if (!EVP_AEAD_CTX_seal(&aead_ctx, output, &out_len, sizeof(output), nonce, + EVP_AEAD_nonce_length(EVP_aead_aes_128_gcm()), +- kPlaintext, sizeof(kPlaintext), NULL, 0) || ++ kAESGCMEncPlaintext, sizeof(kAESGCMEncPlaintext), NULL, ++ 0) || + !check_test(kAESGCMCiphertext, output, sizeof(kAESGCMCiphertext), +- "AES-GCM Encryption KAT")) { ++ "AES-GCM-encrypt KAT")) { + fprintf(stderr, "EVP_AEAD_CTX_seal for AES-128-GCM failed.\n"); + goto err; + } + + // AES-GCM Decryption KAT ++ static const uint8_t kAESGCMDecCiphertext[48] = { ++ 0x35, 0xf3, 0x05, 0x8f, 0x87, 0x57, 0x60, 0xff, 0x09, 0xd3, 0x12, 0x0f, ++ 0x70, 0xc4, 0xbc, 0x9e, 0xd7, 0xa8, 0x68, 0x72, 0xe1, 0x34, 0x52, 0x20, ++ 0x21, 0x76, 0xf7, 0x37, 0x1a, 0xe0, 0x4f, 0xaa, 0xe1, 0xdd, 0x39, 0x19, ++ 0x20, 0xf5, 0xd1, 0x39, 0x53, 0xd8, 0x96, 0x78, 0x59, 0x94, 0x82, 0x3c, ++ }; ++ static const uint8_t kAESGCMDecPlaintext[sizeof(kAESGCMDecCiphertext) - 16] = ++ { ++ 0x3d, 0x44, 0x90, 0x9b, 0x91, 0xe7, 0x5e, 0xd3, 0xc2, 0xb2, 0xd0, ++ 0xa9, 0x99, 0x17, 0x6a, 0x45, 0x05, 0x5e, 0x99, 0x83, 0x56, 0x01, ++ 0xc0, 0x82, 0x40, 0x81, 0xd2, 0x48, 0x45, 0xf2, 0xcc, 0xc3, ++ }; + if (!EVP_AEAD_CTX_open(&aead_ctx, output, &out_len, sizeof(output), nonce, + 
EVP_AEAD_nonce_length(EVP_aead_aes_128_gcm()), +- kAESGCMCiphertext, sizeof(kAESGCMCiphertext), NULL, +- 0) || +- !check_test(kPlaintext, output, sizeof(kPlaintext), +- "AES-GCM Decryption KAT")) { +- fprintf(stderr, "EVP_AEAD_CTX_open for AES-128-GCM failed.\n"); +- goto err; +- } +- +- DES_key_schedule des1, des2, des3; +- DES_cblock des_iv; +- DES_set_key(&kDESKey1, &des1); +- DES_set_key(&kDESKey2, &des2); +- DES_set_key(&kDESKey3, &des3); +- +- // 3DES Encryption KAT +- memcpy(&des_iv, &kDESIV, sizeof(des_iv)); +- DES_ede3_cbc_encrypt(kPlaintext, output, sizeof(kPlaintext), &des1, &des2, +- &des3, &des_iv, DES_ENCRYPT); +- if (!check_test(kDESCiphertext, output, sizeof(kDESCiphertext), +- "3DES Encryption KAT")) { +- goto err; +- } +- +- // 3DES Decryption KAT +- memcpy(&des_iv, &kDESIV, sizeof(des_iv)); +- DES_ede3_cbc_encrypt(kDESCiphertext, output, sizeof(kDESCiphertext), &des1, +- &des2, &des3, &des_iv, DES_DECRYPT); +- if (!check_test(kPlaintext, output, sizeof(kPlaintext), +- "3DES Decryption KAT")) { ++ kAESGCMDecCiphertext, sizeof(kAESGCMDecCiphertext), ++ NULL, 0) || ++ !check_test(kAESGCMDecPlaintext, output, sizeof(kAESGCMDecPlaintext), ++ "AES-GCM-decrypt KAT")) { ++ fprintf(stderr, ++ "AES-GCM-decrypt KAT failed because EVP_AEAD_CTX_open failed.\n"); + goto err; + } + + // SHA-1 KAT +- SHA1(kPlaintext, sizeof(kPlaintext), output); +- if (!check_test(kPlaintextSHA1, output, sizeof(kPlaintextSHA1), ++ static const uint8_t kSHA1Input[16] = { ++ 0x13, 0x2f, 0xd9, 0xba, 0xd5, 0xc1, 0x82, 0x62, ++ 0x63, 0xba, 0xfb, 0xb6, 0x99, 0xf7, 0x07, 0xa5, ++ }; ++ static const uint8_t kSHA1Digest[20] = { ++ 0x94, 0x19, 0x55, 0x93, 0x0a, 0x58, 0x29, 0x38, 0xeb, 0xf5, ++ 0x09, 0x11, 0x6d, 0x1a, 0xfd, 0x0f, 0x1e, 0x11, 0xe3, 0xcb, ++ }; ++ SHA1(kSHA1Input, sizeof(kSHA1Input), output); ++ if (!check_test(kSHA1Digest, output, sizeof(kSHA1Digest), + "SHA-1 KAT")) { + goto err; + } + +- // SHA-256 KAT +- SHA256(kPlaintext, sizeof(kPlaintext), output); +- if (!check_test(kPlaintextSHA256, output, sizeof(kPlaintextSHA256), +- "SHA-256 KAT")) { +- goto err; +- } +- +- // SHA-512 KAT +- SHA512(kPlaintext, sizeof(kPlaintext), output); +- if (!check_test(kPlaintextSHA512, output, sizeof(kPlaintextSHA512), +- "SHA-512 KAT")) { +- goto err; +- } +- +- rsa_key = self_test_rsa_key(); +- if (rsa_key == NULL) { +- fprintf(stderr, "RSA KeyGen failed\n"); +- goto err; +- } +- +- // RSA Sign KAT +- unsigned sig_len; +- +- // Disable blinding for the power-on tests because it's not needed and +- // triggers an entropy draw. +- rsa_key->flags |= RSA_FLAG_NO_BLINDING; +- +- if (!RSA_sign(NID_sha256, kPlaintextSHA256, sizeof(kPlaintextSHA256), output, +- &sig_len, rsa_key) || +- !check_test(kRSASignature, output, sizeof(kRSASignature), +- "RSA Sign KAT")) { +- fprintf(stderr, "RSA signing test failed.\n"); +- goto err; +- } +- +- // RSA Verify KAT +- if (!RSA_verify(NID_sha256, kPlaintextSHA256, sizeof(kPlaintextSHA256), +- kRSASignature, sizeof(kRSASignature), rsa_key)) { +- fprintf(stderr, "RSA Verify KAT failed.\n"); +- goto err; +- } +- +- ec_key = self_test_ecdsa_key(); +- if (ec_key == NULL) { +- fprintf(stderr, "ECDSA KeyGen failed\n"); +- goto err; +- } +- +- // ECDSA Sign/Verify KAT +- +- // The 'k' value for ECDSA is fixed to avoid an entropy draw. 
+- uint8_t ecdsa_k[32] = {0}; +- ecdsa_k[31] = 42; +- +- sig = ecdsa_sign_with_nonce_for_known_answer_test( +- kPlaintextSHA256, sizeof(kPlaintextSHA256), ec_key, ecdsa_k, +- sizeof(ecdsa_k)); +- +- uint8_t ecdsa_r_bytes[sizeof(kECDSASigR)]; +- uint8_t ecdsa_s_bytes[sizeof(kECDSASigS)]; +- if (sig == NULL || +- BN_num_bytes(sig->r) != sizeof(ecdsa_r_bytes) || +- !BN_bn2bin(sig->r, ecdsa_r_bytes) || +- BN_num_bytes(sig->s) != sizeof(ecdsa_s_bytes) || +- !BN_bn2bin(sig->s, ecdsa_s_bytes) || +- !check_test(kECDSASigR, ecdsa_r_bytes, sizeof(kECDSASigR), "ECDSA R") || +- !check_test(kECDSASigS, ecdsa_s_bytes, sizeof(kECDSASigS), "ECDSA S")) { +- fprintf(stderr, "ECDSA signature KAT failed.\n"); +- goto err; +- } +- +- if (!ECDSA_do_verify(kPlaintextSHA256, sizeof(kPlaintextSHA256), sig, +- ec_key)) { +- fprintf(stderr, "ECDSA verification KAT failed.\n"); +- goto err; +- } +- +- // Primitive Z Computation KAT (IG 9.6). +- ec_group = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); +- if (ec_group == NULL) { +- fprintf(stderr, "Failed to create P-256 group.\n"); +- goto err; +- } +- ec_point_in = EC_POINT_new(ec_group); +- ec_point_out = EC_POINT_new(ec_group); +- ec_scalar = BN_new(); +- uint8_t z_comp_result[65]; +- if (ec_point_in == NULL || ec_point_out == NULL || ec_scalar == NULL || +- !EC_POINT_oct2point(ec_group, ec_point_in, kP256Point, sizeof(kP256Point), +- NULL) || +- !BN_bin2bn(kP256Scalar, sizeof(kP256Scalar), ec_scalar) || +- !EC_POINT_mul(ec_group, ec_point_out, NULL, ec_point_in, ec_scalar, +- NULL) || +- !EC_POINT_point2oct(ec_group, ec_point_out, POINT_CONVERSION_UNCOMPRESSED, +- z_comp_result, sizeof(z_comp_result), NULL) || +- !check_test(kP256PointResult, z_comp_result, sizeof(z_comp_result), +- "Z Computation Result")) { +- fprintf(stderr, "Z Computation KAT failed.\n"); +- goto err; +- } +- +- // FFC Diffie-Hellman KAT +- +- BIGNUM *const ffdhe2048_value = BN_new(); +- DH *const dh = self_test_dh(); +- int dh_ok = 0; +- if (ffdhe2048_value && dh) { +- bn_set_static_words(ffdhe2048_value, kFFDHE2048PublicValueData, +- OPENSSL_ARRAY_SIZE(kFFDHE2048PublicValueData)); +- +- uint8_t dh_out[sizeof(kDHOutput)]; +- dh_ok = +- sizeof(dh_out) == DH_size(dh) && +- DH_compute_key_padded(dh_out, ffdhe2048_value, dh) == sizeof(dh_out) && +- check_test(kDHOutput, dh_out, sizeof(dh_out), "FFC DH"); +- } +- +- BN_free(ffdhe2048_value); +- DH_free(dh); +- if (!dh_ok) { +- fprintf(stderr, "FFDH failed.\n"); ++ if (!boringssl_self_test_sha256() || ++ !boringssl_self_test_sha512() || ++ !boringssl_self_test_hmac_sha256()) { + goto err; + } + + // DBRG KAT ++ static const uint8_t kDRBGEntropy[48] = { ++ 0xc4, 0xda, 0x07, 0x40, 0xd5, 0x05, 0xf1, 0xee, 0x28, 0x0b, 0x95, 0xe5, ++ 0x8c, 0x49, 0x31, 0xac, 0x6d, 0xe8, 0x46, 0xa0, 0x15, 0x2f, 0xbb, 0x4a, ++ 0x3f, 0x17, 0x4c, 0xf4, 0x78, 0x7a, 0x4f, 0x1a, 0x40, 0xc2, 0xb5, 0x0b, ++ 0xab, 0xe1, 0x4a, 0xae, 0x53, 0x0b, 0xe5, 0x88, 0x6d, 0x91, 0x0a, 0x27, ++ }; ++ static const uint8_t kDRBGPersonalization[18] = "BCMPersonalization"; ++ static const uint8_t kDRBGAD[16] = "BCM DRBG KAT AD "; ++ static const uint8_t kDRBGOutput[64] = { ++ 0x19, 0x1f, 0x2b, 0x49, 0x76, 0x85, 0xfd, 0x51, 0xb6, 0x56, 0xbc, ++ 0x1c, 0x7d, 0xd5, 0xdd, 0x44, 0x76, 0xa3, 0x5e, 0x17, 0x9b, 0x8e, ++ 0xb8, 0x98, 0x65, 0x12, 0xca, 0x35, 0x6c, 0xa0, 0x6f, 0xa0, 0x22, ++ 0xe4, 0xf6, 0xd8, 0x43, 0xed, 0x4e, 0x2d, 0x97, 0x39, 0x43, 0x3b, ++ 0x57, 0xfc, 0x23, 0x3f, 0x71, 0x0a, 0xe0, 0xed, 0xfe, 0xd5, 0xb8, ++ 0x67, 0x7a, 0x00, 0x39, 0xb2, 0x6e, 0xa9, 0x25, 0x97, ++ }; ++ static const 
uint8_t kDRBGEntropy2[48] = { ++ 0xc7, 0x16, 0x1c, 0xa3, 0x6c, 0x23, 0x09, 0xb7, 0x16, 0xe9, 0x85, 0x9b, ++ 0xb9, 0x6c, 0x6d, 0x49, 0xbd, 0xc8, 0x35, 0x21, 0x03, 0xa1, 0x8c, 0xd2, ++ 0x4e, 0xf4, 0x2e, 0xc9, 0x7e, 0xf4, 0x6b, 0xf4, 0x46, 0xeb, 0x1a, 0x45, ++ 0x76, 0xc1, 0x86, 0xe9, 0x35, 0x18, 0x03, 0x76, 0x3a, 0x79, 0x12, 0xfe, ++ }; ++ static const uint8_t kDRBGReseedOutput[64] = { ++ 0x00, 0xf2, 0x05, 0xaa, 0xfd, 0x11, 0x6c, 0x77, 0xbc, 0x81, 0x86, ++ 0x99, 0xca, 0x51, 0xcf, 0x80, 0x15, 0x9f, 0x02, 0x9e, 0x0b, 0xcd, ++ 0x26, 0xc8, 0x4b, 0x87, 0x8a, 0x15, 0x1a, 0xdd, 0xf2, 0xf3, 0xeb, ++ 0x94, 0x0b, 0x08, 0xc8, 0xc9, 0x57, 0xa4, 0x0b, 0x4b, 0x0f, 0x13, ++ 0xde, 0x7c, 0x0c, 0x6a, 0xac, 0x34, 0x4a, 0x9a, 0xf2, 0xd0, 0x83, ++ 0x02, 0x05, 0x17, 0xc9, 0x81, 0x8f, 0x2a, 0x81, 0x92, ++ }; + CTR_DRBG_STATE drbg; + if (!CTR_DRBG_init(&drbg, kDRBGEntropy, kDRBGPersonalization, + sizeof(kDRBGPersonalization)) || + !CTR_DRBG_generate(&drbg, output, sizeof(kDRBGOutput), kDRBGAD, + sizeof(kDRBGAD)) || + !check_test(kDRBGOutput, output, sizeof(kDRBGOutput), +- "DBRG Generate KAT") || ++ "DRBG Generate KAT") || + !CTR_DRBG_reseed(&drbg, kDRBGEntropy2, kDRBGAD, sizeof(kDRBGAD)) || + !CTR_DRBG_generate(&drbg, output, sizeof(kDRBGReseedOutput), kDRBGAD, + sizeof(kDRBGAD)) || + !check_test(kDRBGReseedOutput, output, sizeof(kDRBGReseedOutput), +- "DRBG Reseed KAT")) { ++ "DRBG-reseed KAT")) { + fprintf(stderr, "CTR-DRBG failed.\n"); + goto err; + } +@@ -832,43 +905,59 @@ int boringssl_fips_self_test( + } + + // TLS KDF KAT ++ static const uint8_t kTLSSecret[32] = { ++ 0xab, 0xc3, 0x65, 0x7b, 0x09, 0x4c, 0x76, 0x28, 0xa0, 0xb2, 0x82, ++ 0x99, 0x6f, 0xe7, 0x5a, 0x75, 0xf4, 0x98, 0x4f, 0xd9, 0x4d, 0x4e, ++ 0xcc, 0x2f, 0xcf, 0x53, 0xa2, 0xc4, 0x69, 0xa3, 0xf7, 0x31, ++ }; ++ static const char kTLSLabel[] = "FIPS self test"; ++ static const uint8_t kTLSSeed1[16] = { ++ 0x8f, 0x0d, 0xe8, 0xb6, 0x90, 0x8f, 0xb1, 0xd2, ++ 0x6d, 0x51, 0xf4, 0x79, 0x18, 0x63, 0x51, 0x65, ++ }; ++ static const uint8_t kTLSSeed2[16] = { ++ 0x7d, 0x24, 0x1a, 0x9d, 0x3c, 0x59, 0xbf, 0x3c, ++ 0x31, 0x1e, 0x2b, 0x21, 0x41, 0x8d, 0x32, 0x81, ++ }; ++ static const uint8_t kTLSOutput[32] = { ++ 0xe2, 0x1d, 0xd6, 0xc2, 0x68, 0xc7, 0x57, 0x03, 0x2c, 0x2c, 0xeb, ++ 0xbb, 0xb8, 0xa9, 0x7d, 0xe9, 0xee, 0xe6, 0xc9, 0x47, 0x83, 0x0a, ++ 0xbd, 0x11, 0x60, 0x5d, 0xd5, 0x2c, 0x47, 0xb6, 0x05, 0x88, ++ }; + uint8_t tls_output[sizeof(kTLSOutput)]; + if (!CRYPTO_tls1_prf(EVP_sha256(), tls_output, sizeof(tls_output), kTLSSecret, + sizeof(kTLSSecret), kTLSLabel, sizeof(kTLSLabel), + kTLSSeed1, sizeof(kTLSSeed1), kTLSSeed2, + sizeof(kTLSSeed2)) || +- !check_test(kTLSOutput, tls_output, sizeof(kTLSOutput), "TLS KDF KAT")) { ++ !check_test(kTLSOutput, tls_output, sizeof(kTLSOutput), "TLS-KDF KAT")) { + fprintf(stderr, "TLS KDF failed.\n"); + goto err; + } + + ret = 1; + +-#if defined(BORINGSSL_FIPS_SELF_TEST_FLAG_FILE) +- // Tests were successful. Write flag file if requested. 
+- if (module_hash_len != 0 && getenv(kFlagWriteEnableEnvVar) != NULL) { +- const int fd = open(flag_path, O_WRONLY | O_CREAT | O_TRUNC, 0644); +- if (fd >= 0) { +- close(fd); +- } +- } +-#endif // BORINGSSL_FIPS_SELF_TEST_FLAG_FILE +- + err: + EVP_AEAD_CTX_cleanup(&aead_ctx); +- RSA_free(rsa_key); +- EC_KEY_free(ec_key); +- EC_POINT_free(ec_point_in); +- EC_POINT_free(ec_point_out); +- EC_GROUP_free(ec_group); +- BN_free(ec_scalar); +- ECDSA_SIG_free(sig); + + return ret; + } + + int BORINGSSL_self_test(void) { +- return boringssl_fips_self_test(NULL, 0); ++ if (!boringssl_self_test_fast() || ++ // When requested to run self tests, also run the lazy tests. ++ !boringssl_self_test_rsa() || ++ !boringssl_self_test_ecc() || ++ !boringssl_self_test_ffdh()) { ++ return 0; ++ } ++ ++ return 1; + } + ++#if defined(BORINGSSL_FIPS) ++int boringssl_self_test_startup(void) { ++ return boringssl_self_test_fast(); ++} ++#endif ++ + #endif // !_MSC_VER +diff --git a/src/crypto/fipsmodule/sha/asm/sha512-x86_64.pl b/src/crypto/fipsmodule/sha/asm/sha512-x86_64.pl +index 61f67cb..2abd065 100755 +--- a/src/crypto/fipsmodule/sha/asm/sha512-x86_64.pl ++++ b/src/crypto/fipsmodule/sha/asm/sha512-x86_64.pl +@@ -126,15 +126,12 @@ die "can't locate x86_64-xlate.pl"; + # versions, but BoringSSL is intended to be used with pre-generated perlasm + # output, so this isn't useful anyway. + # +-# TODO(davidben): Enable AVX2 code after testing by setting $avx to 2. Is it +-# necessary to disable AVX2 code when SHA Extensions code is disabled? Upstream +-# did not tie them together until after $shaext was added. ++# This file also has an AVX2 implementation, controlled by setting $avx to 2. ++# For now, we intentionally disable it. While it gives a 13-16% perf boost, the ++# CFI annotations are wrong. It allocates stack in a loop and should be ++# rewritten to avoid this. + $avx = 1; +- +-# TODO(davidben): Consider enabling the Intel SHA Extensions code once it's +-# been tested. +-$shaext=0; ### set to zero if compiling for 1.0.1 +-$avx=1 if (!$shaext && $avx); ++$shaext = 1; + + open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; + *STDOUT=*OUT; +@@ -275,7 +272,7 @@ $code.=<<___ if ($SZ==4 || $avx); + ___ + $code.=<<___ if ($SZ==4 && $shaext); + test \$`1<<29`,%r11d # check for SHA +- jnz _shaext_shortcut ++ jnz .Lshaext_shortcut + ___ + # XOP codepath removed. + $code.=<<___ if ($avx>1); +@@ -559,7 +556,8 @@ $code.=<<___; + .type sha256_block_data_order_shaext,\@function,3 + .align 64 + sha256_block_data_order_shaext: +-_shaext_shortcut: ++.cfi_startproc ++.Lshaext_shortcut: + ___ + $code.=<<___ if ($win64); + lea `-8-5*16`(%rsp),%rsp +@@ -703,6 +701,7 @@ $code.=<<___ if ($win64); + ___ + $code.=<<___; + ret ++.cfi_endproc + .size sha256_block_data_order_shaext,.-sha256_block_data_order_shaext + ___ + }}} +diff --git a/src/crypto/hpke/hpke.c b/src/crypto/hpke/hpke.c +index c71ac2a..827ffaa 100644 +--- a/src/crypto/hpke/hpke.c ++++ b/src/crypto/hpke/hpke.c +@@ -30,7 +30,7 @@ + #include "../internal.h" + + +-// This file implements draft-irtf-cfrg-hpke-12. ++// This file implements RFC 9180. + + #define MAX_SEED_LEN X25519_PRIVATE_KEY_LEN + #define MAX_SHARED_SECRET_LEN SHA256_DIGEST_LENGTH +@@ -115,7 +115,7 @@ static int hpke_labeled_expand(const EVP_MD *hkdf_md, uint8_t *out_key, + // KEM implementations. + + // dhkem_extract_and_expand implements the ExtractAndExpand operation in the +-// DHKEM construction. See section 4.1 of draft-irtf-cfrg-hpke-12. ++// DHKEM construction. See section 4.1 of RFC 9180. 
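(Stepping back to the self_check.c hunk for a moment: the rewritten BORINGSSL_self_test() now chains the fast startup KATs with the lazy RSA, ECC and FFDH KATs, so callers can still force every test in one call. A hedged usage sketch follows; it assumes the declaration is exported via <openssl/crypto.h> — check the headers shipped with this package's BoringSSL copy.)

/* Hedged usage sketch, not part of the patch: run all KATs explicitly at startup. */
#include <openssl/crypto.h>
#include <stdio.h>

int main(void) {
  /* Runs the fast startup KATs plus the lazy RSA/ECC/FFDH KATs. */
  if (!BORINGSSL_self_test()) {
    fprintf(stderr, "BoringSSL self tests failed\n");
    return 1;
  }
  printf("all BoringSSL known-answer tests passed\n");
  return 0;
}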
+ static int dhkem_extract_and_expand(uint16_t kem_id, const EVP_MD *hkdf_md, + uint8_t *out_key, size_t out_len, + const uint8_t *dh, size_t dh_len, +diff --git a/src/crypto/hpke/translate_test_vectors.py b/src/crypto/hpke/translate_test_vectors.py +index a4e399b..a1fffcf 100755 +--- a/src/crypto/hpke/translate_test_vectors.py ++++ b/src/crypto/hpke/translate_test_vectors.py +@@ -19,7 +19,7 @@ + Usage: translate_test_vectors.py TEST_VECTORS_JSON_FILE + + The TEST_VECTORS_JSON_FILE is expected to come from the JSON copy of +-draft-irtf-cfrg-hpke-12's test vectors, linked from its [TestVectors] citation. ++RFC 9180's test vectors, linked from its [TestVectors] citation. + The output is written to "hpke_test_vectors.txt". + """ + +diff --git a/src/crypto/hrss/hrss.c b/src/crypto/hrss/hrss.c +index 8e21068..388c9a9 100644 +--- a/src/crypto/hrss/hrss.c ++++ b/src/crypto/hrss/hrss.c +@@ -1314,8 +1314,7 @@ static void poly_mul_novec(struct POLY_MUL_SCRATCH *scratch, struct poly *out, + static void poly_mul(struct POLY_MUL_SCRATCH *scratch, struct poly *r, + const struct poly *a, const struct poly *b) { + #if defined(POLY_RQ_MUL_ASM) +- const int has_avx2 = (OPENSSL_ia32cap_P[2] & (1 << 5)) != 0; +- if (has_avx2) { ++ if (CRYPTO_is_AVX2_capable()) { + poly_Rq_mul(r->v, a->v, b->v, scratch->u.rq); + return; + } +diff --git a/src/crypto/hrss/hrss_test.cc b/src/crypto/hrss/hrss_test.cc +index 0693c82..bab968c 100644 +--- a/src/crypto/hrss/hrss_test.cc ++++ b/src/crypto/hrss/hrss_test.cc +@@ -453,8 +453,7 @@ TEST(HRSS, Golden) { + + #if defined(POLY_RQ_MUL_ASM) && defined(SUPPORTS_ABI_TEST) + TEST(HRSS, ABI) { +- const bool has_avx2 = (OPENSSL_ia32cap_P[2] & (1 << 5)) != 0; +- if (!has_avx2) { ++ if (!CRYPTO_is_AVX2_capable()) { + fprintf(stderr, "Skipping ABI test due to lack of AVX2 support.\n"); + return; + } +diff --git a/src/crypto/impl_dispatch_test.cc b/src/crypto/impl_dispatch_test.cc +index dae9e96..631e78f 100644 +--- a/src/crypto/impl_dispatch_test.cc ++++ b/src/crypto/impl_dispatch_test.cc +@@ -33,9 +33,9 @@ class ImplDispatchTest : public ::testing::Test { + public: + void SetUp() override { + #if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) +- aesni_ = OPENSSL_ia32cap_P[1] & (1 << (57 - 32)); +- avx_movbe_ = ((OPENSSL_ia32cap_P[1] >> 22) & 0x41) == 0x41; +- ssse3_ = OPENSSL_ia32cap_P[1] & (1 << (41 - 32)); ++ aesni_ = CRYPTO_is_AESNI_capable(); ++ avx_movbe_ = CRYPTO_is_AVX_capable() && CRYPTO_is_MOVBE_capable(); ++ ssse3_ = CRYPTO_is_SSSE3_capable(); + is_x86_64_ = + #if defined(OPENSSL_X86_64) + true; +diff --git a/src/crypto/internal.h b/src/crypto/internal.h +index 42f94d5..78dbbbf 100644 +--- a/src/crypto/internal.h ++++ b/src/crypto/internal.h +@@ -121,6 +121,10 @@ + #include + #endif + ++#if defined(BORINGSSL_FIPS_BREAK_TESTS) ++#include ++#endif ++ + #if !defined(__cplusplus) + #if defined(_MSC_VER) + #define alignas(x) __declspec(align(x)) +@@ -932,19 +936,50 @@ static inline uint64_t CRYPTO_rotr_u64(uint64_t value, int shift) { + // FIPS functions. + + #if defined(BORINGSSL_FIPS) ++ + // BORINGSSL_FIPS_abort is called when a FIPS power-on or continuous test + // fails. It prevents any further cryptographic operations by the current + // process. + void BORINGSSL_FIPS_abort(void) __attribute__((noreturn)); +-#endif + +-// boringssl_fips_self_test runs the FIPS KAT-based self tests. It returns one +-// on success and zero on error. The argument is the integrity hash of the FIPS +-// module and may be used to check and write flag files to suppress duplicate +-// self-tests. 
If |module_hash_len| is zero then no flag file will be checked +-// nor written and tests will always be run. +-int boringssl_fips_self_test(const uint8_t *module_hash, +- size_t module_hash_len); ++// boringssl_self_test_startup runs all startup self tests and returns one on ++// success or zero on error. Startup self tests do not include lazy tests. ++// Call |BORINGSSL_self_test| to run every self test. ++int boringssl_self_test_startup(void); ++ ++// boringssl_ensure_rsa_self_test checks whether the RSA self-test has been run ++// in this address space. If not, it runs it and crashes the address space if ++// unsuccessful. ++void boringssl_ensure_rsa_self_test(void); ++ ++// boringssl_ensure_ecc_self_test checks whether the ECDSA and ECDH self-test ++// has been run in this address space. If not, it runs it and crashes the ++// address space if unsuccessful. ++void boringssl_ensure_ecc_self_test(void); ++ ++// boringssl_ensure_ffdh_self_test checks whether the FFDH self-test has been ++// run in this address space. If not, it runs it and crashes the address space ++// if unsuccessful. ++void boringssl_ensure_ffdh_self_test(void); ++ ++#else ++ ++// Outside of FIPS mode, the lazy tests are no-ops. ++ ++OPENSSL_INLINE void boringssl_ensure_rsa_self_test(void) {} ++OPENSSL_INLINE void boringssl_ensure_ecc_self_test(void) {} ++OPENSSL_INLINE void boringssl_ensure_ffdh_self_test(void) {} ++ ++#endif // FIPS ++ ++// boringssl_self_test_sha256 performs a SHA-256 KAT. ++int boringssl_self_test_sha256(void); ++ ++// boringssl_self_test_sha512 performs a SHA-512 KAT. ++int boringssl_self_test_sha512(void); ++ ++// boringssl_self_test_hmac_sha256 performs an HMAC-SHA-256 KAT. ++int boringssl_self_test_hmac_sha256(void); + + #if defined(BORINGSSL_FIPS_COUNTERS) + void boringssl_fips_inc_counter(enum fips_counter_t counter); +@@ -952,6 +987,17 @@ void boringssl_fips_inc_counter(enum fips_counter_t counter); + OPENSSL_INLINE void boringssl_fips_inc_counter(enum fips_counter_t counter) {} + #endif + ++#if defined(BORINGSSL_FIPS_BREAK_TESTS) ++OPENSSL_INLINE int boringssl_fips_break_test(const char *test) { ++ const char *const value = getenv("BORINGSSL_FIPS_BREAK_TEST"); ++ return value != NULL && strcmp(value, test) == 0; ++} ++#else ++OPENSSL_INLINE int boringssl_fips_break_test(const char *test) { ++ return 0; ++} ++#endif // BORINGSSL_FIPS_BREAK_TESTS ++ + + // Runtime CPU feature support + +@@ -978,14 +1024,126 @@ OPENSSL_INLINE void boringssl_fips_inc_counter(enum fips_counter_t counter) {} + extern uint32_t OPENSSL_ia32cap_P[4]; + + #if defined(BORINGSSL_FIPS) && !defined(BORINGSSL_SHARED_LIBRARY) +-const uint32_t *OPENSSL_ia32cap_get(void); ++// The FIPS module, as a static library, requires an out-of-line version of ++// |OPENSSL_ia32cap_get| so accesses can be rewritten by delocate. Mark the ++// function const so multiple accesses can be optimized together. ++const uint32_t *OPENSSL_ia32cap_get(void) __attribute__((const)); + #else + OPENSSL_INLINE const uint32_t *OPENSSL_ia32cap_get(void) { + return OPENSSL_ia32cap_P; + } + #endif + ++// See Intel manual, volume 2A, table 3-11. ++ ++OPENSSL_INLINE int CRYPTO_is_FXSR_capable(void) { ++#if defined(__FXSR__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[0] & (1 << 24)) != 0; + #endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_intel_cpu(void) { ++ // The reserved bit 30 is used to indicate an Intel CPU. ++ return (OPENSSL_ia32cap_get()[0] & (1 << 30)) != 0; ++} ++ ++// See Intel manual, volume 2A, table 3-10. 
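The CRYPTO_is_*_capable() helpers replace open-coded OPENSSL_ia32cap_P bit tests at call sites (as in the hrss.c and impl_dispatch_test.cc hunks earlier). A dispatch sketch for an x86-64 build, where my_sum_avx2 and my_sum_c are hypothetical placeholders and only CRYPTO_is_AVX2_capable() comes from the private crypto/internal.h header:

#include <stddef.h>
#include <stdint.h>
#include "internal.h"  /* crypto/internal.h, for CRYPTO_is_AVX2_capable() */

/* Hypothetical accelerated and portable implementations. */
void my_sum_avx2(uint32_t *out, const uint32_t *in, size_t n);
void my_sum_c(uint32_t *out, const uint32_t *in, size_t n);

/* Pick the AVX2 path at run time. If this file is built with -mavx2, the
 * capability check folds to a constant 1 and the fallback branch can be
 * discarded by the compiler. */
static void my_sum(uint32_t *out, const uint32_t *in, size_t n) {
  if (CRYPTO_is_AVX2_capable()) {
    my_sum_avx2(out, in, n);
  } else {
    my_sum_c(out, in, n);
  }
}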
++ ++OPENSSL_INLINE int CRYPTO_is_PCLMUL_capable(void) { ++#if defined(__PCLMUL__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[1] & (1 << 1)) != 0; ++#endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_SSSE3_capable(void) { ++#if defined(__SSSE3__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[1] & (1 << 9)) != 0; ++#endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_SSE4_1_capable(void) { ++#if defined(__SSE4_1__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_P[1] & (1 << 19)) != 0; ++#endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_MOVBE_capable(void) { ++#if defined(__MOVBE__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[1] & (1 << 22)) != 0; ++#endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_AESNI_capable(void) { ++#if defined(__AES__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[1] & (1 << 25)) != 0; ++#endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_AVX_capable(void) { ++#if defined(__AVX__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[1] & (1 << 28)) != 0; ++#endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_RDRAND_capable(void) { ++ // The GCC/Clang feature name and preprocessor symbol for RDRAND are "rdrnd" ++ // and |__RDRND__|, respectively. ++#if defined(__RDRND__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[1] & (1u << 30)) != 0; ++#endif ++} ++ ++// See Intel manual, volume 2A, table 3-8. ++ ++OPENSSL_INLINE int CRYPTO_is_BMI1_capable(void) { ++#if defined(__BMI1__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[2] & (1 << 3)) != 0; ++#endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_AVX2_capable(void) { ++#if defined(__AVX2__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[2] & (1 << 5)) != 0; ++#endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_BMI2_capable(void) { ++#if defined(__BMI2__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[2] & (1 << 8)) != 0; ++#endif ++} ++ ++OPENSSL_INLINE int CRYPTO_is_ADX_capable(void) { ++#if defined(__ADX__) ++ return 1; ++#else ++ return (OPENSSL_ia32cap_get()[2] & (1 << 19)) != 0; ++#endif ++} ++ ++#endif // OPENSSL_X86 || OPENSSL_X86_64 + + #if defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) + +diff --git a/src/crypto/pem/pem_all.c b/src/crypto/pem/pem_all.c +index e419774..706b7f4 100644 +--- a/src/crypto/pem/pem_all.c ++++ b/src/crypto/pem/pem_all.c +@@ -200,7 +200,7 @@ DSA *PEM_read_bio_DSAPrivateKey(BIO *bp, DSA **dsa, pem_password_cb *cb, + IMPLEMENT_PEM_write_cb_const(DSAPrivateKey, DSA, PEM_STRING_DSA, + DSAPrivateKey) + +- IMPLEMENT_PEM_rw(DSA_PUBKEY, DSA, PEM_STRING_PUBLIC, DSA_PUBKEY) ++IMPLEMENT_PEM_rw(DSA_PUBKEY, DSA, PEM_STRING_PUBLIC, DSA_PUBKEY) + DSA *PEM_read_DSAPrivateKey(FILE *fp, DSA **dsa, pem_password_cb *cb, void *u) + { + EVP_PKEY *pktmp; +@@ -237,7 +237,7 @@ EC_KEY *PEM_read_bio_ECPrivateKey(BIO *bp, EC_KEY **key, pem_password_cb *cb, + IMPLEMENT_PEM_write_cb(ECPrivateKey, EC_KEY, PEM_STRING_ECPRIVATEKEY, + ECPrivateKey) + +- IMPLEMENT_PEM_rw(EC_PUBKEY, EC_KEY, PEM_STRING_PUBLIC, EC_PUBKEY) ++IMPLEMENT_PEM_rw(EC_PUBKEY, EC_KEY, PEM_STRING_PUBLIC, EC_PUBKEY) + EC_KEY *PEM_read_ECPrivateKey(FILE *fp, EC_KEY **eckey, pem_password_cb *cb, + void *u) + { +@@ -247,6 +247,6 @@ EC_KEY *PEM_read_ECPrivateKey(FILE *fp, EC_KEY **eckey, pem_password_cb *cb, + } + + +-IMPLEMENT_PEM_write_const(DHparams, DH, PEM_STRING_DHPARAMS, DHparams) ++IMPLEMENT_PEM_rw_const(DHparams, DH, PEM_STRING_DHPARAMS, DHparams) + +- IMPLEMENT_PEM_rw(PUBKEY, EVP_PKEY, PEM_STRING_PUBLIC, PUBKEY) ++IMPLEMENT_PEM_rw(PUBKEY, EVP_PKEY, PEM_STRING_PUBLIC, PUBKEY) +diff --git a/src/crypto/pem/pem_pkey.c 
b/src/crypto/pem/pem_pkey.c +index 48d8c96..f75486d 100644 +--- a/src/crypto/pem/pem_pkey.c ++++ b/src/crypto/pem/pem_pkey.c +@@ -176,39 +176,3 @@ int PEM_write_PrivateKey(FILE *fp, EVP_PKEY *x, const EVP_CIPHER *enc, + BIO_free(b); + return ret; + } +- +- +-/* Transparently read in PKCS#3 or X9.42 DH parameters */ +- +-DH *PEM_read_bio_DHparams(BIO *bp, DH **x, pem_password_cb *cb, void *u) +-{ +- char *nm = NULL; +- const unsigned char *p = NULL; +- unsigned char *data = NULL; +- long len; +- DH *ret = NULL; +- +- if (!PEM_bytes_read_bio(&data, &len, &nm, PEM_STRING_DHPARAMS, bp, cb, u)) +- return NULL; +- p = data; +- +- ret = d2i_DHparams(x, &p, len); +- +- if (ret == NULL) +- OPENSSL_PUT_ERROR(PEM, ERR_R_ASN1_LIB); +- OPENSSL_free(nm); +- OPENSSL_free(data); +- return ret; +-} +- +-DH *PEM_read_DHparams(FILE *fp, DH **x, pem_password_cb *cb, void *u) +-{ +- BIO *b = BIO_new_fp(fp, BIO_NOCLOSE); +- if (b == NULL) { +- OPENSSL_PUT_ERROR(PEM, ERR_R_BUF_LIB); +- return NULL; +- } +- DH *ret = PEM_read_bio_DHparams(b, x, cb, u); +- BIO_free(b); +- return ret; +-} +diff --git a/src/crypto/pkcs8/pkcs12_test.cc b/src/crypto/pkcs8/pkcs12_test.cc +index e67630d..958bd8d 100644 +--- a/src/crypto/pkcs8/pkcs12_test.cc ++++ b/src/crypto/pkcs8/pkcs12_test.cc +@@ -34,7 +34,7 @@ std::string GetTestData(const char *path); + static const char kPassword[] = "foo"; + + // kUnicodePassword is the password for unicode_password.p12 +-static const char kUnicodePassword[] = u8"Hello, 世界"; ++static const char kUnicodePassword[] = "Hello, 世界"; + + static bssl::Span StringToBytes(const std::string &str) { + return bssl::MakeConstSpan(reinterpret_cast(str.data()), +@@ -391,7 +391,7 @@ TEST(PKCS12Test, RoundTrip) { + {bssl::Span(kTestCert2)}, 0, 0, 0, 0); + + // Test some Unicode. +- TestRoundTrip(kPassword, u8"Hello, 世界!", ++ TestRoundTrip(kPassword, "Hello, 世界!", + bssl::Span(kTestKey), + bssl::Span(kTestCert), + {bssl::Span(kTestCert2)}, 0, 0, 0, 0); +diff --git a/src/decrepit/des/cfb64ede.c b/src/decrepit/des/cfb64ede.c +index 6c39923..820c52e 100644 +--- a/src/decrepit/des/cfb64ede.c ++++ b/src/decrepit/des/cfb64ede.c +@@ -58,7 +58,7 @@ + + #include + +-#include "../../crypto/fipsmodule/des/internal.h" ++#include "../../crypto/des/internal.h" + #include "../../crypto/internal.h" + + +diff --git a/src/include/openssl/hpke.h b/src/include/openssl/hpke.h +index 56251b7..e2c9855 100644 +--- a/src/include/openssl/hpke.h ++++ b/src/include/openssl/hpke.h +@@ -30,7 +30,7 @@ extern "C" { + // Hybrid Public Key Encryption (HPKE) enables a sender to encrypt messages to a + // receiver with a public key. + // +-// See https://tools.ietf.org/html/draft-irtf-cfrg-hpke-12. ++// See RFC 9180. + + + // Parameters. +diff --git a/src/include/openssl/ssl.h b/src/include/openssl/ssl.h +index 232c627..a3b530e 100644 +--- a/src/include/openssl/ssl.h ++++ b/src/include/openssl/ssl.h +@@ -4039,10 +4039,16 @@ OPENSSL_EXPORT int SSL_set_handshake_hints(SSL *ssl, const uint8_t *hints, + // |len| bytes from |buf| contain the handshake message, one-byte + // ChangeCipherSpec body, and two-byte alert, respectively. + // ++// In connections that enable ECH, |cb| is additionally called with ++// |content_type| = |SSL3_RT_CLIENT_HELLO_INNER| for each ClientHelloInner that ++// is encrypted or decrypted. The |len| bytes from |buf| contain the ++// ClientHelloInner, including the reconstructed outer extensions and handshake ++// header. 
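The message callback documented above now also reports ClientHelloInner records in ECH connections. A minimal consumer sketch (the logging body is illustrative; the callback signature and SSL3_RT_CLIENT_HELLO_INNER come from this change):

#include <stdio.h>
#include <openssl/ssl.h>

/* Log ClientHelloInner messages as they are encrypted or decrypted; other
 * content types are ignored here. */
static void msg_cb(int is_write, int version, int content_type,
                   const void *buf, size_t len, SSL *ssl, void *arg) {
  (void)version; (void)buf; (void)ssl; (void)arg;
  if (content_type == SSL3_RT_CLIENT_HELLO_INNER) {
    fprintf(stderr, "%s ClientHelloInner (%zu bytes)\n",
            is_write ? "wrote" : "read", len);
  }
}

/* Installed with: SSL_CTX_set_msg_callback(ctx, msg_cb); */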
++// + // For a V2ClientHello, |version| is |SSL2_VERSION|, |content_type| is zero, and + // the |len| bytes from |buf| contain the V2ClientHello structure. + OPENSSL_EXPORT void SSL_CTX_set_msg_callback( +- SSL_CTX *ctx, void (*cb)(int write_p, int version, int content_type, ++ SSL_CTX *ctx, void (*cb)(int is_write, int version, int content_type, + const void *buf, size_t len, SSL *ssl, void *arg)); + + // SSL_CTX_set_msg_callback_arg sets the |arg| parameter of the message +@@ -5598,7 +5604,7 @@ BSSL_NAMESPACE_END + #define SSL_R_INVALID_ECH_PUBLIC_NAME 317 + #define SSL_R_INVALID_ECH_CONFIG_LIST 318 + #define SSL_R_ECH_REJECTED 319 +-#define SSL_R_OUTER_EXTENSION_NOT_FOUND 320 ++#define SSL_R_INVALID_OUTER_EXTENSION 320 + #define SSL_R_INCONSISTENT_ECH_NEGOTIATION 321 + #define SSL_R_SSLV3_ALERT_CLOSE_NOTIFY 1000 + #define SSL_R_SSLV3_ALERT_UNEXPECTED_MESSAGE 1010 +diff --git a/src/include/openssl/ssl3.h b/src/include/openssl/ssl3.h +index e3910f0..533142c 100644 +--- a/src/include/openssl/ssl3.h ++++ b/src/include/openssl/ssl3.h +@@ -275,6 +275,7 @@ OPENSSL_STATIC_ASSERT(SSL3_RT_MAX_ENCRYPTED_OVERHEAD >= + + // Pseudo content type for SSL/TLS header info + #define SSL3_RT_HEADER 0x100 ++#define SSL3_RT_CLIENT_HELLO_INNER 0x101 + + #define SSL3_AL_WARNING 1 + #define SSL3_AL_FATAL 2 +diff --git a/src/ssl/encrypted_client_hello.cc b/src/ssl/encrypted_client_hello.cc +index 64fee3d..9e9adfe 100644 +--- a/src/ssl/encrypted_client_hello.cc ++++ b/src/ssl/encrypted_client_hello.cc +@@ -203,6 +203,12 @@ bool ssl_decode_client_hello_inner( + OPENSSL_PUT_ERROR(SSL, SSL_R_DECODE_ERROR); + return false; + } ++ // The ECH extension itself is not in the AAD and may not be referenced. ++ if (want == TLSEXT_TYPE_encrypted_client_hello) { ++ *out_alert = SSL_AD_ILLEGAL_PARAMETER; ++ OPENSSL_PUT_ERROR(SSL, SSL_R_INVALID_OUTER_EXTENSION); ++ return false; ++ } + // Seek to |want| in |outer_extensions|. |ext_list| is required to match + // ClientHelloOuter in order. + uint16_t found; +@@ -210,7 +216,7 @@ bool ssl_decode_client_hello_inner( + do { + if (CBS_len(&outer_extensions) == 0) { + *out_alert = SSL_AD_ILLEGAL_PARAMETER; +- OPENSSL_PUT_ERROR(SSL, SSL_R_OUTER_EXTENSION_NOT_FOUND); ++ OPENSSL_PUT_ERROR(SSL, SSL_R_INVALID_OUTER_EXTENSION); + return false; + } + if (!CBS_get_u16(&outer_extensions, &found) || +@@ -252,8 +258,8 @@ bool ssl_decode_client_hello_inner( + return true; + } + +-bool ssl_client_hello_decrypt(EVP_HPKE_CTX *hpke_ctx, Array *out, +- bool *out_is_decrypt_error, ++bool ssl_client_hello_decrypt(SSL_HANDSHAKE *hs, uint8_t *out_alert, ++ bool *out_is_decrypt_error, Array *out, + const SSL_CLIENT_HELLO *client_hello_outer, + Span payload) { + *out_is_decrypt_error = false; +@@ -264,6 +270,7 @@ bool ssl_client_hello_decrypt(EVP_HPKE_CTX *hpke_ctx, Array *out, + Array aad; + if (!aad.CopyFrom(MakeConstSpan(client_hello_outer->client_hello, + client_hello_outer->client_hello_len))) { ++ *out_alert = SSL_AD_INTERNAL_ERROR; + return false; + } + +@@ -278,35 +285,47 @@ bool ssl_client_hello_decrypt(EVP_HPKE_CTX *hpke_ctx, Array *out, + payload.data() - client_hello_outer->client_hello, payload.size()); + OPENSSL_memset(payload_aad.data(), 0, payload_aad.size()); + ++ // Decrypt the EncodedClientHelloInner. ++ Array encoded; + #if defined(BORINGSSL_UNSAFE_FUZZER_MODE) + // In fuzzer mode, disable encryption to improve coverage. We reserve a short + // input to signal decryption failure, so the fuzzer can explore fallback to + // ClientHelloOuter. 
+ const uint8_t kBadPayload[] = {0xff}; + if (payload == kBadPayload) { ++ *out_alert = SSL_AD_DECRYPT_ERROR; + *out_is_decrypt_error = true; + OPENSSL_PUT_ERROR(SSL, SSL_R_DECRYPTION_FAILED); + return false; + } +- if (!out->CopyFrom(payload)) { ++ if (!encoded.CopyFrom(payload)) { ++ *out_alert = SSL_AD_INTERNAL_ERROR; + return false; + } + #else +- // Attempt to decrypt into |out|. +- if (!out->Init(payload.size())) { +- OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE); ++ if (!encoded.Init(payload.size())) { ++ *out_alert = SSL_AD_INTERNAL_ERROR; + return false; + } + size_t len; +- if (!EVP_HPKE_CTX_open(hpke_ctx, out->data(), &len, out->size(), +- payload.data(), payload.size(), aad.data(), +- aad.size())) { ++ if (!EVP_HPKE_CTX_open(hs->ech_hpke_ctx.get(), encoded.data(), &len, ++ encoded.size(), payload.data(), payload.size(), ++ aad.data(), aad.size())) { ++ *out_alert = SSL_AD_DECRYPT_ERROR; + *out_is_decrypt_error = true; + OPENSSL_PUT_ERROR(SSL, SSL_R_DECRYPTION_FAILED); + return false; + } +- out->Shrink(len); ++ encoded.Shrink(len); + #endif ++ ++ if (!ssl_decode_client_hello_inner(hs->ssl, out_alert, out, encoded, ++ client_hello_outer)) { ++ return false; ++ } ++ ++ ssl_do_msg_callback(hs->ssl, /*is_write=*/0, SSL3_RT_CLIENT_HELLO_INNER, ++ *out); + return true; + } + +@@ -789,6 +808,8 @@ bool ssl_encrypt_client_hello(SSL_HANDSHAKE *hs, Span enc) { + binder_len); + } + ++ ssl_do_msg_callback(ssl, /*is_write=*/1, SSL3_RT_CLIENT_HELLO_INNER, ++ hello_inner); + if (!hs->inner_transcript.Update(hello_inner)) { + return false; + } +diff --git a/src/ssl/handshake_client.cc b/src/ssl/handshake_client.cc +index 17b41e0..e630121 100644 +--- a/src/ssl/handshake_client.cc ++++ b/src/ssl/handshake_client.cc +@@ -331,7 +331,7 @@ bool ssl_add_client_hello(SSL_HANDSHAKE *hs) { + Array msg; + if (!ssl->method->init_message(ssl, cbb.get(), &body, SSL3_MT_CLIENT_HELLO) || + !ssl_write_client_hello_without_extensions(hs, &body, type, +- /*empty_session_id*/ false) || ++ /*empty_session_id=*/false) || + !ssl_add_clienthello_tlsext(hs, &body, /*out_encoded=*/nullptr, + &needs_psk_binder, type, CBB_len(&body)) || + !ssl->method->finish_message(ssl, cbb.get(), &msg)) { +diff --git a/src/ssl/handshake_server.cc b/src/ssl/handshake_server.cc +index 1d03c55..15820be 100644 +--- a/src/ssl/handshake_server.cc ++++ b/src/ssl/handshake_server.cc +@@ -554,29 +554,22 @@ static bool decrypt_ech(SSL_HANDSHAKE *hs, uint8_t *out_alert, + ERR_clear_error(); + continue; + } +- Array encoded_client_hello_inner; + bool is_decrypt_error; +- if (!ssl_client_hello_decrypt(hs->ech_hpke_ctx.get(), +- &encoded_client_hello_inner, +- &is_decrypt_error, client_hello, payload)) { ++ if (!ssl_client_hello_decrypt(hs, out_alert, &is_decrypt_error, ++ &hs->ech_client_hello_buf, client_hello, ++ payload)) { + if (is_decrypt_error) { + // Ignore the error and try another ECHConfig. + ERR_clear_error(); ++ // The |out_alert| calling convention currently relies on a default of ++ // |SSL_AD_DECODE_ERROR|. https://crbug.com/boringssl/373 tracks ++ // switching to sum types, which avoids this. ++ *out_alert = SSL_AD_DECODE_ERROR; + continue; + } + OPENSSL_PUT_ERROR(SSL, SSL_R_DECRYPTION_FAILED); + return false; + } +- +- // Recover the ClientHelloInner from the EncodedClientHelloInner. 
+- bssl::Array client_hello_inner; +- if (!ssl_decode_client_hello_inner(ssl, out_alert, &client_hello_inner, +- encoded_client_hello_inner, +- client_hello)) { +- OPENSSL_PUT_ERROR(SSL, SSL_R_DECODE_ERROR); +- return false; +- } +- hs->ech_client_hello_buf = std::move(client_hello_inner); + hs->ech_config_id = config_id; + ssl->s3->ech_status = ssl_ech_accepted; + return true; +diff --git a/src/ssl/internal.h b/src/ssl/internal.h +index 5196f17..8f68fc5 100644 +--- a/src/ssl/internal.h ++++ b/src/ssl/internal.h +@@ -1498,17 +1498,19 @@ enum ssl_client_hello_type_t { + // ClientHelloOuter |client_hello_outer|. If successful, it writes the recovered + // ClientHelloInner to |out_client_hello_inner|. It returns true on success and + // false on failure. ++// ++// This function is exported for fuzzing. + OPENSSL_EXPORT bool ssl_decode_client_hello_inner( + SSL *ssl, uint8_t *out_alert, Array *out_client_hello_inner, + Span encoded_client_hello_inner, + const SSL_CLIENT_HELLO *client_hello_outer); + +-// ssl_client_hello_decrypt attempts to decrypt the |payload| and writes the +-// result to |*out|. |payload| must point into |client_hello_outer|. It returns +-// true on success and false on error. On error, it sets |*out_is_decrypt_error| +-// to whether the failure was due to a bad ciphertext. +-bool ssl_client_hello_decrypt(EVP_HPKE_CTX *hpke_ctx, Array *out, +- bool *out_is_decrypt_error, ++// ssl_client_hello_decrypt attempts to decrypt and decode the |payload|. It ++// writes the result to |*out|. |payload| must point into |client_hello_outer|. ++// It returns true on success and false on error. On error, it sets ++// |*out_is_decrypt_error| to whether the failure was due to a bad ciphertext. ++bool ssl_client_hello_decrypt(SSL_HANDSHAKE *hs, uint8_t *out_alert, ++ bool *out_is_decrypt_error, Array *out, + const SSL_CLIENT_HELLO *client_hello_outer, + Span payload); + +@@ -3511,7 +3513,7 @@ struct ssl_ctx_st { + bssl::UniquePtr cert; + + // callback that allows applications to peek at protocol messages +- void (*msg_callback)(int write_p, int version, int content_type, ++ void (*msg_callback)(int is_write, int version, int content_type, + const void *buf, size_t len, SSL *ssl, + void *arg) = nullptr; + void *msg_callback_arg = nullptr; +diff --git a/src/ssl/test/CMakeLists.txt b/src/ssl/test/CMakeLists.txt +index bb9bd81..f02d6e2 100644 +--- a/src/ssl/test/CMakeLists.txt ++++ b/src/ssl/test/CMakeLists.txt +@@ -17,7 +17,7 @@ add_dependencies(bssl_shim global_target) + + target_link_libraries(bssl_shim test_support_lib ssl crypto) + +-if(UNIX AND NOT APPLE AND NOT ANDROID) ++if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + add_executable( + handshaker + +diff --git a/src/ssl/test/runner/hpke/hpke.go b/src/ssl/test/runner/hpke/hpke.go +index e6fc7be..b65dcf3 100644 +--- a/src/ssl/test/runner/hpke/hpke.go ++++ b/src/ssl/test/runner/hpke/hpke.go +@@ -14,7 +14,7 @@ + + // Package hpke implements Hybrid Public Key Encryption (HPKE). + // +-// See https://tools.ietf.org/html/draft-irtf-cfrg-hpke-12. ++// See RFC 9180. + package hpke + + import ( +diff --git a/src/ssl/test/runner/runner.go b/src/ssl/test/runner/runner.go +index cfff714..4c1c955 100644 +--- a/src/ssl/test/runner/runner.go ++++ b/src/ssl/test/runner/runner.go +@@ -16776,9 +16776,7 @@ func addEncryptedClientHelloTests() { + }, + shouldFail: true, + expectedLocalError: "remote error: illegal parameter", +- // The decoding algorithm relies on the ordering requirement, so +- // the wrong order appears as a missing extension. 
+- expectedError: ":OUTER_EXTENSION_NOT_FOUND:", ++ expectedError: ":INVALID_OUTER_EXTENSION:", + }) + + // Test that the server rejects duplicated values in ech_outer_extensions. +@@ -16812,9 +16810,7 @@ func addEncryptedClientHelloTests() { + }, + shouldFail: true, + expectedLocalError: "remote error: illegal parameter", +- // The decoding algorithm relies on the ordering requirement, so +- // duplicates appear as missing extensions. +- expectedError: ":OUTER_EXTENSION_NOT_FOUND:", ++ expectedError: ":INVALID_OUTER_EXTENSION:", + }) + + // Test that the server rejects references to missing extensions in +@@ -16843,7 +16839,7 @@ func addEncryptedClientHelloTests() { + }, + shouldFail: true, + expectedLocalError: "remote error: illegal parameter", +- expectedError: ":DECODE_ERROR:", ++ expectedError: ":INVALID_OUTER_EXTENSION:", + }) + + // Test that the server rejects a references to the ECH extension in +@@ -16871,7 +16867,46 @@ func addEncryptedClientHelloTests() { + }, + shouldFail: true, + expectedLocalError: "remote error: illegal parameter", +- expectedError: ":DECODE_ERROR:", ++ expectedError: ":INVALID_OUTER_EXTENSION:", ++ }) ++ ++ // Test the message callback is correctly reported with ECH. ++ clientAndServerHello := "read hs 1\nread clienthelloinner\nwrite hs 2\n" ++ expectMsgCallback := clientAndServerHello + "write ccs\n" ++ if hrr { ++ expectMsgCallback += clientAndServerHello ++ } ++ // EncryptedExtensions onwards. ++ expectMsgCallback += `write hs 8 ++write hs 11 ++write hs 15 ++write hs 20 ++read hs 20 ++write hs 4 ++write hs 4 ++` ++ testCases = append(testCases, testCase{ ++ testType: serverTest, ++ protocol: protocol, ++ name: prefix + "ECH-Server-MessageCallback" + suffix, ++ config: Config{ ++ ServerName: "secret.example", ++ ClientECHConfig: echConfig.ECHConfig, ++ DefaultCurves: defaultCurves, ++ Bugs: ProtocolBugs{ ++ NoCloseNotify: true, // Align QUIC and TCP traces. ++ }, ++ }, ++ flags: []string{ ++ "-ech-server-config", base64FlagValue(echConfig.ECHConfig.Raw), ++ "-ech-server-key", base64FlagValue(echConfig.Key), ++ "-ech-is-retry-config", "1", ++ "-expect-ech-accept", ++ "-expect-msg-callback", expectMsgCallback, ++ }, ++ expectations: connectionExpectations{ ++ echAccepted: true, ++ }, + }) + } + +@@ -18622,6 +18657,60 @@ func addEncryptedClientHelloTests() { + shouldFail: true, + expectedError: ":INCONSISTENT_ECH_NEGOTIATION:", + }) ++ ++ // Test the message callback is correctly reported, with and without ++ // HelloRetryRequest. ++ clientAndServerHello := "write clienthelloinner\nwrite hs 1\nread hs 2\n" ++ // EncryptedExtensions onwards. ++ finishHandshake := `read hs 8 ++read hs 11 ++read hs 15 ++read hs 20 ++write hs 20 ++read hs 4 ++read hs 4 ++` ++ testCases = append(testCases, testCase{ ++ testType: clientTest, ++ protocol: protocol, ++ name: prefix + "ECH-Client-MessageCallback", ++ config: Config{ ++ MinVersion: VersionTLS13, ++ MaxVersion: VersionTLS13, ++ ServerECHConfigs: []ServerECHConfig{echConfig}, ++ Bugs: ProtocolBugs{ ++ NoCloseNotify: true, // Align QUIC and TCP traces. 
++ }, ++ }, ++ flags: []string{ ++ "-ech-config-list", base64FlagValue(CreateECHConfigList(echConfig.ECHConfig.Raw)), ++ "-expect-ech-accept", ++ "-expect-msg-callback", clientAndServerHello + "write ccs\n" + finishHandshake, ++ }, ++ expectations: connectionExpectations{echAccepted: true}, ++ }) ++ testCases = append(testCases, testCase{ ++ testType: clientTest, ++ protocol: protocol, ++ name: prefix + "ECH-Client-MessageCallback-HelloRetryRequest", ++ config: Config{ ++ MinVersion: VersionTLS13, ++ MaxVersion: VersionTLS13, ++ CurvePreferences: []CurveID{CurveP384}, ++ ServerECHConfigs: []ServerECHConfig{echConfig}, ++ Bugs: ProtocolBugs{ ++ ExpectMissingKeyShare: true, // Check we triggered HRR. ++ NoCloseNotify: true, // Align QUIC and TCP traces. ++ }, ++ }, ++ flags: []string{ ++ "-ech-config-list", base64FlagValue(CreateECHConfigList(echConfig.ECHConfig.Raw)), ++ "-expect-ech-accept", ++ "-expect-hrr", // Check we triggered HRR. ++ "-expect-msg-callback", clientAndServerHello + "write ccs\n" + clientAndServerHello + finishHandshake, ++ }, ++ expectations: connectionExpectations{echAccepted: true}, ++ }) + } + } + +@@ -19220,8 +19309,22 @@ func main() { + noneOfPattern = strings.Split(*skipTest, ";") + } + ++ shardIndex, shardTotal, err := getSharding() ++ if err != nil { ++ fmt.Fprintln(os.Stderr, err) ++ os.Exit(1) ++ } ++ ++ if shardTotal > 0 { ++ fmt.Printf("This is shard %d of 0..%d (inclusive)\n", shardIndex, shardTotal-1) ++ } ++ + var foundTest bool + for i := range testCases { ++ if shardTotal > 0 && i%shardTotal != shardIndex { ++ continue ++ } ++ + matched, err := match(oneOfPatternIfAny, noneOfPattern, testCases[i].name) + if err != nil { + fmt.Fprintf(os.Stderr, "Error matching pattern: %s\n", err) +@@ -19259,7 +19362,7 @@ func main() { + } + } + +- if !foundTest { ++ if !foundTest && shardTotal == 0 { + fmt.Fprintf(os.Stderr, "No tests run\n") + os.Exit(1) + } +diff --git a/src/ssl/test/runner/sharding.go b/src/ssl/test/runner/sharding.go +new file mode 100644 +index 0000000..5061a6f +--- /dev/null ++++ b/src/ssl/test/runner/sharding.go +@@ -0,0 +1,77 @@ ++// Copyright (c) 2022, Google Inc. ++// ++// Permission to use, copy, modify, and/or distribute this software for any ++// purpose with or without fee is hereby granted, provided that the above ++// copyright notice and this permission notice appear in all copies. ++// ++// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY ++// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION ++// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN ++// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ++ ++package runner ++ ++import ( ++ "fmt" ++ "io/ioutil" ++ "os" ++ "strconv" ++) ++ ++const ( ++ shardStatusFileEnv = "TEST_SHARD_STATUS_FILE" ++ shardTotalEnv = "TEST_TOTAL_SHARDS" ++ shardIndexEnv = "TEST_SHARD_INDEX" ++ shardPrefix = "RUNNER_" ++) ++ ++func init() { ++ // When run under `go test`, init() functions may be run twice if the ++ // test binary ends up forking and execing itself. Therefore we move ++ // the environment variables to names that don't interfere with Go's ++ // own support for sharding. If we recorded and erased them, then they ++ // wouldn't exist the second time the binary runs. 
++ for _, key := range []string{shardStatusFileEnv, shardTotalEnv, shardIndexEnv} { ++ value := os.Getenv(key) ++ if len(value) > 0 { ++ os.Setenv(shardPrefix+key, value) ++ os.Setenv(key, "") ++ } ++ } ++} ++ ++// getSharding returns the shard index and count, or zeros if sharding is not ++// enabled. ++func getSharding() (index, total int, err error) { ++ statusFile := os.Getenv(shardPrefix + shardStatusFileEnv) ++ totalNumStr := os.Getenv(shardPrefix + shardTotalEnv) ++ indexStr := os.Getenv(shardPrefix + shardIndexEnv) ++ if len(totalNumStr) == 0 || len(indexStr) == 0 { ++ return 0, 0, nil ++ } ++ ++ totalNum, err := strconv.Atoi(totalNumStr) ++ if err != nil { ++ return 0, 0, fmt.Errorf("$%s is %q, but expected a number\n", shardTotalEnv, totalNumStr) ++ } ++ ++ index, err = strconv.Atoi(indexStr) ++ if err != nil { ++ return 0, 0, fmt.Errorf("$%s is %q, but expected a number\n", shardIndexEnv, indexStr) ++ } ++ ++ if index < 0 || index >= totalNum { ++ return 0, 0, fmt.Errorf("shard index/total of %d/%d is invalid\n", index, totalNum) ++ } ++ ++ if len(statusFile) > 0 { ++ if err := ioutil.WriteFile(statusFile, nil, 0664); err != nil { ++ return 0, 0, err ++ } ++ } ++ ++ return index, totalNum, nil ++} +diff --git a/src/ssl/test/test_config.cc b/src/ssl/test/test_config.cc +index 9a0f63d..a6409d6 100644 +--- a/src/ssl/test/test_config.cc ++++ b/src/ssl/test/test_config.cc +@@ -602,6 +602,7 @@ static void MessageCallback(int is_write, int version, int content_type, + state->msg_callback_text += "v2clienthello\n"; + return; + ++ case SSL3_RT_CLIENT_HELLO_INNER: + case SSL3_RT_HANDSHAKE: { + CBS cbs; + CBS_init(&cbs, buf_u8, len); +@@ -619,10 +620,19 @@ static void MessageCallback(int is_write, int version, int content_type, + return; + } + char text[16]; +- snprintf(text, sizeof(text), "hs %d\n", type); +- state->msg_callback_text += text; +- if (!is_write) { +- state->last_message_received = type; ++ if (content_type == SSL3_RT_CLIENT_HELLO_INNER) { ++ if (type != SSL3_MT_CLIENT_HELLO) { ++ fprintf(stderr, "Invalid header for ClientHelloInner.\n"); ++ state->msg_callback_ok = false; ++ return; ++ } ++ state->msg_callback_text += "clienthelloinner\n"; ++ } else { ++ snprintf(text, sizeof(text), "hs %d\n", type); ++ state->msg_callback_text += text; ++ if (!is_write) { ++ state->last_message_received = type; ++ } + } + return; + } +diff --git a/src/ssl/tls13_server.cc b/src/ssl/tls13_server.cc +index 2f000e5..dbf239d 100644 +--- a/src/ssl/tls13_server.cc ++++ b/src/ssl/tls13_server.cc +@@ -658,28 +658,16 @@ static enum ssl_hs_wait_t do_read_second_client_hello(SSL_HANDSHAKE *hs) { + } + + // Decrypt the payload with the HPKE context from the first ClientHello. +- Array encoded_client_hello_inner; ++ uint8_t alert = SSL_AD_DECODE_ERROR; + bool unused; +- if (!ssl_client_hello_decrypt(hs->ech_hpke_ctx.get(), +- &encoded_client_hello_inner, &unused, +- &client_hello, payload)) { ++ if (!ssl_client_hello_decrypt(hs, &alert, &unused, ++ &hs->ech_client_hello_buf, &client_hello, ++ payload)) { + // Decryption failure is fatal in the second ClientHello. + OPENSSL_PUT_ERROR(SSL, SSL_R_DECRYPTION_FAILED); +- ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_DECRYPT_ERROR); +- return ssl_hs_error; +- } +- +- // Recover the ClientHelloInner from the EncodedClientHelloInner. 
+- uint8_t alert = SSL_AD_DECODE_ERROR; +- bssl::Array client_hello_inner; +- if (!ssl_decode_client_hello_inner(ssl, &alert, &client_hello_inner, +- encoded_client_hello_inner, +- &client_hello)) { +- OPENSSL_PUT_ERROR(SSL, SSL_R_DECODE_ERROR); + ssl_send_alert(ssl, SSL3_AL_FATAL, alert); + return ssl_hs_error; + } +- hs->ech_client_hello_buf = std::move(client_hello_inner); + + // Reparse |client_hello| from the buffer owned by |hs|. + if (!hs->GetClientHello(&msg, &client_hello)) { +diff --git a/src/tool/server.cc b/src/tool/server.cc +index 18b692d..ebecee0 100644 +--- a/src/tool/server.cc ++++ b/src/tool/server.cc +@@ -132,21 +132,37 @@ static bssl::UniquePtr MakeKeyPairForSelfSignedCert() { + + static bssl::UniquePtr MakeSelfSignedCert(EVP_PKEY *evp_pkey, + const int valid_days) { ++ uint64_t serial; + bssl::UniquePtr x509(X509_new()); +- uint32_t serial; +- RAND_bytes(reinterpret_cast(&serial), sizeof(serial)); +- ASN1_INTEGER_set(X509_get_serialNumber(x509.get()), serial >> 1); +- X509_gmtime_adj(X509_get_notBefore(x509.get()), 0); +- X509_gmtime_adj(X509_get_notAfter(x509.get()), 60 * 60 * 24 * valid_days); +- +- X509_NAME* subject = X509_get_subject_name(x509.get()); +- X509_NAME_add_entry_by_txt(subject, "C", MBSTRING_ASC, +- reinterpret_cast("US"), -1, -1, +- 0); +- X509_NAME_add_entry_by_txt(subject, "O", MBSTRING_ASC, +- reinterpret_cast("BoringSSL"), -1, +- -1, 0); +- X509_set_issuer_name(x509.get(), subject); ++ if (!x509 || // ++ !X509_set_version(x509.get(), X509_VERSION_3) || ++ !RAND_bytes(reinterpret_cast(&serial), sizeof(serial)) || ++ !ASN1_INTEGER_set_uint64(X509_get_serialNumber(x509.get()), serial) || ++ !X509_gmtime_adj(X509_get_notBefore(x509.get()), 0) || ++ !X509_gmtime_adj(X509_get_notAfter(x509.get()), ++ 60 * 60 * 24 * valid_days)) { ++ return nullptr; ++ } ++ ++ X509_NAME *subject = X509_get_subject_name(x509.get()); ++ if (!X509_NAME_add_entry_by_txt(subject, "C", MBSTRING_ASC, ++ reinterpret_cast("US"), -1, ++ -1, 0) || ++ !X509_NAME_add_entry_by_txt( ++ subject, "O", MBSTRING_ASC, ++ reinterpret_cast("BoringSSL"), -1, -1, 0) || ++ !X509_set_issuer_name(x509.get(), subject)) { ++ return nullptr; ++ } ++ ++ // macOS requires an explicit EKU extension. 
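The MakeSelfSignedCert changes here add a critical extendedKeyUsage (serverAuth) extension because macOS requires one. As an illustrative companion, not part of the patch, a caller could read the EKU back to confirm serverAuth is present:

#include <openssl/obj.h>
#include <openssl/x509.h>
#include <openssl/x509v3.h>

/* Return 1 if |x509| carries an extendedKeyUsage extension that includes
 * serverAuth, 0 otherwise. */
static int cert_has_server_auth_eku(X509 *x509) {
  EXTENDED_KEY_USAGE *eku = X509_get_ext_d2i(x509, NID_ext_key_usage,
                                             /*out_critical=*/NULL,
                                             /*out_idx=*/NULL);
  if (eku == NULL) {
    return 0;
  }
  int ok = 0;
  for (size_t i = 0; i < sk_ASN1_OBJECT_num(eku); i++) {
    if (OBJ_obj2nid(sk_ASN1_OBJECT_value(eku, i)) == NID_server_auth) {
      ok = 1;
      break;
    }
  }
  sk_ASN1_OBJECT_pop_free(eku, ASN1_OBJECT_free);
  return ok;
}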
++ bssl::UniquePtr ekus(sk_ASN1_OBJECT_new_null()); ++ if (!ekus || ++ !sk_ASN1_OBJECT_push(ekus.get(), OBJ_nid2obj(NID_server_auth)) || ++ !X509_add1_ext_i2d(x509.get(), NID_ext_key_usage, ekus.get(), /*crit=*/1, ++ /*flags=*/0)) { ++ return nullptr; ++ } + + if (!X509_set_pubkey(x509.get(), evp_pkey)) { + fprintf(stderr, "Failed to set public key.\n"); +diff --git a/src/util/BUILD.toplevel b/src/util/BUILD.toplevel +index 462a24f..cfa695a 100644 +--- a/src/util/BUILD.toplevel ++++ b/src/util/BUILD.toplevel +@@ -18,10 +18,11 @@ load( + "crypto_headers", + "crypto_internal_headers", + "crypto_sources", ++ "crypto_sources_apple_aarch64", ++ "crypto_sources_apple_x86_64", + "crypto_sources_linux_aarch64", + "crypto_sources_linux_ppc64le", + "crypto_sources_linux_x86_64", +- "crypto_sources_mac_x86_64", + "fips_fragments", + "ssl_headers", + "ssl_internal_headers", +@@ -36,52 +37,42 @@ exports_files(["LICENSE"]) + + config_setting( + name = "linux_aarch64", +- values = {"cpu": "aarch64"}, ++ constraint_values = [ ++ "@platforms//os:linux", ++ "@platforms//cpu:aarch64", ++ ], + ) + + config_setting( + name = "linux_x86_64", +- values = {"cpu": "k8"}, ++ constraint_values = [ ++ "@platforms//os:linux", ++ "@platforms//cpu:x86_64", ++ ], + ) + + config_setting( + name = "linux_ppc64le", +- values = {"cpu": "ppc"}, +-) +- +-config_setting( +- name = "mac_x86_64", +- values = {"cpu": "darwin"}, +-) +- +-config_setting( +- name = "windows_x86_64", +- values = {"cpu": "x64_windows"}, +-) +- +-config_setting( +- name = "android_legacy", +- values = {"crosstool_top": "//external:android/crosstool"}, +-) +- +-config_setting( +- name = "android_stlport", +- values = {"crosstool_top": "@androidndk//:toolchain-stlport"}, +-) +- +-config_setting( +- name = "android_libcpp", +- values = {"crosstool_top": "@androidndk//:toolchain-libcpp"}, ++ constraint_values = [ ++ "@platforms//os:linux", ++ "@platforms//cpu:ppc", ++ ], + ) + + config_setting( +- name = "android_gnu_libstdcpp", +- values = {"crosstool_top": "@androidndk//:toolchain-gnu-libstdcpp"}, ++ name = "macos_aarch64", ++ constraint_values = [ ++ "@platforms//os:macos", ++ "@platforms//cpu:aarch64", ++ ], + ) + + config_setting( +- name = "android_default", +- values = {"crosstool_top": "@androidndk//:default_crosstool"}, ++ name = "macos_x86_64", ++ constraint_values = [ ++ "@platforms//os:macos", ++ "@platforms//cpu:x86_64", ++ ], + ) + + posix_copts = [ +@@ -98,11 +89,6 @@ posix_copts = [ + "-Wwrite-strings", + "-Wshadow", + "-fno-common", +- +- # Modern build environments should be able to set this to use atomic +- # operations for reference counting rather than locks. However, it's +- # known not to work on some Android builds. +- # "-DOPENSSL_C11_ATOMIC", + ] + + linux_copts = posix_copts + [ +@@ -113,24 +99,29 @@ linux_copts = posix_copts + [ + ] + + boringssl_copts = select({ +- ":linux_aarch64": linux_copts, +- ":linux_ppc64le": linux_copts, +- ":linux_x86_64": linux_copts, +- ":mac_x86_64": posix_copts, +- ":windows_x86_64": [ +- "-DWIN32_LEAN_AND_MEAN", +- "-DOPENSSL_NO_ASM", +- ], +- "//conditions:default": ["-DOPENSSL_NO_ASM"], ++ "@platforms//os:linux": linux_copts, ++ "@platforms//os:macos": posix_copts, ++ "@platforms//os:windows": ["-DWIN32_LEAN_AND_MEAN"], ++ "//conditions:default": [], + }) + ++# These selects must be kept in sync. 
+ crypto_sources_asm = select({ + ":linux_aarch64": crypto_sources_linux_aarch64, + ":linux_ppc64le": crypto_sources_linux_ppc64le, + ":linux_x86_64": crypto_sources_linux_x86_64, +- ":mac_x86_64": crypto_sources_mac_x86_64, ++ ":macos_aarch64": crypto_sources_apple_aarch64, ++ ":macos_x86_64": crypto_sources_apple_x86_64, + "//conditions:default": [], + }) ++boringssl_copts += select({ ++ ":linux_aarch64": [], ++ ":linux_ppc64le": [], ++ ":linux_x86_64": [], ++ ":macos_aarch64": [], ++ ":macos_x86_64": [], ++ "//conditions:default": ["-DOPENSSL_NO_ASM"], ++}) + + # For C targets only (not C++), compile with C11 support. + posix_copts_c11 = [ +@@ -141,10 +132,8 @@ posix_copts_c11 = [ + ] + + boringssl_copts_c11 = boringssl_copts + select({ +- ":linux_aarch64": posix_copts_c11, +- ":linux_ppc64le": posix_copts_c11, +- ":linux_x86_64": posix_copts_c11, +- ":mac_x86_64": posix_copts_c11, ++ "@platforms//os:linux": posix_copts_c11, ++ "@platforms//os:macos": posix_copts_c11, + "//conditions:default": [], + }) + +@@ -155,10 +144,8 @@ posix_copts_cxx = [ + ] + + boringssl_copts_cxx = boringssl_copts + select({ +- ":linux_aarch64": posix_copts_cxx, +- ":linux_ppc64le": posix_copts_cxx, +- ":linux_x86_64": posix_copts_cxx, +- ":mac_x86_64": posix_copts_cxx, ++ "@platforms//os:linux": posix_copts_cxx, ++ "@platforms//os:macos": posix_copts_cxx, + "//conditions:default": [], + }) + +@@ -171,13 +158,9 @@ cc_library( + linkopts = select({ + # Android supports pthreads, but does not provide a libpthread + # to link against. +- ":android_legacy": [], +- ":android_stlport": [], +- ":android_libcpp": [], +- ":android_gnu_libstdcpp": [], +- ":android_default": [], +- ":mac_x86_64": [], +- ":windows_x86_64": ["-defaultlib:advapi32.lib"], ++ "@platforms//os:android": [], ++ "@platforms//os:macos": [], ++ "@platforms//os:windows": ["-defaultlib:advapi32.lib"], + "//conditions:default": ["-lpthread"], + }), + visibility = ["//visibility:public"], +diff --git a/src/util/bot/DEPS b/src/util/bot/DEPS +index e3c95f3..574d94b 100644 +--- a/src/util/bot/DEPS ++++ b/src/util/bot/DEPS +@@ -187,7 +187,7 @@ hooks = [ + 'action': [ 'download_from_google_storage', + '--no_resume', + '--bucket', 'chrome-boringssl-sde', +- '-s', 'boringssl/util/bot/sde-linux64.tar.bz2.sha1' ++ '-s', 'boringssl/util/bot/sde-linux64.tar.xz.sha1' + ], + }, + { +@@ -196,7 +196,7 @@ hooks = [ + 'condition': 'checkout_sde and host_os == "linux"', + 'action': [ 'python3', + 'boringssl/util/bot/extract.py', +- 'boringssl/util/bot/sde-linux64.tar.bz2', ++ 'boringssl/util/bot/sde-linux64.tar.xz', + 'boringssl/util/bot/sde-linux64/', + ], + }, +@@ -207,7 +207,7 @@ hooks = [ + 'action': [ 'download_from_google_storage', + '--no_resume', + '--bucket', 'chrome-boringssl-sde', +- '-s', 'boringssl/util/bot/sde-win32.tar.bz2.sha1' ++ '-s', 'boringssl/util/bot/sde-win32.tar.xz.sha1' + ], + }, + { +@@ -216,7 +216,7 @@ hooks = [ + 'condition': 'checkout_sde and host_os == "win"', + 'action': [ 'python3', + 'boringssl/util/bot/extract.py', +- 'boringssl/util/bot/sde-win32.tar.bz2', ++ 'boringssl/util/bot/sde-win32.tar.xz', + 'boringssl/util/bot/sde-win32/', + ], + }, +diff --git a/src/util/bot/UPDATING b/src/util/bot/UPDATING +index 2e6b914..dad6192 100644 +--- a/src/util/bot/UPDATING ++++ b/src/util/bot/UPDATING +@@ -46,13 +46,13 @@ perl-win32.zip: Update to the latest 64-bit prebuilt "Portable" edition of + + The current revision is strawberry-perl-5.26.2.1-64bit-portable.zip. 
+ +-Finally, update sde-linux64.tar.bz2 and sde-win32.tar.bz2 by downloading the ++Finally, update sde-linux64.tar.xz and sde-win32.tar.xz by downloading the + latet release from Intel at + https://software.intel.com/en-us/articles/intel-software-development-emulator, + but upload it with the following command. (Note the bucket is different.) + +- upload_to_google_storage.py -b chrome-boringssl-sde sde-linux64.tar.bz2 sde-win32.tar.bz2 ++ upload_to_google_storage.py -b chrome-boringssl-sde sde-linux64.tar.xz sde-win32.tar.xz + +-The current revision is sde-external-8.50.0-2020-03-26-*.tar.bz2. ++The current revision is sde-external-9.0.0-2021-11-07-*.tar.xz. + + When adding new files, remember to update .gitignore. +diff --git a/src/util/bot/extract.py b/src/util/bot/extract.py +index 9b1b88a..4ef5f65 100644 +--- a/src/util/bot/extract.py ++++ b/src/util/bot/extract.py +@@ -118,6 +118,8 @@ def main(args): + entries = IterateTar(archive, 'gz') + elif archive.endswith('.tar.bz2'): + entries = IterateTar(archive, 'bz2') ++ elif archive.endswith('.tar.xz'): ++ entries = IterateTar(archive, 'xz') + else: + raise ValueError(archive) + +diff --git a/src/util/bot/sde-linux64.tar.bz2.sha1 b/src/util/bot/sde-linux64.tar.bz2.sha1 +deleted file mode 100644 +index c450f63..0000000 +--- a/src/util/bot/sde-linux64.tar.bz2.sha1 ++++ /dev/null +@@ -1 +0,0 @@ +-baacb5a29755e299d3384c41c6dd55f65235ef1f +\ No newline at end of file +diff --git a/src/util/bot/sde-linux64.tar.xz.sha1 b/src/util/bot/sde-linux64.tar.xz.sha1 +new file mode 100644 +index 0000000..f9ee198 +--- /dev/null ++++ b/src/util/bot/sde-linux64.tar.xz.sha1 +@@ -0,0 +1 @@ ++8bba6e01a47b2cfd9e7429f77256db540031ff43 +\ No newline at end of file +diff --git a/src/util/bot/sde-win32.tar.bz2.sha1 b/src/util/bot/sde-win32.tar.bz2.sha1 +deleted file mode 100644 +index b960747..0000000 +--- a/src/util/bot/sde-win32.tar.bz2.sha1 ++++ /dev/null +@@ -1 +0,0 @@ +-cc2d77ff4a221165a8bb13f43ccfbff6550b90c8 +\ No newline at end of file +diff --git a/src/util/bot/sde-win32.tar.xz.sha1 b/src/util/bot/sde-win32.tar.xz.sha1 +new file mode 100644 +index 0000000..dbaf87f +--- /dev/null ++++ b/src/util/bot/sde-win32.tar.xz.sha1 +@@ -0,0 +1 @@ ++59ef225031e14e5ac257ada61d416f6ea0c9c080 +\ No newline at end of file +diff --git a/src/util/doc.go b/src/util/doc.go +index a38e078..651998e 100644 +--- a/src/util/doc.go ++++ b/src/util/doc.go +@@ -503,7 +503,7 @@ func firstSentence(paragraphs []string) string { + + // markupPipeWords converts |s| into an HTML string, safe to be included outside + // a tag, while also marking up words surrounded by |. +-func markupPipeWords(allDecls map[string]string, s string) template.HTML { ++func markupPipeWords(allDecls map[string]string, s string, linkDecls bool) template.HTML { + // It is safe to look for '|' in the HTML-escaped version of |s| + // below. The escaped version cannot include '|' instead tags because + // there are no tags by construction. 
+@@ -524,12 +524,10 @@ func markupPipeWords(allDecls map[string]string, s string) template.HTML { + if i > 0 && (j == -1 || j > i) { + ret += "" + anchor, isLink := allDecls[s[:i]] +- if isLink { +- ret += fmt.Sprintf("", template.HTMLEscapeString(anchor)) +- } +- ret += s[:i] +- if isLink { +- ret += "" ++ if linkDecls && isLink { ++ ret += fmt.Sprintf("%s", template.HTMLEscapeString(anchor), s[:i]) ++ } else { ++ ret += s[:i] + } + ret += "" + s = s[i+1:] +@@ -602,11 +600,12 @@ func generate(outPath string, config *Config) (map[string]string, error) { + + headerTmpl := template.New("headerTmpl") + headerTmpl.Funcs(template.FuncMap{ +- "firstSentence": firstSentence, +- "markupPipeWords": func(s string) template.HTML { return markupPipeWords(allDecls, s) }, +- "markupFirstWord": markupFirstWord, +- "markupRFC": markupRFC, +- "newlinesToBR": newlinesToBR, ++ "firstSentence": firstSentence, ++ "markupPipeWords": func(s string) template.HTML { return markupPipeWords(allDecls, s, true /* linkDecls */) }, ++ "markupPipeWordsNoLink": func(s string) template.HTML { return markupPipeWords(allDecls, s, false /* linkDecls */) }, ++ "markupFirstWord": markupFirstWord, ++ "markupRFC": markupRFC, ++ "newlinesToBR": newlinesToBR, + }) + headerTmpl, err := headerTmpl.Parse(` + +@@ -623,12 +622,12 @@ func generate(outPath string, config *Config) (map[string]string, error) { + All headers + + +- {{range .Preamble}}

<p>{{. | markupPipeWords}}</p>{{end}}
++ {{range .Preamble}}<p>{{. | markupPipeWords | markupRFC}}</p>{{end}}
+
+ <ol>
+ {{range .Sections}}
+ {{if not .IsPrivate}}
+- {{if .Anchor}}<li class="header"><a href="#{{.Anchor}}">{{.Preamble | firstSentence | markupPipeWords}}</a></li>{{end}}
++ {{if .Anchor}}<li class="header"><a href="#{{.Anchor}}">{{.Preamble | firstSentence | markupPipeWordsNoLink}}</a></li>{{end}}
+ {{range .Decls}}
+ {{if .Anchor}}<li><a href="#{{.Anchor}}"><tt>{{.Name}}</tt></a></li>{{end}}
+ {{end}}
+@@ -641,7 +640,7 @@ func generate(outPath string, config *Config) (map[string]string, error) {
+ <div class="section" {{if .Anchor}}id="{{.Anchor}}"{{end}}>
+ {{if .Preamble}}
+ <div class="sectionpreamble">
+- {{range .Preamble}}<p>{{. | markupPipeWords}}</p>{{end}}
++ {{range .Preamble}}<p>{{. | markupPipeWords | markupRFC}}</p>{{end}}
+ </div>
    + {{end}} + +diff --git a/src/util/fipstools/CMakeLists.txt b/src/util/fipstools/CMakeLists.txt +new file mode 100644 +index 0000000..6359383 +--- /dev/null ++++ b/src/util/fipstools/CMakeLists.txt +@@ -0,0 +1,12 @@ ++include_directories(../../include) ++ ++if(FIPS) ++ add_executable( ++ test_fips ++ ++ test_fips.c ++ ) ++ ++ add_dependencies(test_fips global_target) ++ target_link_libraries(test_fips crypto) ++endif() +diff --git a/src/util/fipstools/acvp/acvptool/acvp/acvp.go b/src/util/fipstools/acvp/acvptool/acvp/acvp.go +index 04f0932..9419508 100644 +--- a/src/util/fipstools/acvp/acvptool/acvp/acvp.go ++++ b/src/util/fipstools/acvp/acvptool/acvp/acvp.go +@@ -33,6 +33,8 @@ import ( + "time" + ) + ++const loginEndpoint = "acvp/v1/login" ++ + // Server represents an ACVP server. + type Server struct { + // PrefixTokens are access tokens that apply to URLs under a certain prefix. +@@ -239,7 +241,7 @@ func expired(tokenStr string) bool { + if json.Unmarshal(jsonBytes, &token) != nil { + return false + } +- return token.Expiry > 0 && token.Expiry < uint64(time.Now().Unix()) ++ return token.Expiry > 0 && token.Expiry < uint64(time.Now().Add(-10*time.Second).Unix()) + } + + func (server *Server) getToken(endPoint string) (string, error) { +@@ -255,7 +257,7 @@ func (server *Server) getToken(endPoint string) (string, error) { + var reply struct { + AccessToken string `json:"accessToken"` + } +- if err := server.postMessage(&reply, "acvp/v1/login", map[string]string{ ++ if err := server.postMessage(&reply, loginEndpoint, map[string]string{ + "password": server.totpFunc(), + "accessToken": token, + }); err != nil { +@@ -278,7 +280,7 @@ func (server *Server) Login() error { + SizeLimit int64 `json:"sizeConstraint"` + } + +- if err := server.postMessage(&reply, "acvp/v1/login", map[string]string{"password": server.totpFunc()}); err != nil { ++ if err := server.postMessage(&reply, loginEndpoint, map[string]string{"password": server.totpFunc()}); err != nil { + return err + } + +@@ -372,7 +374,7 @@ func (server *Server) newRequestWithToken(method, endpoint string, body io.Reade + if err != nil { + return nil, err + } +- if len(token) != 0 { ++ if len(token) != 0 && endpoint != loginEndpoint { + req.Header.Add("Authorization", "Bearer "+token) + } + return req, nil +diff --git a/src/util/fipstools/break-kat.go b/src/util/fipstools/break-kat.go +new file mode 100644 +index 0000000..b500545 +--- /dev/null ++++ b/src/util/fipstools/break-kat.go +@@ -0,0 +1,89 @@ ++// break-kat corrupts a known-answer-test input in a binary and writes the ++// corrupted binary to stdout. This is used to demonstrate that the KATs in the ++// binary notice the error. 
++package main ++ ++import ( ++ "bytes" ++ "encoding/hex" ++ "flag" ++ "fmt" ++ "io/ioutil" ++ "os" ++ "sort" ++) ++ ++var ( ++ kats = map[string]string{ ++ "HMAC-SHA-256": "dad91293dfcf2a7c8ecd13fe353fa75b", ++ "AES-CBC-encrypt": "078609a6c5ac2544699adf682fa377f9be8ab6aef563e8c56a36b84f557fadd3", ++ "AES-CBC-decrypt": "347aa5a024b28257b36510be583d4f47adb7bbeedc6005bbbd0d0a9f06bb7b10", ++ "AES-GCM-encrypt": "8fcc4099808e75caaff582898848a88d808b55ab4e9370797d940be8cc1d7884", ++ "AES-GCM-decrypt": "35f3058f875760ff09d3120f70c4bc9ed7a86872e13452202176f7371ae04faae1dd391920f5d13953d896785994823c", ++ "DRBG": "c4da0740d505f1ee280b95e58c4931ac6de846a0152fbb4a3f174cf4787a4f1a40c2b50babe14aae530be5886d910a27", ++ "DRBG-reseed": "c7161ca36c2309b716e9859bb96c6d49bdc8352103a18cd24ef42ec97ef46bf446eb1a4576c186e9351803763a7912fe", ++ "SHA-1": "132fd9bad5c1826263bafbb699f707a5", ++ "SHA-256": "ff3b857da7236a2baa0f396b51522217", ++ "SHA-512": "212512f8d2ad8322781c6c4d69a9daa1", ++ "TLS-KDF": "abc3657b094c7628a0b282996fe75a75f4984fd94d4ecc2fcf53a2c469a3f731", ++ "RSA-sign": "d2b56e53306f720d7929d8708bf46f1c22300305582b115bedcac722d8aa5ab2", ++ "RSA-verify": "abe2cbc13d6bd39d48db5334ddbf8d070a93bdcb104e2cc5d0ee486ee295f6b31bda126c41890b98b73e70e6b65d82f95c663121755a90744c8d1c21148a1960be0eca446e9ff497f1345c537ef8119b9a4398e95c5c6de2b1c955905c5299d8ce7a3b6ab76380d9babdd15f610237e1f3f2aa1c1f1e770b62fbb596381b2ebdd77ecef9c90d4c92f7b6b05fed2936285fa94826e62055322a33b6f04c74ce69e5d8d737fb838b79d2d48e3daf71387531882531a95ac964d02ea413bf85952982bbc089527daff5b845c9a0f4d14ef1956d9c3acae882d12da66da0f35794f5ee32232333517db9315232a183b991654dbea41615345c885325926744a53915", ++ "ECDSA-sign": "1e35930be860d0942ca7bbd6f6ded87f157e4de24f81ed4b875c0e018e89a81f", ++ "ECDSA-verify": "6780c5fc70275e2c7061a0e7877bb174deadeb9887027f3fa83654158ba7f50c2d36e5799790bfbe2183d33e96f3c51f6a232f2a24488c8e5f64c37ea2cf0529", ++ "Z-computation": "e7604491269afb5b102d6ea52cb59feb70aede6ce3bfb3e0105485abd861d77b", ++ "FFDH": "a14f8ad36be37b18b8f35864392f150ab7ee22c47e1870052a3f17918274af18aaeaf4cf6aacfde96c9d586eb7ebaff6b03fe3b79a8e2ff9dd6df34caaf2ac70fd3771d026b41a561ee90e4337d0575f8a0bd160c868e7e3cef88aa1d88448b1e4742ba11480a9f8a8b737347c408d74a7d57598c48875629df0c85327a124ddec1ad50cd597a985588434ce19c6f044a1696b5f244b899b7e77d4f6f20213ae8eb15d37eb8e67e6c8bdbc4fd6e17426283da96f23a897b210058c7c70fb126a5bf606dbeb1a6d5cca04184c4e95c2e8a70f50f5c1eabd066bd79c180456316ac02d366eb3b0e7ba82fb70dcbd737ca55734579dd250fffa8e0584be99d32b35", ++ } ++ ++ listTests = flag.Bool("list-tests", false, "List known test values and exit") ++) ++ ++func main() { ++ flag.Parse() ++ ++ if *listTests { ++ for _, kat := range sortedKATs() { ++ fmt.Println(kat) ++ } ++ os.Exit(0) ++ } ++ ++ if flag.NArg() != 2 || kats[flag.Arg(1)] == "" { ++ fmt.Fprintln(os.Stderr, "Usage: break-kat > output") ++ fmt.Fprintln(os.Stderr, "Possible values for :") ++ for _, kat := range sortedKATs() { ++ fmt.Fprintln(os.Stderr, " ", kat) ++ } ++ os.Exit(1) ++ } ++ ++ inPath := flag.Arg(0) ++ test := flag.Arg(1) ++ testInputValue, err := hex.DecodeString(kats[test]) ++ if err != nil { ++ panic("invalid kat data: " + err.Error()) ++ } ++ ++ binaryContents, err := ioutil.ReadFile(inPath) ++ if err != nil { ++ fmt.Fprintln(os.Stderr, err) ++ os.Exit(2) ++ } ++ ++ i := bytes.Index(binaryContents, testInputValue) ++ if i < 0 { ++ fmt.Fprintln(os.Stderr, "Expected test input value was not found in binary.") ++ os.Exit(3) ++ } ++ ++ binaryContents[i] ^= 1 ++ os.Stdout.Write(binaryContents) 
++} ++ ++func sortedKATs() []string { ++ var ret []string ++ for kat := range kats { ++ ret = append(ret, kat) ++ } ++ sort.Strings(ret) ++ return ret ++} +diff --git a/src/util/fipstools/break-tests-android.sh b/src/util/fipstools/break-tests-android.sh +deleted file mode 100644 +index efb166e..0000000 +--- a/src/util/fipstools/break-tests-android.sh ++++ /dev/null +@@ -1,117 +0,0 @@ +-# Copyright (c) 2019, Google Inc. +-# +-# Permission to use, copy, modify, and/or distribute this software for any +-# purpose with or without fee is hereby granted, provided that the above +-# copyright notice and this permission notice appear in all copies. +-# +-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +-# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +-# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +-# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-# This script exists to exercise breaking each of the FIPS tests on an Android +-# device. Since, on Android, BoringCrypto exists in both 32- and 64-bit +-# versions, the first argument must be either "32" or "64" to select which is +-# being tested. The Android source tree must have been setup (with "lunch") for +-# a matching build configuration before using this script to build the +-# binaries. (Although it'll fail non-silently if there's a mismatch.) +-# +-# Since each test needs the FIPS module to be compiled differently, and that +-# can take a long time, this script is run twice: once with "build" as the +-# second argument to run the builds, and then with "run" as the second argument +-# to run each test. +-# +-# Run it with /bin/bash, not /bin/sh, otherwise "read" may fail. +-# +-# In order to reconfigure the build for each test, it needs to set a define. It +-# does so by rewriting a template in external/boringssl/Android.bp and you must +-# add the template value before doing the builds. To do so, insert +-# -DBORINGSSL_FIPS_BREAK_XXX=1 in the cflags list for the module, probably by +-# putting it in the "boringssl_flags" stanza. +- +-set -x +-set -e +- +-if [ ! -f external/boringssl/Android.bp ]; then +- echo "Must be run from the top-level of an Android source tree." +- exit 1 +-fi +- +-. build/envsetup.sh +- +-TESTS="NONE ECDSA_PWCT CRNG RSA_PWCT AES_CBC AES_GCM DES SHA_1 SHA_256 SHA_512 RSA_SIG DRBG ECDSA_SIG Z_COMPUTATION TLS_KDF FFC_DH" +- +-if [ "x$1" = "x32" ]; then +- lib="lib" +- bits="32" +-elif [ "x$1" = "x64" ] ; then +- lib="lib64" +- bits="64" +-else +- echo "First argument must be 32 or 64" +- exit 1 +-fi +- +-if [ "x$2" = "xbuild" ]; then +- if ! grep -q DBORINGSSL_FIPS_BREAK_XXX=1 external/boringssl/Android.bp; then +- echo "Missing DBORINGSSL_FIPS_BREAK_XXX in external/boringssl/Android.bp. 
Edit the file and insert -DBORINGSSL_FIPS_BREAK_XXX=1 in the cflags for the FIPS module" +- exit 1 +- fi +- +- printf "\\x1b[1mBuilding modules\\x1b[0m\n" +- for test in $TESTS; do +- printf "\\x1b[1mBuilding for ${test}\\x1b[0m\n" +- cp external/boringssl/Android.bp external/boringssl/Android.bp.orig +- sed -i -e "s/DBORINGSSL_FIPS_BREAK_XXX/DBORINGSSL_FIPS_BREAK_${test}/" external/boringssl/Android.bp +- m test_fips +- dir=test-${bits}-${test} +- rm -Rf $dir +- mkdir $dir +- cp ${ANDROID_PRODUCT_OUT}/system/${lib}/libcrypto.so $dir +- cp ${ANDROID_PRODUCT_OUT}/system/bin/test_fips $dir +- if [ $bits = "32" ] ; then +- if ! file ${dir}/test_fips | grep -q "32-bit" ; then +- echo "32-bit build requested but binaries don't appear to be 32-bit:" +- file ${dir}/test_fips +- exit 1 +- fi +- else +- if ! file ${dir}/test_fips | grep -q "64-bit" ; then +- echo "64-bit build requested but binaries don't appear to be 64-bit:" +- file ${dir}/test_fips +- exit 1 +- fi +- fi +- cp external/boringssl/Android.bp.orig external/boringssl/Android.bp +- done +-elif [ "x$2" = "xrun" ]; then +- printf "\\x1b[1mTesting\\x1b[0m\n" +- for test in $TESTS; do +- dir=test-${bits}-${test} +- if [ ! '(' -d ${dir} -a -f ${dir}/test_fips -a -f ${dir}/libcrypto.so ')' ] ; then +- echo "Build directory ${dir} is missing or is missing files" +- exit 1 +- fi +- adb push ${dir}/* /data/local/tmp +- printf "\\x1b[1mTesting ${test}\\x1b[0m\n" +- adb shell -n -t -x LD_LIBRARY_PATH=/data/local/tmp /data/local/tmp/test_fips +- read +- done +- +- printf "\\x1b[1mTesting integrity}\\x1b[0m\n" +- src=test-${bits}-NONE +- dir=test-${bits}-INT +- rm -Rf $dir +- mkdir $dir +- go run external/boringssl/src/util/fipstools/break-hash.go ${src}/libcrypto.so ${dir}/libcrypto.so +- cp ${src}/test_fips $dir +- adb push ${dir}/* /data/local/tmp +- adb shell -n -t -x LD_LIBRARY_PATH=/data/local/tmp /data/local/tmp/test_fips +- read +-else +- echo "Second argument must be build or run" +- exit 1 +-fi +diff --git a/src/util/fipstools/break-tests.sh b/src/util/fipstools/break-tests.sh +deleted file mode 100644 +index 84c24ee..0000000 +--- a/src/util/fipstools/break-tests.sh ++++ /dev/null +@@ -1,53 +0,0 @@ +-# Copyright (c) 2018, Google Inc. +-# +-# Permission to use, copy, modify, and/or distribute this software for any +-# purpose with or without fee is hereby granted, provided that the above +-# copyright notice and this permission notice appear in all copies. +-# +-# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +-# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +-# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +-# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +-# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +-# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-# This script exists to exercise breaking each of the FIPS tests. It builds +-# BoringSSL differently for each test and that can take a long time. Thus it's +-# run twice: once, from a BoringSSL source tree, with "build" as the sole +-# argument to run the builds, and then (from the same location) with no +-# arguments to run each script. +-# +-# Run it with /bin/bash, not /bin/sh, otherwise "read" may fail. 
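
The header above describes an interactive build-then-run flow. As a rough, non-interactive alternative, the Go sketch below walks the build-<TEST> directories that the "build" phase leaves behind (directory layout and binary path assumed from the script itself) and checks that every broken build fails its power-on self test while the unmodified NONE build passes; it is only an illustration, not part of the upstream tooling.

package main

import (
    "fmt"
    "os/exec"
)

// Hypothetical non-interactive runner for the per-test build directories
// produced by "break-tests.sh build": each broken build should abort its
// self test, while the unmodified "NONE" build should exit cleanly.
var tests = []string{
    "NONE", "ECDSA_PWCT", "CRNG", "RSA_PWCT", "AES_CBC", "AES_GCM", "DES",
    "SHA_1", "SHA_256", "SHA_512", "RSA_SIG", "DRBG", "ECDSA_SIG",
    "Z_COMPUTATION", "TLS_KDF", "FFC_DH",
}

func main() {
    for _, test := range tests {
        bin := fmt.Sprintf("build-%s/util/fipstools/cavp/test_fips", test)
        err := exec.Command(bin).Run()
        failed := err != nil
        wantFailure := test != "NONE"
        status := "ok"
        if failed != wantFailure {
            status = "UNEXPECTED"
        }
        fmt.Printf("%-14s failed=%-5v want_failure=%-5v %s\n",
            test, failed, wantFailure, status)
    }
}
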
+- +-set -x +- +-TESTS="NONE ECDSA_PWCT CRNG RSA_PWCT AES_CBC AES_GCM DES SHA_1 SHA_256 SHA_512 RSA_SIG DRBG ECDSA_SIG Z_COMPUTATION TLS_KDF FFC_DH" +- +-if [ "x$1" = "xbuild" ]; then +- for test in $TESTS; do +- rm -Rf build-$test +- mkdir build-$test +- pushd build-$test +- cmake -GNinja -DCMAKE_TOOLCHAIN_FILE=${HOME}/toolchain -DFIPS=1 -DFIPS_BREAK_TEST=${test} -DCMAKE_BUILD_TYPE=Release .. +- ninja test_fips +- popd +- done +- +- exit 0 +-fi +- +-for test in $TESTS; do +- pushd build-$test +- printf "\n\n\\x1b[1m$test\\x1b[0m\n" +- ./util/fipstools/cavp/test_fips +- echo "Waiting for keypress..." +- read +- popd +-done +- +-pushd build-NONE +-printf "\\x1b[1mIntegrity\\x1b[0m\n" +-go run ../util/fipstools/break-hash.go ./util/fipstools/cavp/test_fips ./util/fipstools/cavp/test_fips_broken +-./util/fipstools/cavp/test_fips_broken +-popd +diff --git a/src/util/fipstools/cavp/CMakeLists.txt b/src/util/fipstools/cavp/CMakeLists.txt +deleted file mode 100644 +index a50c9ab..0000000 +--- a/src/util/fipstools/cavp/CMakeLists.txt ++++ /dev/null +@@ -1,42 +0,0 @@ +-include_directories(../../../include) +- +-if(FIPS) +- add_executable( +- cavp +- +- cavp_main.cc +- +- cavp_aes_gcm_test.cc +- cavp_aes_test.cc +- cavp_ctr_drbg_test.cc +- cavp_ecdsa2_keypair_test.cc +- cavp_ecdsa2_pkv_test.cc +- cavp_ecdsa2_siggen_test.cc +- cavp_ecdsa2_sigver_test.cc +- cavp_hmac_test.cc +- cavp_kas_test.cc +- cavp_keywrap_test.cc +- cavp_rsa2_keygen_test.cc +- cavp_rsa2_siggen_test.cc +- cavp_rsa2_sigver_test.cc +- cavp_sha_monte_test.cc +- cavp_sha_test.cc +- cavp_tdes_test.cc +- cavp_tlskdf_test.cc +- +- cavp_test_util.cc +- ) +- +- add_dependencies(cavp global_target) +- +- add_executable( +- test_fips +- +- test_fips.c +- ) +- +- add_dependencies(test_fips global_target) +- +- target_link_libraries(cavp test_support_lib crypto) +- target_link_libraries(test_fips test_support_lib crypto) +-endif() +diff --git a/src/util/fipstools/cavp/cavp_aes_gcm_test.cc b/src/util/fipstools/cavp/cavp_aes_gcm_test.cc +deleted file mode 100644 +index 6ee991d..0000000 +--- a/src/util/fipstools/cavp/cavp_aes_gcm_test.cc ++++ /dev/null +@@ -1,166 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_aes_gcm_test processes a NIST CAVP AES GCM test vector request file and +-// emits the corresponding response. 
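
The deleted driver that follows reads Key, IV, PT and AAD from the request file and prints CT and Tag lines. A minimal Go sketch of the same computation with the standard library (the inputs here are arbitrary placeholders, not CAVP vectors; Go's GCM appends the 16-byte tag to the ciphertext, so it is split off to match the CT/Tag layout):

package main

import (
    "crypto/aes"
    "crypto/cipher"
    "encoding/hex"
    "fmt"
)

func main() {
    // Arbitrary illustrative inputs; a real CAVP run takes these from the
    // request file (Key, IV, PT, AAD) and echoes them back in the response.
    key := make([]byte, 16) // AES-128
    iv := make([]byte, 12)  // the driver above insists on a 96-bit IV too
    pt := []byte("example plaintext")
    aad := []byte("example aad")

    block, err := aes.NewCipher(key)
    if err != nil {
        panic(err)
    }
    gcm, err := cipher.NewGCM(block)
    if err != nil {
        panic(err)
    }

    // Seal returns ciphertext || tag; split to mirror the CT/Tag response lines.
    sealed := gcm.Seal(nil, iv, pt, aad)
    ct, tag := sealed[:len(pt)], sealed[len(pt):]
    fmt.Printf("CT = %s\r\n", hex.EncodeToString(ct))
    fmt.Printf("Tag = %s\r\n", hex.EncodeToString(tag))
}
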
+- +-#include +- +-#include +-#include +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-namespace { +- +-struct TestCtx { +- const EVP_AEAD *aead; +-}; +- +-} +- +-static const EVP_AEAD *GetAEAD(const std::string &name, const bool enc) { +- if (name == "aes-128-gcm") { +- return EVP_aead_aes_128_gcm(); +- } else if (name == "aes-192-gcm") { +- return EVP_aead_aes_192_gcm(); +- } else if (name == "aes-256-gcm") { +- return EVP_aead_aes_256_gcm(); +- } +- return nullptr; +-} +- +-static bool TestAEADEncrypt(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- std::string key_len_str, iv_len_str, pt_len_str, aad_len_str, tag_len_str; +- if (!t->GetInstruction(&key_len_str, "Keylen") || +- !t->GetInstruction(&iv_len_str, "IVlen") || +- !t->GetInstruction(&pt_len_str, "PTlen") || +- !t->GetInstruction(&aad_len_str, "AADlen") || +- !t->GetInstruction(&tag_len_str, "Taglen")) { +- return false; +- } +- +- std::string count; +- std::vector key, iv, pt, aad, tag, ct; +- if (!t->GetAttribute(&count, "Count") || +- !t->GetBytes(&key, "Key") || +- !t->GetBytes(&iv, "IV") || +- !t->GetBytes(&pt, "PT") || +- !t->GetBytes(&aad, "AAD") || +- key.size() * 8 != strtoul(key_len_str.c_str(), nullptr, 0) || +- iv.size() * 8 != strtoul(iv_len_str.c_str(), nullptr, 0) || +- pt.size() * 8 != strtoul(pt_len_str.c_str(), nullptr, 0) || +- aad.size() * 8 != strtoul(aad_len_str.c_str(), nullptr, 0) || +- iv.size() != 12) { +- return false; +- } +- +- const size_t tag_len = strtoul(tag_len_str.c_str(), nullptr, 0) / 8; +- if (!AEADEncrypt(ctx->aead, &ct, &tag, tag_len, key, pt, aad, iv)) { +- return false; +- } +- printf("%s", t->CurrentTestToString().c_str()); +- printf("CT = %s\r\n", EncodeHex(ct).c_str()); +- printf("Tag = %s\r\n\r\n", EncodeHex(tag).c_str()); +- +- return true; +-} +- +-static bool TestAEADDecrypt(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- std::string key_len, iv_len, pt_len_str, aad_len_str, tag_len; +- if (!t->GetInstruction(&key_len, "Keylen") || +- !t->GetInstruction(&iv_len, "IVlen") || +- !t->GetInstruction(&pt_len_str, "PTlen") || +- !t->GetInstruction(&aad_len_str, "AADlen") || +- !t->GetInstruction(&tag_len, "Taglen")) { +- t->PrintLine("Invalid instruction block."); +- return false; +- } +- size_t aad_len = strtoul(aad_len_str.c_str(), nullptr, 0) / 8; +- size_t pt_len = strtoul(pt_len_str.c_str(), nullptr, 0) / 8; +- +- std::string count; +- std::vector key, iv, ct, aad, tag, pt; +- if (!t->GetAttribute(&count, "Count") || +- !t->GetBytes(&key, "Key") || +- !t->GetBytes(&aad, "AAD") || +- !t->GetBytes(&tag, "Tag") || +- !t->GetBytes(&iv, "IV") || +- !t->GetBytes(&ct, "CT") || +- key.size() * 8 != strtoul(key_len.c_str(), nullptr, 0) || +- iv.size() * 8 != strtoul(iv_len.c_str(), nullptr, 0) || +- ct.size() != pt_len || +- aad.size() != aad_len || +- tag.size() * 8 != strtoul(tag_len.c_str(), nullptr, 0)) { +- t->PrintLine("Invalid test case"); +- return false; +- } +- +- printf("%s", t->CurrentTestToString().c_str()); +- bool aead_result = +- AEADDecrypt(ctx->aead, &pt, pt_len, key, aad, ct, tag, iv); +- if (aead_result) { +- printf("PT = %s\r\n\r\n", EncodeHex(pt).c_str()); +- } else { +- printf("FAIL\r\n\r\n"); +- } +- +- return true; +-} +- +-static int usage(char *arg) { +- fprintf(stderr, "usage: %s (enc|dec) \n", arg); +- return 1; +-} +- +-int cavp_aes_gcm_test_main(int argc, char **argv) { +- if (argc != 4) { +- return usage(argv[0]); +- 
} +- +- const std::string mode(argv[1]); +- bool (*test_fn)(FileTest * t, void *arg); +- if (mode == "enc") { +- test_fn = &TestAEADEncrypt; +- } else if (mode == "dec") { +- test_fn = &TestAEADDecrypt; +- } else { +- return usage(argv[0]); +- } +- +- const EVP_AEAD *aead = GetAEAD(argv[2], mode == "enc"); +- if (aead == nullptr) { +- fprintf(stderr, "invalid aead: %s\n", argv[2]); +- return 1; +- } +- +- TestCtx ctx = {aead}; +- +- FileTest::Options opts; +- opts.path = argv[3]; +- opts.callback = test_fn; +- opts.arg = &ctx; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_aes_test.cc b/src/util/fipstools/cavp/cavp_aes_test.cc +deleted file mode 100644 +index d1f49b4..0000000 +--- a/src/util/fipstools/cavp/cavp_aes_test.cc ++++ /dev/null +@@ -1,225 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_aes_test processes a NIST CAVP AES test vector request file and emits +-// the corresponding response. +- +-#include +- +-#include +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-namespace { +- +-struct TestCtx { +- const EVP_CIPHER *cipher; +- bool has_iv; +- enum Mode { +- kKAT, // Known Answer Test +- kMCT, // Monte Carlo Test +- }; +- Mode mode; +-}; +- +-} +- +-static bool MonteCarlo(const TestCtx *ctx, FileTest *t, +- const EVP_CIPHER *cipher, std::vector *out, +- bool encrypt, std::vector key, +- std::vector iv, std::vector in) { +- const std::string in_label = encrypt ? "PLAINTEXT" : "CIPHERTEXT", +- result_label = encrypt ? 
"CIPHERTEXT" : "PLAINTEXT"; +- std::vector prev_result, result, prev_in; +- for (int i = 0; i < 100; i++) { +- printf("COUNT = %d\r\nKEY = %s\r\n", i, EncodeHex(key).c_str()); +- if (ctx->has_iv) { +- printf("IV = %s\r\n", EncodeHex(iv).c_str()); +- } +- printf("%s = %s\r\n", in_label.c_str(), EncodeHex(in).c_str()); +- +- if (!ctx->has_iv) { // ECB mode +- for (int j = 0; j < 1000; j++) { +- prev_result = result; +- if (!CipherOperation(cipher, &result, encrypt, key, iv, in)) { +- return false; +- } +- in = result; +- } +- } else { +- for (int j = 0; j < 1000; j++) { +- prev_result = result; +- if (j > 0) { +- if (encrypt) { +- iv = result; +- } else { +- iv = prev_in; +- } +- } +- +- if (!CipherOperation(cipher, &result, encrypt, key, iv, in)) { +- return false; +- } +- +- prev_in = in; +- +- if (j == 0) { +- in = iv; +- } else { +- in = prev_result; +- } +- } +- } +- +- printf("%s = %s\r\n\r\n", result_label.c_str(), EncodeHex(result).c_str()); +- +- const size_t key_len = key.size() * 8; +- if (key_len == 128) { +- for (size_t k = 0; k < key.size(); k++) { +- key[k] ^= result[k]; +- } +- } else if (key_len == 192) { +- for (size_t k = 0; k < key.size(); k++) { +- // Key[i+1] = Key[i] xor (last 64-bits of CT[j-1] || CT[j]) +- if (k < 8) { +- key[k] ^= prev_result[prev_result.size() - 8 + k]; +- } else { +- key[k] ^= result[k - 8]; +- } +- } +- } else { // key_len == 256 +- for (size_t k = 0; k < key.size(); k++) { +- // Key[i+1] = Key[i] xor (CT[j-1] || CT[j]) +- if (k < 16) { +- key[k] ^= prev_result[k]; +- } else { +- key[k] ^= result[k - 16]; +- } +- } +- } +- +- if (ctx->has_iv) { +- iv = result; +- in = prev_result; +- } else { +- in = result; +- } +- } +- +- return true; +-} +- +-static bool TestCipher(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- if (t->HasInstruction("ENCRYPT") == t->HasInstruction("DECRYPT")) { +- t->PrintLine("Want either ENCRYPT or DECRYPT"); +- return false; +- } +- enum { +- kEncrypt, +- kDecrypt, +- } operation = t->HasInstruction("ENCRYPT") ? kEncrypt : kDecrypt; +- +- std::string count; +- std::vector key, iv, in, result; +- if (!t->GetAttribute(&count, "COUNT") || +- !t->GetBytes(&key, "KEY") || +- (ctx->has_iv && !t->GetBytes(&iv, "IV"))) { +- return false; +- } +- +- const EVP_CIPHER *cipher = ctx->cipher; +- if (operation == kEncrypt) { +- if (!t->GetBytes(&in, "PLAINTEXT")) { +- return false; +- } +- } else { // operation == kDecrypt +- if (!t->GetBytes(&in, "CIPHERTEXT")) { +- return false; +- } +- } +- +- if (ctx->mode == TestCtx::kKAT) { +- if (!CipherOperation(cipher, &result, operation == kEncrypt, key, iv, in)) { +- return false; +- } +- const std::string label = +- operation == kEncrypt ? "CIPHERTEXT" : "PLAINTEXT"; +- printf("%s%s = %s\r\n\r\n", t->CurrentTestToString().c_str(), label.c_str(), +- EncodeHex(result).c_str()); +- } else { // ctx->mode == kMCT +- const std::string op_label = +- operation == kEncrypt ? "[ENCRYPT]" : "[DECRYPT]"; +- printf("%s\r\n\r\n", op_label.c_str()); +- if (!MonteCarlo(ctx, t, cipher, &result, operation == kEncrypt, key, iv, +- in)) { +- return false; +- } +- if (operation == kEncrypt) { +- // MCT tests contain a stray blank line after the ENCRYPT section. 
+- printf("\r\n"); +- } +- } +- +- return true; +-} +- +-static int usage(char *arg) { +- fprintf(stderr, "usage: %s (kat|mct) \n", arg); +- return 1; +-} +- +-int cavp_aes_test_main(int argc, char **argv) { +- if (argc != 4) { +- return usage(argv[0]); +- } +- +- const std::string tm(argv[1]); +- enum TestCtx::Mode test_mode; +- if (tm == "kat") { +- test_mode = TestCtx::kKAT; +- } else if (tm == "mct") { +- test_mode = TestCtx::kMCT; +- } else { +- fprintf(stderr, "invalid test_mode: %s\n", tm.c_str()); +- return usage(argv[0]); +- } +- +- const std::string cipher_name(argv[2]); +- const EVP_CIPHER *cipher = GetCipher(argv[2]); +- if (cipher == nullptr) { +- fprintf(stderr, "invalid cipher: %s\n", argv[2]); +- return 1; +- } +- const bool has_iv = +- (cipher_name != "aes-128-ecb" && +- cipher_name != "aes-192-ecb" && +- cipher_name != "aes-256-ecb"); +- +- TestCtx ctx = {cipher, has_iv, test_mode}; +- +- FileTest::Options opts; +- opts.path = argv[3]; +- opts.callback = TestCipher; +- opts.arg = &ctx; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_ctr_drbg_test.cc b/src/util/fipstools/cavp/cavp_ctr_drbg_test.cc +deleted file mode 100644 +index a27736e..0000000 +--- a/src/util/fipstools/cavp/cavp_ctr_drbg_test.cc ++++ /dev/null +@@ -1,106 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_ctr_drbg_test processes a NIST CAVP DRBG800-90A test vector request +-// file and emits the corresponding response. 
+- +-#include +- +-#include +- +-#include "cavp_test_util.h" +-#include "../crypto/fipsmodule/rand/internal.h" +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +- +- +-static bool TestCTRDRBG(FileTest *t, void *arg) { +- std::string test_type, prediction_resistance, entropy_input_len, nonce_len, +- personalization_str_len, additional_input_len, returned_bits_len; +- if (!t->GetInstruction(&test_type, "AES-256 no df") || +- !t->GetInstruction(&prediction_resistance, "PredictionResistance") || +- !t->GetInstruction(&entropy_input_len, "EntropyInputLen") || +- !t->GetInstruction(&nonce_len, "NonceLen") || +- !t->GetInstruction(&personalization_str_len, +- "PersonalizationStringLen") || +- !t->GetInstruction(&additional_input_len, "AdditionalInputLen") || +- !t->GetInstruction(&returned_bits_len, "ReturnedBitsLen") || +- !test_type.empty() || +- prediction_resistance != "False" || +- strtoul(entropy_input_len.c_str(), nullptr, 0) != +- CTR_DRBG_ENTROPY_LEN * 8 || +- nonce_len != "0") { +- return false; +- } +- +- std::string count; +- std::vector entropy, nonce, personalization_str, ai1, ai2; +- if (!t->GetAttribute(&count, "COUNT") || +- !t->GetBytes(&entropy, "EntropyInput") || +- !t->GetBytes(&nonce, "Nonce") || +- !t->GetBytes(&personalization_str, "PersonalizationString") || +- !t->GetBytes(&ai1, "AdditionalInput") || +- !t->GetBytes(&ai2, "AdditionalInput/2") || +- entropy.size() * 8 != strtoul(entropy_input_len.c_str(), nullptr, 0) || +- nonce.size() != 0 || +- personalization_str.size() * 8 != +- strtoul(personalization_str_len.c_str(), nullptr, 0) || +- ai1.size() != ai2.size() || +- ai1.size() * 8 != strtoul(additional_input_len.c_str(), nullptr, 0)) { +- return false; +- } +- +- CTR_DRBG_STATE drbg; +- CTR_DRBG_init(&drbg, entropy.data(), +- personalization_str.size() > 0 ? personalization_str.data() +- : nullptr, +- personalization_str.size()); +- +- uint64_t out_len = strtoul(returned_bits_len.c_str(), nullptr, 0); +- if (out_len == 0 || (out_len & 7) != 0) { +- return false; +- } +- out_len /= 8; +- +- std::vector out; +- out.resize(out_len); +- +- CTR_DRBG_generate(&drbg, out.data(), out.size(), +- ai1.size() > 0 ? ai1.data() : nullptr, ai1.size()); +- CTR_DRBG_generate(&drbg, out.data(), out.size(), +- ai2.size() > 0 ? ai2.data() : nullptr, ai2.size()); +- +- printf("%s", t->CurrentTestToString().c_str()); +- printf("ReturnedBits = %s\r\n\r\n", EncodeHex(out).c_str()); +- +- return true; +-} +- +-static int usage(char *arg) { +- fprintf(stderr, "usage: %s \n", arg); +- return 1; +-} +- +-int cavp_ctr_drbg_test_main(int argc, char **argv) { +- if (argc != 2) { +- return usage(argv[0]); +- } +- +- FileTest::Options opts; +- opts.path = argv[1]; +- opts.callback = TestCTRDRBG; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_ecdsa2_keypair_test.cc b/src/util/fipstools/cavp/cavp_ecdsa2_keypair_test.cc +deleted file mode 100644 +index f8c4a01..0000000 +--- a/src/util/fipstools/cavp/cavp_ecdsa2_keypair_test.cc ++++ /dev/null +@@ -1,92 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. 
+- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_ecdsa2_keypair_test processes a NIST CAVP ECDSA2 KeyPair test vector +-// request file and emits the corresponding response. +- +-#include +- +-#include +- +-#include +-#include +-#include +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-static bool TestECDSA2KeyPair(FileTest *t, void *arg) { +- std::string n_str; +- const char *group_str; +- int nid = GetECGroupNIDFromInstruction(t, &group_str); +- if (nid == NID_undef || +- !t->GetAttribute(&n_str, "N")) { +- return false; +- } +- +- // Don't use CurrentTestToString to avoid printing the N. +- printf( +- "[%s]\r\n\r\n[B.4.2 Key Pair Generation by Testing Candidates]\r\n\r\n", +- group_str); +- +- unsigned long n = strtoul(n_str.c_str(), nullptr, 10); +- for (unsigned long i = 0; i < n; i++) { +- bssl::UniquePtr qx(BN_new()), qy(BN_new()); +- bssl::UniquePtr key(EC_KEY_new_by_curve_name(nid)); +- if (!key || +- !EC_KEY_generate_key_fips(key.get()) || +- !EC_POINT_get_affine_coordinates_GFp(EC_KEY_get0_group(key.get()), +- EC_KEY_get0_public_key(key.get()), +- qx.get(), qy.get(), nullptr)) { +- return false; +- } +- +- size_t degree_len = +- (EC_GROUP_get_degree(EC_KEY_get0_group(key.get())) + 7) / 8; +- size_t order_len = +- BN_num_bytes(EC_GROUP_get0_order(EC_KEY_get0_group(key.get()))); +- std::vector qx_bytes(degree_len), qy_bytes(degree_len); +- std::vector d_bytes(order_len); +- if (!BN_bn2bin_padded(qx_bytes.data(), qx_bytes.size(), qx.get()) || +- !BN_bn2bin_padded(qy_bytes.data(), qy_bytes.size(), qy.get()) || +- !BN_bn2bin_padded(d_bytes.data(), d_bytes.size(), +- EC_KEY_get0_private_key(key.get()))) { +- return false; +- } +- +- printf("d = %s\r\nQx = %s\r\nQy = %s\r\n\r\n", EncodeHex(d_bytes).c_str(), +- EncodeHex(qx_bytes).c_str(), EncodeHex(qy_bytes).c_str()); +- } +- +- return true; +-} +- +-int cavp_ecdsa2_keypair_test_main(int argc, char **argv) { +- if (argc != 2) { +- fprintf(stderr, "usage: %s \n", +- argv[0]); +- return 1; +- } +- +- FileTest::Options opts; +- opts.path = argv[1]; +- opts.callback = TestECDSA2KeyPair; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_ecdsa2_pkv_test.cc b/src/util/fipstools/cavp/cavp_ecdsa2_pkv_test.cc +deleted file mode 100644 +index d823e7a..0000000 +--- a/src/util/fipstools/cavp/cavp_ecdsa2_pkv_test.cc ++++ /dev/null +@@ -1,66 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_ecdsa2_pkv_test processes a NIST CAVP ECDSA2 PKV test vector request file +-// and emits the corresponding response. +- +-#include +- +-#include +-#include +-#include +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "cavp_test_util.h" +- +- +-static bool TestECDSA2PKV(FileTest *t, void *arg) { +- int nid = GetECGroupNIDFromInstruction(t); +- if (nid == NID_undef) { +- return false; +- } +- bssl::UniquePtr key(EC_KEY_new_by_curve_name(nid)); +- bssl::UniquePtr qx = GetBIGNUM(t, "Qx"); +- bssl::UniquePtr qy = GetBIGNUM(t, "Qy"); +- if (!key || !qx || !qy) { +- return false; +- } +- +- if (EC_KEY_set_public_key_affine_coordinates(key.get(), qx.get(), qy.get())) { +- printf("%sResult = P\r\n\r\n", t->CurrentTestToString().c_str()); +- } else { +- char buf[256]; +- ERR_error_string_n(ERR_get_error(), buf, sizeof(buf)); +- printf("%sResult = F (%s)\r\n\r\n", t->CurrentTestToString().c_str(), buf); +- } +- ERR_clear_error(); +- return true; +-} +- +-int cavp_ecdsa2_pkv_test_main(int argc, char **argv) { +- if (argc != 2) { +- fprintf(stderr, "usage: %s \n", +- argv[0]); +- return 1; +- } +- +- FileTest::Options opts; +- opts.path = argv[1]; +- opts.callback = TestECDSA2PKV; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_ecdsa2_siggen_test.cc b/src/util/fipstools/cavp/cavp_ecdsa2_siggen_test.cc +deleted file mode 100644 +index 1282eaa..0000000 +--- a/src/util/fipstools/cavp/cavp_ecdsa2_siggen_test.cc ++++ /dev/null +@@ -1,123 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_ecdsa2_siggen_test processes NIST CAVP ECDSA2 SigGen and +-// SigGenComponent test vector request files and emits the corresponding +-// response. 
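
As the SigGen driver below shows, each response entry carries a freshly generated public key (Qx, Qy) together with the signature (R, S) over the hashed message. A Go sketch of that computation for P-256 with SHA-256, using an arbitrary message and omitting the pairwise consistency test that EC_KEY_generate_key_fips performs:

package main

import (
    "crypto/ecdsa"
    "crypto/elliptic"
    "crypto/rand"
    "crypto/sha256"
    "fmt"
)

func main() {
    // P-256 with SHA-256; a CAVP run reads the curve and digest from the
    // instruction block and Msg from each test case.
    key, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader)
    if err != nil {
        panic(err)
    }
    digest := sha256.Sum256([]byte("example message"))

    r, s, err := ecdsa.Sign(rand.Reader, key, digest[:])
    if err != nil {
        panic(err)
    }

    // The response reports the fresh public key alongside the signature.
    fmt.Printf("Qx = %064x\r\nQy = %064x\r\n", key.PublicKey.X, key.PublicKey.Y)
    fmt.Printf("R = %064x\r\nS = %064x\r\n", r, s)
}
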
+- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "../crypto/internal.h" +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-static bool TestECDSA2SigGenImpl(FileTest *t, bool is_component) { +- int nid = GetECGroupNIDFromInstruction(t); +- const EVP_MD *md = GetDigestFromInstruction(t); +- if (nid == NID_undef || md == nullptr) { +- return false; +- } +- bssl::UniquePtr qx(BN_new()), qy(BN_new()); +- bssl::UniquePtr key(EC_KEY_new_by_curve_name(nid)); +- std::vector msg; +- if (!qx || !qy || !key || +- !EC_KEY_generate_key_fips(key.get()) || +- !EC_POINT_get_affine_coordinates_GFp(EC_KEY_get0_group(key.get()), +- EC_KEY_get0_public_key(key.get()), +- qx.get(), qy.get(), nullptr) || +- !t->GetBytes(&msg, "Msg")) { +- return false; +- } +- +- uint8_t digest[EVP_MAX_MD_SIZE]; +- unsigned digest_len; +- if (is_component) { +- if (msg.size() != EVP_MD_size(md)) { +- t->PrintLine("Bad input length."); +- return false; +- } +- digest_len = EVP_MD_size(md); +- OPENSSL_memcpy(digest, msg.data(), msg.size()); +- } else if (!EVP_Digest(msg.data(), msg.size(), digest, &digest_len, md, +- nullptr)) { +- return false; +- } +- +- bssl::UniquePtr sig(ECDSA_do_sign(digest, digest_len, key.get())); +- if (!sig) { +- return false; +- } +- +- size_t degree_len = +- (EC_GROUP_get_degree(EC_KEY_get0_group(key.get())) + 7) / 8; +- size_t order_len = +- BN_num_bytes(EC_GROUP_get0_order(EC_KEY_get0_group(key.get()))); +- std::vector qx_bytes(degree_len), qy_bytes(degree_len); +- std::vector r_bytes(order_len), s_bytes(order_len); +- if (!BN_bn2bin_padded(qx_bytes.data(), qx_bytes.size(), qx.get()) || +- !BN_bn2bin_padded(qy_bytes.data(), qy_bytes.size(), qy.get()) || +- !BN_bn2bin_padded(r_bytes.data(), r_bytes.size(), sig->r) || +- !BN_bn2bin_padded(s_bytes.data(), s_bytes.size(), sig->s)) { +- return false; +- } +- +- printf("%sQx = %s\r\nQy = %s\r\nR = %s\r\nS = %s\r\n\r\n", +- t->CurrentTestToString().c_str(), EncodeHex(qx_bytes).c_str(), +- EncodeHex(qy_bytes).c_str(), EncodeHex(r_bytes).c_str(), +- EncodeHex(s_bytes).c_str()); +- return true; +-} +- +-static bool TestECDSA2SigGen(FileTest *t, void *arg) { +- return TestECDSA2SigGenImpl(t, false); +-} +- +-static bool TestECDSA2SigGenComponent(FileTest *t, void *arg) { +- return TestECDSA2SigGenImpl(t, true); +-} +- +-int cavp_ecdsa2_siggen_test_main(int argc, char **argv) { +- if (argc != 3) { +- fprintf(stderr, "usage: %s (SigGen|SigGenComponent) \n", +- argv[0]); +- return 1; +- } +- +- static bool (*test_func)(FileTest *, void *); +- if (strcmp(argv[1], "SigGen") == 0) { +- test_func = TestECDSA2SigGen; +- } else if (strcmp(argv[1], "SigGenComponent") == 0) { +- test_func = TestECDSA2SigGenComponent; +- } else { +- fprintf(stderr, "Unknown test type: %s\n", argv[1]); +- return 1; +- } +- +- FileTest::Options opts; +- opts.path = argv[2]; +- opts.callback = test_func; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_ecdsa2_sigver_test.cc b/src/util/fipstools/cavp/cavp_ecdsa2_sigver_test.cc +deleted file mode 100644 +index f3fd4b1..0000000 +--- a/src/util/fipstools/cavp/cavp_ecdsa2_sigver_test.cc ++++ /dev/null +@@ -1,84 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. 
+- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_ecdsa2_sigver_test processes a NIST CAVP ECDSA2 SigVer test vector +-// request file and emits the corresponding response. +- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "cavp_test_util.h" +- +- +-static bool TestECDSA2SigVer(FileTest *t, void *arg) { +- int nid = GetECGroupNIDFromInstruction(t); +- const EVP_MD *md = GetDigestFromInstruction(t); +- if (nid == NID_undef || md == nullptr) { +- return false; +- } +- bssl::UniquePtr sig(ECDSA_SIG_new()); +- bssl::UniquePtr key(EC_KEY_new_by_curve_name(nid)); +- bssl::UniquePtr qx = GetBIGNUM(t, "Qx"); +- bssl::UniquePtr qy = GetBIGNUM(t, "Qy"); +- bssl::UniquePtr r = GetBIGNUM(t, "R"); +- bssl::UniquePtr s = GetBIGNUM(t, "S"); +- std::vector msg; +- uint8_t digest[EVP_MAX_MD_SIZE]; +- unsigned digest_len; +- if (!sig || !key || !qx || !qy || !r || !s || +- !EC_KEY_set_public_key_affine_coordinates(key.get(), qx.get(), +- qy.get()) || +- !t->GetBytes(&msg, "Msg") || +- !EVP_Digest(msg.data(), msg.size(), digest, &digest_len, md, nullptr)) { +- return false; +- } +- +- BN_free(sig->r); +- sig->r = r.release(); +- BN_free(sig->s); +- sig->s = s.release(); +- +- if (ECDSA_do_verify(digest, digest_len, sig.get(), key.get())) { +- printf("%sResult = P\r\n\r\n", t->CurrentTestToString().c_str()); +- } else { +- char buf[256]; +- ERR_error_string_n(ERR_get_error(), buf, sizeof(buf)); +- printf("%sResult = F (%s)\r\n\r\n", t->CurrentTestToString().c_str(), buf); +- } +- ERR_clear_error(); +- return true; +-} +- +-int cavp_ecdsa2_sigver_test_main(int argc, char **argv) { +- if (argc != 2) { +- fprintf(stderr, "usage: %s \n", +- argv[0]); +- return 1; +- } +- +- FileTest::Options opts; +- opts.path = argv[1]; +- opts.callback = TestECDSA2SigVer; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_hmac_test.cc b/src/util/fipstools/cavp/cavp_hmac_test.cc +deleted file mode 100644 +index c88226a..0000000 +--- a/src/util/fipstools/cavp/cavp_hmac_test.cc ++++ /dev/null +@@ -1,106 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_hmac_test processes a NIST CAVP HMAC test vector request file and emits +-// the corresponding response. +- +-#include +- +-#include +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-static bool TestHMAC(FileTest *t, void *arg) { +- std::string md_len_str; +- if (!t->GetInstruction(&md_len_str, "L")) { +- return false; +- } +- const size_t md_len = strtoul(md_len_str.c_str(), nullptr, 0); +- +- const EVP_MD *md; +- switch (md_len) { +- case 20: +- md = EVP_sha1(); +- break; +- case 28: +- md = EVP_sha224(); +- break; +- case 32: +- md = EVP_sha256(); +- break; +- case 48: +- md = EVP_sha384(); +- break; +- case 64: +- md = EVP_sha512(); +- break; +- default: +- return false; +- } +- +- std::string count_str, k_len_str, t_len_str; +- std::vector key, msg; +- if (!t->GetAttribute(&count_str, "Count") || +- !t->GetAttribute(&k_len_str, "Klen") || +- !t->GetAttribute(&t_len_str, "Tlen") || +- !t->GetBytes(&key, "Key") || +- !t->GetBytes(&msg, "Msg")) { +- return false; +- } +- +- size_t k_len = strtoul(k_len_str.c_str(), nullptr, 0); +- size_t t_len = strtoul(t_len_str.c_str(), nullptr, 0); +- if (key.size() < k_len) { +- return false; +- } +- unsigned out_len; +- uint8_t out[EVP_MAX_MD_SIZE]; +- if (HMAC(md, key.data(), k_len, msg.data(), msg.size(), out, &out_len) == +- NULL) { +- return false; +- } +- +- if (out_len < t_len) { +- return false; +- } +- +- printf("%s", t->CurrentTestToString().c_str()); +- printf("Mac = %s\r\n\r\n", +- EncodeHex(bssl::MakeConstSpan(out, t_len)).c_str()); +- +- return true; +-} +- +-static int usage(char *arg) { +- fprintf(stderr, "usage: %s \n", arg); +- return 1; +-} +- +-int cavp_hmac_test_main(int argc, char **argv) { +- if (argc != 2) { +- return usage(argv[0]); +- } +- +- FileTest::Options opts; +- opts.path = argv[1]; +- opts.callback = TestHMAC; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_kas_test.cc b/src/util/fipstools/cavp/cavp_kas_test.cc +deleted file mode 100644 +index 9a74f1d..0000000 +--- a/src/util/fipstools/cavp/cavp_kas_test.cc ++++ /dev/null +@@ -1,156 +0,0 @@ +-/* Copyright (c) 2018, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_kas_test processes NIST CAVP ECC KAS test vector request files and +-// emits the corresponding response. 
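
The KAS driver below computes an ECDH shared secret with the CAVS ephemeral public key and reports a hash of it (ECDH_compute_key_fips hashes the x-coordinate of the shared point), comparing against CAVSHashZZ in validity mode. A rough Go equivalent of the function-test direction using crypto/ecdh, which assumes Go 1.20 or newer and fixes the curve and digest to the "EC - SHA256" parameter set:

package main

import (
    "crypto/ecdh"
    "crypto/rand"
    "crypto/sha256"
    "fmt"
)

func main() {
    curve := ecdh.P256()

    // Stand-ins for the CAVS ephemeral key and our own (IUT) ephemeral key.
    cavs, err := curve.GenerateKey(rand.Reader)
    if err != nil {
        panic(err)
    }
    iut, err := curve.GenerateKey(rand.Reader)
    if err != nil {
        panic(err)
    }

    // The shared secret is the x-coordinate of the ECDH point; the driver
    // reports its hash (HashZZ / IUTHashZZ in the response).
    z, err := iut.ECDH(cavs.PublicKey())
    if err != nil {
        panic(err)
    }
    hashZZ := sha256.Sum256(z)

    pub := iut.PublicKey().Bytes() // uncompressed point: 0x04 || X || Y
    fmt.Printf("QeIUTx = %x\r\nQeIUTy = %x\r\n", pub[1:33], pub[33:])
    fmt.Printf("HashZZ = %x\r\n", hashZZ[:])
}
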
+- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "../crypto/internal.h" +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-static bool TestKAS(FileTest *t, void *arg) { +- const bool validate = *reinterpret_cast(arg); +- +- int nid = NID_undef; +- size_t digest_len = 0; +- +- if (t->HasInstruction("EB - SHA224")) { +- nid = NID_secp224r1; +- digest_len = SHA224_DIGEST_LENGTH; +- } else if (t->HasInstruction("EC - SHA256")) { +- nid = NID_X9_62_prime256v1; +- digest_len = SHA256_DIGEST_LENGTH; +- } else if (t->HasInstruction("ED - SHA384")) { +- nid = NID_secp384r1; +- digest_len = SHA384_DIGEST_LENGTH; +- } else if (t->HasInstruction("EE - SHA512")) { +- nid = NID_secp521r1; +- digest_len = SHA512_DIGEST_LENGTH; +- } else { +- return false; +- } +- +- if (!t->HasAttribute("COUNT")) { +- return false; +- } +- +- bssl::UniquePtr their_x(GetBIGNUM(t, "QeCAVSx")); +- bssl::UniquePtr their_y(GetBIGNUM(t, "QeCAVSy")); +- bssl::UniquePtr ec_key(EC_KEY_new_by_curve_name(nid)); +- bssl::UniquePtr ctx(BN_CTX_new()); +- if (!their_x || !their_y || !ec_key || !ctx) { +- return false; +- } +- +- const EC_GROUP *const group = EC_KEY_get0_group(ec_key.get()); +- bssl::UniquePtr their_point(EC_POINT_new(group)); +- if (!their_point || +- !EC_POINT_set_affine_coordinates_GFp( +- group, their_point.get(), their_x.get(), their_y.get(), ctx.get())) { +- return false; +- } +- +- if (validate) { +- bssl::UniquePtr our_k(GetBIGNUM(t, "deIUT")); +- if (!our_k || +- !EC_KEY_set_private_key(ec_key.get(), our_k.get()) || +- // These attributes are ignored. +- !t->HasAttribute("QeIUTx") || +- !t->HasAttribute("QeIUTy")) { +- return false; +- } +- } else if (!EC_KEY_generate_key(ec_key.get())) { +- return false; +- } +- +- uint8_t digest[EVP_MAX_MD_SIZE]; +- if (!ECDH_compute_key_fips(digest, digest_len, their_point.get(), +- ec_key.get())) { +- return false; +- } +- +- if (validate) { +- std::vector expected_shared_bytes; +- if (!t->GetBytes(&expected_shared_bytes, "CAVSHashZZ")) { +- return false; +- } +- const bool ok = +- digest_len == expected_shared_bytes.size() && +- OPENSSL_memcmp(digest, expected_shared_bytes.data(), digest_len) == 0; +- +- printf("%sIUTHashZZ = %s\r\nResult = %c\r\n\r\n\r\n", +- t->CurrentTestToString().c_str(), +- EncodeHex(bssl::MakeConstSpan(digest, digest_len)).c_str(), +- ok ? 
'P' : 'F'); +- } else { +- const EC_POINT *pub = EC_KEY_get0_public_key(ec_key.get()); +- bssl::UniquePtr x(BN_new()); +- bssl::UniquePtr y(BN_new()); +- if (!x || !y || +- !EC_POINT_get_affine_coordinates_GFp(group, pub, x.get(), y.get(), +- ctx.get())) { +- return false; +- } +- bssl::UniquePtr x_hex(BN_bn2hex(x.get())); +- bssl::UniquePtr y_hex(BN_bn2hex(y.get())); +- +- printf("%sQeIUTx = %s\r\nQeIUTy = %s\r\nHashZZ = %s\r\n", +- t->CurrentTestToString().c_str(), x_hex.get(), y_hex.get(), +- EncodeHex(bssl::MakeConstSpan(digest, digest_len)).c_str()); +- } +- +- return true; +-} +- +-int cavp_kas_test_main(int argc, char **argv) { +- if (argc != 3) { +- fprintf(stderr, "usage: %s (validity|function) \n", +- argv[0]); +- return 1; +- } +- +- bool validity; +- if (strcmp(argv[1], "validity") == 0) { +- validity = true; +- } else if (strcmp(argv[1], "function") == 0) { +- validity = false; +- } else { +- fprintf(stderr, "Unknown test type: %s\n", argv[1]); +- return 1; +- } +- +- FileTest::Options opts; +- opts.path = argv[2]; +- opts.arg = &validity; +- opts.callback = TestKAS; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- opts.is_kas_test = true; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_keywrap_test.cc b/src/util/fipstools/cavp/cavp_keywrap_test.cc +deleted file mode 100644 +index 67397ec..0000000 +--- a/src/util/fipstools/cavp/cavp_keywrap_test.cc ++++ /dev/null +@@ -1,166 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_keywrap_test processes a NIST CAVP AES test vector request file and +-// emits the corresponding response. 
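
The keywrap driver below exercises AES key wrap, with and without padding, in both directions. The Go standard library has no key-wrap primitive, so the following is a from-scratch sketch of only the unpadded RFC 3394 / SP 800-38F wrap core, included to make the construction concrete; it has not been checked against CAVP vectors:

package main

import (
    "crypto/aes"
    "encoding/binary"
    "fmt"
)

// Sketch of the unpadded AES key wrap (RFC 3394) forward direction.
func wrap(kek, plaintext []byte) ([]byte, error) {
    if len(plaintext)%8 != 0 || len(plaintext) < 16 {
        return nil, fmt.Errorf("plaintext must be >= 16 bytes and a multiple of 8")
    }
    block, err := aes.NewCipher(kek)
    if err != nil {
        return nil, err
    }
    n := len(plaintext) / 8
    a := []byte{0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6} // default IV
    r := make([]byte, len(plaintext))
    copy(r, plaintext)

    buf := make([]byte, 16)
    for j := 0; j < 6; j++ {
        for i := 1; i <= n; i++ {
            copy(buf, a)
            copy(buf[8:], r[(i-1)*8:i*8])
            block.Encrypt(buf, buf)
            // A = MSB64(B) XOR t, with t = n*j + i as a big-endian counter.
            var tb [8]byte
            binary.BigEndian.PutUint64(tb[:], uint64(n*j+i))
            copy(a, buf[:8])
            for k := 0; k < 8; k++ {
                a[k] ^= tb[k]
            }
            copy(r[(i-1)*8:i*8], buf[8:])
        }
    }
    return append(a, r...), nil
}

func main() {
    kek := make([]byte, 16) // AES-128 KEK, all zero, illustrative only
    key := make([]byte, 16) // key data to wrap
    wrapped, err := wrap(kek, key)
    if err != nil {
        panic(err)
    }
    fmt.Printf("C = %x\r\n", wrapped)
}
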
+- +-#include +- +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-namespace { +- +-struct TestCtx { +- bool encrypt; +- bool padding; +-}; +- +-} // namespace +- +-static bool AESKeyWrap(std::vector *out, bool encrypt, +- const std::vector &key, +- const std::vector &in) { +- size_t key_bits = key.size() * 8; +- if (key_bits != 128 && key_bits != 192 && key_bits != 256) { +- return false; +- } +- AES_KEY aes_key; +- +- if (encrypt) { +- out->resize(in.size() + 8); +- if (AES_set_encrypt_key(key.data(), key_bits, &aes_key) || +- AES_wrap_key(&aes_key, nullptr, out->data(), in.data(), in.size()) == +- -1) { +- return false; +- } +- } else { +- out->resize(in.size() - 8); +- if (AES_set_decrypt_key(key.data(), key_bits, &aes_key) || +- AES_unwrap_key(&aes_key, nullptr, out->data(), in.data(), in.size()) == +- -1) { +- return false; +- } +- } +- +- return true; +-} +- +-static bool AESKeyWrapWithPadding(std::vector *out, bool encrypt, +- const std::vector &key, +- const std::vector &in) { +- const size_t key_bits = key.size() * 8; +- if (key_bits != 128 && key_bits != 192 && key_bits != 256) { +- return false; +- } +- AES_KEY aes_key; +- +- size_t out_len; +- if (encrypt) { +- out->resize(in.size() + 15); +- if (AES_set_encrypt_key(key.data(), key_bits, &aes_key) || +- !AES_wrap_key_padded(&aes_key, out->data(), &out_len, out->size(), +- in.data(), in.size())) { +- return false; +- } +- } else { +- out->resize(in.size()); +- if (AES_set_decrypt_key(key.data(), key_bits, &aes_key) || +- !AES_unwrap_key_padded(&aes_key, out->data(), &out_len, out->size(), +- in.data(), in.size())) { +- return false; +- } +- } +- +- out->resize(out_len); +- return true; +-} +- +-static bool TestCipher(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- std::string count, unused, in_label = ctx->encrypt ? "P" : "C", +- result_label = ctx->encrypt ? 
"C" : "P"; +- std::vector key, in, result; +- // clang-format off +- if (!t->GetInstruction(&unused, "PLAINTEXT LENGTH") || +- !t->GetAttribute(&count, "COUNT") || +- !t->GetBytes(&key, "K") || +- !t->GetBytes(&in, in_label)) { +- return false; +- } +- // clang-format on +- +- auto wrap_function = AESKeyWrap; +- if (ctx->padding) { +- wrap_function = AESKeyWrapWithPadding; +- } +- +- printf("%s", t->CurrentTestToString().c_str()); +- if (!wrap_function(&result, ctx->encrypt, key, in)) { +- if (ctx->encrypt) { +- return false; +- } else { +- printf("FAIL\r\n\r\n"); +- } +- } else { +- printf("%s = %s\r\n\r\n", result_label.c_str(), EncodeHex(result).c_str()); +- } +- +- return true; +-} +- +-static int usage(char *arg) { +- fprintf( +- stderr, +- "usage: %s (enc|dec|enc-pad|dec-pad) (128|192|256) \n", +- arg); +- return 1; +-} +- +-int cavp_keywrap_test_main(int argc, char **argv) { +- if (argc != 4) { +- return usage(argv[0]); +- } +- +- const std::string op(argv[1]); +- bool encrypt = false; +- bool padding = false; +- if (op == "enc") { +- encrypt = true; +- } else if (op == "dec") { +- } else if (op == "enc-pad") { +- encrypt = true; +- padding = true; +- } else if (op == "dec-pad") { +- padding = true; +- } else { +- return usage(argv[0]); +- } +- +- TestCtx ctx = {encrypt, padding}; +- +- FileTest::Options opts; +- opts.path = argv[3]; +- opts.callback = TestCipher; +- opts.arg = &ctx; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_main.cc b/src/util/fipstools/cavp/cavp_main.cc +deleted file mode 100644 +index 64dbd69..0000000 +--- a/src/util/fipstools/cavp/cavp_main.cc ++++ /dev/null +@@ -1,73 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_main is a wrapper that invokes the main entry function of one of the +-// CAVP validation suite binaries. 
+- +-#include +-#include +-#include +- +-#include +- +-#include "cavp_test_util.h" +- +- +-static int usage(char *arg) { +- fprintf(stderr, "usage: %s \n", arg); +- return 1; +-} +- +-struct TestSuite { +- std::string name; +- int (*main_func)(int argc, char **argv); +-}; +- +-static TestSuite all_test_suites[] = { +- {"aes", &cavp_aes_test_main}, +- {"aes_gcm", &cavp_aes_gcm_test_main}, +- {"ctr_drbg", &cavp_ctr_drbg_test_main}, +- {"ecdsa2_keypair", &cavp_ecdsa2_keypair_test_main}, +- {"ecdsa2_pkv", &cavp_ecdsa2_pkv_test_main}, +- {"ecdsa2_siggen", &cavp_ecdsa2_siggen_test_main}, +- {"ecdsa2_sigver", &cavp_ecdsa2_sigver_test_main}, +- {"hmac", &cavp_hmac_test_main}, +- {"kas", &cavp_kas_test_main}, +- {"keywrap", &cavp_keywrap_test_main}, +- {"rsa2_keygen", &cavp_rsa2_keygen_test_main}, +- {"rsa2_siggen", &cavp_rsa2_siggen_test_main}, +- {"rsa2_sigver", &cavp_rsa2_sigver_test_main}, +- {"tlskdf", &cavp_tlskdf_test_main}, +- {"sha", &cavp_sha_test_main}, +- {"sha_monte", &cavp_sha_monte_test_main}, +- {"tdes", &cavp_tdes_test_main} +-}; +- +-int main(int argc, char **argv) { +- CRYPTO_library_init(); +- +- if (argc < 3) { +- return usage(argv[0]); +- } +- +- const std::string suite(argv[1]); +- for (const TestSuite &s : all_test_suites) { +- if (s.name == suite) { +- return s.main_func(argc - 1, &argv[1]); +- } +- } +- +- fprintf(stderr, "invalid test suite: %s\n\n", argv[1]); +- return usage(argv[0]); +-} +diff --git a/src/util/fipstools/cavp/cavp_rsa2_keygen_test.cc b/src/util/fipstools/cavp/cavp_rsa2_keygen_test.cc +deleted file mode 100644 +index e7088c7..0000000 +--- a/src/util/fipstools/cavp/cavp_rsa2_keygen_test.cc ++++ /dev/null +@@ -1,93 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_rsa2_keygen_test processes NIST CAVP RSA2 KeyGen test vector request +-// files and emits the corresponding response. 
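
The KeyGen driver below prints e, p, q, n and d for each generated key. A Go sketch of the same response layout using crypto/rsa, which does not apply the FIPS 186-4 generation constraints that RSA_generate_key_fips enforces, so it only illustrates the output:

package main

import (
    "crypto/rand"
    "crypto/rsa"
    "fmt"
)

func main() {
    // 2048-bit modulus, matching a typical "[mod = 2048]" instruction block.
    key, err := rsa.GenerateKey(rand.Reader, 2048)
    if err != nil {
        panic(err)
    }
    p, q := key.Primes[0], key.Primes[1]
    fmt.Printf("e = %x\r\n", key.E)
    fmt.Printf("p = %x\r\n", p)
    fmt.Printf("q = %x\r\n", q)
    fmt.Printf("n = %x\r\n", key.N)
    fmt.Printf("d = %x\r\n", key.D)
}
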
+- +-#include +- +-#include +-#include +-#include +- +-#include "../crypto/internal.h" +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-static bool TestRSA2KeyGen(FileTest *t, void *arg) { +- std::string mod_str, table, count_str; +- if (!t->GetInstruction(&mod_str, "mod") || +- !t->GetInstruction(&table, "Table for M-R Test") || +- table != "C.2" || +- !t->GetAttribute(&count_str, "N")) { +- return false; +- } +- +- printf("[mod = %s]\r\n", mod_str.c_str()); +- printf("[Table for M-R Test = %s]\r\n\r\n", table.c_str()); +- +- size_t bits = strtoul(mod_str.c_str(), nullptr, 0); +- size_t count = strtoul(count_str.c_str(), nullptr, 0); +- for (size_t i = 0; i < count; i++) { +- bssl::UniquePtr key(RSA_new()); +- if (key == nullptr || +- bits == 0 || +- !RSA_generate_key_fips(key.get(), bits, nullptr)) { +- return 0; +- } +- +- const BIGNUM *n, *e, *d, *p, *q; +- RSA_get0_key(key.get(), &n, &e, &d); +- RSA_get0_factors(key.get(), &p, &q); +- std::vector n_bytes(BN_num_bytes(n)), e_bytes(BN_num_bytes(e)), +- d_bytes((bits + 7) / 8), p_bytes(BN_num_bytes(p)), +- q_bytes(BN_num_bytes(q)); +- if (n == NULL || +- BN_bn2bin(n, n_bytes.data()) != n_bytes.size() || +- e == NULL || +- BN_bn2bin(e, e_bytes.data()) != e_bytes.size() || +- d == NULL || +- !BN_bn2bin_padded(d_bytes.data(), d_bytes.size(), d) || +- p == NULL || +- BN_bn2bin(p, p_bytes.data()) != p_bytes.size() || +- q == NULL || +- BN_bn2bin(q, q_bytes.data()) != q_bytes.size()) { +- return false; +- } +- +- printf("e = %s\r\np = %s\r\nq = %s\r\nn = %s\r\nd = %s\r\n\r\n", +- EncodeHex(e_bytes).c_str(), EncodeHex(p_bytes).c_str(), +- EncodeHex(q_bytes).c_str(), EncodeHex(n_bytes).c_str(), +- EncodeHex(d_bytes).c_str()); +- } +- +- return true; +-} +- +-int cavp_rsa2_keygen_test_main(int argc, char **argv) { +- if (argc != 2) { +- fprintf(stderr, "usage: %s \n", +- argv[0]); +- return 1; +- } +- +- FileTest::Options opts; +- opts.path = argv[1]; +- opts.callback = TestRSA2KeyGen; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_rsa2_siggen_test.cc b/src/util/fipstools/cavp/cavp_rsa2_siggen_test.cc +deleted file mode 100644 +index 636a73a..0000000 +--- a/src/util/fipstools/cavp/cavp_rsa2_siggen_test.cc ++++ /dev/null +@@ -1,128 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_rsa2_siggen_test processes NIST CAVP RSA2 SigGen test vector request +-// files and emits the corresponding response. 
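
The SigGen driver below signs each hashed message with either PKCS#1 v1.5 or PSS, where the -1 salt-length argument to RSA_sign_pss_mgf1 selects a salt as long as the digest. A Go sketch of both modes over SHA-256 with an arbitrary message:

package main

import (
    "crypto"
    "crypto/rand"
    "crypto/rsa"
    "crypto/sha256"
    "fmt"
)

func main() {
    key, err := rsa.GenerateKey(rand.Reader, 2048)
    if err != nil {
        panic(err)
    }
    digest := sha256.Sum256([]byte("example message"))

    // PKCS#1 v1.5, the "pkcs15" mode of the driver.
    sigV15, err := rsa.SignPKCS1v15(rand.Reader, key, crypto.SHA256, digest[:])
    if err != nil {
        panic(err)
    }

    // PSS with MGF1/SHA-256 and salt length equal to the hash length,
    // mirroring the -1 salt-length convention noted above.
    sigPSS, err := rsa.SignPSS(rand.Reader, key, crypto.SHA256, digest[:],
        &rsa.PSSOptions{SaltLength: rsa.PSSSaltLengthEqualsHash})
    if err != nil {
        panic(err)
    }

    fmt.Printf("S (v1.5) = %x\r\n", sigV15)
    fmt.Printf("S (pss)  = %x\r\n", sigPSS)
}
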
+- +-#include +- +-#include +-#include +-#include +-#include +- +-#include "../crypto/internal.h" +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +-namespace { +- +-struct TestCtx { +- bssl::UniquePtr key; +- bool is_pss; +-}; +- +-} +- +-static bool TestRSA2SigGen(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- std::string mod_str, hash; +- std::vector msg; +- if (!t->GetInstruction(&mod_str, "mod") || +- !t->GetAttribute(&hash, "SHAAlg") || +- !t->GetBytes(&msg, "Msg")) { +- return false; +- } +- +- std::string test = t->CurrentTestToString(); +- if (t->IsAtNewInstructionBlock()) { +- int mod_bits = strtoul(mod_str.c_str(), nullptr, 0); +- ctx->key = bssl::UniquePtr(RSA_new()); +- if (ctx->key == nullptr || +- mod_bits == 0 || +- !RSA_generate_key_fips(ctx->key.get(), mod_bits, nullptr)) { +- return false; +- } +- +- const BIGNUM *n, *e; +- RSA_get0_key(ctx->key.get(), &n, &e, nullptr); +- +- std::vector n_bytes(BN_num_bytes(n)); +- std::vector e_bytes(BN_num_bytes(e)); +- if (!BN_bn2bin_padded(n_bytes.data(), n_bytes.size(), n) || +- !BN_bn2bin_padded(e_bytes.data(), e_bytes.size(), e)) { +- return false; +- } +- +- printf("[mod = %s]\r\n\r\nn = %s\r\n\r\ne = %s", mod_str.c_str(), +- EncodeHex(n_bytes).c_str(), EncodeHex(e_bytes).c_str()); +- test = test.substr(test.find("]") + 3); +- } +- +- const EVP_MD *md = EVP_get_digestbyname(hash.c_str()); +- uint8_t digest_buf[EVP_MAX_MD_SIZE]; +- std::vector sig(RSA_size(ctx->key.get())); +- unsigned digest_len; +- size_t sig_len; +- if (md == NULL || +- !EVP_Digest(msg.data(), msg.size(), digest_buf, &digest_len, md, NULL)) { +- return false; +- } +- +- if (ctx->is_pss) { +- if (!RSA_sign_pss_mgf1(ctx->key.get(), &sig_len, sig.data(), sig.size(), +- digest_buf, digest_len, md, md, -1)) { +- return false; +- } +- } else { +- unsigned sig_len_u; +- if (!RSA_sign(EVP_MD_type(md), digest_buf, digest_len, sig.data(), +- &sig_len_u, ctx->key.get())) { +- return false; +- } +- sig_len = sig_len_u; +- } +- +- sig.resize(sig_len); +- printf("%sS = %s\r\n\r\n", test.c_str(), EncodeHex(sig).c_str()); +- return true; +-} +- +-int cavp_rsa2_siggen_test_main(int argc, char **argv) { +- if (argc != 3) { +- fprintf(stderr, "usage: %s (pkcs15|pss) \n", +- argv[0]); +- return 1; +- } +- +- TestCtx ctx; +- if (strcmp(argv[1], "pkcs15") == 0) { +- ctx = {nullptr, false}; +- } else if (strcmp(argv[1], "pss") == 0) { +- ctx = {nullptr, true}; +- } else { +- fprintf(stderr, "Unknown test type: %s\n", argv[1]); +- return 1; +- } +- +- FileTest::Options opts; +- opts.path = argv[2]; +- opts.callback = TestRSA2SigGen; +- opts.arg = &ctx; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_rsa2_sigver_test.cc b/src/util/fipstools/cavp/cavp_rsa2_sigver_test.cc +deleted file mode 100644 +index cbcfc1f..0000000 +--- a/src/util/fipstools/cavp/cavp_rsa2_sigver_test.cc ++++ /dev/null +@@ -1,125 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_rsa2_sigver_test processes NIST CAVP RSA2 SigVer test vector request +-// files and emits the corresponding response. +- +-#include +- +-#include +-#include +-#include +-#include +-#include +- +-#include "../crypto/internal.h" +-#include "../crypto/test/file_test.h" +-#include "cavp_test_util.h" +- +- +-namespace { +- +-struct TestCtx { +- std::vector N; +- bool is_pss; +-}; +- +-} +- +-static bool TestRSA2SigVer(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- std::string mod_str; +- if (!t->GetInstruction(&mod_str, "mod")) { +- return false; +- } +- +- printf("%s", t->CurrentTestToString().c_str()); +- +- if (t->HasAttribute("n")) { +- printf("\r\n"); +- return t->GetBytes(&ctx->N, "n"); +- } +- +- std::string hash; +- std::vector e_bytes, msg, sig; +- if (!t->GetAttribute(&hash, "SHAAlg") || +- !t->GetBytes(&e_bytes, "e") || +- !t->GetBytes(&msg, "Msg") || +- !t->GetBytes(&sig, "S")) { +- return false; +- } +- +- bssl::UniquePtr key(RSA_new()); +- key->n = BN_new(); +- key->e = BN_new(); +- if (key == nullptr || +- !BN_bin2bn(ctx->N.data(), ctx->N.size(), key->n) || +- !BN_bin2bn(e_bytes.data(), e_bytes.size(), key->e)) { +- return false; +- } +- +- const EVP_MD *md = EVP_get_digestbyname(hash.c_str()); +- uint8_t digest_buf[EVP_MAX_MD_SIZE]; +- unsigned digest_len; +- if (md == NULL || +- !EVP_Digest(msg.data(), msg.size(), digest_buf, &digest_len, md, NULL)) { +- return false; +- } +- +- int ok; +- if (ctx->is_pss) { +- ok = RSA_verify_pss_mgf1(key.get(), digest_buf, digest_len, md, md, -1, +- sig.data(), sig.size()); +- } else { +- ok = RSA_verify(EVP_MD_type(md), digest_buf, digest_len, sig.data(), +- sig.size(), key.get()); +- } +- +- if (ok) { +- printf("Result = P\r\n\r\n"); +- } else { +- char buf[256]; +- ERR_error_string_n(ERR_get_error(), buf, sizeof(buf)); +- printf("Result = F (%s)\r\n\r\n", buf); +- } +- ERR_clear_error(); +- return true; +-} +- +-int cavp_rsa2_sigver_test_main(int argc, char **argv) { +- if (argc != 3) { +- fprintf(stderr, "usage: %s (pkcs15|pss) \n", +- argv[0]); +- return 1; +- } +- +- TestCtx ctx; +- if (strcmp(argv[1], "pkcs15") == 0) { +- ctx = {std::vector(), false}; +- } else if (strcmp(argv[1], "pss") == 0) { +- ctx = {std::vector(), true}; +- } else { +- fprintf(stderr, "Unknown test type: %s\n", argv[1]); +- return 1; +- } +- +- FileTest::Options opts; +- opts.path = argv[2]; +- opts.callback = TestRSA2SigVer; +- opts.arg = &ctx; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_sha_monte_test.cc b/src/util/fipstools/cavp/cavp_sha_monte_test.cc +deleted file mode 100644 +index f5bcdd1..0000000 +--- a/src/util/fipstools/cavp/cavp_sha_monte_test.cc ++++ /dev/null +@@ -1,103 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. 
+- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_sha_monte_test processes a NIST CAVP SHA-Monte test vector request file +-// and emits the corresponding response. +- +-#include +- +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-namespace { +- +-struct TestCtx { +- std::string hash; +-}; +- +-} +- +-static bool TestSHAMonte(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- const EVP_MD *md = EVP_get_digestbyname(ctx->hash.c_str()); +- if (md == nullptr) { +- return false; +- } +- const size_t md_len = EVP_MD_size(md); +- +- std::string out_len; +- if (!t->GetInstruction(&out_len, "L") || +- md_len != strtoul(out_len.c_str(), nullptr, 0)) { +- return false; +- } +- +- std::vector seed; +- if (!t->GetBytes(&seed, "Seed") || +- seed.size() != md_len) { +- return false; +- } +- +- std::vector out = seed; +- +- printf("%s\r\n", t->CurrentTestToString().c_str()); +- +- for (int count = 0; count < 100; count++) { +- std::vector msg; +- msg.insert(msg.end(), out.begin(), out.end()); +- msg.insert(msg.end(), out.begin(), out.end()); +- msg.insert(msg.end(), out.begin(), out.end()); +- for (int i = 0; i < 1000; i++) { +- unsigned digest_len; +- if (!EVP_Digest(msg.data(), msg.size(), out.data(), &digest_len, md, +- nullptr) || +- digest_len != out.size()) { +- return false; +- } +- +- msg.erase(msg.begin(), msg.begin() + out.size()); +- msg.insert(msg.end(), out.begin(), out.end()); +- } +- printf("COUNT = %d\r\n", count); +- printf("MD = %s\r\n\r\n", EncodeHex(out).c_str()); +- } +- +- return true; +-} +- +-static int usage(char *arg) { +- fprintf(stderr, "usage: %s \n", arg); +- return 1; +-} +- +-int cavp_sha_monte_test_main(int argc, char **argv) { +- if (argc != 3) { +- return usage(argv[0]); +- } +- +- TestCtx ctx = {std::string(argv[1])}; +- +- FileTest::Options opts; +- opts.path = argv[2]; +- opts.callback = TestSHAMonte; +- opts.arg = &ctx; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_sha_test.cc b/src/util/fipstools/cavp/cavp_sha_test.cc +deleted file mode 100644 +index c046451..0000000 +--- a/src/util/fipstools/cavp/cavp_sha_test.cc ++++ /dev/null +@@ -1,97 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_sha_test processes a NIST CAVP SHA test vector request file and emits +-// the corresponding response. +- +-#include +- +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +-namespace { +- +-struct TestCtx { +- std::string hash; +-}; +- +-} +- +-static bool TestSHA(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- const EVP_MD *md = EVP_get_digestbyname(ctx->hash.c_str()); +- if (md == nullptr) { +- return false; +- } +- const size_t md_len = EVP_MD_size(md); +- +- std::string out_len; +- if (!t->GetInstruction(&out_len, "L") || +- md_len != strtoul(out_len.c_str(), nullptr, 0)) { +- return false; +- } +- +- std::string msg_len_str; +- std::vector msg; +- if (!t->GetAttribute(&msg_len_str, "Len") || +- !t->GetBytes(&msg, "Msg")) { +- return false; +- } +- +- size_t msg_len = strtoul(msg_len_str.c_str(), nullptr, 0); +- if (msg_len % 8 != 0 || +- msg_len / 8 > msg.size()) { +- return false; +- } +- msg_len /= 8; +- +- std::vector out; +- out.resize(md_len); +- unsigned digest_len; +- if (!EVP_Digest(msg.data(), msg_len, out.data(), &digest_len, md, nullptr) || +- digest_len != out.size()) { +- return false; +- } +- +- printf("%s", t->CurrentTestToString().c_str()); +- printf("MD = %s\r\n\r\n", EncodeHex(out).c_str()); +- +- return true; +-} +- +-static int usage(char *arg) { +- fprintf(stderr, "usage: %s \n", arg); +- return 1; +-} +- +-int cavp_sha_test_main(int argc, char **argv) { +- if (argc != 3) { +- return usage(argv[0]); +- } +- +- TestCtx ctx = {std::string(argv[1])}; +- +- FileTest::Options opts; +- opts.path = argv[2]; +- opts.callback = TestSHA; +- opts.arg = &ctx; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_tdes_test.cc b/src/util/fipstools/cavp/cavp_tdes_test.cc +deleted file mode 100644 +index 7b8839d..0000000 +--- a/src/util/fipstools/cavp/cavp_tdes_test.cc ++++ /dev/null +@@ -1,336 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_tdes_test processes a NIST TMOVS test vector request file and emits the +-// corresponding response. 
+- +-#include +- +-#include +-#include +-#include +- +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +-#include "cavp_test_util.h" +- +- +-namespace { +- +-struct TestCtx { +- const EVP_CIPHER *cipher; +- enum Mode { +- kKAT, // Known Answer Test +- kMCT, // Monte Carlo Test +- }; +- bool has_iv; +- Mode mode; +-}; +- +-} +- +-static bool TestKAT(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- if (t->HasInstruction("ENCRYPT") == t->HasInstruction("DECRYPT")) { +- t->PrintLine("Want either ENCRYPT or DECRYPT"); +- return false; +- } +- enum { +- kEncrypt, +- kDecrypt, +- } operation = t->HasInstruction("ENCRYPT") ? kEncrypt : kDecrypt; +- +- if (t->HasAttribute("NumKeys")) { +- // Another file format quirk: NumKeys is a single attribute line immediately +- // following an instruction and should probably have been an instruction +- // instead. If it is present, the file has separate attributes "KEY{1,2,3}". +- // If it is not, the keys are concatenated in a single attribute "KEYs". +- std::string num_keys; +- t->GetAttribute(&num_keys, "NumKeys"); +- t->InjectInstruction("NumKeys", num_keys); +- +- std::string header = operation == kEncrypt ? "[ENCRYPT]" : "[DECRYPT]"; +- printf("%s\r\n\r\n", header.c_str()); +- +- return true; +- } +- +- enum { +- kNotPresent, +- kTwo, +- kThree, +- } num_keys = kNotPresent; +- if (t->HasInstruction("NumKeys")) { +- std::string num_keys_str; +- t->GetInstruction(&num_keys_str, "NumKeys"); +- const int n = strtoul(num_keys_str.c_str(), nullptr, 0); +- if (n == 2) { +- num_keys = kTwo; +- } else if (n == 3) { +- num_keys = kThree; +- } else { +- t->PrintLine("invalid NumKeys value"); +- return false; +- } +- } +- +- std::string count; +- std::vector keys, key1, key2, key3, iv, in, result; +- const std::string in_label = +- operation == kEncrypt ? "PLAINTEXT" : "CIPHERTEXT"; +- // clang-format off +- if (!t->GetAttribute(&count, "COUNT") || +- (num_keys == 0 && !t->GetBytes(&keys, "KEYs")) || +- (num_keys > 0 && +- (!t->GetBytes(&key1, "KEY1") || +- !t->GetBytes(&key2, "KEY2") || +- !t->GetBytes(&key3, "KEY3"))) || +- (ctx->has_iv && !t->GetBytes(&iv, "IV")) || +- !t->GetBytes(&in, in_label)) { +- return false; +- } +- // clang-format on +- std::vector key; +- if (num_keys != kNotPresent) { +- key.insert(key.end(), key1.begin(), key1.end()); +- key.insert(key.end(), key2.begin(), key2.end()); +- if (num_keys == kThree) { +- key.insert(key.end(), key3.begin(), key3.end()); +- } +- } else { +- key.insert(key.end(), keys.begin(), keys.end()); +- key.insert(key.end(), keys.begin(), keys.end()); +- key.insert(key.end(), keys.begin(), keys.end()); +- } +- +- if (!CipherOperation(ctx->cipher, &result, operation == kEncrypt, key, iv, +- in)) { +- return false; +- } +- +- // TDES fax files output format differs from file to file, and the input +- // format is inconsistent with the output, so we construct the output manually +- // rather than printing CurrentTestToString(). +- if (t->IsAtNewInstructionBlock() && num_keys == kNotPresent) { +- // If NumKeys is present, header is printed when parsing NumKeys. +- std::string header = operation == kEncrypt ? "[ENCRYPT]" : "[DECRYPT]"; +- printf("%s\r\n", header.c_str()); +- } +- const std::string result_label = +- operation == kEncrypt ? 
"CIPHERTEXT" : "PLAINTEXT"; +- printf("COUNT = %s\r\n", count.c_str()); +- if (num_keys == kNotPresent) { +- printf("KEYs = %s\r\n", EncodeHex(keys).c_str()); +- } else { +- printf("KEY1 = %s\r\nKEY2 = %s\r\nKEY3 = %s\r\n", EncodeHex(key1).c_str(), +- EncodeHex(key2).c_str(), EncodeHex(key3).c_str()); +- } +- if (ctx->has_iv) { +- printf("IV = %s\r\n", EncodeHex(iv).c_str()); +- } +- printf("%s = %s\r\n", in_label.c_str(), EncodeHex(in).c_str()); +- printf("%s = %s\r\n\r\n", result_label.c_str(), EncodeHex(result).c_str()); +- +- return true; +-} +- +-// XORKeyWithOddParityLSB sets |*key| to |key| XOR |value| and then writes +-// the LSB of each byte to establish odd parity for that byte. This parity-based +-// embedded of a DES key into 64 bits is an old tradition and something that +-// NIST's tests require. +-static void XORKeyWithOddParityLSB(std::vector *key, +- const std::vector &value) { +- for (size_t i = 0; i < key->size(); i++) { +- uint8_t v = (*key)[i] ^ value[i]; +- +- // Use LSB to establish odd parity. +- v |= 0x01; +- for (uint8_t j = 1; j < 8; j++) { +- v ^= ((v >> j) & 0x01); +- } +- (*key)[i] = v; +- } +-} +- +-static bool TestMCT(FileTest *t, void *arg) { +- TestCtx *ctx = reinterpret_cast(arg); +- +- if (t->HasInstruction("ENCRYPT") == t->HasInstruction("DECRYPT")) { +- t->PrintLine("Want either ENCRYPT or DECRYPT"); +- return false; +- } +- enum { +- kEncrypt, +- kDecrypt, +- } operation = t->HasInstruction("ENCRYPT") ? kEncrypt : kDecrypt; +- +- if (t->HasAttribute("NumKeys")) { +- // Another file format quirk: NumKeys is a single attribute line immediately +- // following an instruction and should probably have been an instruction +- // instead. +- std::string num_keys; +- t->GetAttribute(&num_keys, "NumKeys"); +- t->InjectInstruction("NumKeys", num_keys); +- return true; +- } +- +- enum { +- kTwo, +- kThree, +- } num_keys; +- std::string num_keys_str; +- if (!t->GetInstruction(&num_keys_str, "NumKeys")) { +- return false; +- } else { +- const int n = strtoul(num_keys_str.c_str(), nullptr, 0); +- if (n == 2) { +- num_keys = kTwo; +- } else if (n == 3) { +- num_keys = kThree; +- } else { +- t->PrintLine("invalid NumKeys value"); +- return false; +- } +- } +- +- std::string count; +- std::vector key1, key2, key3, iv, in, result; +- const std::string in_label = +- operation == kEncrypt ? "PLAINTEXT" : "CIPHERTEXT"; +- // clang-format off +- if (!t->GetBytes(&key1, "KEY1") || +- !t->GetBytes(&key2, "KEY2") || +- !t->GetBytes(&key3, "KEY3") || +- (ctx->has_iv && !t->GetBytes(&iv, "IV")) || +- !t->GetBytes(&in, in_label)) { +- return false; +- } +- // clang-format on +- +- for (int i = 0; i < 400; i++) { +- std::vector current_iv = iv, current_in = in, prev_result, +- prev_prev_result; +- +- std::vector key(key1); +- key.insert(key.end(), key2.begin(), key2.end()); +- key.insert(key.end(), key3.begin(), key3.end()); +- +- for (int j = 0; j < 10000; j++) { +- prev_prev_result = prev_result; +- prev_result = result; +- const EVP_CIPHER *cipher = ctx->cipher; +- if (!CipherOperation(cipher, &result, operation == kEncrypt, key, +- current_iv, current_in)) { +- t->PrintLine("CipherOperation failed"); +- return false; +- } +- if (ctx->has_iv) { +- if (operation == kEncrypt) { +- if (j == 0) { +- current_in = current_iv; +- } else { +- current_in = prev_result; +- } +- current_iv = result; +- } else { // operation == kDecrypt +- current_iv = current_in; +- current_in = result; +- } +- } else { +- current_in = result; +- } +- } +- +- // Output result for COUNT = i. 
+- const std::string result_label = +- operation == kEncrypt ? "CIPHERTEXT" : "PLAINTEXT"; +- if (i == 0) { +- const std::string op_label = +- operation == kEncrypt ? "ENCRYPT" : "DECRYPT"; +- printf("[%s]\n\n", op_label.c_str()); +- } +- printf("COUNT = %d\r\nKEY1 = %s\r\nKEY2 = %s\r\nKEY3 = %s\r\n", i, +- EncodeHex(key1).c_str(), EncodeHex(key2).c_str(), +- EncodeHex(key3).c_str()); +- if (ctx->has_iv) { +- printf("IV = %s\r\n", EncodeHex(iv).c_str()); +- } +- printf("%s = %s\r\n", in_label.c_str(), EncodeHex(in).c_str()); +- printf("%s = %s\r\n\r\n", result_label.c_str(), EncodeHex(result).c_str()); +- +- +- XORKeyWithOddParityLSB(&key1, result); +- XORKeyWithOddParityLSB(&key2, prev_result); +- if (num_keys == kThree) { +- XORKeyWithOddParityLSB(&key3, prev_prev_result); +- } else { +- XORKeyWithOddParityLSB(&key3, result); +- } +- +- if (ctx->has_iv) { +- if (operation == kEncrypt) { +- in = prev_result; +- iv = result; +- } else { +- iv = current_iv; +- in = current_in; +- } +- } else { +- in = result; +- } +- } +- +- return true; +-} +- +-static int usage(char *arg) { +- fprintf(stderr, "usage: %s (kat|mct) \n", arg); +- return 1; +-} +- +-int cavp_tdes_test_main(int argc, char **argv) { +- if (argc != 4) { +- return usage(argv[0]); +- } +- +- const std::string tm(argv[1]); +- enum TestCtx::Mode test_mode; +- if (tm == "kat") { +- test_mode = TestCtx::kKAT; +- } else if (tm == "mct") { +- test_mode = TestCtx::kMCT; +- } else { +- fprintf(stderr, "invalid test_mode: %s\n", tm.c_str()); +- return usage(argv[0]); +- } +- +- const std::string cipher_name(argv[2]); +- const EVP_CIPHER *cipher = GetCipher(argv[2]); +- if (cipher == nullptr) { +- fprintf(stderr, "invalid cipher: %s\n", argv[2]); +- return 1; +- } +- bool has_iv = cipher_name != "des-ede" && cipher_name != "des-ede3"; +- TestCtx ctx = {cipher, has_iv, test_mode}; +- +- FileTestFunc test_fn = test_mode == TestCtx::kKAT ? &TestKAT : &TestMCT; +- FileTest::Options opts; +- opts.path = argv[3]; +- opts.callback = test_fn; +- opts.arg = &ctx; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/cavp_test_util.cc b/src/util/fipstools/cavp/cavp_test_util.cc +deleted file mode 100644 +index 1b4e3a1..0000000 +--- a/src/util/fipstools/cavp/cavp_test_util.cc ++++ /dev/null +@@ -1,220 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +- +-#include "cavp_test_util.h" +- +-#include +-#include +-#include +-#include +- +- +-const EVP_CIPHER *GetCipher(const std::string &name) { +- if (name == "des-cbc") { +- return EVP_des_cbc(); +- } else if (name == "des-ecb") { +- return EVP_des_ecb(); +- } else if (name == "des-ede") { +- return EVP_des_ede(); +- } else if (name == "des-ede3") { +- return EVP_des_ede3(); +- } else if (name == "des-ede-cbc") { +- return EVP_des_ede_cbc(); +- } else if (name == "des-ede3-cbc") { +- return EVP_des_ede3_cbc(); +- } else if (name == "rc4") { +- return EVP_rc4(); +- } else if (name == "aes-128-ecb") { +- return EVP_aes_128_ecb(); +- } else if (name == "aes-256-ecb") { +- return EVP_aes_256_ecb(); +- } else if (name == "aes-128-cbc") { +- return EVP_aes_128_cbc(); +- } else if (name == "aes-128-gcm") { +- return EVP_aes_128_gcm(); +- } else if (name == "aes-128-ofb") { +- return EVP_aes_128_ofb(); +- } else if (name == "aes-192-cbc") { +- return EVP_aes_192_cbc(); +- } else if (name == "aes-192-ctr") { +- return EVP_aes_192_ctr(); +- } else if (name == "aes-192-ecb") { +- return EVP_aes_192_ecb(); +- } else if (name == "aes-256-cbc") { +- return EVP_aes_256_cbc(); +- } else if (name == "aes-128-ctr") { +- return EVP_aes_128_ctr(); +- } else if (name == "aes-256-ctr") { +- return EVP_aes_256_ctr(); +- } else if (name == "aes-256-gcm") { +- return EVP_aes_256_gcm(); +- } else if (name == "aes-256-ofb") { +- return EVP_aes_256_ofb(); +- } +- return nullptr; +-} +- +-bool CipherOperation(const EVP_CIPHER *cipher, std::vector *out, +- bool encrypt, const std::vector &key, +- const std::vector &iv, +- const std::vector &in) { +- bssl::ScopedEVP_CIPHER_CTX ctx; +- if (!EVP_CipherInit_ex(ctx.get(), cipher, nullptr, nullptr, nullptr, +- encrypt ? 1 : 0)) { +- return false; +- } +- if (!iv.empty() && iv.size() != EVP_CIPHER_CTX_iv_length(ctx.get())) { +- return false; +- } +- +- int result_len1 = 0, result_len2; +- *out = std::vector(in.size()); +- if (!EVP_CIPHER_CTX_set_key_length(ctx.get(), key.size()) || +- !EVP_CipherInit_ex(ctx.get(), nullptr, nullptr, key.data(), iv.data(), +- -1) || +- !EVP_CIPHER_CTX_set_padding(ctx.get(), 0) || +- !EVP_CipherUpdate(ctx.get(), out->data(), &result_len1, in.data(), +- in.size()) || +- !EVP_CipherFinal_ex(ctx.get(), out->data() + result_len1, &result_len2)) { +- return false; +- } +- out->resize(result_len1 + result_len2); +- +- return true; +-} +- +-bool AEADEncrypt(const EVP_AEAD *aead, std::vector *ct, +- std::vector *tag, size_t tag_len, +- const std::vector &key, +- const std::vector &pt, +- const std::vector &aad, +- const std::vector &iv) { +- bssl::ScopedEVP_AEAD_CTX ctx; +- if (!EVP_AEAD_CTX_init(ctx.get(), aead, key.data(), key.size(), tag_len, +- nullptr)) { +- return false; +- } +- +- std::vector out; +- out.resize(pt.size() + EVP_AEAD_max_overhead(aead)); +- size_t out_len; +- if (!EVP_AEAD_CTX_seal(ctx.get(), out.data(), &out_len, out.size(), iv.data(), +- iv.size(), pt.data(), pt.size(), aad.data(), +- aad.size())) { +- return false; +- } +- out.resize(out_len); +- +- ct->assign(out.begin(), out.end() - tag_len); +- tag->assign(out.end() - tag_len, out.end()); +- +- return true; +-} +- +-bool AEADDecrypt(const EVP_AEAD *aead, std::vector *pt, size_t pt_len, +- const std::vector &key, +- const std::vector &aad, +- const std::vector &ct, +- const std::vector &tag, +- const std::vector &iv) { +- bssl::ScopedEVP_AEAD_CTX ctx; +- if (!EVP_AEAD_CTX_init_with_direction(ctx.get(), aead, key.data(), key.size(), +- tag.size(), evp_aead_open)) { +- return 
false; +- } +- std::vector in = ct; +- in.reserve(ct.size() + tag.size()); +- in.insert(in.end(), tag.begin(), tag.end()); +- +- pt->resize(pt_len); +- size_t out_pt_len; +- if (!EVP_AEAD_CTX_open(ctx.get(), pt->data(), &out_pt_len, pt->size(), +- iv.data(), iv.size(), in.data(), in.size(), aad.data(), +- aad.size()) || +- out_pt_len != pt_len) { +- return false; +- } +- return true; +-} +- +-static int HexToBIGNUM(bssl::UniquePtr *out, const char *in) { +- BIGNUM *raw = NULL; +- int ret = BN_hex2bn(&raw, in); +- out->reset(raw); +- return ret; +-} +- +-bssl::UniquePtr GetBIGNUM(FileTest *t, const char *attribute) { +- std::string hex; +- if (!t->GetAttribute(&hex, attribute)) { +- return nullptr; +- } +- +- bssl::UniquePtr ret; +- if (HexToBIGNUM(&ret, hex.c_str()) != static_cast(hex.size())) { +- t->PrintLine("Could not decode '%s'.", hex.c_str()); +- return nullptr; +- } +- return ret; +-} +- +-int GetECGroupNIDFromInstruction(FileTest *t, const char **out_str) { +- const char *dummy; +- if (out_str == nullptr) { +- out_str = &dummy; +- } +- +- if (t->HasInstruction("P-224")) { +- *out_str = "P-224"; +- return NID_secp224r1; +- } +- if (t->HasInstruction("P-256")) { +- *out_str = "P-256"; +- return NID_X9_62_prime256v1; +- } +- if (t->HasInstruction("P-384")) { +- *out_str = "P-384"; +- return NID_secp384r1; +- } +- if (t->HasInstruction("P-521")) { +- *out_str = "P-521"; +- return NID_secp521r1; +- } +- t->PrintLine("No supported group specified."); +- return NID_undef; +-} +- +-const EVP_MD *GetDigestFromInstruction(FileTest *t) { +- if (t->HasInstruction("SHA-1")) { +- return EVP_sha1(); +- } +- if (t->HasInstruction("SHA-224")) { +- return EVP_sha224(); +- } +- if (t->HasInstruction("SHA-256")) { +- return EVP_sha256(); +- } +- if (t->HasInstruction("SHA-384")) { +- return EVP_sha384(); +- } +- if (t->HasInstruction("SHA-512")) { +- return EVP_sha512(); +- } +- t->PrintLine("No supported digest function specified."); +- return nullptr; +-} +- +-void EchoComment(const std::string& comment) { +- fwrite(comment.c_str(), comment.size(), 1, stdout); +-} +diff --git a/src/util/fipstools/cavp/cavp_test_util.h b/src/util/fipstools/cavp/cavp_test_util.h +deleted file mode 100644 +index d51dfe6..0000000 +--- a/src/util/fipstools/cavp/cavp_test_util.h ++++ /dev/null +@@ -1,76 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ +- +-#ifndef OPENSSL_HEADER_CRYPTO_FIPSMODULE_CAVP_TEST_UTIL_H +-#define OPENSSL_HEADER_CRYPTO_FIPSMODULE_CAVP_TEST_UTIL_H +- +-#include +-#include +-#include +- +-#include +-#include +- +-#include "../crypto/test/file_test.h" +- +- +-const EVP_CIPHER *GetCipher(const std::string &name); +- +-bool CipherOperation(const EVP_CIPHER *cipher, std::vector *out, +- bool encrypt, const std::vector &key, +- const std::vector &iv, +- const std::vector &in); +- +-bool AEADEncrypt(const EVP_AEAD *aead, std::vector *ct, +- std::vector *tag, size_t tag_len, +- const std::vector &key, +- const std::vector &pt, +- const std::vector &aad, +- const std::vector &iv); +- +-bool AEADDecrypt(const EVP_AEAD *aead, std::vector *pt, size_t pt_len, +- const std::vector &key, +- const std::vector &aad, +- const std::vector &ct, +- const std::vector &tag, +- const std::vector &iv); +- +-bssl::UniquePtr GetBIGNUM(FileTest *t, const char *attribute); +- +-int GetECGroupNIDFromInstruction(FileTest *t, const char **out_str = nullptr); +- +-const EVP_MD *GetDigestFromInstruction(FileTest *t); +- +-void EchoComment(const std::string& comment); +- +-int cavp_aes_gcm_test_main(int argc, char **argv); +-int cavp_aes_test_main(int argc, char **argv); +-int cavp_ctr_drbg_test_main(int argc, char **argv); +-int cavp_ecdsa2_keypair_test_main(int argc, char **argv); +-int cavp_ecdsa2_pkv_test_main(int argc, char **argv); +-int cavp_ecdsa2_siggen_test_main(int argc, char **argv); +-int cavp_ecdsa2_sigver_test_main(int argc, char **argv); +-int cavp_hmac_test_main(int argc, char **argv); +-int cavp_kas_test_main(int argc, char **argv); +-int cavp_keywrap_test_main(int argc, char **argv); +-int cavp_rsa2_keygen_test_main(int argc, char **argv); +-int cavp_rsa2_siggen_test_main(int argc, char **argv); +-int cavp_rsa2_sigver_test_main(int argc, char **argv); +-int cavp_sha_monte_test_main(int argc, char **argv); +-int cavp_sha_test_main(int argc, char **argv); +-int cavp_tdes_test_main(int argc, char **argv); +-int cavp_tlskdf_test_main(int argc, char **argv); +- +- +-#endif // OPENSSL_HEADER_CRYPTO_FIPSMODULE_CAVP_TEST_UTIL_H +diff --git a/src/util/fipstools/cavp/cavp_tlskdf_test.cc b/src/util/fipstools/cavp/cavp_tlskdf_test.cc +deleted file mode 100644 +index 0243439..0000000 +--- a/src/util/fipstools/cavp/cavp_tlskdf_test.cc ++++ /dev/null +@@ -1,113 +0,0 @@ +-/* Copyright (c) 2018, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. +- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-// cavp_tlskdf_test processes NIST TLS KDF test vectors and emits the +-// corresponding response. +-// See https://csrc.nist.gov/CSRC/media/Projects/Cryptographic-Algorithm-Validation-Program/documents/components/askdfvs.pdf, section 6.4. 
+- +-#include +- +-#include +- +-#include +- +-#include "cavp_test_util.h" +-#include "../crypto/fipsmodule/tls/internal.h" +-#include "../crypto/test/file_test.h" +-#include "../crypto/test/test_util.h" +- +- +-static bool TestTLSKDF(FileTest *t, void *arg) { +- const EVP_MD *md = nullptr; +- +- if (t->HasInstruction("TLS 1.0/1.1")) { +- md = EVP_md5_sha1(); +- } else if (t->HasInstruction("TLS 1.2")) { +- if (t->HasInstruction("SHA-256")) { +- md = EVP_sha256(); +- } else if (t->HasInstruction("SHA-384")) { +- md = EVP_sha384(); +- } else if (t->HasInstruction("SHA-512")) { +- md = EVP_sha512(); +- } +- } +- +- if (md == nullptr) { +- return false; +- } +- +- std::string key_block_len_str; +- std::vector premaster, server_random, client_random, +- key_block_server_random, key_block_client_random; +- if (!t->GetBytes(&premaster, "pre_master_secret") || +- !t->GetBytes(&server_random, "serverHello_random") || +- !t->GetBytes(&client_random, "clientHello_random") || +- // The NIST tests specify different client and server randoms for the +- // expansion step from the master-secret step. This is impossible in TLS. +- !t->GetBytes(&key_block_server_random, "server_random") || +- !t->GetBytes(&key_block_client_random, "client_random") || +- !t->GetInstruction(&key_block_len_str, "key block length") || +- // These are ignored. +- !t->HasAttribute("COUNT") || +- !t->HasInstruction("pre-master secret length")) { +- return false; +- } +- +- uint8_t master_secret[48]; +- static const char kMasterSecretLabel[] = "master secret"; +- if (!CRYPTO_tls1_prf(md, master_secret, sizeof(master_secret), +- premaster.data(), premaster.size(), kMasterSecretLabel, +- sizeof(kMasterSecretLabel) - 1, client_random.data(), +- client_random.size(), server_random.data(), +- server_random.size())) { +- return false; +- } +- +- errno = 0; +- const long int key_block_bits = +- strtol(key_block_len_str.c_str(), nullptr, 10); +- if (errno != 0 || key_block_bits <= 0 || (key_block_bits & 7) != 0) { +- return false; +- } +- const size_t key_block_len = key_block_bits / 8; +- std::vector key_block(key_block_len); +- static const char kLabel[] = "key expansion"; +- if (!CRYPTO_tls1_prf( +- md, key_block.data(), key_block.size(), master_secret, +- sizeof(master_secret), kLabel, sizeof(kLabel) - 1, +- key_block_server_random.data(), key_block_server_random.size(), +- key_block_client_random.data(), key_block_client_random.size())) { +- return false; +- } +- +- printf("%smaster_secret = %s\r\nkey_block = %s\r\n\r\n", +- t->CurrentTestToString().c_str(), EncodeHex(master_secret).c_str(), +- EncodeHex(key_block).c_str()); +- +- return true; +-} +- +-int cavp_tlskdf_test_main(int argc, char **argv) { +- if (argc != 2) { +- fprintf(stderr, "usage: %s \n", argv[0]); +- return 1; +- } +- +- FileTest::Options opts; +- opts.path = argv[1]; +- opts.callback = TestTLSKDF; +- opts.silent = true; +- opts.comment_callback = EchoComment; +- return FileTestMain(opts); +-} +diff --git a/src/util/fipstools/cavp/run_cavp.go b/src/util/fipstools/cavp/run_cavp.go +deleted file mode 100644 +index 51a4100..0000000 +--- a/src/util/fipstools/cavp/run_cavp.go ++++ /dev/null +@@ -1,592 +0,0 @@ +-// run_cavp.go processes CAVP input files and generates suitable response +-// files, optionally comparing the results against the provided FAX files. 
+-package main +- +-import ( +- "bufio" +- "errors" +- "flag" +- "fmt" +- "os" +- "os/exec" +- "path" +- "path/filepath" +- "runtime" +- "strings" +- "sync" +- "time" +-) +- +-var ( +- oraclePath = flag.String("oracle-bin", "", "Path to the oracle binary") +- suiteDir = flag.String("suite-dir", "", "Base directory containing the CAVP test suite") +- noFAX = flag.Bool("no-fax", false, "Skip comparing against FAX files") +- android = flag.Bool("android", false, "Run tests via ADB") +-) +- +-const ( +- androidTmpPath = "/data/local/tmp/" +- androidCAVPPath = androidTmpPath + "cavp" +- androidLibCryptoPath = androidTmpPath + "libcrypto.so" +-) +- +-// test describes a single request file. +-type test struct { +- // inFile is the base of the filename without an extension, i.e. +- // “ECBMCT128”. +- inFile string +- // args are the arguments (not including the input filename) to the +- // oracle binary. +- args []string +- // noFAX, if true, indicates that the output cannot be compared against +- // the FAX file. (E.g. because the primitive is non-deterministic.) +- noFAX bool +-} +- +-// nextLineState can be used by FAX next-line function to store state. +-type nextLineState struct { +- // State used by the KAS test. +- nextIsIUTHash bool +-} +- +-// testSuite describes a series of tests that are handled by a single oracle +-// binary. +-type testSuite struct { +- // directory is the name of the directory in the CAVP input, i.e. “AES”. +- directory string +- // suite names the test suite to pass as the first command-line argument. +- suite string +- // nextLineFunc, if not nil, is the function used to read the next line +- // from the FAX file. This can be used to skip lines and/or mutate them +- // as needed. The second argument can be used by the scanner to store +- // state, if needed. If isWildcard is true on return then line is not +- // meaningful and any line from the response file should be accepted. 
+- nextLineFunc func(*bufio.Scanner, *nextLineState) (line string, isWildcard, ok bool) +- tests []test +-} +- +-func (t *testSuite) getDirectory() string { +- return filepath.Join(*suiteDir, t.directory) +-} +- +-var aesGCMTests = testSuite{ +- "AES_GCM", +- "aes_gcm", +- nil, +- []test{ +- {"gcmDecrypt128", []string{"dec", "aes-128-gcm"}, false}, +- {"gcmDecrypt192", []string{"dec", "aes-192-gcm"}, false}, +- {"gcmDecrypt256", []string{"dec", "aes-256-gcm"}, false}, +- {"gcmEncryptExtIV128", []string{"enc", "aes-128-gcm"}, false}, +- {"gcmEncryptExtIV192", []string{"enc", "aes-192-gcm"}, false}, +- {"gcmEncryptExtIV256", []string{"enc", "aes-256-gcm"}, false}, +- }, +-} +- +-var aesTests = testSuite{ +- "AES", +- "aes", +- nil, +- []test{ +- {"CBCGFSbox128", []string{"kat", "aes-128-cbc"}, false}, +- {"CBCGFSbox192", []string{"kat", "aes-192-cbc"}, false}, +- {"CBCGFSbox256", []string{"kat", "aes-256-cbc"}, false}, +- {"CBCKeySbox128", []string{"kat", "aes-128-cbc"}, false}, +- {"CBCKeySbox192", []string{"kat", "aes-192-cbc"}, false}, +- {"CBCKeySbox256", []string{"kat", "aes-256-cbc"}, false}, +- {"CBCMMT128", []string{"kat", "aes-128-cbc"}, false}, +- {"CBCMMT192", []string{"kat", "aes-192-cbc"}, false}, +- {"CBCMMT256", []string{"kat", "aes-256-cbc"}, false}, +- {"CBCVarKey128", []string{"kat", "aes-128-cbc"}, false}, +- {"CBCVarKey192", []string{"kat", "aes-192-cbc"}, false}, +- {"CBCVarKey256", []string{"kat", "aes-256-cbc"}, false}, +- {"CBCVarTxt128", []string{"kat", "aes-128-cbc"}, false}, +- {"CBCVarTxt192", []string{"kat", "aes-192-cbc"}, false}, +- {"CBCVarTxt256", []string{"kat", "aes-256-cbc"}, false}, +- {"ECBGFSbox128", []string{"kat", "aes-128-ecb"}, false}, +- {"ECBGFSbox192", []string{"kat", "aes-192-ecb"}, false}, +- {"ECBGFSbox256", []string{"kat", "aes-256-ecb"}, false}, +- {"ECBKeySbox128", []string{"kat", "aes-128-ecb"}, false}, +- {"ECBKeySbox192", []string{"kat", "aes-192-ecb"}, false}, +- {"ECBKeySbox256", []string{"kat", "aes-256-ecb"}, false}, +- {"ECBMMT128", []string{"kat", "aes-128-ecb"}, false}, +- {"ECBMMT192", []string{"kat", "aes-192-ecb"}, false}, +- {"ECBMMT256", []string{"kat", "aes-256-ecb"}, false}, +- {"ECBVarKey128", []string{"kat", "aes-128-ecb"}, false}, +- {"ECBVarKey192", []string{"kat", "aes-192-ecb"}, false}, +- {"ECBVarKey256", []string{"kat", "aes-256-ecb"}, false}, +- {"ECBVarTxt128", []string{"kat", "aes-128-ecb"}, false}, +- {"ECBVarTxt192", []string{"kat", "aes-192-ecb"}, false}, +- {"ECBVarTxt256", []string{"kat", "aes-256-ecb"}, false}, +- // AES Monte-Carlo tests +- {"ECBMCT128", []string{"mct", "aes-128-ecb"}, false}, +- {"ECBMCT192", []string{"mct", "aes-192-ecb"}, false}, +- {"ECBMCT256", []string{"mct", "aes-256-ecb"}, false}, +- {"CBCMCT128", []string{"mct", "aes-128-cbc"}, false}, +- {"CBCMCT192", []string{"mct", "aes-192-cbc"}, false}, +- {"CBCMCT256", []string{"mct", "aes-256-cbc"}, false}, +- }, +-} +- +-var ecdsa2KeyPairTests = testSuite{ +- "ECDSA2", +- "ecdsa2_keypair", +- nil, +- []test{{"KeyPair", nil, true}}, +-} +- +-var ecdsa2PKVTests = testSuite{ +- "ECDSA2", +- "ecdsa2_pkv", +- nil, +- []test{{"PKV", nil, false}}, +-} +- +-var ecdsa2SigGenTests = testSuite{ +- "ECDSA2", +- "ecdsa2_siggen", +- nil, +- []test{ +- {"SigGen", []string{"SigGen"}, true}, +- {"SigGenComponent", []string{"SigGenComponent"}, true}, +- }, +-} +- +-var ecdsa2SigVerTests = testSuite{ +- "ECDSA2", +- "ecdsa2_sigver", +- nil, +- []test{{"SigVer", nil, false}}, +-} +- +-var rsa2KeyGenTests = testSuite{ +- "RSA2", +- "rsa2_keygen", +- nil, +- 
[]test{ +- {"KeyGen_RandomProbablyPrime3_3", nil, true}, +- }, +-} +- +-var rsa2SigGenTests = testSuite{ +- "RSA2", +- "rsa2_siggen", +- nil, +- []test{ +- {"SigGen15_186-3", []string{"pkcs15"}, true}, +- {"SigGenPSS_186-3", []string{"pss"}, true}, +- }, +-} +- +-var rsa2SigVerTests = testSuite{ +- "RSA2", +- "rsa2_sigver", +- func(s *bufio.Scanner, state *nextLineState) (string, bool, bool) { +- for { +- if !s.Scan() { +- return "", false, false +- } +- +- line := s.Text() +- if strings.HasPrefix(line, "p = ") || strings.HasPrefix(line, "d = ") || strings.HasPrefix(line, "SaltVal = ") || strings.HasPrefix(line, "EM with ") { +- continue +- } +- if strings.HasPrefix(line, "q = ") { +- // Skip the "q = " line and an additional blank line. +- if !s.Scan() || +- len(strings.TrimSpace(s.Text())) > 0 { +- return "", false, false +- } +- continue +- } +- return line, false, true +- } +- }, +- []test{ +- {"SigVer15_186-3", []string{"pkcs15"}, false}, +- {"SigVerPSS_186-3", []string{"pss"}, false}, +- }, +-} +- +-var hmacTests = testSuite{ +- "HMAC", +- "hmac", +- nil, +- []test{{"HMAC", nil, false}}, +-} +- +-var shaTests = testSuite{ +- "SHA", +- "sha", +- nil, +- []test{ +- {"SHA1LongMsg", []string{"SHA1"}, false}, +- {"SHA1ShortMsg", []string{"SHA1"}, false}, +- {"SHA224LongMsg", []string{"SHA224"}, false}, +- {"SHA224ShortMsg", []string{"SHA224"}, false}, +- {"SHA256LongMsg", []string{"SHA256"}, false}, +- {"SHA256ShortMsg", []string{"SHA256"}, false}, +- {"SHA384LongMsg", []string{"SHA384"}, false}, +- {"SHA384ShortMsg", []string{"SHA384"}, false}, +- {"SHA512LongMsg", []string{"SHA512"}, false}, +- {"SHA512ShortMsg", []string{"SHA512"}, false}, +- }, +-} +- +-var shaMonteTests = testSuite{ +- "SHA", +- "sha_monte", +- nil, +- []test{ +- {"SHA1Monte", []string{"SHA1"}, false}, +- {"SHA224Monte", []string{"SHA224"}, false}, +- {"SHA256Monte", []string{"SHA256"}, false}, +- {"SHA384Monte", []string{"SHA384"}, false}, +- {"SHA512Monte", []string{"SHA512"}, false}, +- }, +-} +- +-var ctrDRBGTests = testSuite{ +- "DRBG800-90A", +- "ctr_drbg", +- nil, +- []test{{"CTR_DRBG", nil, false}}, +-} +- +-var tdesTests = testSuite{ +- "TDES", +- "tdes", +- nil, +- []test{ +- {"TCBCMMT2", []string{"kat", "des-ede-cbc"}, false}, +- {"TCBCMMT3", []string{"kat", "des-ede3-cbc"}, false}, +- {"TCBCMonte2", []string{"mct", "des-ede3-cbc"}, false}, +- {"TCBCMonte3", []string{"mct", "des-ede3-cbc"}, false}, +- {"TCBCinvperm", []string{"kat", "des-ede3-cbc"}, false}, +- {"TCBCpermop", []string{"kat", "des-ede3-cbc"}, false}, +- {"TCBCsubtab", []string{"kat", "des-ede3-cbc"}, false}, +- {"TCBCvarkey", []string{"kat", "des-ede3-cbc"}, false}, +- {"TCBCvartext", []string{"kat", "des-ede3-cbc"}, false}, +- {"TECBMMT2", []string{"kat", "des-ede"}, false}, +- {"TECBMMT3", []string{"kat", "des-ede3"}, false}, +- {"TECBMonte2", []string{"mct", "des-ede3"}, false}, +- {"TECBMonte3", []string{"mct", "des-ede3"}, false}, +- {"TECBinvperm", []string{"kat", "des-ede3"}, false}, +- {"TECBpermop", []string{"kat", "des-ede3"}, false}, +- {"TECBsubtab", []string{"kat", "des-ede3"}, false}, +- {"TECBvarkey", []string{"kat", "des-ede3"}, false}, +- {"TECBvartext", []string{"kat", "des-ede3"}, false}, +- }, +-} +- +-var keyWrapTests = testSuite{ +- "KeyWrap38F", +- "keywrap", +- nil, +- []test{ +- {"KW_AD_128", []string{"dec", "128"}, false}, +- {"KW_AD_192", []string{"dec", "192"}, false}, +- {"KW_AD_256", []string{"dec", "256"}, false}, +- {"KW_AE_128", []string{"enc", "128"}, false}, +- {"KW_AE_192", []string{"enc", "192"}, false}, 
+- {"KW_AE_256", []string{"enc", "256"}, false}, +- {"KWP_AD_128", []string{"dec-pad", "128"}, false}, +- {"KWP_AD_192", []string{"dec-pad", "192"}, false}, +- {"KWP_AD_256", []string{"dec-pad", "256"}, false}, +- {"KWP_AE_128", []string{"enc-pad", "128"}, false}, +- {"KWP_AE_192", []string{"enc-pad", "192"}, false}, +- {"KWP_AE_256", []string{"enc-pad", "256"}, false}, +- }, +-} +- +-var kasTests = testSuite{ +- "KAS", +- "kas", +- func(s *bufio.Scanner, state *nextLineState) (line string, isWildcard, ok bool) { +- for { +- // If the response file will include the IUT hash next, +- // return a wildcard signal because this cannot be +- // matched against the FAX file. +- if state.nextIsIUTHash { +- state.nextIsIUTHash = false +- return "", true, true +- } +- +- if !s.Scan() { +- return "", false, false +- } +- +- line := s.Text() +- if strings.HasPrefix(line, "deCAVS = ") || strings.HasPrefix(line, "Z = ") { +- continue +- } +- if strings.HasPrefix(line, "CAVSHashZZ = ") { +- state.nextIsIUTHash = true +- } +- return line, false, true +- } +- }, +- []test{ +- {"KASFunctionTest_ECCEphemeralUnified_NOKC_ZZOnly_init", []string{"function"}, true}, +- {"KASFunctionTest_ECCEphemeralUnified_NOKC_ZZOnly_resp", []string{"function"}, true}, +- {"KASValidityTest_ECCEphemeralUnified_NOKC_ZZOnly_init", []string{"validity"}, false}, +- {"KASValidityTest_ECCEphemeralUnified_NOKC_ZZOnly_resp", []string{"validity"}, false}, +- }, +-} +- +-var tlsKDFTests = testSuite{ +- "KDF135", +- "tlskdf", +- nil, +- []test{ +- {"tls", nil, false}, +- }, +-} +- +-var testSuites = []*testSuite{ +- &aesGCMTests, +- &aesTests, +- &ctrDRBGTests, +- &ecdsa2KeyPairTests, +- &ecdsa2PKVTests, +- &ecdsa2SigGenTests, +- &ecdsa2SigVerTests, +- &hmacTests, +- &keyWrapTests, +- &rsa2KeyGenTests, +- &rsa2SigGenTests, +- &rsa2SigVerTests, +- &shaTests, +- &shaMonteTests, +- &tdesTests, +- &kasTests, +- &tlsKDFTests, +-} +- +-// testInstance represents a specific test in a testSuite. +-type testInstance struct { +- suite *testSuite +- testIndex int +-} +- +-func worker(wg *sync.WaitGroup, work <-chan testInstance) { +- defer wg.Done() +- +- for ti := range work { +- test := ti.suite.tests[ti.testIndex] +- +- if err := doTest(ti.suite, test); err != nil { +- fmt.Fprintf(os.Stderr, "%s\n", err) +- os.Exit(2) +- } +- +- if !*noFAX && !test.noFAX { +- if err := compareFAX(ti.suite, test); err != nil { +- fmt.Fprintf(os.Stderr, "%s\n", err) +- os.Exit(3) +- } +- } +- } +-} +- +-func checkAndroidPrereqs() error { +- // The cavp binary, and a matching libcrypto.so, are required to be placed +- // in /data/local/tmp before running this script. 
+- if err := exec.Command("adb", "shell", "ls", androidCAVPPath).Run(); err != nil { +- return errors.New("failed to list cavp binary; ensure that adb works and cavp binary is in place: " + err.Error()) +- } +- if err := exec.Command("adb", "shell", "ls", androidLibCryptoPath).Run(); err != nil { +- return errors.New("failed to list libcrypto.so; ensure that library is in place: " + err.Error()) +- } +- return nil +-} +- +-func main() { +- flag.Parse() +- +- if *android { +- if err := checkAndroidPrereqs(); err != nil { +- fmt.Fprintf(os.Stderr, "%s\n", err) +- os.Exit(1) +- } +- } else if len(*oraclePath) == 0 { +- fmt.Fprintf(os.Stderr, "Must give -oracle-bin\n") +- os.Exit(1) +- } +- +- work := make(chan testInstance) +- var wg sync.WaitGroup +- +- numWorkers := runtime.NumCPU() +- if *android { +- numWorkers = 1 +- } +- +- for i := 0; i < numWorkers; i++ { +- wg.Add(1) +- go worker(&wg, work) +- } +- +- for _, suite := range testSuites { +- for i := range suite.tests { +- work <- testInstance{suite, i} +- } +- } +- +- close(work) +- wg.Wait() +-} +- +-func doTest(suite *testSuite, test test) error { +- bin := *oraclePath +- var args []string +- +- if *android { +- bin = "adb" +- args = []string{"shell", "LD_LIBRARY_PATH=" + androidTmpPath, androidCAVPPath} +- } +- +- args = append(args, suite.suite) +- args = append(args, test.args...) +- reqPath := filepath.Join(suite.getDirectory(), "req", test.inFile+".req") +- var reqPathOnDevice string +- +- if *android { +- reqPathOnDevice = path.Join(androidTmpPath, test.inFile+".req") +- if err := exec.Command("adb", "push", reqPath, reqPathOnDevice).Run(); err != nil { +- return errors.New("failed to push request file: " + err.Error()) +- } +- args = append(args, reqPathOnDevice) +- } else { +- args = append(args, reqPath) +- } +- +- respDir := filepath.Join(suite.getDirectory(), "resp") +- if err := os.Mkdir(respDir, 0755); err != nil && !os.IsExist(err) { +- return fmt.Errorf("cannot create resp directory: %s", err) +- } +- outPath := filepath.Join(respDir, test.inFile+".rsp") +- outFile, err := os.OpenFile(outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) +- if err != nil { +- return fmt.Errorf("cannot open output file for %q %q: %s", suite.getDirectory(), test.inFile, err) +- } +- defer outFile.Close() +- +- cmd := exec.Command(bin, args...) 
+- cmd.Stdout = outFile +- cmd.Stderr = os.Stderr +- +- cmdLine := strings.Join(append([]string{bin}, args...), " ") +- startTime := time.Now() +- if err := cmd.Run(); err != nil { +- return fmt.Errorf("cannot run command for %q %q (%s): %s", suite.getDirectory(), test.inFile, cmdLine, err) +- } +- +- fmt.Printf("%s (%ds)\n", cmdLine, int(time.Since(startTime).Seconds())) +- +- if *android { +- exec.Command("adb", "shell", "rm", reqPathOnDevice).Run() +- } +- +- return nil +-} +- +-func canonicalizeLine(in string) string { +- if strings.HasPrefix(in, "Result = P (") { +- return "Result = P" +- } +- if strings.HasPrefix(in, "Result = F (") { +- return "Result = F" +- } +- return in +-} +- +-func compareFAX(suite *testSuite, test test) error { +- nextLineFunc := suite.nextLineFunc +- if nextLineFunc == nil { +- nextLineFunc = func(s *bufio.Scanner, state *nextLineState) (string, bool, bool) { +- if !s.Scan() { +- return "", false, false +- } +- return s.Text(), false, true +- } +- } +- +- respPath := filepath.Join(suite.getDirectory(), "resp", test.inFile+".rsp") +- respFile, err := os.Open(respPath) +- if err != nil { +- return fmt.Errorf("cannot read output of %q %q: %s", suite.getDirectory(), test.inFile, err) +- } +- defer respFile.Close() +- +- faxPath := filepath.Join(suite.getDirectory(), "fax", test.inFile+".fax") +- faxFile, err := os.Open(faxPath) +- if err != nil { +- return fmt.Errorf("cannot open fax file for %q %q: %s", suite.getDirectory(), test.inFile, err) +- } +- defer faxFile.Close() +- +- respScanner := bufio.NewScanner(respFile) +- faxScanner := bufio.NewScanner(faxFile) +- var nextLineState nextLineState +- +- lineNo := 0 +- inHeader := true +- +- for respScanner.Scan() { +- lineNo++ +- respLine := respScanner.Text() +- var faxLine string +- var isWildcard, ok bool +- +- if inHeader && (len(respLine) == 0 || respLine[0] == '#') { +- continue +- } +- +- for { +- haveFaxLine := false +- +- if inHeader { +- for { +- if faxLine, isWildcard, ok = nextLineFunc(faxScanner, &nextLineState); !ok { +- break +- } +- if len(faxLine) != 0 && faxLine[0] != '#' { +- haveFaxLine = true +- break +- } +- } +- +- inHeader = false +- } else { +- faxLine, isWildcard, haveFaxLine = nextLineFunc(faxScanner, &nextLineState) +- } +- +- if !haveFaxLine { +- // Ignore blank lines at the end of the generated file. +- if len(respLine) == 0 { +- break +- } +- return fmt.Errorf("resp file is longer than fax for %q %q", suite.getDirectory(), test.inFile) +- } +- +- if strings.HasPrefix(faxLine, " (Reason: ") { +- continue +- } +- +- break +- } +- +- if isWildcard || canonicalizeLine(faxLine) == canonicalizeLine(respLine) { +- continue +- } +- +- return fmt.Errorf("resp and fax differ at line %d for %q %q: %q vs %q", lineNo, suite.getDirectory(), test.inFile, respLine, faxLine) +- } +- +- if _, _, ok := nextLineFunc(faxScanner, &nextLineState); ok { +- return fmt.Errorf("fax file is longer than resp for %q %q", suite.getDirectory(), test.inFile) +- } +- +- return nil +-} +diff --git a/src/util/fipstools/cavp/test_fips.c b/src/util/fipstools/cavp/test_fips.c +deleted file mode 100644 +index dd82d65..0000000 +--- a/src/util/fipstools/cavp/test_fips.c ++++ /dev/null +@@ -1,309 +0,0 @@ +-/* Copyright (c) 2017, Google Inc. +- * +- * Permission to use, copy, modify, and/or distribute this software for any +- * purpose with or without fee is hereby granted, provided that the above +- * copyright notice and this permission notice appear in all copies. 
+- * +- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION +- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +- +-/* test_fips exercises various cryptographic primitives for demonstration +- * purposes in the validation process only. */ +- +-#include +- +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +-#include +- +-#include "../crypto/fipsmodule/rand/internal.h" +-#include "../crypto/fipsmodule/tls/internal.h" +-#include "../crypto/internal.h" +- +- +-static void hexdump(const void *a, size_t len) { +- const unsigned char *in = (const unsigned char *)a; +- for (size_t i = 0; i < len; i++) { +- printf("%02x", in[i]); +- } +- +- printf("\n"); +-} +- +-int main(int argc, char **argv) { +- CRYPTO_library_init(); +- +- static const uint8_t kAESKey[16] = "BoringCrypto Key"; +- static const uint8_t kPlaintext[64] = +- "BoringCryptoModule FIPS KAT Encryption and Decryption Plaintext!"; +- static const DES_cblock kDESKey1 = {"BCMDESK1"}; +- static const DES_cblock kDESKey2 = {"BCMDESK2"}; +- static const DES_cblock kDESKey3 = {"BCMDESK3"}; +- static const DES_cblock kDESIV = {"BCMDESIV"}; +- static const uint8_t kPlaintextSHA256[32] = { +- 0x37, 0xbd, 0x70, 0x53, 0x72, 0xfc, 0xd4, 0x03, 0x79, 0x70, 0xfb, +- 0x06, 0x95, 0xb1, 0x2a, 0x82, 0x48, 0xe1, 0x3e, 0xf2, 0x33, 0xfb, +- 0xef, 0x29, 0x81, 0x22, 0x45, 0x40, 0x43, 0x70, 0xce, 0x0f}; +- const uint8_t kDRBGEntropy[48] = +- "DBRG Initial Entropy "; +- const uint8_t kDRBGPersonalization[18] = "BCMPersonalization"; +- const uint8_t kDRBGAD[16] = "BCM DRBG AD "; +- const uint8_t kDRBGEntropy2[48] = +- "DBRG Reseed Entropy "; +- +- AES_KEY aes_key; +- uint8_t aes_iv[16]; +- uint8_t output[256]; +- +- /* AES-CBC Encryption */ +- memset(aes_iv, 0, sizeof(aes_iv)); +- if (AES_set_encrypt_key(kAESKey, 8 * sizeof(kAESKey), &aes_key) != 0) { +- printf("AES_set_encrypt_key failed\n"); +- goto err; +- } +- +- printf("About to AES-CBC encrypt "); +- hexdump(kPlaintext, sizeof(kPlaintext)); +- AES_cbc_encrypt(kPlaintext, output, sizeof(kPlaintext), &aes_key, aes_iv, +- AES_ENCRYPT); +- printf(" got "); +- hexdump(output, sizeof(kPlaintext)); +- +- /* AES-CBC Decryption */ +- memset(aes_iv, 0, sizeof(aes_iv)); +- if (AES_set_decrypt_key(kAESKey, 8 * sizeof(kAESKey), &aes_key) != 0) { +- printf("AES decrypt failed\n"); +- goto err; +- } +- printf("About to AES-CBC decrypt "); +- hexdump(output, sizeof(kPlaintext)); +- AES_cbc_encrypt(output, output, sizeof(kPlaintext), &aes_key, aes_iv, +- AES_DECRYPT); +- printf(" got "); +- hexdump(output, sizeof(kPlaintext)); +- +- size_t out_len; +- uint8_t nonce[EVP_AEAD_MAX_NONCE_LENGTH]; +- OPENSSL_memset(nonce, 0, sizeof(nonce)); +- EVP_AEAD_CTX aead_ctx; +- if (!EVP_AEAD_CTX_init(&aead_ctx, EVP_aead_aes_128_gcm(), kAESKey, +- sizeof(kAESKey), 0, NULL)) { +- printf("EVP_AEAD_CTX_init failed\n"); +- goto err; +- } +- +- /* AES-GCM Encryption */ +- printf("About to AES-GCM seal "); +- hexdump(output, sizeof(kPlaintext)); +- if (!EVP_AEAD_CTX_seal(&aead_ctx, output, &out_len, sizeof(output), nonce, +- 
EVP_AEAD_nonce_length(EVP_aead_aes_128_gcm()), +- kPlaintext, sizeof(kPlaintext), NULL, 0)) { +- printf("AES-GCM encrypt failed\n"); +- goto err; +- } +- printf(" got "); +- hexdump(output, out_len); +- +- /* AES-GCM Decryption */ +- printf("About to AES-GCM open "); +- hexdump(output, out_len); +- if (!EVP_AEAD_CTX_open(&aead_ctx, output, &out_len, sizeof(output), nonce, +- EVP_AEAD_nonce_length(EVP_aead_aes_128_gcm()), +- output, out_len, NULL, 0)) { +- printf("AES-GCM decrypt failed\n"); +- goto err; +- } +- printf(" got "); +- hexdump(output, out_len); +- +- EVP_AEAD_CTX_cleanup(&aead_ctx); +- +- DES_key_schedule des1, des2, des3; +- DES_cblock des_iv; +- DES_set_key(&kDESKey1, &des1); +- DES_set_key(&kDESKey2, &des2); +- DES_set_key(&kDESKey3, &des3); +- +- /* 3DES Encryption */ +- memcpy(&des_iv, &kDESIV, sizeof(des_iv)); +- printf("About to 3DES-CBC encrypt "); +- hexdump(kPlaintext, sizeof(kPlaintext)); +- DES_ede3_cbc_encrypt(kPlaintext, output, sizeof(kPlaintext), &des1, &des2, +- &des3, &des_iv, DES_ENCRYPT); +- printf(" got "); +- hexdump(output, sizeof(kPlaintext)); +- +- /* 3DES Decryption */ +- memcpy(&des_iv, &kDESIV, sizeof(des_iv)); +- printf("About to 3DES-CBC decrypt "); +- hexdump(kPlaintext, sizeof(kPlaintext)); +- DES_ede3_cbc_encrypt(output, output, sizeof(kPlaintext), &des1, +- &des2, &des3, &des_iv, DES_DECRYPT); +- printf(" got "); +- hexdump(output, sizeof(kPlaintext)); +- +- /* SHA-1 */ +- printf("About to SHA-1 hash "); +- hexdump(kPlaintext, sizeof(kPlaintext)); +- SHA1(kPlaintext, sizeof(kPlaintext), output); +- printf(" got "); +- hexdump(output, SHA_DIGEST_LENGTH); +- +- /* SHA-256 */ +- printf("About to SHA-256 hash "); +- hexdump(kPlaintext, sizeof(kPlaintext)); +- SHA256(kPlaintext, sizeof(kPlaintext), output); +- printf(" got "); +- hexdump(output, SHA256_DIGEST_LENGTH); +- +- /* SHA-512 */ +- printf("About to SHA-512 hash "); +- hexdump(kPlaintext, sizeof(kPlaintext)); +- SHA512(kPlaintext, sizeof(kPlaintext), output); +- printf(" got "); +- hexdump(output, SHA512_DIGEST_LENGTH); +- +- RSA *rsa_key = RSA_new(); +- printf("About to generate RSA key\n"); +- if (!RSA_generate_key_fips(rsa_key, 2048, NULL)) { +- printf("RSA_generate_key_fips failed\n"); +- goto err; +- } +- +- /* RSA Sign */ +- unsigned sig_len; +- printf("About to RSA sign "); +- hexdump(kPlaintextSHA256, sizeof(kPlaintextSHA256)); +- if (!RSA_sign(NID_sha256, kPlaintextSHA256, sizeof(kPlaintextSHA256), output, +- &sig_len, rsa_key)) { +- printf("RSA Sign failed\n"); +- goto err; +- } +- printf(" got "); +- hexdump(output, sig_len); +- +- /* RSA Verify */ +- printf("About to RSA verify "); +- hexdump(output, sig_len); +- if (!RSA_verify(NID_sha256, kPlaintextSHA256, sizeof(kPlaintextSHA256), +- output, sig_len, rsa_key)) { +- printf("RSA Verify failed.\n"); +- goto err; +- } +- +- RSA_free(rsa_key); +- +- EC_KEY *ec_key = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1); +- if (ec_key == NULL) { +- printf("invalid ECDSA key\n"); +- goto err; +- } +- +- printf("About to generate P-256 key\n"); +- if (!EC_KEY_generate_key_fips(ec_key)) { +- printf("EC_KEY_generate_key_fips failed\n"); +- goto err; +- } +- +- /* Primitive Z Computation */ +- const EC_GROUP *const ec_group = EC_KEY_get0_group(ec_key); +- EC_POINT *z_point = EC_POINT_new(ec_group); +- uint8_t z_result[65]; +- printf("About to compute key-agreement Z with P-256:\n"); +- if (!EC_POINT_mul(ec_group, z_point, NULL, EC_KEY_get0_public_key(ec_key), +- EC_KEY_get0_private_key(ec_key), NULL) || +- EC_POINT_point2oct(ec_group, z_point, 
POINT_CONVERSION_UNCOMPRESSED, +- z_result, sizeof(z_result), +- NULL) != sizeof(z_result)) { +- fprintf(stderr, "EC_POINT_mul failed.\n"); +- goto err; +- } +- EC_POINT_free(z_point); +- +- printf(" got "); +- hexdump(z_result, sizeof(z_result)); +- +- /* ECDSA Sign/Verify PWCT */ +- printf("About to ECDSA sign "); +- hexdump(kPlaintextSHA256, sizeof(kPlaintextSHA256)); +- ECDSA_SIG *sig = +- ECDSA_do_sign(kPlaintextSHA256, sizeof(kPlaintextSHA256), ec_key); +- if (sig == NULL || +- !ECDSA_do_verify(kPlaintextSHA256, sizeof(kPlaintextSHA256), sig, +- ec_key)) { +- printf("ECDSA Sign/Verify PWCT failed.\n"); +- goto err; +- } +- +- ECDSA_SIG_free(sig); +- EC_KEY_free(ec_key); +- +- /* DBRG */ +- CTR_DRBG_STATE drbg; +- printf("About to seed CTR-DRBG with "); +- hexdump(kDRBGEntropy, sizeof(kDRBGEntropy)); +- if (!CTR_DRBG_init(&drbg, kDRBGEntropy, kDRBGPersonalization, +- sizeof(kDRBGPersonalization)) || +- !CTR_DRBG_generate(&drbg, output, sizeof(output), kDRBGAD, +- sizeof(kDRBGAD)) || +- !CTR_DRBG_reseed(&drbg, kDRBGEntropy2, kDRBGAD, sizeof(kDRBGAD)) || +- !CTR_DRBG_generate(&drbg, output, sizeof(output), kDRBGAD, +- sizeof(kDRBGAD))) { +- printf("DRBG failed\n"); +- goto err; +- } +- printf(" generated "); +- hexdump(output, sizeof(output)); +- CTR_DRBG_clear(&drbg); +- +- /* TLS KDF */ +- printf("About to run TLS KDF\n"); +- uint8_t tls_output[32]; +- if (!CRYPTO_tls1_prf(EVP_sha256(), tls_output, sizeof(tls_output), kAESKey, +- sizeof(kAESKey), "foo", 3, kPlaintextSHA256, +- sizeof(kPlaintextSHA256), kPlaintextSHA256, +- sizeof(kPlaintextSHA256))) { +- fprintf(stderr, "TLS KDF failed.\n"); +- goto err; +- } +- printf(" got "); +- hexdump(tls_output, sizeof(tls_output)); +- +- /* FFDH */ +- printf("About to compute FFDH key-agreement:\n"); +- DH *dh = DH_get_rfc7919_2048(); +- uint8_t dh_result[2048/8]; +- if (!dh || +- !DH_generate_key(dh) || +- sizeof(dh_result) != DH_size(dh) || +- DH_compute_key_padded(dh_result, DH_get0_pub_key(dh), dh) != +- sizeof(dh_result)) { +- fprintf(stderr, "FFDH failed.\n"); +- goto err; +- } +- DH_free(dh); +- +- printf(" got "); +- hexdump(dh_result, sizeof(dh_result)); +- +- printf("PASS\n"); +- return 0; +- +-err: +- printf("FAIL\n"); +- abort(); +-} +diff --git a/src/util/fipstools/delocate/delocate.peg b/src/util/fipstools/delocate/delocate.peg +index f79ed76..c253a48 100644 +--- a/src/util/fipstools/delocate/delocate.peg ++++ b/src/util/fipstools/delocate/delocate.peg +@@ -44,7 +44,7 @@ SymbolShift <- ('<<' / '>>') WS? [0-9]+ + SymbolArg <- (OpenParen WS?)? ( + Offset / + SymbolType / +- (Offset / LocalSymbol / SymbolName / Dot) WS? Operator WS? (Offset / LocalSymbol / SymbolName) / ++ (Offset / LocalSymbol / SymbolName / Dot) (WS? Operator WS? (Offset / LocalSymbol / SymbolName))* / + LocalSymbol TCMarker? / + SymbolName Offset / + SymbolName TCMarker?) +diff --git a/src/util/fipstools/delocate/delocate.peg.go b/src/util/fipstools/delocate/delocate.peg.go +index 56c4a20..ea7c195 100644 +--- a/src/util/fipstools/delocate/delocate.peg.go ++++ b/src/util/fipstools/delocate/delocate.peg.go +@@ -2540,7 +2540,7 @@ func (p *Asm) Init() { + position, tokenIndex = position291, tokenIndex291 + return false + }, +- /* 16 SymbolArg <- <((OpenParen WS?)? (Offset / SymbolType / ((Offset / LocalSymbol / SymbolName / Dot) WS? Operator WS? (Offset / LocalSymbol / SymbolName)) / (LocalSymbol TCMarker?) / (SymbolName Offset) / (SymbolName TCMarker?)) (WS? CloseParen)? (WS? SymbolShift)?)> */ ++ /* 16 SymbolArg <- <((OpenParen WS?)? 
(Offset / SymbolType / ((Offset / LocalSymbol / SymbolName / Dot) (WS? Operator WS? (Offset / LocalSymbol / SymbolName))*) / (LocalSymbol TCMarker?) / (SymbolName Offset) / (SymbolName TCMarker?)) (WS? CloseParen)? (WS? SymbolShift)?)> */ + func() bool { + position299, tokenIndex299 := position, tokenIndex + { +@@ -2604,131 +2604,138 @@ func (p *Asm) Init() { + } + } + l309: ++ l313: + { +- position313, tokenIndex313 := position, tokenIndex +- if !_rules[ruleWS]() { +- goto l313 ++ position314, tokenIndex314 := position, tokenIndex ++ { ++ position315, tokenIndex315 := position, tokenIndex ++ if !_rules[ruleWS]() { ++ goto l315 ++ } ++ goto l316 ++ l315: ++ position, tokenIndex = position315, tokenIndex315 + } +- goto l314 +- l313: +- position, tokenIndex = position313, tokenIndex313 +- } +- l314: +- if !_rules[ruleOperator]() { +- goto l308 +- } +- { +- position315, tokenIndex315 := position, tokenIndex +- if !_rules[ruleWS]() { +- goto l315 ++ l316: ++ if !_rules[ruleOperator]() { ++ goto l314 + } +- goto l316 +- l315: +- position, tokenIndex = position315, tokenIndex315 +- } +- l316: +- { +- position317, tokenIndex317 := position, tokenIndex +- if !_rules[ruleOffset]() { ++ { ++ position317, tokenIndex317 := position, tokenIndex ++ if !_rules[ruleWS]() { ++ goto l317 ++ } + goto l318 ++ l317: ++ position, tokenIndex = position317, tokenIndex317 + } +- goto l317 + l318: +- position, tokenIndex = position317, tokenIndex317 +- if !_rules[ruleLocalSymbol]() { ++ { ++ position319, tokenIndex319 := position, tokenIndex ++ if !_rules[ruleOffset]() { ++ goto l320 ++ } ++ goto l319 ++ l320: ++ position, tokenIndex = position319, tokenIndex319 ++ if !_rules[ruleLocalSymbol]() { ++ goto l321 ++ } + goto l319 ++ l321: ++ position, tokenIndex = position319, tokenIndex319 ++ if !_rules[ruleSymbolName]() { ++ goto l314 ++ } + } +- goto l317 + l319: +- position, tokenIndex = position317, tokenIndex317 +- if !_rules[ruleSymbolName]() { +- goto l308 +- } ++ goto l313 ++ l314: ++ position, tokenIndex = position314, tokenIndex314 + } +- l317: + goto l305 + l308: + position, tokenIndex = position305, tokenIndex305 + if !_rules[ruleLocalSymbol]() { +- goto l320 ++ goto l322 + } + { +- position321, tokenIndex321 := position, tokenIndex ++ position323, tokenIndex323 := position, tokenIndex + if !_rules[ruleTCMarker]() { +- goto l321 ++ goto l323 + } +- goto l322 +- l321: +- position, tokenIndex = position321, tokenIndex321 ++ goto l324 ++ l323: ++ position, tokenIndex = position323, tokenIndex323 + } +- l322: ++ l324: + goto l305 +- l320: ++ l322: + position, tokenIndex = position305, tokenIndex305 + if !_rules[ruleSymbolName]() { +- goto l323 ++ goto l325 + } + if !_rules[ruleOffset]() { +- goto l323 ++ goto l325 + } + goto l305 +- l323: ++ l325: + position, tokenIndex = position305, tokenIndex305 + if !_rules[ruleSymbolName]() { + goto l299 + } + { +- position324, tokenIndex324 := position, tokenIndex ++ position326, tokenIndex326 := position, tokenIndex + if !_rules[ruleTCMarker]() { +- goto l324 ++ goto l326 + } +- goto l325 +- l324: +- position, tokenIndex = position324, tokenIndex324 ++ goto l327 ++ l326: ++ position, tokenIndex = position326, tokenIndex326 + } +- l325: ++ l327: + } + l305: + { +- position326, tokenIndex326 := position, tokenIndex ++ position328, tokenIndex328 := position, tokenIndex + { +- position328, tokenIndex328 := position, tokenIndex ++ position330, tokenIndex330 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l328 ++ goto l330 + } +- goto l329 +- l328: +- position, 
tokenIndex = position328, tokenIndex328 ++ goto l331 ++ l330: ++ position, tokenIndex = position330, tokenIndex330 + } +- l329: ++ l331: + if !_rules[ruleCloseParen]() { +- goto l326 ++ goto l328 + } +- goto l327 +- l326: +- position, tokenIndex = position326, tokenIndex326 ++ goto l329 ++ l328: ++ position, tokenIndex = position328, tokenIndex328 + } +- l327: ++ l329: + { +- position330, tokenIndex330 := position, tokenIndex ++ position332, tokenIndex332 := position, tokenIndex + { +- position332, tokenIndex332 := position, tokenIndex ++ position334, tokenIndex334 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l332 ++ goto l334 + } +- goto l333 +- l332: +- position, tokenIndex = position332, tokenIndex332 ++ goto l335 ++ l334: ++ position, tokenIndex = position334, tokenIndex334 + } +- l333: ++ l335: + if !_rules[ruleSymbolShift]() { +- goto l330 ++ goto l332 + } +- goto l331 +- l330: +- position, tokenIndex = position330, tokenIndex330 ++ goto l333 ++ l332: ++ position, tokenIndex = position332, tokenIndex332 + } +- l331: ++ l333: + add(ruleSymbolArg, position300) + } + return true +@@ -2738,3250 +2745,3240 @@ func (p *Asm) Init() { + }, + /* 17 OpenParen <- <'('> */ + func() bool { +- position334, tokenIndex334 := position, tokenIndex ++ position336, tokenIndex336 := position, tokenIndex + { +- position335 := position ++ position337 := position + if buffer[position] != rune('(') { +- goto l334 ++ goto l336 + } + position++ +- add(ruleOpenParen, position335) ++ add(ruleOpenParen, position337) + } + return true +- l334: +- position, tokenIndex = position334, tokenIndex334 ++ l336: ++ position, tokenIndex = position336, tokenIndex336 + return false + }, + /* 18 CloseParen <- <')'> */ + func() bool { +- position336, tokenIndex336 := position, tokenIndex ++ position338, tokenIndex338 := position, tokenIndex + { +- position337 := position ++ position339 := position + if buffer[position] != rune(')') { +- goto l336 ++ goto l338 + } + position++ +- add(ruleCloseParen, position337) ++ add(ruleCloseParen, position339) + } + return true +- l336: +- position, tokenIndex = position336, tokenIndex336 ++ l338: ++ position, tokenIndex = position338, tokenIndex338 + return false + }, + /* 19 SymbolType <- <(('@' / '%') (('f' 'u' 'n' 'c' 't' 'i' 'o' 'n') / ('o' 'b' 'j' 'e' 'c' 't')))> */ + func() bool { +- position338, tokenIndex338 := position, tokenIndex ++ position340, tokenIndex340 := position, tokenIndex + { +- position339 := position ++ position341 := position + { +- position340, tokenIndex340 := position, tokenIndex ++ position342, tokenIndex342 := position, tokenIndex + if buffer[position] != rune('@') { +- goto l341 ++ goto l343 + } + position++ +- goto l340 +- l341: +- position, tokenIndex = position340, tokenIndex340 ++ goto l342 ++ l343: ++ position, tokenIndex = position342, tokenIndex342 + if buffer[position] != rune('%') { +- goto l338 ++ goto l340 + } + position++ + } +- l340: ++ l342: + { +- position342, tokenIndex342 := position, tokenIndex ++ position344, tokenIndex344 := position, tokenIndex + if buffer[position] != rune('f') { +- goto l343 ++ goto l345 + } + position++ + if buffer[position] != rune('u') { +- goto l343 ++ goto l345 + } + position++ + if buffer[position] != rune('n') { +- goto l343 ++ goto l345 + } + position++ + if buffer[position] != rune('c') { +- goto l343 ++ goto l345 + } + position++ + if buffer[position] != rune('t') { +- goto l343 ++ goto l345 + } + position++ + if buffer[position] != rune('i') { +- goto l343 ++ goto l345 + } + position++ + if 
buffer[position] != rune('o') { +- goto l343 ++ goto l345 + } + position++ + if buffer[position] != rune('n') { +- goto l343 ++ goto l345 + } + position++ +- goto l342 +- l343: +- position, tokenIndex = position342, tokenIndex342 ++ goto l344 ++ l345: ++ position, tokenIndex = position344, tokenIndex344 + if buffer[position] != rune('o') { +- goto l338 ++ goto l340 + } + position++ + if buffer[position] != rune('b') { +- goto l338 ++ goto l340 + } + position++ + if buffer[position] != rune('j') { +- goto l338 ++ goto l340 + } + position++ + if buffer[position] != rune('e') { +- goto l338 ++ goto l340 + } + position++ + if buffer[position] != rune('c') { +- goto l338 ++ goto l340 + } + position++ + if buffer[position] != rune('t') { +- goto l338 ++ goto l340 + } + position++ + } +- l342: +- add(ruleSymbolType, position339) ++ l344: ++ add(ruleSymbolType, position341) + } + return true +- l338: +- position, tokenIndex = position338, tokenIndex338 ++ l340: ++ position, tokenIndex = position340, tokenIndex340 + return false + }, + /* 20 Dot <- <'.'> */ + func() bool { +- position344, tokenIndex344 := position, tokenIndex ++ position346, tokenIndex346 := position, tokenIndex + { +- position345 := position ++ position347 := position + if buffer[position] != rune('.') { +- goto l344 ++ goto l346 + } + position++ +- add(ruleDot, position345) ++ add(ruleDot, position347) + } + return true +- l344: +- position, tokenIndex = position344, tokenIndex344 ++ l346: ++ position, tokenIndex = position346, tokenIndex346 + return false + }, + /* 21 TCMarker <- <('[' 'T' 'C' ']')> */ + func() bool { +- position346, tokenIndex346 := position, tokenIndex ++ position348, tokenIndex348 := position, tokenIndex + { +- position347 := position ++ position349 := position + if buffer[position] != rune('[') { +- goto l346 ++ goto l348 + } + position++ + if buffer[position] != rune('T') { +- goto l346 ++ goto l348 + } + position++ + if buffer[position] != rune('C') { +- goto l346 ++ goto l348 + } + position++ + if buffer[position] != rune(']') { +- goto l346 ++ goto l348 + } + position++ +- add(ruleTCMarker, position347) ++ add(ruleTCMarker, position349) + } + return true +- l346: +- position, tokenIndex = position346, tokenIndex346 ++ l348: ++ position, tokenIndex = position348, tokenIndex348 + return false + }, + /* 22 EscapedChar <- <('\\' .)> */ + func() bool { +- position348, tokenIndex348 := position, tokenIndex ++ position350, tokenIndex350 := position, tokenIndex + { +- position349 := position ++ position351 := position + if buffer[position] != rune('\\') { +- goto l348 ++ goto l350 + } + position++ + if !matchDot() { +- goto l348 ++ goto l350 + } +- add(ruleEscapedChar, position349) ++ add(ruleEscapedChar, position351) + } + return true +- l348: +- position, tokenIndex = position348, tokenIndex348 ++ l350: ++ position, tokenIndex = position350, tokenIndex350 + return false + }, + /* 23 WS <- <(' ' / '\t')+> */ + func() bool { +- position350, tokenIndex350 := position, tokenIndex ++ position352, tokenIndex352 := position, tokenIndex + { +- position351 := position ++ position353 := position + { +- position354, tokenIndex354 := position, tokenIndex ++ position356, tokenIndex356 := position, tokenIndex + if buffer[position] != rune(' ') { +- goto l355 ++ goto l357 + } + position++ +- goto l354 +- l355: +- position, tokenIndex = position354, tokenIndex354 ++ goto l356 ++ l357: ++ position, tokenIndex = position356, tokenIndex356 + if buffer[position] != rune('\t') { +- goto l350 ++ goto l352 + } + position++ + } ++ 
l356: + l354: +- l352: + { +- position353, tokenIndex353 := position, tokenIndex ++ position355, tokenIndex355 := position, tokenIndex + { +- position356, tokenIndex356 := position, tokenIndex ++ position358, tokenIndex358 := position, tokenIndex + if buffer[position] != rune(' ') { +- goto l357 ++ goto l359 + } + position++ +- goto l356 +- l357: +- position, tokenIndex = position356, tokenIndex356 ++ goto l358 ++ l359: ++ position, tokenIndex = position358, tokenIndex358 + if buffer[position] != rune('\t') { +- goto l353 ++ goto l355 + } + position++ + } +- l356: +- goto l352 +- l353: +- position, tokenIndex = position353, tokenIndex353 ++ l358: ++ goto l354 ++ l355: ++ position, tokenIndex = position355, tokenIndex355 + } +- add(ruleWS, position351) ++ add(ruleWS, position353) + } + return true +- l350: +- position, tokenIndex = position350, tokenIndex350 ++ l352: ++ position, tokenIndex = position352, tokenIndex352 + return false + }, + /* 24 Comment <- <((('/' '/') / '#') (!'\n' .)*)> */ + func() bool { +- position358, tokenIndex358 := position, tokenIndex ++ position360, tokenIndex360 := position, tokenIndex + { +- position359 := position ++ position361 := position + { +- position360, tokenIndex360 := position, tokenIndex ++ position362, tokenIndex362 := position, tokenIndex + if buffer[position] != rune('/') { +- goto l361 ++ goto l363 + } + position++ + if buffer[position] != rune('/') { +- goto l361 ++ goto l363 + } + position++ +- goto l360 +- l361: +- position, tokenIndex = position360, tokenIndex360 ++ goto l362 ++ l363: ++ position, tokenIndex = position362, tokenIndex362 + if buffer[position] != rune('#') { +- goto l358 ++ goto l360 + } + position++ + } +- l360: + l362: ++ l364: + { +- position363, tokenIndex363 := position, tokenIndex ++ position365, tokenIndex365 := position, tokenIndex + { +- position364, tokenIndex364 := position, tokenIndex ++ position366, tokenIndex366 := position, tokenIndex + if buffer[position] != rune('\n') { +- goto l364 ++ goto l366 + } + position++ +- goto l363 +- l364: +- position, tokenIndex = position364, tokenIndex364 ++ goto l365 ++ l366: ++ position, tokenIndex = position366, tokenIndex366 + } + if !matchDot() { +- goto l363 ++ goto l365 + } +- goto l362 +- l363: +- position, tokenIndex = position363, tokenIndex363 ++ goto l364 ++ l365: ++ position, tokenIndex = position365, tokenIndex365 + } +- add(ruleComment, position359) ++ add(ruleComment, position361) + } + return true +- l358: +- position, tokenIndex = position358, tokenIndex358 ++ l360: ++ position, tokenIndex = position360, tokenIndex360 + return false + }, + /* 25 Label <- <((LocalSymbol / LocalLabel / SymbolName) ':')> */ + func() bool { +- position365, tokenIndex365 := position, tokenIndex ++ position367, tokenIndex367 := position, tokenIndex + { +- position366 := position ++ position368 := position + { +- position367, tokenIndex367 := position, tokenIndex ++ position369, tokenIndex369 := position, tokenIndex + if !_rules[ruleLocalSymbol]() { +- goto l368 ++ goto l370 + } +- goto l367 +- l368: +- position, tokenIndex = position367, tokenIndex367 ++ goto l369 ++ l370: ++ position, tokenIndex = position369, tokenIndex369 + if !_rules[ruleLocalLabel]() { +- goto l369 ++ goto l371 + } +- goto l367 +- l369: +- position, tokenIndex = position367, tokenIndex367 ++ goto l369 ++ l371: ++ position, tokenIndex = position369, tokenIndex369 + if !_rules[ruleSymbolName]() { +- goto l365 ++ goto l367 + } + } +- l367: ++ l369: + if buffer[position] != rune(':') { +- goto l365 ++ goto l367 + } + 
position++ +- add(ruleLabel, position366) ++ add(ruleLabel, position368) + } + return true +- l365: +- position, tokenIndex = position365, tokenIndex365 ++ l367: ++ position, tokenIndex = position367, tokenIndex367 + return false + }, + /* 26 SymbolName <- <(([a-z] / [A-Z] / '.' / '_') ([a-z] / [A-Z] / '.' / ([0-9] / [0-9]) / '$' / '_')*)> */ + func() bool { +- position370, tokenIndex370 := position, tokenIndex ++ position372, tokenIndex372 := position, tokenIndex + { +- position371 := position ++ position373 := position + { +- position372, tokenIndex372 := position, tokenIndex ++ position374, tokenIndex374 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l373 ++ goto l375 + } + position++ +- goto l372 +- l373: +- position, tokenIndex = position372, tokenIndex372 ++ goto l374 ++ l375: ++ position, tokenIndex = position374, tokenIndex374 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l374 ++ goto l376 + } + position++ +- goto l372 +- l374: +- position, tokenIndex = position372, tokenIndex372 ++ goto l374 ++ l376: ++ position, tokenIndex = position374, tokenIndex374 + if buffer[position] != rune('.') { +- goto l375 ++ goto l377 + } + position++ +- goto l372 +- l375: +- position, tokenIndex = position372, tokenIndex372 ++ goto l374 ++ l377: ++ position, tokenIndex = position374, tokenIndex374 + if buffer[position] != rune('_') { +- goto l370 ++ goto l372 + } + position++ + } +- l372: +- l376: ++ l374: ++ l378: + { +- position377, tokenIndex377 := position, tokenIndex ++ position379, tokenIndex379 := position, tokenIndex + { +- position378, tokenIndex378 := position, tokenIndex ++ position380, tokenIndex380 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l379 ++ goto l381 + } + position++ +- goto l378 +- l379: +- position, tokenIndex = position378, tokenIndex378 ++ goto l380 ++ l381: ++ position, tokenIndex = position380, tokenIndex380 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l380 ++ goto l382 + } + position++ +- goto l378 +- l380: +- position, tokenIndex = position378, tokenIndex378 ++ goto l380 ++ l382: ++ position, tokenIndex = position380, tokenIndex380 + if buffer[position] != rune('.') { +- goto l381 ++ goto l383 + } + position++ +- goto l378 +- l381: +- position, tokenIndex = position378, tokenIndex378 ++ goto l380 ++ l383: ++ position, tokenIndex = position380, tokenIndex380 + { +- position383, tokenIndex383 := position, tokenIndex ++ position385, tokenIndex385 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l384 ++ goto l386 + } + position++ +- goto l383 +- l384: +- position, tokenIndex = position383, tokenIndex383 ++ goto l385 ++ l386: ++ position, tokenIndex = position385, tokenIndex385 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l382 ++ goto l384 + } + position++ + } +- l383: +- goto l378 +- l382: +- position, tokenIndex = position378, tokenIndex378 ++ l385: ++ goto l380 ++ l384: ++ position, tokenIndex = position380, tokenIndex380 + if buffer[position] != rune('$') { +- goto l385 ++ goto l387 + } + position++ +- goto l378 +- l385: +- position, tokenIndex = position378, tokenIndex378 ++ goto l380 ++ l387: ++ position, tokenIndex = position380, tokenIndex380 + if buffer[position] != rune('_') { +- goto l377 ++ goto l379 + } + position++ + } +- l378: +- goto l376 +- l377: +- position, tokenIndex = position377, tokenIndex377 ++ l380: ++ goto l378 ++ l379: ++ position, tokenIndex 
= position379, tokenIndex379 + } +- add(ruleSymbolName, position371) ++ add(ruleSymbolName, position373) + } + return true +- l370: +- position, tokenIndex = position370, tokenIndex370 ++ l372: ++ position, tokenIndex = position372, tokenIndex372 + return false + }, + /* 27 LocalSymbol <- <('.' 'L' ([a-z] / [A-Z] / ([a-z] / [A-Z]) / '.' / ([0-9] / [0-9]) / '$' / '_')+)> */ + func() bool { +- position386, tokenIndex386 := position, tokenIndex ++ position388, tokenIndex388 := position, tokenIndex + { +- position387 := position ++ position389 := position + if buffer[position] != rune('.') { +- goto l386 ++ goto l388 + } + position++ + if buffer[position] != rune('L') { +- goto l386 ++ goto l388 + } + position++ + { +- position390, tokenIndex390 := position, tokenIndex ++ position392, tokenIndex392 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l391 ++ goto l393 + } + position++ +- goto l390 +- l391: +- position, tokenIndex = position390, tokenIndex390 ++ goto l392 ++ l393: ++ position, tokenIndex = position392, tokenIndex392 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l392 ++ goto l394 + } + position++ +- goto l390 +- l392: +- position, tokenIndex = position390, tokenIndex390 ++ goto l392 ++ l394: ++ position, tokenIndex = position392, tokenIndex392 + { +- position394, tokenIndex394 := position, tokenIndex ++ position396, tokenIndex396 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l395 ++ goto l397 + } + position++ +- goto l394 +- l395: +- position, tokenIndex = position394, tokenIndex394 ++ goto l396 ++ l397: ++ position, tokenIndex = position396, tokenIndex396 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l393 ++ goto l395 + } + position++ + } +- l394: +- goto l390 +- l393: +- position, tokenIndex = position390, tokenIndex390 ++ l396: ++ goto l392 ++ l395: ++ position, tokenIndex = position392, tokenIndex392 + if buffer[position] != rune('.') { +- goto l396 ++ goto l398 + } + position++ +- goto l390 +- l396: +- position, tokenIndex = position390, tokenIndex390 ++ goto l392 ++ l398: ++ position, tokenIndex = position392, tokenIndex392 + { +- position398, tokenIndex398 := position, tokenIndex ++ position400, tokenIndex400 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l399 ++ goto l401 + } + position++ +- goto l398 +- l399: +- position, tokenIndex = position398, tokenIndex398 ++ goto l400 ++ l401: ++ position, tokenIndex = position400, tokenIndex400 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l397 ++ goto l399 + } + position++ + } +- l398: +- goto l390 +- l397: +- position, tokenIndex = position390, tokenIndex390 ++ l400: ++ goto l392 ++ l399: ++ position, tokenIndex = position392, tokenIndex392 + if buffer[position] != rune('$') { +- goto l400 ++ goto l402 + } + position++ +- goto l390 +- l400: +- position, tokenIndex = position390, tokenIndex390 ++ goto l392 ++ l402: ++ position, tokenIndex = position392, tokenIndex392 + if buffer[position] != rune('_') { +- goto l386 ++ goto l388 + } + position++ + } ++ l392: + l390: +- l388: + { +- position389, tokenIndex389 := position, tokenIndex ++ position391, tokenIndex391 := position, tokenIndex + { +- position401, tokenIndex401 := position, tokenIndex ++ position403, tokenIndex403 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l402 ++ goto l404 + } + position++ +- goto l401 +- l402: +- position, 
tokenIndex = position401, tokenIndex401 ++ goto l403 ++ l404: ++ position, tokenIndex = position403, tokenIndex403 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l403 ++ goto l405 + } + position++ +- goto l401 +- l403: +- position, tokenIndex = position401, tokenIndex401 ++ goto l403 ++ l405: ++ position, tokenIndex = position403, tokenIndex403 + { +- position405, tokenIndex405 := position, tokenIndex ++ position407, tokenIndex407 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l406 ++ goto l408 + } + position++ +- goto l405 +- l406: +- position, tokenIndex = position405, tokenIndex405 ++ goto l407 ++ l408: ++ position, tokenIndex = position407, tokenIndex407 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l404 ++ goto l406 + } + position++ + } +- l405: +- goto l401 +- l404: +- position, tokenIndex = position401, tokenIndex401 ++ l407: ++ goto l403 ++ l406: ++ position, tokenIndex = position403, tokenIndex403 + if buffer[position] != rune('.') { +- goto l407 ++ goto l409 + } + position++ +- goto l401 +- l407: +- position, tokenIndex = position401, tokenIndex401 ++ goto l403 ++ l409: ++ position, tokenIndex = position403, tokenIndex403 + { +- position409, tokenIndex409 := position, tokenIndex ++ position411, tokenIndex411 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l410 ++ goto l412 + } + position++ +- goto l409 +- l410: +- position, tokenIndex = position409, tokenIndex409 ++ goto l411 ++ l412: ++ position, tokenIndex = position411, tokenIndex411 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l408 ++ goto l410 + } + position++ + } +- l409: +- goto l401 +- l408: +- position, tokenIndex = position401, tokenIndex401 ++ l411: ++ goto l403 ++ l410: ++ position, tokenIndex = position403, tokenIndex403 + if buffer[position] != rune('$') { +- goto l411 ++ goto l413 + } + position++ +- goto l401 +- l411: +- position, tokenIndex = position401, tokenIndex401 ++ goto l403 ++ l413: ++ position, tokenIndex = position403, tokenIndex403 + if buffer[position] != rune('_') { +- goto l389 ++ goto l391 + } + position++ + } +- l401: +- goto l388 +- l389: +- position, tokenIndex = position389, tokenIndex389 ++ l403: ++ goto l390 ++ l391: ++ position, tokenIndex = position391, tokenIndex391 + } +- add(ruleLocalSymbol, position387) ++ add(ruleLocalSymbol, position389) + } + return true +- l386: +- position, tokenIndex = position386, tokenIndex386 ++ l388: ++ position, tokenIndex = position388, tokenIndex388 + return false + }, + /* 28 LocalLabel <- <([0-9] ([0-9] / '$')*)> */ + func() bool { +- position412, tokenIndex412 := position, tokenIndex ++ position414, tokenIndex414 := position, tokenIndex + { +- position413 := position ++ position415 := position + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l412 ++ goto l414 + } + position++ +- l414: ++ l416: + { +- position415, tokenIndex415 := position, tokenIndex ++ position417, tokenIndex417 := position, tokenIndex + { +- position416, tokenIndex416 := position, tokenIndex ++ position418, tokenIndex418 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l417 ++ goto l419 + } + position++ +- goto l416 +- l417: +- position, tokenIndex = position416, tokenIndex416 ++ goto l418 ++ l419: ++ position, tokenIndex = position418, tokenIndex418 + if buffer[position] != rune('$') { +- goto l415 ++ goto l417 + } + position++ + } +- l416: +- goto l414 +- l415: +- 
position, tokenIndex = position415, tokenIndex415 ++ l418: ++ goto l416 ++ l417: ++ position, tokenIndex = position417, tokenIndex417 + } +- add(ruleLocalLabel, position413) ++ add(ruleLocalLabel, position415) + } + return true +- l412: +- position, tokenIndex = position412, tokenIndex412 ++ l414: ++ position, tokenIndex = position414, tokenIndex414 + return false + }, + /* 29 LocalLabelRef <- <([0-9] ([0-9] / '$')* ('b' / 'f'))> */ + func() bool { +- position418, tokenIndex418 := position, tokenIndex ++ position420, tokenIndex420 := position, tokenIndex + { +- position419 := position ++ position421 := position + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l418 ++ goto l420 + } + position++ +- l420: ++ l422: + { +- position421, tokenIndex421 := position, tokenIndex ++ position423, tokenIndex423 := position, tokenIndex + { +- position422, tokenIndex422 := position, tokenIndex ++ position424, tokenIndex424 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l423 ++ goto l425 + } + position++ +- goto l422 +- l423: +- position, tokenIndex = position422, tokenIndex422 ++ goto l424 ++ l425: ++ position, tokenIndex = position424, tokenIndex424 + if buffer[position] != rune('$') { +- goto l421 ++ goto l423 + } + position++ + } +- l422: +- goto l420 +- l421: +- position, tokenIndex = position421, tokenIndex421 ++ l424: ++ goto l422 ++ l423: ++ position, tokenIndex = position423, tokenIndex423 + } + { +- position424, tokenIndex424 := position, tokenIndex ++ position426, tokenIndex426 := position, tokenIndex + if buffer[position] != rune('b') { +- goto l425 ++ goto l427 + } + position++ +- goto l424 +- l425: +- position, tokenIndex = position424, tokenIndex424 ++ goto l426 ++ l427: ++ position, tokenIndex = position426, tokenIndex426 + if buffer[position] != rune('f') { +- goto l418 ++ goto l420 + } + position++ + } +- l424: +- add(ruleLocalLabelRef, position419) ++ l426: ++ add(ruleLocalLabelRef, position421) + } + return true +- l418: +- position, tokenIndex = position418, tokenIndex418 ++ l420: ++ position, tokenIndex = position420, tokenIndex420 + return false + }, + /* 30 Instruction <- <(InstructionName (WS InstructionArg (WS? ',' WS? 
InstructionArg)*)?)> */ + func() bool { +- position426, tokenIndex426 := position, tokenIndex ++ position428, tokenIndex428 := position, tokenIndex + { +- position427 := position ++ position429 := position + if !_rules[ruleInstructionName]() { +- goto l426 ++ goto l428 + } + { +- position428, tokenIndex428 := position, tokenIndex ++ position430, tokenIndex430 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l428 ++ goto l430 + } + if !_rules[ruleInstructionArg]() { +- goto l428 ++ goto l430 + } +- l430: ++ l432: + { +- position431, tokenIndex431 := position, tokenIndex ++ position433, tokenIndex433 := position, tokenIndex + { +- position432, tokenIndex432 := position, tokenIndex ++ position434, tokenIndex434 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l432 ++ goto l434 + } +- goto l433 +- l432: +- position, tokenIndex = position432, tokenIndex432 ++ goto l435 ++ l434: ++ position, tokenIndex = position434, tokenIndex434 + } +- l433: ++ l435: + if buffer[position] != rune(',') { +- goto l431 ++ goto l433 + } + position++ + { +- position434, tokenIndex434 := position, tokenIndex ++ position436, tokenIndex436 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l434 ++ goto l436 + } +- goto l435 +- l434: +- position, tokenIndex = position434, tokenIndex434 ++ goto l437 ++ l436: ++ position, tokenIndex = position436, tokenIndex436 + } +- l435: ++ l437: + if !_rules[ruleInstructionArg]() { +- goto l431 ++ goto l433 + } +- goto l430 +- l431: +- position, tokenIndex = position431, tokenIndex431 ++ goto l432 ++ l433: ++ position, tokenIndex = position433, tokenIndex433 + } +- goto l429 +- l428: +- position, tokenIndex = position428, tokenIndex428 ++ goto l431 ++ l430: ++ position, tokenIndex = position430, tokenIndex430 + } +- l429: +- add(ruleInstruction, position427) ++ l431: ++ add(ruleInstruction, position429) + } + return true +- l426: +- position, tokenIndex = position426, tokenIndex426 ++ l428: ++ position, tokenIndex = position428, tokenIndex428 + return false + }, + /* 31 InstructionName <- <(([a-z] / [A-Z]) ([a-z] / [A-Z] / '.' / ([0-9] / [0-9]))* ('.' 
/ '+' / '-')?)> */ + func() bool { +- position436, tokenIndex436 := position, tokenIndex ++ position438, tokenIndex438 := position, tokenIndex + { +- position437 := position ++ position439 := position + { +- position438, tokenIndex438 := position, tokenIndex ++ position440, tokenIndex440 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l439 ++ goto l441 + } + position++ +- goto l438 +- l439: +- position, tokenIndex = position438, tokenIndex438 ++ goto l440 ++ l441: ++ position, tokenIndex = position440, tokenIndex440 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l436 ++ goto l438 + } + position++ + } +- l438: + l440: ++ l442: + { +- position441, tokenIndex441 := position, tokenIndex ++ position443, tokenIndex443 := position, tokenIndex + { +- position442, tokenIndex442 := position, tokenIndex ++ position444, tokenIndex444 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l443 ++ goto l445 + } + position++ +- goto l442 +- l443: +- position, tokenIndex = position442, tokenIndex442 ++ goto l444 ++ l445: ++ position, tokenIndex = position444, tokenIndex444 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l444 ++ goto l446 + } + position++ +- goto l442 +- l444: +- position, tokenIndex = position442, tokenIndex442 ++ goto l444 ++ l446: ++ position, tokenIndex = position444, tokenIndex444 + if buffer[position] != rune('.') { +- goto l445 ++ goto l447 + } + position++ +- goto l442 +- l445: +- position, tokenIndex = position442, tokenIndex442 ++ goto l444 ++ l447: ++ position, tokenIndex = position444, tokenIndex444 + { +- position446, tokenIndex446 := position, tokenIndex ++ position448, tokenIndex448 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l447 ++ goto l449 + } + position++ +- goto l446 +- l447: +- position, tokenIndex = position446, tokenIndex446 ++ goto l448 ++ l449: ++ position, tokenIndex = position448, tokenIndex448 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l441 ++ goto l443 + } + position++ + } +- l446: ++ l448: + } +- l442: +- goto l440 +- l441: +- position, tokenIndex = position441, tokenIndex441 ++ l444: ++ goto l442 ++ l443: ++ position, tokenIndex = position443, tokenIndex443 + } + { +- position448, tokenIndex448 := position, tokenIndex ++ position450, tokenIndex450 := position, tokenIndex + { +- position450, tokenIndex450 := position, tokenIndex ++ position452, tokenIndex452 := position, tokenIndex + if buffer[position] != rune('.') { +- goto l451 ++ goto l453 + } + position++ +- goto l450 +- l451: +- position, tokenIndex = position450, tokenIndex450 ++ goto l452 ++ l453: ++ position, tokenIndex = position452, tokenIndex452 + if buffer[position] != rune('+') { +- goto l452 ++ goto l454 + } + position++ +- goto l450 +- l452: +- position, tokenIndex = position450, tokenIndex450 ++ goto l452 ++ l454: ++ position, tokenIndex = position452, tokenIndex452 + if buffer[position] != rune('-') { +- goto l448 ++ goto l450 + } + position++ + } ++ l452: ++ goto l451 + l450: +- goto l449 +- l448: +- position, tokenIndex = position448, tokenIndex448 ++ position, tokenIndex = position450, tokenIndex450 + } +- l449: +- add(ruleInstructionName, position437) ++ l451: ++ add(ruleInstructionName, position439) + } + return true +- l436: +- position, tokenIndex = position436, tokenIndex436 ++ l438: ++ position, tokenIndex = position438, tokenIndex438 + return false + }, + /* 32 InstructionArg <- 
<(IndirectionIndicator? (ARMConstantTweak / RegisterOrConstant / LocalLabelRef / TOCRefHigh / TOCRefLow / GOTLocation / GOTSymbolOffset / MemoryRef) AVX512Token*)> */ + func() bool { +- position453, tokenIndex453 := position, tokenIndex ++ position455, tokenIndex455 := position, tokenIndex + { +- position454 := position ++ position456 := position + { +- position455, tokenIndex455 := position, tokenIndex ++ position457, tokenIndex457 := position, tokenIndex + if !_rules[ruleIndirectionIndicator]() { +- goto l455 ++ goto l457 + } +- goto l456 +- l455: +- position, tokenIndex = position455, tokenIndex455 ++ goto l458 ++ l457: ++ position, tokenIndex = position457, tokenIndex457 + } +- l456: ++ l458: + { +- position457, tokenIndex457 := position, tokenIndex ++ position459, tokenIndex459 := position, tokenIndex + if !_rules[ruleARMConstantTweak]() { +- goto l458 +- } +- goto l457 +- l458: +- position, tokenIndex = position457, tokenIndex457 +- if !_rules[ruleRegisterOrConstant]() { +- goto l459 +- } +- goto l457 +- l459: +- position, tokenIndex = position457, tokenIndex457 +- if !_rules[ruleLocalLabelRef]() { + goto l460 + } +- goto l457 ++ goto l459 + l460: +- position, tokenIndex = position457, tokenIndex457 +- if !_rules[ruleTOCRefHigh]() { ++ position, tokenIndex = position459, tokenIndex459 ++ if !_rules[ruleRegisterOrConstant]() { + goto l461 + } +- goto l457 ++ goto l459 + l461: +- position, tokenIndex = position457, tokenIndex457 +- if !_rules[ruleTOCRefLow]() { ++ position, tokenIndex = position459, tokenIndex459 ++ if !_rules[ruleLocalLabelRef]() { + goto l462 + } +- goto l457 ++ goto l459 + l462: +- position, tokenIndex = position457, tokenIndex457 +- if !_rules[ruleGOTLocation]() { ++ position, tokenIndex = position459, tokenIndex459 ++ if !_rules[ruleTOCRefHigh]() { + goto l463 + } +- goto l457 ++ goto l459 + l463: +- position, tokenIndex = position457, tokenIndex457 +- if !_rules[ruleGOTSymbolOffset]() { ++ position, tokenIndex = position459, tokenIndex459 ++ if !_rules[ruleTOCRefLow]() { + goto l464 + } +- goto l457 ++ goto l459 + l464: +- position, tokenIndex = position457, tokenIndex457 ++ position, tokenIndex = position459, tokenIndex459 ++ if !_rules[ruleGOTLocation]() { ++ goto l465 ++ } ++ goto l459 ++ l465: ++ position, tokenIndex = position459, tokenIndex459 ++ if !_rules[ruleGOTSymbolOffset]() { ++ goto l466 ++ } ++ goto l459 ++ l466: ++ position, tokenIndex = position459, tokenIndex459 + if !_rules[ruleMemoryRef]() { +- goto l453 ++ goto l455 + } + } +- l457: +- l465: ++ l459: ++ l467: + { +- position466, tokenIndex466 := position, tokenIndex ++ position468, tokenIndex468 := position, tokenIndex + if !_rules[ruleAVX512Token]() { +- goto l466 ++ goto l468 + } +- goto l465 +- l466: +- position, tokenIndex = position466, tokenIndex466 ++ goto l467 ++ l468: ++ position, tokenIndex = position468, tokenIndex468 + } +- add(ruleInstructionArg, position454) ++ add(ruleInstructionArg, position456) + } + return true +- l453: +- position, tokenIndex = position453, tokenIndex453 ++ l455: ++ position, tokenIndex = position455, tokenIndex455 + return false + }, + /* 33 GOTLocation <- <('$' '_' 'G' 'L' 'O' 'B' 'A' 'L' '_' 'O' 'F' 'F' 'S' 'E' 'T' '_' 'T' 'A' 'B' 'L' 'E' '_' '-' LocalSymbol)> */ + func() bool { +- position467, tokenIndex467 := position, tokenIndex ++ position469, tokenIndex469 := position, tokenIndex + { +- position468 := position ++ position470 := position + if buffer[position] != rune('$') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('_') 
{ +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('G') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('L') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('O') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('B') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('A') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('L') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('_') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('O') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('F') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('F') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('S') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('E') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('T') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('_') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('T') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('A') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('B') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('L') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('E') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('_') { +- goto l467 ++ goto l469 + } + position++ + if buffer[position] != rune('-') { +- goto l467 ++ goto l469 + } + position++ + if !_rules[ruleLocalSymbol]() { +- goto l467 ++ goto l469 + } +- add(ruleGOTLocation, position468) ++ add(ruleGOTLocation, position470) + } + return true +- l467: +- position, tokenIndex = position467, tokenIndex467 ++ l469: ++ position, tokenIndex = position469, tokenIndex469 + return false + }, + /* 34 GOTSymbolOffset <- <(('$' SymbolName ('@' 'G' 'O' 'T') ('O' 'F' 'F')?) 
/ (':' ('g' / 'G') ('o' / 'O') ('t' / 'T') ':' SymbolName))> */ + func() bool { +- position469, tokenIndex469 := position, tokenIndex ++ position471, tokenIndex471 := position, tokenIndex + { +- position470 := position ++ position472 := position + { +- position471, tokenIndex471 := position, tokenIndex ++ position473, tokenIndex473 := position, tokenIndex + if buffer[position] != rune('$') { +- goto l472 ++ goto l474 + } + position++ + if !_rules[ruleSymbolName]() { +- goto l472 ++ goto l474 + } + if buffer[position] != rune('@') { +- goto l472 ++ goto l474 + } + position++ + if buffer[position] != rune('G') { +- goto l472 ++ goto l474 + } + position++ + if buffer[position] != rune('O') { +- goto l472 ++ goto l474 + } + position++ + if buffer[position] != rune('T') { +- goto l472 ++ goto l474 + } + position++ + { +- position473, tokenIndex473 := position, tokenIndex ++ position475, tokenIndex475 := position, tokenIndex + if buffer[position] != rune('O') { +- goto l473 ++ goto l475 + } + position++ + if buffer[position] != rune('F') { +- goto l473 ++ goto l475 + } + position++ + if buffer[position] != rune('F') { +- goto l473 ++ goto l475 + } + position++ +- goto l474 +- l473: +- position, tokenIndex = position473, tokenIndex473 ++ goto l476 ++ l475: ++ position, tokenIndex = position475, tokenIndex475 + } ++ l476: ++ goto l473 + l474: +- goto l471 +- l472: +- position, tokenIndex = position471, tokenIndex471 ++ position, tokenIndex = position473, tokenIndex473 + if buffer[position] != rune(':') { +- goto l469 ++ goto l471 + } + position++ +- { +- position475, tokenIndex475 := position, tokenIndex +- if buffer[position] != rune('g') { +- goto l476 +- } +- position++ +- goto l475 +- l476: +- position, tokenIndex = position475, tokenIndex475 +- if buffer[position] != rune('G') { +- goto l469 +- } +- position++ +- } +- l475: + { + position477, tokenIndex477 := position, tokenIndex +- if buffer[position] != rune('o') { ++ if buffer[position] != rune('g') { + goto l478 + } + position++ + goto l477 + l478: + position, tokenIndex = position477, tokenIndex477 +- if buffer[position] != rune('O') { +- goto l469 ++ if buffer[position] != rune('G') { ++ goto l471 + } + position++ + } + l477: + { + position479, tokenIndex479 := position, tokenIndex +- if buffer[position] != rune('t') { ++ if buffer[position] != rune('o') { + goto l480 + } + position++ + goto l479 + l480: + position, tokenIndex = position479, tokenIndex479 +- if buffer[position] != rune('T') { +- goto l469 ++ if buffer[position] != rune('O') { ++ goto l471 + } + position++ + } + l479: ++ { ++ position481, tokenIndex481 := position, tokenIndex ++ if buffer[position] != rune('t') { ++ goto l482 ++ } ++ position++ ++ goto l481 ++ l482: ++ position, tokenIndex = position481, tokenIndex481 ++ if buffer[position] != rune('T') { ++ goto l471 ++ } ++ position++ ++ } ++ l481: + if buffer[position] != rune(':') { +- goto l469 ++ goto l471 + } + position++ + if !_rules[ruleSymbolName]() { +- goto l469 ++ goto l471 + } + } +- l471: +- add(ruleGOTSymbolOffset, position470) ++ l473: ++ add(ruleGOTSymbolOffset, position472) + } + return true +- l469: +- position, tokenIndex = position469, tokenIndex469 ++ l471: ++ position, tokenIndex = position471, tokenIndex471 + return false + }, + /* 35 AVX512Token <- <(WS? '{' '%'? 
([0-9] / [a-z])* '}')> */ + func() bool { +- position481, tokenIndex481 := position, tokenIndex ++ position483, tokenIndex483 := position, tokenIndex + { +- position482 := position ++ position484 := position + { +- position483, tokenIndex483 := position, tokenIndex ++ position485, tokenIndex485 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l483 ++ goto l485 + } +- goto l484 +- l483: +- position, tokenIndex = position483, tokenIndex483 ++ goto l486 ++ l485: ++ position, tokenIndex = position485, tokenIndex485 + } +- l484: ++ l486: + if buffer[position] != rune('{') { +- goto l481 ++ goto l483 + } + position++ + { +- position485, tokenIndex485 := position, tokenIndex ++ position487, tokenIndex487 := position, tokenIndex + if buffer[position] != rune('%') { +- goto l485 ++ goto l487 + } + position++ +- goto l486 +- l485: +- position, tokenIndex = position485, tokenIndex485 ++ goto l488 ++ l487: ++ position, tokenIndex = position487, tokenIndex487 + } +- l486: +- l487: ++ l488: ++ l489: + { +- position488, tokenIndex488 := position, tokenIndex ++ position490, tokenIndex490 := position, tokenIndex + { +- position489, tokenIndex489 := position, tokenIndex ++ position491, tokenIndex491 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l490 ++ goto l492 + } + position++ +- goto l489 +- l490: +- position, tokenIndex = position489, tokenIndex489 ++ goto l491 ++ l492: ++ position, tokenIndex = position491, tokenIndex491 + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l488 ++ goto l490 + } + position++ + } +- l489: +- goto l487 +- l488: +- position, tokenIndex = position488, tokenIndex488 ++ l491: ++ goto l489 ++ l490: ++ position, tokenIndex = position490, tokenIndex490 + } + if buffer[position] != rune('}') { +- goto l481 ++ goto l483 + } + position++ +- add(ruleAVX512Token, position482) ++ add(ruleAVX512Token, position484) + } + return true +- l481: +- position, tokenIndex = position481, tokenIndex481 ++ l483: ++ position, tokenIndex = position483, tokenIndex483 + return false + }, + /* 36 TOCRefHigh <- <('.' 'T' 'O' 'C' '.' '-' (('0' 'b') / ('.' 
'L' ([a-z] / [A-Z] / '_' / [0-9])+)) ('@' ('h' / 'H') ('a' / 'A')))> */ + func() bool { +- position491, tokenIndex491 := position, tokenIndex ++ position493, tokenIndex493 := position, tokenIndex + { +- position492 := position ++ position494 := position + if buffer[position] != rune('.') { +- goto l491 ++ goto l493 + } + position++ + if buffer[position] != rune('T') { +- goto l491 ++ goto l493 + } + position++ + if buffer[position] != rune('O') { +- goto l491 ++ goto l493 + } + position++ + if buffer[position] != rune('C') { +- goto l491 ++ goto l493 + } + position++ + if buffer[position] != rune('.') { +- goto l491 ++ goto l493 + } + position++ + if buffer[position] != rune('-') { +- goto l491 ++ goto l493 + } + position++ + { +- position493, tokenIndex493 := position, tokenIndex ++ position495, tokenIndex495 := position, tokenIndex + if buffer[position] != rune('0') { +- goto l494 ++ goto l496 + } + position++ + if buffer[position] != rune('b') { +- goto l494 ++ goto l496 + } + position++ +- goto l493 +- l494: +- position, tokenIndex = position493, tokenIndex493 ++ goto l495 ++ l496: ++ position, tokenIndex = position495, tokenIndex495 + if buffer[position] != rune('.') { +- goto l491 ++ goto l493 + } + position++ + if buffer[position] != rune('L') { +- goto l491 ++ goto l493 + } + position++ + { +- position497, tokenIndex497 := position, tokenIndex ++ position499, tokenIndex499 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l498 ++ goto l500 + } + position++ +- goto l497 +- l498: +- position, tokenIndex = position497, tokenIndex497 ++ goto l499 ++ l500: ++ position, tokenIndex = position499, tokenIndex499 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l499 ++ goto l501 + } + position++ +- goto l497 +- l499: +- position, tokenIndex = position497, tokenIndex497 ++ goto l499 ++ l501: ++ position, tokenIndex = position499, tokenIndex499 + if buffer[position] != rune('_') { +- goto l500 ++ goto l502 + } + position++ +- goto l497 +- l500: +- position, tokenIndex = position497, tokenIndex497 ++ goto l499 ++ l502: ++ position, tokenIndex = position499, tokenIndex499 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l491 ++ goto l493 + } + position++ + } ++ l499: + l497: +- l495: + { +- position496, tokenIndex496 := position, tokenIndex ++ position498, tokenIndex498 := position, tokenIndex + { +- position501, tokenIndex501 := position, tokenIndex ++ position503, tokenIndex503 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l502 ++ goto l504 + } + position++ +- goto l501 +- l502: +- position, tokenIndex = position501, tokenIndex501 ++ goto l503 ++ l504: ++ position, tokenIndex = position503, tokenIndex503 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l503 ++ goto l505 + } + position++ +- goto l501 +- l503: +- position, tokenIndex = position501, tokenIndex501 ++ goto l503 ++ l505: ++ position, tokenIndex = position503, tokenIndex503 + if buffer[position] != rune('_') { +- goto l504 ++ goto l506 + } + position++ +- goto l501 +- l504: +- position, tokenIndex = position501, tokenIndex501 ++ goto l503 ++ l506: ++ position, tokenIndex = position503, tokenIndex503 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l496 ++ goto l498 + } + position++ + } +- l501: +- goto l495 +- l496: +- position, tokenIndex = position496, tokenIndex496 ++ l503: ++ goto l497 ++ l498: ++ position, tokenIndex = position498, tokenIndex498 + } + } +- l493: 
++ l495: + if buffer[position] != rune('@') { +- goto l491 ++ goto l493 + } + position++ + { +- position505, tokenIndex505 := position, tokenIndex ++ position507, tokenIndex507 := position, tokenIndex + if buffer[position] != rune('h') { +- goto l506 ++ goto l508 + } + position++ +- goto l505 +- l506: +- position, tokenIndex = position505, tokenIndex505 ++ goto l507 ++ l508: ++ position, tokenIndex = position507, tokenIndex507 + if buffer[position] != rune('H') { +- goto l491 ++ goto l493 + } + position++ + } +- l505: ++ l507: + { +- position507, tokenIndex507 := position, tokenIndex ++ position509, tokenIndex509 := position, tokenIndex + if buffer[position] != rune('a') { +- goto l508 ++ goto l510 + } + position++ +- goto l507 +- l508: +- position, tokenIndex = position507, tokenIndex507 ++ goto l509 ++ l510: ++ position, tokenIndex = position509, tokenIndex509 + if buffer[position] != rune('A') { +- goto l491 ++ goto l493 + } + position++ + } +- l507: +- add(ruleTOCRefHigh, position492) ++ l509: ++ add(ruleTOCRefHigh, position494) + } + return true +- l491: +- position, tokenIndex = position491, tokenIndex491 ++ l493: ++ position, tokenIndex = position493, tokenIndex493 + return false + }, + /* 37 TOCRefLow <- <('.' 'T' 'O' 'C' '.' '-' (('0' 'b') / ('.' 'L' ([a-z] / [A-Z] / '_' / [0-9])+)) ('@' ('l' / 'L')))> */ + func() bool { +- position509, tokenIndex509 := position, tokenIndex ++ position511, tokenIndex511 := position, tokenIndex + { +- position510 := position ++ position512 := position + if buffer[position] != rune('.') { +- goto l509 ++ goto l511 + } + position++ + if buffer[position] != rune('T') { +- goto l509 ++ goto l511 + } + position++ + if buffer[position] != rune('O') { +- goto l509 ++ goto l511 + } + position++ + if buffer[position] != rune('C') { +- goto l509 ++ goto l511 + } + position++ + if buffer[position] != rune('.') { +- goto l509 ++ goto l511 + } + position++ + if buffer[position] != rune('-') { +- goto l509 ++ goto l511 + } + position++ + { +- position511, tokenIndex511 := position, tokenIndex ++ position513, tokenIndex513 := position, tokenIndex + if buffer[position] != rune('0') { +- goto l512 ++ goto l514 + } + position++ + if buffer[position] != rune('b') { +- goto l512 ++ goto l514 + } + position++ +- goto l511 +- l512: +- position, tokenIndex = position511, tokenIndex511 ++ goto l513 ++ l514: ++ position, tokenIndex = position513, tokenIndex513 + if buffer[position] != rune('.') { +- goto l509 ++ goto l511 + } + position++ + if buffer[position] != rune('L') { +- goto l509 ++ goto l511 + } + position++ + { +- position515, tokenIndex515 := position, tokenIndex ++ position517, tokenIndex517 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l516 ++ goto l518 + } + position++ +- goto l515 +- l516: +- position, tokenIndex = position515, tokenIndex515 ++ goto l517 ++ l518: ++ position, tokenIndex = position517, tokenIndex517 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l517 ++ goto l519 + } + position++ +- goto l515 +- l517: +- position, tokenIndex = position515, tokenIndex515 ++ goto l517 ++ l519: ++ position, tokenIndex = position517, tokenIndex517 + if buffer[position] != rune('_') { +- goto l518 ++ goto l520 + } + position++ +- goto l515 +- l518: +- position, tokenIndex = position515, tokenIndex515 ++ goto l517 ++ l520: ++ position, tokenIndex = position517, tokenIndex517 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l509 ++ goto l511 + } + position++ + } ++ l517: + l515: 
+- l513: + { +- position514, tokenIndex514 := position, tokenIndex ++ position516, tokenIndex516 := position, tokenIndex + { +- position519, tokenIndex519 := position, tokenIndex ++ position521, tokenIndex521 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l520 ++ goto l522 + } + position++ +- goto l519 +- l520: +- position, tokenIndex = position519, tokenIndex519 ++ goto l521 ++ l522: ++ position, tokenIndex = position521, tokenIndex521 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l521 ++ goto l523 + } + position++ +- goto l519 +- l521: +- position, tokenIndex = position519, tokenIndex519 ++ goto l521 ++ l523: ++ position, tokenIndex = position521, tokenIndex521 + if buffer[position] != rune('_') { +- goto l522 ++ goto l524 + } + position++ +- goto l519 +- l522: +- position, tokenIndex = position519, tokenIndex519 ++ goto l521 ++ l524: ++ position, tokenIndex = position521, tokenIndex521 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l514 ++ goto l516 + } + position++ + } +- l519: +- goto l513 +- l514: +- position, tokenIndex = position514, tokenIndex514 ++ l521: ++ goto l515 ++ l516: ++ position, tokenIndex = position516, tokenIndex516 + } + } +- l511: ++ l513: + if buffer[position] != rune('@') { +- goto l509 ++ goto l511 + } + position++ + { +- position523, tokenIndex523 := position, tokenIndex ++ position525, tokenIndex525 := position, tokenIndex + if buffer[position] != rune('l') { +- goto l524 ++ goto l526 + } + position++ +- goto l523 +- l524: +- position, tokenIndex = position523, tokenIndex523 ++ goto l525 ++ l526: ++ position, tokenIndex = position525, tokenIndex525 + if buffer[position] != rune('L') { +- goto l509 ++ goto l511 + } + position++ + } +- l523: +- add(ruleTOCRefLow, position510) ++ l525: ++ add(ruleTOCRefLow, position512) + } + return true +- l509: +- position, tokenIndex = position509, tokenIndex509 ++ l511: ++ position, tokenIndex = position511, tokenIndex511 + return false + }, + /* 38 IndirectionIndicator <- <'*'> */ + func() bool { +- position525, tokenIndex525 := position, tokenIndex ++ position527, tokenIndex527 := position, tokenIndex + { +- position526 := position ++ position528 := position + if buffer[position] != rune('*') { +- goto l525 ++ goto l527 + } + position++ +- add(ruleIndirectionIndicator, position526) ++ add(ruleIndirectionIndicator, position528) + } + return true +- l525: +- position, tokenIndex = position525, tokenIndex525 ++ l527: ++ position, tokenIndex = position527, tokenIndex527 + return false + }, + /* 39 RegisterOrConstant <- <((('%' ([a-z] / [A-Z]) ([a-z] / [A-Z] / ([0-9] / [0-9]))*) / ('$'? ((Offset Offset) / Offset)) / ('#' Offset ('*' [0-9]+ ('-' [0-9] [0-9]*)?)?) / ('#' '~'? '(' [0-9] WS? ('<' '<') WS? 
[0-9] ')') / ARMRegister) !('f' / 'b' / ':' / '(' / '+' / '-'))> */ + func() bool { +- position527, tokenIndex527 := position, tokenIndex ++ position529, tokenIndex529 := position, tokenIndex + { +- position528 := position ++ position530 := position + { +- position529, tokenIndex529 := position, tokenIndex ++ position531, tokenIndex531 := position, tokenIndex + if buffer[position] != rune('%') { +- goto l530 ++ goto l532 + } + position++ + { +- position531, tokenIndex531 := position, tokenIndex ++ position533, tokenIndex533 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l532 ++ goto l534 + } + position++ +- goto l531 +- l532: +- position, tokenIndex = position531, tokenIndex531 ++ goto l533 ++ l534: ++ position, tokenIndex = position533, tokenIndex533 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l530 ++ goto l532 + } + position++ + } +- l531: + l533: ++ l535: + { +- position534, tokenIndex534 := position, tokenIndex ++ position536, tokenIndex536 := position, tokenIndex + { +- position535, tokenIndex535 := position, tokenIndex ++ position537, tokenIndex537 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l536 ++ goto l538 + } + position++ +- goto l535 +- l536: +- position, tokenIndex = position535, tokenIndex535 ++ goto l537 ++ l538: ++ position, tokenIndex = position537, tokenIndex537 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l537 ++ goto l539 + } + position++ +- goto l535 +- l537: +- position, tokenIndex = position535, tokenIndex535 ++ goto l537 ++ l539: ++ position, tokenIndex = position537, tokenIndex537 + { +- position538, tokenIndex538 := position, tokenIndex ++ position540, tokenIndex540 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l539 ++ goto l541 + } + position++ +- goto l538 +- l539: +- position, tokenIndex = position538, tokenIndex538 ++ goto l540 ++ l541: ++ position, tokenIndex = position540, tokenIndex540 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l534 ++ goto l536 + } + position++ + } +- l538: ++ l540: + } +- l535: +- goto l533 +- l534: +- position, tokenIndex = position534, tokenIndex534 ++ l537: ++ goto l535 ++ l536: ++ position, tokenIndex = position536, tokenIndex536 + } +- goto l529 +- l530: +- position, tokenIndex = position529, tokenIndex529 ++ goto l531 ++ l532: ++ position, tokenIndex = position531, tokenIndex531 + { +- position541, tokenIndex541 := position, tokenIndex ++ position543, tokenIndex543 := position, tokenIndex + if buffer[position] != rune('$') { +- goto l541 ++ goto l543 + } + position++ +- goto l542 +- l541: +- position, tokenIndex = position541, tokenIndex541 ++ goto l544 ++ l543: ++ position, tokenIndex = position543, tokenIndex543 + } +- l542: ++ l544: + { +- position543, tokenIndex543 := position, tokenIndex ++ position545, tokenIndex545 := position, tokenIndex + if !_rules[ruleOffset]() { +- goto l544 ++ goto l546 + } + if !_rules[ruleOffset]() { +- goto l544 ++ goto l546 + } +- goto l543 +- l544: +- position, tokenIndex = position543, tokenIndex543 ++ goto l545 ++ l546: ++ position, tokenIndex = position545, tokenIndex545 + if !_rules[ruleOffset]() { +- goto l540 ++ goto l542 + } + } +- l543: +- goto l529 +- l540: +- position, tokenIndex = position529, tokenIndex529 ++ l545: ++ goto l531 ++ l542: ++ position, tokenIndex = position531, tokenIndex531 + if buffer[position] != rune('#') { +- goto l545 ++ goto l547 + } + position++ + if 
!_rules[ruleOffset]() { +- goto l545 ++ goto l547 + } + { +- position546, tokenIndex546 := position, tokenIndex ++ position548, tokenIndex548 := position, tokenIndex + if buffer[position] != rune('*') { +- goto l546 ++ goto l548 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l546 ++ goto l548 + } + position++ +- l548: ++ l550: + { +- position549, tokenIndex549 := position, tokenIndex ++ position551, tokenIndex551 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l549 ++ goto l551 + } + position++ +- goto l548 +- l549: +- position, tokenIndex = position549, tokenIndex549 ++ goto l550 ++ l551: ++ position, tokenIndex = position551, tokenIndex551 + } + { +- position550, tokenIndex550 := position, tokenIndex ++ position552, tokenIndex552 := position, tokenIndex + if buffer[position] != rune('-') { +- goto l550 ++ goto l552 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l550 ++ goto l552 + } + position++ +- l552: ++ l554: + { +- position553, tokenIndex553 := position, tokenIndex ++ position555, tokenIndex555 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l553 ++ goto l555 + } + position++ +- goto l552 +- l553: +- position, tokenIndex = position553, tokenIndex553 ++ goto l554 ++ l555: ++ position, tokenIndex = position555, tokenIndex555 + } +- goto l551 +- l550: +- position, tokenIndex = position550, tokenIndex550 ++ goto l553 ++ l552: ++ position, tokenIndex = position552, tokenIndex552 + } +- l551: +- goto l547 +- l546: +- position, tokenIndex = position546, tokenIndex546 ++ l553: ++ goto l549 ++ l548: ++ position, tokenIndex = position548, tokenIndex548 + } ++ l549: ++ goto l531 + l547: +- goto l529 +- l545: +- position, tokenIndex = position529, tokenIndex529 ++ position, tokenIndex = position531, tokenIndex531 + if buffer[position] != rune('#') { +- goto l554 ++ goto l556 + } + position++ + { +- position555, tokenIndex555 := position, tokenIndex ++ position557, tokenIndex557 := position, tokenIndex + if buffer[position] != rune('~') { +- goto l555 ++ goto l557 + } + position++ +- goto l556 +- l555: +- position, tokenIndex = position555, tokenIndex555 +- } +- l556: ++ goto l558 ++ l557: ++ position, tokenIndex = position557, tokenIndex557 ++ } ++ l558: + if buffer[position] != rune('(') { +- goto l554 ++ goto l556 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l554 ++ goto l556 + } + position++ + { +- position557, tokenIndex557 := position, tokenIndex ++ position559, tokenIndex559 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l557 ++ goto l559 + } +- goto l558 +- l557: +- position, tokenIndex = position557, tokenIndex557 ++ goto l560 ++ l559: ++ position, tokenIndex = position559, tokenIndex559 + } +- l558: ++ l560: + if buffer[position] != rune('<') { +- goto l554 ++ goto l556 + } + position++ + if buffer[position] != rune('<') { +- goto l554 ++ goto l556 + } + position++ + { +- position559, tokenIndex559 := position, tokenIndex ++ position561, tokenIndex561 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l559 ++ goto l561 + } +- goto l560 +- l559: +- position, tokenIndex = position559, tokenIndex559 ++ goto l562 ++ l561: ++ position, tokenIndex = position561, tokenIndex561 + } +- l560: ++ l562: + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l554 ++ goto l556 + } + position++ + if buffer[position] != rune(')') { +- goto l554 ++ goto l556 + } + 
position++ +- goto l529 +- l554: +- position, tokenIndex = position529, tokenIndex529 ++ goto l531 ++ l556: ++ position, tokenIndex = position531, tokenIndex531 + if !_rules[ruleARMRegister]() { +- goto l527 ++ goto l529 + } + } +- l529: ++ l531: + { +- position561, tokenIndex561 := position, tokenIndex ++ position563, tokenIndex563 := position, tokenIndex + { +- position562, tokenIndex562 := position, tokenIndex ++ position564, tokenIndex564 := position, tokenIndex + if buffer[position] != rune('f') { +- goto l563 ++ goto l565 + } + position++ +- goto l562 +- l563: +- position, tokenIndex = position562, tokenIndex562 ++ goto l564 ++ l565: ++ position, tokenIndex = position564, tokenIndex564 + if buffer[position] != rune('b') { +- goto l564 ++ goto l566 + } + position++ +- goto l562 +- l564: +- position, tokenIndex = position562, tokenIndex562 ++ goto l564 ++ l566: ++ position, tokenIndex = position564, tokenIndex564 + if buffer[position] != rune(':') { +- goto l565 ++ goto l567 + } + position++ +- goto l562 +- l565: +- position, tokenIndex = position562, tokenIndex562 ++ goto l564 ++ l567: ++ position, tokenIndex = position564, tokenIndex564 + if buffer[position] != rune('(') { +- goto l566 ++ goto l568 + } + position++ +- goto l562 +- l566: +- position, tokenIndex = position562, tokenIndex562 ++ goto l564 ++ l568: ++ position, tokenIndex = position564, tokenIndex564 + if buffer[position] != rune('+') { +- goto l567 ++ goto l569 + } + position++ +- goto l562 +- l567: +- position, tokenIndex = position562, tokenIndex562 ++ goto l564 ++ l569: ++ position, tokenIndex = position564, tokenIndex564 + if buffer[position] != rune('-') { +- goto l561 ++ goto l563 + } + position++ + } +- l562: +- goto l527 +- l561: +- position, tokenIndex = position561, tokenIndex561 ++ l564: ++ goto l529 ++ l563: ++ position, tokenIndex = position563, tokenIndex563 + } +- add(ruleRegisterOrConstant, position528) ++ add(ruleRegisterOrConstant, position530) + } + return true +- l527: +- position, tokenIndex = position527, tokenIndex527 ++ l529: ++ position, tokenIndex = position529, tokenIndex529 + return false + }, + /* 40 ARMConstantTweak <- <(((('l' / 'L') ('s' / 'S') ('l' / 'L')) / (('s' / 'S') ('x' / 'X') ('t' / 'T') ('w' / 'W')) / (('s' / 'S') ('x' / 'X') ('t' / 'T') ('b' / 'B')) / (('u' / 'U') ('x' / 'X') ('t' / 'T') ('w' / 'W')) / (('u' / 'U') ('x' / 'X') ('t' / 'T') ('b' / 'B')) / (('l' / 'L') ('s' / 'S') ('r' / 'R')) / (('r' / 'R') ('o' / 'O') ('r' / 'R')) / (('a' / 'A') ('s' / 'S') ('r' / 'R'))) (WS '#' Offset)?)> */ + func() bool { +- position568, tokenIndex568 := position, tokenIndex ++ position570, tokenIndex570 := position, tokenIndex + { +- position569 := position ++ position571 := position + { +- position570, tokenIndex570 := position, tokenIndex +- { +- position572, tokenIndex572 := position, tokenIndex +- if buffer[position] != rune('l') { +- goto l573 +- } +- position++ +- goto l572 +- l573: +- position, tokenIndex = position572, tokenIndex572 +- if buffer[position] != rune('L') { +- goto l571 +- } +- position++ +- } +- l572: ++ position572, tokenIndex572 := position, tokenIndex + { + position574, tokenIndex574 := position, tokenIndex +- if buffer[position] != rune('s') { ++ if buffer[position] != rune('l') { + goto l575 + } + position++ + goto l574 + l575: + position, tokenIndex = position574, tokenIndex574 +- if buffer[position] != rune('S') { +- goto l571 ++ if buffer[position] != rune('L') { ++ goto l573 + } + position++ + } + l574: + { + position576, tokenIndex576 := position, tokenIndex +- 
if buffer[position] != rune('l') { ++ if buffer[position] != rune('s') { + goto l577 + } + position++ + goto l576 + l577: + position, tokenIndex = position576, tokenIndex576 +- if buffer[position] != rune('L') { +- goto l571 ++ if buffer[position] != rune('S') { ++ goto l573 + } + position++ + } + l576: +- goto l570 +- l571: +- position, tokenIndex = position570, tokenIndex570 + { +- position579, tokenIndex579 := position, tokenIndex +- if buffer[position] != rune('s') { +- goto l580 ++ position578, tokenIndex578 := position, tokenIndex ++ if buffer[position] != rune('l') { ++ goto l579 + } + position++ +- goto l579 +- l580: +- position, tokenIndex = position579, tokenIndex579 +- if buffer[position] != rune('S') { +- goto l578 ++ goto l578 ++ l579: ++ position, tokenIndex = position578, tokenIndex578 ++ if buffer[position] != rune('L') { ++ goto l573 + } + position++ + } +- l579: ++ l578: ++ goto l572 ++ l573: ++ position, tokenIndex = position572, tokenIndex572 + { + position581, tokenIndex581 := position, tokenIndex +- if buffer[position] != rune('x') { ++ if buffer[position] != rune('s') { + goto l582 + } + position++ + goto l581 + l582: + position, tokenIndex = position581, tokenIndex581 +- if buffer[position] != rune('X') { +- goto l578 ++ if buffer[position] != rune('S') { ++ goto l580 + } + position++ + } + l581: + { + position583, tokenIndex583 := position, tokenIndex +- if buffer[position] != rune('t') { ++ if buffer[position] != rune('x') { + goto l584 + } + position++ + goto l583 + l584: + position, tokenIndex = position583, tokenIndex583 +- if buffer[position] != rune('T') { +- goto l578 ++ if buffer[position] != rune('X') { ++ goto l580 + } + position++ + } + l583: + { + position585, tokenIndex585 := position, tokenIndex +- if buffer[position] != rune('w') { ++ if buffer[position] != rune('t') { + goto l586 + } + position++ + goto l585 + l586: + position, tokenIndex = position585, tokenIndex585 +- if buffer[position] != rune('W') { +- goto l578 ++ if buffer[position] != rune('T') { ++ goto l580 + } + position++ + } + l585: +- goto l570 +- l578: +- position, tokenIndex = position570, tokenIndex570 + { +- position588, tokenIndex588 := position, tokenIndex +- if buffer[position] != rune('s') { +- goto l589 ++ position587, tokenIndex587 := position, tokenIndex ++ if buffer[position] != rune('w') { ++ goto l588 + } + position++ +- goto l588 +- l589: +- position, tokenIndex = position588, tokenIndex588 +- if buffer[position] != rune('S') { +- goto l587 ++ goto l587 ++ l588: ++ position, tokenIndex = position587, tokenIndex587 ++ if buffer[position] != rune('W') { ++ goto l580 + } + position++ + } +- l588: ++ l587: ++ goto l572 ++ l580: ++ position, tokenIndex = position572, tokenIndex572 + { + position590, tokenIndex590 := position, tokenIndex +- if buffer[position] != rune('x') { ++ if buffer[position] != rune('s') { + goto l591 + } + position++ + goto l590 + l591: + position, tokenIndex = position590, tokenIndex590 +- if buffer[position] != rune('X') { +- goto l587 ++ if buffer[position] != rune('S') { ++ goto l589 + } + position++ + } + l590: + { + position592, tokenIndex592 := position, tokenIndex +- if buffer[position] != rune('t') { ++ if buffer[position] != rune('x') { + goto l593 + } + position++ + goto l592 + l593: + position, tokenIndex = position592, tokenIndex592 +- if buffer[position] != rune('T') { +- goto l587 ++ if buffer[position] != rune('X') { ++ goto l589 + } + position++ + } + l592: + { + position594, tokenIndex594 := position, tokenIndex +- if buffer[position] 
!= rune('b') { ++ if buffer[position] != rune('t') { + goto l595 + } + position++ + goto l594 + l595: + position, tokenIndex = position594, tokenIndex594 +- if buffer[position] != rune('B') { +- goto l587 ++ if buffer[position] != rune('T') { ++ goto l589 + } + position++ + } + l594: +- goto l570 +- l587: +- position, tokenIndex = position570, tokenIndex570 + { +- position597, tokenIndex597 := position, tokenIndex +- if buffer[position] != rune('u') { +- goto l598 ++ position596, tokenIndex596 := position, tokenIndex ++ if buffer[position] != rune('b') { ++ goto l597 + } + position++ +- goto l597 +- l598: +- position, tokenIndex = position597, tokenIndex597 +- if buffer[position] != rune('U') { +- goto l596 ++ goto l596 ++ l597: ++ position, tokenIndex = position596, tokenIndex596 ++ if buffer[position] != rune('B') { ++ goto l589 + } + position++ + } +- l597: ++ l596: ++ goto l572 ++ l589: ++ position, tokenIndex = position572, tokenIndex572 + { + position599, tokenIndex599 := position, tokenIndex +- if buffer[position] != rune('x') { ++ if buffer[position] != rune('u') { + goto l600 + } + position++ + goto l599 + l600: + position, tokenIndex = position599, tokenIndex599 +- if buffer[position] != rune('X') { +- goto l596 ++ if buffer[position] != rune('U') { ++ goto l598 + } + position++ + } + l599: + { + position601, tokenIndex601 := position, tokenIndex +- if buffer[position] != rune('t') { ++ if buffer[position] != rune('x') { + goto l602 + } + position++ + goto l601 + l602: + position, tokenIndex = position601, tokenIndex601 +- if buffer[position] != rune('T') { +- goto l596 ++ if buffer[position] != rune('X') { ++ goto l598 + } + position++ + } + l601: + { + position603, tokenIndex603 := position, tokenIndex +- if buffer[position] != rune('w') { ++ if buffer[position] != rune('t') { + goto l604 + } + position++ + goto l603 + l604: + position, tokenIndex = position603, tokenIndex603 +- if buffer[position] != rune('W') { +- goto l596 ++ if buffer[position] != rune('T') { ++ goto l598 + } + position++ + } + l603: +- goto l570 +- l596: +- position, tokenIndex = position570, tokenIndex570 + { +- position606, tokenIndex606 := position, tokenIndex +- if buffer[position] != rune('u') { +- goto l607 ++ position605, tokenIndex605 := position, tokenIndex ++ if buffer[position] != rune('w') { ++ goto l606 + } + position++ +- goto l606 +- l607: +- position, tokenIndex = position606, tokenIndex606 +- if buffer[position] != rune('U') { +- goto l605 ++ goto l605 ++ l606: ++ position, tokenIndex = position605, tokenIndex605 ++ if buffer[position] != rune('W') { ++ goto l598 + } + position++ + } +- l606: ++ l605: ++ goto l572 ++ l598: ++ position, tokenIndex = position572, tokenIndex572 + { + position608, tokenIndex608 := position, tokenIndex +- if buffer[position] != rune('x') { ++ if buffer[position] != rune('u') { + goto l609 + } + position++ + goto l608 + l609: + position, tokenIndex = position608, tokenIndex608 +- if buffer[position] != rune('X') { +- goto l605 ++ if buffer[position] != rune('U') { ++ goto l607 + } + position++ + } + l608: + { + position610, tokenIndex610 := position, tokenIndex +- if buffer[position] != rune('t') { ++ if buffer[position] != rune('x') { + goto l611 + } + position++ + goto l610 + l611: + position, tokenIndex = position610, tokenIndex610 +- if buffer[position] != rune('T') { +- goto l605 ++ if buffer[position] != rune('X') { ++ goto l607 + } + position++ + } + l610: + { + position612, tokenIndex612 := position, tokenIndex +- if buffer[position] != rune('b') { ++ if 
buffer[position] != rune('t') { + goto l613 + } + position++ + goto l612 + l613: + position, tokenIndex = position612, tokenIndex612 +- if buffer[position] != rune('B') { +- goto l605 ++ if buffer[position] != rune('T') { ++ goto l607 + } + position++ + } + l612: +- goto l570 +- l605: +- position, tokenIndex = position570, tokenIndex570 + { +- position615, tokenIndex615 := position, tokenIndex +- if buffer[position] != rune('l') { +- goto l616 ++ position614, tokenIndex614 := position, tokenIndex ++ if buffer[position] != rune('b') { ++ goto l615 + } + position++ +- goto l615 +- l616: +- position, tokenIndex = position615, tokenIndex615 +- if buffer[position] != rune('L') { +- goto l614 ++ goto l614 ++ l615: ++ position, tokenIndex = position614, tokenIndex614 ++ if buffer[position] != rune('B') { ++ goto l607 + } + position++ + } +- l615: ++ l614: ++ goto l572 ++ l607: ++ position, tokenIndex = position572, tokenIndex572 + { + position617, tokenIndex617 := position, tokenIndex +- if buffer[position] != rune('s') { ++ if buffer[position] != rune('l') { + goto l618 + } + position++ + goto l617 + l618: + position, tokenIndex = position617, tokenIndex617 +- if buffer[position] != rune('S') { +- goto l614 ++ if buffer[position] != rune('L') { ++ goto l616 + } + position++ + } + l617: + { + position619, tokenIndex619 := position, tokenIndex +- if buffer[position] != rune('r') { ++ if buffer[position] != rune('s') { + goto l620 + } + position++ + goto l619 + l620: + position, tokenIndex = position619, tokenIndex619 +- if buffer[position] != rune('R') { +- goto l614 ++ if buffer[position] != rune('S') { ++ goto l616 + } + position++ + } + l619: +- goto l570 +- l614: +- position, tokenIndex = position570, tokenIndex570 + { +- position622, tokenIndex622 := position, tokenIndex ++ position621, tokenIndex621 := position, tokenIndex + if buffer[position] != rune('r') { +- goto l623 ++ goto l622 + } + position++ +- goto l622 +- l623: +- position, tokenIndex = position622, tokenIndex622 ++ goto l621 ++ l622: ++ position, tokenIndex = position621, tokenIndex621 + if buffer[position] != rune('R') { +- goto l621 ++ goto l616 + } + position++ + } +- l622: ++ l621: ++ goto l572 ++ l616: ++ position, tokenIndex = position572, tokenIndex572 + { + position624, tokenIndex624 := position, tokenIndex +- if buffer[position] != rune('o') { ++ if buffer[position] != rune('r') { + goto l625 + } + position++ + goto l624 + l625: + position, tokenIndex = position624, tokenIndex624 +- if buffer[position] != rune('O') { +- goto l621 ++ if buffer[position] != rune('R') { ++ goto l623 + } + position++ + } + l624: + { + position626, tokenIndex626 := position, tokenIndex +- if buffer[position] != rune('r') { ++ if buffer[position] != rune('o') { + goto l627 + } + position++ + goto l626 + l627: + position, tokenIndex = position626, tokenIndex626 +- if buffer[position] != rune('R') { +- goto l621 ++ if buffer[position] != rune('O') { ++ goto l623 + } + position++ + } + l626: +- goto l570 +- l621: +- position, tokenIndex = position570, tokenIndex570 + { + position628, tokenIndex628 := position, tokenIndex +- if buffer[position] != rune('a') { ++ if buffer[position] != rune('r') { + goto l629 + } + position++ + goto l628 + l629: + position, tokenIndex = position628, tokenIndex628 +- if buffer[position] != rune('A') { +- goto l568 ++ if buffer[position] != rune('R') { ++ goto l623 + } + position++ + } + l628: ++ goto l572 ++ l623: ++ position, tokenIndex = position572, tokenIndex572 + { + position630, tokenIndex630 := position, 
tokenIndex +- if buffer[position] != rune('s') { ++ if buffer[position] != rune('a') { + goto l631 + } + position++ + goto l630 + l631: + position, tokenIndex = position630, tokenIndex630 +- if buffer[position] != rune('S') { +- goto l568 ++ if buffer[position] != rune('A') { ++ goto l570 + } + position++ + } + l630: + { + position632, tokenIndex632 := position, tokenIndex +- if buffer[position] != rune('r') { ++ if buffer[position] != rune('s') { + goto l633 + } + position++ + goto l632 + l633: + position, tokenIndex = position632, tokenIndex632 +- if buffer[position] != rune('R') { +- goto l568 ++ if buffer[position] != rune('S') { ++ goto l570 + } + position++ + } + l632: ++ { ++ position634, tokenIndex634 := position, tokenIndex ++ if buffer[position] != rune('r') { ++ goto l635 ++ } ++ position++ ++ goto l634 ++ l635: ++ position, tokenIndex = position634, tokenIndex634 ++ if buffer[position] != rune('R') { ++ goto l570 ++ } ++ position++ ++ } ++ l634: + } +- l570: ++ l572: + { +- position634, tokenIndex634 := position, tokenIndex ++ position636, tokenIndex636 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l634 ++ goto l636 + } + if buffer[position] != rune('#') { +- goto l634 ++ goto l636 + } + position++ + if !_rules[ruleOffset]() { +- goto l634 ++ goto l636 + } +- goto l635 +- l634: +- position, tokenIndex = position634, tokenIndex634 ++ goto l637 ++ l636: ++ position, tokenIndex = position636, tokenIndex636 + } +- l635: +- add(ruleARMConstantTweak, position569) ++ l637: ++ add(ruleARMConstantTweak, position571) + } + return true +- l568: +- position, tokenIndex = position568, tokenIndex568 ++ l570: ++ position, tokenIndex = position570, tokenIndex570 + return false + }, + /* 41 ARMRegister <- <((('s' / 'S') ('p' / 'P')) / (('x' / 'w' / 'd' / 'q' / 's') [0-9] [0-9]?) / (('x' / 'X') ('z' / 'Z') ('r' / 'R')) / (('w' / 'W') ('z' / 'Z') ('r' / 'R')) / ARMVectorRegister / ('{' WS? ARMVectorRegister (',' WS? ARMVectorRegister)* WS? '}' ('[' [0-9] [0-9]? 
']')?))> */ + func() bool { +- position636, tokenIndex636 := position, tokenIndex ++ position638, tokenIndex638 := position, tokenIndex + { +- position637 := position ++ position639 := position + { +- position638, tokenIndex638 := position, tokenIndex ++ position640, tokenIndex640 := position, tokenIndex + { +- position640, tokenIndex640 := position, tokenIndex ++ position642, tokenIndex642 := position, tokenIndex + if buffer[position] != rune('s') { +- goto l641 ++ goto l643 + } + position++ +- goto l640 +- l641: +- position, tokenIndex = position640, tokenIndex640 ++ goto l642 ++ l643: ++ position, tokenIndex = position642, tokenIndex642 + if buffer[position] != rune('S') { +- goto l639 ++ goto l641 + } + position++ + } +- l640: ++ l642: + { +- position642, tokenIndex642 := position, tokenIndex ++ position644, tokenIndex644 := position, tokenIndex + if buffer[position] != rune('p') { +- goto l643 ++ goto l645 + } + position++ +- goto l642 +- l643: +- position, tokenIndex = position642, tokenIndex642 ++ goto l644 ++ l645: ++ position, tokenIndex = position644, tokenIndex644 + if buffer[position] != rune('P') { +- goto l639 ++ goto l641 + } + position++ + } +- l642: +- goto l638 +- l639: +- position, tokenIndex = position638, tokenIndex638 ++ l644: ++ goto l640 ++ l641: ++ position, tokenIndex = position640, tokenIndex640 + { +- position645, tokenIndex645 := position, tokenIndex ++ position647, tokenIndex647 := position, tokenIndex + if buffer[position] != rune('x') { +- goto l646 ++ goto l648 + } + position++ +- goto l645 +- l646: +- position, tokenIndex = position645, tokenIndex645 ++ goto l647 ++ l648: ++ position, tokenIndex = position647, tokenIndex647 + if buffer[position] != rune('w') { +- goto l647 ++ goto l649 + } + position++ +- goto l645 +- l647: +- position, tokenIndex = position645, tokenIndex645 ++ goto l647 ++ l649: ++ position, tokenIndex = position647, tokenIndex647 + if buffer[position] != rune('d') { +- goto l648 ++ goto l650 + } + position++ +- goto l645 +- l648: +- position, tokenIndex = position645, tokenIndex645 ++ goto l647 ++ l650: ++ position, tokenIndex = position647, tokenIndex647 + if buffer[position] != rune('q') { +- goto l649 ++ goto l651 + } + position++ +- goto l645 +- l649: +- position, tokenIndex = position645, tokenIndex645 ++ goto l647 ++ l651: ++ position, tokenIndex = position647, tokenIndex647 + if buffer[position] != rune('s') { +- goto l644 ++ goto l646 + } + position++ + } +- l645: ++ l647: + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l644 ++ goto l646 + } + position++ + { +- position650, tokenIndex650 := position, tokenIndex ++ position652, tokenIndex652 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l650 +- } +- position++ +- goto l651 +- l650: +- position, tokenIndex = position650, tokenIndex650 +- } +- l651: +- goto l638 +- l644: +- position, tokenIndex = position638, tokenIndex638 +- { +- position653, tokenIndex653 := position, tokenIndex +- if buffer[position] != rune('x') { +- goto l654 +- } +- position++ +- goto l653 +- l654: +- position, tokenIndex = position653, tokenIndex653 +- if buffer[position] != rune('X') { + goto l652 + } + position++ ++ goto l653 ++ l652: ++ position, tokenIndex = position652, tokenIndex652 + } + l653: ++ goto l640 ++ l646: ++ position, tokenIndex = position640, tokenIndex640 + { + position655, tokenIndex655 := position, tokenIndex +- if buffer[position] != rune('z') { ++ if buffer[position] != rune('x') { + goto l656 + } + position++ + goto 
l655 + l656: + position, tokenIndex = position655, tokenIndex655 +- if buffer[position] != rune('Z') { +- goto l652 ++ if buffer[position] != rune('X') { ++ goto l654 + } + position++ + } + l655: + { + position657, tokenIndex657 := position, tokenIndex +- if buffer[position] != rune('r') { ++ if buffer[position] != rune('z') { + goto l658 + } + position++ + goto l657 + l658: + position, tokenIndex = position657, tokenIndex657 +- if buffer[position] != rune('R') { +- goto l652 ++ if buffer[position] != rune('Z') { ++ goto l654 + } + position++ + } + l657: +- goto l638 +- l652: +- position, tokenIndex = position638, tokenIndex638 + { +- position660, tokenIndex660 := position, tokenIndex +- if buffer[position] != rune('w') { +- goto l661 ++ position659, tokenIndex659 := position, tokenIndex ++ if buffer[position] != rune('r') { ++ goto l660 + } + position++ +- goto l660 +- l661: +- position, tokenIndex = position660, tokenIndex660 +- if buffer[position] != rune('W') { +- goto l659 ++ goto l659 ++ l660: ++ position, tokenIndex = position659, tokenIndex659 ++ if buffer[position] != rune('R') { ++ goto l654 + } + position++ + } +- l660: ++ l659: ++ goto l640 ++ l654: ++ position, tokenIndex = position640, tokenIndex640 + { + position662, tokenIndex662 := position, tokenIndex +- if buffer[position] != rune('z') { ++ if buffer[position] != rune('w') { + goto l663 + } + position++ + goto l662 + l663: + position, tokenIndex = position662, tokenIndex662 +- if buffer[position] != rune('Z') { +- goto l659 ++ if buffer[position] != rune('W') { ++ goto l661 + } + position++ + } + l662: + { + position664, tokenIndex664 := position, tokenIndex +- if buffer[position] != rune('r') { ++ if buffer[position] != rune('z') { + goto l665 + } + position++ + goto l664 + l665: + position, tokenIndex = position664, tokenIndex664 +- if buffer[position] != rune('R') { +- goto l659 ++ if buffer[position] != rune('Z') { ++ goto l661 + } + position++ + } + l664: +- goto l638 +- l659: +- position, tokenIndex = position638, tokenIndex638 +- if !_rules[ruleARMVectorRegister]() { ++ { ++ position666, tokenIndex666 := position, tokenIndex ++ if buffer[position] != rune('r') { ++ goto l667 ++ } ++ position++ + goto l666 ++ l667: ++ position, tokenIndex = position666, tokenIndex666 ++ if buffer[position] != rune('R') { ++ goto l661 ++ } ++ position++ + } +- goto l638 + l666: +- position, tokenIndex = position638, tokenIndex638 ++ goto l640 ++ l661: ++ position, tokenIndex = position640, tokenIndex640 ++ if !_rules[ruleARMVectorRegister]() { ++ goto l668 ++ } ++ goto l640 ++ l668: ++ position, tokenIndex = position640, tokenIndex640 + if buffer[position] != rune('{') { +- goto l636 ++ goto l638 + } + position++ + { +- position667, tokenIndex667 := position, tokenIndex ++ position669, tokenIndex669 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l667 ++ goto l669 + } +- goto l668 +- l667: +- position, tokenIndex = position667, tokenIndex667 ++ goto l670 ++ l669: ++ position, tokenIndex = position669, tokenIndex669 + } +- l668: ++ l670: + if !_rules[ruleARMVectorRegister]() { +- goto l636 ++ goto l638 + } +- l669: ++ l671: + { +- position670, tokenIndex670 := position, tokenIndex ++ position672, tokenIndex672 := position, tokenIndex + if buffer[position] != rune(',') { +- goto l670 ++ goto l672 + } + position++ + { +- position671, tokenIndex671 := position, tokenIndex ++ position673, tokenIndex673 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l671 ++ goto l673 + } +- goto l672 +- l671: +- position, tokenIndex = 
position671, tokenIndex671 ++ goto l674 ++ l673: ++ position, tokenIndex = position673, tokenIndex673 + } +- l672: ++ l674: + if !_rules[ruleARMVectorRegister]() { +- goto l670 ++ goto l672 + } +- goto l669 +- l670: +- position, tokenIndex = position670, tokenIndex670 ++ goto l671 ++ l672: ++ position, tokenIndex = position672, tokenIndex672 + } + { +- position673, tokenIndex673 := position, tokenIndex ++ position675, tokenIndex675 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l673 ++ goto l675 + } +- goto l674 +- l673: +- position, tokenIndex = position673, tokenIndex673 ++ goto l676 ++ l675: ++ position, tokenIndex = position675, tokenIndex675 + } +- l674: ++ l676: + if buffer[position] != rune('}') { +- goto l636 ++ goto l638 + } + position++ + { +- position675, tokenIndex675 := position, tokenIndex ++ position677, tokenIndex677 := position, tokenIndex + if buffer[position] != rune('[') { +- goto l675 ++ goto l677 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l675 ++ goto l677 + } + position++ + { +- position677, tokenIndex677 := position, tokenIndex ++ position679, tokenIndex679 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l677 ++ goto l679 + } + position++ +- goto l678 +- l677: +- position, tokenIndex = position677, tokenIndex677 ++ goto l680 ++ l679: ++ position, tokenIndex = position679, tokenIndex679 + } +- l678: ++ l680: + if buffer[position] != rune(']') { +- goto l675 ++ goto l677 + } + position++ +- goto l676 +- l675: +- position, tokenIndex = position675, tokenIndex675 ++ goto l678 ++ l677: ++ position, tokenIndex = position677, tokenIndex677 + } +- l676: ++ l678: + } +- l638: +- add(ruleARMRegister, position637) ++ l640: ++ add(ruleARMRegister, position639) + } + return true +- l636: +- position, tokenIndex = position636, tokenIndex636 ++ l638: ++ position, tokenIndex = position638, tokenIndex638 + return false + }, + /* 42 ARMVectorRegister <- <(('v' / 'V') [0-9] [0-9]? ('.' [0-9]* ('b' / 's' / 'd' / 'h' / 'q') ('[' [0-9] [0-9]? 
']')?)?)> */ + func() bool { +- position679, tokenIndex679 := position, tokenIndex ++ position681, tokenIndex681 := position, tokenIndex + { +- position680 := position ++ position682 := position + { +- position681, tokenIndex681 := position, tokenIndex ++ position683, tokenIndex683 := position, tokenIndex + if buffer[position] != rune('v') { +- goto l682 ++ goto l684 + } + position++ +- goto l681 +- l682: +- position, tokenIndex = position681, tokenIndex681 ++ goto l683 ++ l684: ++ position, tokenIndex = position683, tokenIndex683 + if buffer[position] != rune('V') { +- goto l679 ++ goto l681 + } + position++ + } +- l681: ++ l683: + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l679 ++ goto l681 + } + position++ + { +- position683, tokenIndex683 := position, tokenIndex ++ position685, tokenIndex685 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l683 ++ goto l685 + } + position++ +- goto l684 +- l683: +- position, tokenIndex = position683, tokenIndex683 ++ goto l686 ++ l685: ++ position, tokenIndex = position685, tokenIndex685 + } +- l684: ++ l686: + { +- position685, tokenIndex685 := position, tokenIndex ++ position687, tokenIndex687 := position, tokenIndex + if buffer[position] != rune('.') { +- goto l685 ++ goto l687 + } + position++ +- l687: ++ l689: + { +- position688, tokenIndex688 := position, tokenIndex ++ position690, tokenIndex690 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l688 ++ goto l690 + } + position++ +- goto l687 +- l688: +- position, tokenIndex = position688, tokenIndex688 ++ goto l689 ++ l690: ++ position, tokenIndex = position690, tokenIndex690 + } + { +- position689, tokenIndex689 := position, tokenIndex ++ position691, tokenIndex691 := position, tokenIndex + if buffer[position] != rune('b') { +- goto l690 ++ goto l692 + } + position++ +- goto l689 +- l690: +- position, tokenIndex = position689, tokenIndex689 ++ goto l691 ++ l692: ++ position, tokenIndex = position691, tokenIndex691 + if buffer[position] != rune('s') { +- goto l691 ++ goto l693 + } + position++ +- goto l689 +- l691: +- position, tokenIndex = position689, tokenIndex689 ++ goto l691 ++ l693: ++ position, tokenIndex = position691, tokenIndex691 + if buffer[position] != rune('d') { +- goto l692 ++ goto l694 + } + position++ +- goto l689 +- l692: +- position, tokenIndex = position689, tokenIndex689 ++ goto l691 ++ l694: ++ position, tokenIndex = position691, tokenIndex691 + if buffer[position] != rune('h') { +- goto l693 ++ goto l695 + } + position++ +- goto l689 +- l693: +- position, tokenIndex = position689, tokenIndex689 ++ goto l691 ++ l695: ++ position, tokenIndex = position691, tokenIndex691 + if buffer[position] != rune('q') { +- goto l685 ++ goto l687 + } + position++ + } +- l689: ++ l691: + { +- position694, tokenIndex694 := position, tokenIndex ++ position696, tokenIndex696 := position, tokenIndex + if buffer[position] != rune('[') { +- goto l694 ++ goto l696 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l694 ++ goto l696 + } + position++ + { +- position696, tokenIndex696 := position, tokenIndex ++ position698, tokenIndex698 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l696 ++ goto l698 + } + position++ +- goto l697 +- l696: +- position, tokenIndex = position696, tokenIndex696 ++ goto l699 ++ l698: ++ position, tokenIndex = position698, tokenIndex698 + } +- l697: ++ l699: + if buffer[position] != 
rune(']') { +- goto l694 ++ goto l696 + } + position++ +- goto l695 +- l694: +- position, tokenIndex = position694, tokenIndex694 ++ goto l697 ++ l696: ++ position, tokenIndex = position696, tokenIndex696 + } +- l695: +- goto l686 +- l685: +- position, tokenIndex = position685, tokenIndex685 ++ l697: ++ goto l688 ++ l687: ++ position, tokenIndex = position687, tokenIndex687 + } +- l686: +- add(ruleARMVectorRegister, position680) ++ l688: ++ add(ruleARMVectorRegister, position682) + } + return true +- l679: +- position, tokenIndex = position679, tokenIndex679 ++ l681: ++ position, tokenIndex = position681, tokenIndex681 + return false + }, + /* 43 MemoryRef <- <((SymbolRef BaseIndexScale) / SymbolRef / Low12BitsSymbolRef / (Offset* BaseIndexScale) / (SegmentRegister Offset BaseIndexScale) / (SegmentRegister BaseIndexScale) / (SegmentRegister Offset) / ARMBaseIndexScale / BaseIndexScale)> */ + func() bool { +- position698, tokenIndex698 := position, tokenIndex ++ position700, tokenIndex700 := position, tokenIndex + { +- position699 := position ++ position701 := position + { +- position700, tokenIndex700 := position, tokenIndex ++ position702, tokenIndex702 := position, tokenIndex + if !_rules[ruleSymbolRef]() { +- goto l701 ++ goto l703 + } + if !_rules[ruleBaseIndexScale]() { +- goto l701 ++ goto l703 + } +- goto l700 +- l701: +- position, tokenIndex = position700, tokenIndex700 ++ goto l702 ++ l703: ++ position, tokenIndex = position702, tokenIndex702 + if !_rules[ruleSymbolRef]() { +- goto l702 ++ goto l704 + } +- goto l700 +- l702: +- position, tokenIndex = position700, tokenIndex700 ++ goto l702 ++ l704: ++ position, tokenIndex = position702, tokenIndex702 + if !_rules[ruleLow12BitsSymbolRef]() { +- goto l703 ++ goto l705 + } +- goto l700 +- l703: +- position, tokenIndex = position700, tokenIndex700 ++ goto l702 + l705: ++ position, tokenIndex = position702, tokenIndex702 ++ l707: + { +- position706, tokenIndex706 := position, tokenIndex ++ position708, tokenIndex708 := position, tokenIndex + if !_rules[ruleOffset]() { +- goto l706 ++ goto l708 + } +- goto l705 +- l706: +- position, tokenIndex = position706, tokenIndex706 ++ goto l707 ++ l708: ++ position, tokenIndex = position708, tokenIndex708 + } + if !_rules[ruleBaseIndexScale]() { +- goto l704 ++ goto l706 + } +- goto l700 +- l704: +- position, tokenIndex = position700, tokenIndex700 ++ goto l702 ++ l706: ++ position, tokenIndex = position702, tokenIndex702 + if !_rules[ruleSegmentRegister]() { +- goto l707 ++ goto l709 + } + if !_rules[ruleOffset]() { +- goto l707 ++ goto l709 + } + if !_rules[ruleBaseIndexScale]() { +- goto l707 ++ goto l709 + } +- goto l700 +- l707: +- position, tokenIndex = position700, tokenIndex700 ++ goto l702 ++ l709: ++ position, tokenIndex = position702, tokenIndex702 + if !_rules[ruleSegmentRegister]() { +- goto l708 ++ goto l710 + } + if !_rules[ruleBaseIndexScale]() { +- goto l708 ++ goto l710 + } +- goto l700 +- l708: +- position, tokenIndex = position700, tokenIndex700 ++ goto l702 ++ l710: ++ position, tokenIndex = position702, tokenIndex702 + if !_rules[ruleSegmentRegister]() { +- goto l709 ++ goto l711 + } + if !_rules[ruleOffset]() { +- goto l709 ++ goto l711 + } +- goto l700 +- l709: +- position, tokenIndex = position700, tokenIndex700 ++ goto l702 ++ l711: ++ position, tokenIndex = position702, tokenIndex702 + if !_rules[ruleARMBaseIndexScale]() { +- goto l710 ++ goto l712 + } +- goto l700 +- l710: +- position, tokenIndex = position700, tokenIndex700 ++ goto l702 ++ l712: ++ position, 
tokenIndex = position702, tokenIndex702 + if !_rules[ruleBaseIndexScale]() { +- goto l698 ++ goto l700 + } + } +- l700: +- add(ruleMemoryRef, position699) ++ l702: ++ add(ruleMemoryRef, position701) + } + return true +- l698: +- position, tokenIndex = position698, tokenIndex698 ++ l700: ++ position, tokenIndex = position700, tokenIndex700 + return false + }, + /* 44 SymbolRef <- <((Offset* '+')? (LocalSymbol / SymbolName) Offset* ('@' Section Offset*)?)> */ + func() bool { +- position711, tokenIndex711 := position, tokenIndex ++ position713, tokenIndex713 := position, tokenIndex + { +- position712 := position ++ position714 := position + { +- position713, tokenIndex713 := position, tokenIndex +- l715: ++ position715, tokenIndex715 := position, tokenIndex ++ l717: + { +- position716, tokenIndex716 := position, tokenIndex ++ position718, tokenIndex718 := position, tokenIndex + if !_rules[ruleOffset]() { +- goto l716 ++ goto l718 + } +- goto l715 +- l716: +- position, tokenIndex = position716, tokenIndex716 ++ goto l717 ++ l718: ++ position, tokenIndex = position718, tokenIndex718 + } + if buffer[position] != rune('+') { +- goto l713 ++ goto l715 + } + position++ +- goto l714 +- l713: +- position, tokenIndex = position713, tokenIndex713 ++ goto l716 ++ l715: ++ position, tokenIndex = position715, tokenIndex715 + } +- l714: ++ l716: + { +- position717, tokenIndex717 := position, tokenIndex ++ position719, tokenIndex719 := position, tokenIndex + if !_rules[ruleLocalSymbol]() { +- goto l718 ++ goto l720 + } +- goto l717 +- l718: +- position, tokenIndex = position717, tokenIndex717 ++ goto l719 ++ l720: ++ position, tokenIndex = position719, tokenIndex719 + if !_rules[ruleSymbolName]() { +- goto l711 ++ goto l713 + } + } +- l717: + l719: ++ l721: + { +- position720, tokenIndex720 := position, tokenIndex ++ position722, tokenIndex722 := position, tokenIndex + if !_rules[ruleOffset]() { +- goto l720 ++ goto l722 + } +- goto l719 +- l720: +- position, tokenIndex = position720, tokenIndex720 ++ goto l721 ++ l722: ++ position, tokenIndex = position722, tokenIndex722 + } + { +- position721, tokenIndex721 := position, tokenIndex ++ position723, tokenIndex723 := position, tokenIndex + if buffer[position] != rune('@') { +- goto l721 ++ goto l723 + } + position++ + if !_rules[ruleSection]() { +- goto l721 ++ goto l723 + } +- l723: ++ l725: + { +- position724, tokenIndex724 := position, tokenIndex ++ position726, tokenIndex726 := position, tokenIndex + if !_rules[ruleOffset]() { +- goto l724 ++ goto l726 + } +- goto l723 +- l724: +- position, tokenIndex = position724, tokenIndex724 ++ goto l725 ++ l726: ++ position, tokenIndex = position726, tokenIndex726 + } +- goto l722 +- l721: +- position, tokenIndex = position721, tokenIndex721 ++ goto l724 ++ l723: ++ position, tokenIndex = position723, tokenIndex723 + } +- l722: +- add(ruleSymbolRef, position712) ++ l724: ++ add(ruleSymbolRef, position714) + } + return true +- l711: +- position, tokenIndex = position711, tokenIndex711 ++ l713: ++ position, tokenIndex = position713, tokenIndex713 + return false + }, + /* 45 Low12BitsSymbolRef <- <(':' ('l' / 'L') ('o' / 'O') '1' '2' ':' (LocalSymbol / SymbolName) Offset?)> */ + func() bool { +- position725, tokenIndex725 := position, tokenIndex ++ position727, tokenIndex727 := position, tokenIndex + { +- position726 := position ++ position728 := position + if buffer[position] != rune(':') { +- goto l725 ++ goto l727 + } + position++ + { +- position727, tokenIndex727 := position, tokenIndex ++ position729, 
tokenIndex729 := position, tokenIndex + if buffer[position] != rune('l') { +- goto l728 ++ goto l730 + } + position++ +- goto l727 +- l728: +- position, tokenIndex = position727, tokenIndex727 ++ goto l729 ++ l730: ++ position, tokenIndex = position729, tokenIndex729 + if buffer[position] != rune('L') { +- goto l725 ++ goto l727 + } + position++ + } +- l727: ++ l729: + { +- position729, tokenIndex729 := position, tokenIndex ++ position731, tokenIndex731 := position, tokenIndex + if buffer[position] != rune('o') { +- goto l730 ++ goto l732 + } + position++ +- goto l729 +- l730: +- position, tokenIndex = position729, tokenIndex729 ++ goto l731 ++ l732: ++ position, tokenIndex = position731, tokenIndex731 + if buffer[position] != rune('O') { +- goto l725 ++ goto l727 + } + position++ + } +- l729: ++ l731: + if buffer[position] != rune('1') { +- goto l725 ++ goto l727 + } + position++ + if buffer[position] != rune('2') { +- goto l725 ++ goto l727 + } + position++ + if buffer[position] != rune(':') { +- goto l725 ++ goto l727 + } + position++ + { +- position731, tokenIndex731 := position, tokenIndex ++ position733, tokenIndex733 := position, tokenIndex + if !_rules[ruleLocalSymbol]() { +- goto l732 ++ goto l734 + } +- goto l731 +- l732: +- position, tokenIndex = position731, tokenIndex731 ++ goto l733 ++ l734: ++ position, tokenIndex = position733, tokenIndex733 + if !_rules[ruleSymbolName]() { +- goto l725 ++ goto l727 + } + } +- l731: ++ l733: + { +- position733, tokenIndex733 := position, tokenIndex ++ position735, tokenIndex735 := position, tokenIndex + if !_rules[ruleOffset]() { +- goto l733 ++ goto l735 + } +- goto l734 +- l733: +- position, tokenIndex = position733, tokenIndex733 ++ goto l736 ++ l735: ++ position, tokenIndex = position735, tokenIndex735 + } +- l734: +- add(ruleLow12BitsSymbolRef, position726) ++ l736: ++ add(ruleLow12BitsSymbolRef, position728) + } + return true +- l725: +- position, tokenIndex = position725, tokenIndex725 ++ l727: ++ position, tokenIndex = position727, tokenIndex727 + return false + }, + /* 46 ARMBaseIndexScale <- <('[' ARMRegister (',' WS? (('#' Offset ('*' [0-9]+)?) / ARMGOTLow12 / Low12BitsSymbolRef / ARMRegister) (',' WS? ARMConstantTweak)?)? 
']' ARMPostincrement?)> */ + func() bool { +- position735, tokenIndex735 := position, tokenIndex ++ position737, tokenIndex737 := position, tokenIndex + { +- position736 := position ++ position738 := position + if buffer[position] != rune('[') { +- goto l735 ++ goto l737 + } + position++ + if !_rules[ruleARMRegister]() { +- goto l735 ++ goto l737 + } + { +- position737, tokenIndex737 := position, tokenIndex ++ position739, tokenIndex739 := position, tokenIndex + if buffer[position] != rune(',') { +- goto l737 ++ goto l739 + } + position++ + { +- position739, tokenIndex739 := position, tokenIndex ++ position741, tokenIndex741 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l739 ++ goto l741 + } +- goto l740 +- l739: +- position, tokenIndex = position739, tokenIndex739 ++ goto l742 ++ l741: ++ position, tokenIndex = position741, tokenIndex741 + } +- l740: ++ l742: + { +- position741, tokenIndex741 := position, tokenIndex ++ position743, tokenIndex743 := position, tokenIndex + if buffer[position] != rune('#') { +- goto l742 ++ goto l744 + } + position++ + if !_rules[ruleOffset]() { +- goto l742 ++ goto l744 + } + { +- position743, tokenIndex743 := position, tokenIndex ++ position745, tokenIndex745 := position, tokenIndex + if buffer[position] != rune('*') { +- goto l743 ++ goto l745 + } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l743 ++ goto l745 + } + position++ +- l745: ++ l747: + { +- position746, tokenIndex746 := position, tokenIndex ++ position748, tokenIndex748 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l746 ++ goto l748 + } + position++ +- goto l745 +- l746: +- position, tokenIndex = position746, tokenIndex746 ++ goto l747 ++ l748: ++ position, tokenIndex = position748, tokenIndex748 + } +- goto l744 +- l743: +- position, tokenIndex = position743, tokenIndex743 ++ goto l746 ++ l745: ++ position, tokenIndex = position745, tokenIndex745 + } ++ l746: ++ goto l743 + l744: +- goto l741 +- l742: +- position, tokenIndex = position741, tokenIndex741 ++ position, tokenIndex = position743, tokenIndex743 + if !_rules[ruleARMGOTLow12]() { +- goto l747 ++ goto l749 + } +- goto l741 +- l747: +- position, tokenIndex = position741, tokenIndex741 ++ goto l743 ++ l749: ++ position, tokenIndex = position743, tokenIndex743 + if !_rules[ruleLow12BitsSymbolRef]() { +- goto l748 ++ goto l750 + } +- goto l741 +- l748: +- position, tokenIndex = position741, tokenIndex741 ++ goto l743 ++ l750: ++ position, tokenIndex = position743, tokenIndex743 + if !_rules[ruleARMRegister]() { +- goto l737 ++ goto l739 + } + } +- l741: ++ l743: + { +- position749, tokenIndex749 := position, tokenIndex ++ position751, tokenIndex751 := position, tokenIndex + if buffer[position] != rune(',') { +- goto l749 ++ goto l751 + } + position++ + { +- position751, tokenIndex751 := position, tokenIndex ++ position753, tokenIndex753 := position, tokenIndex + if !_rules[ruleWS]() { +- goto l751 ++ goto l753 + } +- goto l752 +- l751: +- position, tokenIndex = position751, tokenIndex751 ++ goto l754 ++ l753: ++ position, tokenIndex = position753, tokenIndex753 + } +- l752: ++ l754: + if !_rules[ruleARMConstantTweak]() { +- goto l749 ++ goto l751 + } +- goto l750 +- l749: +- position, tokenIndex = position749, tokenIndex749 ++ goto l752 ++ l751: ++ position, tokenIndex = position751, tokenIndex751 + } +- l750: +- goto l738 +- l737: +- position, tokenIndex = position737, tokenIndex737 ++ l752: ++ goto l740 ++ l739: ++ position, tokenIndex = 
position739, tokenIndex739 + } +- l738: ++ l740: + if buffer[position] != rune(']') { +- goto l735 ++ goto l737 + } + position++ + { +- position753, tokenIndex753 := position, tokenIndex ++ position755, tokenIndex755 := position, tokenIndex + if !_rules[ruleARMPostincrement]() { +- goto l753 ++ goto l755 + } +- goto l754 +- l753: +- position, tokenIndex = position753, tokenIndex753 ++ goto l756 ++ l755: ++ position, tokenIndex = position755, tokenIndex755 + } +- l754: +- add(ruleARMBaseIndexScale, position736) ++ l756: ++ add(ruleARMBaseIndexScale, position738) + } + return true +- l735: +- position, tokenIndex = position735, tokenIndex735 ++ l737: ++ position, tokenIndex = position737, tokenIndex737 + return false + }, + /* 47 ARMGOTLow12 <- <(':' ('g' / 'G') ('o' / 'O') ('t' / 'T') '_' ('l' / 'L') ('o' / 'O') '1' '2' ':' SymbolName)> */ + func() bool { +- position755, tokenIndex755 := position, tokenIndex ++ position757, tokenIndex757 := position, tokenIndex + { +- position756 := position ++ position758 := position + if buffer[position] != rune(':') { +- goto l755 +- } +- position++ +- { +- position757, tokenIndex757 := position, tokenIndex +- if buffer[position] != rune('g') { +- goto l758 +- } +- position++ + goto l757 +- l758: +- position, tokenIndex = position757, tokenIndex757 +- if buffer[position] != rune('G') { +- goto l755 +- } +- position++ + } +- l757: ++ position++ + { + position759, tokenIndex759 := position, tokenIndex +- if buffer[position] != rune('o') { ++ if buffer[position] != rune('g') { + goto l760 + } + position++ + goto l759 + l760: + position, tokenIndex = position759, tokenIndex759 +- if buffer[position] != rune('O') { +- goto l755 ++ if buffer[position] != rune('G') { ++ goto l757 + } + position++ + } + l759: + { + position761, tokenIndex761 := position, tokenIndex +- if buffer[position] != rune('t') { ++ if buffer[position] != rune('o') { + goto l762 + } + position++ + goto l761 + l762: + position, tokenIndex = position761, tokenIndex761 +- if buffer[position] != rune('T') { +- goto l755 ++ if buffer[position] != rune('O') { ++ goto l757 + } + position++ + } + l761: +- if buffer[position] != rune('_') { +- goto l755 +- } +- position++ + { + position763, tokenIndex763 := position, tokenIndex +- if buffer[position] != rune('l') { ++ if buffer[position] != rune('t') { + goto l764 + } + position++ + goto l763 + l764: + position, tokenIndex = position763, tokenIndex763 +- if buffer[position] != rune('L') { +- goto l755 ++ if buffer[position] != rune('T') { ++ goto l757 + } + position++ + } + l763: ++ if buffer[position] != rune('_') { ++ goto l757 ++ } ++ position++ + { + position765, tokenIndex765 := position, tokenIndex +- if buffer[position] != rune('o') { ++ if buffer[position] != rune('l') { + goto l766 + } + position++ + goto l765 + l766: + position, tokenIndex = position765, tokenIndex765 +- if buffer[position] != rune('O') { +- goto l755 ++ if buffer[position] != rune('L') { ++ goto l757 + } + position++ + } + l765: ++ { ++ position767, tokenIndex767 := position, tokenIndex ++ if buffer[position] != rune('o') { ++ goto l768 ++ } ++ position++ ++ goto l767 ++ l768: ++ position, tokenIndex = position767, tokenIndex767 ++ if buffer[position] != rune('O') { ++ goto l757 ++ } ++ position++ ++ } ++ l767: + if buffer[position] != rune('1') { +- goto l755 ++ goto l757 + } + position++ + if buffer[position] != rune('2') { +- goto l755 ++ goto l757 + } + position++ + if buffer[position] != rune(':') { +- goto l755 ++ goto l757 + } + position++ + if 
!_rules[ruleSymbolName]() { +- goto l755 ++ goto l757 + } +- add(ruleARMGOTLow12, position756) ++ add(ruleARMGOTLow12, position758) + } + return true +- l755: +- position, tokenIndex = position755, tokenIndex755 ++ l757: ++ position, tokenIndex = position757, tokenIndex757 + return false + }, + /* 48 ARMPostincrement <- <'!'> */ + func() bool { +- position767, tokenIndex767 := position, tokenIndex ++ position769, tokenIndex769 := position, tokenIndex + { +- position768 := position ++ position770 := position + if buffer[position] != rune('!') { +- goto l767 ++ goto l769 + } + position++ +- add(ruleARMPostincrement, position768) ++ add(ruleARMPostincrement, position770) + } + return true +- l767: +- position, tokenIndex = position767, tokenIndex767 ++ l769: ++ position, tokenIndex = position769, tokenIndex769 + return false + }, + /* 49 BaseIndexScale <- <('(' RegisterOrConstant? WS? (',' WS? RegisterOrConstant WS? (',' [0-9]+)?)? ')')> */ + func() bool { +- position769, tokenIndex769 := position, tokenIndex ++ position771, tokenIndex771 := position, tokenIndex + { +- position770 := position ++ position772 := position + if buffer[position] != rune('(') { +- goto l769 ++ goto l771 + } + position++ +- { +- position771, tokenIndex771 := position, tokenIndex +- if !_rules[ruleRegisterOrConstant]() { +- goto l771 +- } +- goto l772 +- l771: +- position, tokenIndex = position771, tokenIndex771 +- } +- l772: + { + position773, tokenIndex773 := position, tokenIndex +- if !_rules[ruleWS]() { ++ if !_rules[ruleRegisterOrConstant]() { + goto l773 + } + goto l774 +@@ -5991,23 +5988,20 @@ func (p *Asm) Init() { + l774: + { + position775, tokenIndex775 := position, tokenIndex +- if buffer[position] != rune(',') { ++ if !_rules[ruleWS]() { + goto l775 + } +- position++ +- { +- position777, tokenIndex777 := position, tokenIndex +- if !_rules[ruleWS]() { +- goto l777 +- } +- goto l778 +- l777: +- position, tokenIndex = position777, tokenIndex777 +- } +- l778: +- if !_rules[ruleRegisterOrConstant]() { +- goto l775 ++ goto l776 ++ l775: ++ position, tokenIndex = position775, tokenIndex775 ++ } ++ l776: ++ { ++ position777, tokenIndex777 := position, tokenIndex ++ if buffer[position] != rune(',') { ++ goto l777 + } ++ position++ + { + position779, tokenIndex779 := position, tokenIndex + if !_rules[ruleWS]() { +@@ -6018,94 +6012,96 @@ func (p *Asm) Init() { + position, tokenIndex = position779, tokenIndex779 + } + l780: ++ if !_rules[ruleRegisterOrConstant]() { ++ goto l777 ++ } + { + position781, tokenIndex781 := position, tokenIndex +- if buffer[position] != rune(',') { ++ if !_rules[ruleWS]() { + goto l781 + } ++ goto l782 ++ l781: ++ position, tokenIndex = position781, tokenIndex781 ++ } ++ l782: ++ { ++ position783, tokenIndex783 := position, tokenIndex ++ if buffer[position] != rune(',') { ++ goto l783 ++ } + position++ + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l781 ++ goto l783 + } + position++ +- l783: ++ l785: + { +- position784, tokenIndex784 := position, tokenIndex ++ position786, tokenIndex786 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l784 ++ goto l786 + } + position++ +- goto l783 +- l784: +- position, tokenIndex = position784, tokenIndex784 ++ goto l785 ++ l786: ++ position, tokenIndex = position786, tokenIndex786 + } +- goto l782 +- l781: +- position, tokenIndex = position781, tokenIndex781 ++ goto l784 ++ l783: ++ position, tokenIndex = position783, tokenIndex783 + } +- l782: +- goto l776 +- l775: +- position, tokenIndex 
= position775, tokenIndex775 ++ l784: ++ goto l778 ++ l777: ++ position, tokenIndex = position777, tokenIndex777 + } +- l776: ++ l778: + if buffer[position] != rune(')') { +- goto l769 ++ goto l771 + } + position++ +- add(ruleBaseIndexScale, position770) ++ add(ruleBaseIndexScale, position772) + } + return true +- l769: +- position, tokenIndex = position769, tokenIndex769 ++ l771: ++ position, tokenIndex = position771, tokenIndex771 + return false + }, + /* 50 Operator <- <('+' / '-')> */ + func() bool { +- position785, tokenIndex785 := position, tokenIndex ++ position787, tokenIndex787 := position, tokenIndex + { +- position786 := position ++ position788 := position + { +- position787, tokenIndex787 := position, tokenIndex ++ position789, tokenIndex789 := position, tokenIndex + if buffer[position] != rune('+') { +- goto l788 ++ goto l790 + } + position++ +- goto l787 +- l788: +- position, tokenIndex = position787, tokenIndex787 ++ goto l789 ++ l790: ++ position, tokenIndex = position789, tokenIndex789 + if buffer[position] != rune('-') { +- goto l785 ++ goto l787 + } + position++ + } +- l787: +- add(ruleOperator, position786) ++ l789: ++ add(ruleOperator, position788) + } + return true +- l785: +- position, tokenIndex = position785, tokenIndex785 ++ l787: ++ position, tokenIndex = position787, tokenIndex787 + return false + }, + /* 51 Offset <- <('+'? '-'? (('0' ('b' / 'B') ('0' / '1')+) / ('0' ('x' / 'X') ([0-9] / [0-9] / ([a-f] / [A-F]))+) / [0-9]+))> */ + func() bool { +- position789, tokenIndex789 := position, tokenIndex ++ position791, tokenIndex791 := position, tokenIndex + { +- position790 := position +- { +- position791, tokenIndex791 := position, tokenIndex +- if buffer[position] != rune('+') { +- goto l791 +- } +- position++ +- goto l792 +- l791: +- position, tokenIndex = position791, tokenIndex791 +- } +- l792: ++ position792 := position + { + position793, tokenIndex793 := position, tokenIndex +- if buffer[position] != rune('-') { ++ if buffer[position] != rune('+') { + goto l793 + } + position++ +@@ -6116,284 +6112,295 @@ func (p *Asm) Init() { + l794: + { + position795, tokenIndex795 := position, tokenIndex ++ if buffer[position] != rune('-') { ++ goto l795 ++ } ++ position++ ++ goto l796 ++ l795: ++ position, tokenIndex = position795, tokenIndex795 ++ } ++ l796: ++ { ++ position797, tokenIndex797 := position, tokenIndex + if buffer[position] != rune('0') { +- goto l796 ++ goto l798 + } + position++ + { +- position797, tokenIndex797 := position, tokenIndex ++ position799, tokenIndex799 := position, tokenIndex + if buffer[position] != rune('b') { +- goto l798 ++ goto l800 + } + position++ +- goto l797 +- l798: +- position, tokenIndex = position797, tokenIndex797 ++ goto l799 ++ l800: ++ position, tokenIndex = position799, tokenIndex799 + if buffer[position] != rune('B') { +- goto l796 ++ goto l798 + } + position++ + } +- l797: ++ l799: + { +- position801, tokenIndex801 := position, tokenIndex ++ position803, tokenIndex803 := position, tokenIndex + if buffer[position] != rune('0') { +- goto l802 ++ goto l804 + } + position++ +- goto l801 +- l802: +- position, tokenIndex = position801, tokenIndex801 ++ goto l803 ++ l804: ++ position, tokenIndex = position803, tokenIndex803 + if buffer[position] != rune('1') { +- goto l796 ++ goto l798 + } + position++ + } ++ l803: + l801: +- l799: + { +- position800, tokenIndex800 := position, tokenIndex ++ position802, tokenIndex802 := position, tokenIndex + { +- position803, tokenIndex803 := position, tokenIndex ++ position805, tokenIndex805 
:= position, tokenIndex + if buffer[position] != rune('0') { +- goto l804 ++ goto l806 + } + position++ +- goto l803 +- l804: +- position, tokenIndex = position803, tokenIndex803 ++ goto l805 ++ l806: ++ position, tokenIndex = position805, tokenIndex805 + if buffer[position] != rune('1') { +- goto l800 ++ goto l802 + } + position++ + } +- l803: +- goto l799 +- l800: +- position, tokenIndex = position800, tokenIndex800 ++ l805: ++ goto l801 ++ l802: ++ position, tokenIndex = position802, tokenIndex802 + } +- goto l795 +- l796: +- position, tokenIndex = position795, tokenIndex795 ++ goto l797 ++ l798: ++ position, tokenIndex = position797, tokenIndex797 + if buffer[position] != rune('0') { +- goto l805 ++ goto l807 + } + position++ + { +- position806, tokenIndex806 := position, tokenIndex ++ position808, tokenIndex808 := position, tokenIndex + if buffer[position] != rune('x') { +- goto l807 ++ goto l809 + } + position++ +- goto l806 +- l807: +- position, tokenIndex = position806, tokenIndex806 ++ goto l808 ++ l809: ++ position, tokenIndex = position808, tokenIndex808 + if buffer[position] != rune('X') { +- goto l805 ++ goto l807 + } + position++ + } +- l806: ++ l808: + { +- position810, tokenIndex810 := position, tokenIndex ++ position812, tokenIndex812 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l811 ++ goto l813 + } + position++ +- goto l810 +- l811: +- position, tokenIndex = position810, tokenIndex810 ++ goto l812 ++ l813: ++ position, tokenIndex = position812, tokenIndex812 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l812 ++ goto l814 + } + position++ +- goto l810 +- l812: +- position, tokenIndex = position810, tokenIndex810 ++ goto l812 ++ l814: ++ position, tokenIndex = position812, tokenIndex812 + { +- position813, tokenIndex813 := position, tokenIndex ++ position815, tokenIndex815 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('f') { +- goto l814 ++ goto l816 + } + position++ +- goto l813 +- l814: +- position, tokenIndex = position813, tokenIndex813 ++ goto l815 ++ l816: ++ position, tokenIndex = position815, tokenIndex815 + if c := buffer[position]; c < rune('A') || c > rune('F') { +- goto l805 ++ goto l807 + } + position++ + } +- l813: ++ l815: + } ++ l812: + l810: +- l808: + { +- position809, tokenIndex809 := position, tokenIndex ++ position811, tokenIndex811 := position, tokenIndex + { +- position815, tokenIndex815 := position, tokenIndex ++ position817, tokenIndex817 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l816 ++ goto l818 + } + position++ +- goto l815 +- l816: +- position, tokenIndex = position815, tokenIndex815 ++ goto l817 ++ l818: ++ position, tokenIndex = position817, tokenIndex817 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l817 ++ goto l819 + } + position++ +- goto l815 +- l817: +- position, tokenIndex = position815, tokenIndex815 ++ goto l817 ++ l819: ++ position, tokenIndex = position817, tokenIndex817 + { +- position818, tokenIndex818 := position, tokenIndex ++ position820, tokenIndex820 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('f') { +- goto l819 ++ goto l821 + } + position++ +- goto l818 +- l819: +- position, tokenIndex = position818, tokenIndex818 ++ goto l820 ++ l821: ++ position, tokenIndex = position820, tokenIndex820 + if c := buffer[position]; c < rune('A') || c > rune('F') { +- goto l809 ++ goto l811 + } + position++ + } +- l818: ++ l820: + } 
+- l815: +- goto l808 +- l809: +- position, tokenIndex = position809, tokenIndex809 ++ l817: ++ goto l810 ++ l811: ++ position, tokenIndex = position811, tokenIndex811 + } +- goto l795 +- l805: +- position, tokenIndex = position795, tokenIndex795 ++ goto l797 ++ l807: ++ position, tokenIndex = position797, tokenIndex797 + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l789 ++ goto l791 + } + position++ +- l820: ++ l822: + { +- position821, tokenIndex821 := position, tokenIndex ++ position823, tokenIndex823 := position, tokenIndex + if c := buffer[position]; c < rune('0') || c > rune('9') { +- goto l821 ++ goto l823 + } + position++ +- goto l820 +- l821: +- position, tokenIndex = position821, tokenIndex821 ++ goto l822 ++ l823: ++ position, tokenIndex = position823, tokenIndex823 + } + } +- l795: +- add(ruleOffset, position790) ++ l797: ++ add(ruleOffset, position792) + } + return true +- l789: +- position, tokenIndex = position789, tokenIndex789 ++ l791: ++ position, tokenIndex = position791, tokenIndex791 + return false + }, + /* 52 Section <- <([a-z] / [A-Z] / '@')+> */ + func() bool { +- position822, tokenIndex822 := position, tokenIndex ++ position824, tokenIndex824 := position, tokenIndex + { +- position823 := position ++ position825 := position + { +- position826, tokenIndex826 := position, tokenIndex ++ position828, tokenIndex828 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l827 ++ goto l829 + } + position++ +- goto l826 +- l827: +- position, tokenIndex = position826, tokenIndex826 ++ goto l828 ++ l829: ++ position, tokenIndex = position828, tokenIndex828 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l828 ++ goto l830 + } + position++ +- goto l826 +- l828: +- position, tokenIndex = position826, tokenIndex826 ++ goto l828 ++ l830: ++ position, tokenIndex = position828, tokenIndex828 + if buffer[position] != rune('@') { +- goto l822 ++ goto l824 + } + position++ + } ++ l828: + l826: +- l824: + { +- position825, tokenIndex825 := position, tokenIndex ++ position827, tokenIndex827 := position, tokenIndex + { +- position829, tokenIndex829 := position, tokenIndex ++ position831, tokenIndex831 := position, tokenIndex + if c := buffer[position]; c < rune('a') || c > rune('z') { +- goto l830 ++ goto l832 + } + position++ +- goto l829 +- l830: +- position, tokenIndex = position829, tokenIndex829 ++ goto l831 ++ l832: ++ position, tokenIndex = position831, tokenIndex831 + if c := buffer[position]; c < rune('A') || c > rune('Z') { +- goto l831 ++ goto l833 + } + position++ +- goto l829 +- l831: +- position, tokenIndex = position829, tokenIndex829 ++ goto l831 ++ l833: ++ position, tokenIndex = position831, tokenIndex831 + if buffer[position] != rune('@') { +- goto l825 ++ goto l827 + } + position++ + } +- l829: +- goto l824 +- l825: +- position, tokenIndex = position825, tokenIndex825 ++ l831: ++ goto l826 ++ l827: ++ position, tokenIndex = position827, tokenIndex827 + } +- add(ruleSection, position823) ++ add(ruleSection, position825) + } + return true +- l822: +- position, tokenIndex = position822, tokenIndex822 ++ l824: ++ position, tokenIndex = position824, tokenIndex824 + return false + }, + /* 53 SegmentRegister <- <('%' ([c-g] / 's') ('s' ':'))> */ + func() bool { +- position832, tokenIndex832 := position, tokenIndex ++ position834, tokenIndex834 := position, tokenIndex + { +- position833 := position ++ position835 := position + if buffer[position] != rune('%') { +- goto l832 ++ goto l834 + } + 
position++ + { +- position834, tokenIndex834 := position, tokenIndex ++ position836, tokenIndex836 := position, tokenIndex + if c := buffer[position]; c < rune('c') || c > rune('g') { +- goto l835 ++ goto l837 + } + position++ +- goto l834 +- l835: +- position, tokenIndex = position834, tokenIndex834 ++ goto l836 ++ l837: ++ position, tokenIndex = position836, tokenIndex836 + if buffer[position] != rune('s') { +- goto l832 ++ goto l834 + } + position++ + } +- l834: ++ l836: + if buffer[position] != rune('s') { +- goto l832 ++ goto l834 + } + position++ + if buffer[position] != rune(':') { +- goto l832 ++ goto l834 + } + position++ +- add(ruleSegmentRegister, position833) ++ add(ruleSegmentRegister, position835) + } + return true +- l832: +- position, tokenIndex = position832, tokenIndex832 ++ l834: ++ position, tokenIndex = position834, tokenIndex834 + return false + }, + } +diff --git a/src/util/fipstools/delocate/testdata/x86_64-Basic/in.s b/src/util/fipstools/delocate/testdata/x86_64-Basic/in.s +index c54756b..7e48e27 100644 +--- a/src/util/fipstools/delocate/testdata/x86_64-Basic/in.s ++++ b/src/util/fipstools/delocate/testdata/x86_64-Basic/in.s +@@ -47,3 +47,4 @@ foo: + .L4: .L5: movq %rbx, %rax # This is also legal. + .size foo, .-foo + .type foo, @function ++.uleb128 .foo-1-.bar +diff --git a/src/util/fipstools/delocate/testdata/x86_64-Basic/out.s b/src/util/fipstools/delocate/testdata/x86_64-Basic/out.s +index 23e97c8..a55e852 100644 +--- a/src/util/fipstools/delocate/testdata/x86_64-Basic/out.s ++++ b/src/util/fipstools/delocate/testdata/x86_64-Basic/out.s +@@ -55,6 +55,7 @@ foo: + movq %rbx, %rax # This is also legal. + .size foo, .-foo + .type foo, @function ++.uleb128 .foo-1-.bar + .text + .loc 2 2 0 + BORINGSSL_bcm_text_end: +diff --git a/src/util/fipstools/test-break-kat.sh b/src/util/fipstools/test-break-kat.sh +new file mode 100644 +index 0000000..d2c44a7 +--- /dev/null ++++ b/src/util/fipstools/test-break-kat.sh +@@ -0,0 +1,40 @@ ++# Copyright (c) 2022, Google Inc. ++# ++# Permission to use, copy, modify, and/or distribute this software for any ++# purpose with or without fee is hereby granted, provided that the above ++# copyright notice and this permission notice appear in all copies. ++# ++# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY ++# SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION ++# OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN ++# CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ++ ++# This script attempts to break each of the known KATs and checks that doing so ++# seems to work and at least mentions the correct KAT in the output. ++ ++set -x ++set -e ++ ++TEST_FIPS_BIN="build/util/fipstools/test_fips" ++ ++if [ ! -f $TEST_FIPS_BIN ]; then ++ echo "$TEST_FIPS_BIN is missing. Run this script from the top level of a" ++ echo "BoringSSL checkout and ensure that ./build-fips-break-test-binaries.sh" ++ echo "has been run first." ++ exit 1 ++fi ++ ++KATS=$(go run util/fipstools/break-kat.go --list-tests) ++ ++for kat in $KATS; do ++ go run util/fipstools/break-kat.go $TEST_FIPS_BIN $kat > break-kat-bin ++ chmod u+x ./break-kat-bin ++ if ! 
(./break-kat-bin 2>&1 || true) | egrep -q "^$kat[^a-zA-Z0-9]"; then ++ echo "Failure for $kat did not mention that name in the output" ++ exit 1 ++ fi ++ rm ./break-kat-bin ++done +diff --git a/src/util/fipstools/test_fips.c b/src/util/fipstools/test_fips.c +new file mode 100644 +index 0000000..b3d5521 +--- /dev/null ++++ b/src/util/fipstools/test_fips.c +@@ -0,0 +1,309 @@ ++/* Copyright (c) 2017, Google Inc. ++ * ++ * Permission to use, copy, modify, and/or distribute this software for any ++ * purpose with or without fee is hereby granted, provided that the above ++ * copyright notice and this permission notice appear in all copies. ++ * ++ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ++ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY ++ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ++ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION ++ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN ++ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ ++ ++/* test_fips exercises various cryptographic primitives for demonstration ++ * purposes in the validation process only. */ ++ ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "../../crypto/fipsmodule/rand/internal.h" ++#include "../../crypto/fipsmodule/tls/internal.h" ++#include "../../crypto/internal.h" ++ ++ ++static void hexdump(const void *a, size_t len) { ++ const unsigned char *in = (const unsigned char *)a; ++ for (size_t i = 0; i < len; i++) { ++ printf("%02x", in[i]); ++ } ++ ++ printf("\n"); ++} ++ ++int main(int argc, char **argv) { ++ CRYPTO_library_init(); ++ ++ static const uint8_t kAESKey[16] = "BoringCrypto Key"; ++ static const uint8_t kPlaintext[64] = ++ "BoringCryptoModule FIPS KAT Encryption and Decryption Plaintext!"; ++ static const DES_cblock kDESKey1 = {"BCMDESK1"}; ++ static const DES_cblock kDESKey2 = {"BCMDESK2"}; ++ static const DES_cblock kDESKey3 = {"BCMDESK3"}; ++ static const DES_cblock kDESIV = {"BCMDESIV"}; ++ static const uint8_t kPlaintextSHA256[32] = { ++ 0x37, 0xbd, 0x70, 0x53, 0x72, 0xfc, 0xd4, 0x03, 0x79, 0x70, 0xfb, ++ 0x06, 0x95, 0xb1, 0x2a, 0x82, 0x48, 0xe1, 0x3e, 0xf2, 0x33, 0xfb, ++ 0xef, 0x29, 0x81, 0x22, 0x45, 0x40, 0x43, 0x70, 0xce, 0x0f}; ++ const uint8_t kDRBGEntropy[48] = ++ "DBRG Initial Entropy "; ++ const uint8_t kDRBGPersonalization[18] = "BCMPersonalization"; ++ const uint8_t kDRBGAD[16] = "BCM DRBG AD "; ++ const uint8_t kDRBGEntropy2[48] = ++ "DBRG Reseed Entropy "; ++ ++ AES_KEY aes_key; ++ uint8_t aes_iv[16]; ++ uint8_t output[256]; ++ ++ /* AES-CBC Encryption */ ++ memset(aes_iv, 0, sizeof(aes_iv)); ++ if (AES_set_encrypt_key(kAESKey, 8 * sizeof(kAESKey), &aes_key) != 0) { ++ printf("AES_set_encrypt_key failed\n"); ++ goto err; ++ } ++ ++ printf("About to AES-CBC encrypt "); ++ hexdump(kPlaintext, sizeof(kPlaintext)); ++ AES_cbc_encrypt(kPlaintext, output, sizeof(kPlaintext), &aes_key, aes_iv, ++ AES_ENCRYPT); ++ printf(" got "); ++ hexdump(output, sizeof(kPlaintext)); ++ ++ /* AES-CBC Decryption */ ++ memset(aes_iv, 0, sizeof(aes_iv)); ++ if (AES_set_decrypt_key(kAESKey, 8 * sizeof(kAESKey), &aes_key) != 0) { ++ printf("AES decrypt failed\n"); ++ goto err; ++ } ++ printf("About to AES-CBC decrypt "); ++ hexdump(output, sizeof(kPlaintext)); ++ AES_cbc_encrypt(output, 
output, sizeof(kPlaintext), &aes_key, aes_iv, ++ AES_DECRYPT); ++ printf(" got "); ++ hexdump(output, sizeof(kPlaintext)); ++ ++ size_t out_len; ++ uint8_t nonce[EVP_AEAD_MAX_NONCE_LENGTH]; ++ OPENSSL_memset(nonce, 0, sizeof(nonce)); ++ EVP_AEAD_CTX aead_ctx; ++ if (!EVP_AEAD_CTX_init(&aead_ctx, EVP_aead_aes_128_gcm(), kAESKey, ++ sizeof(kAESKey), 0, NULL)) { ++ printf("EVP_AEAD_CTX_init failed\n"); ++ goto err; ++ } ++ ++ /* AES-GCM Encryption */ ++ printf("About to AES-GCM seal "); ++ hexdump(output, sizeof(kPlaintext)); ++ if (!EVP_AEAD_CTX_seal(&aead_ctx, output, &out_len, sizeof(output), nonce, ++ EVP_AEAD_nonce_length(EVP_aead_aes_128_gcm()), ++ kPlaintext, sizeof(kPlaintext), NULL, 0)) { ++ printf("AES-GCM encrypt failed\n"); ++ goto err; ++ } ++ printf(" got "); ++ hexdump(output, out_len); ++ ++ /* AES-GCM Decryption */ ++ printf("About to AES-GCM open "); ++ hexdump(output, out_len); ++ if (!EVP_AEAD_CTX_open(&aead_ctx, output, &out_len, sizeof(output), nonce, ++ EVP_AEAD_nonce_length(EVP_aead_aes_128_gcm()), ++ output, out_len, NULL, 0)) { ++ printf("AES-GCM decrypt failed\n"); ++ goto err; ++ } ++ printf(" got "); ++ hexdump(output, out_len); ++ ++ EVP_AEAD_CTX_cleanup(&aead_ctx); ++ ++ DES_key_schedule des1, des2, des3; ++ DES_cblock des_iv; ++ DES_set_key(&kDESKey1, &des1); ++ DES_set_key(&kDESKey2, &des2); ++ DES_set_key(&kDESKey3, &des3); ++ ++ /* 3DES Encryption */ ++ memcpy(&des_iv, &kDESIV, sizeof(des_iv)); ++ printf("About to 3DES-CBC encrypt "); ++ hexdump(kPlaintext, sizeof(kPlaintext)); ++ DES_ede3_cbc_encrypt(kPlaintext, output, sizeof(kPlaintext), &des1, &des2, ++ &des3, &des_iv, DES_ENCRYPT); ++ printf(" got "); ++ hexdump(output, sizeof(kPlaintext)); ++ ++ /* 3DES Decryption */ ++ memcpy(&des_iv, &kDESIV, sizeof(des_iv)); ++ printf("About to 3DES-CBC decrypt "); ++ hexdump(kPlaintext, sizeof(kPlaintext)); ++ DES_ede3_cbc_encrypt(output, output, sizeof(kPlaintext), &des1, ++ &des2, &des3, &des_iv, DES_DECRYPT); ++ printf(" got "); ++ hexdump(output, sizeof(kPlaintext)); ++ ++ /* SHA-1 */ ++ printf("About to SHA-1 hash "); ++ hexdump(kPlaintext, sizeof(kPlaintext)); ++ SHA1(kPlaintext, sizeof(kPlaintext), output); ++ printf(" got "); ++ hexdump(output, SHA_DIGEST_LENGTH); ++ ++ /* SHA-256 */ ++ printf("About to SHA-256 hash "); ++ hexdump(kPlaintext, sizeof(kPlaintext)); ++ SHA256(kPlaintext, sizeof(kPlaintext), output); ++ printf(" got "); ++ hexdump(output, SHA256_DIGEST_LENGTH); ++ ++ /* SHA-512 */ ++ printf("About to SHA-512 hash "); ++ hexdump(kPlaintext, sizeof(kPlaintext)); ++ SHA512(kPlaintext, sizeof(kPlaintext), output); ++ printf(" got "); ++ hexdump(output, SHA512_DIGEST_LENGTH); ++ ++ RSA *rsa_key = RSA_new(); ++ printf("About to generate RSA key\n"); ++ if (!RSA_generate_key_fips(rsa_key, 2048, NULL)) { ++ printf("RSA_generate_key_fips failed\n"); ++ goto err; ++ } ++ ++ /* RSA Sign */ ++ unsigned sig_len; ++ printf("About to RSA sign "); ++ hexdump(kPlaintextSHA256, sizeof(kPlaintextSHA256)); ++ if (!RSA_sign(NID_sha256, kPlaintextSHA256, sizeof(kPlaintextSHA256), output, ++ &sig_len, rsa_key)) { ++ printf("RSA Sign failed\n"); ++ goto err; ++ } ++ printf(" got "); ++ hexdump(output, sig_len); ++ ++ /* RSA Verify */ ++ printf("About to RSA verify "); ++ hexdump(output, sig_len); ++ if (!RSA_verify(NID_sha256, kPlaintextSHA256, sizeof(kPlaintextSHA256), ++ output, sig_len, rsa_key)) { ++ printf("RSA Verify failed.\n"); ++ goto err; ++ } ++ ++ RSA_free(rsa_key); ++ ++ EC_KEY *ec_key = EC_KEY_new_by_curve_name(NID_X9_62_prime256v1); ++ if (ec_key == 
NULL) { ++ printf("invalid ECDSA key\n"); ++ goto err; ++ } ++ ++ printf("About to generate P-256 key\n"); ++ if (!EC_KEY_generate_key_fips(ec_key)) { ++ printf("EC_KEY_generate_key_fips failed\n"); ++ goto err; ++ } ++ ++ /* Primitive Z Computation */ ++ const EC_GROUP *const ec_group = EC_KEY_get0_group(ec_key); ++ EC_POINT *z_point = EC_POINT_new(ec_group); ++ uint8_t z_result[65]; ++ printf("About to compute key-agreement Z with P-256:\n"); ++ if (!EC_POINT_mul(ec_group, z_point, NULL, EC_KEY_get0_public_key(ec_key), ++ EC_KEY_get0_private_key(ec_key), NULL) || ++ EC_POINT_point2oct(ec_group, z_point, POINT_CONVERSION_UNCOMPRESSED, ++ z_result, sizeof(z_result), ++ NULL) != sizeof(z_result)) { ++ fprintf(stderr, "EC_POINT_mul failed.\n"); ++ goto err; ++ } ++ EC_POINT_free(z_point); ++ ++ printf(" got "); ++ hexdump(z_result, sizeof(z_result)); ++ ++ /* ECDSA Sign/Verify PWCT */ ++ printf("About to ECDSA sign "); ++ hexdump(kPlaintextSHA256, sizeof(kPlaintextSHA256)); ++ ECDSA_SIG *sig = ++ ECDSA_do_sign(kPlaintextSHA256, sizeof(kPlaintextSHA256), ec_key); ++ if (sig == NULL || ++ !ECDSA_do_verify(kPlaintextSHA256, sizeof(kPlaintextSHA256), sig, ++ ec_key)) { ++ printf("ECDSA Sign/Verify PWCT failed.\n"); ++ goto err; ++ } ++ ++ ECDSA_SIG_free(sig); ++ EC_KEY_free(ec_key); ++ ++ /* DBRG */ ++ CTR_DRBG_STATE drbg; ++ printf("About to seed CTR-DRBG with "); ++ hexdump(kDRBGEntropy, sizeof(kDRBGEntropy)); ++ if (!CTR_DRBG_init(&drbg, kDRBGEntropy, kDRBGPersonalization, ++ sizeof(kDRBGPersonalization)) || ++ !CTR_DRBG_generate(&drbg, output, sizeof(output), kDRBGAD, ++ sizeof(kDRBGAD)) || ++ !CTR_DRBG_reseed(&drbg, kDRBGEntropy2, kDRBGAD, sizeof(kDRBGAD)) || ++ !CTR_DRBG_generate(&drbg, output, sizeof(output), kDRBGAD, ++ sizeof(kDRBGAD))) { ++ printf("DRBG failed\n"); ++ goto err; ++ } ++ printf(" generated "); ++ hexdump(output, sizeof(output)); ++ CTR_DRBG_clear(&drbg); ++ ++ /* TLS KDF */ ++ printf("About to run TLS KDF\n"); ++ uint8_t tls_output[32]; ++ if (!CRYPTO_tls1_prf(EVP_sha256(), tls_output, sizeof(tls_output), kAESKey, ++ sizeof(kAESKey), "foo", 3, kPlaintextSHA256, ++ sizeof(kPlaintextSHA256), kPlaintextSHA256, ++ sizeof(kPlaintextSHA256))) { ++ fprintf(stderr, "TLS KDF failed.\n"); ++ goto err; ++ } ++ printf(" got "); ++ hexdump(tls_output, sizeof(tls_output)); ++ ++ /* FFDH */ ++ printf("About to compute FFDH key-agreement:\n"); ++ DH *dh = DH_get_rfc7919_2048(); ++ uint8_t dh_result[2048/8]; ++ if (!dh || ++ !DH_generate_key(dh) || ++ sizeof(dh_result) != DH_size(dh) || ++ DH_compute_key_padded(dh_result, DH_get0_pub_key(dh), dh) != ++ sizeof(dh_result)) { ++ fprintf(stderr, "FFDH failed.\n"); ++ goto err; ++ } ++ DH_free(dh); ++ ++ printf(" got "); ++ hexdump(dh_result, sizeof(dh_result)); ++ ++ printf("PASS\n"); ++ return 0; ++ ++err: ++ printf("FAIL\n"); ++ abort(); ++} +diff --git a/src/util/generate_build_files.py b/src/util/generate_build_files.py +index 5160805..8cfe44a 100644 +--- a/src/util/generate_build_files.py ++++ b/src/util/generate_build_files.py +@@ -26,15 +26,15 @@ import json + # OS_ARCH_COMBOS maps from OS and platform to the OpenSSL assembly "style" for + # that platform and the extension used by asm files. 
+ OS_ARCH_COMBOS = [ +- ('ios', 'arm', 'ios32', [], 'S'), +- ('ios', 'aarch64', 'ios64', [], 'S'), ++ ('apple', 'arm', 'ios32', [], 'S'), ++ ('apple', 'aarch64', 'ios64', [], 'S'), ++ ('apple', 'x86', 'macosx', ['-fPIC', '-DOPENSSL_IA32_SSE2'], 'S'), ++ ('apple', 'x86_64', 'macosx', [], 'S'), + ('linux', 'arm', 'linux32', [], 'S'), + ('linux', 'aarch64', 'linux64', [], 'S'), + ('linux', 'ppc64le', 'linux64le', [], 'S'), + ('linux', 'x86', 'elf', ['-fPIC', '-DOPENSSL_IA32_SSE2'], 'S'), + ('linux', 'x86_64', 'elf', [], 'S'), +- ('mac', 'x86', 'macosx', ['-fPIC', '-DOPENSSL_IA32_SSE2'], 'S'), +- ('mac', 'x86_64', 'macosx', [], 'S'), + ('win', 'x86', 'win32n', ['-DOPENSSL_IA32_SSE2'], 'asm'), + ('win', 'x86_64', 'nasm', [], 'asm'), + ('win', 'aarch64', 'win64', [], 'S'), +@@ -589,12 +589,8 @@ include_directories(src/include) + asm_files) + + cmake.write( +-R'''if(APPLE AND ARCH STREQUAL "aarch64") +- set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_aarch64_SOURCES}) +-elseif(APPLE AND ARCH STREQUAL "arm") +- set(CRYPTO_ARCH_SOURCES ${CRYPTO_ios_arm_SOURCES}) +-elseif(APPLE) +- set(CRYPTO_ARCH_SOURCES ${CRYPTO_mac_${ARCH}_SOURCES}) ++R'''if(APPLE) ++ set(CRYPTO_ARCH_SOURCES ${CRYPTO_apple_${ARCH}_SOURCES}) + elseif(UNIX) + set(CRYPTO_ARCH_SOURCES ${CRYPTO_linux_${ARCH}_SOURCES}) + elseif(WIN32) +diff --git a/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm b/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm +index 68c74cc..49be6f6 100644 +--- a/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm ++++ b/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm +@@ -31,6 +31,8 @@ $L$SEH_begin_sha256_block_data_order: + mov r9d,DWORD[r11] + mov r10d,DWORD[4+r11] + mov r11d,DWORD[8+r11] ++ test r11d,536870912 ++ jnz NEAR $L$shaext_shortcut + and r9d,1073741824 + and r10d,268435968 + or r10d,r9d +@@ -1794,6 +1796,240 @@ DB 52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121 + DB 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46 + DB 111,114,103,62,0 + ++ALIGN 64 ++sha256_block_data_order_shaext: ++ mov QWORD[8+rsp],rdi ;WIN64 prologue ++ mov QWORD[16+rsp],rsi ++ mov rax,rsp ++$L$SEH_begin_sha256_block_data_order_shaext: ++ mov rdi,rcx ++ mov rsi,rdx ++ mov rdx,r8 ++ ++ ++ ++$L$shaext_shortcut: ++ lea rsp,[((-88))+rsp] ++ movaps XMMWORD[(-8-80)+rax],xmm6 ++ movaps XMMWORD[(-8-64)+rax],xmm7 ++ movaps XMMWORD[(-8-48)+rax],xmm8 ++ movaps XMMWORD[(-8-32)+rax],xmm9 ++ movaps XMMWORD[(-8-16)+rax],xmm10 ++$L$prologue_shaext: ++ lea rcx,[((K256+128))] ++ movdqu xmm1,XMMWORD[rdi] ++ movdqu xmm2,XMMWORD[16+rdi] ++ movdqa xmm7,XMMWORD[((512-128))+rcx] ++ ++ pshufd xmm0,xmm1,0x1b ++ pshufd xmm1,xmm1,0xb1 ++ pshufd xmm2,xmm2,0x1b ++ movdqa xmm8,xmm7 ++DB 102,15,58,15,202,8 ++ punpcklqdq xmm2,xmm0 ++ jmp NEAR $L$oop_shaext ++ ++ALIGN 16 ++$L$oop_shaext: ++ movdqu xmm3,XMMWORD[rsi] ++ movdqu xmm4,XMMWORD[16+rsi] ++ movdqu xmm5,XMMWORD[32+rsi] ++DB 102,15,56,0,223 ++ movdqu xmm6,XMMWORD[48+rsi] ++ ++ movdqa xmm0,XMMWORD[((0-128))+rcx] ++ paddd xmm0,xmm3 ++DB 102,15,56,0,231 ++ movdqa xmm10,xmm2 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ nop ++ movdqa xmm9,xmm1 ++DB 15,56,203,202 ++ ++ movdqa xmm0,XMMWORD[((32-128))+rcx] ++ paddd xmm0,xmm4 ++DB 102,15,56,0,239 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ lea rsi,[64+rsi] ++DB 15,56,204,220 ++DB 15,56,203,202 ++ ++ movdqa xmm0,XMMWORD[((64-128))+rcx] ++ paddd xmm0,xmm5 ++DB 102,15,56,0,247 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm6 ++DB 102,15,58,15,253,4 ++ nop ++ paddd xmm3,xmm7 ++DB 15,56,204,229 ++DB 15,56,203,202 ++ ++ movdqa xmm0,XMMWORD[((96-128))+rcx] ++ paddd 
xmm0,xmm6 ++DB 15,56,205,222 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm3 ++DB 102,15,58,15,254,4 ++ nop ++ paddd xmm4,xmm7 ++DB 15,56,204,238 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((128-128))+rcx] ++ paddd xmm0,xmm3 ++DB 15,56,205,227 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm4 ++DB 102,15,58,15,251,4 ++ nop ++ paddd xmm5,xmm7 ++DB 15,56,204,243 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((160-128))+rcx] ++ paddd xmm0,xmm4 ++DB 15,56,205,236 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm5 ++DB 102,15,58,15,252,4 ++ nop ++ paddd xmm6,xmm7 ++DB 15,56,204,220 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((192-128))+rcx] ++ paddd xmm0,xmm5 ++DB 15,56,205,245 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm6 ++DB 102,15,58,15,253,4 ++ nop ++ paddd xmm3,xmm7 ++DB 15,56,204,229 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((224-128))+rcx] ++ paddd xmm0,xmm6 ++DB 15,56,205,222 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm3 ++DB 102,15,58,15,254,4 ++ nop ++ paddd xmm4,xmm7 ++DB 15,56,204,238 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((256-128))+rcx] ++ paddd xmm0,xmm3 ++DB 15,56,205,227 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm4 ++DB 102,15,58,15,251,4 ++ nop ++ paddd xmm5,xmm7 ++DB 15,56,204,243 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((288-128))+rcx] ++ paddd xmm0,xmm4 ++DB 15,56,205,236 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm5 ++DB 102,15,58,15,252,4 ++ nop ++ paddd xmm6,xmm7 ++DB 15,56,204,220 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((320-128))+rcx] ++ paddd xmm0,xmm5 ++DB 15,56,205,245 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm6 ++DB 102,15,58,15,253,4 ++ nop ++ paddd xmm3,xmm7 ++DB 15,56,204,229 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((352-128))+rcx] ++ paddd xmm0,xmm6 ++DB 15,56,205,222 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm3 ++DB 102,15,58,15,254,4 ++ nop ++ paddd xmm4,xmm7 ++DB 15,56,204,238 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((384-128))+rcx] ++ paddd xmm0,xmm3 ++DB 15,56,205,227 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm4 ++DB 102,15,58,15,251,4 ++ nop ++ paddd xmm5,xmm7 ++DB 15,56,204,243 ++DB 15,56,203,202 ++ movdqa xmm0,XMMWORD[((416-128))+rcx] ++ paddd xmm0,xmm4 ++DB 15,56,205,236 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ movdqa xmm7,xmm5 ++DB 102,15,58,15,252,4 ++DB 15,56,203,202 ++ paddd xmm6,xmm7 ++ ++ movdqa xmm0,XMMWORD[((448-128))+rcx] ++ paddd xmm0,xmm5 ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++DB 15,56,205,245 ++ movdqa xmm7,xmm8 ++DB 15,56,203,202 ++ ++ movdqa xmm0,XMMWORD[((480-128))+rcx] ++ paddd xmm0,xmm6 ++ nop ++DB 15,56,203,209 ++ pshufd xmm0,xmm0,0x0e ++ dec rdx ++ nop ++DB 15,56,203,202 ++ ++ paddd xmm2,xmm10 ++ paddd xmm1,xmm9 ++ jnz NEAR $L$oop_shaext ++ ++ pshufd xmm2,xmm2,0xb1 ++ pshufd xmm7,xmm1,0x1b ++ pshufd xmm1,xmm1,0xb1 ++ punpckhqdq xmm1,xmm2 ++DB 102,15,58,15,215,8 ++ ++ movdqu XMMWORD[rdi],xmm1 ++ movdqu XMMWORD[16+rdi],xmm2 ++ movaps xmm6,XMMWORD[((-8-80))+rax] ++ movaps xmm7,XMMWORD[((-8-64))+rax] ++ movaps xmm8,XMMWORD[((-8-48))+rax] ++ movaps xmm9,XMMWORD[((-8-32))+rax] ++ movaps xmm10,XMMWORD[((-8-16))+rax] ++ mov rsp,rax ++$L$epilogue_shaext: ++ mov rdi,QWORD[8+rsp] ;WIN64 epilogue ++ mov rsi,QWORD[16+rsp] ++ DB 0F3h,0C3h ;repret ++ ++$L$SEH_end_sha256_block_data_order_shaext: ++ + ALIGN 64 + sha256_block_data_order_ssse3: + mov QWORD[8+rsp],rdi ;WIN64 prologue +@@ -4115,11 +4351,46 @@ $L$in_prologue: + pop rsi + DB 0F3h,0C3h ;repret + ++ 
++ALIGN 16 ++shaext_handler: ++ push rsi ++ push rdi ++ push rbx ++ push rbp ++ push r12 ++ push r13 ++ push r14 ++ push r15 ++ pushfq ++ sub rsp,64 ++ ++ mov rax,QWORD[120+r8] ++ mov rbx,QWORD[248+r8] ++ ++ lea r10,[$L$prologue_shaext] ++ cmp rbx,r10 ++ jb NEAR $L$in_prologue ++ ++ lea r10,[$L$epilogue_shaext] ++ cmp rbx,r10 ++ jae NEAR $L$in_prologue ++ ++ lea rsi,[((-8-80))+rax] ++ lea rdi,[512+r8] ++ mov ecx,10 ++ DD 0xa548f3fc ++ ++ jmp NEAR $L$in_prologue ++ + section .pdata rdata align=4 + ALIGN 4 + DD $L$SEH_begin_sha256_block_data_order wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order wrt ..imagebase ++ DD $L$SEH_begin_sha256_block_data_order_shaext wrt ..imagebase ++ DD $L$SEH_end_sha256_block_data_order_shaext wrt ..imagebase ++ DD $L$SEH_info_sha256_block_data_order_shaext wrt ..imagebase + DD $L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase + DD $L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase +@@ -4132,6 +4403,9 @@ $L$SEH_info_sha256_block_data_order: + DB 9,0,0,0 + DD se_handler wrt ..imagebase + DD $L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase ++$L$SEH_info_sha256_block_data_order_shaext: ++DB 9,0,0,0 ++ DD shaext_handler wrt ..imagebase + $L$SEH_info_sha256_block_data_order_ssse3: + DB 9,0,0,0 + DD se_handler wrt ..imagebase diff --git a/patches/series b/patches/series new file mode 100644 index 0000000..fda5e59 --- /dev/null +++ b/patches/series @@ -0,0 +1,3 @@ +01-Add-new-Arch-ia64-riscv64-sh4-x32.patch +02-sources-mk.patch +Sync-to-81502beeddc5f116d44d0898c.patch diff --git a/rules b/rules new file mode 100755 index 0000000..0301892 --- /dev/null +++ b/rules @@ -0,0 +1,67 @@ +#!/usr/bin/make -f + +include /usr/share/dpkg/architecture.mk +include /usr/share/dpkg/pkg-info.mk + +## Security Hardening +export DEB_BUILD_MAINT_OPTIONS = hardening=+all +export DEB_CFLAGS_MAINT_APPEND = -fPIC +export DEB_CXXFLAGS_MAINT_APPEND = -fPIC +export DEB_LDFLAGS_MAINT_APPEND = -fPIC +export DEB_CPPFLAGS_MAINT_APPEND = -DNDEBUG -UDEBUG \ + -DBORINGSSL_ANDROID_SYSTEM \ + -DBORINGSSL_IMPLEMENTATION \ + -DBORINGSSL_SHARED_LIBRARY \ + -DOPENSSL_SMALL \ + -fmessage-length=0 \ + -fno-exceptions \ + -fno-strict-aliasing \ + -no-canonical-prefixes \ + +ifneq (, $(shell which clang)) + export CC = clang + export CXX = clang++ +endif +ifneq (, $(shell which lld)) + export DEB_LDFLAGS_MAINT_APPEND += -fuse-ld=lld +endif + +%: + dh $@ + +# Since this depends on golang-go, not gccgo, so let's update manually +# Depends: golang-go +update-sources-mk: + dpkg-source --before-build . 
+ python3 src/util/generate_build_files.py eureka + cp eureka.mk debian/sources.mk + +lib%.so: debian/lib%.mk + dh_auto_build --buildsystem=makefile -- --file=$< + +compiler_test: debian/compiler_test.mk + dh_auto_build --buildsystem=makefile -- --file=$< + LD_LIBRARY_PATH=debian/out debian/out/$@ + +%_test: debian/%_test.mk libtest_support.so libcrypto.so libssl.so + dh_auto_build --buildsystem=makefile -- --file=$< + +ifneq ($(DEB_HOST_ARCH), hurd-i386) +override_dh_auto_build-arch: compiler_test tool_test +else +# TODO: tool_test build fails on hurd-i386 currently +override_dh_auto_build-arch: compiler_test +endif + dh_auto_build + +ifeq (,$(findstring nocheck, $(DEB_BUILD_OPTIONS))) +override_dh_auto_test-arch: crypto_test ssl_test +ifneq ($(DEB_HOST_ARCH), hurd-i386) + LD_LIBRARY_PATH=debian/out debian/out/bssl-tool genrsa +endif + LD_LIBRARY_PATH=debian/out debian/out/crypto_test + LD_LIBRARY_PATH=debian/out debian/out/ssl_test +endif + +override_dh_dwz: + dh_dwz || true diff --git a/source/format b/source/format new file mode 100644 index 0000000..46ebe02 --- /dev/null +++ b/source/format @@ -0,0 +1 @@ +3.0 (quilt) \ No newline at end of file diff --git a/source/lintian-overrides b/source/lintian-overrides new file mode 100644 index 0000000..d7c8f41 --- /dev/null +++ b/source/lintian-overrides @@ -0,0 +1,10 @@ +# These are only test files for the ar implementation in Go. +source: source-is-missing [src/util/ar/testdata/linux/bar.cc.o] +source: source-is-missing [src/util/ar/testdata/linux/foo.c.o] +source: source-contains-prebuilt-binary [src/util/ar/testdata/linux/bar.cc.o] +source: source-contains-prebuilt-binary [src/util/ar/testdata/linux/foo.c.o] +source: very-long-line-length-in-source-file + +# Upstream tarball is always android- +# We need to update it manually +source: prefer-uscan-symlink diff --git a/sources.mk b/sources.mk new file mode 100644 index 0000000..79ca685 --- /dev/null +++ b/sources.mk @@ -0,0 +1,459 @@ +# Copyright (C) 2017 The Android Open Source Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file is created by generate_build_files.py. Do not edit manually. 
+ +crypto_sources := \ + err_data.c\ + src/crypto/asn1/a_bitstr.c\ + src/crypto/asn1/a_bool.c\ + src/crypto/asn1/a_d2i_fp.c\ + src/crypto/asn1/a_dup.c\ + src/crypto/asn1/a_enum.c\ + src/crypto/asn1/a_gentm.c\ + src/crypto/asn1/a_i2d_fp.c\ + src/crypto/asn1/a_int.c\ + src/crypto/asn1/a_mbstr.c\ + src/crypto/asn1/a_object.c\ + src/crypto/asn1/a_octet.c\ + src/crypto/asn1/a_print.c\ + src/crypto/asn1/a_strex.c\ + src/crypto/asn1/a_strnid.c\ + src/crypto/asn1/a_time.c\ + src/crypto/asn1/a_type.c\ + src/crypto/asn1/a_utctm.c\ + src/crypto/asn1/a_utf8.c\ + src/crypto/asn1/asn1_lib.c\ + src/crypto/asn1/asn1_par.c\ + src/crypto/asn1/asn_pack.c\ + src/crypto/asn1/f_int.c\ + src/crypto/asn1/f_string.c\ + src/crypto/asn1/tasn_dec.c\ + src/crypto/asn1/tasn_enc.c\ + src/crypto/asn1/tasn_fre.c\ + src/crypto/asn1/tasn_new.c\ + src/crypto/asn1/tasn_typ.c\ + src/crypto/asn1/tasn_utl.c\ + src/crypto/asn1/time_support.c\ + src/crypto/base64/base64.c\ + src/crypto/bio/bio.c\ + src/crypto/bio/bio_mem.c\ + src/crypto/bio/connect.c\ + src/crypto/bio/fd.c\ + src/crypto/bio/file.c\ + src/crypto/bio/hexdump.c\ + src/crypto/bio/pair.c\ + src/crypto/bio/printf.c\ + src/crypto/bio/socket.c\ + src/crypto/bio/socket_helper.c\ + src/crypto/blake2/blake2.c\ + src/crypto/bn_extra/bn_asn1.c\ + src/crypto/bn_extra/convert.c\ + src/crypto/buf/buf.c\ + src/crypto/bytestring/asn1_compat.c\ + src/crypto/bytestring/ber.c\ + src/crypto/bytestring/cbb.c\ + src/crypto/bytestring/cbs.c\ + src/crypto/bytestring/unicode.c\ + src/crypto/chacha/chacha.c\ + src/crypto/cipher_extra/cipher_extra.c\ + src/crypto/cipher_extra/derive_key.c\ + src/crypto/cipher_extra/e_aesccm.c\ + src/crypto/cipher_extra/e_aesctrhmac.c\ + src/crypto/cipher_extra/e_aesgcmsiv.c\ + src/crypto/cipher_extra/e_chacha20poly1305.c\ + src/crypto/cipher_extra/e_des.c\ + src/crypto/cipher_extra/e_null.c\ + src/crypto/cipher_extra/e_rc2.c\ + src/crypto/cipher_extra/e_rc4.c\ + src/crypto/cipher_extra/e_tls.c\ + src/crypto/cipher_extra/tls_cbc.c\ + src/crypto/cmac/cmac.c\ + src/crypto/conf/conf.c\ + src/crypto/cpu_aarch64_apple.c\ + src/crypto/cpu_aarch64_fuchsia.c\ + src/crypto/cpu_aarch64_linux.c\ + src/crypto/cpu_aarch64_win.c\ + src/crypto/cpu_arm.c\ + src/crypto/cpu_arm_linux.c\ + src/crypto/cpu_intel.c\ + src/crypto/cpu_ppc64le.c\ + src/crypto/crypto.c\ + src/crypto/curve25519/curve25519.c\ + src/crypto/curve25519/spake25519.c\ + src/crypto/des/des.c\ + src/crypto/dh_extra/dh_asn1.c\ + src/crypto/dh_extra/params.c\ + src/crypto/digest_extra/digest_extra.c\ + src/crypto/dsa/dsa.c\ + src/crypto/dsa/dsa_asn1.c\ + src/crypto/ec_extra/ec_asn1.c\ + src/crypto/ec_extra/ec_derive.c\ + src/crypto/ec_extra/hash_to_curve.c\ + src/crypto/ecdh_extra/ecdh_extra.c\ + src/crypto/ecdsa_extra/ecdsa_asn1.c\ + src/crypto/engine/engine.c\ + src/crypto/err/err.c\ + src/crypto/evp/digestsign.c\ + src/crypto/evp/evp.c\ + src/crypto/evp/evp_asn1.c\ + src/crypto/evp/evp_ctx.c\ + src/crypto/evp/p_dsa_asn1.c\ + src/crypto/evp/p_ec.c\ + src/crypto/evp/p_ec_asn1.c\ + src/crypto/evp/p_ed25519.c\ + src/crypto/evp/p_ed25519_asn1.c\ + src/crypto/evp/p_rsa.c\ + src/crypto/evp/p_rsa_asn1.c\ + src/crypto/evp/p_x25519.c\ + src/crypto/evp/p_x25519_asn1.c\ + src/crypto/evp/pbkdf.c\ + src/crypto/evp/print.c\ + src/crypto/evp/scrypt.c\ + src/crypto/evp/sign.c\ + src/crypto/ex_data.c\ + src/crypto/fipsmodule/bcm.c\ + src/crypto/fipsmodule/fips_shared_support.c\ + src/crypto/hkdf/hkdf.c\ + src/crypto/hpke/hpke.c\ + src/crypto/hrss/hrss.c\ + src/crypto/lhash/lhash.c\ + src/crypto/mem.c\ + src/crypto/obj/obj.c\ + 
src/crypto/obj/obj_xref.c\ + src/crypto/pem/pem_all.c\ + src/crypto/pem/pem_info.c\ + src/crypto/pem/pem_lib.c\ + src/crypto/pem/pem_oth.c\ + src/crypto/pem/pem_pk8.c\ + src/crypto/pem/pem_pkey.c\ + src/crypto/pem/pem_x509.c\ + src/crypto/pem/pem_xaux.c\ + src/crypto/pkcs7/pkcs7.c\ + src/crypto/pkcs7/pkcs7_x509.c\ + src/crypto/pkcs8/p5_pbev2.c\ + src/crypto/pkcs8/pkcs8.c\ + src/crypto/pkcs8/pkcs8_x509.c\ + src/crypto/poly1305/poly1305.c\ + src/crypto/poly1305/poly1305_arm.c\ + src/crypto/poly1305/poly1305_vec.c\ + src/crypto/pool/pool.c\ + src/crypto/rand_extra/deterministic.c\ + src/crypto/rand_extra/forkunsafe.c\ + src/crypto/rand_extra/fuchsia.c\ + src/crypto/rand_extra/passive.c\ + src/crypto/rand_extra/rand_extra.c\ + src/crypto/rand_extra/windows.c\ + src/crypto/rc4/rc4.c\ + src/crypto/refcount_c11.c\ + src/crypto/refcount_lock.c\ + src/crypto/rsa_extra/rsa_asn1.c\ + src/crypto/rsa_extra/rsa_print.c\ + src/crypto/siphash/siphash.c\ + src/crypto/stack/stack.c\ + src/crypto/thread.c\ + src/crypto/thread_none.c\ + src/crypto/thread_pthread.c\ + src/crypto/thread_win.c\ + src/crypto/trust_token/pmbtoken.c\ + src/crypto/trust_token/trust_token.c\ + src/crypto/trust_token/voprf.c\ + src/crypto/x509/a_digest.c\ + src/crypto/x509/a_sign.c\ + src/crypto/x509/a_verify.c\ + src/crypto/x509/algorithm.c\ + src/crypto/x509/asn1_gen.c\ + src/crypto/x509/by_dir.c\ + src/crypto/x509/by_file.c\ + src/crypto/x509/i2d_pr.c\ + src/crypto/x509/name_print.c\ + src/crypto/x509/rsa_pss.c\ + src/crypto/x509/t_crl.c\ + src/crypto/x509/t_req.c\ + src/crypto/x509/t_x509.c\ + src/crypto/x509/t_x509a.c\ + src/crypto/x509/x509.c\ + src/crypto/x509/x509_att.c\ + src/crypto/x509/x509_cmp.c\ + src/crypto/x509/x509_d2.c\ + src/crypto/x509/x509_def.c\ + src/crypto/x509/x509_ext.c\ + src/crypto/x509/x509_lu.c\ + src/crypto/x509/x509_obj.c\ + src/crypto/x509/x509_req.c\ + src/crypto/x509/x509_set.c\ + src/crypto/x509/x509_trs.c\ + src/crypto/x509/x509_txt.c\ + src/crypto/x509/x509_v3.c\ + src/crypto/x509/x509_vfy.c\ + src/crypto/x509/x509_vpm.c\ + src/crypto/x509/x509cset.c\ + src/crypto/x509/x509name.c\ + src/crypto/x509/x509rset.c\ + src/crypto/x509/x509spki.c\ + src/crypto/x509/x_algor.c\ + src/crypto/x509/x_all.c\ + src/crypto/x509/x_attrib.c\ + src/crypto/x509/x_crl.c\ + src/crypto/x509/x_exten.c\ + src/crypto/x509/x_info.c\ + src/crypto/x509/x_name.c\ + src/crypto/x509/x_pkey.c\ + src/crypto/x509/x_pubkey.c\ + src/crypto/x509/x_req.c\ + src/crypto/x509/x_sig.c\ + src/crypto/x509/x_spki.c\ + src/crypto/x509/x_val.c\ + src/crypto/x509/x_x509.c\ + src/crypto/x509/x_x509a.c\ + src/crypto/x509v3/pcy_cache.c\ + src/crypto/x509v3/pcy_data.c\ + src/crypto/x509v3/pcy_lib.c\ + src/crypto/x509v3/pcy_map.c\ + src/crypto/x509v3/pcy_node.c\ + src/crypto/x509v3/pcy_tree.c\ + src/crypto/x509v3/v3_akey.c\ + src/crypto/x509v3/v3_akeya.c\ + src/crypto/x509v3/v3_alt.c\ + src/crypto/x509v3/v3_bcons.c\ + src/crypto/x509v3/v3_bitst.c\ + src/crypto/x509v3/v3_conf.c\ + src/crypto/x509v3/v3_cpols.c\ + src/crypto/x509v3/v3_crld.c\ + src/crypto/x509v3/v3_enum.c\ + src/crypto/x509v3/v3_extku.c\ + src/crypto/x509v3/v3_genn.c\ + src/crypto/x509v3/v3_ia5.c\ + src/crypto/x509v3/v3_info.c\ + src/crypto/x509v3/v3_int.c\ + src/crypto/x509v3/v3_lib.c\ + src/crypto/x509v3/v3_ncons.c\ + src/crypto/x509v3/v3_ocsp.c\ + src/crypto/x509v3/v3_pci.c\ + src/crypto/x509v3/v3_pcia.c\ + src/crypto/x509v3/v3_pcons.c\ + src/crypto/x509v3/v3_pmaps.c\ + src/crypto/x509v3/v3_prn.c\ + src/crypto/x509v3/v3_purp.c\ + src/crypto/x509v3/v3_skey.c\ + 
src/crypto/x509v3/v3_utl.c\ + +ssl_sources := \ + src/ssl/bio_ssl.cc\ + src/ssl/d1_both.cc\ + src/ssl/d1_lib.cc\ + src/ssl/d1_pkt.cc\ + src/ssl/d1_srtp.cc\ + src/ssl/dtls_method.cc\ + src/ssl/dtls_record.cc\ + src/ssl/encrypted_client_hello.cc\ + src/ssl/extensions.cc\ + src/ssl/handoff.cc\ + src/ssl/handshake.cc\ + src/ssl/handshake_client.cc\ + src/ssl/handshake_server.cc\ + src/ssl/s3_both.cc\ + src/ssl/s3_lib.cc\ + src/ssl/s3_pkt.cc\ + src/ssl/ssl_aead_ctx.cc\ + src/ssl/ssl_asn1.cc\ + src/ssl/ssl_buffer.cc\ + src/ssl/ssl_cert.cc\ + src/ssl/ssl_cipher.cc\ + src/ssl/ssl_file.cc\ + src/ssl/ssl_key_share.cc\ + src/ssl/ssl_lib.cc\ + src/ssl/ssl_privkey.cc\ + src/ssl/ssl_session.cc\ + src/ssl/ssl_stat.cc\ + src/ssl/ssl_transcript.cc\ + src/ssl/ssl_versions.cc\ + src/ssl/ssl_x509.cc\ + src/ssl/t1_enc.cc\ + src/ssl/tls13_both.cc\ + src/ssl/tls13_client.cc\ + src/ssl/tls13_enc.cc\ + src/ssl/tls13_server.cc\ + src/ssl/tls_method.cc\ + src/ssl/tls_record.cc\ + +tool_sources := \ + src/tool/args.cc\ + src/tool/ciphers.cc\ + src/tool/client.cc\ + src/tool/const.cc\ + src/tool/digest.cc\ + src/tool/fd.cc\ + src/tool/file.cc\ + src/tool/generate_ech.cc\ + src/tool/generate_ed25519.cc\ + src/tool/genrsa.cc\ + src/tool/pkcs12.cc\ + src/tool/rand.cc\ + src/tool/server.cc\ + src/tool/sign.cc\ + src/tool/speed.cc\ + src/tool/tool.cc\ + src/tool/transport_common.cc\ + +test_support_sources := \ + src/crypto/test/file_test.cc\ + src/crypto/test/malloc.cc\ + src/crypto/test/test_util.cc\ + src/crypto/test/wycheproof_util.cc\ + +crypto_test_sources := \ + crypto_test_data.cc\ + src/crypto/abi_self_test.cc\ + src/crypto/asn1/asn1_test.cc\ + src/crypto/base64/base64_test.cc\ + src/crypto/bio/bio_test.cc\ + src/crypto/blake2/blake2_test.cc\ + src/crypto/buf/buf_test.cc\ + src/crypto/bytestring/bytestring_test.cc\ + src/crypto/chacha/chacha_test.cc\ + src/crypto/cipher_extra/aead_test.cc\ + src/crypto/cipher_extra/cipher_test.cc\ + src/crypto/cmac/cmac_test.cc\ + src/crypto/compiler_test.cc\ + src/crypto/conf/conf_test.cc\ + src/crypto/constant_time_test.cc\ + src/crypto/cpu_arm_linux_test.cc\ + src/crypto/crypto_test.cc\ + src/crypto/curve25519/ed25519_test.cc\ + src/crypto/curve25519/spake25519_test.cc\ + src/crypto/curve25519/x25519_test.cc\ + src/crypto/dh_extra/dh_test.cc\ + src/crypto/digest_extra/digest_test.cc\ + src/crypto/dsa/dsa_test.cc\ + src/crypto/ecdh_extra/ecdh_test.cc\ + src/crypto/err/err_test.cc\ + src/crypto/evp/evp_extra_test.cc\ + src/crypto/evp/evp_test.cc\ + src/crypto/evp/pbkdf_test.cc\ + src/crypto/evp/scrypt_test.cc\ + src/crypto/fipsmodule/aes/aes_test.cc\ + src/crypto/fipsmodule/bn/bn_test.cc\ + src/crypto/fipsmodule/ec/ec_test.cc\ + src/crypto/fipsmodule/ec/p256-x86_64_test.cc\ + src/crypto/fipsmodule/ecdsa/ecdsa_test.cc\ + src/crypto/fipsmodule/md5/md5_test.cc\ + src/crypto/fipsmodule/modes/gcm_test.cc\ + src/crypto/fipsmodule/rand/ctrdrbg_test.cc\ + src/crypto/fipsmodule/rand/fork_detect_test.cc\ + src/crypto/fipsmodule/sha/sha_test.cc\ + src/crypto/hkdf/hkdf_test.cc\ + src/crypto/hmac_extra/hmac_test.cc\ + src/crypto/hpke/hpke_test.cc\ + src/crypto/hrss/hrss_test.cc\ + src/crypto/impl_dispatch_test.cc\ + src/crypto/lhash/lhash_test.cc\ + src/crypto/obj/obj_test.cc\ + src/crypto/pem/pem_test.cc\ + src/crypto/pkcs7/pkcs7_test.cc\ + src/crypto/pkcs8/pkcs12_test.cc\ + src/crypto/pkcs8/pkcs8_test.cc\ + src/crypto/poly1305/poly1305_test.cc\ + src/crypto/pool/pool_test.cc\ + src/crypto/rand_extra/rand_test.cc\ + src/crypto/refcount_test.cc\ + src/crypto/rsa_extra/rsa_test.cc\ + 
src/crypto/self_test.cc\ + src/crypto/siphash/siphash_test.cc\ + src/crypto/stack/stack_test.cc\ + src/crypto/test/abi_test.cc\ + src/crypto/test/file_test_gtest.cc\ + src/crypto/test/gtest_main.cc\ + src/crypto/thread_test.cc\ + src/crypto/trust_token/trust_token_test.cc\ + src/crypto/x509/x509_test.cc\ + src/crypto/x509/x509_time_test.cc\ + src/crypto/x509v3/tab_test.cc\ + +ssl_test_sources := \ + src/crypto/test/abi_test.cc\ + src/crypto/test/gtest_main.cc\ + src/ssl/span_test.cc\ + src/ssl/ssl_c_test.c\ + src/ssl/ssl_test.cc\ + +linux_aarch64_sources := \ + linux-aarch64/crypto/chacha/chacha-armv8.S\ + linux-aarch64/crypto/fipsmodule/aesv8-armx64.S\ + linux-aarch64/crypto/fipsmodule/armv8-mont.S\ + linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S\ + linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S\ + linux-aarch64/crypto/fipsmodule/sha1-armv8.S\ + linux-aarch64/crypto/fipsmodule/sha256-armv8.S\ + linux-aarch64/crypto/fipsmodule/sha512-armv8.S\ + linux-aarch64/crypto/fipsmodule/vpaes-armv8.S\ + linux-aarch64/crypto/test/trampoline-armv8.S\ + +linux_arm_sources := \ + linux-arm/crypto/chacha/chacha-armv4.S\ + linux-arm/crypto/fipsmodule/aesv8-armx32.S\ + linux-arm/crypto/fipsmodule/armv4-mont.S\ + linux-arm/crypto/fipsmodule/bsaes-armv7.S\ + linux-arm/crypto/fipsmodule/ghash-armv4.S\ + linux-arm/crypto/fipsmodule/ghashv8-armx32.S\ + linux-arm/crypto/fipsmodule/sha1-armv4-large.S\ + linux-arm/crypto/fipsmodule/sha256-armv4.S\ + linux-arm/crypto/fipsmodule/sha512-armv4.S\ + linux-arm/crypto/fipsmodule/vpaes-armv7.S\ + linux-arm/crypto/test/trampoline-armv4.S\ + src/crypto/curve25519/asm/x25519-asm-arm.S\ + src/crypto/poly1305/poly1305_arm_asm.S\ + +linux_ppc64le_sources := \ + linux-ppc64le/crypto/fipsmodule/aesp8-ppc.S\ + linux-ppc64le/crypto/fipsmodule/ghashp8-ppc.S\ + linux-ppc64le/crypto/test/trampoline-ppc.S\ + +linux_x86_sources := \ + linux-x86/crypto/chacha/chacha-x86.S\ + linux-x86/crypto/fipsmodule/aesni-x86.S\ + linux-x86/crypto/fipsmodule/bn-586.S\ + linux-x86/crypto/fipsmodule/co-586.S\ + linux-x86/crypto/fipsmodule/ghash-ssse3-x86.S\ + linux-x86/crypto/fipsmodule/ghash-x86.S\ + linux-x86/crypto/fipsmodule/md5-586.S\ + linux-x86/crypto/fipsmodule/sha1-586.S\ + linux-x86/crypto/fipsmodule/sha256-586.S\ + linux-x86/crypto/fipsmodule/sha512-586.S\ + linux-x86/crypto/fipsmodule/vpaes-x86.S\ + linux-x86/crypto/fipsmodule/x86-mont.S\ + linux-x86/crypto/test/trampoline-x86.S\ + +linux_x86_64_sources := \ + linux-x86_64/crypto/chacha/chacha-x86_64.S\ + linux-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.S\ + linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S\ + linux-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.S\ + linux-x86_64/crypto/fipsmodule/aesni-x86_64.S\ + linux-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.S\ + linux-x86_64/crypto/fipsmodule/ghash-x86_64.S\ + linux-x86_64/crypto/fipsmodule/md5-x86_64.S\ + linux-x86_64/crypto/fipsmodule/p256-x86_64-asm.S\ + linux-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.S\ + linux-x86_64/crypto/fipsmodule/rdrand-x86_64.S\ + linux-x86_64/crypto/fipsmodule/rsaz-avx2.S\ + linux-x86_64/crypto/fipsmodule/sha1-x86_64.S\ + linux-x86_64/crypto/fipsmodule/sha256-x86_64.S\ + linux-x86_64/crypto/fipsmodule/sha512-x86_64.S\ + linux-x86_64/crypto/fipsmodule/vpaes-x86_64.S\ + linux-x86_64/crypto/fipsmodule/x86_64-mont.S\ + linux-x86_64/crypto/fipsmodule/x86_64-mont5.S\ + linux-x86_64/crypto/test/trampoline-x86_64.S\ + src/crypto/hrss/asm/poly_rq_mul.S\ + diff --git a/ssl_test.mk b/ssl_test.mk new file mode 100644 index 0000000..8f92f51 --- 
/dev/null +++ b/ssl_test.mk @@ -0,0 +1,43 @@ +include debian/sources.mk + +NAME = ssl_test + +SOURCES = $(ssl_test_sources) +SOURCES_C = $(filter %.c,$(SOURCES)) +OBJECTS_C = $(SOURCES_C:.c=.o) +SOURCES_CC = $(filter %.cc,$(SOURCES)) +OBJECTS_CC = $(SOURCES_CC:.cc=.o) + +CFLAGS += -std=gnu11 +CXXFLAGS += -std=gnu++2a +CPPFLAGS += \ + -Isrc/include \ + +LDFLAGS += \ + -Ldebian/out \ + -Wl,-rpath=/usr/lib/$(DEB_HOST_MULTIARCH)/android \ + -lcrypto \ + -lgtest \ + -lpthread \ + -lssl \ + -ltest_support \ + -pie + +ifneq ($(filter mipsel mips64el,$(DEB_HOST_ARCH)),) + LDFLAGS += -Wl,-z,notext +endif + +# -latomic should be the last library specified +# https://github.com/android/ndk/issues/589 +ifeq ($(DEB_HOST_ARCH), armel) + LDFLAGS += -latomic +endif + +build: $(OBJECTS_C) $(OBJECTS_CC) + $(CXX) $^ -o debian/out/$(NAME) $(LDFLAGS) + +$(OBJECTS_C): %.o: %.c + $(CC) -c -o $@ $< $(CFLAGS) $(CPPFLAGS) + +$(OBJECTS_CC): %.o: %.cc + $(CXX) -c -o $@ $< $(CXXFLAGS) $(CPPFLAGS) diff --git a/tests/control b/tests/control new file mode 100644 index 0000000..185dbb3 --- /dev/null +++ b/tests/control @@ -0,0 +1,3 @@ +Test-Command: bssl-tool genrsa +Architecture: armel armhf arm64 amd64 i386 ppc64el mipsel mips64el ia64 kfreebsd-amd64 kfreebsd-i386 riscv64 sh4 x32 +Depends: android-boringssl diff --git a/tool_test.mk b/tool_test.mk new file mode 100644 index 0000000..a75fc40 --- /dev/null +++ b/tool_test.mk @@ -0,0 +1,23 @@ +include debian/sources.mk + +NAME = bssl-tool + +SOURCES = $(tool_sources) +OBJECTS = $(SOURCES:.cc=.o) + +CXXFLAGS += -std=gnu++2a +CPPFLAGS += \ + -Isrc/include \ + +LDFLAGS += \ + -Ldebian/out \ + -Wl,-rpath=/usr/lib/$(DEB_HOST_MULTIARCH)/android \ + -lcrypto \ + -lssl \ + -pie + +build: $(OBJECTS) + $(CXX) $^ -o debian/out/$(NAME) $(LDFLAGS) + +$(OBJECTS): %.o: %.cc + $(CXX) -c -o $@ $< $(CXXFLAGS) $(CPPFLAGS) diff --git a/upstream/metadata b/upstream/metadata new file mode 100644 index 0000000..67dc04b --- /dev/null +++ b/upstream/metadata @@ -0,0 +1,6 @@ +Archive: Android Open Source Project (AOSP) +Bug-Database: https://bugs.chromium.org/p/boringssl/issues/list +Bug-Submit: https://bugs.chromium.org/p/boringssl/issues/entry +Documentation: https://boringssl.googlesource.com/boringssl +Repository-Browse: https://android.googlesource.com/platform/external/boringssl +Security-Contact: https://source.android.com/security/overview/updates-resources#report-issues diff --git a/watch b/watch new file mode 100644 index 0000000..543d834 --- /dev/null +++ b/watch @@ -0,0 +1,9 @@ +# uscan --download-version 12.1.0_r5 + +version=4 +opts="pagemangle=s%\">android-%\.tar\.gz\">android-%g;s%/\+/refs/tags/%/+archive/%g, \ + oversionmangle=s%_r(\d+|\d+\.\d+|\w)%+r$1%, \ + repack, compression=xz, \ + filenamemangle=s%[\w\/\.:]+\+archive\/android-%@PACKAGE@-%" \ +https://android.googlesource.com/platform/external/boringssl/+refs \ + [\w\/]+\+archive\/android-([0-9\.]+_r\d+|\d+\.\d+|\w)\.tar\.gz