Remove endian-sensitivity from hash slow path
authorBenjamin Barenblat <bbaren@google.com>
Tue, 9 Feb 2021 19:41:06 +0000 (19:41 +0000)
committerBenjamin Barenblat <bbaren@debian.org>
Tue, 9 Feb 2021 19:41:06 +0000 (19:41 +0000)
Forwarded: yes
Applied-Upstream: https://github.com/abseil/abseil-cpp/commit/9c6a50fdd80bb39fabd95faeda84f04062685ff3

Prior to this commit, the Abseil hash fast path was endian-agnostic, but
the slow path assumed a little-endian platform. Change the slow path to
be endian-correct, ensuring that values produced by the fast and slow
paths are equal even on big-endian systems.

The author works at Google. Upstream applied this patch as Piper revision
355424258 and exported it to GitHub; the Applied-Upstream URL above points to
the exported commit.

Gbp-Pq: Name endian-hash.diff

absl/hash/BUILD.bazel
absl/hash/CMakeLists.txt
absl/hash/internal/hash.h

index 5b1e2d01fd3c91b117b595fa0eb77ef342a80bd2..3531591139565a01a8695d7237716f926d3146e1 100644 (file)
@@ -37,6 +37,7 @@ cc_library(
     linkopts = ABSL_DEFAULT_LINKOPTS,
     deps = [
         ":city",
+        "//absl/base:config",
         "//absl/base:core_headers",
         "//absl/base:endian",
         "//absl/container:fixed_array",
index 61365e9bb5a180027772cb5361296d52a58934c1..25f73c2b2dba3dedb4943e98ef1dbdafcb4869ae 100644 (file)
@@ -25,6 +25,7 @@ absl_cc_library(
   COPTS
     ${ABSL_DEFAULT_COPTS}
   DEPS
+    absl::config
     absl::core_headers
     absl::endian
     absl::fixed_array
index 9e608f7c3c2930ef1b7b2342d153d0ba165bba41..0e99904e517365d8922b71c2db1571019d2d9580 100644 (file)
@@ -38,7 +38,8 @@
 #include <utility>
 #include <vector>
 
-#include "absl/base/internal/endian.h"
+#include "absl/base/config.h"
+#include "absl/base/internal/unaligned_access.h"
 #include "absl/base/port.h"
 #include "absl/container/fixed_array.h"
 #include "absl/meta/type_traits.h"
@@ -804,26 +805,54 @@ class ABSL_DLL CityHashState
                                                size_t len);
 
   // Reads 9 to 16 bytes from p.
-  // The first 8 bytes are in .first, the rest (zero padded) bytes are in
-  // .second.
+  // The least significant 8 bytes are in .first, the rest (zero padded) bytes
+  // are in .second.
   static std::pair<uint64_t, uint64_t> Read9To16(const unsigned char* p,
                                                  size_t len) {
-    uint64_t high = little_endian::Load64(p + len - 8);
-    return {little_endian::Load64(p), high >> (128 - len * 8)};
+    uint64_t low_mem = absl::base_internal::UnalignedLoad64(p);
+    uint64_t high_mem = absl::base_internal::UnalignedLoad64(p + len - 8);
+#ifdef ABSL_IS_LITTLE_ENDIAN
+    uint64_t most_significant = high_mem;
+    uint64_t least_significant = low_mem;
+#else
+    uint64_t most_significant = low_mem;
+    uint64_t least_significant = high_mem;
+#endif
+    return {least_significant, most_significant >> (128 - len * 8)};
   }
 
   // Reads 4 to 8 bytes from p. Zero pads to fill uint64_t.
   static uint64_t Read4To8(const unsigned char* p, size_t len) {
-    return (static_cast<uint64_t>(little_endian::Load32(p + len - 4))
-            << (len - 4) * 8) |
-           little_endian::Load32(p);
+    uint32_t low_mem = absl::base_internal::UnalignedLoad32(p);
+    uint32_t high_mem = absl::base_internal::UnalignedLoad32(p + len - 4);
+#ifdef ABSL_IS_LITTLE_ENDIAN
+    uint32_t most_significant = high_mem;
+    uint32_t least_significant = low_mem;
+#else
+    uint32_t most_significant = low_mem;
+    uint32_t least_significant = high_mem;
+#endif
+    return (static_cast<uint64_t>(most_significant) << (len - 4) * 8) |
+           least_significant;
   }
 
   // Reads 1 to 3 bytes from p. Zero pads to fill uint32_t.
   static uint32_t Read1To3(const unsigned char* p, size_t len) {
-    return static_cast<uint32_t>((p[0]) |                         //
-                                 (p[len / 2] << (len / 2 * 8)) |  //
-                                 (p[len - 1] << ((len - 1) * 8)));
+    unsigned char mem0 = p[0];
+    unsigned char mem1 = p[len / 2];
+    unsigned char mem2 = p[len - 1];
+#ifdef ABSL_IS_LITTLE_ENDIAN
+    unsigned char significant2 = mem2;
+    unsigned char significant1 = mem1;
+    unsigned char significant0 = mem0;
+#else
+    unsigned char significant2 = mem0;
+    unsigned char significant1 = mem1;
+    unsigned char significant0 = mem2;
+#endif
+    return static_cast<uint32_t>(significant0 |                     //
+                                 (significant1 << (len / 2 * 8)) |  //
+                                 (significant2 << ((len - 1) * 8)));
   }
 
   ABSL_ATTRIBUTE_ALWAYS_INLINE static uint64_t Mix(uint64_t state, uint64_t v) {