Fix hashing on big endian platforms (#1028)
author Milad Fa <46688537+miladfarca@users.noreply.github.com>
Mon, 7 Feb 2022 16:54:21 +0000 (16:54 +0000)
committer Benjamin Barenblat <bbaren@debian.org>
Mon, 7 Feb 2022 16:54:21 +0000 (16:54 +0000)
Forwarded: https://github.com/abseil/abseil-cpp/pull/1028
Origin: backport, https://github.com/abseil/abseil-cpp/commit/ae0f4c266095c9003786cd571bc1fb72544104a1
Bug-Debian: https://bugs.debian.org/977638

Avoid using libstdc++'s implementation of std::hash<std::bitset> and
std::hash<std::vector> on big endian platforms in the implementation
of absl::Hash.

This is a workaround for a buggy implementation that results in many
collisions.

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102531
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98731

Gbp-Pq: Name big-endian-hash2.diff

absl/hash/internal/hash.h
absl/hash/internal/wyhash_test.cc

index 7fb0af0b969a44620ac436a10427b064233d11bf..8747137e8ef5b508fde3044169b42b3e922d301c 100644 (file)
@@ -21,6 +21,7 @@
 
 #include <algorithm>
 #include <array>
+#include <bitset>
 #include <cmath>
 #include <cstring>
 #include <deque>
@@ -489,8 +490,9 @@ typename std::enable_if<is_hashable<T>::value, H>::type AbslHashValue(
 
 // AbslHashValue for hashing std::vector
 //
-// Do not use this for vector<bool>. It does not have a .data(), and a fallback
-// for std::hash<> is most likely faster.
+// Do not use this for vector<bool> on platforms that have a working
+// implementation of std::hash. It does not have a .data(), and a fallback for
+// std::hash<> is most likely faster.
 template <typename H, typename T, typename Allocator>
 typename std::enable_if<is_hashable<T>::value && !std::is_same<T, bool>::value,
                         H>::type
@@ -500,6 +502,27 @@ AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) {
                     vector.size());
 }
 
+#if defined(ABSL_IS_BIG_ENDIAN) && \
+    (defined(__GLIBCXX__) || defined(__GLIBCPP__))
+// AbslHashValue for hashing std::vector<bool>
+//
+// std::hash in libstdc++ does not work correctly with vector<bool> on Big
+// Endian platforms, so we implement a custom AbslHashValue for it: each
+// element is added as one byte, then vector.size() is combined in. Bug:
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102531
+template <typename H, typename T, typename Allocator>
+typename std::enable_if<is_hashable<T>::value && std::is_same<T, bool>::value,
+                        H>::type
+AbslHashValue(H hash_state, const std::vector<T, Allocator>& vector) {
+  typename H::AbslInternalPiecewiseCombiner combiner;
+  for (const auto& i : vector) {
+    unsigned char c = static_cast<unsigned char>(i);  // one byte per element
+    hash_state = combiner.add_buffer(std::move(hash_state), &c, sizeof(c));
+  }
+  return H::combine(combiner.finalize(std::move(hash_state)), vector.size());
+}
+#endif
+
 // -----------------------------------------------------------------------------
 // AbslHashValue for Ordered Associative Containers
 // -----------------------------------------------------------------------------
@@ -592,9 +615,28 @@ AbslHashValue(H hash_state, const absl::variant<T...>& v) {
 // AbslHashValue for Other Types
 // -----------------------------------------------------------------------------
 
-// AbslHashValue for hashing std::bitset is not defined, for the same reason as
-// for vector<bool> (see std::vector above): It does not expose the raw bytes,
-// and a fallback to std::hash<> is most likely faster.
+// AbslHashValue for hashing std::bitset is not defined on Little Endian
+// platforms, for the same reason as for vector<bool> (see std::vector above):
+// It does not expose the raw bytes, and a fallback to std::hash<> is most
+// likely faster.
+
+#if defined(ABSL_IS_BIG_ENDIAN) && \
+    (defined(__GLIBCXX__) || defined(__GLIBCPP__))
+// AbslHashValue for hashing std::bitset
+//
+// std::hash in libstdc++ does not work correctly with std::bitset on Big Endian
+// platforms, so a custom AbslHashValue is needed: it feeds one byte per bit,
+// then mixes in N. Bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102531
+template <typename H, size_t N>
+H AbslHashValue(H hash_state, const std::bitset<N>& set) {
+  typename H::AbslInternalPiecewiseCombiner combiner;
+  for (size_t i = 0; i < N; i++) {  // size_t: same type as N, no sign-compare
+    unsigned char c = static_cast<unsigned char>(set[i]);  // one byte per bit
+    hash_state = combiner.add_buffer(std::move(hash_state), &c, sizeof(c));
+  }
+  return H::combine(combiner.finalize(std::move(hash_state)), N);
+}
+#endif
 
 // -----------------------------------------------------------------------------
 
index 9e2d106b391b32d629312433f0dd7b5f612062c5..67179247106c4521fe13d7e35c6e8d6a7c717356 100644 (file)
@@ -404,6 +404,55 @@ TEST(WyhashTest, VerifyGolden) {
        uint64_t{0xc9ae5c8759b4877a}},
   };
 
+#if defined(ABSL_IS_BIG_ENDIAN)
+  constexpr uint64_t kGolden[kNumGoldenOutputs] = {
+      0xe5a40d39ab796423, 0x1766974bf7527d81, 0x5c3bbbe230db17a8,
+      0xa6630143a7e6aa6f, 0x17645cb7318b86b,  0x218b175f30ba61f8,
+      0xa6564b468248c683, 0xef192f401b116e1c, 0xbe8dc0c54617639d,
+      0xe7b01610fc22dbb8, 0x99d9f694404af913, 0xf4eecd37464b45c5,
+      0x7d2c653d63596d9b, 0x3f15c8544ec5393a, 0x6b9dc0c1704f796c,
+      0xf1ded7a7eae5ed5a, 0x2db2fd7c6dd4641b, 0x151ca2d3d4cd33ab,
+      0xa5af5994ac2ccd64, 0x2b2a4ca3191d2fce, 0xf89e68c9364e7c05,
+      0x71724c70b799c21,  0x70536fabfd157369, 0xdee92794c3c3082b,
+      0xac033a6743d3b3eb, 0xed2956b506cd5151, 0xbd669644755264b6,
+      0x6ab1ff5d5f549a63, 0xf6bd551a2e3e04e,  0x7b5a8cef6875ea73,
+      0x22bccf4d4db0a91c, 0x4f2bc07754c7c7eb, 0xfb6b8342a86725db,
+      0x13a1a0d4c5854da,  0x5f6e44655f7dedac, 0x54a9198dff2bdf85,
+      0xdb17e6915d4e4042, 0xa69926cf5c3b89f,  0xf77f031bfd74c096,
+      0x1d6f916fdd50ec3c, 0x334ac76013ade393, 0x99370f899111de15,
+      0x352457a03ada6de,  0x341974d4f42d854d, 0xda89ab02872aeb5,
+      0x6ec2b74e143b10d9, 0x6f284c0b5cd60522, 0xf9670de353438f88,
+      0xde920913adf0a2b4, 0xb7a07d7c0c17a8ec, 0x879a69f558ba3a98,
+      0x360cf6d802df20f9, 0x53530f8046673738, 0xbd8f5f2bcf35e483,
+      0x3f171f047144b983, 0x644d04e820823465, 0x50e44773a20b2702,
+      0xe584ed4c05c745dd, 0x9a825c85b95ab6c0, 0xbce2931deb74e775,
+      0x10468e9e705c7cfe, 0x12e01de3104141e2, 0x5c11ae2ee3713abd,
+      0x6ac5ffb0860319e6, 0xc1e6da1849d30fc9, 0xa0e4d247a458b447,
+      0x4530d4615c32b89b, 0x116aa09107a76505, 0xf941339d00d9bb73,
+      0x573a0fc1615afb33, 0xa975c81dc868b258, 0x3ab2c5250ab54bda,
+      0x37f99f208a3e3b11, 0x4b49b0ff706689d,  0x30bafa0b8f0a87fe,
+      0xea6787a65cc20cdd, 0x55861729f1fc3ab8, 0xea38e009c5be9b72,
+      0xcb8522cba33c3c66, 0x352e77653fe306f3, 0xe0bb760793bac064,
+      0xf66ec59322662956, 0x637aa320455d56f8, 0x46ee546be5824a89,
+      0x9e6842421e83d8a4, 0xf98ac2bc96b9fb8c, 0xf2c1002fd9a70b99,
+      0x4c2b62b1e39e9405, 0x3248555fa3ade9c4, 0xd4d04c37f6417c21,
+      0xf40cd506b1bf5653, 0x6c45d6005c760d2f, 0x61d88a7e61ff0d7e,
+      0x131591e8a53cc967, 0xdae85cb9bc29bab6, 0xe98835334905e626,
+      0x7cce50a2b66b8754, 0x5b0b3d0c5ac498ae, 0xd35a218c974d1756,
+      0xfce436ddc1d003c,  0xd183901de90bb741, 0x9378f8f34974a66,
+      0x21f11ae0a0402368, 0xf2fbd7c94ef89cb6, 0xc329c69d0f0d080b,
+      0xf2841cba16216a61, 0x47aba97b44916df1, 0x724d4e00a8019fcf,
+      0x2df9005c2a728d63, 0xc788892a1a5d7515, 0x9e993a65f9df0480,
+      0x76876721ff49f969, 0xbe7a796cfba15bf5, 0xa4c8bd54586f5488,
+      0xb390a325275501ab, 0x893f11317427ccf1, 0x92f2bb57da5695b9,
+      0x30985b90da88269f, 0x2c690e268e086de8, 0x1c02df6097997196,
+      0x1f9778f8bbdf6455, 0x7d57378c7bf8416d, 0xba8582a5f8d84d38,
+      0xe8ca43b85050be4e, 0x5048cf6bed8a5d9f, 0xfbc5ba80917d0ea4,
+      0x8011026525bf1691, 0x26b8dc6aed9fb50d, 0x191f5bfee77c1fe3,
+      0xdd497891465a2cc1, 0x6f1fe8c57a33072e, 0x2c9f4ec078c460c0,
+      0x9a725bde8f6a1437, 0x6ce545fa3ef61e4d,
+  };
+#else
   constexpr uint64_t kGolden[kNumGoldenOutputs] = {
       0xe5a40d39ab796423, 0x1766974bf7527d81, 0x5c3bbbe230db17a8,
       0xa6630143a7e6aa6f, 0x8787cb2d04b0c984, 0x33603654ff574ac2,
@@ -451,6 +500,7 @@ TEST(WyhashTest, VerifyGolden) {
       0xecf3de1acd04651f, 0xcc0a40552559ff32, 0xc385c374f20315b1,
       0xb90208a4c7234183, 0x58aa1ca7a4c075d9,
   };
+#endif
 
 #if UPDATE_GOLDEN
   (void)kGolden;  // Silence warning.