Correctly handle encoding for cache hash generation (#10128)
authorBrian Neradt <brian.neradt@gmail.com>
Tue, 1 Aug 2023 18:51:44 +0000 (13:51 -0500)
committerAdrian Bunk <bunk@debian.org>
Sat, 30 Sep 2023 14:35:12 +0000 (15:35 +0100)
Since origins may treat URL encoded or unencoded paths, query
parameters, or fragments differently, we should cache them separately.
This updates our URL cache hashing logic to not unencode these
components of a URI.

Gbp-Pq: Name 0001-Correctly-handle-encoding-for-cache-hash-generation-.patch

proxy/hdrs/URL.cc
proxy/hdrs/unit_tests/test_URL.cc

index 9edf1a43ccf0cfa1e9254f2814052204d697826c..e38df3379ba12bb655c7dd15be191cb00d15d42b 100644 (file)
@@ -1776,6 +1776,16 @@ url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &h
       while (t < ends[i]) {
         if ((i == 0) || (i == 6)) { // scheme and host
           unescape_str_tolower(p, e, t, ends[i], s);
+        } else if (i == 8 || i == 10 || i == 12) { // path, params, query
+          // Don't unescape the parts of the URI that are processed by the
+          // origin since it may behave differently based upon whether these are
+          // escaped or not. Therefore differently encoded strings should be
+          // cached separately via differentiated hashes.
+          int path_len = ends[i] - t;
+          int min_len  = std::min(path_len, static_cast<int>(e - p));
+          memcpy(p, t, min_len);
+          p += min_len;
+          t += min_len;
         } else {
           unescape_str(p, e, t, ends[i], s);
         }
index b022d068e409ce27f0fbf01cb3f499a6aa80cf8a..44f621c76f7f910f838fae9fb739d3f850e94d73 100644 (file)
@@ -44,3 +44,104 @@ TEST_CASE("Validate Scheme", "[proxy][validscheme]")
     }
   }
 }
+
+struct get_hash_test_case {
+  const std::string description;
+  const std::string uri_1;
+  const std::string uri_2;
+  const bool has_equal_hash;
+};
+
+constexpr bool HAS_EQUAL_HASH = true;
+
+// clang-format off
+std::vector<get_hash_test_case> get_hash_test_cases = {
+  {
+    "No encoding: equal hashes",
+    "http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+    "http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Scheme encoded: equal hashes",
+    "http%3C://one.example.com/a/path?name=value#some=value?with_question#fragment",
+    "http<://one.example.com/a/path?name=value#some=value?with_question#fragment",
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Host encoded: equal hashes",
+    "http://one%2Eexample.com/a/path?name=value#some=value?with_question#fragment",
+    "http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Path encoded: differing hashes",
+    "http://one.example.com/a%2Fpath?name=value#some=value?with_question#fragment",
+    "http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+    !HAS_EQUAL_HASH,
+  },
+  {
+    "Query = encoded: differing hashes",
+    "http://one.example.com/a/path?name%3Dvalue#some=value?with_question#fragment",
+    "http://one.example.com/a/path?name=value#some=value?with_question#fragment",
+    !HAS_EQUAL_HASH,
+  },
+  {
+    "Query internal encoded: differing hashes",
+    "http://one.example.com/a/path?name=valu%5D#some=value?with_question#fragment",
+    "http://one.example.com/a/path?name=valu]#some=value?with_question#fragment",
+    !HAS_EQUAL_HASH,
+  },
+  {
+    "Fragment encoded: fragment is not part of the hash",
+    "http://one.example.com/a/path?name=value#some=value?with_question#frag%7Dent",
+    "http://one.example.com/a/path?name=value#some=value?with_question/frag}ent",
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Username encoded: equal hashes",
+    "mysql://my%7Eser:mypassword@localhost/mydatabase",
+    "mysql://my~ser:mypassword@localhost/mydatabase",
+    HAS_EQUAL_HASH,
+  },
+  {
+    "Password encoded: equal hashes",
+    "mysql://myuser:mypa%24sword@localhost/mydatabase",
+    "mysql://myuser:mypa$sword@localhost/mydatabase",
+    HAS_EQUAL_HASH,
+  },
+};
+
+/** Return the hash related to a URI.
+  *
+  * @param[in] uri The URI to hash.
+  * @return The hash of the URI.
+ */
+CryptoHash
+get_hash(const std::string &uri)
+{
+  URL url;
+  HdrHeap *heap = new_HdrHeap();
+  url.create(heap);
+  url.parse(uri.c_str(), uri.length());
+  CryptoHash hash;
+  url.hash_get(&hash);
+  heap->destroy();
+  return hash;
+}
+
+TEST_CASE("UrlHashGet", "[url][hash_get]")
+{
+  for (auto const &test_case : get_hash_test_cases) {
+    std::string description = test_case.description + ": " + test_case.uri_1 + " vs " + test_case.uri_2;
+    SECTION(description) {
+      CryptoHash hash1 = get_hash(test_case.uri_1);
+      CryptoHash hash2 = get_hash(test_case.uri_2);
+      if (test_case.has_equal_hash) {
+        CHECK(hash1 == hash2);
+      } else {
+        CHECK(hash1 != hash2);
+      }
+    }
+  }
+}