From: Brian Neradt Date: Tue, 1 Aug 2023 18:51:44 +0000 (-0500) Subject: Correctly handle encoding for cache hash generation (#10128) X-Git-Tag: archive/raspbian/8.1.7-0+deb10u3+rpi1^2~5 X-Git-Url: https://dgit.raspbian.org/?a=commitdiff_plain;h=1d7eb288c8415f8998bbeba577b9eb70fa405c77;p=trafficserver.git Correctly handle encoding for cache hash generation (#10128) Since origins may treat URL encoded or unencoded paths, query parameters, or fragments differently, we should cache them separately. This updates our URL cache hashing logic to not unencode these components of a URI. Gbp-Pq: Name 0001-Correctly-handle-encoding-for-cache-hash-generation-.patch --- diff --git a/proxy/hdrs/URL.cc b/proxy/hdrs/URL.cc index 9edf1a43..e38df337 100644 --- a/proxy/hdrs/URL.cc +++ b/proxy/hdrs/URL.cc @@ -1776,6 +1776,16 @@ url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &h while (t < ends[i]) { if ((i == 0) || (i == 6)) { // scheme and host unescape_str_tolower(p, e, t, ends[i], s); + } else if (i == 8 || i == 10 || i == 12) { // path, params, query + // Don't unescape the parts of the URI that are processed by the + // origin since it may behave differently based upon whether these are + // escaped or not. Therefore differently encoded strings should be + // cached separately via differentiated hashes. + int path_len = ends[i] - t; + int min_len = std::min(path_len, static_cast(e - p)); + memcpy(p, t, min_len); + p += min_len; + t += min_len; } else { unescape_str(p, e, t, ends[i], s); } diff --git a/proxy/hdrs/unit_tests/test_URL.cc b/proxy/hdrs/unit_tests/test_URL.cc index b022d068..44f621c7 100644 --- a/proxy/hdrs/unit_tests/test_URL.cc +++ b/proxy/hdrs/unit_tests/test_URL.cc @@ -44,3 +44,104 @@ TEST_CASE("Validate Scheme", "[proxy][validscheme]") } } } + +struct get_hash_test_case { + const std::string description; + const std::string uri_1; + const std::string uri_2; + const bool has_equal_hash; +}; + +constexpr bool HAS_EQUAL_HASH = true; + +// clang-format off +std::vector get_hash_test_cases = { + { + "No encoding: equal hashes", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + HAS_EQUAL_HASH, + }, + { + "Scheme encoded: equal hashes", + "http%3C://one.example.com/a/path?name=value#some=value?with_question#fragment", + "http<://one.example.com/a/path?name=value#some=value?with_question#fragment", + HAS_EQUAL_HASH, + }, + { + "Host encoded: equal hashes", + "http://one%2Eexample.com/a/path?name=value#some=value?with_question#fragment", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + HAS_EQUAL_HASH, + }, + { + "Path encoded: differing hashes", + "http://one.example.com/a%2Fpath?name=value#some=value?with_question#fragment", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + !HAS_EQUAL_HASH, + }, + { + "Query = encoded: differing hashes", + "http://one.example.com/a/path?name%3Dvalue#some=value?with_question#fragment", + "http://one.example.com/a/path?name=value#some=value?with_question#fragment", + !HAS_EQUAL_HASH, + }, + { + "Query internal encoded: differing hashes", + "http://one.example.com/a/path?name=valu%5D#some=value?with_question#fragment", + "http://one.example.com/a/path?name=valu]#some=value?with_question#fragment", + !HAS_EQUAL_HASH, + }, + { + "Fragment encoded: fragment is not part of the hash", + "http://one.example.com/a/path?name=value#some=value?with_question#frag%7Dent", + "http://one.example.com/a/path?name=value#some=value?with_question/frag}ent", + HAS_EQUAL_HASH, + }, + { + "Username encoded: equal hashes", + "mysql://my%7Eser:mypassword@localhost/mydatabase", + "mysql://my~ser:mypassword@localhost/mydatabase", + HAS_EQUAL_HASH, + }, + { + "Password encoded: equal hashes", + "mysql://myuser:mypa%24sword@localhost/mydatabase", + "mysql://myuser:mypa$sword@localhost/mydatabase", + HAS_EQUAL_HASH, + }, +}; + +/** Return the hash related to a URI. + * + * @param[in] uri The URI to hash. + * @return The hash of the URI. + */ +CryptoHash +get_hash(const std::string &uri) +{ + URL url; + HdrHeap *heap = new_HdrHeap(); + url.create(heap); + url.parse(uri.c_str(), uri.length()); + CryptoHash hash; + url.hash_get(&hash); + heap->destroy(); + return hash; +} + +TEST_CASE("UrlHashGet", "[url][hash_get]") +{ + for (auto const &test_case : get_hash_test_cases) { + std::string description = test_case.description + ": " + test_case.uri_1 + " vs " + test_case.uri_2; + SECTION(description) { + CryptoHash hash1 = get_hash(test_case.uri_1); + CryptoHash hash2 = get_hash(test_case.uri_2); + if (test_case.has_equal_hash) { + CHECK(hash1 == hash2); + } else { + CHECK(hash1 != hash2); + } + } + } +}