[PATCH] [3.9] bpo-43882 Remove the newline, and tab early. From query and fragments...
authorSenthil Kumaran <senthil@uthcode.com>
Mon, 3 May 2021 19:08:59 +0000 (12:08 -0700)
committerSean Whitton <spwhitton@spwhitton.name>
Thu, 20 Mar 2025 02:07:39 +0000 (10:07 +0800)
* Remove the newline, and tab early. From query and fragments.

Gbp-Pq: Name CVE-2022-0391-2.patch

Lib/test/test_urlparse.py
Lib/urllib/parse.py

index 306f68713e1d56f960847cdf0f180d2743f9ef24..4a78159c598678fb3174cc9676ff02807f3822da 100644 (file)
@@ -673,32 +673,40 @@ class UrlParseTestCase(unittest.TestCase):
 
     def test_urlsplit_remove_unsafe_bytes(self):
         # Remove ASCII tabs and newlines from input
-        url = "http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
+        url = "http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
         p = urllib.parse.urlsplit(url)
         self.assertEqual(p.scheme, "http")
         self.assertEqual(p.netloc, "www.python.org")
         self.assertEqual(p.path, "/javascript:alert('msg')/")
-        self.assertEqual(p.query, "")
-        self.assertEqual(p.fragment, "frag")
+        self.assertEqual(p.query, "query=something")
+        self.assertEqual(p.fragment, "fragment")
         self.assertEqual(p.username, None)
         self.assertEqual(p.password, None)
         self.assertEqual(p.hostname, "www.python.org")
         self.assertEqual(p.port, None)
-        self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/#frag")
+        self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
 
         # Remove ASCII tabs and newlines from input as bytes.
-        url = b"http://www.python.org/java\nscript:\talert('msg\r\n')/#frag"
+        url = b"http\t://www.python\n.org\t/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
         p = urllib.parse.urlsplit(url)
         self.assertEqual(p.scheme, b"http")
         self.assertEqual(p.netloc, b"www.python.org")
         self.assertEqual(p.path, b"/javascript:alert('msg')/")
-        self.assertEqual(p.query, b"")
-        self.assertEqual(p.fragment, b"frag")
+        self.assertEqual(p.query, b"query=something")
+        self.assertEqual(p.fragment, b"fragment")
         self.assertEqual(p.username, None)
         self.assertEqual(p.password, None)
         self.assertEqual(p.hostname, b"www.python.org")
         self.assertEqual(p.port, None)
-        self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/#frag")
+        self.assertEqual(p.geturl(), b"http://www.python.org/javascript:alert('msg')/?query=something#fragment")
+
+        # with scheme as cache-key
+        url = "http://www.python.org/java\nscript:\talert('msg\r\n')/?query\n=\tsomething#frag\nment"
+        scheme = "ht\ntp"
+        for _ in range(2):
+            p = urllib.parse.urlsplit(url, scheme=scheme)
+            self.assertEqual(p.scheme, "http")
+            self.assertEqual(p.geturl(), "http://www.python.org/javascript:alert('msg')/?query=something#fragment")
 
     def test_attributes_bad_port(self):
         """Check handling of invalid ports."""
index e403e98ff6e306966572be2771c8e691696d7cf6..6a36fd0e54b4fe39b8191273bc505e23187def4d 100644 (file)
@@ -498,6 +498,10 @@ def urlsplit(url, scheme='', allow_fragments=True):
     url = url.lstrip(_WHATWG_C0_CONTROL_OR_SPACE)
     scheme = scheme.strip(_WHATWG_C0_CONTROL_OR_SPACE)
 
+    for b in _UNSAFE_URL_BYTES_TO_REMOVE:
+        url = url.replace(b, "")
+        scheme = scheme.replace(b, "")
+
     allow_fragments = bool(allow_fragments)
     key = url, scheme, allow_fragments, type(url), type(scheme)
     cached = _parse_cache.get(key, None)
@@ -514,9 +518,6 @@ def urlsplit(url, scheme='', allow_fragments=True):
         else:
             scheme, url = url[:i].lower(), url[i+1:]
 
-    for b in _UNSAFE_URL_BYTES_TO_REMOVE:
-        url = url.replace(b, "")
-
     if url[:2] == '//':
         netloc, url = _splitnetloc(url, 2)
         if (('[' in netloc and ']' not in netloc) or