[PATCH] [3.10] gh-145986: Avoid unbounded C recursion in `conv_content_model` in `pyexpat.c`
author Stan Ulbrych <stan@python.org>
Wed, 8 Apr 2026 10:27:42 +0000 (11:27 +0100)
committer Arnaud Rebillout <arnaudr@debian.org>
Thu, 14 May 2026 03:00:00 +0000 (10:00 +0700)
* [3.10] gh-145986: Avoid unbounded C recursion in `conv_content_model` in `pyexpat.c` (CVE-2026-4224) (GH-145987)

Fix a C stack overflow (CVE-2026-4224) when an Expat parser
with a registered `ElementDeclHandler` parses an inline DTD
containing a deeply nested content model.

---------
(cherry picked from commit eb0e8be3a7e11b87d198a2c3af1ed0eccf532768)
(cherry picked from commit e5caf45faac74b0ed869e3336420cffd3510ce6e)

Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
* Update Misc/NEWS.d/next/Security/2026-03-14-17-31-39.gh-issue-145986.ifSSr8.rst

---------

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Origin: backport, https://github.com/python/cpython/commit/af856a7177326ac25d9f66cc6dd28b554d914fee

Gbp-Pq: Name CVE-2026-4224.patch

Lib/test/support/__init__.py
Lib/test/test_pyexpat.py
Misc/NEWS.d/next/Security/2026-03-14-17-31-39.gh-issue-145986.ifSSr8.rst [new file with mode: 0644]
Modules/pyexpat.c

index c68a8f26213769045a0aa56aa269277f938aa89f..cae8adf0395591ea705767df4b34abd3042dfc37 100644 (file)
@@ -3217,3 +3217,19 @@ def control_characters_c0() -> list[str]:
     C0 control characters defined as the byte range 0x00-0x1F, and 0x7F.
     """
     return [chr(c) for c in range(0x00, 0x20)] + ["\x7F"]
+
+
+@contextlib.contextmanager
+def infinite_recursion(max_depth=75):
+    """Set a lower recursion limit for tests that interact with infinite
+    recursions (e.g. test_ast.ASTHelpers_Test.test_recursion_direct), since
+    on some debug Windows builds, due to not enough functions being inlined,
+    the stack size might not handle the default recursion limit (1000). The
+    previous limit is restored on exit. See bpo-11105 for details."""
+
+    original_depth = sys.getrecursionlimit()
+    try:
+        sys.setrecursionlimit(max_depth)
+        yield
+    finally:
+        sys.setrecursionlimit(original_depth)
index b2b4dea060532d8cad2db4a0f29b154a8688c41c..b1d91b56b54ea87117ae0ada7ffc3c743b33f4c2 100644 (file)
@@ -12,6 +12,7 @@ import traceback
 from xml.parsers import expat
 from xml.parsers.expat import errors
 
+from test.support import infinite_recursion
 from test.support import sortdict
 
 
@@ -643,6 +644,24 @@ class ChardataBufferTest(unittest.TestCase):
         parser.Parse(xml2, True)
         self.assertEqual(self.n, 4)
 
+class ElementDeclHandlerTest(unittest.TestCase):
+    def test_deeply_nested_content_model(self):
+        # Must raise RecursionError instead of overflowing the C stack; see
+        # https://github.com/python/cpython/issues/145986.
+        N = 500_000
+        data = (
+            b'<!DOCTYPE root [\n<!ELEMENT root '
+            + b'(a, ' * N + b'a' + b')' * N
+            + b'>\n]>\n<root/>\n'
+        )
+
+        parser = expat.ParserCreate()
+        parser.ElementDeclHandler = lambda _1, _2: None
+        with infinite_recursion():
+            with self.assertRaises(RecursionError):
+                parser.Parse(data)
+
+
 class MalformedInputTest(unittest.TestCase):
     def test1(self):
         xml = b"\0\r\n"
diff --git a/Misc/NEWS.d/next/Security/2026-03-14-17-31-39.gh-issue-145986.ifSSr8.rst b/Misc/NEWS.d/next/Security/2026-03-14-17-31-39.gh-issue-145986.ifSSr8.rst
new file mode 100644 (file)
index 0000000..cb9dbad
--- /dev/null
@@ -0,0 +1,4 @@
+:mod:`xml.parsers.expat`: Fixed a crash caused by unbounded C recursion when
+converting deeply nested XML content models with
+:meth:`~xml.parsers.expat.xmlparser.ElementDeclHandler`.
+This addresses `CVE-2026-4224 <https://www.cve.org/CVERecord?id=CVE-2026-4224>`_.
index 12ae66d945bda8cb6b85944fd3de9e02f35245ac..9a440c7c798a5fe63b7ae83c51084615ce9d46e7 100644 (file)
@@ -508,6 +508,10 @@ static PyObject *
 conv_content_model(XML_Content * const model,
                    PyObject *(*conv_string)(const XML_Char *))
 {
+    if (Py_EnterRecursiveCall(" in conv_content_model")) {
+        return NULL;
+    }
+
     PyObject *result = NULL;
     PyObject *children = PyTuple_New(model->numchildren);
     int i;
@@ -519,7 +523,7 @@ conv_content_model(XML_Content * const model,
                                                  conv_string);
             if (child == NULL) {
                 Py_XDECREF(children);
-                return NULL;
+                goto done;
             }
             PyTuple_SET_ITEM(children, i, child);
         }
@@ -527,6 +531,8 @@ conv_content_model(XML_Content * const model,
                                model->type, model->quant,
                                conv_string,model->name, children);
     }
+done:
+    Py_LeaveRecursiveCall();
     return result;
 }