QXmlStreamReader: make fastScanName() indicate parsing status to callers
authorDebian Qt/KDE Maintainers <debian-qt-kde@lists.debian.org>
Sun, 28 Apr 2024 18:48:02 +0000 (20:48 +0200)
committerThorsten Alteholz <debian@alteholz.de>
Sun, 28 Apr 2024 18:48:02 +0000 (20:48 +0200)
Origin: upstream, commits
 https://code.qt.io/cgit/qt/qtbase.git/commit/?id=1a423ce4372d18a7
 https://code.qt.io/cgit/qt/qtbase.git/commit/?id=6326bec46a618c72
 https://code.qt.io/cgit/qt/qtbase.git/commit/?id=bdc8dc51380d2ce4
 https://code.qt.io/cgit/qt/qtbase.git/commit/?id=3bc3b8d69a291aa5
 .
 Based on KDE's backport:
 https://invent.kde.org/qt/qt/qtbase/-/merge_requests/263
Last-Update: 2023-07-15

This fixes a crash while parsing an XML file with garbage data, the file
starts with '<' then garbage data:
- The loop in the parse() keeps iterating until it hits "case 262:",
  which calls fastScanName()
- fastScanName() iterates over the text buffer scanning for the
  attribute name (e.g. "xml:lang"), until it finds ':'
- Consider a Value val, fastScanName() is called on it, it would set
  val.prefix to a number > val.len, then it would hit the 4096 condition
  and return (returned 0, now it returns the equivalent of
  std::null_opt), which means that val.len doesn't get modified, making
  it smaller than val.prefix
- The code would try constructing an XmlStringRef with negative length,
  which would hit an assert in one of QStringView's constructors

Add an assert to the XmlStringRef constructor.

Add unittest based on the file from the bug report.

Credit to OSS-Fuzz.

Gbp-Pq: Name CVE-2023-37369.diff

src/corelib/serialization/qxmlstream.cpp
src/corelib/serialization/qxmlstream.g
src/corelib/serialization/qxmlstream_p.h
tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp

index b2f846544debfa9ca33103db3bcf41afcb92b1e7..b393c3ef777f37e2bf71077becb23e47df30ec24 100644 (file)
@@ -1302,15 +1302,18 @@ inline int QXmlStreamReaderPrivate::fastScanContentCharList()
     return n;
 }
 
-inline int QXmlStreamReaderPrivate::fastScanName(int *prefix)
+// Fast scan an XML attribute name (e.g. "xml:lang").
+inline QXmlStreamReaderPrivate::FastScanNameResult
+QXmlStreamReaderPrivate::fastScanName(Value *val)
 {
     int n = 0;
     uint c;
     while ((c = getChar()) != StreamEOF) {
         if (n >= 4096) {
             // This is too long to be a sensible name, and
-            // can exhaust memory
-            return 0;
+            // can exhaust memory, or the range of decltype(*prefix)
+            raiseNamePrefixTooLongError();
+            return {};
         }
         switch (c) {
         case '\n':
@@ -1339,23 +1342,23 @@ inline int QXmlStreamReaderPrivate::fastScanName(int *prefix)
         case '+':
         case '*':
             putChar(c);
-            if (prefix && *prefix == n+1) {
-                *prefix = 0;
+            if (val && val->prefix == n + 1) {
+                val->prefix = 0;
                 putChar(':');
                 --n;
             }
-            return n;
+            return FastScanNameResult(n);
         case ':':
-            if (prefix) {
-                if (*prefix == 0) {
-                    *prefix = n+2;
+            if (val) {
+                if (val->prefix == 0) {
+                    val->prefix = n + 2;
                 } else { // only one colon allowed according to the namespace spec.
                     putChar(c);
-                    return n;
+                    return FastScanNameResult(n);
                 }
             } else {
                 putChar(c);
-                return n;
+                return FastScanNameResult(n);
             }
             Q_FALLTHROUGH();
         default:
@@ -1364,12 +1367,12 @@ inline int QXmlStreamReaderPrivate::fastScanName(int *prefix)
         }
     }
 
-    if (prefix)
-        *prefix = 0;
+    if (val)
+        val->prefix = 0;
     int pos = textBuffer.size() - n;
     putString(textBuffer, pos);
     textBuffer.resize(pos);
-    return 0;
+    return FastScanNameResult(0);
 }
 
 enum NameChar { NameBeginning, NameNotBeginning, NotName };
@@ -1878,6 +1881,14 @@ void QXmlStreamReaderPrivate::raiseWellFormedError(const QString &message)
     raiseError(QXmlStreamReader::NotWellFormedError, message);
 }
 
+void QXmlStreamReaderPrivate::raiseNamePrefixTooLongError()
+{
+    // TODO: add a ImplementationLimitsExceededError and use it instead
+    raiseError(QXmlStreamReader::NotWellFormedError,
+               QXmlStream::tr("Length of XML attribute name exceeds implementation limits (4KiB "
+                              "characters)."));
+}
+
 void QXmlStreamReaderPrivate::parseError()
 {
 
index b623de9505655917e61f9a6f409b60145ab4dcda..e43102850628cc0ca9f31355b2947be1d9839615 100644 (file)
@@ -516,7 +516,16 @@ public:
     int fastScanLiteralContent();
     int fastScanSpace();
     int fastScanContentCharList();
-    int fastScanName(int *prefix = nullptr);
+
+    struct FastScanNameResult {
+        FastScanNameResult() : ok(false) {}
+        explicit FastScanNameResult(int len) : addToLen(len), ok(true) { }
+        operator bool() { return ok; }
+        int operator*() { Q_ASSERT(ok); return addToLen; }
+        int addToLen;
+        bool ok;
+    };
+    FastScanNameResult fastScanName(Value *val = nullptr);
     inline int fastScanNMTOKEN();
 
 
@@ -525,6 +534,7 @@ public:
 
     void raiseError(QXmlStreamReader::Error error, const QString& message = QString());
     void raiseWellFormedError(const QString &message);
+    void raiseNamePrefixTooLongError();
 
     QXmlStreamEntityResolver *entityResolver;
 
@@ -1809,7 +1819,12 @@ space_opt ::= space;
 qname ::= LETTER;
 /.
         case $rule_number: {
-            sym(1).len += fastScanName(&sym(1).prefix);
+            Value &val = sym(1);
+            if (auto res = fastScanName(&val))
+                val.len += *res;
+            else
+                return false;
+
             if (atEnd) {
                 resume($rule_number);
                 return false;
@@ -1820,7 +1835,11 @@ qname ::= LETTER;
 name ::= LETTER;
 /.
         case $rule_number:
-            sym(1).len += fastScanName();
+            if (auto res = fastScanName())
+                sym(1).len += *res;
+            else
+                return false;
+
             if (atEnd) {
                 resume($rule_number);
                 return false;
index 103b123b10c77aab9ead71dc3ec4319d2ad0f038..80e7f74080fcfb27c2408ca6257a3d32bb69d2b5 100644 (file)
@@ -1005,7 +1005,16 @@ public:
     int fastScanLiteralContent();
     int fastScanSpace();
     int fastScanContentCharList();
-    int fastScanName(int *prefix = nullptr);
+
+    struct FastScanNameResult {
+        FastScanNameResult() : ok(false) {}
+        explicit FastScanNameResult(int len) : addToLen(len), ok(true) { }
+        operator bool() { return ok; }
+        int operator*() { Q_ASSERT(ok); return addToLen; }
+        int addToLen;
+        bool ok;
+    };
+    FastScanNameResult fastScanName(Value *val = nullptr);
     inline int fastScanNMTOKEN();
 
 
@@ -1014,6 +1023,7 @@ public:
 
     void raiseError(QXmlStreamReader::Error error, const QString& message = QString());
     void raiseWellFormedError(const QString &message);
+    void raiseNamePrefixTooLongError();
 
     QXmlStreamEntityResolver *entityResolver;
 
@@ -1937,7 +1947,12 @@ bool QXmlStreamReaderPrivate::parse()
         break;
 
         case 262: {
-            sym(1).len += fastScanName(&sym(1).prefix);
+            Value &val = sym(1);
+            if (auto res = fastScanName(&val))
+                val.len += *res;
+            else
+                return false;
+
             if (atEnd) {
                 resume(262);
                 return false;
@@ -1945,7 +1960,11 @@ bool QXmlStreamReaderPrivate::parse()
         } break;
 
         case 263:
-            sym(1).len += fastScanName();
+            if (auto res = fastScanName())
+                sym(1).len += *res;
+            else
+                return false;
+
             if (atEnd) {
                 resume(263);
                 return false;
index 28922574b8766fee73bd7b23d2fae2caa0c1969a..f2a36fb57cb0c1cfbf2fa94ee43b9f1ffb6c3a02 100644 (file)
@@ -39,6 +39,7 @@
 
 #include "qc14n.h"
 
+Q_DECLARE_METATYPE(QXmlStreamReader::Error)
 Q_DECLARE_METATYPE(QXmlStreamReader::ReadElementTextBehaviour)
 
 static const char *const catalogFile = "XML-Test-Suite/xmlconf/finalCatalog.xml";
@@ -580,6 +581,8 @@ private slots:
     void readBack() const;
     void roundTrip() const;
     void roundTrip_data() const;
+    void test_fastScanName_data() const;
+    void test_fastScanName() const;
 
     void entityExpansionLimit() const;
 
@@ -1812,5 +1815,42 @@ void tst_QXmlStream::roundTrip() const
     QCOMPARE(out, in);
 }
 
+void tst_QXmlStream::test_fastScanName_data() const
+{
+    QTest::addColumn<QByteArray>("data");
+    QTest::addColumn<QXmlStreamReader::Error>("errorType");
+
+    // 4096 is the limit in QXmlStreamReaderPrivate::fastScanName()
+
+    QByteArray arr = "<a:" + QByteArray("b").repeated(4096 - 1);
+    QTest::newRow("data1") << arr << QXmlStreamReader::PrematureEndOfDocumentError;
+
+    arr = "<a:" + QByteArray("b").repeated(4096);
+    QTest::newRow("data2") << arr << QXmlStreamReader::NotWellFormedError;
+
+    arr = "<" + QByteArray("a").repeated(4000) + ":" + QByteArray("b").repeated(96);
+    QTest::newRow("data3") << arr << QXmlStreamReader::PrematureEndOfDocumentError;
+
+    arr = "<" + QByteArray("a").repeated(4000) + ":" + QByteArray("b").repeated(96 + 1);
+    QTest::newRow("data4") << arr << QXmlStreamReader::NotWellFormedError;
+
+    arr = "<" + QByteArray("a").repeated(4000 + 1) + ":" + QByteArray("b").repeated(96);
+    QTest::newRow("data5") << arr << QXmlStreamReader::NotWellFormedError;
+}
+
+void tst_QXmlStream::test_fastScanName() const
+{
+    QFETCH(QByteArray, data);
+    QFETCH(QXmlStreamReader::Error, errorType);
+
+    QXmlStreamReader reader(data);
+    QXmlStreamReader::TokenType tokenType;
+    while (!reader.atEnd())
+        tokenType = reader.readNext();
+
+    QCOMPARE(tokenType, QXmlStreamReader::Invalid);
+    QCOMPARE(reader.error(), errorType);
+}
+
 #include "tst_qxmlstream.moc"
 // vim: et:ts=4:sw=4:sts=4