The conversion loop to the internal encoding does not follow
the interface contract that __GCONV_FULL_OUTPUT is only returned
after the internal wchar_t buffer has been filled completely. This
is enforced by the first of the two asserts in iconv/skeleton.c:
/* We must run out of output buffer space in this
rerun. */
assert (outbuf == outerr);
assert (nstatus == __GCONV_FULL_OUTPUT);
This commit solves this issue by queuing, in the state variable, a
second wide character which cannot be written immediately, like
other converters already do (e.g., BIG5-HKSCS or TSCII).
Reported-by: Tavis Ormandy <taviso@gmail.com>
From ff012870b2c02a62598c04daa1e54632e020fd7d Mon Sep 17 00:00:00 2001
From: Nikita Popov <npv1310@gmail.com>
Date: Tue, 2 Nov 2021 13:21:42 +0500
Subject: [PATCH] gconv: Do not emit spurious NUL character in ISO-2022-JP-3
(bug 28524)
Bugfix 27256 has introduced another issue:
In conversion from the ISO-2022-JP-3 encoding, it is possible
to force iconv to emit an extra NUL character on internal state reset.
To do this, it is sufficient to feed iconv with an escape sequence
which switches the active character set.
The simplified check 'data->__statep->__count != ASCII_set'
introduced by the aforementioned bugfix picks up that case and
behaves as if a '\0' character had been queued, thus emitting it.
To eliminate this issue, these steps are taken:
* Restore the original condition
'(data->__statep->__count & ~7) != ASCII_set'.
It is necessary since bits 0-2 may contain
the number of buffered input characters.
* Check that the queued character is not NUL.
A similar step is taken for the main conversion loop.
The bundled test case follows this logic:
* Try to convert an ISO-2022-JP-3 escape sequence
switching the active character set.
* Reset the internal state by providing NULL as the input buffer.
* Ensure that nothing has been converted.
Signed-off-by: Nikita Popov <npv1310@gmail.com>
Gbp-Pq: Topic all
Gbp-Pq: Name git-CVE-2021-3326-Fix-assertion-failure-in-ISO-2022-JP-3-module-.diff
ifeq (yes,$(build-shared))
tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \
tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \
- bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13
+ bug-iconv10 bug-iconv11 bug-iconv12 bug-iconv13 bug-iconv14 bug-iconv15
ifeq ($(have-thread-library),yes)
tests += bug-iconv3
endif
$(addprefix $(objpfx),$(modules.so))
$(objpfx)bug-iconv12.out: $(objpfx)gconv-modules \
$(addprefix $(objpfx),$(modules.so))
+$(objpfx)bug-iconv14.out: $(objpfx)gconv-modules \
+ $(addprefix $(objpfx),$(modules.so))
+$(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \
+ $(addprefix $(objpfx),$(modules.so))
$(objpfx)iconv-test.out: run-iconv-test.sh $(objpfx)gconv-modules \
$(addprefix $(objpfx),$(modules.so)) \
--- /dev/null
+/* Assertion in ISO-2022-JP-3 due to two-character sequence (bug 27256).
+ Copyright (C) 2021 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <iconv.h>
+#include <string.h>
+#include <errno.h>
+#include <support/check.h>
+
+/* Use an escape sequence to return to the initial state. */
+static void
+with_escape_sequence (void)
+{
+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
+ TEST_VERIFY_EXIT (c != (iconv_t) -1);
+
+ char in[] = "\e$(O+D\e(B";
+ char *inbuf = in;
+ size_t inleft = strlen (in);
+ char out[3]; /* Space for one output character. */
+ char *outbuf;
+ size_t outleft;
+
+ outbuf = out;
+ outleft = sizeof (out);
+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
+ TEST_COMPARE (errno, E2BIG);
+ TEST_COMPARE (inleft, 3);
+ TEST_COMPARE (inbuf - in, strlen (in) - 3);
+ TEST_COMPARE (outleft, sizeof (out) - 2);
+ TEST_COMPARE (outbuf - out, 2);
+ TEST_COMPARE (out[0] & 0xff, 0xc3);
+ TEST_COMPARE (out[1] & 0xff, 0xa6);
+
+ /* Return to the initial shift state, producing the pending
+ character. */
+ outbuf = out;
+ outleft = sizeof (out);
+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), 0);
+ TEST_COMPARE (inleft, 0);
+ TEST_COMPARE (inbuf - in, strlen (in));
+ TEST_COMPARE (outleft, sizeof (out) - 2);
+ TEST_COMPARE (outbuf - out, 2);
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
+ TEST_COMPARE (out[1] & 0xff, 0x80);
+
+ /* Nothing should be flushed the second time. */
+ outbuf = out;
+ outleft = sizeof (out);
+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
+ TEST_COMPARE (outleft, sizeof (out));
+ TEST_COMPARE (outbuf - out, 0);
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
+ TEST_COMPARE (out[1] & 0xff, 0x80);
+
+ TEST_COMPARE (iconv_close (c), 0);
+}
+
+/* Use an explicit flush to return to the initial state. */
+static void
+with_flush (void)
+{
+ iconv_t c = iconv_open ("UTF-8", "ISO-2022-JP-3");
+ TEST_VERIFY_EXIT (c != (iconv_t) -1);
+
+ char in[] = "\e$(O+D";
+ char *inbuf = in;
+ size_t inleft = strlen (in);
+ char out[3]; /* Space for one output character. */
+ char *outbuf;
+ size_t outleft;
+
+ outbuf = out;
+ outleft = sizeof (out);
+ TEST_COMPARE (iconv (c, &inbuf, &inleft, &outbuf, &outleft), (size_t) -1);
+ TEST_COMPARE (errno, E2BIG);
+ TEST_COMPARE (inleft, 0);
+ TEST_COMPARE (inbuf - in, strlen (in));
+ TEST_COMPARE (outleft, sizeof (out) - 2);
+ TEST_COMPARE (outbuf - out, 2);
+ TEST_COMPARE (out[0] & 0xff, 0xc3);
+ TEST_COMPARE (out[1] & 0xff, 0xa6);
+
+ /* Flush the pending character. */
+ outbuf = out;
+ outleft = sizeof (out);
+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
+ TEST_COMPARE (outleft, sizeof (out) - 2);
+ TEST_COMPARE (outbuf - out, 2);
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
+ TEST_COMPARE (out[1] & 0xff, 0x80);
+
+ /* Nothing should be flushed the second time. */
+ outbuf = out;
+ outleft = sizeof (out);
+ TEST_COMPARE (iconv (c, NULL, 0, &outbuf, &outleft), 0);
+ TEST_COMPARE (outleft, sizeof (out));
+ TEST_COMPARE (outbuf - out, 0);
+ TEST_COMPARE (out[0] & 0xff, 0xcc);
+ TEST_COMPARE (out[1] & 0xff, 0x80);
+
+ TEST_COMPARE (iconv_close (c), 0);
+}
+
+static int
+do_test (void)
+{
+ with_escape_sequence ();
+ with_flush ();
+ return 0;
+}
+
+#include <support/test-driver.c>
--- /dev/null
+/* Bug 28524: Conversion from ISO-2022-JP-3 with iconv
+ may emit spurious NUL character on state reset.
+ Copyright (C) The GNU Toolchain Authors.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <stddef.h>
+#include <iconv.h>
+#include <support/check.h>
+
+static int
+do_test (void)
+{
+ char in[] = "\x1b(I";
+ char *inbuf = in;
+ size_t inleft = sizeof (in) - 1;
+ char out[1];
+ char *outbuf = out;
+ size_t outleft = sizeof (out);
+ iconv_t cd;
+
+ cd = iconv_open ("UTF8", "ISO-2022-JP-3");
+ TEST_VERIFY_EXIT (cd != (iconv_t) -1);
+
+ /* First call to iconv should alter internal state.
+ Now, JISX0201_Kana_set is selected and
+ state value != ASCII_set. */
+ TEST_VERIFY (iconv (cd, &inbuf, &inleft, &outbuf, &outleft) != (size_t) -1);
+
+ /* No bytes should have been added to
+ the output buffer at this point. */
+ TEST_VERIFY (outbuf == out);
+ TEST_VERIFY (outleft == sizeof (out));
+
+ /* Second call shall emit spurious NUL character in unpatched glibc. */
+ TEST_VERIFY (iconv (cd, NULL, NULL, &outbuf, &outleft) != (size_t) -1);
+
+ /* No characters are expected to be produced. */
+ TEST_VERIFY (outbuf == out);
+ TEST_VERIFY (outleft == sizeof (out));
+
+ TEST_VERIFY_EXIT (iconv_close (cd) != -1);
+
+ return 0;
+}
+
+#include <support/test-driver.c>
CURRENT_SEL_MASK = 7 << 3
};
-/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the state
- also contains the last two bytes to be output, shifted by 6 bits, and a
- one-bit indicator whether they must be preceded by the shift sequence,
- in bit 22. */
+/* During UCS-4 to ISO-2022-JP-3 conversion, the COUNT element of the
+ state also contains the last two bytes to be output, shifted by 6
+ bits, and a one-bit indicator whether they must be preceded by the
+ shift sequence, in bit 22. During ISO-2022-JP-3 to UCS-4
+ conversion, COUNT may also contain a non-zero pending wide
+ character, shifted by six bits. This happens for certain inputs in
+ JISX0213_1_2004_set and JISX0213_2_set if the second wide character
+ in a combining sequence cannot be written because the buffer is
+ full. */
/* Since this is a stateful encoding we have to provide code which resets
the output state to the initial state. This has to be done during the
{ \
if (FROM_DIRECTION) \
{ \
- /* It's easy, we don't have to emit anything, we just reset the \
- state for the input. */ \
- data->__statep->__count &= 7; \
- data->__statep->__count |= ASCII_set; \
+ uint32_t ch = data->__statep->__count >> 6; \
+ \
+ if (__glibc_unlikely (ch != 0)) \
+ { \
+ if (__glibc_likely (outbuf + 4 <= outend)) \
+ { \
+ /* Write out the last character. */ \
+ put32u (outbuf, ch); \
+ outbuf += 4; \
+ data->__statep->__count &= 7; \
+ data->__statep->__count |= ASCII_set; \
+ } \
+ else \
+ /* We don't have enough room in the output buffer. */ \
+ status = __GCONV_FULL_OUTPUT; \
+ } \
+ else \
+ { \
+ data->__statep->__count &= 7; \
+ data->__statep->__count |= ASCII_set; \
+ } \
} \
else \
{ \
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
- uint32_t ch = *inptr; \
+ uint32_t ch; \
+ \
+ /* Output any pending character. */ \
+ ch = set >> 6; \
+ if (__glibc_unlikely (ch != 0)) \
+ { \
+ put32 (outptr, ch); \
+ outptr += 4; \
+ /* Remove the pending character, but preserve state bits. */ \
+ set &= (1 << 6) - 1; \
+ continue; \
+ } \
+ \
+ /* Otherwise read the next input byte. */ \
+ ch = *inptr; \
\
/* Recognize escape sequences. */ \
if (__glibc_unlikely (ch == ESC)) \
uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
\
+ inptr += 2; \
+ \
+ put32 (outptr, u1); \
+ outptr += 4; \
+ \
/* See whether we have room for two characters. */ \
- if (outptr + 8 <= outend) \
+ if (outptr + 4 <= outend) \
{ \
- inptr += 2; \
- put32 (outptr, u1); \
- outptr += 4; \
put32 (outptr, u2); \
outptr += 4; \
continue; \
} \
- else \
- { \
- result = __GCONV_FULL_OUTPUT; \
- break; \
- } \
+ \
+ /* Otherwise store only the first character now, and \
+ put the second one into the queue. */ \
+ set |= u2 << 6; \
+ /* Tell the caller why we terminate the loop. */ \
+ result = __GCONV_FULL_OUTPUT; \
+ break; \
} \
\
inptr += 2; \