From 84c640e9ef8c51647c037fba87c4357d8e5f5175 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Tue, 29 Apr 2025 14:47:59 +0200 Subject: [PATCH] [PATCH] coredump: use %d in kernel core pattern MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The kernel provides %d which is documented as "dump mode—same as value returned by prctl(2) PR_GET_DUMPABLE". We already query /proc/pid/auxv for this information, but unfortunately this check is subject to a race, because the crashed process may be replaced by an attacker before we read this data, for example replacing a SUID process that was killed by a signal with another process that is not SUID, tricking us into making the coredump of the original process readable by the attacker. With this patch, we effectively add one more check to the list of conditions that need be satisfied if we are to make the coredump accessible to the user. Reportedy-by: Qualys Security Advisory (cherry-picked from commit 0c49e0049b7665bb7769a13ef346fef92e1ad4d6) (cherry-picked from commit c58a8a6ec9817275bb4babaa2c08e0e35090d4e3) (cherry picked from commit 19d439189ab85dd7222bdd59fd442bbcc8ea99a7) (cherry picked from commit 254ab8d2a7866679cee006d844d078774cbac3c9) (cherry picked from commit 7fc7aa5a4d28d7768dfd1eb85be385c3ea949168) (cherry picked from commit 19b228662e0fcc6596c0395a0af8486a4b3f1627) Origin: upstream, https://github.com/systemd/systemd-stable/commit/2eb46dce078334805c547cbcf5e6462cf9d2f9f0 Forwarded: not-needed Last-Update: 2025-06-23 Gbp-Pq: Name CVE-2025-4598-4.patch --- src/coredump/coredump.c | 21 +++- sysctl.d/50-coredump.conf.in | 2 +- test/units/testsuite-74.coredump.sh | 144 ++++++++++++++++++++++++++++ 3 files changed, 163 insertions(+), 4 deletions(-) create mode 100755 test/units/testsuite-74.coredump.sh diff --git a/src/coredump/coredump.c b/src/coredump/coredump.c index f787a7a0..a4678f6d 100644 --- a/src/coredump/coredump.c +++ b/src/coredump/coredump.c @@ -84,6 +84,7 @@ typedef enum { _META_ARGV_REQUIRED, /* The fields below were added to kernel/core_pattern at later points, so they might be missing. */ META_ARGV_HOSTNAME = _META_ARGV_REQUIRED, /* %h: hostname */ + META_ARGV_DUMPABLE, /* %d: as set by the kernel */ _META_ARGV_MAX, /* If new fields are added, they should be added here, to maintain compatibility * with callers which don't know about the new fields. */ @@ -112,6 +113,7 @@ static const char * const meta_field_names[_META_MAX] = { [META_ARGV_TIMESTAMP] = "COREDUMP_TIMESTAMP=", [META_ARGV_RLIMIT] = "COREDUMP_RLIMIT=", [META_ARGV_HOSTNAME] = "COREDUMP_HOSTNAME=", + [META_ARGV_DUMPABLE] = "COREDUMP_DUMPABLE=", [META_COMM] = "COREDUMP_COMM=", [META_EXE] = "COREDUMP_EXE=", [META_UNIT] = "COREDUMP_UNIT=", @@ -122,6 +124,7 @@ typedef struct Context { const char *meta[_META_MAX]; size_t meta_size[_META_MAX]; pid_t pid; + unsigned dumpable; bool is_pid1; bool is_journald; } Context; @@ -469,14 +472,16 @@ static int grant_user_access(int core_fd, const Context *context) { if (r < 0) return r; - /* We allow access if we got all the data and at_secure is not set and - * the uid/gid matches euid/egid. */ + /* We allow access if dumpable on the command line was exactly 1, we got all the data, + * at_secure is not set, and the uid/gid match euid/egid. */ bool ret = + context->dumpable == 1 && at_secure == 0 && uid != UID_INVALID && euid != UID_INVALID && uid == euid && gid != GID_INVALID && egid != GID_INVALID && gid == egid; - log_debug("Will %s access (uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)", + log_debug("Will %s access (dumpable=%u uid="UID_FMT " euid="UID_FMT " gid="GID_FMT " egid="GID_FMT " at_secure=%s)", ret ? "permit" : "restrict", + context->dumpable, uid, euid, gid, egid, yes_no(at_secure)); return ret; } @@ -1005,6 +1010,16 @@ static int save_context(Context *context, const struct iovec_wrapper *iovw) { if (r < 0) return log_error_errno(r, "Failed to parse PID \"%s\": %m", context->meta[META_ARGV_PID]); + /* The value is set to contents of /proc/sys/fs/suid_dumpable, which we set to 2, + * if the process is marked as not dumpable, see PR_SET_DUMPABLE(2const). */ + if (context->meta[META_ARGV_DUMPABLE]) { + r = safe_atou(context->meta[META_ARGV_DUMPABLE], &context->dumpable); + if (r < 0) + return log_error_errno(r, "Failed to parse dumpable field \"%s\": %m", context->meta[META_ARGV_DUMPABLE]); + if (context->dumpable > 2) + log_notice("Got unexpected %%d/dumpable value %u.", context->dumpable); + } + unit = context->meta[META_UNIT]; context->is_pid1 = streq(context->meta[META_ARGV_PID], "1") || streq_ptr(unit, SPECIAL_INIT_SCOPE); context->is_journald = streq_ptr(unit, SPECIAL_JOURNALD_SERVICE); diff --git a/sysctl.d/50-coredump.conf.in b/sysctl.d/50-coredump.conf.in index 8e501c40..2f92294b 100644 --- a/sysctl.d/50-coredump.conf.in +++ b/sysctl.d/50-coredump.conf.in @@ -13,7 +13,7 @@ # the core dump. # # See systemd-coredump(8) and core(5). -kernel.core_pattern=|@rootlibexecdir@/systemd-coredump %P %u %g %s %t 9223372036854775808 %h +kernel.core_pattern=|@rootlibexecdir@/systemd-coredump %P %u %g %s %t 9223372036854775808 %h %d # Allow that 16 coredumps are dispatched in parallel by the kernel. We want to # be able to collect process metadata from /proc/%P/ while processing diff --git a/test/units/testsuite-74.coredump.sh b/test/units/testsuite-74.coredump.sh new file mode 100755 index 00000000..ff23418b --- /dev/null +++ b/test/units/testsuite-74.coredump.sh @@ -0,0 +1,144 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: LGPL-2.1-or-later +set -eux +set -o pipefail + +# Make sure the binary name fits into 15 characters +CORE_TEST_BIN="/tmp/test-dump" +MAKE_DUMP_SCRIPT="/tmp/make-dump" +# Unset $PAGER so we don't have to use --no-pager everywhere +export PAGER= + +at_exit() { + rm -fv -- "$CORE_TEST_BIN" "$MAKE_DUMP_SCRIPT" +} + +trap at_exit EXIT + +if systemd-detect-virt -cq; then + echo "Running in a container, skipping the systemd-coredump test..." + exit 0 +fi + +# To make all coredump entries stored in system.journal. +journalctl --rotate + +# Check that we're the ones to receive coredumps +sysctl kernel.core_pattern | grep systemd-coredump + +# Prepare "fake" binaries for coredumps, so we can properly exercise +# the matching stuff too +cp -vf /bin/sleep "${CORE_TEST_BIN:?}" +# Simple script that spawns given "fake" binary and then kills it with +# given signal +cat >"${MAKE_DUMP_SCRIPT:?}" <<\EOF +#!/bin/bash -ex + +bin="${1:?}" +sig="${2:?}" + +ulimit -c unlimited +"$bin" infinity & +pid=$! +# Sync with the "fake" binary, so we kill it once it's fully forked off, +# otherwise we might kill it during fork and kernel would then report +# "wrong" binary name (i.e. $MAKE_DUMP_SCRIPT instead of $CORE_TEST_BIN). +# In this case, wait until the "fake" binary (sleep in this case) enters +# the "interruptible sleep" state, at which point it should be ready +# to be sacrificed. +for _ in {0..9}; do + read -ra self_stat <"/proc/$pid/stat" + [[ "${self_stat[2]}" == S ]] && break + sleep .5 +done +kill -s "$sig" "$pid" +# This should always fail +! wait "$pid" +EOF +chmod +x "$MAKE_DUMP_SCRIPT" + +# Privileged stuff +[[ "$(id -u)" -eq 0 ]] +# Trigger a couple of coredumps +"$MAKE_DUMP_SCRIPT" "$CORE_TEST_BIN" "SIGTRAP" +"$MAKE_DUMP_SCRIPT" "$CORE_TEST_BIN" "SIGABRT" +# In the tests we store the coredumps in journals, so let's generate a couple +# with Storage=external as well +mkdir -p /run/systemd/coredump.conf.d/ +printf '[Coredump]\nStorage=external' >/run/systemd/coredump.conf.d/99-external.conf +"$MAKE_DUMP_SCRIPT" "$CORE_TEST_BIN" "SIGTRAP" +"$MAKE_DUMP_SCRIPT" "$CORE_TEST_BIN" "SIGABRT" +rm -fv /run/systemd/coredump.conf.d/99-external.conf +# Wait a bit for the coredumps to get processed +timeout 30 bash -c "while [[ \$(coredumpctl list -q --no-legend $CORE_TEST_BIN | wc -l) -lt 4 ]]; do sleep 1; done" + +coredumpctl +SYSTEMD_LOG_LEVEL=debug coredumpctl +coredumpctl --help +coredumpctl --version +coredumpctl --no-pager --no-legend +coredumpctl -1 +coredumpctl --reverse +coredumpctl -F COREDUMP_EXE +coredumpctl --directory=/var/log/journal +coredumpctl --file="/var/log/journal/$(/tmp/core.redirected +test -s /tmp/core.redirected +coredumpctl dump -o /tmp/core.output "${CORE_TEST_BIN##*/}" +test -s /tmp/core.output +rm -f /tmp/core.{output,redirected} + +# --backtrace mode +# Pass one of the existing journal coredump records to systemd-coredump. +# Use our PID as the source to be able to create a PIDFD and to make matching easier. +# systemd-coredump args: PID UID GID SIGNUM TIMESTAMP CORE_SOFT_RLIMIT [HOSTNAME] +journalctl -b -n 1 --output=export --output-fields=MESSAGE,COREDUMP COREDUMP_EXE="/usr/bin/test-dump" | + /usr/lib/systemd/systemd-coredump --backtrace $$ 0 0 6 1679509900 12345 +journalctl -b -n 1 --output=export --output-fields=MESSAGE,COREDUMP COREDUMP_EXE="/usr/bin/test-dump" | + /usr/lib/systemd/systemd-coredump --backtrace $$ 0 0 6 1679509901 12345 mymachine +journalctl -b -n 1 --output=export --output-fields=MESSAGE,COREDUMP COREDUMP_EXE="/usr/bin/test-dump" | + /usr/lib/systemd/systemd-coredump --backtrace $$ 0 0 6 1679509902 12345 youmachine 1 +# Wait a bit for the coredumps to get processed +timeout 30 bash -c "while [[ \$(coredumpctl list -q --no-legend $$ | wc -l) -lt 2 ]]; do sleep 1; done" +coredumpctl info $$ +coredumpctl info COREDUMP_TIMESTAMP=1679509900000000 +coredumpctl info COREDUMP_TIMESTAMP=1679509901000000 +coredumpctl info COREDUMP_HOSTNAME="mymachine" +coredumpctl info COREDUMP_TIMESTAMP=1679509902000000 +coredumpctl info COREDUMP_HOSTNAME="youmachine" +coredumpctl info COREDUMP_DUMPABLE="1" + +systemd-run -t --property CoredumpFilter=default ls /tmp + +(! coredumpctl --hello-world) +(! coredumpctl --file=/dev/null) +(! coredumpctl --since=0) +(! coredumpctl --until='') +(! coredumpctl --since=today --until=yesterday) +(! coredumpctl -F foo -F bar) +(! coredumpctl list 0) +(! coredumpctl list -- -1) +(! coredumpctl list '') +(! coredumpctl info /../.~=) +(! coredumpctl info '') +(! coredumpctl dump --output=/dev/full "$CORE_TEST_BIN") +(! coredumpctl dump --output=/dev/null --output=/dev/null "$CORE_TEST_BIN") +(! coredumpctl debug --debugger=/bin/false) -- 2.30.2