cgroup_get() expected to be called only on live cgroups and triggers
authorTejun Heo <tj@kernel.org>
Fri, 28 Apr 2017 19:14:55 +0000 (15:14 -0400)
committerRaspbian kernel package updater <root@raspbian.org>
Sat, 31 Mar 2018 14:54:18 +0000 (15:54 +0100)
warning on a dead cgroup; however, cgroup_sk_alloc() may be called
while cloning a socket which is left in an empty and removed cgroup
and thus may legitimately duplicate its reference on a dead cgroup.
This currently triggers the following warning spuriously.

 WARNING: CPU: 14 PID: 0 at kernel/cgroup.c:490 cgroup_get+0x55/0x60
 ...
  [<ffffffff8107e123>] __warn+0xd3/0xf0
  [<ffffffff8107e20e>] warn_slowpath_null+0x1e/0x20
  [<ffffffff810ff465>] cgroup_get+0x55/0x60
  [<ffffffff81106061>] cgroup_sk_alloc+0x51/0xe0
  [<ffffffff81761beb>] sk_clone_lock+0x2db/0x390
  [<ffffffff817cce06>] inet_csk_clone_lock+0x16/0xc0
  [<ffffffff817e8173>] tcp_create_openreq_child+0x23/0x4b0
  [<ffffffff818601a1>] tcp_v6_syn_recv_sock+0x91/0x670
  [<ffffffff817e8b16>] tcp_check_req+0x3a6/0x4e0
  [<ffffffff81861ba3>] tcp_v6_rcv+0x693/0xa00
  [<ffffffff81837429>] ip6_input_finish+0x59/0x3e0
  [<ffffffff81837cb2>] ip6_input+0x32/0xb0
  [<ffffffff81837387>] ip6_rcv_finish+0x57/0xa0
  [<ffffffff81837ac8>] ipv6_rcv+0x318/0x4d0
  [<ffffffff817778c7>] __netif_receive_skb_core+0x2d7/0x9a0
  [<ffffffff81777fa6>] __netif_receive_skb+0x16/0x70
  [<ffffffff81778023>] netif_receive_skb_internal+0x23/0x80
  [<ffffffff817787d8>] napi_gro_frags+0x208/0x270
  [<ffffffff8168a9ec>] mlx4_en_process_rx_cq+0x74c/0xf40
  [<ffffffff8168b270>] mlx4_en_poll_rx_cq+0x30/0x90
  [<ffffffff81778b30>] net_rx_action+0x210/0x350
  [<ffffffff8188c426>] __do_softirq+0x106/0x2c7
  [<ffffffff81082bad>] irq_exit+0x9d/0xa0 [<ffffffff8188c0e4>] do_IRQ+0x54/0xd0
  [<ffffffff8188a63f>] common_interrupt+0x7f/0x7f <EOI>
  [<ffffffff8173d7e7>] cpuidle_enter+0x17/0x20
  [<ffffffff810bdfd9>] cpu_startup_entry+0x2a9/0x2f0
  [<ffffffff8103edd1>] start_secondary+0xf1/0x100

This patch renames the existing cgroup_get() with the dead cgroup
warning to cgroup_get_live() after cgroup_kn_lock_live() and
introduces the new cgroup_get() which doesn't check whether the cgroup
is live or dead.

All existing cgroup_get() users except for cgroup_sk_alloc() are
converted to use cgroup_get_live().

Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets")
Cc: stable@vger.kernel.org # v4.5+
Cc: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Chris Mason <clm@fb.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup.c

index d0ecb18e0e3ad06483be2ddea9114c5a5860daaa..2650df194b5d0bb39bb92c295d93f691b00faffb 100644 (file)
@@ -488,6 +488,11 @@ static inline bool cgroup_is_dead(const struct cgroup *cgrp)
 }
 
 static void cgroup_get(struct cgroup *cgrp)
+{
+       css_get(&cgrp->self);
+}
+
+static void cgroup_get_live(struct cgroup *cgrp)
 {
        WARN_ON_ONCE(cgroup_is_dead(cgrp));
        css_get(&cgrp->self);
@@ -1049,7 +1054,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
        list_add_tail(&link->cgrp_link, &cset->cgrp_links);
 
        if (cgroup_parent(cgrp))
-               cgroup_get(cgrp);
+               cgroup_get_live(cgrp);
 }
 
 /**
@@ -2118,7 +2123,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                }
                cgrp_dfl_visible = true;
                root = &cgrp_dfl_root;
-               cgroup_get(&root->cgrp);
+               cgroup_get_live(&root->cgrp);
                goto out_mount;
        }
 
@@ -3151,7 +3156,7 @@ restart:
                        if (!css || !percpu_ref_is_dying(&css->refcnt))
                                continue;
 
-                       cgroup_get(dsct);
+                       cgroup_get_live(dsct);
                        prepare_to_wait(&dsct->offline_waitq, &wait,
                                        TASK_UNINTERRUPTIBLE);
 
@@ -5096,7 +5101,7 @@ static void init_and_link_css(struct cgroup_subsys_state *css,
 {
        lockdep_assert_held(&cgroup_mutex);
 
-       cgroup_get(cgrp);
+       cgroup_get_live(cgrp);
 
        memset(css, 0, sizeof(*css));
        css->cgroup = cgrp;
@@ -5272,7 +5277,7 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
        /* allocation complete, commit to creation */
        list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
        atomic_inc(&root->nr_cgrps);
-       cgroup_get(parent);
+       cgroup_get_live(parent);
 
        /*
         * @cgrp is now fully operational.  If something fails after this
@@ -6269,7 +6274,7 @@ struct cgroup *cgroup_get_from_path(const char *path)
        if (kn) {
                if (kernfs_type(kn) == KERNFS_DIR) {
                        cgrp = kn->priv;
-                       cgroup_get(cgrp);
+                       cgroup_get_live(cgrp);
                } else {
                        cgrp = ERR_PTR(-ENOTDIR);
                }
@@ -6349,6 +6354,11 @@ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
 
        /* Socket clone path */
        if (skcd->val) {
+               /*
+                * We might be cloning a socket which is left in an empty
+                * cgroup and the cgroup might have already been rmdir'd.
+                * Don't use cgroup_get_live().
+                */
                cgroup_get(sock_cgroup_ptr(skcd));
                return;
        }