| From 1f47b61fb4077936465dcde872a4e5cc4fe708da Mon Sep 17 00:00:00 2001 |
| From: Vladimir Davydov <vdavydov@virtuozzo.com> |
| Date: Thu, 11 Aug 2016 15:33:00 -0700 |
| Subject: mm: memcontrol: fix swap counter leak on swapout from offline cgroup |
| |
| From: Vladimir Davydov <vdavydov@virtuozzo.com> |
| |
| commit 1f47b61fb4077936465dcde872a4e5cc4fe708da upstream. |
| |
| An offline memory cgroup might have anonymous memory or shmem left |
| charged to it and no swap. Since only swap entries pin the id of an |
| offline cgroup, such a cgroup will have no id and so an attempt to |
| swapout its anon/shmem will not store memory cgroup info in the swap |
| cgroup map. As a result, memcg->swap or memcg->memsw will never get |
| uncharged from it and any of its ancestors. |
| |
| Fix this by always charging swapout to the first ancestor cgroup that |
| hasn't released its id yet. |
| |
| [hannes@cmpxchg.org: add comment to mem_cgroup_swapout] |
| [vdavydov@virtuozzo.com: use WARN_ON_ONCE() in mem_cgroup_id_get_online()] |
| Link: http://lkml.kernel.org/r/20160803123445.GJ13263@esperanza |
| Fixes: 73f576c04b941 ("mm: memcontrol: fix cgroup creation failure after many small jobs") |
| Link: http://lkml.kernel.org/r/5336daa5c9a32e776067773d9da655d2dc126491.1470219853.git.vdavydov@virtuozzo.com |
| Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com> |
| Acked-by: Johannes Weiner <hannes@cmpxchg.org> |
| Acked-by: Michal Hocko <mhocko@suse.com> |
| Cc: <stable@vger.kernel.org> [3.19+] |
| Signed-off-by: Andrew Morton <akpm@linux-foundation.org> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Michal Hocko <mhocko@suse.com> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| mm/memcontrol.c | 37 +++++++++++++++++++++++++++++++++---- |
| 1 file changed, 33 insertions(+), 4 deletions(-) |
| |
| --- a/mm/memcontrol.c |
| +++ b/mm/memcontrol.c |
| @@ -4141,6 +4141,24 @@ static void mem_cgroup_id_get(struct mem |
| atomic_inc(&memcg->id.ref); |
| } |
| |
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+{
+ while (!atomic_inc_not_zero(&memcg->id.ref)) {
+ /*
+ * The root cgroup cannot be destroyed, so its refcount must
+ * always be >= 1.
+ */
+ if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+ VM_BUG_ON(1);
+ break;
+ }
+ memcg = parent_mem_cgroup(memcg);
+ if (!memcg)
+ memcg = root_mem_cgroup;
+ }
+ return memcg;
+}
| + |
| static void mem_cgroup_id_put(struct mem_cgroup *memcg) |
| { |
| if (atomic_dec_and_test(&memcg->id.ref)) { |
| @@ -5721,7 +5739,7 @@ subsys_initcall(mem_cgroup_init); |
| */ |
| void mem_cgroup_swapout(struct page *page, swp_entry_t entry) |
| { |
| - struct mem_cgroup *memcg; |
| + struct mem_cgroup *memcg, *swap_memcg; |
| unsigned short oldid; |
| |
| VM_BUG_ON_PAGE(PageLRU(page), page); |
| @@ -5736,16 +5754,27 @@ void mem_cgroup_swapout(struct page *pag |
| if (!memcg) |
| return; |
| |
| - mem_cgroup_id_get(memcg); |
| - oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg)); |
| + /* |
| + * In case the memcg owning these pages has been offlined and doesn't |
| + * have an ID allocated to it anymore, charge the closest online |
| + * ancestor for the swap instead and transfer the memory+swap charge. |
| + */ |
| + swap_memcg = mem_cgroup_id_get_online(memcg); |
| + oldid = swap_cgroup_record(entry, mem_cgroup_id(swap_memcg)); |
| VM_BUG_ON_PAGE(oldid, page); |
| - mem_cgroup_swap_statistics(memcg, true); |
| + mem_cgroup_swap_statistics(swap_memcg, true); |
| |
| page->mem_cgroup = NULL; |
| |
| if (!mem_cgroup_is_root(memcg)) |
| page_counter_uncharge(&memcg->memory, 1); |
| |
| + if (memcg != swap_memcg) { |
| + if (!mem_cgroup_is_root(swap_memcg)) |
| + page_counter_charge(&swap_memcg->memsw, 1); |
| + page_counter_uncharge(&memcg->memsw, 1); |
| + } |
| + |
| /* |
| * Interrupts should be disabled here because the caller holds the |
| * mapping->tree_lock lock which is taken with interrupts-off. It is |