/*
 * Patch overview (unified git diff touching include/linux/memcontrol.h,
 * mm/memcontrol.c and mm/page-writeback.c).
 *
 * NOTE: the diff text below has had its line breaks and indentation
 * collapsed; hunk headers, context lines and +/- lines share the same
 * physical lines.  It has to be re-wrapped into one diff line per
 * physical line before a patch tool can consume it.
 *
 * What the patch changes, as far as the visible hunks show:
 *
 *  - lock_page_memcg() now returns a struct mem_cgroup pointer instead
 *    of void.  It returns NULL when mem_cgroup_disabled() is true or
 *    when page->mem_cgroup is NULL, and otherwise the memcg it sampled
 *    from page->mem_cgroup (with memcg->move_lock taken when
 *    memcg->moving_account is positive).  rcu_read_lock() is called
 *    before any of these returns, so the RCU read lock is held on
 *    every return path, including the NULL ones.
 *
 *  - __unlock_page_memcg(memcg) is new.  It takes the memcg directly
 *    rather than reading it from a page, performs the move_lock
 *    release only when memcg is non-NULL and memcg->move_lock_task is
 *    current, and always ends with rcu_read_unlock().
 *    unlock_page_memcg(page) becomes a wrapper that passes
 *    page->mem_cgroup to it.
 *
 *  - __mem_cgroup_update_page_stat(page, memcg, idx, val) is new.  It
 *    keeps the VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page)))
 *    check and adds val to the per-cpu memcg->stat->count[idx] counter
 *    when both memcg and memcg->stat are non-NULL.
 *    mem_cgroup_update_page_stat() now delegates to it with
 *    page->mem_cgroup.  The memcg->stat NULL check was not present in
 *    the code being removed.
 *
 *  - Empty inline stubs are added next to the existing empty stubs in
 *    the header (presumably the memcg-disabled configuration; the
 *    surrounding #ifdef is not visible here -- confirm):
 *    lock_page_memcg() returns NULL, __unlock_page_memcg() and
 *    __mem_cgroup_update_page_stat() do nothing.
 *
 *  - test_clear_page_writeback() keeps the memcg returned by
 *    lock_page_memcg() in a local and uses it for the
 *    MEM_CGROUP_STAT_WRITEBACK decrement and for the unlock, instead
 *    of re-reading page->mem_cgroup.  The comment added by the patch
 *    gives the reason: writeback holds no page reference of its own,
 *    so the page may already be free once PG_writeback is cleared.
 *
 * NOTE(review): the stub __mem_cgroup_update_page_stat() names its
 * last parameter "nr" while the real definition names it "val";
 * harmless, but the two could be made consistent.
 *
 * NOTE(review): the second header hunk appears to add an empty line
 * between the kernel-doc comment and mem_cgroup_update_page_stat();
 * this looks unintentional -- confirm against the unwrapped patch.
 *
 * NOTE(review): __mem_cgroup_update_page_stat() still mentions
 * PageLocked(page) inside VM_BUG_ON() on a page that the caller's
 * comment says may already be free; presumably acceptable because the
 * RCU check is evaluated first -- verify.
 */
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 8b35bdbdc214..fd77f8303ab9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -490,9 +490,21 @@ bool mem_cgroup_oom_synchronize(bool wait); extern int do_swap_account; #endif -void lock_page_memcg(struct page *page); +struct mem_cgroup *lock_page_memcg(struct page *page); +void __unlock_page_memcg(struct mem_cgroup *memcg); void unlock_page_memcg(struct page *page); +static inline void __mem_cgroup_update_page_stat(struct page *page, + struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, + int val) +{ + VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); + + if (memcg && memcg->stat) + this_cpu_add(memcg->stat->count[idx], val); +} + /** * mem_cgroup_update_page_stat - update page state statistics * @page: the page @@ -508,13 +520,12 @@ void unlock_page_memcg(struct page *page); * mem_cgroup_update_page_stat(page, state, -1); * unlock_page(page) or unlock_page_memcg(page) */ + static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int val) { - VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); - if (page->mem_cgroup) - this_cpu_add(page->mem_cgroup->stat->count[idx], val); + __mem_cgroup_update_page_stat(page, page->mem_cgroup, idx, val); } static inline void mem_cgroup_inc_page_stat(struct page *page, @@ -709,7 +720,12 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p) { } -static inline void lock_page_memcg(struct page *page) +static inline struct mem_cgroup *lock_page_memcg(struct page *page) +{ + return NULL; +} + +static inline void __unlock_page_memcg(struct mem_cgroup *memcg) { } @@ -745,6 +761,13 @@ static inline void mem_cgroup_update_page_stat(struct page *page, { } +static inline void __mem_cgroup_update_page_stat(struct page *page, + struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, + int nr) +{ +} + static inline void mem_cgroup_inc_page_stat(struct page 
*page, enum mem_cgroup_stat_index idx) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index d4232744c59f..27b0b4f03fcd 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1638,9 +1638,13 @@ cleanup: * @page: the page * * This function protects unlocked LRU pages from being moved to - * another cgroup and stabilizes their page->mem_cgroup binding. + * another cgroup. + * + * It ensures lifetime of the returned memcg. Caller is responsible + * for the lifetime of the page; __unlock_page_memcg() is available + * when @page might get freed inside the locked section. */ -void lock_page_memcg(struct page *page) +struct mem_cgroup *lock_page_memcg(struct page *page) { struct mem_cgroup *memcg; unsigned long flags; @@ -1649,18 +1653,24 @@ void lock_page_memcg(struct page *page) * The RCU lock is held throughout the transaction. The fast * path can get away without acquiring the memcg->move_lock * because page moving starts with an RCU grace period. - */ + * + * The RCU lock also protects the memcg from being freed when + * the page state that is going to change is the only thing + * preventing the page itself from being freed. E.g. writeback + * doesn't hold a page reference and relies on PG_writeback to + * keep off truncation, migration and so forth. + */ rcu_read_lock(); if (mem_cgroup_disabled()) - return; + return NULL; again: memcg = page->mem_cgroup; if (unlikely(!memcg)) - return; + return NULL; if (atomic_read(&memcg->moving_account) <= 0) - return; + return memcg; spin_lock_irqsave(&memcg->move_lock, flags); if (memcg != page->mem_cgroup) { @@ -1676,18 +1686,18 @@ again: memcg->move_lock_task = current; memcg->move_lock_flags = flags; - return; + return memcg; } EXPORT_SYMBOL(lock_page_memcg); /** - * unlock_page_memcg - unlock a page->mem_cgroup binding - * @page: the page + * __unlock_page_memcg - unlock and unpin a memcg + * @memcg: the memcg + * + * Unlock and unpin a memcg returned by lock_page_memcg(). 
*/ -void unlock_page_memcg(struct page *page) +void __unlock_page_memcg(struct mem_cgroup *memcg) { - struct mem_cgroup *memcg = page->mem_cgroup; - if (memcg && memcg->move_lock_task == current) { unsigned long flags = memcg->move_lock_flags; @@ -1699,6 +1709,15 @@ void unlock_page_memcg(struct page *page) rcu_read_unlock(); } + +/** + * unlock_page_memcg - unlock a page->mem_cgroup binding + * @page: the page + */ +void unlock_page_memcg(struct page *page) +{ + __unlock_page_memcg(page->mem_cgroup); +} EXPORT_SYMBOL(unlock_page_memcg); /* diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 462c778b9fb5..498c924f2fcd 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2717,9 +2717,10 @@ EXPORT_SYMBOL(clear_page_dirty_for_io); int test_clear_page_writeback(struct page *page) { struct address_space *mapping = page_mapping(page); + struct mem_cgroup *memcg; int ret; - lock_page_memcg(page); + memcg = lock_page_memcg(page); if (mapping && mapping_use_writeback_tags(mapping)) { struct inode *inode = mapping->host; struct backing_dev_info *bdi = inode_to_bdi(inode); @@ -2747,13 +2748,20 @@ int test_clear_page_writeback(struct page *page) } else { ret = TestClearPageWriteback(page); } + /* + * NOTE: Page might be free now! Writeback doesn't hold a page + * reference on its own, it relies on truncation to wait for + * the clearing of PG_writeback. The below can only access + * page state that is static across allocation cycles. + */ if (ret) { - mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK); + __mem_cgroup_update_page_stat(page, memcg, + MEM_CGROUP_STAT_WRITEBACK, -1); dec_node_page_state(page, NR_WRITEBACK); dec_zone_page_state(page, NR_ZONE_WRITE_PENDING); inc_node_page_state(page, NR_WRITTEN); } - unlock_page_memcg(page); + __unlock_page_memcg(memcg); return ret; }