diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 31db222b6deb..8c901684b750 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -811,12 +811,44 @@ static int truncate_error_page(struct page *p, unsigned long pfn, return ret; } +struct page_state { + unsigned long mask; + unsigned long res; + enum mf_action_page_type type; + + /* Callback ->action() has to unlock the relevant page inside it. */ + int (*action)(struct page_state *ps, struct page *p); +}; + +/* + * Return true if page is still referenced by others, otherwise return + * false. + * + * The extra_pins is true when one extra refcount is expected. + */ +static bool has_extra_refcount(struct page_state *ps, struct page *p, + bool extra_pins) +{ + int count = page_count(p) - 1; + + if (extra_pins) + count -= 1; + + if (count > 0) { + pr_err("Memory failure: %#lx: %s still referenced by %d users\n", + page_to_pfn(p), action_page_types[ps->type], count); + return true; + } + + return false; +} + /* * Error hit kernel page. * Do nothing, try to be lucky and not touch this instead. For a few cases we * could be more sophisticated. */ -static int me_kernel(struct page *p, unsigned long pfn) +static int me_kernel(struct page_state *ps, struct page *p) { unlock_page(p); return MF_IGNORED; @@ -825,9 +857,9 @@ static int me_kernel(struct page *p, unsigned long pfn) /* * Page in unknown state. Do nothing. */ -static int me_unknown(struct page *p, unsigned long pfn) +static int me_unknown(struct page_state *ps, struct page *p) { - pr_err("Memory failure: %#lx: Unknown page state\n", pfn); + pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p)); unlock_page(p); return MF_FAILED; } @@ -835,7 +867,7 @@ static int me_unknown(struct page *p, unsigned long pfn) /* * Clean (or cleaned) page cache page. */ -static int me_pagecache_clean(struct page *p, unsigned long pfn) +static int me_pagecache_clean(struct page_state *ps, struct page *p) { int ret; struct address_space *mapping; @@ -872,9 +904,13 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) * * Open: to take i_rwsem or not for this? Right now we don't. */ - ret = truncate_error_page(p, pfn, mapping); + ret = truncate_error_page(p, page_to_pfn(p), mapping); out: unlock_page(p); + + if (has_extra_refcount(ps, p, false)) + ret = MF_FAILED; + return ret; } @@ -883,7 +919,7 @@ out: * Issues: when the error hit a hole page the error is not properly * propagated. */ -static int me_pagecache_dirty(struct page *p, unsigned long pfn) +static int me_pagecache_dirty(struct page_state *ps, struct page *p) { struct address_space *mapping = page_mapping(p); @@ -927,7 +963,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) mapping_set_error(mapping, -EIO); } - return me_pagecache_clean(p, pfn); + return me_pagecache_clean(ps, p); } /* @@ -949,9 +985,10 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) * Clean swap cache pages can be directly isolated. A later page fault will * bring in the known good data from disk. */ -static int me_swapcache_dirty(struct page *p, unsigned long pfn) +static int me_swapcache_dirty(struct page_state *ps, struct page *p) { int ret; + bool extra_pins = false; ClearPageDirty(p); /* Trigger EIO in shmem: */ @@ -959,10 +996,17 @@ static int me_swapcache_dirty(struct page *p, unsigned long pfn) ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED; unlock_page(p); + + if (ret == MF_DELAYED) + extra_pins = true; + + if (has_extra_refcount(ps, p, extra_pins)) + ret = MF_FAILED; + return ret; } -static int me_swapcache_clean(struct page *p, unsigned long pfn) +static int me_swapcache_clean(struct page_state *ps, struct page *p) { int ret; @@ -970,6 +1014,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED; unlock_page(p); + + if (has_extra_refcount(ps, p, false)) + ret = MF_FAILED; + return ret; } @@ -979,7 +1027,7 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn) * - Error on hugepage is contained in hugepage unit (not in raw page unit.) * To narrow down kill region to one page, we need to break up pmd. */ -static int me_huge_page(struct page *p, unsigned long pfn) +static int me_huge_page(struct page_state *ps, struct page *p) { int res; struct page *hpage = compound_head(p); @@ -990,7 +1038,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) mapping = page_mapping(hpage); if (mapping) { - res = truncate_error_page(hpage, pfn, mapping); + res = truncate_error_page(hpage, page_to_pfn(p), mapping); unlock_page(hpage); } else { res = MF_FAILED; @@ -1008,6 +1056,9 @@ static int me_huge_page(struct page *p, unsigned long pfn) } } + if (has_extra_refcount(ps, p, false)) + res = MF_FAILED; + return res; } @@ -1033,14 +1084,7 @@ static int me_huge_page(struct page *p, unsigned long pfn) #define slab (1UL << PG_slab) #define reserved (1UL << PG_reserved) -static struct page_state { - unsigned long mask; - unsigned long res; - enum mf_action_page_type type; - - /* Callback ->action() has to unlock the relevant page inside it. */ - int (*action)(struct page *p, unsigned long pfn); -} error_states[] = { +static struct page_state error_states[] = { { reserved, reserved, MF_MSG_KERNEL, me_kernel }, /* * free pages are specially detected outside this table: @@ -1100,19 +1144,10 @@ static int page_action(struct page_state *ps, struct page *p, unsigned long pfn) { int result; - int count; /* page p should be unlocked after returning from ps->action(). */ - result = ps->action(p, pfn); + result = ps->action(ps, p); - count = page_count(p) - 1; - if (ps->action == me_swapcache_dirty && result == MF_DELAYED) - count--; - if (count > 0) { - pr_err("Memory failure: %#lx: %s still referenced by %d users\n", - pfn, action_page_types[ps->type], count); - result = MF_FAILED; - } action_result(pfn, ps->type, result); /* Could do more checks here if page looks ok */