Merge branches 'pm-sleep', 'pm-qos' and 'powercap'

Merge system-wide power management changes and power capping updates
for 6.6-rc1:

 - Add device PM helpers to allow a device to remain powered-on during
   system-wide transitions (Ulf Hansson).

 - Rework hibernation memory snapshotting to avoid storing pages filled
   with zeros in hibernation image files (Brian Geffon).

 - Add check to make sure that CPU latency QoS constraints do not use
   negative values (Clive Lin).

 - Optimize rp->domains memory allocation in the Intel RAPL power
   capping driver (xiongxin).

 - Remove recursion while parsing zones in the arm_scmi power capping
   driver (Cristian Marussi).

* pm-sleep:
  PM: sleep: Add helpers to allow a device to remain powered-on
  PM: hibernate: don't store zero pages in the image file

* pm-qos:
  PM: QoS: Add check to make sure CPU latency is non-negative

* powercap:
  powercap: intel_rapl: Optimize rp->domains memory allocation
  powercap: arm_scmi: Remove recursion while parsing zones
Author: Rafael J. Wysocki
Date:   2023-08-25 21:23:30 +02:00

 5 files changed, 259 insertions(+), 108 deletions(-)

diff --git a/drivers/powercap/arm_scmi_powercap.c b/drivers/powercap/arm_scmi_powercap.c

@@ -12,6 +12,7 @@
 #include <linux/module.h>
 #include <linux/powercap.h>
 #include <linux/scmi_protocol.h>
+#include <linux/slab.h>
 
 #define to_scmi_powercap_zone(z)		\
 	container_of(z, struct scmi_powercap_zone, zone)
@@ -19,6 +20,8 @@
 static const struct scmi_powercap_proto_ops *powercap_ops;
 
 struct scmi_powercap_zone {
+	bool registered;
+	bool invalid;
 	unsigned int height;
 	struct device *dev;
 	struct scmi_protocol_handle *ph;
@@ -32,6 +35,7 @@ struct scmi_powercap_root {
 	unsigned int num_zones;
 	struct scmi_powercap_zone *spzones;
 	struct list_head *registered_zones;
+	struct list_head scmi_zones;
 };
 
 static struct powercap_control_type *scmi_top_pcntrl;
@@ -271,12 +275,6 @@ static void scmi_powercap_unregister_all_zones(struct scmi_powercap_root *pr)
 	}
 }
 
-static inline bool
-scmi_powercap_is_zone_registered(struct scmi_powercap_zone *spz)
-{
-	return !list_empty(&spz->node);
-}
-
 static inline unsigned int
 scmi_powercap_get_zone_height(struct scmi_powercap_zone *spz)
 {
@@ -295,11 +293,46 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz)
 	return &spz->spzones[spz->info->parent_id];
 }
 
+static int scmi_powercap_register_zone(struct scmi_powercap_root *pr,
+				       struct scmi_powercap_zone *spz,
+				       struct scmi_powercap_zone *parent)
+{
+	int ret = 0;
+	struct powercap_zone *z;
+
+	if (spz->invalid) {
+		list_del(&spz->node);
+		return -EINVAL;
+	}
+
+	z = powercap_register_zone(&spz->zone, scmi_top_pcntrl, spz->info->name,
+				   parent ? &parent->zone : NULL,
+				   &zone_ops, 1, &constraint_ops);
+	if (!IS_ERR(z)) {
+		spz->height = scmi_powercap_get_zone_height(spz);
+		spz->registered = true;
+		list_move(&spz->node, &pr->registered_zones[spz->height]);
+		dev_dbg(spz->dev, "Registered node %s - parent %s - height:%d\n",
+			spz->info->name, parent ? parent->info->name : "ROOT",
+			spz->height);
+	} else {
+		list_del(&spz->node);
+		ret = PTR_ERR(z);
+		dev_err(spz->dev,
+			"Error registering node:%s - parent:%s - h:%d - ret:%d\n",
+			spz->info->name,
+			parent ? parent->info->name : "ROOT",
+			spz->height, ret);
+	}
+
+	return ret;
+}
+
 /**
- * scmi_powercap_register_zone - Register an SCMI powercap zone recursively
+ * scmi_zones_register- Register SCMI powercap zones starting from parent zones
  *
+ * @dev: A reference to the SCMI device
  * @pr: A reference to the root powercap zones descriptors
- * @spz: A reference to the SCMI powercap zone to register
  *
  * When registering SCMI powercap zones with the powercap framework we should
  * take care to always register zones starting from the root ones and to
@@ -309,10 +342,10 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz)
  * zones provided by the SCMI platform firmware is built to comply with such
  * requirement.
  *
- * This function, given an SCMI powercap zone to register, takes care to walk
- * the SCMI powercap zones tree up to the root looking recursively for
- * unregistered parent zones before registering the provided zone; at the same
- * time each registered zone height in such a tree is accounted for and each
+ * This function, given the set of SCMI powercap zones to register, takes care
+ * to walk the SCMI powercap zones trees up to the root registering any
+ * unregistered parent zone before registering the child zones; at the same
+ * time each registered-zone height in such a tree is accounted for and each
  * zone, once registered, is stored in the @registered_zones array that is
 * indexed by zone height: this way will be trivial, at unregister time, to walk
 * the @registered_zones array backward and unregister all the zones starting
@@ -330,57 +363,55 @@ scmi_powercap_get_parent_zone(struct scmi_powercap_zone *spz)
  *
  * Return: 0 on Success
  */
-static int scmi_powercap_register_zone(struct scmi_powercap_root *pr,
-				       struct scmi_powercap_zone *spz)
+static int scmi_zones_register(struct device *dev,
+			       struct scmi_powercap_root *pr)
 {
 	int ret = 0;
-	struct scmi_powercap_zone *parent;
-
-	if (!spz->info)
-		return ret;
+	unsigned int sp = 0, reg_zones = 0;
+	struct scmi_powercap_zone *spz, **zones_stack;
 
-	parent = scmi_powercap_get_parent_zone(spz);
-	if (parent && !scmi_powercap_is_zone_registered(parent)) {
-		/*
-		 * Bail out if a parent domain was marked as unsupported:
-		 * only domains participating as leaves can be skipped.
-		 */
-		if (!parent->info)
-			return -ENODEV;
+	zones_stack = kcalloc(pr->num_zones, sizeof(spz), GFP_KERNEL);
+	if (!zones_stack)
+		return -ENOMEM;
 
-		ret = scmi_powercap_register_zone(pr, parent);
-		if (ret)
-			return ret;
-	}
+	spz = list_first_entry_or_null(&pr->scmi_zones,
+				       struct scmi_powercap_zone, node);
+	while (spz) {
+		struct scmi_powercap_zone *parent;
 
-	if (!scmi_powercap_is_zone_registered(spz)) {
-		struct powercap_zone *z;
-
-		z = powercap_register_zone(&spz->zone,
-					   scmi_top_pcntrl,
-					   spz->info->name,
-					   parent ? &parent->zone : NULL,
-					   &zone_ops, 1, &constraint_ops);
-		if (!IS_ERR(z)) {
-			spz->height = scmi_powercap_get_zone_height(spz);
-			list_add(&spz->node,
-				 &pr->registered_zones[spz->height]);
-			dev_dbg(spz->dev,
-				"Registered node %s - parent %s - height:%d\n",
-				spz->info->name,
-				parent ? parent->info->name : "ROOT",
-				spz->height);
-			ret = 0;
+		parent = scmi_powercap_get_parent_zone(spz);
+		if (parent && !parent->registered) {
+			zones_stack[sp++] = spz;
+			spz = parent;
 		} else {
-			ret = PTR_ERR(z);
-			dev_err(spz->dev,
-				"Error registering node:%s - parent:%s - h:%d - ret:%d\n",
-				spz->info->name,
-				parent ? parent->info->name : "ROOT",
-				spz->height, ret);
+			ret = scmi_powercap_register_zone(pr, spz, parent);
+			if (!ret) {
+				reg_zones++;
+			} else if (sp) {
+				/* Failed to register a non-leaf zone.
+				 * Bail-out.
+				 */
+				dev_err(dev,
+					"Failed to register non-leaf zone - ret:%d\n",
+					ret);
+				scmi_powercap_unregister_all_zones(pr);
+				reg_zones = 0;
+				goto out;
+			}
+			/* Pick next zone to process */
+			if (sp)
+				spz = zones_stack[--sp];
+			else
+				spz = list_first_entry_or_null(&pr->scmi_zones,
							       struct scmi_powercap_zone,
							       node);
 		}
 	}
 
+out:
+	kfree(zones_stack);
+	dev_info(dev, "Registered %d SCMI Powercap domains !\n", reg_zones);
+
 	return ret;
 }
@@ -424,6 +455,8 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
 	if (!pr->registered_zones)
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&pr->scmi_zones);
+
 	for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) {
 		/*
 		 * Powercap domains are validate by the protocol layer, i.e.
@@ -438,6 +471,7 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
 		INIT_LIST_HEAD(&spz->node);
 		INIT_LIST_HEAD(&pr->registered_zones[i]);
+		list_add_tail(&spz->node, &pr->scmi_zones);
 
 		/*
 		 * Forcibly skip powercap domains using an abstract scale.
 		 * Note that only leaves domains can be skipped, so this could
@@ -448,7 +482,7 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
 			dev_warn(dev,
 				 "Abstract power scale not supported. Skip %s.\n",
 				 spz->info->name);
-			spz->info = NULL;
+			spz->invalid = true;
 			continue;
 		}
 	}
@@ -457,21 +491,12 @@ static int scmi_powercap_probe(struct scmi_device *sdev)
 	 * Scan array of retrieved SCMI powercap domains and register them
 	 * recursively starting from the root domains.
 	 */
-	for (i = 0, spz = pr->spzones; i < pr->num_zones; i++, spz++) {
-		ret = scmi_powercap_register_zone(pr, spz);
-		if (ret) {
-			dev_err(dev,
-				"Failed to register powercap zone %s - ret:%d\n",
-				spz->info->name, ret);
-			scmi_powercap_unregister_all_zones(pr);
-			return ret;
-		}
-	}
+	ret = scmi_zones_register(dev, pr);
+	if (ret)
+		return ret;
 
 	dev_set_drvdata(dev, pr);
 
-	dev_info(dev, "Registered %d SCMI Powercap domains !\n", pr->num_zones);
-
 	return ret;
 }
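
The arm_scmi change above replaces recursive parent-first registration with an explicit stack. A minimal standalone sketch of the same idea, with illustrative types and a stubbed register_one() rather than the driver's real helpers:

#include <linux/slab.h>

/* Illustrative types; not the driver's code. */
struct node {
	struct node *parent;
	bool registered;
};

static int register_one(struct node *n)
{
	/* Stand-in for the real per-zone registration. */
	n->registered = true;
	return 0;
}

/*
 * Register every node parent-first without recursion, using an explicit
 * stack of deferred children, so tree depth no longer consumes kernel stack.
 */
static int register_tree(struct node **all, unsigned int count)
{
	struct node **stack;
	unsigned int i, sp = 0;
	int ret = 0;

	stack = kcalloc(count, sizeof(*stack), GFP_KERNEL);
	if (!stack)
		return -ENOMEM;

	for (i = 0; i < count && !ret; i++) {
		struct node *n = all[i];

		while (n && !ret) {
			if (n->parent && !n->parent->registered) {
				/* Defer this node; handle the parent first. */
				stack[sp++] = n;
				n = n->parent;
				continue;
			}
			if (!n->registered)
				ret = register_one(n);
			/* Resume a deferred child, if any. */
			n = sp ? stack[--sp] : NULL;
		}
	}

	kfree(stack);
	return ret;
}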

diff --git a/drivers/powercap/intel_rapl_common.c b/drivers/powercap/intel_rapl_common.c

@@ -1485,7 +1485,7 @@ static int rapl_detect_domains(struct rapl_package *rp)
 	}
 	pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
 
-	rp->domains = kcalloc(rp->nr_domains + 1, sizeof(struct rapl_domain),
+	rp->domains = kcalloc(rp->nr_domains, sizeof(struct rapl_domain),
 			      GFP_KERNEL);
 	if (!rp->domains)
 		return -ENOMEM;

diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h

@@ -194,6 +194,16 @@ static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec,
 
 #endif /* !CONFIG_PM_SLEEP */
 
+static inline bool device_awake_path(struct device *dev)
+{
+	return device_wakeup_path(dev);
+}
+
+static inline void device_set_awake_path(struct device *dev)
+{
+	device_set_wakeup_path(dev);
+}
+
 static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec)
 {
 	return pm_wakeup_ws_event(ws, msec, false);
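
A sketch of how a driver might use the new helpers from its system-suspend callback; the "foo" driver and its pm_ops are hypothetical, only device_set_awake_path()/device_awake_path() come from the patch above:

#include <linux/device.h>
#include <linux/pm.h>
#include <linux/pm_wakeup.h>

static int foo_suspend(struct device *dev)
{
	/*
	 * Flag that this device needs to remain powered-on across the
	 * system-wide transition. Parents and power-domain code that honour
	 * the wakeup/awake path (checked via device_awake_path()) can then
	 * keep the power on even though no wakeup source is armed.
	 */
	device_set_awake_path(dev);
	return 0;
}

static const struct dev_pm_ops foo_pm_ops = {
	SYSTEM_SLEEP_PM_OPS(foo_suspend, NULL)
};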

diff --git a/kernel/power/qos.c b/kernel/power/qos.c

@@ -220,6 +220,11 @@ static struct pm_qos_constraints cpu_latency_constraints = {
 	.type = PM_QOS_MIN,
 };
 
+static inline bool cpu_latency_qos_value_invalid(s32 value)
+{
+	return value < 0 && value != PM_QOS_DEFAULT_VALUE;
+}
+
 /**
  * cpu_latency_qos_limit - Return current system-wide CPU latency QoS limit.
  */
@@ -263,7 +268,7 @@ static void cpu_latency_qos_apply(struct pm_qos_request *req,
  */
 void cpu_latency_qos_add_request(struct pm_qos_request *req, s32 value)
 {
-	if (!req)
+	if (!req || cpu_latency_qos_value_invalid(value))
 		return;
 
 	if (cpu_latency_qos_request_active(req)) {
@@ -289,7 +294,7 @@ EXPORT_SYMBOL_GPL(cpu_latency_qos_add_request);
  */
 void cpu_latency_qos_update_request(struct pm_qos_request *req, s32 new_value)
 {
-	if (!req)
+	if (!req || cpu_latency_qos_value_invalid(new_value))
 		return;
 
 	if (!cpu_latency_qos_request_active(req)) {
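
Typical consumer usage of the CPU latency QoS API, for context; the "foo" names are illustrative. With the new check, negative values other than PM_QOS_DEFAULT_VALUE (-1, which means "use the class default") are ignored instead of being inserted as constraints:

#include <linux/pm_qos.h>

static struct pm_qos_request foo_qos_req;

static void foo_start_low_latency(void)
{
	/* Bound the acceptable CPU wakeup latency to 20 usec while active. */
	cpu_latency_qos_add_request(&foo_qos_req, 20);

	/*
	 * A bogus negative value such as -2000 is now rejected up front by
	 * cpu_latency_qos_value_invalid() and the request is left untouched.
	 */
	cpu_latency_qos_update_request(&foo_qos_req, -2000);	/* ignored */
}

static void foo_stop_low_latency(void)
{
	cpu_latency_qos_remove_request(&foo_qos_req);
}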

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c

@@ -404,6 +404,7 @@ struct bm_position {
 	struct mem_zone_bm_rtree *zone;
 	struct rtree_node *node;
 	unsigned long node_pfn;
+	unsigned long cur_pfn;
 	int node_bit;
 };
 
@@ -589,6 +590,7 @@ static void memory_bm_position_reset(struct memory_bitmap *bm)
 	bm->cur.node = list_entry(bm->cur.zone->leaves.next,
 				  struct rtree_node, list);
 	bm->cur.node_pfn = 0;
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	bm->cur.node_bit = 0;
 }
 
@@ -799,6 +801,7 @@ node_found:
 	bm->cur.zone = zone;
 	bm->cur.node = node;
 	bm->cur.node_pfn = (pfn - zone->start_pfn) & ~BM_BLOCK_MASK;
+	bm->cur.cur_pfn = pfn;
 
 	/* Set return values */
 	*addr = node->data;
@@ -850,6 +853,11 @@ static void memory_bm_clear_current(struct memory_bitmap *bm)
 	clear_bit(bit, bm->cur.node->data);
 }
 
+static unsigned long memory_bm_get_current(struct memory_bitmap *bm)
+{
+	return bm->cur.cur_pfn;
+}
+
 static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn)
 {
 	void *addr;
@@ -929,10 +937,12 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm)
 		if (bit < bits) {
 			pfn = bm->cur.zone->start_pfn + bm->cur.node_pfn + bit;
 			bm->cur.node_bit = bit + 1;
+			bm->cur.cur_pfn = pfn;
 			return pfn;
 		}
 	} while (rtree_next_node(bm));
 
+	bm->cur.cur_pfn = BM_END_OF_MAP;
 	return BM_END_OF_MAP;
 }
 
@@ -1423,14 +1433,19 @@ static unsigned int count_data_pages(void)
 
 /*
  * This is needed, because copy_page and memcpy are not usable for copying
- * task structs.
+ * task structs. Returns true if the page was filled with only zeros,
+ * otherwise false.
 */
-static inline void do_copy_page(long *dst, long *src)
+static inline bool do_copy_page(long *dst, long *src)
 {
+	long z = 0;
 	int n;
 
-	for (n = PAGE_SIZE / sizeof(long); n; n--)
+	for (n = PAGE_SIZE / sizeof(long); n; n--) {
+		z |= *src;
 		*dst++ = *src++;
+	}
+	return !z;
 }
 
 /**
@@ -1439,17 +1454,21 @@ static inline void do_copy_page(long *dst, long *src)
 * Check if the page we are going to copy is marked as present in the kernel
 * page tables. This always is the case if CONFIG_DEBUG_PAGEALLOC or
 * CONFIG_ARCH_HAS_SET_DIRECT_MAP is not set. In that case kernel_page_present()
- * always returns 'true'.
+ * always returns 'true'. Returns true if the page was entirely composed of
+ * zeros, otherwise it will return false.
 */
-static void safe_copy_page(void *dst, struct page *s_page)
+static bool safe_copy_page(void *dst, struct page *s_page)
 {
+	bool zeros_only;
+
 	if (kernel_page_present(s_page)) {
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 	} else {
 		hibernate_map_page(s_page);
-		do_copy_page(dst, page_address(s_page));
+		zeros_only = do_copy_page(dst, page_address(s_page));
 		hibernate_unmap_page(s_page);
 	}
+	return zeros_only;
 }
 
 #ifdef CONFIG_HIGHMEM
@@ -1459,17 +1478,18 @@ static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn)
 		saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn);
 }
 
-static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static bool copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
 	struct page *s_page, *d_page;
 	void *src, *dst;
+	bool zeros_only;
 
 	s_page = pfn_to_page(src_pfn);
 	d_page = pfn_to_page(dst_pfn);
 	if (PageHighMem(s_page)) {
 		src = kmap_atomic(s_page);
 		dst = kmap_atomic(d_page);
-		do_copy_page(dst, src);
+		zeros_only = do_copy_page(dst, src);
 		kunmap_atomic(dst);
 		kunmap_atomic(src);
 	} else {
@@ -1478,30 +1498,39 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 			 * The page pointed to by src may contain some kernel
 			 * data modified by kmap_atomic()
 			 */
-			safe_copy_page(buffer, s_page);
+			zeros_only = safe_copy_page(buffer, s_page);
 			dst = kmap_atomic(d_page);
 			copy_page(dst, buffer);
 			kunmap_atomic(dst);
 		} else {
-			safe_copy_page(page_address(d_page), s_page);
+			zeros_only = safe_copy_page(page_address(d_page), s_page);
 		}
 	}
+	return zeros_only;
 }
 
 #else
 #define page_is_saveable(zone, pfn)	saveable_page(zone, pfn)
 
-static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
+static inline int copy_data_page(unsigned long dst_pfn, unsigned long src_pfn)
 {
-	safe_copy_page(page_address(pfn_to_page(dst_pfn)),
-		       pfn_to_page(src_pfn));
+	return safe_copy_page(page_address(pfn_to_page(dst_pfn)),
+			      pfn_to_page(src_pfn));
 }
 #endif /* CONFIG_HIGHMEM */
 
-static void copy_data_pages(struct memory_bitmap *copy_bm,
-			    struct memory_bitmap *orig_bm)
+/*
+ * Copy data pages will copy all pages into pages pulled from the copy_bm.
+ * If a page was entirely filled with zeros it will be marked in the zero_bm.
+ *
+ * Returns the number of pages copied.
+ */
+static unsigned long copy_data_pages(struct memory_bitmap *copy_bm,
+				     struct memory_bitmap *orig_bm,
+				     struct memory_bitmap *zero_bm)
 {
+	unsigned long copied_pages = 0;
 	struct zone *zone;
-	unsigned long pfn;
+	unsigned long pfn, copy_pfn;
 
 	for_each_populated_zone(zone) {
 		unsigned long max_zone_pfn;
@@ -1514,18 +1543,29 @@ static void copy_data_pages(struct memory_bitmap *copy_bm,
 	}
 	memory_bm_position_reset(orig_bm);
 	memory_bm_position_reset(copy_bm);
+	copy_pfn = memory_bm_next_pfn(copy_bm);
 	for(;;) {
 		pfn = memory_bm_next_pfn(orig_bm);
 		if (unlikely(pfn == BM_END_OF_MAP))
 			break;
-		copy_data_page(memory_bm_next_pfn(copy_bm), pfn);
+		if (copy_data_page(copy_pfn, pfn)) {
+			memory_bm_set_bit(zero_bm, pfn);
+			/* Use this copy_pfn for a page that is not full of zeros */
+			continue;
+		}
+		copied_pages++;
+		copy_pfn = memory_bm_next_pfn(copy_bm);
 	}
+	return copied_pages;
 }
 
 /* Total number of image pages */
 static unsigned int nr_copy_pages;
 /* Number of pages needed for saving the original pfns of the image pages */
 static unsigned int nr_meta_pages;
+/* Number of zero pages */
+static unsigned int nr_zero_pages;
+
 /*
  * Numbers of normal and highmem page frames allocated for hibernation image
  * before suspending devices.
@@ -1546,6 +1586,9 @@ static struct memory_bitmap orig_bm;
 */
 static struct memory_bitmap copy_bm;
 
+/* Memory bitmap which tracks which saveable pages were zero filled. */
+static struct memory_bitmap zero_bm;
+
 /**
 * swsusp_free - Free pages allocated for hibernation image.
 *
@@ -1590,6 +1633,7 @@ loop:
 out:
 	nr_copy_pages = 0;
 	nr_meta_pages = 0;
+	nr_zero_pages = 0;
 	restore_pblist = NULL;
 	buffer = NULL;
 	alloc_normal = 0;
@@ -1808,8 +1852,15 @@ int hibernate_preallocate_memory(void)
 		goto err_out;
 	}
 
+	error = memory_bm_create(&zero_bm, GFP_IMAGE, PG_ANY);
+	if (error) {
+		pr_err("Cannot allocate zero bitmap\n");
+		goto err_out;
+	}
+
 	alloc_normal = 0;
 	alloc_highmem = 0;
+	nr_zero_pages = 0;
 
 	/* Count the number of saveable data pages. */
 	save_highmem = count_highmem_pages();
@@ -2089,19 +2140,19 @@ asmlinkage __visible int swsusp_save(void)
	 * Kill them.
	 */
 	drain_local_pages(NULL);
-	copy_data_pages(&copy_bm, &orig_bm);
+	nr_copy_pages = copy_data_pages(&copy_bm, &orig_bm, &zero_bm);
 
 	/*
	 * End of critical section. From now on, we can write to memory,
	 * but we should not touch disk. This specially means we must _not_
	 * touch swap space! Except we must write out our image of course.
	 */
 
 	nr_pages += nr_highmem;
-	nr_copy_pages = nr_pages;
+	/* We don't actually copy the zero pages */
+	nr_zero_pages = nr_pages - nr_copy_pages;
 	nr_meta_pages = DIV_ROUND_UP(nr_pages * sizeof(long), PAGE_SIZE);
 
-	pr_info("Image created (%d pages copied)\n", nr_pages);
+	pr_info("Image created (%d pages copied, %d zero pages)\n", nr_copy_pages, nr_zero_pages);
 
 	return 0;
 }
@@ -2146,15 +2197,22 @@ static int init_header(struct swsusp_info *info)
 	return init_header_complete(info);
 }
 
+#define ENCODED_PFN_ZERO_FLAG ((unsigned long)1 << (BITS_PER_LONG - 1))
+#define ENCODED_PFN_MASK (~ENCODED_PFN_ZERO_FLAG)
+
 /**
 * pack_pfns - Prepare PFNs for saving.
 * @bm: Memory bitmap.
 * @buf: Memory buffer to store the PFNs in.
+ * @zero_bm: Memory bitmap containing PFNs of zero pages.
 *
 * PFNs corresponding to set bits in @bm are stored in the area of memory
- * pointed to by @buf (1 page at a time).
+ * pointed to by @buf (1 page at a time). Pages which were filled with only
+ * zeros will have the highest bit set in the packed format to distinguish
+ * them from PFNs which will be contained in the image file.
 */
-static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm,
+			     struct memory_bitmap *zero_bm)
 {
 	int j;
 
@@ -2162,6 +2220,8 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm)
 		buf[j] = memory_bm_next_pfn(bm);
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
+		if (memory_bm_test_bit(zero_bm, buf[j]))
+			buf[j] |= ENCODED_PFN_ZERO_FLAG;
 	}
 }
 
@@ -2203,7 +2263,7 @@ int snapshot_read_next(struct snapshot_handle *handle)
 		memory_bm_position_reset(&copy_bm);
 	} else if (handle->cur <= nr_meta_pages) {
 		clear_page(buffer);
-		pack_pfns(buffer, &orig_bm);
+		pack_pfns(buffer, &orig_bm, &zero_bm);
 	} else {
 		struct page *page;
 
@@ -2299,24 +2359,35 @@ static int load_header(struct swsusp_info *info)
 * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap.
 * @bm: Memory bitmap.
 * @buf: Area of memory containing the PFNs.
+ * @zero_bm: Memory bitmap with the zero PFNs marked.
 *
 * For each element of the array pointed to by @buf (1 page at a time), set the
- * corresponding bit in @bm.
+ * corresponding bit in @bm. If the page was originally populated with only
+ * zeros then a corresponding bit will also be set in @zero_bm.
 */
-static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm)
+static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm,
+			    struct memory_bitmap *zero_bm)
 {
+	unsigned long decoded_pfn;
+	bool zero;
 	int j;
 
 	for (j = 0; j < PAGE_SIZE / sizeof(long); j++) {
 		if (unlikely(buf[j] == BM_END_OF_MAP))
 			break;
 
-		if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) {
-			memory_bm_set_bit(bm, buf[j]);
+		zero = !!(buf[j] & ENCODED_PFN_ZERO_FLAG);
+		decoded_pfn = buf[j] & ENCODED_PFN_MASK;
+		if (pfn_valid(decoded_pfn) && memory_bm_pfn_present(bm, decoded_pfn)) {
+			memory_bm_set_bit(bm, decoded_pfn);
+			if (zero) {
+				memory_bm_set_bit(zero_bm, decoded_pfn);
+				nr_zero_pages++;
+			}
 		} else {
-			if (!pfn_valid(buf[j]))
+			if (!pfn_valid(decoded_pfn))
 				pr_err(FW_BUG "Memory map mismatch at 0x%llx after hibernation\n",
-				       (unsigned long long)PFN_PHYS(buf[j]));
+				       (unsigned long long)PFN_PHYS(decoded_pfn));
 			return -EFAULT;
 		}
 	}
@@ -2538,6 +2609,7 @@ static inline void free_highmem_data(void) {}
 * prepare_image - Make room for loading hibernation image.
 * @new_bm: Uninitialized memory bitmap structure.
 * @bm: Memory bitmap with unsafe pages marked.
+ * @zero_bm: Memory bitmap containing the zero pages.
 *
 * Use @bm to mark the pages that will be overwritten in the process of
 * restoring the system memory state from the suspend image ("unsafe" pages)
@@ -2548,10 +2620,15 @@ static inline void free_highmem_data(void) {}
 * pages will be used for just yet. Instead, we mark them all as allocated and
 * create a lists of "safe" pages to be used later. On systems with high
 * memory a list of "safe" highmem pages is created too.
+ *
+ * Because it was not known which pages were unsafe when @zero_bm was created,
+ * make a copy of it and recreate it within safe pages.
 */
-static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
+static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm,
+			 struct memory_bitmap *zero_bm)
 {
 	unsigned int nr_pages, nr_highmem;
+	struct memory_bitmap tmp;
 	struct linked_page *lp;
 	int error;
 
@@ -2568,6 +2645,24 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 	duplicate_memory_bitmap(new_bm, bm);
 	memory_bm_free(bm, PG_UNSAFE_KEEP);
 
+	/* Make a copy of zero_bm so it can be created in safe pages */
+	error = memory_bm_create(&tmp, GFP_ATOMIC, PG_ANY);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(&tmp, zero_bm);
+	memory_bm_free(zero_bm, PG_UNSAFE_KEEP);
+
+	/* Recreate zero_bm in safe pages */
+	error = memory_bm_create(zero_bm, GFP_ATOMIC, PG_SAFE);
+	if (error)
+		goto Free;
+
+	duplicate_memory_bitmap(zero_bm, &tmp);
+	memory_bm_free(&tmp, PG_UNSAFE_KEEP);
+
+	/* At this point zero_bm is in safe pages and it can be used for restoring. */
+
 	if (nr_highmem > 0) {
 		error = prepare_highmem_image(bm, &nr_highmem);
 		if (error)
@@ -2582,7 +2677,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
	 *
	 * nr_copy_pages cannot be less than allocated_unsafe_pages too.
	 */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE);
 	while (nr_pages > 0) {
 		lp = get_image_page(GFP_ATOMIC, PG_SAFE);
@@ -2595,7 +2690,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm)
 		nr_pages--;
 	}
 	/* Preallocate memory for the image */
-	nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages;
+	nr_pages = (nr_zero_pages + nr_copy_pages) - nr_highmem - allocated_unsafe_pages;
 	while (nr_pages > 0) {
 		lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC);
 		if (!lp) {
@@ -2683,8 +2778,9 @@ int snapshot_write_next(struct snapshot_handle *handle)
 	static struct chain_allocator ca;
 	int error = 0;
 
+next:
 	/* Check if we have already loaded the entire image */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages)
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages)
 		return 0;
 
 	handle->sync_read = 1;
@@ -2709,19 +2805,26 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		if (error)
 			return error;
 
+		error = memory_bm_create(&zero_bm, GFP_ATOMIC, PG_ANY);
+		if (error)
+			return error;
+
+		nr_zero_pages = 0;
+
 		hibernate_restore_protection_begin();
 	} else if (handle->cur <= nr_meta_pages + 1) {
-		error = unpack_orig_pfns(buffer, &copy_bm);
+		error = unpack_orig_pfns(buffer, &copy_bm, &zero_bm);
 		if (error)
 			return error;
 
 		if (handle->cur == nr_meta_pages + 1) {
-			error = prepare_image(&orig_bm, &copy_bm);
+			error = prepare_image(&orig_bm, &copy_bm, &zero_bm);
 			if (error)
 				return error;
 
 			chain_init(&ca, GFP_ATOMIC, PG_SAFE);
 			memory_bm_position_reset(&orig_bm);
+			memory_bm_position_reset(&zero_bm);
 			restore_pblist = NULL;
 			handle->buffer = get_buffer(&orig_bm, &ca);
 			handle->sync_read = 0;
@@ -2738,6 +2841,14 @@ int snapshot_write_next(struct snapshot_handle *handle)
 		handle->sync_read = 0;
 	}
 	handle->cur++;
+
+	/* Zero pages were not included in the image, memset it and move on. */
+	if (handle->cur > nr_meta_pages + 1 &&
+	    memory_bm_test_bit(&zero_bm, memory_bm_get_current(&orig_bm))) {
+		memset(handle->buffer, 0, PAGE_SIZE);
+		goto next;
+	}
+
 	return PAGE_SIZE;
 }
 
@@ -2754,7 +2865,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 	copy_last_highmem_page();
 	hibernate_restore_protect_page(handle->buffer);
 	/* Do that only if we have loaded the image entirely */
-	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) {
+	if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages + nr_zero_pages) {
 		memory_bm_recycle(&orig_bm);
 		free_highmem_data();
 	}
@@ -2763,7 +2874,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle)
 int snapshot_image_loaded(struct snapshot_handle *handle)
 {
 	return !(!nr_copy_pages || !last_highmem_page_copied() ||
-		 handle->cur <= nr_meta_pages + nr_copy_pages);
+		 handle->cur <= nr_meta_pages + nr_copy_pages + nr_zero_pages);
 }
 
 #ifdef CONFIG_HIGHMEM
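
For reference, a small standalone sketch (plain C, compilable in userspace) of the PFN packing introduced above: the top bit of a packed PFN flags a page that was all zeros and therefore carries no data in the image file, and restore just memset()s such a page instead of reading it:

#include <stdbool.h>
#include <stdio.h>

#define BITS_PER_LONG		(sizeof(unsigned long) * 8)
#define ENCODED_PFN_ZERO_FLAG	((unsigned long)1 << (BITS_PER_LONG - 1))
#define ENCODED_PFN_MASK	(~ENCODED_PFN_ZERO_FLAG)

/* Pack a PFN for the metadata pages; set the zero flag for all-zero pages. */
static unsigned long pack_pfn(unsigned long pfn, bool zero)
{
	return zero ? (pfn | ENCODED_PFN_ZERO_FLAG) : pfn;
}

int main(void)
{
	unsigned long packed = pack_pfn(0x1234, true);

	/* Decode: split the flag from the real PFN, as unpack_orig_pfns() does. */
	bool zero = packed & ENCODED_PFN_ZERO_FLAG;
	unsigned long pfn = packed & ENCODED_PFN_MASK;

	printf("pfn=%#lx zero=%d\n", pfn, zero);
	return 0;
}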