2019-06-01 10:08:44 +02:00
/* SPDX-License-Identifier: GPL-2.0-only */
2008-02-09 23:24:09 +01:00
/*
 * Hibernation support for x86-64
 *
 * Copyright 2007 Rafael J. Wysocki <rjw@sisk.pl>
 * Copyright 2005 Andi Kleen <ak@suse.de>
 * Copyright 2004 Pavel Machek <pavel@suse.cz>
 *
 * swsusp_arch_resume must not use any stack or any nonlocal variables while
 * copying pages:
 *
 * It is rewriting one kernel image with another. What is stack in the "old"
 * image could very well be a data page in the "new" image, and overwriting
 * your own stack under you is a bad idea.
 */
2008-02-09 23:24:09 +01:00
2005-04-16 15:20:36 -07:00
.text
# include < l i n u x / l i n k a g e . h >
# include < a s m / s e g m e n t . h >
2009-02-13 11:14:01 -08:00
# include < a s m / p a g e _ t y p e s . h >
2005-09-09 21:28:48 +02:00
# include < a s m / a s m - o f f s e t s . h >
2008-02-09 00:49:13 +03:00
# include < a s m / p r o c e s s o r - f l a g s . h >
2016-01-21 16:49:24 -06:00
# include < a s m / f r a m e . h >
2021-01-21 15:29:34 -06:00
# include < a s m / n o s p e c - b r a n c h . h >
2005-04-16 15:20:36 -07:00
2021-01-21 15:29:35 -06:00
/* code below belongs to the image kernel */
	.align PAGE_SIZE
/*
 * restore_registers - restore the CPU state saved by swsusp_arch_suspend.
 *
 * Reached by an indirect jump from core_restore_code (jmpq *%r8), not by a
 * call.
 *
 * In:    %r9 = page table root to load into %cr3 (restore_image loads it
 *              from restore_cr3)
 * Out:   %rax = 0, in_suspend = 0
 *
 * All general-purpose registers except %rax, plus RFLAGS and the GDT
 * descriptor, are reloaded from saved_context.  Because %rsp is reloaded
 * with the value stored by swsusp_arch_suspend, the final RET uses the
 * return address that was on the stack when swsusp_arch_suspend was
 * entered.
 */
SYM_FUNC_START(restore_registers)
	/* go back to the original page tables */
	movq	%r9, %cr3

	/* Flush TLB, including "global" things (vmalloc) */
	movq	mmu_cr4_features(%rip), %rax
	movq	%rax, %rdx
	andq	$~(X86_CR4_PGE), %rdx
	movq	%rdx, %cr4		/* turn off PGE */
	movq	%cr3, %rcx		/* flush TLB */
	movq	%rcx, %cr3
	movq	%rax, %cr4		/* turn PGE back on */

	/* We don't restore %rax, it must be 0 anyway */
	movq	$saved_context, %rax
	movq	pt_regs_sp(%rax), %rsp
	movq	pt_regs_bp(%rax), %rbp
	movq	pt_regs_si(%rax), %rsi
	movq	pt_regs_di(%rax), %rdi
	movq	pt_regs_bx(%rax), %rbx
	movq	pt_regs_cx(%rax), %rcx
	movq	pt_regs_dx(%rax), %rdx
	movq	pt_regs_r8(%rax), %r8
	movq	pt_regs_r9(%rax), %r9
	movq	pt_regs_r10(%rax), %r10
	movq	pt_regs_r11(%rax), %r11
	movq	pt_regs_r12(%rax), %r12
	movq	pt_regs_r13(%rax), %r13
	movq	pt_regs_r14(%rax), %r14
	movq	pt_regs_r15(%rax), %r15

	/* RFLAGS goes through the (already restored) stack */
	pushq	pt_regs_flags(%rax)
	popfq

	/* Saved in save_processor_state. */
	lgdt	saved_context_gdt_desc(%rax)

	/* %rax = 0: value stored to in_suspend and left in %rax at RET */
	xorl	%eax, %eax

	/* tell the hibernation core that we've just restored the memory */
	movq	%rax, in_suspend(%rip)

	RET
SYM_FUNC_END(restore_registers)
2019-10-11 13:51:04 +02:00
/*
 * swsusp_arch_suspend - snapshot the CPU state, then call swsusp_save().
 *
 * Stores %rsp, %rbp, %rsi, %rdi, %rbx, %rcx, %rdx, %r8-%r15 and RFLAGS into
 * saved_context, and the current %cr3 into restore_cr3.  restore_registers
 * reloads the registers from saved_context on resume.
 *
 * Clobbers %rax before the call.
 * NOTE(review): the value left in %rax at RET is presumably whatever
 * swsusp_save() returned - confirm that FRAME_END does not touch %rax.
 */
SYM_FUNC_START(swsusp_arch_suspend)
	movq	$saved_context, %rax
	movq	%rsp, pt_regs_sp(%rax)
	movq	%rbp, pt_regs_bp(%rax)
	movq	%rsi, pt_regs_si(%rax)
	movq	%rdi, pt_regs_di(%rax)
	movq	%rbx, pt_regs_bx(%rax)
	movq	%rcx, pt_regs_cx(%rax)
	movq	%rdx, pt_regs_dx(%rax)
	movq	%r8, pt_regs_r8(%rax)
	movq	%r9, pt_regs_r9(%rax)
	movq	%r10, pt_regs_r10(%rax)
	movq	%r11, pt_regs_r11(%rax)
	movq	%r12, pt_regs_r12(%rax)
	movq	%r13, pt_regs_r13(%rax)
	movq	%r14, pt_regs_r14(%rax)
	movq	%r15, pt_regs_r15(%rax)

	/* RFLAGS can only be read via the stack */
	pushfq
	popq	pt_regs_flags(%rax)

	/* save cr3 */
	movq	%cr3, %rax
	movq	%rax, restore_cr3(%rip)

	/*
	 * The frame pointer is set up only around the call to swsusp_save(),
	 * not around the bounds of this function: the stack pointer is saved
	 * to and restored from saved_context, so a FRAME_BEGIN/FRAME_END pair
	 * spanning the save and restore paths could corrupt the return
	 * address (kernel bug 150021).  Keeping the pair here still ensures
	 * the frame pointers are correct if swsusp_save() sleeps.
	 */
	FRAME_BEGIN
	call	swsusp_save
	FRAME_END
	RET
SYM_FUNC_END(swsusp_arch_suspend)
2005-04-16 15:20:36 -07:00
2021-01-21 15:29:36 -06:00
/*
 * restore_image - load the register hand-off for core_restore_code and jump
 * to the relocated copy of the restore code.  Does not return.
 *
 * Registers set up here and consumed later:
 *   %r8  = restore_jump_address  (final jump target in core_restore_code)
 *   %r9  = restore_cr3           (loaded into %cr3 by restore_registers)
 *   %rax = temp_pgt              (temporary page table root)
 *   %rbx = mmu_cr4_features      (CR4 value used for the TLB flush)
 *   %rdx = restore_pblist        (head of the list of pages to copy)
 *   %rcx = relocated_restore_code (jump target below)
 *
 * Everything is loaded into registers up front because the code jumped to
 * must not use the stack or nonlocal variables while it copies pages.
 */
SYM_FUNC_START(restore_image)
	/* prepare to jump to the image kernel */
	movq	restore_jump_address(%rip), %r8
	movq	restore_cr3(%rip), %r9

	/* prepare to switch to temporary page tables */
	movq	temp_pgt(%rip), %rax
	movq	mmu_cr4_features(%rip), %rbx

	/* prepare to copy image data to their original locations */
	movq	restore_pblist(%rip), %rdx

	/* jump to relocated restore code */
	movq	relocated_restore_code(%rip), %rcx
	ANNOTATE_RETPOLINE_SAFE
	jmpq	*%rcx
SYM_FUNC_END(restore_image)
2007-10-18 03:04:53 -07:00
/* code below has been relocated to a safe page */
/*
 * core_restore_code - copy the image kernel's pages back to their original
 * page frames, then jump to the image kernel's entry point.
 *
 * In (all set up by restore_image):
 *   %rax = temporary page table root
 *   %rbx = CR4 value (mmu_cr4_features)
 *   %rdx = head of the pbe list (0 terminates the list)
 *   %r8  = address to jump to once copying is done
 *   %r9  = not used here; consumed by restore_registers
 *
 * Clobbers %rcx, %rsi, %rdi, %rdx.  Uses no stack and no memory operands
 * other than the pbe list entries and the pages being copied.
 *
 * This code runs from a special page covered by an identity mapping set up
 * for it from scratch, so it does not depend on the kernel text mapping
 * while copying.  The text mapping in the temporary page tables is needed
 * only for the final jump to the image kernel's entry point.
 */
SYM_FUNC_START(core_restore_code)
	/* switch to temporary page tables */
	movq	%rax, %cr3

	/* flush TLB */
	movq	%rbx, %rcx
	andq	$~(X86_CR4_PGE), %rcx
	movq	%rcx, %cr4		/* turn off PGE */
	movq	%cr3, %rcx		/* flush TLB */
	movq	%rcx, %cr3
	movq	%rbx, %cr4		/* turn PGE back on */

.Lloop:
	/* %rdx = current pbe; a null pointer ends the list */
	testq	%rdx, %rdx
	jz	.Ldone

	/* get addresses from the pbe and copy the page */
	movq	pbe_address(%rdx), %rsi		/* copy source */
	movq	pbe_orig_address(%rdx), %rdi	/* copy destination */
	movq	$(PAGE_SIZE >> 3), %rcx		/* page size in quadwords */
	rep movsq

	/* progress to the next pbe */
	movq	pbe_next(%rdx), %rdx
	jmp	.Lloop

.Ldone:
	/* jump to the restore_registers address from the image header */
	ANNOTATE_RETPOLINE_SAFE
	jmpq	*%r8
SYM_FUNC_END(core_restore_code)