/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) IBM Corporation, 2011
 * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
 * Author - Balbir Singh <bsingharora@gmail.com>
 */
#include <asm/ppc_asm.h>
#include <asm/errno.h>
#include <asm/export.h>
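
/*
 * err1/err2/err3 attach an exception table entry to the instruction
 * that follows them, so a machine check on that load/store branches
 * to the matching fixup label (.Ldo_err1/.Ldo_err2/.Ldone) below.
 */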
	.macro err1
100:
	EX_TABLE(100b,.Ldo_err1)
	.endm

	.macro err2
200:
	EX_TABLE(200b,.Ldo_err2)
	.endm

	.macro err3
300:	EX_TABLE(300b,.Ldone)
	.endm
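
/*
 * Fixup paths: .Ldo_err2 restores the non-volatile registers saved by
 * the main loop, then falls through to .Ldo_err1, which re-copies the
 * remainder one byte at a time so the exact number of bytes not copied
 * can be returned (via .Ldone if a byte faults again).
 */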
.Ldo_err2:
	ld	r22,STK_REG(R22)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r14,STK_REG(R14)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Ldo_err1:
	/* Do a byte by byte copy to get the exact remaining size */
	mtctr	r7
46:
err3;	lbz	r0,0(r4)
	addi	r4,r4,1
err3;	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	46b
	li	r3,0
	blr
.Ldone:
	mfctr	r3
	blr
x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()
In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation, Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface: specifically,
which addresses are valid to pass as source and destination, and which
faults / exceptions are handled.
Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation, other archs likely need / want an explicit path for
this case:
On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
>
> On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
> >
> > However now I see that copy_user_generic() works for the wrong reason.
> > It works because the exception on the source address due to poison
> > looks no different than a write fault on the user address to the
> > caller, it's still just a short copy. So it makes copy_to_user() work
> > for the wrong reason relative to the name.
>
> Right.
>
> And it won't work that way on other architectures. On x86, we have a
> generic function that can take faults on either side, and we use it
> for both cases (and for the "in_user" case too), but that's an
> artifact of the architecture oddity.
>
> In fact, it's probably wrong even on x86 - because it can hide bugs -
> but writing those things is painful enough that everybody prefers
> having just one function.
Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user() or copy_mc_to_kernel().
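As a rough illustration (not part of this patch; example_read_from_pmem()
is a hypothetical caller, and the sketch assumes the copy_to_user()-style
convention that the return value is the number of bytes not copied):

    #include <linux/errno.h>
    #include <linux/types.h>
    #include <linux/uaccess.h>	/* copy_mc_to_kernel() */

    /*
     * Hypothetical helper: read from pmem/device memory that may
     * contain poison. A machine check mid-copy shows up as a short
     * copy rather than a kernel crash.
     */
    static int example_read_from_pmem(void *dst, const void *src, size_t len)
    {
    	unsigned long rem;

    	/* Returns 0 on success, or the number of bytes not copied */
    	rem = copy_mc_to_kernel(dst, src, len);

    	return rem ? -EIO : 0;	/* surface poison as a clean I/O error */
    }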
Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.
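For context, the shape of the x86 dispatch this describes is roughly the
following sketch (copy_mc_fragile_enabled is an assumed platform flag,
not spelled out in this message):

    #include <linux/string.h>	/* memcpy() */
    #include <linux/types.h>

    /* Slow, exception-hardened backend (the old memcpy_mcsafe() body) */
    extern unsigned long copy_mc_fragile(void *dst, const void *src,
    				     unsigned int len);
    /* Assumed flag: true when the platform needs the careful path */
    extern bool copy_mc_fragile_enabled;

    unsigned long copy_mc_to_kernel(void *dst, const void *src,
    				unsigned int len)
    {
    	if (copy_mc_fragile_enabled)
    		return copy_mc_fragile(dst, src, len);
    	memcpy(dst, src, len);	/* no machine-check recovery needed */
    	return 0;		/* nothing left uncopied */
    }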
One side-effect of this reorganization is that separating copy_mc_64.S
into its own file means that perf no longer needs to track dependencies
for its memcpy_64.S benchmarks.
[ bp: Massage a bit. ]
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: <stable@vger.kernel.org>
Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com
Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com
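/*
 * copy_mc_generic(dst, src, count)
 *   r3 = destination, r4 = source, r5 = byte count
 *
 * Returns 0 on success, or the number of bytes not copied if a
 * machine check interrupted the copy.
 */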
_GLOBAL(copy_mc_generic)
	mr	r7,r5
	cmpldi	r5,16
	blt	.Lshort_copy
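
/*
 * r6 = bytes needed to reach 8B source alignment; mtocrf copies its
 * low bits into cr7 so each 1/2/4-byte alignment step below can be
 * taken or skipped with a single bf (branch-if-bit-false) test.
 */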
.Lcopy:
	/* Get the source 8B aligned */
	neg	r6,r4
	mtocrf	0x01,r6
	clrldi	r6,r6,(64-3)

	bf	cr7*4+3,1f
err1;	lbz	r0,0(r4)
	addi	r4,r4,1
err1;	stb	r0,0(r3)
	addi	r3,r3,1
	subi	r7,r7,1

1:	bf	cr7*4+2,2f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

2:	bf	cr7*4+1,3f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

3:	sub	r5,r5,r6
	cmpldi	r5,128

	mflr	r0
	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)
	std	r17,STK_REG(R17)(r1)
	std	r18,STK_REG(R18)(r1)
	std	r19,STK_REG(R19)(r1)
	std	r20,STK_REG(R20)(r1)
	std	r21,STK_REG(R21)(r1)
	std	r22,STK_REG(R22)(r1)
	std	r0,STACKFRAMESIZE+16(r1)
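
	/* Fewer than 128B left after alignment: skip the cacheline loop */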
	blt	5f
	srdi	r6,r5,7
	mtctr	r6
/* Now do cacheline (128B) sized loads and stores. */
.align 5
4:
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
err2;	ld	r15,64(r4)
err2;	ld	r16,72(r4)
err2;	ld	r17,80(r4)
err2;	ld	r18,88(r4)
err2;	ld	r19,96(r4)
err2;	ld	r20,104(r4)
err2;	ld	r21,112(r4)
err2;	ld	r22,120(r4)
	addi	r4,r4,128
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
err2;	std	r15,64(r3)
err2;	std	r16,72(r3)
err2;	std	r17,80(r3)
err2;	std	r18,88(r3)
err2;	std	r19,96(r3)
err2;	std	r20,104(r3)
err2;	std	r21,112(r3)
err2;	std	r22,120(r3)
	addi	r3,r3,128
	subi	r7,r7,128
	bdnz	4b
	clrldi	r5,r5,(64-7)

	/* Up to 127B to go */
5:	srdi	r6,r5,4
	mtocrf	0x01,r6
6:	bf	cr7*4+1,7f
err2;	ld	r0,0(r4)
err2;	ld	r6,8(r4)
err2;	ld	r8,16(r4)
err2;	ld	r9,24(r4)
err2;	ld	r10,32(r4)
err2;	ld	r11,40(r4)
err2;	ld	r12,48(r4)
err2;	ld	r14,56(r4)
	addi	r4,r4,64
err2;	std	r0,0(r3)
err2;	std	r6,8(r3)
err2;	std	r8,16(r3)
err2;	std	r9,24(r3)
err2;	std	r10,32(r3)
err2;	std	r11,40(r3)
err2;	std	r12,48(r3)
err2;	std	r14,56(r3)
	addi	r3,r3,64
	subi	r7,r7,64
7:	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	ld	r17,STK_REG(R17)(r1)
	ld	r18,STK_REG(R18)(r1)
	ld	r19,STK_REG(R19)(r1)
	ld	r20,STK_REG(R20)(r1)
	ld	r21,STK_REG(R21)(r1)
	ld	r22,STK_REG(R22)(r1)
	addi	r1,r1,STACKFRAMESIZE
/* Up to 63B to go */
	bf	cr7*4+2,8f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
err1;	ld	r8,16(r4)
err1;	ld	r9,24(r4)
	addi	r4,r4,32
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
err1;	std	r8,16(r3)
err1;	std	r9,24(r3)
	addi	r3,r3,32
	subi	r7,r7,32
/* Up to 31B to go */
8:	bf	cr7*4+3,9f
err1;	ld	r0,0(r4)
err1;	ld	r6,8(r4)
	addi	r4,r4,16
err1;	std	r0,0(r3)
err1;	std	r6,8(r3)
	addi	r3,r3,16
	subi	r7,r7,16

9:	clrldi	r5,r5,(64-4)
/* Up to 15B to go */
.Lshort_copy:
	mtocrf	0x01,r5
	bf	cr7*4+0,12f
err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
err1;	lwz	r6,4(r4)
	addi	r4,r4,8
err1;	stw	r0,0(r3)
err1;	stw	r6,4(r3)
	addi	r3,r3,8
	subi	r7,r7,8
12:	bf	cr7*4+1,13f
err1;	lwz	r0,0(r4)
	addi	r4,r4,4
err1;	stw	r0,0(r3)
	addi	r3,r3,4
	subi	r7,r7,4

13:	bf	cr7*4+2,14f
err1;	lhz	r0,0(r4)
	addi	r4,r4,2
err1;	sth	r0,0(r3)
	addi	r3,r3,2
	subi	r7,r7,2

14:	bf	cr7*4+3,15f
err1;	lbz	r0,0(r4)
err1;	stb	r0,0(r3)

15:	li	r3,0
	blr
EXPORT_SYMBOL_GPL(copy_mc_generic);