cd1e64935f
The ISA states: "when ACC[i] contains defined data, the contents of VSRs 4×i to 4×i+3 are undefined until either a VSX Move From ACC instruction is used to copy the contents of ACC[i] to VSRs 4×i to 4×i+3 or some other instruction directly writes to one of these VSRs." We aren't doing this. This test only works on Power10 because the hardware implementation happens to map ACC0 to VSRs 0-3, but will fail on any other implementation that doesn't do this. So add xxmfacc between writing to the accumulator and accessing the VSRs. Fixes: 3527e1ab9a79 ("selftests/powerpc: Add matrix multiply assist (MMA) test") Signed-off-by: Rashmica Gupta <rashmica@linux.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220617043935.428083-1-rashmica@linux.ibm.com
37 lines
761 B
ArmAsm
37 lines
761 B
ArmAsm
/* SPDX-License-Identifier: GPL-2.0-or-later
 *
 * Test basic matrix multiply assist (MMA) functionality if available.
 *
 * Copyright 2020, Alistair Popple, IBM Corp.
 */
|
|
/*
 * test_mma - run one MMA integer GER on accumulator 0 and store the result.
 *
 * In:   r3 = address of the first 16-byte input vector
 *       r4 = address of the second 16-byte input vector
 *       r5 = address of a 64-byte result buffer
 * Out:  four consecutive 16-byte rows written at r5, taken from
 *       VSR0..VSR3 in that order
 * Clob: VSR0-VSR5, ACC0, r5 (left pointing at the last row written)
 *
 * The MMA instructions are emitted as raw instruction words, presumably
 * so the file still assembles with toolchains that lack the MMA
 * mnemonics - confirm before replacing them with mnemonics.
 */
	.global test_mma
test_mma:
	/*
	 * Load the two source operands: VSR4 from the vector at r3 and
	 * VSR5 from the vector at r4.  The accumulator itself is never
	 * loaded from memory; it is zeroed below.
	 */
	lxvh8x	4,0,3
	lxvh8x	5,0,4

	/* xxsetaccz 0: clear and prime accumulator 0 */
	.long	0x7c030162

	/*
	 * xxmtacc 0 (0x7c010162) would instead prime the accumulator from
	 * the VSRs; it is not needed because xxsetaccz already primed it.
	 */

	/*
	 * xvi16ger2s 0,4,5: 16-bit signed integer GER (rank-2 update,
	 * saturating) of VSR4 and VSR5 into accumulator 0.
	 */
	.long	0xec042958

	/*
	 * xxmfacc 0: deprime the accumulator, copying ACC0 into VSR0-VSR3.
	 * This step is required: while ACC0 holds defined data the ISA
	 * leaves VSR0-VSR3 undefined until xxmfacc (or a direct write to
	 * them), and only some implementations happen to alias the two.
	 */
	.long	0x7c000162

	/* Store VSR0..VSR3 as consecutive 16-byte rows starting at r5 */
	stxvw4x	0,0,5
	addi	5,5,16
	stxvw4x	1,0,5
	addi	5,5,16
	stxvw4x	2,0,5
	addi	5,5,16
	stxvw4x	3,0,5
	/* No final pointer bump: r5 is not read again before returning */

	blr
|