/* -----------------------------------------------------------------------
 *
 *   neon.uc - RAID-6 syndrome calculation using ARM NEON instructions
 *
 *   Copyright (C) 2012 Rob Herring
 *   Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
 *
 *   Based on altivec.uc:
 *     Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
 *   Boston MA 02111-1307, USA; either version 2 of the License, or
 *   (at your option) any later version; incorporated herein by reference.
 *
 * ----------------------------------------------------------------------- */

/*
 * neon$#.c
 *
 * $#-way unrolled NEON intrinsics math RAID-6 instruction set
 *
 * This file is postprocessed using unroll.awk
 */
#include <arm_neon.h>

/* The native vector unit for this implementation is a 16-byte NEON register. */
typedef uint8x16_t unative_t;

#define NSIZE	sizeof(unative_t)
/*
 * The SHLBYTE() operation shifts each byte left by 1, *not*
 * rolling over into the next byte.
 */
static inline unative_t SHLBYTE(unative_t v)
{
	/* Per-lane u8 shift: bits shifted out of a byte are discarded. */
	return vshlq_n_u8(v, 1);
}
/*
 * The MASK() operation returns 0xFF in any byte for which the high
 * bit is 1, 0x00 for any byte for which the high bit is 0.
 */
static inline unative_t MASK(unative_t v)
{
	/*
	 * Arithmetic (signed) shift right by 7 replicates each byte's
	 * sign bit across the whole byte: 0x80..0xFF -> 0xFF, else 0x00.
	 */
	return (unative_t)vshrq_n_s8((int8x16_t)v, 7);
}
/*
 * PMUL() performs a carry-less (polynomial, GF(2)) multiply of the
 * corresponding bytes of its two inputs, keeping the low 8 bits.
 */
static inline unative_t PMUL(unative_t v, unative_t u)
{
	return (unative_t)vmulq_p8((poly8x16_t)v, (poly8x16_t)u);
}
/*
 * Generate both RAID-6 parities over @disks devices of @bytes bytes each.
 * ptrs[0..disks-3] are the data disks; ptrs[disks-2] receives P (plain
 * XOR parity) and ptrs[disks-1] receives Q (the Reed-Solomon syndrome).
 */
void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p, *q;
	int d, z, z0;

	register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
	const unative_t x1d = vdupq_n_u8(0x1d);

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	for (d = 0; d < bytes; d += NSIZE*$#) {
		wq$$ = wp$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
		for (z = z0-1; z >= 0; z--) {
			wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
			wp$$ = veorq_u8(wp$$, wd$$);
			/*
			 * wq = wq * {02} ^ wd in GF(2^8): shift each byte
			 * left one, XOR in the reduction constant 0x1d for
			 * any byte whose high bit overflowed.
			 */
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);
			w2$$ = vandq_u8(w2$$, x1d);
			w1$$ = veorq_u8(w1$$, w2$$);
			wq$$ = veorq_u8(w1$$, wd$$);
		}
		vst1q_u8(&p[d+NSIZE*$$], wp$$);
		vst1q_u8(&q[d+NSIZE*$$], wq$$);
	}
}
/*
 * Update P and Q for a partial stripe write that touches only data disks
 * [start, stop].  The old P/Q contents are folded in by XOR, so disks
 * outside the range do not have to be read.
 */
void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop,
				    unsigned long bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p, *q;
	int d, z, z0;

	register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
	const unative_t x1d = vdupq_n_u8(0x1d);

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	for (d = 0; d < bytes; d += NSIZE*$#) {
		wq$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
		wp$$ = veorq_u8(vld1q_u8(&p[d+$$*NSIZE]), wq$$);

		/* P/Q data pages */
		for (z = z0-1; z >= start; z--) {
			wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
			wp$$ = veorq_u8(wp$$, wd$$);
			/* wq = wq * {02} ^ wd */
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);
			w2$$ = vandq_u8(w2$$, x1d);
			w1$$ = veorq_u8(w1$$, w2$$);
			wq$$ = veorq_u8(w1$$, wd$$);
		}
		/* P/Q left side optimization */
		for (z = start-1; z >= 3; z -= 4) {
			/*
			 * wq = wq * {02}^4: shift the low nibble of each
			 * byte up, and fold the high nibble back in via a
			 * polynomial multiply by the reduction constant.
			 */
			w2$$ = vshrq_n_u8(wq$$, 4);
			w1$$ = vshlq_n_u8(wq$$, 4);
			w2$$ = PMUL(w2$$, x1d);
			wq$$ = veorq_u8(w1$$, w2$$);
		}
		/* Remaining 0-3 multiplications by {02}. */
		switch (z) {
		case 2:		/* wq = wq * {02}^3 */
			w2$$ = vshrq_n_u8(wq$$, 5);
			w1$$ = vshlq_n_u8(wq$$, 3);
			w2$$ = PMUL(w2$$, x1d);
			wq$$ = veorq_u8(w1$$, w2$$);
			break;
		case 1:		/* wq = wq * {02}^2 */
			w2$$ = vshrq_n_u8(wq$$, 6);
			w1$$ = vshlq_n_u8(wq$$, 2);
			w2$$ = PMUL(w2$$, x1d);
			wq$$ = veorq_u8(w1$$, w2$$);
			break;
		case 0:		/* wq = wq * {02} */
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);
			w2$$ = vandq_u8(w2$$, x1d);
			wq$$ = veorq_u8(w1$$, w2$$);
			break;
		default:	/* z == -1: no residual multiply needed */
			break;
		}
		/* Fold the updated values into the old P/Q. */
		w1$$ = vld1q_u8(&q[d+NSIZE*$$]);
		wq$$ = veorq_u8(wq$$, w1$$);

		vst1q_u8(&p[d+NSIZE*$$], wp$$);
		vst1q_u8(&q[d+NSIZE*$$], wq$$);
	}
}