/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * TLB Exception Handling for ARC
 *
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * Vineetg: April 2011 :
 *  -MMU v1: moved out legacy code into a separate file
 *  -MMU v3: PD{0,1} bits layout changed: They don't overlap anymore,
 *   helps avoid a shift when preparing PD0 from PTE
 *
 * Vineetg: July 2009
 *  -For MMU V2, we need not do heuristics at the time of committing a D-TLB
 *   entry, so that it doesn't knock out its I-TLB entry
 *  -Some more fine tuning:
 *   bmsk instead of add, asl.cc instead of branch, delay slot utilised, etc
 *
 * Vineetg: July 2009
 *  -Practically rewrote the I/D TLB Miss handlers
 *   Now 40 and 135 instructions apiece as compared to 131 and 449 resp.
 *   Hence Leaner by 1.5K
 *   Used Conditional arithmetic to replace excessive branching
 *   Also used short instructions wherever possible
 *
 * Vineetg: Aug 13th 2008
 *  -Passing ECR (Exception Cause REG) to do_page_fault() for printing
 *   more information in case of a Fatality
 *
 * Vineetg: March 25th Bug #92690
 *  -Added Debug Code to check if sw-ASID == hw-ASID
 *
 * Rahul Trivedi, Amit Bhor: Codito Technologies 2004
 */
#include <linux/linkage.h>
#include <linux/pgtable.h>
#include <asm/entry.h>
#include <asm/mmu.h>
#include <asm/arcregs.h>
#include <asm/cache.h>
#include <asm/processor.h>

#ifdef CONFIG_ISA_ARCOMPACT
;-----------------------------------------------------------------
; ARC700 Exception Handling doesn't auto-switch stack and it only provides
; ONE scratch AUX reg "ARC_REG_SCRATCH_DATA0"
;
; For Non-SMP, the scratch AUX reg is repurposed to cache task PGD, so a
; "global" is used to free-up FIRST core reg to be able to code the rest of
; exception prologue (IRQ auto-disabled on Exceptions, so it's IRQ-safe).
; Since the Fast Path TLB Miss handler is coded with 4 regs, the remaining 3
; need to be saved as well by extending the "global" to be 4 words. Hence
; ".size ex_saved_reg1, 16"
; [All of this dance is to avoid stack switching for each TLB Miss, since we
; need to save only a handful of regs, as opposed to the complete reg file]
;
; For ARC700 SMP, the "global" obviously can't be used to free up the FIRST
; core reg as that would not be SMP safe.
; Thus the scratch AUX reg is used instead (and no longer caches the task PGD).
; To save the remaining 3 regs per cpu, the global is made "per-cpu".
; Epilogue thus has to locate the "per-cpu" storage for regs.
; To avoid cache line bouncing the per-cpu global is aligned/sized per
; L1_CACHE_SHIFT, despite fundamentally needing to be 12 bytes only. Hence
; ".size ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)"
; As simple as that....
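;
; Roughly, the sizing above corresponds to the following C declaration
; (an illustrative sketch only, not part of the build; the struct name
; ex_save_area and the UP/SMP split into two symbols are invented here):
;
;	#ifdef CONFIG_SMP
;	/* one cache-line sized and aligned slot per cpu, so that slots of
;	 * different cpus never share a line; word 0 is unused since r0 is
;	 * parked in ARC_REG_SCRATCH_DATA0, words 1..3 hold r1-r3 */
;	struct ex_save_area {
;		u32 regs[4];
;	} __aligned(1 << L1_CACHE_SHIFT);
;	static struct ex_save_area ex_saved_reg1[CONFIG_NR_CPUS];
;	#else
;	/* UP: a single 16 byte slot, words 0..3 hold r0-r3 */
;	static u32 ex_saved_reg1[4];
;	#endif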
;--------------------------------------------------------------------------

; scratch memory to save [r0-r3] used to code TLB refill Handler
ARCFP_DATA ex_saved_reg1
	.align 1 << L1_CACHE_SHIFT
	.type	ex_saved_reg1, @object
#ifdef CONFIG_SMP
	.size	ex_saved_reg1, (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
ex_saved_reg1:
	.zero (CONFIG_NR_CPUS << L1_CACHE_SHIFT)
#else
	.size	ex_saved_reg1, 16
ex_saved_reg1:
	.zero 16
#endif
.macro TLBMISS_FREEUP_REGS
#ifdef CONFIG_SMP
	sr  r0, [ARC_REG_SCRATCH_DATA0]	; freeup r0 to code with
	GET_CPU_ID  r0			; get to per cpu scratch mem,
	asl r0, r0, L1_CACHE_SHIFT	; cache line wide per cpu
	add r0, @ex_saved_reg1, r0
#else
	st  r0, [@ex_saved_reg1]
	mov_s r0, @ex_saved_reg1
#endif
	st_s r1, [r0, 4]
	st_s r2, [r0, 8]
	st_s r3, [r0, 12]
.endm

.macro TLBMISS_RESTORE_REGS
#ifdef CONFIG_SMP
	GET_CPU_ID  r0			; get to per cpu scratch mem
	asl r0, r0, L1_CACHE_SHIFT	; each is cache line wide
	add r0, @ex_saved_reg1, r0
	ld_s r3, [r0, 12]
	ld_s r2, [r0, 8]
	ld_s r1, [r0, 4]
	lr  r0, [ARC_REG_SCRATCH_DATA0]
#else
	mov_s r0, @ex_saved_reg1
	ld_s r3, [r0, 12]
	ld_s r2, [r0, 8]
	ld_s r1, [r0, 4]
	ld_s r0, [r0]
#endif
.endm
#else	/* ARCv2 */

.macro TLBMISS_FREEUP_REGS
#ifdef CONFIG_ARC_HAS_LL64
	std   r0, [sp, -16]
	std   r2, [sp, -8]
#else
	PUSH  r0
	PUSH  r1
	PUSH  r2
	PUSH  r3
#endif
.endm

.macro TLBMISS_RESTORE_REGS
#ifdef CONFIG_ARC_HAS_LL64
	ldd   r0, [sp, -16]
	ldd   r2, [sp, -8]
#else
	POP   r3
	POP   r2
	POP   r1
	POP   r0
#endif
.endm

#endif
;============================================================================
;TLB Miss handling Code
;============================================================================
#ifndef PMD_SHIFT
#define PMD_SHIFT PUD_SHIFT
#endif

#ifndef PUD_SHIFT
#define PUD_SHIFT PGDIR_SHIFT
#endif
;-----------------------------------------------------------------------------
; This macro does the page-table lookup for the faulting address.
; OUT: r0 = PTE faulted on, r1 = ptr to PTE, r2 = Faulting V-address
.macro LOAD_FAULT_PTE
	lr  r2, [efa]

#ifdef CONFIG_ISA_ARCV2
	lr  r1, [ARC_REG_SCRATCH_DATA0]	; current pgd
#else
	GET_CURR_TASK_ON_CPU  r1
	ld  r1, [r1, TASK_ACT_MM]
	ld  r1, [r1, MM_PGD]
#endif

	lsr     r0, r2, PGDIR_SHIFT	; Bits for indexing into PGD
	ld.as   r3, [r1, r0]		; PGD entry corresp to faulting addr
	tst	r3, r3
	bz	do_slow_path_pf		; if no Page Table, do page fault
#if CONFIG_PGTABLE_LEVELS > 3
	lsr     r0, r2, PUD_SHIFT	; Bits for indexing into PUD
	and	r0, r0, (PTRS_PER_PUD - 1)
	ld.as	r1, [r3, r0]		; PUD entry
	tst	r1, r1
	bz	do_slow_path_pf
	mov	r3, r1
#endif
#if CONFIG_PGTABLE_LEVELS > 2
	lsr     r0, r2, PMD_SHIFT	; Bits for indexing into PMD
	and	r0, r0, (PTRS_PER_PMD - 1)
	ld.as	r1, [r3, r0]		; PMD entry
	tst	r1, r1
	bz	do_slow_path_pf
	mov	r3, r1
#endif

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	and.f	0, r3, _PAGE_HW_SZ	; Is this Huge PMD (thp)
	add2.nz	r1, r1, r0
	bnz.d	2f			; YES: PGD == PMD has THP PTE: stop pgd walk
	mov.nz	r0, r3
#endif
	and	r1, r3, PAGE_MASK

	; Get the PTE entry: The idea is
	; (1) x = addr >> PAGE_SHIFT	-> masks page-off bits from @fault-addr
	; (2) y = x & (PTRS_PER_PTE - 1) -> to get index
	; (3) z = (pgtbl + y * 4)

#ifdef CONFIG_ARC_HAS_PAE40
#define PTE_SIZE_LOG	3	/* 8 == 2 ^ 3 */
#else
#define PTE_SIZE_LOG	2	/* 4 == 2 ^ 2 */
#endif

	; multiply in step (3) above avoided by shifting lesser in step (1)
	lsr     r0, r2, (PAGE_SHIFT - PTE_SIZE_LOG)
	and     r0, r0, ((PTRS_PER_PTE - 1) << PTE_SIZE_LOG)
	ld.aw	r0, [r1, r0]		; r0: PTE (lower word only for PAE40)
					; r1: PTE ptr

2:

.endm
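; In C terms, LOAD_FAULT_PTE is roughly the walk below (an illustrative
; sketch only, not built; it ignores THP, PAE40 and the 4-level case, and
; the function name fault_pte_walk is invented; pgd is the current task's
; page directory, from SCRATCH_DATA0 on ARCv2 or active_mm->pgd otherwise):
;
;	static u32 *fault_pte_walk(u32 *pgd, u32 efa)	/* ret: PTE ptr (r1) */
;	{
;		u32 ent = pgd[efa >> PGDIR_SHIFT];	/* PGD entry */
;
;		if (!ent)
;			return NULL;			/* -> do_slow_path_pf */
;	#if CONFIG_PGTABLE_LEVELS > 2
;		ent = ((u32 *)ent)[(efa >> PMD_SHIFT) & (PTRS_PER_PMD - 1)];
;		if (!ent)
;			return NULL;
;	#endif
;		/* steps (1)-(3) above: the "* 4" of step (3) is folded into
;		 * step (1) by shifting right by PTE_SIZE_LOG bits less */
;		return (u32 *)(ent & PAGE_MASK) +
;			((efa >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
;	}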
;-----------------------------------------------------------------
; Convert Linux PTE entry into TLB entry
; A one-word PTE entry is programmed as two-word TLB Entry [PD0:PD1] in mmu
; (for PAE40, two-word PTE, while three-word TLB Entry [PD0:PD1:PD1HI])
; IN: r0 = PTE, r1 = ptr to PTE
.macro CONV_PTE_TO_TLB
	and r3, r0, PTE_BITS_RWX	;          r  w  x
	asl r2, r3, 3			; Kr Kw Kx 0  0  0 (GLOBAL, kernel only)
	and.f 0,  r0, _PAGE_GLOBAL
	or.z  r2, r2, r3		; Kr Kw Kx Ur Uw Ux (!GLOBAL, user page)
	and r3, r0, PTE_BITS_NON_RWX_IN_PD1 ; Extract PFN+cache bits from PTE
	or  r3, r3, r2

	sr  r3, [ARC_REG_TLBPD1]	; paddr[31..13] | Kr Kw Kx Ur Uw Ux | C

#ifdef CONFIG_ARC_HAS_PAE40
	ld	r3, [r1, 4]		; paddr[39..32]
	sr	r3, [ARC_REG_TLBPD1HI]
#endif

	and r2, r0, PTE_BITS_IN_PD0	; Extract other PTE flags: (V)alid, (G)lb
	lr  r3, [ARC_REG_TLBPD0]	; MMU prepares PD0 with vaddr and asid
	or  r3, r3, r2			; S | vaddr | {sasid|asid}
	sr  r3, [ARC_REG_TLBPD0]	; rewrite PD0
.endm
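; In C terms the conversion above is roughly (an illustrative sketch only,
; not built; pd0/pd1/perm are invented names, the PAE40 high word is left
; out, and write_aux_reg()/read_aux_reg() are the usual aux accessors):
;
;	u32 rwx  = pte & PTE_BITS_RWX;			/*  .  .  .  r  w  x */
;	u32 perm = rwx << 3;				/* Kr Kw Kx  0  0  0 */
;
;	if (!(pte & _PAGE_GLOBAL))
;		perm |= rwx;				/* Kr Kw Kx Ur Uw Ux */
;
;	pd1 = (pte & PTE_BITS_NON_RWX_IN_PD1) | perm;	/* pfn | perm | cache */
;	write_aux_reg(ARC_REG_TLBPD1, pd1);
;
;	/* MMU has already primed PD0 with vaddr | asid; OR in V/G etc. */
;	pd0 = read_aux_reg(ARC_REG_TLBPD0) | (pte & PTE_BITS_IN_PD0);
;	write_aux_reg(ARC_REG_TLBPD0, pd0);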
;-----------------------------------------------------------------
; Commit the TLB entry into MMU
.macro COMMIT_ENTRY_TO_MMU
#ifdef CONFIG_ARC_MMU_V3
	/* Get free TLB slot: Set = computed from vaddr, way = random */
	sr  TLBGetIndex, [ARC_REG_TLBCOMMAND]

	/* Commit the Write */
	sr  TLBWriteNI, [ARC_REG_TLBCOMMAND]
#else
	sr  TLBInsertEntry, [ARC_REG_TLBCOMMAND]
#endif

88:
.endm

ARCFP_CODE	;Fast Path Code, candidate for ICCM
;-----------------------------------------------------------------------------
; I-TLB Miss Exception Handler
;-----------------------------------------------------------------------------
ENTRY(EV_TLBMissI)

	TLBMISS_FREEUP_REGS
;----------------------------------------------------------------
; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA
	LOAD_FAULT_PTE
;----------------------------------------------------------------
; VERIFY_PTE: Check if PTE permissions approp for executing code
	cmp_s	r2, VMALLOC_START
	mov_s	r2, (_PAGE_PRESENT | _PAGE_EXECUTE)
	or.hs	r2, r2, _PAGE_GLOBAL
	and     r3, r0, r2		; Mask out NON Flag bits from PTE
	xor.f   r3, r3, r2		; check ( ( pte & flags_test ) == flags_test )
	bnz     do_slow_path_pf

	; Let Linux VM know that the page was accessed
	or      r0, r0, _PAGE_ACCESSED	; set Accessed Bit
	st_s    r0, [r1]		; Write back PTE

	CONV_PTE_TO_TLB
	COMMIT_ENTRY_TO_MMU
	TLBMISS_RESTORE_REGS
EV_TLBMissI_fast_ret:	; additional label for VDK OS-kit instrumentation
	rtie

END(EV_TLBMissI)
;-----------------------------------------------------------------------------
; D-TLB Miss Exception Handler
;-----------------------------------------------------------------------------
ENTRY(EV_TLBMissD)

	TLBMISS_FREEUP_REGS
;----------------------------------------------------------------
; Get the PTE corresponding to V-addr accessed
; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA
	LOAD_FAULT_PTE
;----------------------------------------------------------------
; VERIFY_PTE: Chk if PTE permissions approp for data access (R/W/R+W)
	cmp_s	r2, VMALLOC_START
	mov_s	r2, _PAGE_PRESENT	; common bit for K/U PTE
	or.hs	r2, r2, _PAGE_GLOBAL	; kernel PTE only

	; Linux PTE [RWX] bits are semantically overloaded:
	; -If PAGE_GLOBAL set, they refer to kernel-only flags (vmalloc)
	; -Otherwise they are user-mode permissions, and those are exactly
	;  same for kernel mode as well (e.g. copy_(to|from)_user)

	lr      r3, [ecr]
	btst_s  r3, ECR_C_BIT_DTLB_LD_MISS	; Read Access
	or.nz   r2, r2, _PAGE_READ	; chk for Read flag in PTE
	btst_s  r3, ECR_C_BIT_DTLB_ST_MISS	; Write Access
	or.nz   r2, r2, _PAGE_WRITE	; chk for Write flag in PTE
	; Above laddering takes care of XCHG access (both R and W)

	; By now, r2 setup with all the Flags we need to check in PTE
	and     r3, r0, r2		; Mask out NON Flag bits from PTE
	brne.d  r3, r2, do_slow_path_pf	; is ((pte & flags_test) == flags_test)

	;----------------------------------------------------------------
	; UPDATE_PTE: Let Linux VM know that page was accessed/dirty
	or      r0, r0, _PAGE_ACCESSED	; Accessed bit always
	or.nz   r0, r0, _PAGE_DIRTY	; if Write, set Dirty bit as well
	st_s    r0, [r1]		; Write back PTE

	CONV_PTE_TO_TLB
	COMMIT_ENTRY_TO_MMU
	TLBMISS_RESTORE_REGS
EV_TLBMissD_fast_ret:	; additional label for VDK OS-kit instrumentation
	rtie
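; In C terms, the VERIFY_PTE/UPDATE_PTE steps above are roughly (an
; illustrative sketch only, not built; need/ptep are invented names and
; read_miss/write_miss stand for the ECR cause bits tested with btst_s,
; an XCHG miss setting both per the comment above):
;
;	u32 need = _PAGE_PRESENT;
;
;	if (efa >= VMALLOC_START)
;		need |= _PAGE_GLOBAL;		/* kernel-only (vmalloc) PTE */
;	if (read_miss)
;		need |= _PAGE_READ;
;	if (write_miss)
;		need |= _PAGE_WRITE;
;
;	if ((pte & need) != need)
;		goto do_slow_path_pf;		/* let generic fault code decide */
;
;	pte |= _PAGE_ACCESSED;			/* young */
;	if (write_miss)
;		pte |= _PAGE_DIRTY;
;	*ptep = pte;				/* write back, then program TLB */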
;-------- Common routine to call Linux Page Fault Handler -----------
do_slow_path_pf:

#ifdef CONFIG_ISA_ARCV2
	; Set Z flag if exception in U mode. Hardware micro-ops do this on any
	; taken interrupt/exception, and thus is already the case at the entry
	; above, but the ensuing code will have clobbered it by now.
	; EXCEPTION_PROLOGUE called in slow path, relies on correct Z flag set
	lr	r2, [erstatus]
	and	r2, r2, STATUS_U_MASK
	bxor.f	0, r2, STATUS_U_BIT
#endif

	; Restore the 4-scratch regs saved by fast path miss handler
	TLBMISS_RESTORE_REGS

	; Slow path TLB Miss handled as a regular ARC Exception
	; (stack switching / save the complete reg-file).
	b       call_do_page_fault
END(EV_TLBMissD)