powerpc/e6500: Optimize hugepage TLB misses
Some workloads take a lot of TLB misses despite using traditional hugepages. Handle these TLB misses in the asm fastpath rather than going through a bunch of C code. With this patch I measured around a 5x speedup in handling hugepage TLB misses. Signed-off-by: Scott Wood <scottwood@freescale.com>
This commit is contained in:
parent
fb326e9841
commit
c89ca8ab74
@ -398,18 +398,18 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
|
|||||||
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
|
rldicl r15,r16,64-PUD_SHIFT+3,64-PUD_INDEX_SIZE-3
|
||||||
clrrdi r15,r15,3
|
clrrdi r15,r15,3
|
||||||
cmpdi cr0,r14,0
|
cmpdi cr0,r14,0
|
||||||
bge tlb_miss_fault_e6500 /* Bad pgd entry or hugepage; bail */
|
bge tlb_miss_huge_e6500 /* Bad pgd entry or hugepage; bail */
|
||||||
ldx r14,r14,r15 /* grab pud entry */
|
ldx r14,r14,r15 /* grab pud entry */
|
||||||
|
|
||||||
rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
|
rldicl r15,r16,64-PMD_SHIFT+3,64-PMD_INDEX_SIZE-3
|
||||||
clrrdi r15,r15,3
|
clrrdi r15,r15,3
|
||||||
cmpdi cr0,r14,0
|
cmpdi cr0,r14,0
|
||||||
bge tlb_miss_fault_e6500
|
bge tlb_miss_huge_e6500
|
||||||
ldx r14,r14,r15 /* Grab pmd entry */
|
ldx r14,r14,r15 /* Grab pmd entry */
|
||||||
|
|
||||||
mfspr r10,SPRN_MAS0
|
mfspr r10,SPRN_MAS0
|
||||||
cmpdi cr0,r14,0
|
cmpdi cr0,r14,0
|
||||||
bge tlb_miss_fault_e6500
|
bge tlb_miss_huge_e6500
|
||||||
|
|
||||||
/* Now we build the MAS for a 2M indirect page:
|
/* Now we build the MAS for a 2M indirect page:
|
||||||
*
|
*
|
||||||
@ -428,6 +428,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT)
|
|||||||
clrrdi r15,r16,21 /* make EA 2M-aligned */
|
clrrdi r15,r16,21 /* make EA 2M-aligned */
|
||||||
mtspr SPRN_MAS2,r15
|
mtspr SPRN_MAS2,r15
|
||||||
|
|
||||||
|
tlb_miss_huge_done_e6500:
|
||||||
lbz r15,TCD_ESEL_NEXT(r11)
|
lbz r15,TCD_ESEL_NEXT(r11)
|
||||||
lbz r16,TCD_ESEL_MAX(r11)
|
lbz r16,TCD_ESEL_MAX(r11)
|
||||||
lbz r14,TCD_ESEL_FIRST(r11)
|
lbz r14,TCD_ESEL_FIRST(r11)
|
||||||
@ -456,6 +457,50 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT)
|
|||||||
tlb_epilog_bolted
|
tlb_epilog_bolted
|
||||||
rfi
|
rfi
|
||||||
|
|
||||||
|
/*
 * Huge page TLB miss path.
 *
 * Reached from the table walk through "bge tlb_miss_huge_e6500", which
 * always follows "cmpdi cr0,r14,0"; cr0 therefore still describes the
 * entry in r14 (zero or positive) on arrival.
 * r16 is used as the faulting effective address.
 *
 * Writes MAS1, MAS2 and MAS7_MAS3, reloads MAS0 into r10 and then
 * branches to tlb_miss_huge_done_e6500.  Overwrites r10, r14 and r15.
 */
tlb_miss_huge_e6500:
|
||||||
|
beq tlb_miss_fault_e6500 /* cr0 from the caller's cmpdi: r14 == 0, take the fault path */
|
||||||
|
li r10,1 /* r10 = 1, inserted as the top bit of r14 below */
|
||||||
|
andi. r15,r14,HUGEPD_SHIFT_MASK@l /* r15 = psize */
|
||||||
|
rldimi r14,r10,63,0 /* Set PD_HUGE */
|
||||||
|
xor r14,r14,r15 /* Clear size bits */
|
||||||
|
ldx r14,0,r14 /* r14 = doubleword at that address; treated as the PTE below */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Now we build the MAS for a huge page.
|
||||||
|
*
|
||||||
|
* MAS 0 : ESEL needs to be filled by software round-robin
|
||||||
|
* - can be handled by indirect code
|
||||||
|
* MAS 1 : Need to clear IND and set TSIZE
|
||||||
|
* MAS 2,3+7: Needs to be redone similar to non-tablewalk handler
|
||||||
|
*/
|
||||||
|
|
||||||
|
subi r15,r15,10 /* Convert psize to tsize */
|
||||||
|
mfspr r10,SPRN_MAS1
|
||||||
|
rlwinm r10,r10,0,~MAS1_IND /* Clear IND */
|
||||||
|
rlwimi r10,r15,MAS1_TSIZE_SHIFT,MAS1_TSIZE_MASK /* Insert tsize (r15) into the TSIZE field */
|
||||||
|
mtspr SPRN_MAS1,r10
|
||||||
|
|
||||||
|
li r10,-0x400 /* r10 = ~0x3ff, base mask that is shifted by tsize next */
|
||||||
|
sld r15,r10,r15 /* Generate mask based on size */
|
||||||
|
and r10,r16,r15 /* r10 = EA (r16) with the bits below the mask cleared */
|
||||||
|
rldicr r15,r14,64-(PTE_RPN_SHIFT-PAGE_SHIFT),63-PAGE_SHIFT /* r15 = PTE rotated right by PTE_RPN_SHIFT-PAGE_SHIFT, low PAGE_SHIFT bits cleared */
|
||||||
|
rlwimi r10,r14,32-19,27,31 /* Insert WIMGE */
|
||||||
|
clrldi r15,r15,PAGE_SHIFT /* Clear crap at the top */
|
||||||
|
rlwimi r15,r14,32-8,22,25 /* Move in U bits */
|
||||||
|
mtspr SPRN_MAS2,r10
|
||||||
|
andi. r10,r14,_PAGE_DIRTY /* Sets cr0 for the bne below; the rlwimi in between does not touch cr0 */
|
||||||
|
rlwimi r15,r14,32-2,26,31 /* Move in BAP bits */
|
||||||
|
|
||||||
|
/* Mask out SW and UW if !DIRTY (XXX optimize this !) */
|
||||||
|
bne 1f /* _PAGE_DIRTY set: leave SW/UW as they are */
|
||||||
|
li r10,MAS3_SW|MAS3_UW
|
||||||
|
andc r15,r15,r10 /* r15 &= ~(MAS3_SW|MAS3_UW) */
|
||||||
|
1:
|
||||||
|
mtspr SPRN_MAS7_MAS3,r15
|
||||||
|
|
||||||
|
mfspr r10,SPRN_MAS0 /* NOTE(review): the shared tail presumably expects MAS0 in r10, as the indirect path loads it -- confirm */
|
||||||
|
b tlb_miss_huge_done_e6500 /* Join the indirect-page code for the ESEL handling */
|
||||||
|
|
||||||
tlb_miss_kernel_e6500:
|
tlb_miss_kernel_e6500:
|
||||||
ld r14,PACA_KERNELPGD(r13)
|
ld r14,PACA_KERNELPGD(r13)
|
||||||
cmpldi cr1,r15,8 /* Check for vmalloc region */
|
cmpldi cr1,r15,8 /* Check for vmalloc region */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user