2010-05-14 12:05:26 -07:00
/*
* OMAP4 specific common source file .
*
* Copyright ( C ) 2010 Texas Instruments , Inc .
* Author :
* Santosh Shilimkar < santosh . shilimkar @ ti . com >
*
*
* This program is free software , you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*/
# include <linux/kernel.h>
# include <linux/init.h>
# include <linux/io.h>
2012-10-18 12:20:08 +03:00
# include <linux/irq.h>
2012-11-05 16:18:28 -06:00
# include <linux/irqchip.h>
2010-05-14 12:05:26 -07:00
# include <linux/platform_device.h>
2011-06-25 18:04:31 -07:00
# include <linux/memblock.h>
2012-08-27 17:43:01 -07:00
# include <linux/of_irq.h>
# include <linux/of_platform.h>
# include <linux/export.h>
2012-12-27 13:10:24 -06:00
# include <linux/irqchip/arm-gic.h>
2013-02-25 14:12:58 +05:30
# include <linux/of_address.h>
2013-07-08 16:01:40 -07:00
# include <linux/reboot.h>
2010-05-14 12:05:26 -07:00
# include <asm/hardware/cache-l2x0.h>
2011-06-25 18:04:31 -07:00
# include <asm/mach/map.h>
2012-01-13 15:00:51 +00:00
# include <asm/memblock.h>
2012-10-18 12:20:08 +03:00
# include <asm/smp_twd.h>
2010-05-14 12:05:26 -07:00
2012-09-20 11:41:16 -07:00
# include "omap-wakeupgen.h"
2012-08-31 10:59:07 -07:00
# include "soc.h"
2012-10-29 20:50:21 -06:00
# include "iomap.h"
2011-11-10 22:45:17 +01:00
# include "common.h"
2012-10-15 12:09:43 -07:00
# include "mmc.h"
2012-10-29 20:56:07 -06:00
# include "prminst44xx.h"
2012-10-29 20:57:39 -06:00
# include "prcm_mpu44xx.h"
2011-01-01 19:56:04 +05:30
# include "omap4-sar-layout.h"
2012-10-02 00:17:06 +05:30
# include "omap-secure.h"
2012-10-29 09:35:35 -07:00
# include "sram.h"
2010-05-14 12:05:26 -07:00
# ifdef CONFIG_CACHE_L2X0
2011-03-03 18:03:25 +05:30
static void __iomem * l2cache_base ;
2010-05-14 12:05:26 -07:00
# endif
2011-01-01 19:56:04 +05:30
static void __iomem * sar_ram_base ;
ARM: OMAP4460: Workaround for ROM bug because of CA9 r2pX GIC control register change.
On OMAP4+ devices, GIC register context is lost when MPUSS hits
the OSWR(Open Switch Retention). On the CPU wakeup path, ROM code
gets executed and one of the steps in it is to restore the
saved context of the GIC. The ROM Code GIC distributor restoration
is split in two parts: CPU specific register done by each CPU and
common register done by only one CPU.
Below is the abstract flow.
...............................................................
- MPUSS in OSWR state.
- CPU0 wakes up on the event(interrupt) and start executing ROM code.
[..]
- CPU0 executes "GIC Restoration:"
[...]
- CPU0 switches to non-secure mode and jumps to OS resume code.
[...]
- CPU0 is online in OS
- CPU0 enables the GIC distributor. GICD.Enable Non-secure = 1
- CPU0 wakes up CPU1 with clock-domain force wakeup method.
- CPU0 continues its execution.
[..]
- CPU1 wakes up and start executing ROM code.
[..]
- CPU1 executes "GIC Restoration:"
[..]
- CPU1 switches to non-secure mode and jumps to OS resume code.
[...]
- CPU1 is online in OS and start executing.
[...] -
GIC Restoration: /* Common routine for HS and GP devices */
{
if (GICD != 1) { /* This will be true in OSWR state */
if (GIC_SAR_BACKUP_STATE == SAVED)
- CPU restores GIC distributor
else
- reconfigure GIC distributor to boot values.
GICD.Enable secure = 1
}
if (GIC_SAR_BACKUP_STATE == SAVED)
- CPU restore its GIC CPU interface registers if saved.
else
- reconfigure its GIC CPU interface registers to boot
values.
}
...............................................................
So as mentioned in the flow, GICD != 1 condition decides how
the GIC registers are handled in ROM code wakeup path from
OSWR. As evident from the flow, ROM code relies on the entire
GICD register value and not specific register bits.
The assumption was valid till CortexA9 r1pX version since there
was only one banked bit to control secure and non-secure GICD.
Secure view which ROM code sees:
bit 0 == Enable Non-secure
Non-secure view which HLOS sees:
bit 0 == Enable secure
But GICD register has changed between CortexA9 r1pX and r2pX.
On r2pX GICD register is composed of 2 bits.
Secure view which ROM code sees:
bit 1 == Enable Non-secure
bit 0 == Enable secure
Non-secure view which HLOS sees:
bit 0 == Enable Non-secure
Hence on OMAP4460(r2pX) devices, if you go through the
above flow again during CPU1 wakeup, GICD == 3 and hence
ROM code fails to understand the real wakeup power state
and reconfigures GIC distributor to boot values. This is
nasty since you lose the entire interrupt controller
context in a live system.
The ROM code fix done on next OMAP4 device (OMAP4470 - r2px) is to
check "GICD.Enable secure != 1" for GIC restoration in OSWR wakeup path.
Since ROM code can't be fixed on OMAP4460 devices, a work around
needs to be implemented. As evident from the flow, as long as
CPU1 sees GICD == 1 in its wakeup path from OSWR, the issue
won't happen. Below is the flow with the work-around.
...............................................................
- MPUSS in OSWR state.
- CPU0 wakes up on the event(interrupt) and start executing ROM code.
[..]
- CPU0 executes "GIC Restoration:"
[..]
- CPU0 switches to non-secure mode and jumps to OS resume code.
[..]
- CPU0 is online in OS.
- CPU0 does GICD.Enable Non-secure = 0
- CPU0 wakes up CPU1 with clock domain force wakeup method.
- CPU0 waits for GICD.Enable Non-secure = 1
- CPU0 continues its execution.
[..]
- CPU1 wakes up and start executing ROM code.
[..]
- CPU1 executes "GIC Restoration:"
[..]
- CPU1 switches to non-secure mode and jumps to OS resume code.
[..]
- CPU1 is online in OS
- CPU1 does GICD.Enable Non-secure = 1
- CPU1 start executing
[...]
...............................................................
With this procedure, the GIC configuration done between the
CPU0 wakeup and CPU1 wakeup will not be lost but during this
short window, CPU0 will not receive interrupts.
The BUG is applicable to only OMAP4460(r2pX) devices.
OMAP4470 (also r2pX) is not affected by this bug because
ROM code has been fixed.
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Kevin Hilman <khilman@ti.com>
2012-10-18 12:20:05 +03:00
static void __iomem * gic_dist_base_addr ;
2012-10-18 12:20:08 +03:00
static void __iomem * twd_base ;
# define IRQ_LOCALTIMER 29
2011-01-01 19:56:04 +05:30
2011-06-25 18:04:31 -07:00
# ifdef CONFIG_OMAP4_ERRATA_I688
/* Used to implement memory barrier on DRAM path */
# define OMAP4_DRAM_BARRIER_VA 0xfe600000
void __iomem * dram_sync , * sram_sync ;
2012-02-02 19:33:55 +05:30
static phys_addr_t paddr ;
static u32 size ;
2011-06-25 18:04:31 -07:00
/*
 * omap_bus_sync - errata i688 barrier helper.
 *
 * Reads each of the two sync locations and writes the value straight
 * back using the relaxed accessors, then issues an isb(). It is a no-op
 * until both dram_sync and sram_sync have been assigned.
 */
void omap_bus_sync(void)
{
	/* Nothing to do before the barrier mappings exist */
	if (!dram_sync || !sram_sync)
		return;

	writel_relaxed(readl_relaxed(dram_sync), dram_sync);
	writel_relaxed(readl_relaxed(sram_sync), sram_sync);
	isb();
}
2012-03-02 16:31:18 +05:30
EXPORT_SYMBOL ( omap_bus_sync ) ;
2011-06-25 18:04:31 -07:00
2012-02-02 19:33:55 +05:30
/* Steal one page physical memory for barrier implementation */
/*
 * Reserve the memory later used for the DRAM barrier mapping.
 *
 * The size is PAGE_SIZE rounded up to SZ_1M; that much memory, SZ_1M
 * aligned, is obtained from arm_memblock_steal(). Both values are stored
 * in the file-scope 'size' and 'paddr', which omap_barriers_init() reads.
 *
 * Always returns 0.
 */
int __init omap_barrier_reserve_memblock ( void )
2011-06-25 18:04:31 -07:00
{
/* Rounded up to SZ_1M, so the reservation is larger than a single page */
size = ALIGN ( PAGE_SIZE , SZ_1M ) ;
2012-01-13 15:00:51 +00:00
paddr = arm_memblock_steal ( size , SZ_1M ) ;
2012-02-02 19:33:55 +05:30
return 0 ;
}
/*
 * Set up the two addresses that omap_bus_sync() touches.
 *
 * A single static I/O mapping is created at OMAP4_DRAM_BARRIER_VA for the
 * memory recorded in 'paddr'/'size', using memory type MT_MEMORY_RW_SO.
 * dram_sync is pointed at that mapping and sram_sync at OMAP4_SRAM_VA,
 * and the physical/virtual pair is logged.
 */
void __init omap_barriers_init ( void )
{
struct map_desc dram_io_desc [ 1 ] ;
2011-06-25 18:04:31 -07:00
dram_io_desc [ 0 ] . virtual = OMAP4_DRAM_BARRIER_VA ;
dram_io_desc [ 0 ] . pfn = __phys_to_pfn ( paddr ) ;
dram_io_desc [ 0 ] . length = size ;
2013-10-24 10:26:40 +01:00
dram_io_desc [ 0 ] . type = MT_MEMORY_RW_SO ;
2011-06-25 18:04:31 -07:00
iotable_init ( dram_io_desc , ARRAY_SIZE ( dram_io_desc ) ) ;
/* From here on omap_bus_sync() becomes active: both pointers are non-NULL */
dram_sync = ( void __iomem * ) dram_io_desc [ 0 ] . virtual ;
sram_sync = ( void __iomem * ) OMAP4_SRAM_VA ;
pr_info ( " OMAP4: Map 0x%08llx to 0x%08lx for dram barrier \n " ,
( long long ) paddr , dram_io_desc [ 0 ] . virtual ) ;
}
2012-02-02 19:33:55 +05:30
# else
/* CONFIG_OMAP4_ERRATA_I688 is not set: there are no barrier mappings to create. */
void __init omap_barriers_init(void)
{
}
2011-06-25 18:04:31 -07:00
# endif
2010-05-14 12:05:26 -07:00
/*
 * Map the interrupt-controller register windows at fixed physical
 * addresses and initialise the GIC.
 *
 * Three windows are ioremap()ed: the GIC distributor (kept in
 * gic_dist_base_addr), the local TWD block (kept in twd_base) and the GIC
 * CPU interface (local only). Any mapping failure is fatal (BUG_ON).
 * The wakeup generator is initialised before gic_init() is called.
 */
void __init gic_init_irq ( void )
{
2011-11-15 17:22:45 +00:00
void __iomem * omap_irq_base ;
2010-05-14 12:05:26 -07:00
/* Static mapping, never released */
gic_dist_base_addr = ioremap ( OMAP44XX_GIC_DIST_BASE , SZ_4K ) ;
BUG_ON ( ! gic_dist_base_addr ) ;
2012-10-18 12:20:08 +03:00
/* TWD registers are read and written by gic_timer_retrigger() */
twd_base = ioremap ( OMAP44XX_LOCAL_TWD_BASE , SZ_4K ) ;
BUG_ON ( ! twd_base ) ;
2010-05-14 12:05:26 -07:00
/* Static mapping, never released */
2011-05-17 03:51:26 -07:00
omap_irq_base = ioremap ( OMAP44XX_GIC_CPU_BASE , SZ_512 ) ;
BUG_ON ( ! omap_irq_base ) ;
2010-12-04 15:55:14 +00:00
2010-06-16 22:19:47 +05:30
omap_wakeupgen_init ( ) ;
2011-05-17 03:51:26 -07:00
/* NOTE(review): the literal 29 equals IRQ_LOCALTIMER defined in this file; confirm whether the macro is meant here */
gic_init ( 0 , 29 , gic_dist_base_addr , omap_irq_base ) ;
2010-05-14 12:05:26 -07:00
}
ARM: OMAP4460: Workaround for ROM bug because of CA9 r2pX GIC control register change.
On OMAP4+ devices, GIC register context is lost when MPUSS hits
the OSWR(Open Switch Retention). On the CPU wakeup path, ROM code
gets executed and one of the steps in it is to restore the
saved context of the GIC. The ROM Code GIC distributor restoration
is split in two parts: CPU specific register done by each CPU and
common register done by only one CPU.
Below is the abstract flow.
...............................................................
- MPUSS in OSWR state.
- CPU0 wakes up on the event(interrupt) and start executing ROM code.
[..]
- CPU0 executes "GIC Restoration:"
[...]
- CPU0 switches to non-secure mode and jumps to OS resume code.
[...]
- CPU0 is online in OS
- CPU0 enables the GIC distributor. GICD.Enable Non-secure = 1
- CPU0 wakes up CPU1 with clock-domain force wakeup method.
- CPU0 continues its execution.
[..]
- CPU1 wakes up and start executing ROM code.
[..]
- CPU1 executes "GIC Restoration:"
[..]
- CPU1 switches to non-secure mode and jumps to OS resume code.
[...]
- CPU1 is online in OS and start executing.
[...] -
GIC Restoration: /* Common routine for HS and GP devices */
{
if (GICD != 1) { /* This will be true in OSWR state */
if (GIC_SAR_BACKUP_STATE == SAVED)
- CPU restores GIC distributor
else
- reconfigure GIC distributor to boot values.
GICD.Enable secure = 1
}
if (GIC_SAR_BACKUP_STATE == SAVED)
- CPU restore its GIC CPU interface registers if saved.
else
- reconfigure its GIC CPU interface registers to boot
values.
}
...............................................................
So as mentioned in the flow, GICD != 1 condition decides how
the GIC registers are handled in ROM code wakeup path from
OSWR. As evident from the flow, ROM code relies on the entire
GICD register value and not specific register bits.
The assumption was valid till CortexA9 r1pX version since there
was only one banked bit to control secure and non-secure GICD.
Secure view which ROM code sees:
bit 0 == Enable Non-secure
Non-secure view which HLOS sees:
bit 0 == Enable secure
But GICD register has changed between CortexA9 r1pX and r2pX.
On r2pX GICD register is composed of 2 bits.
Secure view which ROM code sees:
bit 1 == Enable Non-secure
bit 0 == Enable secure
Non-secure view which HLOS sees:
bit 0 == Enable Non-secure
Hence on OMAP4460(r2pX) devices, if you go through the
above flow again during CPU1 wakeup, GICD == 3 and hence
ROM code fails to understand the real wakeup power state
and reconfigures GIC distributor to boot values. This is
nasty since you lose the entire interrupt controller
context in a live system.
The ROM code fix done on next OMAP4 device (OMAP4470 - r2px) is to
check "GICD.Enable secure != 1" for GIC restoration in OSWR wakeup path.
Since ROM code can't be fixed on OMAP4460 devices, a work around
needs to be implemented. As evident from the flow, as long as
CPU1 sees GICD == 1 in its wakeup path from OSWR, the issue
won't happen. Below is the flow with the work-around.
...............................................................
- MPUSS in OSWR state.
- CPU0 wakes up on the event(interrupt) and start executing ROM code.
[..]
- CPU0 executes "GIC Restoration:"
[..]
- CPU0 switches to non-secure mode and jumps to OS resume code.
[..]
- CPU0 is online in OS.
- CPU0 does GICD.Enable Non-secure = 0
- CPU0 wakes up CPU1 with clock domain force wakeup method.
- CPU0 waits for GICD.Enable Non-secure = 1
- CPU0 continues its execution.
[..]
- CPU1 wakes up and start executing ROM code.
[..]
- CPU1 executes "GIC Restoration:"
[..]
- CPU1 switches to non-secure mode and jumps to OS resume code.
[..]
- CPU1 is online in OS
- CPU1 does GICD.Enable Non-secure = 1
- CPU1 start executing
[...]
...............................................................
With this procedure, the GIC configuration done between the
CPU0 wakeup and CPU1 wakeup will not be lost but during this
short window, CPU0 will not receive interrupts.
The BUG is applicable to only OMAP4460(r2pX) devices.
OMAP4470 (also r2pX) is not affected by this bug because
ROM code has been fixed.
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Tero Kristo <t-kristo@ti.com>
Signed-off-by: Kevin Hilman <khilman@ti.com>
2012-10-18 12:20:05 +03:00
/*
 * gic_dist_disable - write 0 to the GIC distributor control register.
 *
 * Silently does nothing when the distributor base has not been mapped.
 */
void gic_dist_disable(void)
{
	if (!gic_dist_base_addr)
		return;

	__raw_writel(0x0, gic_dist_base_addr + GIC_DIST_CTRL);
}
2012-10-18 12:20:08 +03:00
bool gic_dist_disabled ( void )
{
return ! ( __raw_readl ( gic_dist_base_addr + GIC_DIST_CTRL ) & 0x1 ) ;
}
/*
 * gic_timer_retrigger - recover a local timer interrupt that was lost
 * while the GIC distributor was disabled.
 *
 * If the TWD interrupt status is raised but the IRQ_LOCALTIMER bit is not
 * pending in the distributor, the event is acknowledged in the TWD. For a
 * timer that is not in periodic mode, the counter is then loaded with 1
 * and the timer re-enabled so the interrupt is raised again.
 *
 * Does nothing when either register window has not been mapped, in line
 * with the NULL guard in gic_dist_disable().
 */
void gic_timer_retrigger(void)
{
	u32 twd_int, gic_int, twd_ctrl;

	if (!twd_base || !gic_dist_base_addr)
		return;

	twd_int = __raw_readl(twd_base + TWD_TIMER_INTSTAT);
	gic_int = __raw_readl(gic_dist_base_addr + GIC_DIST_PENDING_SET);
	twd_ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL);

	/* Nothing was lost: no timer event, or the GIC already has it pending */
	if (!twd_int || (gic_int & BIT(IRQ_LOCALTIMER)))
		return;

	/*
	 * The local timer interrupt got lost while the distributor was
	 * disabled. Ack the pending interrupt, and retrigger it.
	 */
	pr_warn(" %s: lost localtimer interrupt \n ", __func__);
	__raw_writel(1, twd_base + TWD_TIMER_INTSTAT);

	/* Only a non-periodic timer is reloaded and restarted here */
	if (twd_ctrl & TWD_TIMER_CONTROL_PERIODIC)
		return;

	__raw_writel(1, twd_base + TWD_TIMER_COUNTER);
	twd_ctrl |= TWD_TIMER_CONTROL_ENABLE;
	__raw_writel(twd_ctrl, twd_base + TWD_TIMER_CONTROL);
}
2010-05-14 12:05:26 -07:00
# ifdef CONFIG_CACHE_L2X0
2010-07-31 21:40:10 +05:30
2011-03-03 18:03:25 +05:30
/*
 * omap4_get_l2cache_base - accessor for the mapped L2 cache controller
 * registers.
 *
 * Returns the file-scope l2cache_base pointer as it currently stands.
 */
void __iomem *omap4_get_l2cache_base(void)
{
	return l2cache_base;
}
2010-07-31 21:40:10 +05:30
/*
 * OMAP4 replacement for outer_cache.disable (installed by
 * omap_l2_cache_init()): flush the whole outer cache, then call
 * omap_smc1() service 0x102 with argument 0 to switch the PL310 off.
 */
static void omap4_l2x0_disable ( void )
{
2014-01-10 01:27:08 +01:00
outer_flush_all ( ) ;
2010-07-31 21:40:10 +05:30
/* Disable PL310 L2 Cache controller */
omap_smc1 ( 0x102 , 0x0 ) ;
}
2011-02-22 10:00:44 +01:00
/*
 * OMAP4 implementation of outer_cache.set_debug.
 *
 * @val: value handed unchanged to omap_smc1() service 0x100.
 */
static void omap4_l2x0_set_debug(unsigned long val)
{
	/* Service 0x100 programs the PL310 L2 cache controller debug register */
	omap_smc1(0x100, val);
}
2010-05-14 12:05:26 -07:00
/*
 * Early initcall that configures and enables the PL310 L2 cache on
 * OMAP44xx.
 *
 * Steps, in order: map the controller registers, build the auxiliary
 * control value (which differs between OMAP4430 ES1.0 and later
 * revisions), pass that value to omap_smc1() service 0x109 on everything
 * except ES1.0, enable the controller through service 0x102, run the
 * generic l2x0 initialisation, and finally install the OMAP4-specific
 * outer_cache.disable and outer_cache.set_debug hooks.
 *
 * Returns 0 on success, -ENODEV when not running on OMAP44xx and -ENOMEM
 * when the register window cannot be mapped.
 */
static int __init omap_l2_cache_init ( void )
{
2010-11-19 23:01:03 +05:30
u32 aux_ctrl = 0 ;
2010-05-14 12:05:26 -07:00
/*
* To avoid code running on other OMAPs in
* multi-omap builds
*/
if ( ! cpu_is_omap44xx ( ) )
return - ENODEV ;
/* Static mapping, never released */
l2cache_base = ioremap ( OMAP44XX_L2CACHE_BASE , SZ_4K ) ;
2011-03-03 17:36:52 +05:30
if ( WARN_ON ( ! l2cache_base ) )
return - ENOMEM ;
2010-05-14 12:05:26 -07:00
/*
2010-09-16 18:44:47 +05:30
* 16-way associativity, parity disabled
* Way size - 32KB (es1.0)
* Way size - 64KB (es2.0+)
2010-05-14 12:05:26 -07:00
*/
2010-11-19 23:01:03 +05:30
/* Settings common to every revision */
aux_ctrl = ( ( 1 < < L2X0_AUX_CTRL_ASSOCIATIVITY_SHIFT ) |
/* NOTE(review): bit 25 is set via a bare literal; its meaning is not visible in this file - confirm against the PL310 documentation */
( 0x1 < < 25 ) |
( 0x1 < < L2X0_AUX_CTRL_NS_LOCKDOWN_SHIFT ) |
( 0x1 < < L2X0_AUX_CTRL_NS_INT_CTRL_SHIFT ) ) ;
omap4: l2x0: enable instruction and data prefetching
Enabling L2 prefetching improves performance as shown on Panda
ES2.1 board with mem test, and it has measurable impact on
performances. I think we should consider it, even though it damages
"writes" a bit. (rebased to k.org)
Usually the prefetch is used at both levels together L1 + L2, however,
to enable the CP15 prefetch engines, these are under security, and on
GP devices, we cannot enable it(e.g. on PandaBoard). However, just
enabling PL310 prefetch seems to provide performance improvement,
as shown in the data below (from Ubuntu) and would be a great thing
to pull in.
What prefetch does is enable automatic next line prefetching. With this
enabled, whenever the PL310 receives a cachable read request, it
automatically prefetches the following cache line as well.
Measurement Data:
==
STOCK 10.10 WITHOUT PATCH
========================
~# ./memspeed
size 8388608 8192k 8M
offset 8388608, 0
buffers 0x2aaad000 0x2b2ad000
copy libc 133 MB/s
copy Android v5 273 MB/s
copy Android NEON 235 MB/s
copy INT32 116 MB/s
copy ASM ARM 187 MB/s
copy ASM VLDM 64 204 MB/s
copy ASM VLDM 128 173 MB/s
copy ASM VLD1 216 MB/s
read ASM ARM 286 MB/s
read ASM VLDM 242 MB/s
read ASM VLD1 286 MB/s
write libc 1947 MB/s
write ASM ARM 1943 MB/s
write ASM VSTM 1942 MB/s
write ASM VST1 1935 MB/s
10.10 + PATCH
=============
~# ./memspeed
size 8388608 8192k 8M
offset 8388608, 0
buffers 0x2ab17000 0x2b317000
copy libc 129 MB/s
copy Android v5 256 MB/s
copy Android NEON 356 MB/s
copy INT32 127 MB/s
copy ASM ARM 321 MB/s
copy ASM VLDM 64 337 MB/s
copy ASM VLDM 128 321 MB/s
copy ASM VLD1 350 MB/s
read ASM ARM 496 MB/s
read ASM VLDM 470 MB/s
read ASM VLD1 488 MB/s
write libc 1701 MB/s
write ASM ARM 1682 MB/s
write ASM VSTM 1693 MB/s
write ASM VST1 1681 MB/s
Signed-off-by: Mans Rullgard <mans@mansr.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
2010-11-19 23:01:04 +05:30
/* ES1.0 only gets the smaller way size; no prefetch or other extras */
if ( omap_rev ( ) = = OMAP4430_REV_ES1_0 ) {
2010-11-19 23:01:03 +05:30
aux_ctrl | = 0x2 < < L2X0_AUX_CTRL_WAY_SIZE_SHIFT ;
omap4: l2x0: enable instruction and data prefetching
Enabling L2 prefetching improves performance as shown on Panda
ES2.1 board with mem test, and it has measurable impact on
performances. I think we should consider it, even though it damages
"writes" a bit. (rebased to k.org)
Usually the prefetch is used at both levels together L1 + L2, however,
to enable the CP15 prefetch engines, these are under security, and on
GP devices, we cannot enable it(e.g. on PandaBoard). However, just
enabling PL310 prefetch seems to provide performance improvement,
as shown in the data below (from Ubuntu) and would be a great thing
to pull in.
What prefetch does is enable automatic next line prefetching. With this
enabled, whenever the PL310 receives a cachable read request, it
automatically prefetches the following cache line as well.
Measurement Data:
==
STOCK 10.10 WITHOUT PATCH
========================
~# ./memspeed
size 8388608 8192k 8M
offset 8388608, 0
buffers 0x2aaad000 0x2b2ad000
copy libc 133 MB/s
copy Android v5 273 MB/s
copy Android NEON 235 MB/s
copy INT32 116 MB/s
copy ASM ARM 187 MB/s
copy ASM VLDM 64 204 MB/s
copy ASM VLDM 128 173 MB/s
copy ASM VLD1 216 MB/s
read ASM ARM 286 MB/s
read ASM VLDM 242 MB/s
read ASM VLD1 286 MB/s
write libc 1947 MB/s
write ASM ARM 1943 MB/s
write ASM VSTM 1942 MB/s
write ASM VST1 1935 MB/s
10.10 + PATCH
=============
~# ./memspeed
size 8388608 8192k 8M
offset 8388608, 0
buffers 0x2ab17000 0x2b317000
copy libc 129 MB/s
copy Android v5 256 MB/s
copy Android NEON 356 MB/s
copy INT32 127 MB/s
copy ASM ARM 321 MB/s
copy ASM VLDM 64 337 MB/s
copy ASM VLDM 128 321 MB/s
copy ASM VLD1 350 MB/s
read ASM ARM 496 MB/s
read ASM VLDM 470 MB/s
read ASM VLD1 488 MB/s
write libc 1701 MB/s
write ASM ARM 1682 MB/s
write ASM VSTM 1693 MB/s
write ASM VST1 1681 MB/s
Signed-off-by: Mans Rullgard <mans@mansr.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
2010-11-19 23:01:04 +05:30
} else {
/* Later revisions: larger way size plus share override, prefetch and early BRESP bits */
aux_ctrl | = ( ( 0x3 < < L2X0_AUX_CTRL_WAY_SIZE_SHIFT ) |
2010-11-19 23:01:05 +05:30
( 1 < < L2X0_AUX_CTRL_SHARE_OVERRIDE_SHIFT ) |
omap4: l2x0: enable instruction and data prefetching
Enabling L2 prefetching improves performance as shown on Panda
ES2.1 board with mem test, and it has measurable impact on
performances. I think we should consider it, even though it damages
"writes" a bit. (rebased to k.org)
Usually the prefetch is used at both levels together L1 + L2, however,
to enable the CP15 prefetch engines, these are under security, and on
GP devices, we cannot enable it(e.g. on PandaBoard). However, just
enabling PL310 prefetch seems to provide performance improvement,
as shown in the data below (from Ubuntu) and would be a great thing
to pull in.
What prefetch does is enable automatic next line prefetching. With this
enabled, whenever the PL310 receives a cachable read request, it
automatically prefetches the following cache line as well.
Measurement Data:
==
STOCK 10.10 WITHOUT PATCH
========================
~# ./memspeed
size 8388608 8192k 8M
offset 8388608, 0
buffers 0x2aaad000 0x2b2ad000
copy libc 133 MB/s
copy Android v5 273 MB/s
copy Android NEON 235 MB/s
copy INT32 116 MB/s
copy ASM ARM 187 MB/s
copy ASM VLDM 64 204 MB/s
copy ASM VLDM 128 173 MB/s
copy ASM VLD1 216 MB/s
read ASM ARM 286 MB/s
read ASM VLDM 242 MB/s
read ASM VLD1 286 MB/s
write libc 1947 MB/s
write ASM ARM 1943 MB/s
write ASM VSTM 1942 MB/s
write ASM VST1 1935 MB/s
10.10 + PATCH
=============
~# ./memspeed
size 8388608 8192k 8M
offset 8388608, 0
buffers 0x2ab17000 0x2b317000
copy libc 129 MB/s
copy Android v5 256 MB/s
copy Android NEON 356 MB/s
copy INT32 127 MB/s
copy ASM ARM 321 MB/s
copy ASM VLDM 64 337 MB/s
copy ASM VLDM 128 321 MB/s
copy ASM VLD1 350 MB/s
read ASM ARM 496 MB/s
read ASM VLDM 470 MB/s
read ASM VLD1 488 MB/s
write libc 1701 MB/s
write ASM ARM 1682 MB/s
write ASM VSTM 1693 MB/s
write ASM VST1 1681 MB/s
Signed-off-by: Mans Rullgard <mans@mansr.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
2010-11-19 23:01:04 +05:30
( 1 < < L2X0_AUX_CTRL_DATA_PREFETCH_SHIFT ) |
2010-11-19 23:01:06 +05:30
( 1 < < L2X0_AUX_CTRL_INSTR_PREFETCH_SHIFT ) |
( 1 < < L2X0_AUX_CTRL_EARLY_BRESP_SHIFT ) ) ;
omap4: l2x0: enable instruction and data prefetching
Enabling L2 prefetching improves performance as shown on Panda
ES2.1 board with mem test, and it has measurable impact on
performances. I think we should consider it, even though it damages
"writes" a bit. (rebased to k.org)
Usually the prefetch is used at both levels together L1 + L2, however,
to enable the CP15 prefetch engines, these are under security, and on
GP devices, we cannot enable it(e.g. on PandaBoard). However, just
enabling PL310 prefetch seems to provide performance improvement,
as shown in the data below (from Ubuntu) and would be a great thing
to pull in.
What prefetch does is enable automatic next line prefetching. With this
enabled, whenever the PL310 receives a cachable read request, it
automatically prefetches the following cache line as well.
Measurement Data:
==
STOCK 10.10 WITHOUT PATCH
========================
~# ./memspeed
size 8388608 8192k 8M
offset 8388608, 0
buffers 0x2aaad000 0x2b2ad000
copy libc 133 MB/s
copy Android v5 273 MB/s
copy Android NEON 235 MB/s
copy INT32 116 MB/s
copy ASM ARM 187 MB/s
copy ASM VLDM 64 204 MB/s
copy ASM VLDM 128 173 MB/s
copy ASM VLD1 216 MB/s
read ASM ARM 286 MB/s
read ASM VLDM 242 MB/s
read ASM VLD1 286 MB/s
write libc 1947 MB/s
write ASM ARM 1943 MB/s
write ASM VSTM 1942 MB/s
write ASM VST1 1935 MB/s
10.10 + PATCH
=============
~# ./memspeed
size 8388608 8192k 8M
offset 8388608, 0
buffers 0x2ab17000 0x2b317000
copy libc 129 MB/s
copy Android v5 256 MB/s
copy Android NEON 356 MB/s
copy INT32 127 MB/s
copy ASM ARM 321 MB/s
copy ASM VLDM 64 337 MB/s
copy ASM VLDM 128 321 MB/s
copy ASM VLD1 350 MB/s
read ASM ARM 496 MB/s
read ASM VLDM 470 MB/s
read ASM VLD1 488 MB/s
write libc 1701 MB/s
write ASM ARM 1682 MB/s
write ASM VSTM 1693 MB/s
write ASM VST1 1681 MB/s
Signed-off-by: Mans Rullgard <mans@mansr.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Tested-by: Nishanth Menon <nm@ti.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
2010-11-19 23:01:04 +05:30
}
/* ES1.0 is skipped here; presumably service 0x109 writes the auxiliary control register - TODO confirm */
if ( omap_rev ( ) ! = OMAP4430_REV_ES1_0 )
omap_smc1 ( 0x109 , aux_ctrl ) ;
/* Enable PL310 L2 Cache controller */
omap_smc1 ( 0x102 , 0x1 ) ;
2010-11-19 23:01:03 +05:30
2012-07-04 17:57:34 +05:30
/* With a populated device tree use the OF variant; otherwise pass the mapped base explicitly */
if ( of_have_populated_dt ( ) )
l2x0_of_init ( aux_ctrl , L2X0_AUX_CTRL_MASK ) ;
else
l2x0_init ( l2cache_base , aux_ctrl , L2X0_AUX_CTRL_MASK ) ;
2010-05-14 12:05:26 -07:00
2010-07-31 21:40:10 +05:30
/*
* Override default outer_cache.disable with an OMAP4
* specific one
*/
outer_cache . disable = omap4_l2x0_disable ;
2011-02-22 10:00:44 +01:00
outer_cache . set_debug = omap4_l2x0_set_debug ;
2010-07-31 21:40:10 +05:30
2010-05-14 12:05:26 -07:00
return 0 ;
}
2013-01-11 11:24:18 -08:00
omap_early_initcall ( omap_l2_cache_init ) ;
2010-05-14 12:05:26 -07:00
# endif
2011-01-01 19:56:04 +05:30
/*
 * omap4_get_sar_ram_base - accessor for the mapped SAR RAM window.
 *
 * Returns the file-scope sar_ram_base pointer as it currently stands.
 */
void __iomem *omap4_get_sar_ram_base(void)
{
	return sar_ram_base;
}
/*
* SAR RAM used to save and restore the HW
* context in low power modes
*/
/*
 * Early initcall: map 16K of SAR RAM at the SoC-specific base and store
 * the mapping in sar_ram_base.
 *
 * Returns 0 on success, -ENODEV on a SoC that is neither OMAP44xx nor
 * OMAP54xx, and -ENOMEM when the mapping fails.
 */
static int __init omap4_sar_ram_init(void)
{
	unsigned long sar_base;

	/*
	 * To avoid code running on other OMAPs in
	 * multi-omap builds
	 */
	if (cpu_is_omap44xx())
		sar_base = OMAP44XX_SAR_RAM_BASE;
	else if (soc_is_omap54xx())
		sar_base = OMAP54XX_SAR_RAM_BASE;
	else
		return -ENODEV;	/* unsupported SoC, not an allocation failure */

	/* Static mapping, never released */
	sar_ram_base = ioremap(sar_base, SZ_16K);
	if (WARN_ON(!sar_ram_base))
		return -ENOMEM;

	return 0;
}
2013-01-11 11:24:18 -08:00
omap_early_initcall ( omap4_sar_ram_init ) ;
2012-04-25 17:27:46 +05:30
2012-06-05 16:31:06 +05:30
/*
 * omap_gic_of_init - device-tree based interrupt controller bring-up.
 *
 * On OMAP446x only, the GIC distributor and TWD register windows are
 * looked up in the device tree and mapped first, because the OMAP4460
 * ROM errata workaround needs them. A failed mapping only triggers a
 * warning. Every SoC then initialises the wakeup generator and the
 * irqchip framework.
 */
void __init omap_gic_of_init(void)
{
	struct device_node *np;

	/* Extract GIC distributor and TWD bases for OMAP4460 ROM Errata WA */
	if (cpu_is_omap446x()) {
		np = of_find_compatible_node(NULL, NULL, "arm,cortex-a9-gic");
		gic_dist_base_addr = of_iomap(np, 0);
		/* Drop the reference taken by of_find_compatible_node() */
		of_node_put(np);
		WARN_ON(!gic_dist_base_addr);

		np = of_find_compatible_node(NULL, NULL,
					     "arm,cortex-a9-twd-timer");
		twd_base = of_iomap(np, 0);
		of_node_put(np);
		WARN_ON(!twd_base);
	}

	omap_wakeupgen_init();
	irqchip_init();
}