2008-10-23 09:26:29 +04:00
# ifndef _ASM_X86_DESC_H
# define _ASM_X86_DESC_H
2008-01-30 15:31:13 +03:00
# include <asm/desc_defs.h>
# include <asm/ldt.h>
2008-01-30 15:31:14 +03:00
# include <asm/mmu.h>
2011-05-27 11:29:32 +04:00
2008-01-30 15:31:14 +03:00
# include <linux/smp.h>
2012-05-11 11:35:27 +04:00
# include <linux/percpu.h>
2008-01-30 15:31:13 +03:00
2011-05-27 11:29:32 +04:00
static inline void fill_ldt ( struct desc_struct * desc , const struct user_desc * info )
{
desc - > limit0 = info - > limit & 0x0ffff ;
desc - > base0 = ( info - > base_addr & 0x0000ffff ) ;
desc - > base1 = ( info - > base_addr & 0x00ff0000 ) > > 16 ;
desc - > type = ( info - > read_exec_only ^ 1 ) < < 1 ;
desc - > type | = info - > contents < < 2 ;
desc - > s = 1 ;
desc - > dpl = 0x3 ;
desc - > p = info - > seg_not_present ^ 1 ;
desc - > limit = ( info - > limit & 0xf0000 ) > > 16 ;
desc - > avl = info - > useable ;
desc - > d = info - > seg_32bit ;
desc - > g = info - > limit_in_pages ;
desc - > base2 = ( info - > base_addr & 0xff000000 ) > > 24 ;
2008-07-27 19:42:32 +04:00
/*
2011-08-03 17:31:53 +04:00
* Don ' t allow setting of the lm bit . It would confuse
* user_64bit_mode and would get overridden by sysret anyway .
2008-07-27 19:42:32 +04:00
*/
2011-05-27 11:29:32 +04:00
desc - > l = 0 ;
2008-01-30 15:31:13 +03:00
}
2008-01-30 15:31:14 +03:00
/*
 * IDT descriptor pointer and gate table; only declared here.
 * idt_descr is what load_current_idt() loads by default, and idt_table is
 * the table _set_gate() writes to.
 */
extern struct desc_ptr idt_descr ;
extern gate_desc idt_table [ ] ;
2013-06-20 19:45:44 +04:00
/*
 * Descriptor pointer and gate table of the debug IDT; only declared here.
 * debug_idt_table is the table set_nmi_gate() writes to.
 */
extern struct desc_ptr debug_idt_descr ;
extern gate_desc debug_idt_table [ ] ;
2008-01-30 15:31:13 +03:00
2008-05-29 03:19:53 +04:00
/*
 * Container for one GDT: GDT_ENTRIES descriptors, with the structure
 * aligned to PAGE_SIZE.
 */
struct gdt_page {
struct desc_struct gdt [ GDT_ENTRIES ] ;
} __attribute__ ( ( aligned ( PAGE_SIZE ) ) ) ;
2011-05-27 11:29:32 +04:00
2009-04-22 02:00:24 +04:00
/* Per-CPU, page-aligned gdt_page instance; accessed via get_cpu_gdt_table(). */
DECLARE_PER_CPU_PAGE_ALIGNED ( struct gdt_page , gdt_page ) ;
2008-05-29 03:19:53 +04:00
static inline struct desc_struct * get_cpu_gdt_table ( unsigned int cpu )
{
return per_cpu ( gdt_page , cpu ) . gdt ;
}
2008-01-30 15:31:14 +03:00
# ifdef CONFIG_X86_64
2008-01-30 15:31:14 +03:00
/*
 * CONFIG_X86_64 variant: fill @gate for the handler at address @func.
 *
 * Note that @seg is not used: the segment field is always set to
 * __KERNEL_CS.
 */
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
			     unsigned dpl, unsigned ist, unsigned seg)
{
	/* Handler address, split over the three offset fields. */
	gate->offset_low	= PTR_LOW(func);
	gate->offset_middle	= PTR_MIDDLE(func);
	gate->offset_high	= PTR_HIGH(func);

	gate->segment		= __KERNEL_CS;
	gate->ist		= ist;
	gate->type		= type;
	gate->dpl		= dpl;
	gate->p			= 1;

	gate->zero0		= 0;
	gate->zero1		= 0;
}
2008-01-30 15:31:14 +03:00
# else
2008-01-30 15:31:14 +03:00
/*
 * Non-CONFIG_X86_64 variant: fill the two words of @gate.
 *
 * Word a holds @seg in its upper half and the low 16 bits of @base in its
 * lower half.  Word b holds the upper 16 bits of @base plus an access byte
 * in bits 8-15.  @flags is not used.
 */
static inline void pack_gate(gate_desc *gate, unsigned char type,
			     unsigned long base, unsigned dpl, unsigned flags,
			     unsigned short seg)
{
	/* 0x80 ORed with the type and with dpl shifted left by 5. */
	const unsigned int access = (0x80 | type | (dpl << 5)) & 0xff;

	gate->a = (seg << 16) | (base & 0xffff);
	gate->b = (base & 0xffff0000) | (access << 8);
}
2008-01-30 15:31:14 +03:00
# endif
2008-01-30 15:31:27 +03:00
/*
 * Return 1 if both 32-bit words at @ptr are zero, 0 otherwise.
 */
static inline int desc_empty(const void *ptr)
{
	const u32 *words = ptr;

	return words[0] == 0 && words[1] == 0;
}
2008-01-30 15:31:14 +03:00
# ifdef CONFIG_PARAVIRT
# include <asm/paravirt.h>
# else
2011-05-27 11:29:32 +04:00
/*
 * Without CONFIG_PARAVIRT the descriptor-table operations below are plain
 * aliases for the native_*() helpers of this header, or direct inline asm
 * (load_tr, load_ldt).
 *
 * Note: store_tr() assigns to its argument, so it must be given an lvalue.
 */
# define load_TR_desc() native_load_tr_desc()
# define load_gdt(dtr) native_load_gdt(dtr)
# define load_idt(dtr) native_load_idt(dtr)
# define load_tr(tr) asm volatile("ltr %0"::"m" (tr))
# define load_ldt(ldt) asm volatile("lldt %0"::"m" (ldt))
# define store_gdt(dtr) native_store_gdt(dtr)
# define store_idt(dtr) native_store_idt(dtr)
# define store_tr(tr) (tr = native_store_tr())
# define load_TLS(t, cpu) native_load_tls(t, cpu)
# define set_ldt native_set_ldt
# define write_ldt_entry(dt, entry, desc) native_write_ldt_entry(dt, entry, desc)
# define write_gdt_entry(dt, entry, desc, type) native_write_gdt_entry(dt, entry, desc, type)
# define write_idt_entry(dt, entry, g) native_write_idt_entry(dt, entry, g)
2008-07-24 01:21:18 +04:00
/* Without CONFIG_PARAVIRT this is an empty stub. */
static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
{
	/* nothing to do */
}
/* Without CONFIG_PARAVIRT this is an empty stub. */
static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
{
	/* nothing to do */
}
# endif /* CONFIG_PARAVIRT */
2008-01-30 15:31:14 +03:00
2009-03-11 16:43:49 +03:00
/* Execute sldt with @ldt as the memory output operand. */
# define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
2011-05-27 11:29:32 +04:00
/* Copy the gate descriptor @gate into slot @entry of the table @idt. */
static inline void native_write_idt_entry(gate_desc *idt, int entry, const gate_desc *gate)
{
	gate_desc *slot = &idt[entry];

	memcpy(slot, gate, sizeof(*gate));
}
2011-05-27 11:29:32 +04:00
static inline void native_write_ldt_entry ( struct desc_struct * ldt , int entry , const void * desc )
2008-01-30 15:31:14 +03:00
{
memcpy ( & ldt [ entry ] , desc , 8 ) ;
}
2011-05-27 11:29:32 +04:00
static inline void
native_write_gdt_entry ( struct desc_struct * gdt , int entry , const void * desc , int type )
2008-01-30 15:31:14 +03:00
{
unsigned int size ;
2011-05-27 11:29:32 +04:00
2008-01-30 15:31:14 +03:00
switch ( type ) {
2011-05-27 11:29:32 +04:00
case DESC_TSS : size = sizeof ( tss_desc ) ; break ;
case DESC_LDT : size = sizeof ( ldt_desc ) ; break ;
default : size = sizeof ( * gdt ) ; break ;
2008-01-30 15:31:14 +03:00
}
2011-05-27 11:29:32 +04:00
2008-01-30 15:31:14 +03:00
memcpy ( & gdt [ entry ] , desc , size ) ;
}
static inline void pack_descriptor ( struct desc_struct * desc , unsigned long base ,
unsigned long limit , unsigned char type ,
unsigned char flags )
{
desc - > a = ( ( base & 0xffff ) < < 16 ) | ( limit & 0xffff ) ;
desc - > b = ( base & 0xff000000 ) | ( ( base & 0xff0000 ) > > 16 ) |
2008-03-23 11:01:58 +03:00
( limit & 0x000f0000 ) | ( ( type & 0xff ) < < 8 ) |
( ( flags & 0xf ) < < 20 ) ;
2008-01-30 15:31:14 +03:00
desc - > p = 1 ;
}
2011-05-27 11:29:32 +04:00
/*
 * Build a descriptor of the given @type at @d, with base @addr and
 * limit @size.
 *
 * With CONFIG_X86_64, @d is treated as a struct ldttss_desc64, which is
 * zeroed and then filled in.  Otherwise the work is delegated to
 * pack_descriptor() with 0x80 ORed into @type and flags of 0.
 */
static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned type, unsigned size)
{
#ifdef CONFIG_X86_64
	struct ldttss_desc64 *desc64 = d;

	/* Start from all zeroes so that untouched fields stay clear. */
	memset(desc64, 0, sizeof(*desc64));

	desc64->type	= type;
	desc64->p	= 1;

	desc64->limit0	= size & 0xFFFF;
	desc64->limit1	= (size >> 16) & 0xF;

	desc64->base0	= PTR_LOW(addr);
	desc64->base1	= PTR_MIDDLE(addr) & 0xFF;
	desc64->base2	= (PTR_MIDDLE(addr) >> 8) & 0xFF;
	desc64->base3	= PTR_HIGH(addr);
#else
	pack_descriptor((struct desc_struct *)d, addr, size, 0x80 | type, 0);
#endif
}
static inline void __set_tss_desc ( unsigned cpu , unsigned int entry , void * addr )
{
struct desc_struct * d = get_cpu_gdt_table ( cpu ) ;
tss_desc tss ;
/*
* sizeof ( unsigned long ) coming from an extra " long " at the end
* of the iobitmap . See tss_struct definition in processor . h
*
* - 1 ? seg base + limit should be pointing to the address of the
* last valid byte
*/
2008-01-30 15:31:20 +03:00
set_tssldt_descriptor ( & tss , ( unsigned long ) addr , DESC_TSS ,
2008-03-23 11:01:58 +03:00
IO_BITMAP_OFFSET + IO_BITMAP_BYTES +
sizeof ( unsigned long ) - 1 ) ;
2008-01-30 15:31:14 +03:00
write_gdt_entry ( d , entry , & tss , DESC_TSS ) ;
}
/* Shorthand for __set_tss_desc() with the entry fixed to GDT_ENTRY_TSS. */
# define set_tss_desc(cpu, addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
2008-01-30 15:31:14 +03:00
static inline void native_set_ldt ( const void * addr , unsigned int entries )
{
if ( likely ( entries = = 0 ) )
2008-03-23 11:01:58 +03:00
asm volatile ( " lldt %w0 " : : " q " ( 0 ) ) ;
2008-01-30 15:31:14 +03:00
else {
unsigned cpu = smp_processor_id ( ) ;
ldt_desc ldt ;
2008-07-11 20:04:46 +04:00
set_tssldt_descriptor ( & ldt , ( unsigned long ) addr , DESC_LDT ,
entries * LDT_ENTRY_SIZE - 1 ) ;
2008-01-30 15:31:14 +03:00
write_gdt_entry ( get_cpu_gdt_table ( cpu ) , GDT_ENTRY_LDT ,
& ldt , DESC_LDT ) ;
2008-03-23 11:01:58 +03:00
asm volatile ( " lldt %w0 " : : " q " ( GDT_ENTRY_LDT * 8 ) ) ;
2008-01-30 15:31:14 +03:00
}
}
/* Execute ltr with the selector GDT_ENTRY_TSS * 8. */
static inline void native_load_tr_desc(void)
{
	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS * 8));
}
/* Execute lgdt with *@dtr as the memory operand. */
static inline void native_load_gdt(const struct desc_ptr *dtr)
{
	asm volatile("lgdt %0"::"m" (*dtr));
}
/* Execute lidt with *@dtr as the memory operand. */
static inline void native_load_idt(const struct desc_ptr *dtr)
{
	asm volatile("lidt %0"::"m" (*dtr));
}
/* Execute sgdt, storing into *@dtr. */
static inline void native_store_gdt(struct desc_ptr *dtr)
{
	asm volatile("sgdt %0":"=m" (*dtr));
}
/* Execute sidt, storing into *@dtr. */
static inline void native_store_idt(struct desc_ptr *dtr)
{
	asm volatile("sidt %0":"=m" (*dtr));
}
static inline unsigned long native_store_tr ( void )
{
unsigned long tr ;
2011-05-27 11:29:32 +04:00
2008-01-30 15:31:14 +03:00
asm volatile ( " str %0 " : " =r " ( tr ) ) ;
2011-05-27 11:29:32 +04:00
2008-01-30 15:31:14 +03:00
return tr ;
}
static inline void native_load_tls ( struct thread_struct * t , unsigned int cpu )
{
struct desc_struct * gdt = get_cpu_gdt_table ( cpu ) ;
2011-05-27 11:29:32 +04:00
unsigned int i ;
2008-01-30 15:31:14 +03:00
for ( i = 0 ; i < GDT_ENTRY_TLS_ENTRIES ; i + + )
gdt [ GDT_ENTRY_TLS_MIN + i ] = t - > tls_array [ i ] ;
}
2008-03-23 11:01:58 +03:00
/*
 * True when every field of @info checked below has the value listed:
 * base_addr, limit, contents, seg_32bit, limit_in_pages and useable are 0,
 * read_exec_only and seg_not_present are 1.
 */
#define _LDT_empty(info)				\
	((info)->base_addr		== 0	&&	\
	 (info)->limit			== 0	&&	\
	 (info)->contents		== 0	&&	\
	 (info)->read_exec_only		== 1	&&	\
	 (info)->seg_32bit		== 0	&&	\
	 (info)->limit_in_pages		== 0	&&	\
	 (info)->seg_not_present	== 1	&&	\
	 (info)->useable		== 0)
2008-01-30 15:31:14 +03:00
/*
 * LDT_empty(): _LDT_empty(), and with CONFIG_X86_64 additionally require
 * info->lm to be 0.
 */
# ifdef CONFIG_X86_64
# define LDT_empty(info) (_LDT_empty(info) && ((info)->lm == 0))
# else
# define LDT_empty(info) (_LDT_empty(info))
# endif
/* Call set_ldt() with a NULL table and zero entries. */
static inline void clear_LDT(void)
{
	set_ldt(NULL, 0);
}
/*
* load one particular LDT into the current CPU
*/
/* Pass the ldt pointer and size stored in @pc to set_ldt(). */
static inline void load_LDT_nolock(mm_context_t *pc)
{
	set_ldt(pc->ldt, pc->size);
}
/* Same as load_LDT_nolock(), but with preemption disabled around the call. */
static inline void load_LDT(mm_context_t *pc)
{
	preempt_disable();

	load_LDT_nolock(pc);

	preempt_enable();
}
2008-01-30 15:31:51 +03:00
static inline unsigned long get_desc_base ( const struct desc_struct * desc )
2008-01-30 15:31:14 +03:00
{
2009-11-05 13:47:08 +03:00
return ( unsigned ) ( desc - > base0 | ( ( desc - > base1 ) < < 16 ) | ( ( desc - > base2 ) < < 24 ) ) ;
2008-01-30 15:31:14 +03:00
}
2008-01-30 15:31:51 +03:00
2009-07-18 19:11:06 +04:00
static inline void set_desc_base ( struct desc_struct * desc , unsigned long base )
{
desc - > base0 = base & 0xffff ;
desc - > base1 = ( base > > 16 ) & 0xff ;
desc - > base2 = ( base > > 24 ) & 0xff ;
}
2008-01-30 15:31:51 +03:00
static inline unsigned long get_desc_limit ( const struct desc_struct * desc )
{
return desc - > limit0 | ( desc - > limit < < 16 ) ;
}
2009-07-18 19:11:06 +04:00
static inline void set_desc_limit ( struct desc_struct * desc , unsigned long limit )
{
desc - > limit0 = limit & 0xffff ;
desc - > limit = ( limit > > 16 ) & 0xf ;
}
2011-12-09 12:02:19 +04:00
# ifdef CONFIG_X86_64
static inline void set_nmi_gate ( int gate , void * addr )
{
gate_desc s ;
pack_gate ( & s , GATE_INTERRUPT , ( unsigned long ) addr , 0 , 0 , __KERNEL_CS ) ;
2013-06-20 19:45:44 +04:00
write_idt_entry ( debug_idt_table , gate , & s ) ;
2011-12-09 12:02:19 +04:00
}
# endif
x86, trace: Add irq vector tracepoints
[Purpose of this patch]
As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.
http://www.spinics.net/lists/mm-commits/msg85707.html
<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled. They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.
There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers. Tracing such events gives
us the information about IRQ interaction with other system events.
The trace also tells where the system is spending its time. We want to
know which cores are handling interrupts and how they are affecting other
processes in the system. Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>
On the other hand, my usecase is tracing just local timer event and
getting a value of instruction pointer.
I suggested to add an argument local timer event to get instruction pointer before.
But there is another way to get it with external module like systemtap.
So, I don't need to add any argument to irq vector tracepoints now.
[Patch Description]
Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events.
But there is an above use case to trace specific irq_vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.
So, add following tracepoints instead of introducing irq_vector_entry/exit.
so that we can enable them independently.
- local_timer_vector
- reschedule_vector
- call_function_vector
- call_function_single_vector
- irq_work_entry_vector
- error_apic_vector
- thermal_apic_vector
- threshold_apic_vector
- spurious_apic_vector
- x86_platform_ipi_vector
Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty
makes a zero when tracepoints are disabled. Detailed explanations are as follows.
- Create trace irq handlers with entering_irq()/exiting_irq().
- Create a new IDT, trace_idt_table, at boot time by adding a logic to
_set_gate(). It is just a copy of original idt table.
- Register the new handlers for tracepoints in the new IDT by introducing
macros to alloc_intr_gate() called at registration time of irq_vector handlers.
- Add a check of whether irq vector tracing is on/off to load_current_idt().
This has to be done below the debug check for these reasons.
- Switching to the debug IDT may be kicked while tracing is enabled.
- On the other hand, switching to the trace IDT is kicked only when debugging
is disabled.
In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
2013-06-20 19:46:53 +04:00
# ifdef CONFIG_TRACING
/*
 * Descriptor pointer and gate table of the trace IDT; only declared here
 * and only with CONFIG_TRACING.
 */
extern struct desc_ptr trace_idt_descr ;
extern gate_desc trace_idt_table [ ] ;
/* CONFIG_TRACING: write @gate into slot @entry of trace_idt_table. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
	write_idt_entry(trace_idt_table, entry, gate);
}
# else
/* Without CONFIG_TRACING this is an empty stub. */
static inline void write_trace_idt_entry(int entry, const gate_desc *gate)
{
	/* nothing to do */
}
# endif
2008-01-30 15:31:14 +03:00
static inline void _set_gate ( int gate , unsigned type , void * addr ,
2008-03-23 11:01:58 +03:00
unsigned dpl , unsigned ist , unsigned seg )
2008-01-30 15:31:14 +03:00
{
gate_desc s ;
2011-05-27 11:29:32 +04:00
2008-01-30 15:31:14 +03:00
pack_gate ( & s , type , ( unsigned long ) addr , dpl , ist , seg ) ;
/*
* does not need to be atomic because it is only done once at
* setup time
*/
write_idt_entry ( idt_table , gate , & s ) ;
x86, trace: Add irq vector tracepoints
[Purpose of this patch]
As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.
http://www.spinics.net/lists/mm-commits/msg85707.html
<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled. They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.
There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers. Tracing such events gives
us the information about IRQ interaction with other system events.
The trace also tells where the system is spending its time. We want to
know which cores are handling interrupts and how they are affecting other
processes in the system. Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>
On the other hand, my usecase is tracing just local timer event and
getting a value of instruction pointer.
I suggested to add an argument local timer event to get instruction pointer before.
But there is another way to get it with external module like systemtap.
So, I don't need to add any argument to irq vector tracepoints now.
[Patch Description]
Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events.
But there is an above use case to trace specific irq_vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.
So, add following tracepoints instead of introducing irq_vector_entry/exit.
so that we can enable them independently.
- local_timer_vector
- reschedule_vector
- call_function_vector
- call_function_single_vector
- irq_work_entry_vector
- error_apic_vector
- thermal_apic_vector
- threshold_apic_vector
- spurious_apic_vector
- x86_platform_ipi_vector
Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty
makes a zero when tracepoints are disabled. Detailed explanations are as follows.
- Create trace irq handlers with entering_irq()/exiting_irq().
- Create a new IDT, trace_idt_table, at boot time by adding a logic to
_set_gate(). It is just a copy of original idt table.
- Register the new handlers for tracpoints to the new IDT by introducing
macros to alloc_intr_gate() called at registering time of irq_vector handlers.
- Add checking, whether irq vector tracing is on/off, into load_current_idt().
This has to be done below debug checking for these reasons.
- Switching to debug IDT may be kicked while tracing is enabled.
- On the other hands, switching to trace IDT is kicked only when debugging
is disabled.
In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
2013-06-20 19:46:53 +04:00
write_trace_idt_entry ( gate , & s ) ;
2008-01-30 15:31:14 +03:00
}
/*
* This needs to use ' idt_table ' rather than ' idt ' , and
* thus use the _nonmapped_ version of the IDT , as the
* Pentium F0 0F bugfix can have resulted in the mapped
* IDT being write - protected .
*/
/*
 * Install a GATE_INTERRUPT gate for the handler @addr at vector @n, with
 * dpl 0, ist 0 and __KERNEL_CS.  BUGs if @n is larger than 0xFF.
 */
static inline void set_intr_gate(unsigned int n, void *addr)
{
	BUG_ON((unsigned)n > 0xFF);

	_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
}
2008-04-16 00:36:56 +04:00
/* Lowered by alloc_system_vector() whenever a smaller vector is allocated. */
extern int first_system_vector ;
2008-12-20 02:23:44 +03:00
/* used_vectors is a bitmap of the irq vectors that are not managed by the percpu vector_irq */
extern unsigned long used_vectors [ ] ;
2008-04-16 00:36:56 +04:00
static inline void alloc_system_vector ( int vector )
{
2008-12-20 02:23:44 +03:00
if ( ! test_bit ( vector , used_vectors ) ) {
set_bit ( vector , used_vectors ) ;
2008-04-16 00:36:56 +04:00
if ( first_system_vector > vector )
first_system_vector = vector ;
2011-05-27 11:29:32 +04:00
} else {
2008-04-16 00:36:56 +04:00
BUG ( ) ;
2011-05-27 11:29:32 +04:00
}
2008-04-16 00:36:56 +04:00
}
x86, trace: Add irq vector tracepoints
[Purpose of this patch]
As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.
http://www.spinics.net/lists/mm-commits/msg85707.html
<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled. They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.
There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers. Tracing such events gives
us the information about IRQ interaction with other system events.
The trace also tells where the system is spending its time. We want to
know which cores are handling interrupts and how they are affecting other
processes in the system. Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>
On the other hand, my usecase is tracing just local timer event and
getting a value of instruction pointer.
I suggested to add an argument local timer event to get instruction pointer before.
But there is another way to get it with external module like systemtap.
So, I don't need to add any argument to irq vector tracepoints now.
[Patch Description]
Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events.
But there is an above use case to trace specific irq_vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.
So, add following tracepoints instead of introducing irq_vector_entry/exit.
so that we can enable them independently.
- local_timer_vector
- reschedule_vector
- call_function_vector
- call_function_single_vector
- irq_work_entry_vector
- error_apic_vector
- thermal_apic_vector
- threshold_apic_vector
- spurious_apic_vector
- x86_platform_ipi_vector
Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty
makes a zero when tracepoints are disabled. Detailed explanations are as follows.
- Create trace irq handlers with entering_irq()/exiting_irq().
- Create a new IDT, trace_idt_table, at boot time by adding a logic to
_set_gate(). It is just a copy of original idt table.
- Register the new handlers for tracepoints in the new IDT by introducing
macros to alloc_intr_gate() called at registration time of irq_vector handlers.
- Add a check of whether irq vector tracing is on/off to load_current_idt().
This has to be done below the debug check for these reasons.
- Switching to the debug IDT may be kicked while tracing is enabled.
- On the other hand, switching to the trace IDT is kicked only when debugging
is disabled.
In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
2013-06-20 19:46:53 +04:00
# ifdef CONFIG_TRACING
/*
 * CONFIG_TRACING: pack a GATE_INTERRUPT gate (dpl 0, ist 0) for the
 * handler @addr and write it to slot @gate of trace_idt_table.
 */
static inline void trace_set_intr_gate(unsigned int gate, void *addr)
{
	gate_desc desc;

	pack_gate(&desc, GATE_INTERRUPT, (unsigned long)addr, 0, 0, __KERNEL_CS);
	write_idt_entry(trace_idt_table, gate, &desc);
}
/* CONFIG_TRACING: thin wrapper around trace_set_intr_gate(). */
static inline void __trace_alloc_intr_gate(unsigned int n, void *addr)
{
	trace_set_intr_gate(n, addr);
}
# else
/* Without CONFIG_TRACING this is an empty stub. */
static inline void trace_set_intr_gate(unsigned int gate, void *addr)
{
	/* nothing to do */
}
/* Without CONFIG_TRACING this expands to nothing; its arguments are never evaluated. */
# define __trace_alloc_intr_gate(n, addr)
# endif
/* Thin wrapper around set_intr_gate(). */
static inline void __alloc_intr_gate(unsigned int n, void *addr)
{
	set_intr_gate(n, addr);
}
x86, trace: Add irq vector tracepoints
[Purpose of this patch]
As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.
http://www.spinics.net/lists/mm-commits/msg85707.html
<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled. They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.
There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers. Tracing such events gives
us the information about IRQ interaction with other system events.
The trace also tells where the system is spending its time. We want to
know which cores are handling interrupts and how they are affecting other
processes in the system. Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>
On the other hand, my usecase is tracing just local timer event and
getting a value of instruction pointer.
I suggested to add an argument local timer event to get instruction pointer before.
But there is another way to get it with external module like systemtap.
So, I don't need to add any argument to irq vector tracepoints now.
[Patch Description]
Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events.
But there is an above use case to trace specific irq_vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.
So, add following tracepoints instead of introducing irq_vector_entry/exit.
so that we can enable them independently.
- local_timer_vector
- reschedule_vector
- call_function_vector
- call_function_single_vector
- irq_work_entry_vector
- error_apic_vector
- thermal_apic_vector
- threshold_apic_vector
- spurious_apic_vector
- x86_platform_ipi_vector
Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty
makes a zero when tracepoints are disabled. Detailed explanations are as follows.
- Create trace irq handlers with entering_irq()/exiting_irq().
- Create a new IDT, trace_idt_table, at boot time by adding a logic to
_set_gate(). It is just a copy of original idt table.
- Register the new handlers for tracepoints in the new IDT by introducing
macros to alloc_intr_gate() called at registration time of irq_vector handlers.
- Add a check of whether irq vector tracing is on/off to load_current_idt().
This has to be done below the debug check for these reasons.
- Switching to the debug IDT may be kicked while tracing is enabled.
- On the other hand, switching to the trace IDT is kicked only when debugging
is disabled.
In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
2013-06-20 19:46:53 +04:00
/*
 * Reserve vector @n with alloc_system_vector(), install @addr for it with
 * __alloc_intr_gate(), and pass the identifier trace_<addr> (formed by
 * token pasting) to __trace_alloc_intr_gate().
 *
 * @addr must therefore be a plain identifier, and @n is evaluated more
 * than once.
 */
#define alloc_intr_gate(n, addr)				\
	do {							\
		alloc_system_vector(n);				\
		__alloc_intr_gate(n, addr);			\
		__trace_alloc_intr_gate(n, trace_##addr);	\
	} while (0)
2008-01-30 15:31:14 +03:00
/*
 * This routine sets up an interrupt gate at descriptor privilege level 3:
 * a GATE_INTERRUPT gate for the handler @addr at vector @n, with ist 0 and
 * __KERNEL_CS.  BUGs if @n is larger than 0xFF.
 */
static inline void set_system_intr_gate(unsigned int n, void *addr)
{
	BUG_ON((unsigned)n > 0xFF);

	_set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
}
2008-10-04 00:00:32 +04:00
static inline void set_system_trap_gate ( unsigned int n , void * addr )
2008-01-30 15:31:14 +03:00
{
BUG_ON ( ( unsigned ) n > 0xFF ) ;
2008-10-04 00:00:32 +04:00
_set_gate ( n , GATE_TRAP , addr , 0x3 , 0 , __KERNEL_CS ) ;
2008-01-30 15:31:14 +03:00
}
2008-10-04 00:00:32 +04:00
static inline void set_trap_gate ( unsigned int n , void * addr )
2008-01-30 15:31:14 +03:00
{
BUG_ON ( ( unsigned ) n > 0xFF ) ;
2008-10-04 00:00:32 +04:00
_set_gate ( n , GATE_TRAP , addr , 0 , 0 , __KERNEL_CS ) ;
2008-01-30 15:31:14 +03:00
}
/*
 * Install a GATE_TASK gate at vector @n with a null handler address,
 * dpl 0, ist 0 and @gdt_entry << 3 as the segment argument.  BUGs if @n
 * is larger than 0xFF.
 */
static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
{
	BUG_ON((unsigned)n > 0xFF);

	_set_gate(n, GATE_TASK, (void *)0, 0, 0, (gdt_entry << 3));
}
/*
 * Install a GATE_INTERRUPT gate for the handler @addr at vector @n, with
 * dpl 0, the given @ist and __KERNEL_CS.  BUGs if @n, converted to
 * unsigned, is larger than 0xFF.
 */
static inline void set_intr_gate_ist(int n, void *addr, unsigned ist)
{
	BUG_ON((unsigned)n > 0xFF);

	_set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
}
2008-10-04 00:00:32 +04:00
static inline void set_system_intr_gate_ist ( int n , void * addr , unsigned ist )
2008-01-30 15:31:14 +03:00
{
BUG_ON ( ( unsigned ) n > 0xFF ) ;
_set_gate ( n , GATE_INTERRUPT , addr , 0x3 , ist , __KERNEL_CS ) ;
}
2008-01-30 15:31:14 +03:00
2013-06-20 19:45:44 +04:00
# ifdef CONFIG_X86_64
/* Per-CPU counter; a non-zero value makes is_debug_idt_enabled() return true. */
DECLARE_PER_CPU ( u32 , debug_idt_ctr ) ;
/* CONFIG_X86_64: true when this CPU's debug_idt_ctr is non-zero. */
static inline bool is_debug_idt_enabled(void)
{
	return this_cpu_read(debug_idt_ctr) != 0;
}
/* CONFIG_X86_64: load the IDT described by debug_idt_descr. */
static inline void load_debug_idt(void)
{
	load_idt((const struct desc_ptr *)&debug_idt_descr);
}
# else
/* Without CONFIG_X86_64 this always returns false. */
static inline bool is_debug_idt_enabled(void)
{
	return false;
}
/* Without CONFIG_X86_64 this is an empty stub. */
static inline void load_debug_idt(void)
{
	/* nothing to do */
}
# endif
x86, trace: Add irq vector tracepoints
[Purpose of this patch]
As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.
http://www.spinics.net/lists/mm-commits/msg85707.html
<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled. They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.
There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers. Tracing such events gives
us the information about IRQ interaction with other system events.
The trace also tells where the system is spending its time. We want to
know which cores are handling interrupts and how they are affecting other
processes in the system. Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>
On the other hand, my usecase is tracing just local timer event and
getting a value of instruction pointer.
I suggested to add an argument local timer event to get instruction pointer before.
But there is another way to get it with external module like systemtap.
So, I don't need to add any argument to irq vector tracepoints now.
[Patch Description]
Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events.
But there is an above use case to trace specific irq_vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.
So, add following tracepoints instead of introducing irq_vector_entry/exit.
so that we can enable them independently.
- local_timer_vector
- reschedule_vector
- call_function_vector
- call_function_single_vector
- irq_work_entry_vector
- error_apic_vector
- thermal_apic_vector
- threshold_apic_vector
- spurious_apic_vector
- x86_platform_ipi_vector
Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty
makes a zero when tracepoints are disabled. Detailed explanations are as follows.
- Create trace irq handlers with entering_irq()/exiting_irq().
- Create a new IDT, trace_idt_table, at boot time by adding a logic to
_set_gate(). It is just a copy of original idt table.
- Register the new handlers for tracepoints in the new IDT by introducing
macros to alloc_intr_gate() called at registration time of irq_vector handlers.
- Add a check of whether irq vector tracing is on/off to load_current_idt().
This has to be done below the debug check for these reasons.
- Switching to the debug IDT may be kicked while tracing is enabled.
- On the other hand, switching to the trace IDT is kicked only when debugging
is disabled.
In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
2013-06-20 19:46:53 +04:00
# ifdef CONFIG_TRACING
/* Atomic counter; a non-zero value makes is_trace_idt_enabled() return true. */
extern atomic_t trace_idt_ctr ;
/* CONFIG_TRACING: true when trace_idt_ctr reads as non-zero. */
static inline bool is_trace_idt_enabled(void)
{
	return atomic_read(&trace_idt_ctr) != 0;
}
/* CONFIG_TRACING: load the IDT described by trace_idt_descr. */
static inline void load_trace_idt(void)
{
	load_idt((const struct desc_ptr *)&trace_idt_descr);
}
# else
/* Without CONFIG_TRACING this always returns false. */
static inline bool is_trace_idt_enabled(void)
{
	return false;
}
/* Without CONFIG_TRACING this is an empty stub. */
static inline void load_trace_idt(void)
{
	/* nothing to do */
}
# endif
2013-06-20 19:45:44 +04:00
/*
* the load_current_idt ( ) is called with interrupt disabled by local_irq_save ( )
* to avoid races . That way the IDT will always be set back to the expected
* descriptor .
*/
static inline void load_current_idt ( void )
{
unsigned long flags ;
local_irq_save ( flags ) ;
if ( is_debug_idt_enabled ( ) )
load_debug_idt ( ) ;
x86, trace: Add irq vector tracepoints
[Purpose of this patch]
As Vaibhav explained in the thread below, tracepoints for irq vectors
are useful.
http://www.spinics.net/lists/mm-commits/msg85707.html
<snip>
The current interrupt traces from irq_handler_entry and irq_handler_exit
provide when an interrupt is handled. They provide good data about when
the system has switched to kernel space and how it affects the currently
running processes.
There are some IRQ vectors which trigger the system into kernel space,
which are not handled in generic IRQ handlers. Tracing such events gives
us the information about IRQ interaction with other system events.
The trace also tells where the system is spending its time. We want to
know which cores are handling interrupts and how they are affecting other
processes in the system. Also, the trace provides information about when
the cores are idle and which interrupts are changing that state.
<snip>
On the other hand, my usecase is tracing just local timer event and
getting a value of instruction pointer.
I suggested to add an argument local timer event to get instruction pointer before.
But there is another way to get it with external module like systemtap.
So, I don't need to add any argument to irq vector tracepoints now.
[Patch Description]
Vaibhav's patch shared a trace point ,irq_vector_entry/irq_vector_exit, in all events.
But there is an above use case to trace specific irq_vector rather than tracing all events.
In this case, we are concerned about overhead due to unwanted events.
So, add following tracepoints instead of introducing irq_vector_entry/exit.
so that we can enable them independently.
- local_timer_vector
- reschedule_vector
- call_function_vector
- call_function_single_vector
- irq_work_entry_vector
- error_apic_vector
- thermal_apic_vector
- threshold_apic_vector
- spurious_apic_vector
- x86_platform_ipi_vector
Also, introduce a logic switching IDT at enabling/disabling time so that a time penalty
makes a zero when tracepoints are disabled. Detailed explanations are as follows.
- Create trace irq handlers with entering_irq()/exiting_irq().
- Create a new IDT, trace_idt_table, at boot time by adding a logic to
_set_gate(). It is just a copy of original idt table.
- Register the new handlers for tracpoints to the new IDT by introducing
macros to alloc_intr_gate() called at registering time of irq_vector handlers.
- Add checking, whether irq vector tracing is on/off, into load_current_idt().
This has to be done below debug checking for these reasons.
- Switching to debug IDT may be kicked while tracing is enabled.
- On the other hands, switching to trace IDT is kicked only when debugging
is disabled.
In addition, the new IDT is created only when CONFIG_TRACING is enabled to avoid being
used for other purposes.
Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Link: http://lkml.kernel.org/r/51C323ED.5050708@hds.com
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
2013-06-20 19:46:53 +04:00
else if ( is_trace_idt_enabled ( ) )
load_trace_idt ( ) ;
2013-06-20 19:45:44 +04:00
else
load_idt ( ( const struct desc_ptr * ) & idt_descr ) ;
local_irq_restore ( flags ) ;
}
2008-10-23 09:26:29 +04:00
# endif /* _ASM_X86_DESC_H */