2017-10-06 10:31:47 -03:00
/*
* Copyright ( C ) 2011 - 2017 , Red Hat Inc , Arnaldo Carvalho de Melo < acme @ redhat . com >
*
* Parts came from evlist . c builtin - { top , stat , record } . c , see those files for further
* copyright notes .
*
* Released under the GPL v2 . ( and only v2 , not any later version )
*/
# include <sys/mman.h>
2017-10-06 10:46:01 -03:00
# include <inttypes.h>
# include <asm/bug.h>
# include "debug.h"
2017-10-06 10:31:47 -03:00
# include "event.h"
# include "mmap.h"
# include "util.h" /* page_size */
size_t perf_mmap__mmap_len ( struct perf_mmap * map )
{
return map - > mask + 1 + page_size ;
}
/* When check_messup is true, 'end' must points to a good entry */
2017-12-03 02:00:41 +00:00
static union perf_event * perf_mmap__read ( struct perf_mmap * map ,
2018-01-18 13:26:21 -08:00
u64 * startp , u64 end )
2017-10-06 10:31:47 -03:00
{
unsigned char * data = map - > base + page_size ;
union perf_event * event = NULL ;
2018-01-18 13:26:21 -08:00
int diff = end - * startp ;
2017-10-06 10:31:47 -03:00
if ( diff > = ( int ) sizeof ( event - > header ) ) {
size_t size ;
2018-01-18 13:26:21 -08:00
event = ( union perf_event * ) & data [ * startp & map - > mask ] ;
2017-10-06 10:31:47 -03:00
size = event - > header . size ;
2018-01-18 13:26:21 -08:00
if ( size < sizeof ( event - > header ) | | diff < ( int ) size )
return NULL ;
2017-10-06 10:31:47 -03:00
/*
* Event straddles the mmap boundary - - header should always
* be inside due to u64 alignment of output .
*/
2018-01-18 13:26:21 -08:00
if ( ( * startp & map - > mask ) + size ! = ( ( * startp + size ) & map - > mask ) ) {
unsigned int offset = * startp ;
2017-10-06 10:31:47 -03:00
unsigned int len = min ( sizeof ( * event ) , size ) , cpy ;
void * dst = map - > event_copy ;
do {
cpy = min ( map - > mask + 1 - ( offset & map - > mask ) , len ) ;
memcpy ( dst , & data [ offset & map - > mask ] , cpy ) ;
offset + = cpy ;
dst + = cpy ;
len - = cpy ;
} while ( len ) ;
event = ( union perf_event * ) map - > event_copy ;
}
2018-01-18 13:26:21 -08:00
* startp + = size ;
2017-10-06 10:31:47 -03:00
}
return event ;
}
2018-01-18 13:26:23 -08:00
/*
* Read event from ring buffer one by one .
* Return one event for each call .
*
* Usage :
* perf_mmap__read_init ( )
* while ( event = perf_mmap__read_event ( ) ) {
* //process the event
* perf_mmap__consume ( )
* }
* perf_mmap__read_done ( )
*/
2018-03-06 10:36:06 -05:00
union perf_event * perf_mmap__read_event ( struct perf_mmap * map )
2018-01-18 13:26:23 -08:00
{
union perf_event * event ;
/*
* Check if event was unmapped due to a POLLHUP / POLLERR .
*/
if ( ! refcount_read ( & map - > refcnt ) )
return NULL ;
/* non-overwirte doesn't pause the ringbuffer */
2018-03-06 10:36:03 -05:00
if ( ! map - > overwrite )
map - > end = perf_mmap__read_head ( map ) ;
2018-01-18 13:26:23 -08:00
2018-03-06 10:36:03 -05:00
event = perf_mmap__read ( map , & map - > start , map - > end ) ;
2018-01-18 13:26:23 -08:00
2018-03-06 10:36:03 -05:00
if ( ! map - > overwrite )
map - > prev = map - > start ;
2018-01-18 13:26:23 -08:00
return event ;
}
2017-10-06 10:31:47 -03:00
static bool perf_mmap__empty ( struct perf_mmap * map )
{
return perf_mmap__read_head ( map ) = = map - > prev & & ! map - > auxtrace_mmap . base ;
}
void perf_mmap__get ( struct perf_mmap * map )
{
refcount_inc ( & map - > refcnt ) ;
}
void perf_mmap__put ( struct perf_mmap * map )
{
BUG_ON ( map - > base & & refcount_read ( & map - > refcnt ) = = 0 ) ;
if ( refcount_dec_and_test ( & map - > refcnt ) )
perf_mmap__munmap ( map ) ;
}
2018-03-06 10:36:05 -05:00
void perf_mmap__consume ( struct perf_mmap * map )
2017-10-06 10:31:47 -03:00
{
2018-03-06 10:36:04 -05:00
if ( ! map - > overwrite ) {
2017-10-06 10:31:47 -03:00
u64 old = map - > prev ;
perf_mmap__write_tail ( map , old ) ;
}
if ( refcount_read ( & map - > refcnt ) = = 1 & & perf_mmap__empty ( map ) )
perf_mmap__put ( map ) ;
}
/*
 * Weak default: ignores all arguments and reports success.
 * NOTE(review): presumably overridden by the real auxtrace code when
 * that is built in -- confirm.
 */
int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
			       struct auxtrace_mmap_params *mp __maybe_unused,
			       void *userpg __maybe_unused,
			       int fd __maybe_unused)
{
	return 0;
}
/* Weak default: nothing to unmap, so this is a no-op. */
void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
{
}
/* Weak default: leaves @mp untouched and ignores every argument. */
void __weak auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp __maybe_unused,
				       off_t auxtrace_offset __maybe_unused,
				       unsigned int auxtrace_pages __maybe_unused,
				       bool auxtrace_overwrite __maybe_unused)
{
}
/* Weak default: leaves @mp untouched and ignores every argument. */
void __weak auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp __maybe_unused,
					  struct perf_evlist *evlist __maybe_unused,
					  int idx __maybe_unused,
					  bool per_cpu __maybe_unused)
{
}
2018-11-06 12:03:35 +03:00
# ifdef HAVE_AIO_SUPPORT
static int perf_mmap__aio_mmap ( struct perf_mmap * map , struct mmap_params * mp )
{
2018-11-06 12:07:19 +03:00
int delta_max , i , prio ;
2018-11-06 12:03:35 +03:00
2018-11-06 12:04:58 +03:00
map - > aio . nr_cblocks = mp - > nr_cblocks ;
if ( map - > aio . nr_cblocks ) {
2018-11-06 12:07:19 +03:00
map - > aio . aiocb = calloc ( map - > aio . nr_cblocks , sizeof ( struct aiocb * ) ) ;
if ( ! map - > aio . aiocb ) {
pr_debug2 ( " failed to allocate aiocb for data buffer, error %m \n " ) ;
return - 1 ;
}
map - > aio . cblocks = calloc ( map - > aio . nr_cblocks , sizeof ( struct aiocb ) ) ;
if ( ! map - > aio . cblocks ) {
pr_debug2 ( " failed to allocate cblocks for data buffer, error %m \n " ) ;
return - 1 ;
}
map - > aio . data = calloc ( map - > aio . nr_cblocks , sizeof ( void * ) ) ;
2018-11-06 12:03:35 +03:00
if ( ! map - > aio . data ) {
pr_debug2 ( " failed to allocate data buffer, error %m \n " ) ;
return - 1 ;
}
delta_max = sysconf ( _SC_AIO_PRIO_DELTA_MAX ) ;
2018-11-06 12:07:19 +03:00
for ( i = 0 ; i < map - > aio . nr_cblocks ; + + i ) {
map - > aio . data [ i ] = malloc ( perf_mmap__mmap_len ( map ) ) ;
if ( ! map - > aio . data [ i ] ) {
pr_debug2 ( " failed to allocate data buffer area, error %m " ) ;
return - 1 ;
}
/*
* Use cblock . aio_fildes value different from - 1
* to denote started aio write operation on the
* cblock so it requires explicit record__aio_sync ( )
* call prior the cblock may be reused again .
*/
map - > aio . cblocks [ i ] . aio_fildes = - 1 ;
/*
* Allocate cblocks with priority delta to have
* faster aio write system calls because queued requests
* are kept in separate per - prio queues and adding
* a new request will iterate thru shorter per - prio
* list . Blocks with numbers higher than
* _SC_AIO_PRIO_DELTA_MAX go with priority 0.
*/
prio = delta_max - i ;
map - > aio . cblocks [ i ] . aio_reqprio = prio > = 0 ? prio : 0 ;
}
2018-11-06 12:03:35 +03:00
}
return 0 ;
}
static void perf_mmap__aio_munmap ( struct perf_mmap * map )
{
if ( map - > aio . data )
zfree ( & map - > aio . data ) ;
}
2018-11-06 12:04:58 +03:00
2018-11-06 12:07:19 +03:00
int perf_mmap__aio_push ( struct perf_mmap * md , void * to , int idx ,
2018-11-06 12:04:58 +03:00
int push ( void * to , struct aiocb * cblock , void * buf , size_t size , off_t off ) ,
off_t * off )
{
u64 head = perf_mmap__read_head ( md ) ;
unsigned char * data = md - > base + page_size ;
unsigned long size , size0 = 0 ;
void * buf ;
int rc = 0 ;
rc = perf_mmap__read_init ( md ) ;
if ( rc < 0 )
return ( rc = = - EAGAIN ) ? 0 : - 1 ;
/*
2018-11-06 12:07:19 +03:00
* md - > base data is copied into md - > data [ idx ] buffer to
2018-11-06 12:04:58 +03:00
* release space in the kernel buffer as fast as possible ,
* thru perf_mmap__consume ( ) below .
*
* That lets the kernel to proceed with storing more
* profiling data into the kernel buffer earlier than other
* per - cpu kernel buffers are handled .
*
* Coping can be done in two steps in case the chunk of
* profiling data crosses the upper bound of the kernel buffer .
* In this case we first move part of data from md - > start
* till the upper bound and then the reminder from the
* beginning of the kernel buffer till the end of
* the data chunk .
*/
size = md - > end - md - > start ;
if ( ( md - > start & md - > mask ) + size ! = ( md - > end & md - > mask ) ) {
buf = & data [ md - > start & md - > mask ] ;
size = md - > mask + 1 - ( md - > start & md - > mask ) ;
md - > start + = size ;
2018-11-06 12:07:19 +03:00
memcpy ( md - > aio . data [ idx ] , buf , size ) ;
2018-11-06 12:04:58 +03:00
size0 = size ;
}
buf = & data [ md - > start & md - > mask ] ;
size = md - > end - md - > start ;
md - > start + = size ;
2018-11-06 12:07:19 +03:00
memcpy ( md - > aio . data [ idx ] + size0 , buf , size ) ;
2018-11-06 12:04:58 +03:00
/*
2018-11-06 12:07:19 +03:00
* Increment md - > refcount to guard md - > data [ idx ] buffer
2018-11-06 12:04:58 +03:00
* from premature deallocation because md object can be
* released earlier than aio write request started
2018-11-06 12:07:19 +03:00
* on mmap - > data [ idx ] is complete .
2018-11-06 12:04:58 +03:00
*
* perf_mmap__put ( ) is done at record__aio_complete ( )
* after started request completion .
*/
perf_mmap__get ( md ) ;
md - > prev = head ;
perf_mmap__consume ( md ) ;
2018-11-06 12:07:19 +03:00
rc = push ( to , & md - > aio . cblocks [ idx ] , md - > aio . data [ idx ] , size0 + size , * off ) ;
2018-11-06 12:04:58 +03:00
if ( ! rc ) {
* off + = size0 + size ;
} else {
/*
* Decrement md - > refcount back if aio write
* operation failed to start .
*/
perf_mmap__put ( md ) ;
}
return rc ;
}
2018-11-06 12:03:35 +03:00
# else
/* Built without HAVE_AIO_SUPPORT: nothing to set up, always succeeds. */
static int perf_mmap__aio_mmap(struct perf_mmap *map __maybe_unused,
			       struct mmap_params *mp __maybe_unused)
{
	return 0;
}
/* Built without HAVE_AIO_SUPPORT: nothing to tear down. */
static void perf_mmap__aio_munmap(struct perf_mmap *map __maybe_unused)
{
}
# endif
2017-10-06 10:31:47 -03:00
void perf_mmap__munmap ( struct perf_mmap * map )
{
2018-11-06 12:03:35 +03:00
perf_mmap__aio_munmap ( map ) ;
2017-10-06 10:31:47 -03:00
if ( map - > base ! = NULL ) {
munmap ( map - > base , perf_mmap__mmap_len ( map ) ) ;
map - > base = NULL ;
map - > fd = - 1 ;
refcount_set ( & map - > refcnt , 0 ) ;
}
auxtrace_mmap__munmap ( & map - > auxtrace_mmap ) ;
}
2018-08-17 13:45:55 +02:00
int perf_mmap__mmap ( struct perf_mmap * map , struct mmap_params * mp , int fd , int cpu )
2017-10-06 10:31:47 -03:00
{
/*
2018-03-01 18:09:11 -05:00
* The last one will be done at perf_mmap__consume ( ) , so that we
2017-10-06 10:31:47 -03:00
* make sure we don ' t prevent tools from consuming every last event in
* the ring buffer .
*
* I . e . we can get the POLLHUP meaning that the fd doesn ' t exist
* anymore , but the last events for it are still in the ring buffer ,
* waiting to be consumed .
*
* Tools can chose to ignore this at their own discretion , but the
* evlist layer can ' t just drop it when filtering events in
* perf_evlist__filter_pollfd ( ) .
*/
refcount_set ( & map - > refcnt , 2 ) ;
map - > prev = 0 ;
map - > mask = mp - > mask ;
map - > base = mmap ( NULL , perf_mmap__mmap_len ( map ) , mp - > prot ,
MAP_SHARED , fd , 0 ) ;
if ( map - > base = = MAP_FAILED ) {
pr_debug2 ( " failed to mmap perf event ring buffer, error %d \n " ,
errno ) ;
map - > base = NULL ;
return - 1 ;
}
map - > fd = fd ;
2018-08-17 13:45:55 +02:00
map - > cpu = cpu ;
2017-10-06 10:31:47 -03:00
if ( auxtrace_mmap__mmap ( & map - > auxtrace_mmap ,
& mp - > auxtrace_mp , map - > base , fd ) )
return - 1 ;
2018-11-06 12:03:35 +03:00
return perf_mmap__aio_mmap ( map , mp ) ;
2017-10-06 10:31:47 -03:00
}
2017-10-06 10:46:01 -03:00
2018-03-13 20:31:13 +08:00
/*
 * Walk the event headers stored in @buf (a ring of mask + 1 bytes)
 * starting at *start and store in *end the position where the walk stops:
 * either at a header whose size is 0, or once the walk has covered the
 * whole ring.  If the walk overshoots the ring size, it steps back by the
 * size of the most recently examined header.
 *
 * Every exit taken from the loop returns 0.
 */
static int overwrite_rb_find_range(void *buf, int mask, u64 *start, u64 *end)
{
	struct perf_event_header *pheader;
	u64 evt_head = *start;
	unsigned int ring_size = mask + 1;

	pr_debug2(" %s: buf=%p, start=% " PRIx64 " \n ", __func__, buf, *start);

	pheader = (struct perf_event_header *)(buf + (*start & mask));
	for (;;) {
		u64 walked = evt_head - *start;

		if (walked >= ring_size) {
			pr_debug(" Finished reading overwrite ring buffer: rewind \n ");
			/* pheader still refers to the last header examined. */
			if (walked > ring_size)
				evt_head -= pheader->size;
			*end = evt_head;
			return 0;
		}

		pheader = (struct perf_event_header *)(buf + (evt_head & mask));

		if (pheader->size == 0) {
			pr_debug(" Finished reading overwrite ring buffer: get start \n ");
			*end = evt_head;
			return 0;
		}

		evt_head += pheader->size;
		pr_debug3(" move evt_head: % " PRIx64 " \n ", evt_head);
	}

	WARN_ONCE(1, " Shouldn't get here \n ");
	return -1;
}
2018-01-18 13:26:19 -08:00
/*
* Report the start and end of the available data in ringbuffer
*/
2018-03-26 11:42:15 -03:00
static int __perf_mmap__read_init ( struct perf_mmap * md )
2017-10-06 10:46:01 -03:00
{
u64 head = perf_mmap__read_head ( md ) ;
u64 old = md - > prev ;
unsigned char * data = md - > base + page_size ;
unsigned long size ;
2018-03-06 10:36:01 -05:00
md - > start = md - > overwrite ? head : old ;
md - > end = md - > overwrite ? old : head ;
2017-10-06 10:46:01 -03:00
2018-03-06 10:36:01 -05:00
if ( md - > start = = md - > end )
2018-01-18 13:26:20 -08:00
return - EAGAIN ;
2017-10-06 10:46:01 -03:00
2018-03-06 10:36:01 -05:00
size = md - > end - md - > start ;
2017-10-06 10:46:01 -03:00
if ( size > ( unsigned long ) ( md - > mask ) + 1 ) {
2018-03-06 10:36:01 -05:00
if ( ! md - > overwrite ) {
2017-12-04 16:51:06 +00:00
WARN_ONCE ( 1 , " failed to keep up with mmap data. (warn only once) \n " ) ;
2017-10-06 10:46:01 -03:00
2017-12-04 16:51:06 +00:00
md - > prev = head ;
2018-03-06 10:36:05 -05:00
perf_mmap__consume ( md ) ;
2018-01-18 13:26:20 -08:00
return - EAGAIN ;
2017-12-04 16:51:06 +00:00
}
/*
* Backward ring buffer is full . We still have a chance to read
* most of data from it .
*/
2018-03-13 20:31:13 +08:00
if ( overwrite_rb_find_range ( data , md - > mask , & md - > start , & md - > end ) )
2018-01-18 13:26:20 -08:00
return - EINVAL ;
2017-10-06 10:46:01 -03:00
}
2018-01-18 13:26:20 -08:00
return 0 ;
2018-01-18 13:26:19 -08:00
}
2018-03-26 11:42:15 -03:00
int perf_mmap__read_init ( struct perf_mmap * map )
{
/*
* Check if event was unmapped due to a POLLHUP / POLLERR .
*/
if ( ! refcount_read ( & map - > refcnt ) )
return - ENOENT ;
return __perf_mmap__read_init ( map ) ;
}
2018-03-06 10:36:02 -05:00
int perf_mmap__push ( struct perf_mmap * md , void * to ,
2018-09-13 14:54:06 +02:00
int push ( struct perf_mmap * map , void * to , void * buf , size_t size ) )
2018-01-18 13:26:19 -08:00
{
u64 head = perf_mmap__read_head ( md ) ;
unsigned char * data = md - > base + page_size ;
unsigned long size ;
void * buf ;
int rc = 0 ;
2018-03-06 10:36:07 -05:00
rc = perf_mmap__read_init ( md ) ;
2018-01-18 13:26:20 -08:00
if ( rc < 0 )
return ( rc = = - EAGAIN ) ? 0 : - 1 ;
2018-01-18 13:26:19 -08:00
2018-03-06 10:36:02 -05:00
size = md - > end - md - > start ;
2018-01-18 13:26:17 -08:00
2018-03-06 10:36:02 -05:00
if ( ( md - > start & md - > mask ) + size ! = ( md - > end & md - > mask ) ) {
buf = & data [ md - > start & md - > mask ] ;
size = md - > mask + 1 - ( md - > start & md - > mask ) ;
md - > start + = size ;
2017-10-06 10:46:01 -03:00
2018-09-13 14:54:06 +02:00
if ( push ( md , to , buf , size ) < 0 ) {
2017-10-06 10:46:01 -03:00
rc = - 1 ;
goto out ;
}
}
2018-03-06 10:36:02 -05:00
buf = & data [ md - > start & md - > mask ] ;
size = md - > end - md - > start ;
md - > start + = size ;
2017-10-06 10:46:01 -03:00
2018-09-13 14:54:06 +02:00
if ( push ( md , to , buf , size ) < 0 ) {
2017-10-06 10:46:01 -03:00
rc = - 1 ;
goto out ;
}
md - > prev = head ;
2018-03-06 10:36:05 -05:00
perf_mmap__consume ( md ) ;
2017-10-06 10:46:01 -03:00
out :
return rc ;
}
2018-01-18 13:26:22 -08:00
/*
* Mandatory for overwrite mode
* The direction of overwrite mode is backward .
* The last perf_mmap__read ( ) will set tail to map - > prev .
* Need to correct the map - > prev to head which is the end of next read .
*/
void perf_mmap__read_done ( struct perf_mmap * map )
{
2018-03-26 09:42:09 -04:00
/*
* Check if event was unmapped due to a POLLHUP / POLLERR .
*/
if ( ! refcount_read ( & map - > refcnt ) )
return ;
2018-01-18 13:26:22 -08:00
map - > prev = perf_mmap__read_head ( map ) ;
}