tracing: Add lock-free tracing_map
Add tracing_map, a special-purpose lock-free map for tracing. tracing_map is designed to aggregate or 'sum' one or more values associated with a specific object of type tracing_map_elt, which is associated by the map to a given key. It provides various hooks allowing per-tracer customization and is separated out into a separate file in order to allow it to be shared between multiple tracers, but isn't meant to be generally used outside of that context. The tracing_map implementation was inspired by lock-free map algorithms originated by Dr. Cliff Click: http://www.azulsystems.com/blog/cliff/2007-03-26-non-blocking-hashtable http://www.azulsystems.com/events/javaone_2007/2007_LockFreeHash.pdf Link: http://lkml.kernel.org/r/b43d68d1add33582a396f553c8ef705a33a6a748.1449767187.git.tom.zanussi@linux.intel.com Signed-off-by: Tom Zanussi <tom.zanussi@linux.intel.com> Tested-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com> Reviewed-by: Namhyung Kim <namhyung@kernel.org> Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
This commit is contained in:
		
				
					committed by
					
						 Steven Rostedt
						Steven Rostedt
					
				
			
			
				
	
			
			
			
						parent
						
							540b589e63
						
					
				
				
					commit
					08d43a5fa0
				
			| @@ -528,6 +528,19 @@ config MMIOTRACE | ||||
| 	  See Documentation/trace/mmiotrace.txt. | ||||
| 	  If you are not helping to develop drivers, say N. | ||||
|  | ||||
| config TRACING_MAP | ||||
| 	bool | ||||
| 	depends on ARCH_HAVE_NMI_SAFE_CMPXCHG | ||||
| 	default n | ||||
| 	help | ||||
| 	  tracing_map is a special-purpose lock-free map for tracing, | ||||
| 	  separated out as a stand-alone facility in order to allow it | ||||
| 	  to be shared between multiple tracers.  It isn't meant to be | ||||
| 	  generally used outside of that context, and is normally | ||||
| 	  selected by tracers that use it. | ||||
|  | ||||
| 	  If in doubt, say N. | ||||
|  | ||||
| config MMIOTRACE_TEST | ||||
| 	tristate "Test module for mmiotrace" | ||||
| 	depends on MMIOTRACE && m | ||||
|   | ||||
| @@ -31,6 +31,7 @@ obj-$(CONFIG_TRACING) += trace_output.o | ||||
| obj-$(CONFIG_TRACING) += trace_seq.o | ||||
| obj-$(CONFIG_TRACING) += trace_stat.o | ||||
| obj-$(CONFIG_TRACING) += trace_printk.o | ||||
| obj-$(CONFIG_TRACING_MAP) += tracing_map.o | ||||
| obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o | ||||
| obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o | ||||
| obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o | ||||
|   | ||||
							
								
								
									
										1058
									
								
								kernel/trace/tracing_map.c
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1058
									
								
								kernel/trace/tracing_map.c
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										282
									
								
								kernel/trace/tracing_map.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										282
									
								
								kernel/trace/tracing_map.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,282 @@ | ||||
| #ifndef __TRACING_MAP_H | ||||
| #define __TRACING_MAP_H | ||||
| /* | ||||
|  * Default, maximum and minimum values for map sizing, expressed in | ||||
|  * bits.  NOTE(review): presumably these bound the map_bits argument | ||||
|  * of tracing_map_create() - confirm against tracing_map.c. | ||||
|  */ | ||||
| #define TRACING_MAP_BITS_DEFAULT	11 | ||||
| #define TRACING_MAP_BITS_MAX		17 | ||||
| #define TRACING_MAP_BITS_MIN		7 | ||||
| /* | ||||
|  * TRACING_MAP_FIELDS_MAX sizes the fields[] array and | ||||
|  * TRACING_MAP_KEYS_MAX sizes the key_idx[] array of struct tracing_map. | ||||
|  */ | ||||
| #define TRACING_MAP_FIELDS_MAX		4 | ||||
| #define TRACING_MAP_KEYS_MAX		2 | ||||
| /* | ||||
|  * NOTE(review): presumably the largest n_sort_keys accepted by | ||||
|  * tracing_map_sort_entries() - confirm against tracing_map.c. | ||||
|  */ | ||||
| #define TRACING_MAP_SORT_KEYS_MAX	2 | ||||
| /* | ||||
|  * Comparison callback taking two opaque values and returning an int. | ||||
|  * NOTE(review): the sign convention of the result is not stated in | ||||
|  * this header; presumably memcmp()-like - confirm. | ||||
|  */ | ||||
| typedef int (*tracing_map_cmp_fn_t) (void *val_a, void *val_b); | ||||
|  | ||||
| /* | ||||
|  * This is an overview of the tracing_map data structures and how they | ||||
|  * relate to the tracing_map API.  The details of the algorithms | ||||
|  * aren't discussed here - this is just a general overview of the data | ||||
|  * structures and how they interact with the API. | ||||
|  * | ||||
|  * The central data structure of the tracing_map is an initially | ||||
|  * zeroed array of struct tracing_map_entry (stored in the map field | ||||
|  * of struct tracing_map).  tracing_map_entry is a very simple data | ||||
|  * structure containing only two fields: a 32-bit unsigned 'key' | ||||
|  * variable and a pointer named 'val'.  This array of struct | ||||
|  * tracing_map_entry is essentially a hash table which will be | ||||
|  * modified by a single function, tracing_map_insert(), but which can | ||||
|  * be traversed and read by a user at any time (though the user does | ||||
|  * this indirectly via an array of tracing_map_sort_entry - see the | ||||
|  * explanation of that data structure in the discussion of the | ||||
|  * sorting-related data structures below). | ||||
|  * | ||||
|  * The central function of the tracing_map API is | ||||
|  * tracing_map_insert().  tracing_map_insert() hashes the | ||||
|  * arbitrarily-sized key passed into it into a 32-bit unsigned key. | ||||
|  * It then uses this key, truncated to the array size, as an index | ||||
|  * into the array of tracing_map_entries.  If the value of the 'key' | ||||
|  * field of the tracing_map_entry found at that location is 0, then | ||||
|  * that entry is considered to be free and can be claimed, by | ||||
|  * replacing the 0 in the 'key' field of the tracing_map_entry with | ||||
|  * the new 32-bit hashed key.  Once claimed, that tracing_map_entry's | ||||
|  * 'val' field is then used to store a unique element which will be | ||||
|  * forever associated with that 32-bit hashed key in the | ||||
|  * tracing_map_entry. | ||||
|  * | ||||
|  * That unique element now in the tracing_map_entry's 'val' field is | ||||
|  * an instance of tracing_map_elt, where 'elt' in the latter part of | ||||
|  * that variable name is short for 'element'.  The purpose of a | ||||
|  * tracing_map_elt is to hold values specific to the particular | ||||
|  * 32-bit hashed key it's associated with.  Things such as the unique | ||||
|  * set of aggregated sums associated with the 32-bit hashed key, along | ||||
|  * with a copy of the full key associated with the entry, and which | ||||
|  * was used to produce the 32-bit hashed key. | ||||
|  * | ||||
|  * When tracing_map_create() is called to create the tracing map, the | ||||
|  * user specifies (indirectly via the map_bits param, the details are | ||||
|  * unimportant for this discussion) the maximum number of elements | ||||
|  * that the map can hold (stored in the max_elts field of struct | ||||
|  * tracing_map).  This is the maximum possible number of | ||||
|  * tracing_map_entries in the tracing_map_entry array which can be | ||||
|  * 'claimed' as described in the above discussion, and therefore is | ||||
|  * also the maximum number of tracing_map_elts that can be associated | ||||
|  * with the tracing_map_entry array in the tracing_map.  Because of | ||||
|  * the way the insertion algorithm works, the size of the allocated | ||||
|  * tracing_map_entry array is always twice the maximum number of | ||||
|  * elements (2 * max_elts).  This value is stored in the map_size | ||||
|  * field of struct tracing_map. | ||||
|  * | ||||
|  * Because tracing_map_insert() needs to work from any context, | ||||
|  * including from within the memory allocation functions themselves, | ||||
|  * both the tracing_map_entry array and a pool of max_elts | ||||
|  * tracing_map_elts are pre-allocated before any call is made to | ||||
|  * tracing_map_insert(). | ||||
|  * | ||||
|  * The tracing_map_entry array is allocated as a single block by | ||||
|  * tracing_map_create(). | ||||
|  * | ||||
|  * Because the tracing_map_elts are much larger objects and can't | ||||
|  * generally be allocated together as a single large array without | ||||
|  * failure, they're allocated individually, by tracing_map_init(). | ||||
|  * | ||||
|  * The pool of tracing_map_elts is allocated by tracing_map_init() | ||||
|  * rather than by tracing_map_create() because at the time | ||||
|  * tracing_map_create() is called, there isn't enough information to | ||||
|  * create the tracing_map_elts.  Specifically, the user first needs to | ||||
|  * tell the tracing_map implementation how many fields the | ||||
|  * tracing_map_elts contain, and which types of fields they are (key | ||||
|  * or sum).  The user does this via the tracing_map_add_sum_field() | ||||
|  * and tracing_map_add_key_field() functions, following which the user | ||||
|  * calls tracing_map_init() to finish up the tracing map setup.  The | ||||
|  * array holding the pointers which make up the pre-allocated pool of | ||||
|  * tracing_map_elts is allocated as a single block and is stored in | ||||
|  * the elts field of struct tracing_map. | ||||
|  * | ||||
|  * There is also a set of structures used for sorting that might | ||||
|  * benefit from some minimal explanation. | ||||
|  * | ||||
|  * struct tracing_map_sort_key is used to drive the sort at any given | ||||
|  * time.  By 'any given time' we mean that a different | ||||
|  * tracing_map_sort_key will be used at different times depending on | ||||
|  * whether the sort currently being performed is a primary or a | ||||
|  * secondary sort. | ||||
|  * | ||||
|  * The sort key is very simple, consisting of the field index of the | ||||
|  * tracing_map_elt field to sort on (which the user saved when adding | ||||
|  * the field), and whether the sort should be done in an ascending or | ||||
|  * descending order. | ||||
|  * | ||||
|  * For the convenience of the sorting code, a tracing_map_sort_entry | ||||
|  * is created for each tracing_map_elt, again individually allocated | ||||
|  * to avoid failures that might be expected if allocated as a single | ||||
|  * large array of struct tracing_map_sort_entry. | ||||
|  * tracing_map_sort_entry instances are the objects expected by the | ||||
|  * various internal sorting functions, and are also what the user | ||||
|  * ultimately receives after calling tracing_map_sort_entries(). | ||||
|  * Because it doesn't make sense for users to access an unordered and | ||||
|  * sparsely populated tracing_map directly, the | ||||
|  * tracing_map_sort_entries() function is provided so that users can | ||||
|  * retrieve a sorted list of all existing elements.  In addition to | ||||
|  * the associated tracing_map_elt 'elt' field contained within the | ||||
|  * tracing_map_sort_entry, which is the object of interest to the | ||||
|  * user, tracing_map_sort_entry objects contain a number of additional | ||||
|  * fields which are used for caching and internal purposes and can | ||||
|  * safely be ignored. | ||||
| */ | ||||
| /* | ||||
|  * Descriptor for one field of a tracing_map / tracing_map_elt.  A | ||||
|  * field is either a sum or a key (see tracing_map_add_sum_field() and | ||||
|  * tracing_map_add_key_field()); the anonymous union holds 'sum' for | ||||
|  * the former and 'offset' for the latter, so only one of the two is | ||||
|  * meaningful for any given field.  cmp_fn is the comparison callback | ||||
|  * supplied for the field.  NOTE(review): 'offset' is presumably the | ||||
|  * position of the key field within the full key - confirm. | ||||
|  */ | ||||
| struct tracing_map_field { | ||||
| 	tracing_map_cmp_fn_t		cmp_fn; | ||||
| 	union { | ||||
| 		atomic64_t			sum; | ||||
| 		unsigned int			offset; | ||||
| 	}; | ||||
| }; | ||||
| /* | ||||
|  * The per-key element stored in a tracing_map_entry's 'val' field. | ||||
|  * Per the overview above it holds the values specific to one hashed | ||||
|  * key: 'fields' for the aggregated sums and 'key' for a copy of the | ||||
|  * full key that produced the hash.  'private_data' is reserved for | ||||
|  * client data attached through the tracing_map_ops callbacks. | ||||
|  * NOTE(review): 'map' is presumably a back-pointer to the owning | ||||
|  * tracing_map - confirm. | ||||
|  */ | ||||
| struct tracing_map_elt { | ||||
| 	struct tracing_map		*map; | ||||
| 	struct tracing_map_field	*fields; | ||||
| 	void				*key; | ||||
| 	void				*private_data; | ||||
| }; | ||||
| /* | ||||
|  * One slot of the hash table.  'key' is the 32-bit hashed key; a | ||||
|  * value of 0 means the slot is free and may be claimed.  Once claimed, | ||||
|  * 'val' points to the tracing_map_elt associated with that hashed key. | ||||
|  */ | ||||
| struct tracing_map_entry { | ||||
| 	u32				key; | ||||
| 	struct tracing_map_elt		*val; | ||||
| }; | ||||
| /* | ||||
|  * Drives a single sort pass: 'field_idx' is the index of the | ||||
|  * tracing_map_elt field to sort on (saved by the user when the field | ||||
|  * was added) and 'descending' selects descending rather than ascending | ||||
|  * order. | ||||
|  */ | ||||
| struct tracing_map_sort_key { | ||||
| 	unsigned int			field_idx; | ||||
| 	bool				descending; | ||||
| }; | ||||
| /* | ||||
|  * Object handed to the internal sorting functions and returned to the | ||||
|  * user by tracing_map_sort_entries().  'elt' is the element of | ||||
|  * interest to the user; per the overview above, the remaining fields | ||||
|  * are used for caching and internal purposes and can be ignored by | ||||
|  * callers. | ||||
|  */ | ||||
| struct tracing_map_sort_entry { | ||||
| 	void				*key; | ||||
| 	struct tracing_map_elt		*elt; | ||||
| 	bool				elt_copied; | ||||
| 	bool				dup; | ||||
| }; | ||||
| /* | ||||
|  * An array split across separately referenced pages and addressed via | ||||
|  * TRACING_MAP_ARRAY_ELT().  As used by that macro: | ||||
|  *   pages            - table of page pointers, indexed by | ||||
|  *                      (idx >> entry_shift) | ||||
|  *   entry_shift      - right shift turning an index into a page number | ||||
|  *   entry_mask       - mask extracting the slot number within a page | ||||
|  *   entry_size_shift - left shift turning a slot number into an offset | ||||
|  *                      from the start of the page | ||||
|  * NOTE(review): entries_per_page and n_pages are not used in this | ||||
|  * header; presumably the slot count per page and the length of | ||||
|  * pages[] - confirm against tracing_map.c. | ||||
|  */ | ||||
| struct tracing_map_array { | ||||
| 	unsigned int entries_per_page; | ||||
| 	unsigned int entry_size_shift; | ||||
| 	unsigned int entry_shift; | ||||
| 	unsigned int entry_mask; | ||||
| 	unsigned int n_pages; | ||||
| 	void **pages; | ||||
| }; | ||||
|  | ||||
| /* | ||||
|  * Locate slot @idx in a paged struct tracing_map_array: the high bits | ||||
|  * of @idx (idx >> entry_shift) select the page, and the low bits | ||||
|  * (idx & entry_mask), scaled by entry_size_shift, give the offset added | ||||
|  * to that page pointer.  The addition is arithmetic on a void *, which | ||||
|  * is a GNU C extension. | ||||
|  * | ||||
|  * Both parameters are parenthesized in the expansion so that argument | ||||
|  * expressions such as '&arr' or 'a | b' keep their intended precedence. | ||||
|  * Each argument is still evaluated more than once, so it must not have | ||||
|  * side effects. | ||||
|  */ | ||||
| #define TRACING_MAP_ARRAY_ELT(array, idx)				\ | ||||
| 	((array)->pages[(idx) >> (array)->entry_shift] +		\ | ||||
| 	 (((idx) & (array)->entry_mask) << (array)->entry_size_shift)) | ||||
|  | ||||
| /* Slot @idx of @array viewed as a struct tracing_map_entry. */ | ||||
| #define TRACING_MAP_ENTRY(array, idx)					\ | ||||
| 	((struct tracing_map_entry *)TRACING_MAP_ARRAY_ELT(array, idx)) | ||||
|  | ||||
| /* Slot @idx of @array viewed as a pointer to a struct tracing_map_elt pointer. */ | ||||
| #define TRACING_MAP_ELT(array, idx)					\ | ||||
| 	((struct tracing_map_elt **)TRACING_MAP_ARRAY_ELT(array, idx)) | ||||
| /* | ||||
|  * The map itself; see the overview comment earlier in this file. | ||||
|  *   key_size, ops, private_data - correspond to the like-named | ||||
|  *                       tracing_map_create() parameters | ||||
|  *   map_bits          - sizing parameter given to tracing_map_create() | ||||
|  *   max_elts          - maximum number of elements the map can hold | ||||
|  *   map_size          - size of the entry array, 2 * max_elts | ||||
|  *   elts              - pre-allocated pool of tracing_map_elt pointers | ||||
|  *   map               - the tracing_map_entry array (the hash table) | ||||
|  *   fields, n_fields  - field descriptors, at most | ||||
|  *                       TRACING_MAP_FIELDS_MAX of them | ||||
|  *   key_idx, n_keys   - at most TRACING_MAP_KEYS_MAX entries | ||||
|  *   sort_key          - the sort key currently driving a sort | ||||
|  * NOTE(review): next_elt is presumably the index of the next unused | ||||
|  * pool element, key_idx[] presumably indexes key fields within | ||||
|  * fields[], and hits/drops presumably count successful and dropped | ||||
|  * insertions - none of this is shown in this header; confirm against | ||||
|  * tracing_map.c. | ||||
|  */ | ||||
| struct tracing_map { | ||||
| 	unsigned int			key_size; | ||||
| 	unsigned int			map_bits; | ||||
| 	unsigned int			map_size; | ||||
| 	unsigned int			max_elts; | ||||
| 	atomic_t			next_elt; | ||||
| 	struct tracing_map_array	*elts; | ||||
| 	struct tracing_map_array	*map; | ||||
| 	const struct tracing_map_ops	*ops; | ||||
| 	void				*private_data; | ||||
| 	struct tracing_map_field	fields[TRACING_MAP_FIELDS_MAX]; | ||||
| 	unsigned int			n_fields; | ||||
| 	int				key_idx[TRACING_MAP_KEYS_MAX]; | ||||
| 	unsigned int			n_keys; | ||||
| 	struct tracing_map_sort_key	sort_key; | ||||
| 	atomic64_t			hits; | ||||
| 	atomic64_t			drops; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * struct tracing_map_ops - callbacks for tracing_map | ||||
|  * | ||||
|  * The methods in this structure define callback functions for various | ||||
|  * operations on a tracing_map or objects related to a tracing_map. | ||||
|  * | ||||
|  * For a detailed description of tracing_map_elt objects please see | ||||
|  * the overview of tracing_map data structures at the beginning of | ||||
|  * this file. | ||||
|  * | ||||
|  * All the methods below are optional. | ||||
|  * | ||||
|  * @elt_alloc: When a tracing_map_elt is allocated, this function, if | ||||
|  *	defined, will be called and gives clients the opportunity to | ||||
|  *	allocate additional data and attach it to the element | ||||
|  *	(tracing_map_elt->private_data is meant for that purpose). | ||||
|  *	Element allocation occurs before tracing begins, when the | ||||
|  *	tracing_map_init() call is made by client code. | ||||
|  *	NOTE(review): this is the only callback returning a value (an | ||||
|  *	int); the convention is not stated in this header - presumably | ||||
|  *	0 on success and a negative errno on failure; confirm against | ||||
|  *	tracing_map.c. | ||||
|  * | ||||
|  * @elt_copy: At certain points in the lifetime of an element, it may | ||||
|  *	need to be copied.  The copy should include a copy of the | ||||
|  *	client-allocated data, which can be copied into the 'to' | ||||
|  *	element from the 'from' element. | ||||
|  * | ||||
|  * @elt_free: When a tracing_map_elt is freed, this function is called | ||||
|  *	and allows client-allocated per-element data to be freed. | ||||
|  * | ||||
|  * @elt_clear: This callback allows per-element client-defined data to | ||||
|  *	be cleared, if applicable. | ||||
|  * | ||||
|  * @elt_init: This callback allows per-element client-defined data to | ||||
|  *	be initialized when used i.e. when the element is actually | ||||
|  *	claimed by tracing_map_insert() in the context of the map | ||||
|  *	insertion. | ||||
|  */ | ||||
| struct tracing_map_ops { | ||||
| 	int			(*elt_alloc)(struct tracing_map_elt *elt); | ||||
| 	void			(*elt_copy)(struct tracing_map_elt *to, | ||||
| 					    struct tracing_map_elt *from); | ||||
| 	void			(*elt_free)(struct tracing_map_elt *elt); | ||||
| 	void			(*elt_clear)(struct tracing_map_elt *elt); | ||||
| 	void			(*elt_init)(struct tracing_map_elt *elt); | ||||
| }; | ||||
| /* | ||||
|  * Map setup.  Per the overview above, tracing_map_create() allocates | ||||
|  * the tracing_map_entry array, and tracing_map_init() finishes setup | ||||
|  * by allocating the pool of tracing_map_elts once the fields have been | ||||
|  * described.  Both must complete before tracing_map_insert() is used. | ||||
|  */ | ||||
| extern struct tracing_map * | ||||
| tracing_map_create(unsigned int map_bits, | ||||
| 		   unsigned int key_size, | ||||
| 		   const struct tracing_map_ops *ops, | ||||
| 		   void *private_data); | ||||
| extern int tracing_map_init(struct tracing_map *map); | ||||
| /* | ||||
|  * Field description, to be called between tracing_map_create() and | ||||
|  * tracing_map_init().  NOTE(review): the int result is presumably the | ||||
|  * new field's index (or a negative error) - confirm. | ||||
|  */ | ||||
| extern int tracing_map_add_sum_field(struct tracing_map *map); | ||||
| extern int tracing_map_add_key_field(struct tracing_map *map, | ||||
| 				     unsigned int offset, | ||||
| 				     tracing_map_cmp_fn_t cmp_fn); | ||||
| /* Map teardown and reset. */ | ||||
| extern void tracing_map_destroy(struct tracing_map *map); | ||||
| extern void tracing_map_clear(struct tracing_map *map); | ||||
| /* | ||||
|  * Element access by key.  tracing_map_insert() is the central function | ||||
|  * of the API: it hashes the key and claims a free entry for it, as | ||||
|  * described in the overview above. | ||||
|  */ | ||||
| extern struct tracing_map_elt * | ||||
| tracing_map_insert(struct tracing_map *map, void *key); | ||||
| extern struct tracing_map_elt * | ||||
| tracing_map_lookup(struct tracing_map *map, void *key); | ||||
| /* Comparison functions usable as a field's tracing_map_cmp_fn_t. */ | ||||
| extern tracing_map_cmp_fn_t tracing_map_cmp_num(int field_size, | ||||
| 						int field_is_signed); | ||||
| extern int tracing_map_cmp_string(void *val_a, void *val_b); | ||||
| extern int tracing_map_cmp_none(void *val_a, void *val_b); | ||||
| /* Sum-field accessors, field descriptor setup, and sorted retrieval. */ | ||||
| extern void tracing_map_update_sum(struct tracing_map_elt *elt, | ||||
| 				   unsigned int i, u64 n); | ||||
| extern u64 tracing_map_read_sum(struct tracing_map_elt *elt, unsigned int i); | ||||
| extern void tracing_map_set_field_descr(struct tracing_map *map, | ||||
| 					unsigned int i, | ||||
| 					unsigned int key_offset, | ||||
| 					tracing_map_cmp_fn_t cmp_fn); | ||||
| extern int | ||||
| tracing_map_sort_entries(struct tracing_map *map, | ||||
| 			 struct tracing_map_sort_key *sort_keys, | ||||
| 			 unsigned int n_sort_keys, | ||||
| 			 struct tracing_map_sort_entry ***sort_entries); | ||||
| /* | ||||
|  * NOTE(review): presumably releases an array obtained from | ||||
|  * tracing_map_sort_entries() - confirm against tracing_map.c. | ||||
|  */ | ||||
| extern void | ||||
| tracing_map_destroy_sort_entries(struct tracing_map_sort_entry **entries, | ||||
| 				 unsigned int n_entries); | ||||
| #endif /* __TRACING_MAP_H */ | ||||
		Reference in New Issue
	
	Block a user