From 7de211c88b77594f9a4146e59b37175f33f5a189 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Fri, 25 May 2012 23:53:16 +0200 Subject: [PATCH] MINOR: add a new function call tracer for debugging purposes This feature relies on GCC's ability to call helpers at function entry/exit points. We define these helpers to quickly dump the minimum info into a trace file that can be converted to a human readable format using a script in the contrib/trace directory. This has only been implemented in the GNU makefile for now on as it is unsure whether it's supported on all OSes. The feature is enabled by building with "TRACE=1". The performance impact is huge, so this feature should only be used when debugging. To limit the loss of performance, fprintf() has been disabled and the output is hand-crafted and emitted using fwrite(), resulting in doubling the performance. Using the TSC instead of gettimeofday() also doubles the performance. Around 1200 conns/s may be achieved on a Pentium-M 1.7 GHz which leads to around 50 MB/s of traces. The entry and exits of all functions will be dumped into a file designated by the HAPROXY_TRACE environment variable, or by default "trace.out". If the trace file name is empty or "/dev/null", then traces are disabled. If opening the trace file fails, then stderr is used. If HAPROXY_TRACE_FAST is used, then the time is taken from the global variable. Last, if HAPROXY_TRACE_TSC is used, then the machine's TSC is used instead of the real time (almost twice as fast). The output format is : or : where is '>' when entering a function and '<' when leaving. The awk script in contrib/trace provides a nicer indented output : 6f74989e6f8 ->->-> run_poll_loop > signal_process_queue [src/haproxy.c:1097:0x804bd69] > [include/proto/signal.h:32:0x8049cd0] 6f74989eb00 run_poll_loop < signal_process_queue [src/haproxy.c:1097:0x804bd69] < [include/proto/signal.h:32:0x8049cd0] 6f74989ef44 ->->-> run_poll_loop > wake_expired_tasks [src/haproxy.c:1100:0x804bd72] > [src/task.c:123:0x8055060] 6f74989f3a6 ->->->-> wake_expired_tasks > eb32_lookup_ge [src/task.c:128:0x8055091] > [ebtree/eb32tree.c:138:0x80a8c70] 6f74989f7e9 wake_expired_tasks < eb32_lookup_ge [src/task.c:128:0x8055091] < [ebtree/eb32tree.c:138:0x80a8c70] 6f74989fc0d ->->->-> wake_expired_tasks > eb32_first [src/task.c:134:0x80550d5] > [ebtree/eb32tree.h:55:0x8054ad0] 6f7498a003d ->->->->-> eb32_first > eb_first [ebtree/eb32tree.h:56:0x8054af1] > [ebtree/ebtree.h:520:0x8054a10] 6f7498a0436 ->->->->->-> eb_first > eb_walk_down [ebtree/ebtree.h:521:0x8054a33] > [ebtree/ebtree.h:442:0x80549a0] 6f7498a0843 ->->->->->->-> eb_walk_down > eb_gettag [ebtree/ebtree.h:445:0x80549d6] > [ebtree/ebtree.h:418:0x80548e0] 6f7498a0c2b eb_walk_down < eb_gettag [ebtree/ebtree.h:445:0x80549d6] < [ebtree/ebtree.h:418:0x80548e0] 6f7498a1042 ->->->->->->-> eb_walk_down > eb_untag [ebtree/ebtree.h:447:0x80549e2] > [ebtree/ebtree.h:412:0x80548a0] 6f7498a1498 eb_walk_down < eb_untag [ebtree/ebtree.h:447:0x80549e2] < [ebtree/ebtree.h:412:0x80548a0] 6f7498a18c6 ->->->->->->-> eb_walk_down > eb_root_to_node [ebtree/ebtree.h:448:0x80549e7] > [ebtree/ebtree.h:432:0x8054960] 6f7498a1cd4 eb_walk_down < eb_root_to_node [ebtree/ebtree.h:448:0x80549e7] < [ebtree/ebtree.h:432:0x8054960] 6f7498a20c4 eb_first < eb_walk_down [ebtree/ebtree.h:521:0x8054a33] < [ebtree/ebtree.h:442:0x80549a0] 6f7498a24b4 eb32_first < eb_first [ebtree/eb32tree.h:56:0x8054af1] < [ebtree/ebtree.h:520:0x8054a10] 6f7498a289c wake_expired_tasks < eb32_first [src/task.c:134:0x80550d5] < [ebtree/eb32tree.h:55:0x8054ad0] 6f7498a2c8c run_poll_loop < wake_expired_tasks [src/haproxy.c:1100:0x804bd72] < [src/task.c:123:0x8055060] 6f7498a3095 ->->-> run_poll_loop > process_runnable_tasks [src/haproxy.c:1103:0x804bd7a] > [src/task.c:190:0x8055150] A nice improvement would possibly consist in trying to get the function's arguments in the stack and to dump a few more infor for some well-known functions (eg: the session's status for process_session). --- Makefile | 18 +++ contrib/trace/trace.awk | 70 ++++++++++++ src/trace.c | 247 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 335 insertions(+) create mode 100755 contrib/trace/trace.awk create mode 100644 src/trace.c diff --git a/Makefile b/Makefile index a6c796241..c70dc94cb 100644 --- a/Makefile +++ b/Makefile @@ -122,6 +122,11 @@ SMALL_OPTS = # use at all. DEBUG = +#### Trace options +# Use TRACE=1 to trace function calls to file "trace.out" or to stderr if not +# possible. +TRACE = + #### Additional include and library dirs # Redefine this if you want to add some special PATH to include/libs ADDINC = @@ -478,6 +483,12 @@ ifneq ($(VERDATE),) COPTS += -DCONFIG_HAPROXY_DATE=\"$(VERDATE)\" endif +ifneq ($(TRACE),) +# if tracing is enabled, we want it to be as fast as possible +TRACE_COPTS := $(filter-out -O0 -O1 -O2 -pg -finstrument-functions,$(COPTS)) -O3 -fomit-frame-pointer +COPTS += -finstrument-functions +endif + #### Global link options # These options are added at the end of the "ld" command line. Use LDFLAGS to # add options at the beginning of the "ld" command line if needed. @@ -523,6 +534,10 @@ EBTREE_OBJS = $(EBTREE_DIR)/ebtree.o \ $(EBTREE_DIR)/ebmbtree.o $(EBTREE_DIR)/ebsttree.o \ $(EBTREE_DIR)/ebimtree.o $(EBTREE_DIR)/ebistree.o +ifneq ($(TRACE),) +OBJS += src/trace.o +endif + # Not used right now LIB_EBTREE = $(EBTREE_DIR)/libebtree.a @@ -538,6 +553,9 @@ objsize: haproxy %.o: %.c $(CC) $(COPTS) -c -o $@ $< +src/trace.o: src/trace.c + $(CC) $(TRACE_COPTS) -c -o $@ $< + src/haproxy.o: src/haproxy.c $(CC) $(COPTS) \ -DBUILD_TARGET='"$(strip $(TARGET))"' \ diff --git a/contrib/trace/trace.awk b/contrib/trace/trace.awk new file mode 100755 index 000000000..8c8854a90 --- /dev/null +++ b/contrib/trace/trace.awk @@ -0,0 +1,70 @@ +#!/bin/sh +# +# trace.awk - Fast trace symbol resolver - w@1wt.eu - 2012/05/25 +# +# Principle: this program launches reads pointers from a trace file and if not +# found in its cache, it passes them over a pipe to addr2line which is forked +# in a coprocess, then stores the result in the cache. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version +# 2 of the License, or (at your option) any later version. +# +# usage: $0 exec_file < trace.out +# + +if [ $# -lt 1 ]; then + echo "Usage: ${0##*/} exec_file < trace.out" + echo "Example: ${0##*/} ./haproxy < trace.out" + echo "Example: HAPROXY_TRACE=/dev/stdout ./haproxy -f cfg | ${0##*/} ./haproxy" + exit 1 +fi + +if [ ! -s "$1" ]; then + echo "$1 is not a valid executable file" + exit 1 +fi + +exec awk -v prog="$1" \ +' +BEGIN { + if (cmd == "") + cmd=ENVIRON["ADDR2LINE"]; + if (cmd == "") + cmd="addr2line"; + + if (prog == "") + prog=ENVIRON["PROG"]; + + cmd=cmd " -f -e " prog; + + for (i = 1; i < 100; i++) { + indents[">",i] = indents[">",i-1] "->" + indents[">",i-1] = indents[">",i-1] " " + indents["<",i] = indents["<",i-1] " " + indents["<",i-1] = indents["<",i-1] " " + } +} + +function getptr(ptr) +{ + loc=locs[ptr]; + name=names[ptr]; + if (loc == "" || name == "") { + print ptr |& cmd; + cmd |& getline name; + cmd |& getline loc; + names[ptr]=name + locs[ptr]=loc + } +} + +{ + # input format: + getptr($3); caller_loc=loc; caller_name=name + getptr($5); callee_loc=loc; callee_name=name + printf "%s %s %s %s %s [%s:%s] %s [%s:%s]\n", + $1, indents[$4,$2], caller_name, $4, callee_name, caller_loc, $3, $4, callee_loc, $5 +} +' diff --git a/src/trace.c b/src/trace.c new file mode 100644 index 000000000..ab3827d31 --- /dev/null +++ b/src/trace.c @@ -0,0 +1,247 @@ +/* + * Function call tracing for gcc >= 2.95 + * + * Copyright 2012 Willy Tarreau + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * gcc is able to call a specific function when entering and leaving any + * function when compiled with -finstrument-functions. This code must not + * be built with this argument. The performance impact is huge, so this + * feature should only be used when debugging. + * + * The entry and exits of all functions will be dumped into a file designated + * by the HAPROXY_TRACE environment variable, or by default "trace.out". If the + * trace file name is empty or "/dev/null", then traces are disabled. If + * opening the trace file fails, then stderr is used. If HAPROXY_TRACE_FAST is + * used, then the time is taken from the global variable. Last, if + * HAPROXY_TRACE_TSC is used, then the machine's TSC is used instead of the + * real time (almost twice as fast). + * + * The output format is : + * + * + * or : + * + * + * where is '>' when entering a function and '<' when leaving. + * + * The article below is a nice explanation of how this works : + * http://balau82.wordpress.com/2010/10/06/trace-and-profile-function-calls-with-gcc/ + */ + +#include +#include +#include +#include +#include +#include + +static FILE *log; +static int level; +static int disabled; +static int fast_time; +static int use_tsc; +static struct timeval trace_now; +static struct timeval *now_ptr; +static char line[128]; /* more than enough for a message (9+1+6+1+3+1+18+1+1+18+1+1) */ + +static int open_trace() +{ + const char *output = getenv("HAPROXY_TRACE"); + + if (!output) + output = "trace.out"; + + if (!*output || strcmp(output, "/dev/null") == 0) { + disabled = 1; + return 0; + } + + log = fopen(output, "w"); + if (!log) + log = stderr; + + now_ptr = &now; + if (getenv("HAPROXY_TRACE_FAST") != NULL) { + fast_time = 1; + now_ptr = &trace_now; + } + if (getenv("HAPROXY_TRACE_TSC") != NULL) { + fast_time = 1; + use_tsc = 1; + } + return 1; +} + +/* This function first divides the number by 100M then iteratively multiplies it + * by 100 (using adds and shifts). The trick is that dividing by 100M is equivalent + * to multiplying by 1/100M, which approximates to 1441151881/2^57. All local + * variables fit in registers on x86. This version outputs two digits per round. + * indicates the minimum number of pairs of digits that have to be + * emitted, which might be left-padded with zeroes. + * It returns the pointer to the ending '\0'. + */ +static char *ultoad2(unsigned int x, char *out, int min_pairs) +{ + unsigned int q; + char *p = out; + int pos = 4; + unsigned long long y; + + static const unsigned short bcd[100] = { + 0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930, + 0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931, + 0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932, + 0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933, + 0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934, + 0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935, + 0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936, + 0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937, + 0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938, + 0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939 }; + + y = x * 1441151881ULL; /* y>>57 will be the integer part of x/100M */ + while (1) { + q = y >> 57; + /* Q is composed of the first digit in the lower byte and the second + * digit in the higher byte. + */ + if (p != out || q > 9 || pos < min_pairs) { +#if defined(__i386__) || defined(__x86_64__) + /* unaligned accesses are fast on x86 */ + *(unsigned short *)p = bcd[q]; + p += 2; +#else + *(p++) = bcd[q]; + *(p++) = bcd[q] >> 8; +#endif + } + else if (q || !pos) { + /* only at most one digit */ + *(p++) = bcd[q] >> 8; + } + if (--pos < 0) + break; + + y &= 0x1FFFFFFFFFFFFFFULL; // remainder + + if (sizeof(long) >= sizeof(long long)) { + /* shifting is preferred on 64-bit archs, while mult is faster on 32-bit. + * We multiply by 100 by doing *5, *5 and *4, all of which are trivial. + */ + y += (y << 2); + y += (y << 2); + y <<= 2; + } + else + y *= 100; + } + + *p = '\0'; + return p; +} + +/* Send as hex into . Returns the pointer to the ending '\0'. */ +static char *emit_hex(unsigned long h, char *out) +{ + static unsigned char hextab[16] = "0123456789abcdef"; + int shift = sizeof(h) * 8 - 4; + unsigned int idx; + + do { + idx = (h >> shift); + if (idx || !shift) + *out++ = hextab[idx & 15]; + shift -= 4; + } while (shift >= 0); + *out = '\0'; + return out; +} + +#if defined(__i386__) || defined(__x86_64__) +static inline unsigned long long rdtsc() +{ + unsigned int a, d; + asm volatile("rdtsc" : "=a" (a), "=d" (d)); + return a + ((unsigned long long)d << 32); +} +#else +static inline unsigned long long rdtsc() +{ + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec * 1000000 + tv.tv_usec; +} +#endif + +static void make_line(void *from, void *to, int level, char dir) +{ + char *p = line; + + if (unlikely(!log) && !open_trace()) + return; + + if (unlikely(!fast_time)) + gettimeofday(now_ptr, NULL); + +#ifdef USE_SLOW_FPRINTF + if (!use_tsc) + fprintf(log, "%u.%06u %d %p %c %p\n", + (unsigned int)now_ptr->tv_sec, + (unsigned int)now_ptr->tv_usec, + level, from, dir, to); + else + fprintf(log, "%llx %d %p %c %p\n", + rdtsc(), level, from, dir, to); + return; +#endif + + if (unlikely(!use_tsc)) { + /* "%u.06u", tv_sec, tv_usec */ + p = ultoad2(now_ptr->tv_sec, p, 0); + *p++ = '.'; + p = ultoad2(now_ptr->tv_usec, p, 3); + } else { + /* "%08x%08x", high, low */ + unsigned long long t = rdtsc(); + if (sizeof(long) < sizeof(long long)) + p = emit_hex((unsigned long)(t >> 32U), p); + p = emit_hex((unsigned long)(t), p); + } + + /* " %u", level */ + *p++ = ' '; + p = ultoad2(level, p, 0); + + /* " %p", from */ + *p++ = ' '; *p++ = '0'; *p++ = 'x'; + p = emit_hex((unsigned long)from, p); + + /* " %c", dir */ + *p++ = ' '; *p++ = dir; + + /* " %p", to */ + *p++ = ' '; *p++ = '0'; *p++ = 'x'; + p = emit_hex((unsigned long)to, p); + + *p++ = '\n'; + + fwrite(line, p - line, 1, log); +} + +/* These are the functions GCC calls */ +void __cyg_profile_func_enter(void *to, void *from) +{ + if (!disabled) + return make_line(from, to, ++level, '>'); +} + +void __cyg_profile_func_exit(void *to, void *from) +{ + if (!disabled) + return make_line(from, to, level--, '<'); +}