mirror of
https://github.com/samba-team/samba.git
synced 2025-02-02 09:47:23 +03:00
common: Debug ctdb_addr_to_str() using new function ctdb_external_trace()
We've seen this function report "Unknown family, 0" and then CTDB disappeared without a trace. If we can reproduce it then this might help us to debug it. The idea is that you do something like the following in /etc/sysconfig/ctdb: export CTDB_EXTERNAL_TRACE="/etc/ctdb/config/gcore_trace.sh" When we hit this error then we call out to gcore to get a core file so we can do forensics. This might block CTDB for a few seconds. Signed-off-by: Martin Schwenke <martin@meltin.net> (This used to be ctdb commit 7895bc003f087ab2f3181df3c464386f59bfcc39)
This commit is contained in:
parent
6372592982
commit
8d7562f3f8
@ -359,6 +359,7 @@ install: all $(PMDA_INSTALL)
|
||||
if [ ! -f $(DESTDIR)$(etcdir)/ctdb/notify.sh ];then ${INSTALLCMD} -m 755 config/notify.sh $(DESTDIR)$(etcdir)/ctdb; fi
|
||||
${INSTALLCMD} -m 755 config/debug-hung-script.sh $(DESTDIR)$(etcdir)/ctdb
|
||||
if [ ! -f $(DESTDIR)$(etcdir)/ctdb/ctdb-crash-cleanup.sh ];then ${INSTALLCMD} -m 755 config/ctdb-crash-cleanup.sh $(DESTDIR)$(etcdir)/ctdb; fi
|
||||
if [ ! -f $(DESTDIR)$(etcdir)/ctdb/gcore_trace.sh ];then ${INSTALLCMD} -m 755 config/gcore_trace.sh $(DESTDIR)$(etcdir)/ctdb; fi
|
||||
|
||||
install_pmda:
|
||||
$(INSTALLCMD) -m 755 -d $(PMDA_DEST_DIR)
|
||||
|
@ -59,6 +59,30 @@ void ctdb_fatal(struct ctdb_context *ctdb, const char *msg)
|
||||
abort();
|
||||
}
|
||||
|
||||
/* Invoke an external program to do some sort of tracing on the CTDB
|
||||
* process. This might block for a little while. The external
|
||||
* program is specified by the environment variable
|
||||
* CTDB_EXTERNAL_TRACE. This program should take one argument: the
|
||||
* pid of the process to trace. Commonly, the program would be a
|
||||
* wrapper script around gcore.
|
||||
*/
|
||||
void ctdb_external_trace(void)
|
||||
{
|
||||
|
||||
const char * t = getenv("CTDB_EXTERNAL_TRACE");
|
||||
char * cmd;
|
||||
|
||||
if (t == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
cmd = talloc_asprintf(NULL, "%s %lu", t, (unsigned long) getpid());
|
||||
DEBUG(DEBUG_WARNING,("begin external trace: %s\n", cmd));
|
||||
system(cmd);
|
||||
DEBUG(DEBUG_WARNING,("end external trace: %s\n", cmd));
|
||||
talloc_free(cmd);
|
||||
}
|
||||
|
||||
/*
|
||||
parse a IP:port pair
|
||||
*/
|
||||
@ -555,6 +579,7 @@ char *ctdb_addr_to_str(ctdb_sock_addr *addr)
|
||||
break;
|
||||
default:
|
||||
DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family %u\n", addr->sa.sa_family));
|
||||
ctdb_external_trace();
|
||||
}
|
||||
|
||||
return cip;
|
||||
|
3
ctdb/config/gcore_trace.sh
Executable file
3
ctdb/config/gcore_trace.sh
Executable file
@ -0,0 +1,3 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Run gcore on the pid passed as the first argument, using
# "/var/log/core" as the output file name given to -o.  gcore's stdout
# and stderr are both piped to logger under the tag "ctdb:gcore_trace".
gcore -o "/var/log/core" "$1" 2>&1 | logger -t "ctdb:gcore_trace"
|
@ -666,6 +666,7 @@ struct ctdb_fetch_handle {
|
||||
/* internal prototypes */
|
||||
void ctdb_set_error(struct ctdb_context *ctdb, const char *fmt, ...) PRINTF_ATTRIBUTE(2,3);
|
||||
void ctdb_fatal(struct ctdb_context *ctdb, const char *msg);
|
||||
void ctdb_external_trace(void);
|
||||
bool ctdb_same_address(struct ctdb_address *a1, struct ctdb_address *a2);
|
||||
int ctdb_parse_address(struct ctdb_context *ctdb,
|
||||
TALLOC_CTX *mem_ctx, const char *str,
|
||||
|
@ -123,6 +123,7 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%config(noreplace) %{_sysconfdir}/ctdb/notify.sh
|
||||
%config(noreplace) %{_sysconfdir}/ctdb/debug-hung-script.sh
|
||||
%config(noreplace) %{_sysconfdir}/ctdb/ctdb-crash-cleanup.sh
|
||||
%config(noreplace) %{_sysconfdir}/ctdb/gcore_trace.sh
|
||||
%config(noreplace) %{_sysconfdir}/ctdb/functions
|
||||
%attr(755,root,root) %{initdir}/ctdb
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user