From c488ba440ad33a89dc2b1d7b3e5146ee53150847 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 16 Dec 2009 19:26:22 +1030 Subject: [PATCH] Remove RT priority, use niceness. 1) It's buggy. Code needs to be carefully written (i.e. no busy loops) to handle running with it, and we fork and run scripts.[1] 2) It makes debugging harder. If ctdbd loops (as has happened recently) it can be extremely hard to get in and see what's happening. We've already seen the valgrind hacks. 3) We have seen recent scheduler problems. Perhaps they are unrelated, but removing this very unusual setup is unlikely to hurt. 4) It doesn't make anything faster. Under all but the most perverse of circumstances, 99% of the CPU gives the same performance as 100%, and we will always preempt normal processes anyway. [1] I made this worse in 0fafdcb8d353 "eventscript: fork() a child for each script" by removing the switch_from_server_to_client() which restored it, but even that was only for monitor scripts. Others were run with RT priority. 
Signed-off-by: Rusty Russell (This used to be ctdb commit 482c302d46e2162d0cf552f8456bc49573ae729d) --- ctdb/client/ctdb_client.c | 5 ---- ctdb/common/ctdb_util.c | 49 ++++++------------------------------- ctdb/include/ctdb_private.h | 5 +--- ctdb/server/ctdb_daemon.c | 5 +--- ctdb/server/ctdbd.c | 3 +-- 5 files changed, 11 insertions(+), 56 deletions(-) diff --git a/ctdb/client/ctdb_client.c b/ctdb/client/ctdb_client.c index 75bc0a45fb6..236e21a5dc5 100644 --- a/ctdb/client/ctdb_client.c +++ b/ctdb/client/ctdb_client.c @@ -3627,11 +3627,6 @@ int switch_from_server_to_client(struct ctdb_context *ctdb) close(ctdb->daemon.sd); ctdb->daemon.sd = -1; - /* the client does not need to be realtime */ - if (ctdb->do_setsched) { - ctdb_restore_scheduler(ctdb); - } - /* initialise ctdb */ ret = ctdb_socket_connect(ctdb); if (ret != 0) { diff --git a/ctdb/common/ctdb_util.c b/ctdb/common/ctdb_util.c index 787d8d71748..fe2ada45ff5 100644 --- a/ctdb/common/ctdb_util.c +++ b/ctdb/common/ctdb_util.c @@ -323,52 +323,19 @@ struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, st return r; } - -#if HAVE_SCHED_H -#include <sched.h> -#endif - /* - if possible, make this task real time + if possible, make this task very high priority */ -void ctdb_set_scheduler(struct ctdb_context *ctdb) +void ctdb_high_priority(struct ctdb_context *ctdb) { -#if HAVE_SCHED_SETSCHEDULER - struct sched_param p; - if (ctdb->saved_scheduler_param == NULL) { - ctdb->saved_scheduler_param = talloc_size(ctdb, sizeof(p)); - } - - if (sched_getparam(0, (struct sched_param *)ctdb->saved_scheduler_param) == -1) { - DEBUG(DEBUG_ERR,("Unable to get old scheduler params\n")); - return; - } - - p = *(struct sched_param *)ctdb->saved_scheduler_param; - p.sched_priority = 1; - - if (sched_setscheduler(0, SCHED_FIFO, &p) == -1) { - DEBUG(DEBUG_CRIT,("Unable to set scheduler to SCHED_FIFO (%s)\n", - strerror(errno))); + errno = 0; + if (nice(-20) == -1 && errno != 0) { + DEBUG(DEBUG_WARNING,("Unable to 
renice self: %s\n", + strerror(errno))); } else { - DEBUG(DEBUG_NOTICE,("Set scheduler to SCHED_FIFO\n")); + DEBUG(DEBUG_NOTICE,("Scheduler says I'm nice: %i\n", + getpriority(PRIO_PROCESS, getpid()))); } -#endif -} - -/* - restore previous scheduler parameters - */ -void ctdb_restore_scheduler(struct ctdb_context *ctdb) -{ -#if HAVE_SCHED_SETSCHEDULER - if (ctdb->saved_scheduler_param == NULL) { - ctdb_fatal(ctdb, "No saved scheduler parameters\n"); - } - if (sched_setscheduler(0, SCHED_OTHER, (struct sched_param *)ctdb->saved_scheduler_param) == -1) { - ctdb_fatal(ctdb, "Unable to restore old scheduler parameters\n"); - } -#endif } void set_nonblocking(int fd) diff --git a/ctdb/include/ctdb_private.h b/ctdb/include/ctdb_private.h index e577abc9f8d..9fc2be74d14 100644 --- a/ctdb/include/ctdb_private.h +++ b/ctdb/include/ctdb_private.h @@ -431,9 +431,7 @@ struct ctdb_context { uint32_t recovery_master; struct ctdb_call_state *pending_calls; struct ctdb_client_ip *client_ip_list; - bool do_setsched; bool do_checkpublicip; - void *saved_scheduler_param; struct _trbt_tree_t *server_ids; const char *event_script_dir; const char *notification_script; @@ -1244,8 +1242,7 @@ void ctdb_call_resend_all(struct ctdb_context *ctdb); void ctdb_node_dead(struct ctdb_node *node); void ctdb_node_connected(struct ctdb_node *node); bool ctdb_blocking_freeze(struct ctdb_context *ctdb); -void ctdb_set_scheduler(struct ctdb_context *ctdb); -void ctdb_restore_scheduler(struct ctdb_context *ctdb); +void ctdb_high_priority(struct ctdb_context *ctdb); int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb, struct ctdb_req_control *c, TDB_DATA indata, diff --git a/ctdb/server/ctdb_daemon.c b/ctdb/server/ctdb_daemon.c index 9ade55aca86..97051625353 100644 --- a/ctdb/server/ctdb_daemon.c +++ b/ctdb/server/ctdb_daemon.c @@ -720,10 +720,7 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog) DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid)); - if 
(ctdb->do_setsched) { - /* try to set us up as realtime */ - ctdb_set_scheduler(ctdb); - } + ctdb_high_priority(ctdb); /* ensure the socket is deleted on exit of the daemon */ domain_socket_name = talloc_strdup(talloc_autofree_context(), ctdb->daemon.name); diff --git a/ctdb/server/ctdbd.c b/ctdb/server/ctdbd.c index f38ed66d835..80c5b150f4b 100644 --- a/ctdb/server/ctdbd.c +++ b/ctdb/server/ctdbd.c @@ -127,7 +127,7 @@ int main(int argc, const char *argv[]) { "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL }, { "dbdir-persistent", 0, POPT_ARG_STRING, &options.db_dir_persistent, 0, "directory for persistent tdb files", NULL }, { "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" }, - { "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL }, + { "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "make valgrind more effective", NULL }, { "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL }, { "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL }, { "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL }, @@ -312,7 +312,6 @@ int main(int argc, const char *argv[]) } ctdb->valgrinding = options.valgrinding; - ctdb->do_setsched = !ctdb->valgrinding; ctdb->do_checkpublicip = !options.no_publicipcheck;