1
0
mirror of https://github.com/samba-team/samba.git synced 2025-02-02 09:47:23 +03:00

Remove RT priority, use niceness.

1) It's buggy.  Code needs to be carefully written (i.e. no busy
   loops) to run safely at RT priority, and we fork and run scripts.[1]

2) It makes debugging harder.  If ctdbd loops (as has happened recently),
   it can be extremely hard to get in and see what's happening.  We've already
   seen the valgrind hacks needed to work around it.

3) We have seen recent scheduler problems.  Perhaps they are unrelated,
   but removing this very unusual setup is unlikely to hurt.

4) It doesn't make anything faster.  Under all but the most perverse of
   circumstances, 99% of the CPU gives the same performance as 100%, and
   we will always preempt normal processes anyway.

[1] I made this worse in 0fafdcb8d353 "eventscript: fork() a child for
    each script" by removing the switch_from_server_to_client() which
    restored it, but even that was only for monitor scripts.  Others were
    run with RT priority.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>


(This used to be ctdb commit 482c302d46e2162d0cf552f8456bc49573ae729d)
This commit is contained in:
Rusty Russell 2009-12-16 19:26:22 +10:30
parent f148735928
commit c488ba440a
5 changed files with 11 additions and 56 deletions

View File

@ -3627,11 +3627,6 @@ int switch_from_server_to_client(struct ctdb_context *ctdb)
close(ctdb->daemon.sd);
ctdb->daemon.sd = -1;
/* the client does not need to be realtime */
if (ctdb->do_setsched) {
ctdb_restore_scheduler(ctdb);
}
/* initialise ctdb */
ret = ctdb_socket_connect(ctdb);
if (ret != 0) {

View File

@ -323,52 +323,19 @@ struct ctdb_rec_data *ctdb_marshall_loop_next(struct ctdb_marshall_buffer *m, st
return r;
}
#if HAVE_SCHED_H
#include <sched.h>
#endif
/*
if possible, make this task real time
if possible, make this task very high priority
*/
void ctdb_set_scheduler(struct ctdb_context *ctdb)
void ctdb_high_priority(struct ctdb_context *ctdb)
{
#if HAVE_SCHED_SETSCHEDULER
struct sched_param p;
if (ctdb->saved_scheduler_param == NULL) {
ctdb->saved_scheduler_param = talloc_size(ctdb, sizeof(p));
}
if (sched_getparam(0, (struct sched_param *)ctdb->saved_scheduler_param) == -1) {
DEBUG(DEBUG_ERR,("Unable to get old scheduler params\n"));
return;
}
p = *(struct sched_param *)ctdb->saved_scheduler_param;
p.sched_priority = 1;
if (sched_setscheduler(0, SCHED_FIFO, &p) == -1) {
DEBUG(DEBUG_CRIT,("Unable to set scheduler to SCHED_FIFO (%s)\n",
strerror(errno)));
errno = 0;
if (nice(-20) == -1 && errno != 0) {
DEBUG(DEBUG_WARNING,("Unable to renice self: %s\n",
strerror(errno)));
} else {
DEBUG(DEBUG_NOTICE,("Set scheduler to SCHED_FIFO\n"));
DEBUG(DEBUG_NOTICE,("Scheduler says I'm nice: %i\n",
getpriority(PRIO_PROCESS, getpid())));
}
#endif
}
/*
restore previous scheduler parameters
*/
void ctdb_restore_scheduler(struct ctdb_context *ctdb)
{
#if HAVE_SCHED_SETSCHEDULER
if (ctdb->saved_scheduler_param == NULL) {
ctdb_fatal(ctdb, "No saved scheduler parameters\n");
}
if (sched_setscheduler(0, SCHED_OTHER, (struct sched_param *)ctdb->saved_scheduler_param) == -1) {
ctdb_fatal(ctdb, "Unable to restore old scheduler parameters\n");
}
#endif
}
void set_nonblocking(int fd)

View File

@ -431,9 +431,7 @@ struct ctdb_context {
uint32_t recovery_master;
struct ctdb_call_state *pending_calls;
struct ctdb_client_ip *client_ip_list;
bool do_setsched;
bool do_checkpublicip;
void *saved_scheduler_param;
struct _trbt_tree_t *server_ids;
const char *event_script_dir;
const char *notification_script;
@ -1244,8 +1242,7 @@ void ctdb_call_resend_all(struct ctdb_context *ctdb);
void ctdb_node_dead(struct ctdb_node *node);
void ctdb_node_connected(struct ctdb_node *node);
bool ctdb_blocking_freeze(struct ctdb_context *ctdb);
void ctdb_set_scheduler(struct ctdb_context *ctdb);
void ctdb_restore_scheduler(struct ctdb_context *ctdb);
void ctdb_high_priority(struct ctdb_context *ctdb);
int32_t ctdb_control_takeover_ip(struct ctdb_context *ctdb,
struct ctdb_req_control *c,
TDB_DATA indata,

View File

@ -720,10 +720,7 @@ int ctdb_start_daemon(struct ctdb_context *ctdb, bool do_fork, bool use_syslog)
DEBUG(DEBUG_ERR, ("Starting CTDBD as pid : %u\n", ctdbd_pid));
if (ctdb->do_setsched) {
/* try to set us up as realtime */
ctdb_set_scheduler(ctdb);
}
ctdb_high_priority(ctdb);
/* ensure the socket is deleted on exit of the daemon */
domain_socket_name = talloc_strdup(talloc_autofree_context(), ctdb->daemon.name);

View File

@ -127,7 +127,7 @@ int main(int argc, const char *argv[])
{ "dbdir", 0, POPT_ARG_STRING, &options.db_dir, 0, "directory for the tdb files", NULL },
{ "dbdir-persistent", 0, POPT_ARG_STRING, &options.db_dir_persistent, 0, "directory for persistent tdb files", NULL },
{ "reclock", 0, POPT_ARG_STRING, &options.recovery_lock_file, 0, "location of recovery lock file", "filename" },
{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "disable setscheduler SCHED_FIFO call, use mmap for tdbs", NULL },
{ "valgrinding", 0, POPT_ARG_NONE, &options.valgrinding, 0, "make valgrind more effective", NULL },
{ "syslog", 0, POPT_ARG_NONE, &options.use_syslog, 0, "log messages to syslog", NULL },
{ "start-as-disabled", 0, POPT_ARG_NONE, &options.start_as_disabled, 0, "Node starts in disabled state", NULL },
{ "start-as-stopped", 0, POPT_ARG_NONE, &options.start_as_stopped, 0, "Node starts in stopped state", NULL },
@ -312,7 +312,6 @@ int main(int argc, const char *argv[])
}
ctdb->valgrinding = options.valgrinding;
ctdb->do_setsched = !ctdb->valgrinding;
ctdb->do_checkpublicip = !options.no_publicipcheck;