root/daemons/controld/controld_control.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. do_ha_control
  2. do_shutdown
  3. do_shutdown_req
  4. crmd_fast_exit
  5. crmd_exit
  6. do_exit
  7. sigpipe_ignore
  8. do_startup
  9. accept_controller_client
  10. dispatch_controller_ipc
  11. ipc_client_disconnected
  12. ipc_connection_destroyed
  13. do_stop
  14. do_started
  15. do_recover
  16. crmd_metadata
  17. controller_option
  18. config_query_callback
  19. crm_read_options
  20. do_read_config
  21. crm_shutdown

   1 /*
   2  * Copyright 2004-2022 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 
  12 #include <sys/param.h>
  13 #include <sys/types.h>
  14 #include <sys/stat.h>
  15 
  16 #include <crm/crm.h>
  17 #include <crm/msg_xml.h>
  18 #include <crm/pengine/rules.h>
  19 #include <crm/cluster/internal.h>
  20 #include <crm/cluster/election_internal.h>
  21 #include <crm/common/ipc_internal.h>
  22 
  23 #include <pacemaker-controld.h>
  24 
// IPC server through which clients (tools and other daemons) reach the controller
qb_ipcs_service_t *ipcs = NULL;

#if SUPPORT_COROSYNC
// Implemented in the Corosync-specific membership code
extern gboolean crm_connect_corosync(crm_cluster_t * cluster);
#endif

void crm_shutdown(int nsig);            // SIGTERM handler (registered in do_startup())
gboolean crm_read_options(gpointer user_data);  // config_read trigger callback

gboolean fsa_has_quorum = FALSE;        // Whether the local node currently has quorum
crm_trigger_t *fsa_source = NULL;       // Trigger that runs the FSA from the mainloop
crm_trigger_t *config_read = NULL;      // Trigger that re-reads cluster options from the CIB
bool no_quorum_suicide_escalation = FALSE;  // Set when no-quorum-policy=suicide and SBD is present
bool controld_shutdown_lock_enabled = false;    // Cached value of the shutdown-lock cluster option
  39 
  40 /*       A_HA_CONNECT   */
  41 void
  42 do_ha_control(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
  43               enum crmd_fsa_cause cause,
  44               enum crmd_fsa_state cur_state,
  45               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
  46 {
  47     gboolean registered = FALSE;
  48     static crm_cluster_t *cluster = NULL;
  49 
  50     if (cluster == NULL) {
  51         cluster = calloc(1, sizeof(crm_cluster_t));
  52     }
  53 
  54     if (action & A_HA_DISCONNECT) {
  55         crm_cluster_disconnect(cluster);
  56         crm_info("Disconnected from the cluster");
  57 
  58         controld_set_fsa_input_flags(R_HA_DISCONNECTED);
  59     }
  60 
  61     if (action & A_HA_CONNECT) {
  62         crm_set_status_callback(&peer_update_callback);
  63         crm_set_autoreap(FALSE);
  64 
  65         if (is_corosync_cluster()) {
  66 #if SUPPORT_COROSYNC
  67             registered = crm_connect_corosync(cluster);
  68 #endif
  69         }
  70 
  71         if (registered == TRUE) {
  72             controld_election_init(cluster->uname);
  73             fsa_our_uname = cluster->uname;
  74             fsa_our_uuid = cluster->uuid;
  75             if(cluster->uuid == NULL) {
  76                 crm_err("Could not obtain local uuid");
  77                 registered = FALSE;
  78             }
  79         }
  80 
  81         if (registered == FALSE) {
  82             controld_set_fsa_input_flags(R_HA_DISCONNECTED);
  83             register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
  84             return;
  85         }
  86 
  87         populate_cib_nodes(node_update_none, __func__);
  88         controld_clear_fsa_input_flags(R_HA_DISCONNECTED);
  89         crm_info("Connected to the cluster");
  90     }
  91 
  92     if (action & ~(A_HA_CONNECT | A_HA_DISCONNECT)) {
  93         crm_err("Unexpected action %s in %s", fsa_action2string(action),
  94                 __func__);
  95     }
  96 }
  97 
/*       A_SHUTDOWN     */

/*!
 * \brief FSA action: note that we are shutting down and stop using the fencer
 *
 * All parameters are unused here but required by the FSA action signature.
 */
void
do_shutdown(long long action,
            enum crmd_fsa_cause cause,
            enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    /* just in case */
    controld_set_fsa_input_flags(R_SHUTDOWN);
    controld_disconnect_fencer(FALSE);
}
 108 
 109 /*       A_SHUTDOWN_REQ */
 110 void
 111 do_shutdown_req(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 112                 enum crmd_fsa_cause cause,
 113                 enum crmd_fsa_state cur_state,
 114                 enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 115 {
 116     xmlNode *msg = NULL;
 117 
 118     controld_set_fsa_input_flags(R_SHUTDOWN);
 119     //controld_set_fsa_input_flags(R_STAYDOWN);
 120     crm_info("Sending shutdown request to all peers (DC is %s)",
 121              (fsa_our_dc? fsa_our_dc : "not set"));
 122     msg = create_request(CRM_OP_SHUTDOWN_REQ, NULL, NULL, CRM_SYSTEM_CRMD, CRM_SYSTEM_CRMD, NULL);
 123 
 124     if (send_cluster_message(NULL, crm_msg_crmd, msg, TRUE) == FALSE) {
 125         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 126     }
 127     free_xml(msg);
 128 }
 129 
// Shared controller state defined in other controller source files,
// referenced here so crmd_exit()/crmd_fast_exit() can release it
extern char *max_generation_from;
extern xmlNode *max_generation_xml;
extern GHashTable *resource_history;
extern GHashTable *voted;
extern pcmk__output_t *logger_out;
 135 
/*!
 * \brief Exit the daemon immediately, without draining the mainloop
 *
 * \param[in] exit_code  Exit status to use (may be remapped; see below)
 *
 * \note If R_STAYDOWN is set, the exit code is forced to CRM_EX_FATAL so the
 *       init system will not respawn us; if we are exiting "successfully"
 *       while in recovery, the code is remapped to CRM_EX_ERROR.
 */
void
crmd_fast_exit(crm_exit_t exit_code)
{
    if (pcmk_is_set(fsa_input_register, R_STAYDOWN)) {
        crm_warn("Inhibiting respawn "CRM_XS" remapping exit code %d to %d",
                 exit_code, CRM_EX_FATAL);
        exit_code = CRM_EX_FATAL;

    } else if ((exit_code == CRM_EX_OK)
               && pcmk_is_set(fsa_input_register, R_IN_RECOVERY)) {
        crm_err("Could not recover from internal error");
        exit_code = CRM_EX_ERROR;
    }

    // Flush and release the logger before the process goes away
    if (logger_out != NULL) {
        logger_out->finish(logger_out, exit_code, true, NULL);
        pcmk__output_free(logger_out);
        logger_out = NULL;
    }

    crm_exit(exit_code);
}
 158 
/*!
 * \brief Tear down controller state and exit as gracefully as possible
 *
 * \param[in] exit_code  Intended exit status
 *
 * \return Exit status actually used (callers typically exit with it)
 *
 * \note On error (or re-entry with an error), this falls through to
 *       crmd_fast_exit() and does not return. The extensive cleanup below is
 *       primarily to keep valgrind output meaningful; ordering is significant.
 */
crm_exit_t
crmd_exit(crm_exit_t exit_code)
{
    GList *gIter = NULL;
    GMainLoop *mloop = crmd_mainloop;

    // Guards against re-entry while the shutdown sequence is running
    static bool in_progress = FALSE;

    if (in_progress && (exit_code == CRM_EX_OK)) {
        crm_debug("Exit is already in progress");
        return exit_code;

    } else if(in_progress) {
        // A second, failed exit attempt: stop trying to be graceful
        crm_notice("Error during shutdown process, exiting now with status %d (%s)",
                   exit_code, crm_exit_str(exit_code));
        crm_write_blackbox(SIGTRAP, NULL);
        crmd_fast_exit(exit_code);
    }

    in_progress = TRUE;
    crm_trace("Preparing to exit with status %d (%s)",
              exit_code, crm_exit_str(exit_code));

    /* Suppress secondary errors resulting from us disconnecting everything */
    controld_set_fsa_input_flags(R_HA_DISCONNECTED);

/* Close all IPC servers and clients to ensure any and all shared memory files are cleaned up */

    if(ipcs) {
        crm_trace("Closing IPC server");
        mainloop_del_ipc_server(ipcs);
        ipcs = NULL;
    }

    controld_close_attrd_ipc();
    controld_shutdown_schedulerd_ipc();
    controld_disconnect_fencer(TRUE);

    if ((exit_code == CRM_EX_OK) && (crmd_mainloop == NULL)) {
        crm_debug("No mainloop detected");
        exit_code = CRM_EX_ERROR;
    }

    /* On an error, just get out.
     *
     * Otherwise, make the effort to have mainloop exit gracefully so
     * that it (mostly) cleans up after itself and valgrind has less
     * to report on - allowing real errors stand out
     */
    if (exit_code != CRM_EX_OK) {
        crm_notice("Forcing immediate exit with status %d (%s)",
                   exit_code, crm_exit_str(exit_code));
        crm_write_blackbox(SIGTRAP, NULL);
        crmd_fast_exit(exit_code); // does not return
    }

/* Clean up as much memory as possible for valgrind */

    // Drop any FSA inputs still queued; they will never be processed
    for (gIter = fsa_message_queue; gIter != NULL; gIter = gIter->next) {
        fsa_data_t *fsa_data = gIter->data;

        crm_info("Dropping %s: [ state=%s cause=%s origin=%s ]",
                 fsa_input2string(fsa_data->fsa_input),
                 fsa_state2string(fsa_state),
                 fsa_cause2string(fsa_data->fsa_cause), fsa_data->origin);
        delete_fsa_input(fsa_data);
    }

    controld_clear_fsa_input_flags(R_MEMBERSHIP);
    g_list_free(fsa_message_queue); fsa_message_queue = NULL;

    controld_election_fini();

    /* Tear down the CIB manager connection, but don't free it yet -- it could
     * be used when we drain the mainloop later.
     */

    controld_disconnect_cib_manager();

    verify_stopped(fsa_state, LOG_WARNING);
    controld_clear_fsa_input_flags(R_LRM_CONNECTED);
    lrm_state_destroy_all();

    /* This basically will not work, since mainloop has a reference to it */
    mainloop_destroy_trigger(fsa_source); fsa_source = NULL;

    mainloop_destroy_trigger(config_read); config_read = NULL;
    mainloop_destroy_trigger(transition_trigger); transition_trigger = NULL;

    pcmk__client_cleanup();
    crm_peer_destroy();

    controld_free_fsa_timers();
    te_cleanup_stonith_history_sync(NULL, TRUE);
    controld_free_sched_timer();

    free(fsa_our_dc_version); fsa_our_dc_version = NULL;
    free(fsa_our_uname); fsa_our_uname = NULL;
    free(fsa_our_uuid); fsa_our_uuid = NULL;
    free(fsa_our_dc); fsa_our_dc = NULL;

    free(fsa_cluster_name); fsa_cluster_name = NULL;

    free(te_uuid); te_uuid = NULL;
    free(failed_stop_offset); failed_stop_offset = NULL;
    free(failed_start_offset); failed_start_offset = NULL;

    free(max_generation_from); max_generation_from = NULL;
    free_xml(max_generation_xml); max_generation_xml = NULL;

    mainloop_destroy_signal(SIGPIPE);
    mainloop_destroy_signal(SIGUSR1);
    mainloop_destroy_signal(SIGTERM);
    mainloop_destroy_signal(SIGTRAP);
    /* leave SIGCHLD engaged as we might still want to drain some service-actions */

    if (mloop) {
        GMainContext *ctx = g_main_loop_get_context(crmd_mainloop);

        /* Don't re-enter this block */
        crmd_mainloop = NULL;

        /* no signals on final draining anymore */
        mainloop_destroy_signal(SIGCHLD);

        crm_trace("Draining mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));

        {
            // Bounded drain: dispatch pending events, but at most 10 passes
            int lpc = 0;

            while((g_main_context_pending(ctx) && lpc < 10)) {
                lpc++;
                crm_trace("Iteration %d", lpc);
                g_main_context_dispatch(ctx);
            }
        }

        crm_trace("Closing mainloop %d %d", g_main_loop_is_running(mloop), g_main_context_pending(ctx));
        g_main_loop_quit(mloop);

        /* Won't do anything yet, since we're inside it now */
        g_main_loop_unref(mloop);
    } else {
        mainloop_destroy_signal(SIGCHLD);
    }

    // Now it is safe to free the CIB connection (mainloop drained above)
    cib_delete(fsa_cib_conn);
    fsa_cib_conn = NULL;

    throttle_fini();

    /* Graceful */
    crm_trace("Done preparing for exit with status %d (%s)",
              exit_code, crm_exit_str(exit_code));
    return exit_code;
}
 315 
 316 /*       A_EXIT_0, A_EXIT_1     */
 317 void
 318 do_exit(long long action,
     /* [previous][next][first][last][top][bottom][index][help] */
 319         enum crmd_fsa_cause cause,
 320         enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
 321 {
 322     crm_exit_t exit_code = CRM_EX_OK;
 323     int log_level = LOG_INFO;
 324     const char *exit_type = "gracefully";
 325 
 326     if (action & A_EXIT_1) {
 327         log_level = LOG_ERR;
 328         exit_type = "forcefully";
 329         exit_code = CRM_EX_ERROR;
 330     }
 331 
 332     verify_stopped(cur_state, LOG_ERR);
 333     do_crm_log(log_level, "Performing %s - %s exiting the controller",
 334                fsa_action2string(action), exit_type);
 335 
 336     crm_info("[%s] stopped (%d)", crm_system_name, exit_code);
 337     crmd_exit(exit_code);
 338 }
 339 
 340 static void sigpipe_ignore(int nsig) { return; }
     /* [previous][next][first][last][top][bottom][index][help] */
 341 
/*       A_STARTUP      */

/*!
 * \brief FSA action: one-time daemon initialization
 *
 * Installs signal handlers, creates the mainloop triggers (FSA, config read,
 * transition graph), creates the CIB connection object, and initializes
 * executor state and FSA timers. Registers an FSA error if timer setup fails.
 *
 * All parameters are unused here but required by the FSA action signature.
 */
void
do_startup(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_debug("Registering Signal Handlers");
    mainloop_add_signal(SIGTERM, crm_shutdown);
    mainloop_add_signal(SIGPIPE, sigpipe_ignore);

    fsa_source = mainloop_add_trigger(G_PRIORITY_HIGH, crm_fsa_trigger, NULL);
    config_read = mainloop_add_trigger(G_PRIORITY_HIGH, crm_read_options, NULL);
    transition_trigger = mainloop_add_trigger(G_PRIORITY_LOW, te_graph_trigger, NULL);

    crm_debug("Creating CIB manager and executor objects");
    fsa_cib_conn = cib_new();

    lrm_state_init_local();
    if (controld_init_fsa_timers() == FALSE) {
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    }
}
 364 
 365 // \return libqb error code (0 on success, -errno on error)
 366 static int32_t
 367 accept_controller_client(qb_ipcs_connection_t *c, uid_t uid, gid_t gid)
     /* [previous][next][first][last][top][bottom][index][help] */
 368 {
 369     crm_trace("Accepting new IPC client connection");
 370     if (pcmk__new_client(c, uid, gid) == NULL) {
 371         return -EIO;
 372     }
 373     return 0;
 374 }
 375 
// \return libqb error code (0 on success, -errno on error)

/*!
 * \brief libqb callback: an IPC client sent us a message
 *
 * Parses the message, acknowledges receipt, and (if the client is authorized)
 * routes the message into the FSA. Always returns 0 so libqb keeps the
 * connection open; protocol errors are reported via the ack's status.
 */
static int32_t
dispatch_controller_ipc(qb_ipcs_connection_t * c, void *data, size_t size)
{
    uint32_t id = 0;
    uint32_t flags = 0;
    pcmk__client_t *client = pcmk__find_client(c);

    xmlNode *msg = pcmk__client_data2xml(client, data, &id, &flags);

    if (msg == NULL) {
        // Unparsable payload: tell the client it broke the protocol
        pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_PROTOCOL);
        return 0;
    }
    // Ack now; the actual result of processing is not known synchronously
    pcmk__ipc_send_ack(client, id, flags, "ack", NULL, CRM_EX_INDETERMINATE);

    CRM_ASSERT(client->user != NULL);
    pcmk__update_acl_user(msg, F_CRM_USER, client->user);

    // Stamp the message with the sender's client ID before routing
    crm_xml_add(msg, F_CRM_SYS_FROM, client->id);
    if (controld_authorize_ipc_message(msg, client, NULL)) {
        crm_trace("Processing IPC message from client %s",
                  pcmk__client_name(client));
        route_message(C_IPC_MESSAGE, msg);
    }

    trigger_fsa();
    free_xml(msg);
    return 0;
}
 406 
 407 static int32_t
 408 ipc_client_disconnected(qb_ipcs_connection_t *c)
     /* [previous][next][first][last][top][bottom][index][help] */
 409 {
 410     pcmk__client_t *client = pcmk__find_client(c);
 411 
 412     if (client) {
 413         crm_trace("Disconnecting %sregistered client %s (%p/%p)",
 414                   (client->userdata? "" : "un"), pcmk__client_name(client),
 415                   c, client);
 416         free(client->userdata);
 417         pcmk__free_client(client);
 418         trigger_fsa();
 419     }
 420     return 0;
 421 }
 422 
/*!
 * \brief libqb callback: an IPC connection object is being destroyed
 *
 * Delegates to ipc_client_disconnected() so cleanup happens exactly once
 * regardless of which callback libqb fires.
 */
static void
ipc_connection_destroyed(qb_ipcs_connection_t *c)
{
    crm_trace("Connection %p", c);
    ipc_client_disconnected(c);
}
 429 
/*       A_STOP */

/*!
 * \brief FSA action: stop accepting IPC clients and request termination
 *
 * All parameters are unused here but required by the FSA action signature.
 */
void
do_stop(long long action,
        enum crmd_fsa_cause cause,
        enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    crm_trace("Closing IPC server");
    mainloop_del_ipc_server(ipcs); ipcs = NULL;
    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
 440 
/*       A_STARTED      */

/*!
 * \brief FSA action: finish start-up once all prerequisites are in place
 *
 * Stalls the FSA (to be retried later) until membership, executor, CIB,
 * configuration, and peer data are all available; then starts the IPC server
 * and moves the FSA out of the starting state.
 */
void
do_started(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    // IPC server callbacks (accept/dispatch/disconnect handlers above)
    static struct qb_ipcs_service_handlers crmd_callbacks = {
        .connection_accept = accept_controller_client,
        .connection_created = NULL,
        .msg_process = dispatch_controller_ipc,
        .connection_closed = ipc_client_disconnected,
        .connection_destroyed = ipc_connection_destroyed
    };

    if (cur_state != S_STARTING) {
        crm_err("Start cancelled... %s", fsa_state2string(cur_state));
        return;

    } else if (!pcmk_is_set(fsa_input_register, R_MEMBERSHIP)) {
        crm_info("Delaying start, no membership data (%.16llx)", R_MEMBERSHIP);

        crmd_fsa_stall(TRUE);
        return;

    } else if (!pcmk_is_set(fsa_input_register, R_LRM_CONNECTED)) {
        crm_info("Delaying start, not connected to executor (%.16llx)", R_LRM_CONNECTED);

        crmd_fsa_stall(TRUE);
        return;

    } else if (!pcmk_is_set(fsa_input_register, R_CIB_CONNECTED)) {
        crm_info("Delaying start, CIB not connected (%.16llx)", R_CIB_CONNECTED);

        crmd_fsa_stall(TRUE);
        return;

    } else if (!pcmk_is_set(fsa_input_register, R_READ_CONFIG)) {
        crm_info("Delaying start, Config not read (%.16llx)", R_READ_CONFIG);

        crmd_fsa_stall(TRUE);
        return;

    } else if (!pcmk_is_set(fsa_input_register, R_PEER_DATA)) {

        crm_info("Delaying start, No peer data (%.16llx)", R_PEER_DATA);
        crmd_fsa_stall(TRUE);
        return;
    }

    crm_debug("Init server comms");
    ipcs = pcmk__serve_controld_ipc(&crmd_callbacks);
    if (ipcs == NULL) {
        crm_err("Failed to create IPC server: shutting down and inhibiting respawn");
        register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
    } else {
        crm_notice("Pacemaker controller successfully started and accepting connections");
    }
    controld_trigger_fencer_connect();

    controld_clear_fsa_input_flags(R_STARTING);
    register_fsa_input(msg_data->fsa_cause, I_PENDING, NULL);
}
 503 
/*       A_RECOVER      */

/*!
 * \brief FSA action: respond to an internal error by shutting down quickly
 *
 * All parameters are unused here but required by the FSA action signature.
 */
void
do_recover(long long action,
           enum crmd_fsa_cause cause,
           enum crmd_fsa_state cur_state, enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    controld_set_fsa_input_flags(R_IN_RECOVERY);
    crm_warn("Fast-tracking shutdown in response to errors");

    register_fsa_input(C_FSA_INTERNAL, I_TERMINATE, NULL);
}
 515 
 516 static pcmk__cluster_option_t controller_options[] = {
 517     /* name, old name, type, allowed values,
 518      * default value, validator,
 519      * short description,
 520      * long description
 521      */
 522     {
 523         "dc-version", NULL, "string", NULL, PCMK__VALUE_NONE, NULL,
 524         N_("Pacemaker version on cluster node elected Designated Controller (DC)"),
 525         N_("Includes a hash which identifies the exact changeset the code was "
 526             "built from. Used for diagnostic purposes.")
 527     },
 528     {
 529         "cluster-infrastructure", NULL, "string", NULL, "corosync", NULL,
 530         N_("The messaging stack on which Pacemaker is currently running"),
 531         N_("Used for informational and diagnostic purposes.")
 532     },
 533     {
 534         "cluster-name", NULL, "string", NULL, NULL, NULL,
 535         N_("An arbitrary name for the cluster"),
 536         N_("This optional value is mostly for users' convenience as desired "
 537             "in administration, but may also be used in Pacemaker "
 538             "configuration rules via the #cluster-name node attribute, and "
 539             "by higher-level tools and resource agents.")
 540     },
 541     {
 542         XML_CONFIG_ATTR_DC_DEADTIME, NULL, "time",
 543         NULL, "20s", pcmk__valid_interval_spec,
 544         N_("How long to wait for a response from other nodes during start-up"),
 545         N_("The optimal value will depend on the speed and load of your network "
 546             "and the type of switches used.")
 547     },
 548     {
 549         XML_CONFIG_ATTR_RECHECK, NULL, "time",
 550         N_("Zero disables polling, while positive values are an interval in seconds"
 551             "(unless other units are specified, for example \"5min\")"),
 552         "15min", pcmk__valid_interval_spec,
 553         N_("Polling interval to recheck cluster state and evaluate rules "
 554             "with date specifications"),
 555         N_("Pacemaker is primarily event-driven, and looks ahead to know when to "
 556             "recheck cluster state for failure timeouts and most time-based "
 557             "rules. However, it will also recheck the cluster after this "
 558             "amount of inactivity, to evaluate rules with date specifications "
 559             "and serve as a fail-safe for certain types of scheduler bugs.")
 560     },
 561     {
 562         "load-threshold", NULL, "percentage", NULL,
 563         "80%", pcmk__valid_percentage,
 564         N_("Maximum amount of system load that should be used by cluster nodes"),
 565         N_("The cluster will slow down its recovery process when the amount of "
 566             "system resources used (currently CPU) approaches this limit"),
 567     },
 568     {
 569         "node-action-limit", NULL, "integer", NULL,
 570         "0", pcmk__valid_number,
 571         N_("Maximum number of jobs that can be scheduled per node "
 572             "(defaults to 2x cores)")
 573     },
 574     { XML_CONFIG_ATTR_FENCE_REACTION, NULL, "string", NULL, "stop", NULL,
 575         N_("How a cluster node should react if notified of its own fencing"),
 576         N_("A cluster node may receive notification of its own fencing if fencing "
 577         "is misconfigured, or if fabric fencing is in use that doesn't cut "
 578         "cluster communication. Allowed values are \"stop\" to attempt to "
 579         "immediately stop Pacemaker and stay stopped, or \"panic\" to attempt "
 580         "to immediately reboot the local node, falling back to stop on failure.")
 581     },
 582     {
 583         XML_CONFIG_ATTR_ELECTION_FAIL, NULL, "time", NULL,
 584         "2min", pcmk__valid_interval_spec,
 585         "*** Advanced Use Only ***",
 586         N_("Declare an election failed if it is not decided within this much "
 587             "time. If you need to adjust this value, it probably indicates "
 588             "the presence of a bug.")
 589     },
 590     {
 591         XML_CONFIG_ATTR_FORCE_QUIT, NULL, "time", NULL,
 592         "20min", pcmk__valid_interval_spec,
 593         "*** Advanced Use Only ***",
 594         N_("Exit immediately if shutdown does not complete within this much "
 595             "time. If you need to adjust this value, it probably indicates "
 596             "the presence of a bug.")
 597     },
 598     {
 599         "join-integration-timeout", "crmd-integration-timeout", "time", NULL,
 600         "3min", pcmk__valid_interval_spec,
 601         "*** Advanced Use Only ***",
 602         N_("If you need to adjust this value, it probably indicates "
 603             "the presence of a bug.")
 604     },
 605     {
 606         "join-finalization-timeout", "crmd-finalization-timeout", "time", NULL,
 607         "30min", pcmk__valid_interval_spec,
 608         "*** Advanced Use Only ***",
 609         N_("If you need to adjust this value, it probably indicates "
 610             "the presence of a bug.")
 611     },
 612     {
 613         "transition-delay", "crmd-transition-delay", "time", NULL,
 614         "0s", pcmk__valid_interval_spec,
 615         N_("*** Advanced Use Only *** Enabling this option will slow down "
 616             "cluster recovery under all conditions"),
 617         N_("Delay cluster recovery for this much time to allow for additional "
 618             "events to occur. Useful if your configuration is sensitive to "
 619             "the order in which ping updates arrive.")
 620     },
 621     {
 622         "stonith-watchdog-timeout", NULL, "time", NULL,
 623         "0", controld_verify_stonith_watchdog_timeout,
 624         N_("How long before nodes can be assumed to be safely down when "
 625            "watchdog-based self-fencing via SBD is in use"),
 626         N_("If this is set to a positive value, lost nodes are assumed to "
 627            "self-fence using watchdog-based SBD within this much time. This "
 628            "does not require a fencing resource to be explicitly configured, "
 629            "though a fence_watchdog resource can be configured, to limit use "
 630            "to specific nodes. If this is set to 0 (the default), the cluster "
 631            "will never assume watchdog-based self-fencing. If this is set to a "
 632            "negative value, the cluster will use twice the local value of the "
 633            "`SBD_WATCHDOG_TIMEOUT` environment variable if that is positive, "
 634            "or otherwise treat this as 0. WARNING: When used, this timeout "
 635            "must be larger than `SBD_WATCHDOG_TIMEOUT` on all nodes that use "
 636            "watchdog-based SBD, and Pacemaker will refuse to start on any of "
 637            "those nodes where this is not true for the local value or SBD is "
 638            "not active. When this is set to a negative value, "
 639            "`SBD_WATCHDOG_TIMEOUT` must be set to the same value on all nodes "
 640            "that use SBD, otherwise data corruption or loss could occur.")
 641     },
 642     {
 643         "stonith-max-attempts", NULL, "integer", NULL,
 644         "10", pcmk__valid_positive_number,
 645         N_("How many times fencing can fail before it will no longer be "
 646             "immediately re-attempted on a target")
 647     },
 648 
 649     // Already documented in libpe_status (other values must be kept identical)
 650     {
 651         "no-quorum-policy", NULL, "select",
 652         "stop, freeze, ignore, demote, suicide", "stop", pcmk__valid_quorum,
 653         "What to do when the cluster does not have quorum", NULL
 654     },
 655     {
 656         XML_CONFIG_ATTR_SHUTDOWN_LOCK, NULL, "boolean", NULL,
 657         "false", pcmk__valid_boolean,
 658         "Whether to lock resources to a cleanly shut down node",
 659         "When true, resources active on a node when it is cleanly shut down "
 660             "are kept \"locked\" to that node (not allowed to run elsewhere) "
 661             "until they start again on that node after it rejoins (or for at "
 662             "most shutdown-lock-limit, if set). Stonith resources and "
 663             "Pacemaker Remote connections are never locked. Clone and bundle "
 664             "instances and the promoted role of promotable clones are currently"
 665             " never locked, though support could be added in a future release."
 666     },
 667 };
 668 
 669 void
 670 crmd_metadata(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 671 {
 672     const char *desc_short = "Pacemaker controller options";
 673     const char *desc_long = "Cluster options used by Pacemaker's controller";
 674 
 675     gchar *s = pcmk__format_option_metadata("pacemaker-controld", desc_short,
 676                                             desc_long, controller_options,
 677                                             PCMK__NELEM(controller_options));
 678     printf("%s", s);
 679     g_free(s);
 680 }
 681 
/*!
 * \brief Look up a controller option's value (falling back to its default)
 *
 * \param[in] options  Hash table of configured option values
 * \param[in] name     Option name to look up
 *
 * \return Configured value, or the option's default if unset/invalid
 */
static const char *
controller_option(GHashTable *options, const char *name)
{
    return pcmk__cluster_option(options, controller_options,
                                PCMK__NELEM(controller_options), name);
}
 688 
 689 static void
 690 config_query_callback(xmlNode * msg, int call_id, int rc, xmlNode * output, void *user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 691 {
 692     const char *value = NULL;
 693     GHashTable *config_hash = NULL;
 694     crm_time_t *now = crm_time_new(NULL);
 695     xmlNode *crmconfig = NULL;
 696     xmlNode *alerts = NULL;
 697 
 698     if (rc != pcmk_ok) {
 699         fsa_data_t *msg_data = NULL;
 700 
 701         crm_err("Local CIB query resulted in an error: %s", pcmk_strerror(rc));
 702         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 703 
 704         if (rc == -EACCES || rc == -pcmk_err_schema_validation) {
 705             crm_err("The cluster is mis-configured - shutting down and staying down");
 706             controld_set_fsa_input_flags(R_STAYDOWN);
 707         }
 708         goto bail;
 709     }
 710 
 711     crmconfig = output;
 712     if ((crmconfig) &&
 713         (crm_element_name(crmconfig)) &&
 714         (strcmp(crm_element_name(crmconfig), XML_CIB_TAG_CRMCONFIG) != 0)) {
 715         crmconfig = first_named_child(crmconfig, XML_CIB_TAG_CRMCONFIG);
 716     }
 717     if (!crmconfig) {
 718         fsa_data_t *msg_data = NULL;
 719 
 720         crm_err("Local CIB query for " XML_CIB_TAG_CRMCONFIG " section failed");
 721         register_fsa_error(C_FSA_INTERNAL, I_ERROR, NULL);
 722         goto bail;
 723     }
 724 
 725     crm_debug("Call %d : Parsing CIB options", call_id);
 726     config_hash = pcmk__strkey_table(free, free);
 727     pe_unpack_nvpairs(crmconfig, crmconfig, XML_CIB_TAG_PROPSET, NULL,
 728                       config_hash, CIB_OPTIONS_FIRST, FALSE, now, NULL);
 729 
 730     pcmk__validate_cluster_options(config_hash, controller_options,
 731                                    PCMK__NELEM(controller_options));
 732 
 733     value = controller_option(config_hash, XML_CONFIG_ATTR_DC_DEADTIME);
 734     election_trigger->period_ms = crm_parse_interval_spec(value);
 735 
 736     value = controller_option(config_hash, "node-action-limit");
 737     throttle_update_job_max(value);
 738 
 739     value = controller_option(config_hash, "load-threshold");
 740     if(value) {
 741         throttle_set_load_target(strtof(value, NULL) / 100.0);
 742     }
 743 
 744     value = controller_option(config_hash, "no-quorum-policy");
 745     if (pcmk__str_eq(value, "suicide", pcmk__str_casei) && pcmk__locate_sbd()) {
 746         no_quorum_suicide_escalation = TRUE;
 747     }
 748 
 749     set_fence_reaction(controller_option(config_hash,
 750                                          XML_CONFIG_ATTR_FENCE_REACTION));
 751 
 752     value = controller_option(config_hash, "stonith-max-attempts");
 753     update_stonith_max_attempts(value);
 754 
 755     value = controller_option(config_hash, XML_CONFIG_ATTR_FORCE_QUIT);
 756     shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
 757     crm_debug("Shutdown escalation occurs if DC has not responded to request in %ums",
 758               shutdown_escalation_timer->period_ms);
 759 
 760     value = controller_option(config_hash, XML_CONFIG_ATTR_ELECTION_FAIL);
 761     controld_set_election_period(value);
 762 
 763     value = controller_option(config_hash, XML_CONFIG_ATTR_RECHECK);
 764     recheck_interval_ms = crm_parse_interval_spec(value);
 765     crm_debug("Re-run scheduler after %dms of inactivity", recheck_interval_ms);
 766 
 767     value = controller_option(config_hash, "transition-delay");
 768     transition_timer->period_ms = crm_parse_interval_spec(value);
 769 
 770     value = controller_option(config_hash, "join-integration-timeout");
 771     integration_timer->period_ms = crm_parse_interval_spec(value);
 772 
 773     value = controller_option(config_hash, "join-finalization-timeout");
 774     finalization_timer->period_ms = crm_parse_interval_spec(value);
 775 
 776     value = controller_option(config_hash, XML_CONFIG_ATTR_SHUTDOWN_LOCK);
 777     controld_shutdown_lock_enabled = crm_is_true(value);
 778 
 779     free(fsa_cluster_name);
 780     fsa_cluster_name = NULL;
 781 
 782     value = g_hash_table_lookup(config_hash, "cluster-name");
 783     if (value) {
 784         fsa_cluster_name = strdup(value);
 785     }
 786 
 787     alerts = first_named_child(output, XML_CIB_TAG_ALERTS);
 788     crmd_unpack_alerts(alerts);
 789 
 790     controld_set_fsa_input_flags(R_READ_CONFIG);
 791     crm_trace("Triggering FSA: %s", __func__);
 792     mainloop_set_trigger(fsa_source);
 793 
 794     g_hash_table_destroy(config_hash);
 795   bail:
 796     crm_time_free(now);
 797 }
 798 
 799 gboolean
 800 crm_read_options(gpointer user_data)
     /* [previous][next][first][last][top][bottom][index][help] */
 801 {
 802     int call_id =
 803         fsa_cib_conn->cmds->query(fsa_cib_conn,
 804             "//" XML_CIB_TAG_CRMCONFIG " | //" XML_CIB_TAG_ALERTS,
 805             NULL, cib_xpath | cib_scope_local);
 806 
 807     fsa_register_cib_callback(call_id, FALSE, NULL, config_query_callback);
 808     crm_trace("Querying the CIB... call %d", call_id);
 809     return TRUE;
 810 }
 811 
 812 /*       A_READCONFIG   */
/*!
 * \internal
 * \brief FSA action A_READCONFIG: (re-)read controller configuration
 *
 * Initializes the throttling subsystem, then schedules the actual CIB
 * query via the config_read mainloop trigger (handled by
 * crm_read_options()), rather than querying synchronously here.
 *
 * All parameters are the standard FSA action arguments and are unused.
 */
void
do_read_config(long long action,
               enum crmd_fsa_cause cause,
               enum crmd_fsa_state cur_state,
               enum crmd_fsa_input current_input, fsa_data_t * msg_data)
{
    // Must be ready before the config callback applies node-action-limit
    // and load-threshold to the throttle settings
    throttle_init();
    mainloop_set_trigger(config_read);
}
 822 
 823 void
 824 crm_shutdown(int nsig)
     /* [previous][next][first][last][top][bottom][index][help] */
 825 {
 826     if ((crmd_mainloop == NULL) || !g_main_loop_is_running(crmd_mainloop)) {
 827         crmd_exit(CRM_EX_OK);
 828         return;
 829     }
 830 
 831     if (pcmk_is_set(fsa_input_register, R_SHUTDOWN)) {
 832         crm_err("Escalating shutdown");
 833         register_fsa_input_before(C_SHUTDOWN, I_ERROR, NULL);
 834         return;
 835     }
 836 
 837     controld_set_fsa_input_flags(R_SHUTDOWN);
 838     register_fsa_input(C_SHUTDOWN, I_SHUTDOWN, NULL);
 839 
 840     if (shutdown_escalation_timer->period_ms == 0) {
 841         const char *value = controller_option(NULL, XML_CONFIG_ATTR_FORCE_QUIT);
 842 
 843         shutdown_escalation_timer->period_ms = crm_parse_interval_spec(value);
 844     }
 845 
 846     crm_notice("Initiating controller shutdown sequence " CRM_XS
 847                " limit=%ums", shutdown_escalation_timer->period_ms);
 848     controld_start_timer(shutdown_escalation_timer);
 849 }

/* [previous][next][first][last][top][bottom][index][help] */