root/lib/pacemaker/pcmk_sched_nodes.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. pcmk__node_available
  2. pcmk__copy_node_table
  3. destroy_node_tables
  4. pcmk__copy_node_tables
  5. pcmk__restore_node_tables
  6. pcmk__copy_node_list
  7. compare_nodes
  8. pcmk__sort_nodes
  9. pcmk__any_node_available
  10. pcmk__apply_node_health
  11. pcmk__top_allowed_node

   1 /*
   2  * Copyright 2004-2023 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 #include <crm_internal.h>
  11 #include <crm/msg_xml.h>
  12 #include <crm/common/xml_internal.h>
  13 #include <pacemaker-internal.h>
  14 #include <pacemaker.h>
  15 #include "libpacemaker_private.h"
  16 
  17 /*!
  18  * \internal
  19  * \brief Check whether a node is available to run resources
  20  *
  21  * \param[in] node            Node to check
  22  * \param[in] consider_score  If true, consider a negative score unavailable
  23  * \param[in] consider_guest  If true, consider a guest node unavailable whose
  24  *                            resource will not be active
  25  *
  26  * \return true if node is online and not shutting down, unclean, or in standby
  27  *         or maintenance mode, otherwise false
  28  */
  29 bool
  30 pcmk__node_available(const pcmk_node_t *node, bool consider_score,
     /* [previous][next][first][last][top][bottom][index][help] */
  31                      bool consider_guest)
  32 {
  33     if ((node == NULL) || (node->details == NULL) || !node->details->online
  34             || node->details->shutdown || node->details->unclean
  35             || node->details->standby || node->details->maintenance) {
  36         return false;
  37     }
  38 
  39     if (consider_score && (node->weight < 0)) {
  40         return false;
  41     }
  42 
  43     // @TODO Go through all callers to see which should set consider_guest
  44     if (consider_guest && pe__is_guest_node(node)) {
  45         pcmk_resource_t *guest = node->details->remote_rsc->container;
  46 
  47         if (guest->fns->location(guest, NULL, FALSE) == NULL) {
  48             return false;
  49         }
  50     }
  51 
  52     return true;
  53 }
  54 
  55 /*!
  56  * \internal
  57  * \brief Copy a hash table of node objects
  58  *
  59  * \param[in] nodes  Hash table to copy
  60  *
  61  * \return New copy of nodes (or NULL if nodes is NULL)
  62  */
  63 GHashTable *
  64 pcmk__copy_node_table(GHashTable *nodes)
     /* [previous][next][first][last][top][bottom][index][help] */
  65 {
  66     GHashTable *new_table = NULL;
  67     GHashTableIter iter;
  68     pcmk_node_t *node = NULL;
  69 
  70     if (nodes == NULL) {
  71         return NULL;
  72     }
  73     new_table = pcmk__strkey_table(NULL, free);
  74     g_hash_table_iter_init(&iter, nodes);
  75     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
  76         pcmk_node_t *new_node = pe__copy_node(node);
  77 
  78         g_hash_table_insert(new_table, (gpointer) new_node->details->id,
  79                             new_node);
  80     }
  81     return new_table;
  82 }
  83 
  84 /*!
  85  * \internal
  86  * \brief Free a table of node tables
  87  *
  88  * \param[in,out] data  Table to free
  89  *
  90  * \note This is a \c GDestroyNotify wrapper for \c g_hash_table_destroy().
  91  */
  92 static void
  93 destroy_node_tables(gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
  94 {
  95     g_hash_table_destroy((GHashTable *) data);
  96 }
  97 
  98 /*!
  99  * \internal
 100  * \brief Recursively copy the node tables of a resource
 101  *
 102  * Build a hash table containing copies of the allowed nodes tables of \p rsc
 103  * and its entire tree of descendants. The key is the resource ID, and the value
 104  * is a copy of the resource's node table.
 105  *
 106  * \param[in]     rsc   Resource whose node table to copy
 107  * \param[in,out] copy  Where to store the copied node tables
 108  *
 109  * \note \p *copy should be \c NULL for the top-level call.
 110  * \note The caller is responsible for freeing \p copy using
 111  *       \c g_hash_table_destroy().
 112  */
 113 void
 114 pcmk__copy_node_tables(const pcmk_resource_t *rsc, GHashTable **copy)
     /* [previous][next][first][last][top][bottom][index][help] */
 115 {
 116     CRM_ASSERT((rsc != NULL) && (copy != NULL));
 117 
 118     if (*copy == NULL) {
 119         *copy = pcmk__strkey_table(NULL, destroy_node_tables);
 120     }
 121 
 122     g_hash_table_insert(*copy, rsc->id,
 123                         pcmk__copy_node_table(rsc->allowed_nodes));
 124 
 125     for (const GList *iter = rsc->children; iter != NULL; iter = iter->next) {
 126         pcmk__copy_node_tables((const pcmk_resource_t *) iter->data, copy);
 127     }
 128 }
 129 
 130 /*!
 131  * \internal
 132  * \brief Recursively restore the node tables of a resource from backup
 133  *
 134  * Given a hash table containing backup copies of the allowed nodes tables of
 135  * \p rsc and its entire tree of descendants, replace the resources' current
 136  * node tables with the backed-up copies.
 137  *
 138  * \param[in,out] rsc     Resource whose node tables to restore
 139  * \param[in]     backup  Table of backup node tables (created by
 140  *                        \c pcmk__copy_node_tables())
 141  *
 142  * \note This function frees the resources' current node tables.
 143  */
 144 void
 145 pcmk__restore_node_tables(pcmk_resource_t *rsc, GHashTable *backup)
     /* [previous][next][first][last][top][bottom][index][help] */
 146 {
 147     CRM_ASSERT((rsc != NULL) && (backup != NULL));
 148 
 149     g_hash_table_destroy(rsc->allowed_nodes);
 150 
 151     // Copy to avoid danger with multiple restores
 152     rsc->allowed_nodes = g_hash_table_lookup(backup, rsc->id);
 153     rsc->allowed_nodes = pcmk__copy_node_table(rsc->allowed_nodes);
 154 
 155     for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
 156         pcmk__restore_node_tables((pcmk_resource_t *) iter->data, backup);
 157     }
 158 }
 159 
 160 /*!
 161  * \internal
 162  * \brief Copy a list of node objects
 163  *
 164  * \param[in] list   List to copy
 165  * \param[in] reset  Set copies' scores to 0
 166  *
 167  * \return New list of shallow copies of nodes in original list
 168  */
 169 GList *
 170 pcmk__copy_node_list(const GList *list, bool reset)
     /* [previous][next][first][last][top][bottom][index][help] */
 171 {
 172     GList *result = NULL;
 173 
 174     for (const GList *iter = list; iter != NULL; iter = iter->next) {
 175         pcmk_node_t *new_node = NULL;
 176         pcmk_node_t *this_node = iter->data;
 177 
 178         new_node = pe__copy_node(this_node);
 179         if (reset) {
 180             new_node->weight = 0;
 181         }
 182         result = g_list_prepend(result, new_node);
 183     }
 184     return result;
 185 }
 186 
 187 /*!
 188  * \internal
 189  * \brief Compare two nodes for assignment preference
 190  *
 191  * Given two nodes, check which one is more preferred by assignment criteria
 192  * such as node score and utilization.
 193  *
 194  * \param[in] a     First node to compare
 195  * \param[in] b     Second node to compare
 196  * \param[in] data  Node to prefer if all else equal
 197  *
 198  * \return -1 if \p a is preferred, +1 if \p b is preferred, or 0 if they are
 199  *         equally preferred
 200  */
 201 static gint
 202 compare_nodes(gconstpointer a, gconstpointer b, gpointer data)
     /* [previous][next][first][last][top][bottom][index][help] */
 203 {
 204     const pcmk_node_t *node1 = (const pcmk_node_t *) a;
 205     const pcmk_node_t *node2 = (const pcmk_node_t *) b;
 206     const pcmk_node_t *preferred = (const pcmk_node_t *) data;
 207 
 208     int node1_score = -INFINITY;
 209     int node2_score = -INFINITY;
 210 
 211     int result = 0;
 212 
 213     if (a == NULL) {
 214         return 1;
 215     }
 216     if (b == NULL) {
 217         return -1;
 218     }
 219 
 220     // Compare node scores
 221 
 222     if (pcmk__node_available(node1, false, false)) {
 223         node1_score = node1->weight;
 224     }
 225     if (pcmk__node_available(node2, false, false)) {
 226         node2_score = node2->weight;
 227     }
 228 
 229     if (node1_score > node2_score) {
 230         crm_trace("%s before %s (score %d > %d)",
 231                   pe__node_name(node1), pe__node_name(node2),
 232                   node1_score, node2_score);
 233         return -1;
 234     }
 235 
 236     if (node1_score < node2_score) {
 237         crm_trace("%s after %s (score %d < %d)",
 238                   pe__node_name(node1), pe__node_name(node2),
 239                   node1_score, node2_score);
 240         return 1;
 241     }
 242 
 243     // If appropriate, compare node utilization
 244 
 245     if (pcmk__str_eq(node1->details->data_set->placement_strategy, "minimal",
 246                      pcmk__str_casei)) {
 247         goto equal;
 248     }
 249 
 250     if (pcmk__str_eq(node1->details->data_set->placement_strategy, "balanced",
 251                      pcmk__str_casei)) {
 252         result = pcmk__compare_node_capacities(node1, node2);
 253         if (result < 0) {
 254             crm_trace("%s before %s (greater capacity by %d attributes)",
 255                       pe__node_name(node1), pe__node_name(node2), result * -1);
 256             return -1;
 257         } else if (result > 0) {
 258             crm_trace("%s after %s (lower capacity by %d attributes)",
 259                       pe__node_name(node1), pe__node_name(node2), result);
 260             return 1;
 261         }
 262     }
 263 
 264     // Compare number of resources already assigned to node
 265 
 266     if (node1->details->num_resources < node2->details->num_resources) {
 267         crm_trace("%s before %s (%d resources < %d)",
 268                   pe__node_name(node1), pe__node_name(node2),
 269                   node1->details->num_resources, node2->details->num_resources);
 270         return -1;
 271 
 272     } else if (node1->details->num_resources > node2->details->num_resources) {
 273         crm_trace("%s after %s (%d resources > %d)",
 274                   pe__node_name(node1), pe__node_name(node2),
 275                   node1->details->num_resources, node2->details->num_resources);
 276         return 1;
 277     }
 278 
 279     // Check whether one node is already running desired resource
 280 
 281     if (preferred != NULL) {
 282         if (pe__same_node(preferred, node1)) {
 283             crm_trace("%s before %s (preferred node)",
 284                       pe__node_name(node1), pe__node_name(node2));
 285             return -1;
 286         } else if (pe__same_node(preferred, node2)) {
 287             crm_trace("%s after %s (not preferred node)",
 288                       pe__node_name(node1), pe__node_name(node2));
 289             return 1;
 290         }
 291     }
 292 
 293     // If all else is equal, prefer node with lowest-sorting name
 294 equal:
 295     result = strcmp(node1->details->uname, node2->details->uname);
 296     if (result < 0) {
 297         crm_trace("%s before %s (name)",
 298                   pe__node_name(node1), pe__node_name(node2));
 299         return -1;
 300     } else if (result > 0) {
 301         crm_trace("%s after %s (name)",
 302                   pe__node_name(node1), pe__node_name(node2));
 303         return 1;
 304     }
 305 
 306     crm_trace("%s == %s", pe__node_name(node1), pe__node_name(node2));
 307     return 0;
 308 }
 309 
 310 /*!
 311  * \internal
 312  * \brief Sort a list of nodes by assigment preference
 313  *
 314  * \param[in,out] nodes        Node list to sort
 315  * \param[in]     active_node  Node where resource being assigned is active
 316  *
 317  * \return New head of sorted list
 318  */
 319 GList *
 320 pcmk__sort_nodes(GList *nodes, pcmk_node_t *active_node)
     /* [previous][next][first][last][top][bottom][index][help] */
 321 {
 322     return g_list_sort_with_data(nodes, compare_nodes, active_node);
 323 }
 324 
 325 /*!
 326  * \internal
 327  * \brief Check whether any node is available to run resources
 328  *
 329  * \param[in] nodes  Nodes to check
 330  *
 331  * \return true if any node in \p nodes is available to run resources,
 332  *         otherwise false
 333  */
 334 bool
 335 pcmk__any_node_available(GHashTable *nodes)
     /* [previous][next][first][last][top][bottom][index][help] */
 336 {
 337     GHashTableIter iter;
 338     const pcmk_node_t *node = NULL;
 339 
 340     if (nodes == NULL) {
 341         return false;
 342     }
 343     g_hash_table_iter_init(&iter, nodes);
 344     while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
 345         if (pcmk__node_available(node, true, false)) {
 346             return true;
 347         }
 348     }
 349     return false;
 350 }
 351 
 352 /*!
 353  * \internal
 354  * \brief Apply node health values for all nodes in cluster
 355  *
 356  * \param[in,out] scheduler  Scheduler data
 357  */
 358 void
 359 pcmk__apply_node_health(pcmk_scheduler_t *scheduler)
     /* [previous][next][first][last][top][bottom][index][help] */
 360 {
 361     int base_health = 0;
 362     enum pcmk__health_strategy strategy;
 363     const char *strategy_str = pe_pref(scheduler->config_hash,
 364                                        PCMK__OPT_NODE_HEALTH_STRATEGY);
 365 
 366     strategy = pcmk__parse_health_strategy(strategy_str);
 367     if (strategy == pcmk__health_strategy_none) {
 368         return;
 369     }
 370     crm_info("Applying node health strategy '%s'", strategy_str);
 371 
 372     // The progressive strategy can use a base health score
 373     if (strategy == pcmk__health_strategy_progressive) {
 374         base_health = pe__health_score(PCMK__OPT_NODE_HEALTH_BASE, scheduler);
 375     }
 376 
 377     for (GList *iter = scheduler->nodes; iter != NULL; iter = iter->next) {
 378         pcmk_node_t *node = (pcmk_node_t *) iter->data;
 379         int health = pe__sum_node_health_scores(node, base_health);
 380 
 381         // An overall health score of 0 has no effect
 382         if (health == 0) {
 383             continue;
 384         }
 385         crm_info("Overall system health of %s is %d",
 386                  pe__node_name(node), health);
 387 
 388         // Use node health as a location score for each resource on the node
 389         for (GList *r = scheduler->resources; r != NULL; r = r->next) {
 390             pcmk_resource_t *rsc = (pcmk_resource_t *) r->data;
 391 
 392             bool constrain = true;
 393 
 394             if (health < 0) {
 395                 /* Negative health scores do not apply to resources with
 396                  * allow-unhealthy-nodes=true.
 397                  */
 398                 constrain = !crm_is_true(g_hash_table_lookup(rsc->meta,
 399                                          PCMK__META_ALLOW_UNHEALTHY_NODES));
 400             }
 401             if (constrain) {
 402                 pcmk__new_location(strategy_str, rsc, health, NULL, node);
 403             } else {
 404                 pe_rsc_trace(rsc, "%s is immune from health ban on %s",
 405                              rsc->id, pe__node_name(node));
 406             }
 407         }
 408     }
 409 }
 410 
 411 /*!
 412  * \internal
 413  * \brief Check for a node in a resource's parent's allowed nodes
 414  *
 415  * \param[in] rsc   Resource whose parent should be checked
 416  * \param[in] node  Node to check for
 417  *
 418  * \return Equivalent of \p node from \p rsc's parent's allowed nodes if any,
 419  *         otherwise NULL
 420  */
 421 pcmk_node_t *
 422 pcmk__top_allowed_node(const pcmk_resource_t *rsc, const pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 423 {
 424     GHashTable *allowed_nodes = NULL;
 425 
 426     if ((rsc == NULL) || (node == NULL)) {
 427         return NULL;
 428     } else if (rsc->parent == NULL) {
 429         allowed_nodes = rsc->allowed_nodes;
 430     } else {
 431         allowed_nodes = rsc->parent->allowed_nodes;
 432     }
 433     return g_hash_table_lookup(allowed_nodes, node->details->id);
 434 }

/* [previous][next][first][last][top][bottom][index][help] */