root/lib/pacemaker/pcmk_sched_instances.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. can_run_instance
  2. ban_unavailable_allowed_nodes
  3. new_node_table
  4. apply_parent_colocations
  5. cmp_instance_by_colocation
  6. did_fail
  7. node_is_allowed
  8. pcmk__cmp_instance_number
  9. pcmk__cmp_instance
  10. increment_parent_count
  11. assign_instance
  12. assign_instance_early
  13. reset_allowed_node_counts
  14. preferred_node
  15. pcmk__assign_instances
  16. check_instance_state
  17. pcmk__create_instance_actions
  18. get_instance_list
  19. free_instance_list
  20. pcmk__instance_matches
  21. find_compatible_instance_on_node
  22. pcmk__find_compatible_instance
  23. unassign_if_mandatory
  24. find_instance_action
  25. orig_action_name
  26. update_interleaved_actions
  27. can_interleave_actions
  28. update_noninterleaved_actions
  29. pcmk__instance_update_ordered_actions
  30. pcmk__collective_action_flags

   1 /*
   2  * Copyright 2004-2023 the Pacemaker project contributors
   3  *
   4  * The version control history for this file may have further details.
   5  *
   6  * This source code is licensed under the GNU General Public License version 2
   7  * or later (GPLv2+) WITHOUT ANY WARRANTY.
   8  */
   9 
  10 /* This file is intended for code usable with both clone instances and bundle
  11  * replica containers.
  12  */
  13 
  14 #include <crm_internal.h>
  15 #include <crm/msg_xml.h>
  16 #include <pacemaker-internal.h>
  17 #include "libpacemaker_private.h"
  18 
  19 /*!
  20  * \internal
  21  * \brief Check whether a node is allowed to run an instance
  22  *
  23  * \param[in] instance      Clone instance or bundle container to check
  24  * \param[in] node          Node to check
  25  * \param[in] max_per_node  Maximum number of instances allowed to run on a node
  26  *
  27  * \return true if \p node is allowed to run \p instance, otherwise false
  28  */
  29 static bool
  30 can_run_instance(const pcmk_resource_t *instance, const pcmk_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
  31                  int max_per_node)
  32 {
  33     pcmk_node_t *allowed_node = NULL;
  34 
  35     if (pcmk_is_set(instance->flags, pcmk_rsc_removed)) {
  36         pe_rsc_trace(instance, "%s cannot run on %s: orphaned",
  37                      instance->id, pe__node_name(node));
  38         return false;
  39     }
  40 
  41     if (!pcmk__node_available(node, false, false)) {
  42         pe_rsc_trace(instance,
  43                      "%s cannot run on %s: node cannot run resources",
  44                      instance->id, pe__node_name(node));
  45         return false;
  46     }
  47 
  48     allowed_node = pcmk__top_allowed_node(instance, node);
  49     if (allowed_node == NULL) {
  50         crm_warn("%s cannot run on %s: node not allowed",
  51                  instance->id, pe__node_name(node));
  52         return false;
  53     }
  54 
  55     if (allowed_node->weight < 0) {
  56         pe_rsc_trace(instance, "%s cannot run on %s: parent score is %s there",
  57                      instance->id, pe__node_name(node),
  58                      pcmk_readable_score(allowed_node->weight));
  59         return false;
  60     }
  61 
  62     if (allowed_node->count >= max_per_node) {
  63         pe_rsc_trace(instance,
  64                      "%s cannot run on %s: node already has %d instance%s",
  65                      instance->id, pe__node_name(node), max_per_node,
  66                      pcmk__plural_s(max_per_node));
  67         return false;
  68     }
  69 
  70     pe_rsc_trace(instance, "%s can run on %s (%d already running)",
  71                  instance->id, pe__node_name(node), allowed_node->count);
  72     return true;
  73 }
  74 
  75 /*!
  76  * \internal
  77  * \brief Ban a clone instance or bundle replica from unavailable allowed nodes
  78  *
  79  * \param[in,out] instance      Clone instance or bundle replica to ban
  80  * \param[in]     max_per_node  Maximum instances allowed to run on a node
  81  */
  82 static void
  83 ban_unavailable_allowed_nodes(pcmk_resource_t *instance, int max_per_node)
     /* [previous][next][first][last][top][bottom][index][help] */
  84 {
  85     if (instance->allowed_nodes != NULL) {
  86         GHashTableIter iter;
  87         pcmk_node_t *node = NULL;
  88 
  89         g_hash_table_iter_init(&iter, instance->allowed_nodes);
  90         while (g_hash_table_iter_next(&iter, NULL, (void **) &node)) {
  91             if (!can_run_instance(instance, node, max_per_node)) {
  92                 pe_rsc_trace(instance, "Banning %s from unavailable node %s",
  93                              instance->id, pe__node_name(node));
  94                 node->weight = -INFINITY;
  95                 for (GList *child_iter = instance->children;
  96                      child_iter != NULL; child_iter = child_iter->next) {
  97                     pcmk_resource_t *child = child_iter->data;
  98                     pcmk_node_t *child_node = NULL;
  99 
 100                     child_node = g_hash_table_lookup(child->allowed_nodes,
 101                                                      node->details->id);
 102                     if (child_node != NULL) {
 103                         pe_rsc_trace(instance,
 104                                      "Banning %s child %s "
 105                                      "from unavailable node %s",
 106                                      instance->id, child->id,
 107                                      pe__node_name(node));
 108                         child_node->weight = -INFINITY;
 109                     }
 110                 }
 111             }
 112         }
 113     }
 114 }
 115 
 116 /*!
 117  * \internal
 118  * \brief Create a hash table with a single node in it
 119  *
 120  * \param[in] node  Node to copy into new table
 121  *
 122  * \return Newly created hash table containing a copy of \p node
 123  * \note The caller is responsible for freeing the result with
 124  *       g_hash_table_destroy().
 125  */
 126 static GHashTable *
 127 new_node_table(pcmk_node_t *node)
     /* [previous][next][first][last][top][bottom][index][help] */
 128 {
 129     GHashTable *table = pcmk__strkey_table(NULL, free);
 130 
 131     node = pe__copy_node(node);
 132     g_hash_table_insert(table, (gpointer) node->details->id, node);
 133     return table;
 134 }
 135 
 136 /*!
 137  * \internal
 138  * \brief Apply a resource's parent's colocation scores to a node table
 139  *
 140  * \param[in]     rsc    Resource whose colocations should be applied
 141  * \param[in,out] nodes  Node table to apply colocations to
 142  */
 143 static void
 144 apply_parent_colocations(const pcmk_resource_t *rsc, GHashTable **nodes)
     /* [previous][next][first][last][top][bottom][index][help] */
 145 {
 146     GList *colocations = pcmk__this_with_colocations(rsc);
 147 
 148     for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
 149         const pcmk__colocation_t *colocation = iter->data;
 150         pcmk_resource_t *other = colocation->primary;
 151         float factor = colocation->score / (float) INFINITY;
 152 
 153         other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
 154                                                colocation, factor,
 155                                                pcmk__coloc_select_default);
 156     }
 157     g_list_free(colocations);
 158     colocations = pcmk__with_this_colocations(rsc);
 159 
 160     for (const GList *iter = colocations; iter != NULL; iter = iter->next) {
 161         const pcmk__colocation_t *colocation = iter->data;
 162         pcmk_resource_t *other = colocation->dependent;
 163         float factor = colocation->score / (float) INFINITY;
 164 
 165         if (!pcmk__colocation_has_influence(colocation, rsc)) {
 166             continue;
 167         }
 168         other->cmds->add_colocated_node_scores(other, rsc, rsc->id, nodes,
 169                                                colocation, factor,
 170                                                pcmk__coloc_select_nonnegative);
 171     }
 172     g_list_free(colocations);
 173 }
 174 
 175 /*!
 176  * \internal
 177  * \brief Compare clone or bundle instances based on colocation scores
 178  *
 179  * Determine the relative order in which two clone or bundle instances should be
 180  * assigned to nodes, considering the scores of colocation constraints directly
 181  * or indirectly involving them.
 182  *
 183  * \param[in] instance1  First instance to compare
 184  * \param[in] instance2  Second instance to compare
 185  *
 186  * \return A negative number if \p instance1 should be assigned first,
 187  *         a positive number if \p instance2 should be assigned first,
 188  *         or 0 if assignment order doesn't matter
 189  */
 190 static int
 191 cmp_instance_by_colocation(const pcmk_resource_t *instance1,
     /* [previous][next][first][last][top][bottom][index][help] */
 192                            const pcmk_resource_t *instance2)
 193 {
 194     int rc = 0;
 195     pcmk_node_t *node1 = NULL;
 196     pcmk_node_t *node2 = NULL;
 197     pcmk_node_t *current_node1 = pe__current_node(instance1);
 198     pcmk_node_t *current_node2 = pe__current_node(instance2);
 199     GHashTable *colocated_scores1 = NULL;
 200     GHashTable *colocated_scores2 = NULL;
 201 
 202     CRM_ASSERT((instance1 != NULL) && (instance1->parent != NULL)
 203                && (instance2 != NULL) && (instance2->parent != NULL)
 204                && (current_node1 != NULL) && (current_node2 != NULL));
 205 
 206     // Create node tables initialized with each node
 207     colocated_scores1 = new_node_table(current_node1);
 208     colocated_scores2 = new_node_table(current_node2);
 209 
 210     // Apply parental colocations
 211     apply_parent_colocations(instance1, &colocated_scores1);
 212     apply_parent_colocations(instance2, &colocated_scores2);
 213 
 214     // Find original nodes again, with scores updated for colocations
 215     node1 = g_hash_table_lookup(colocated_scores1, current_node1->details->id);
 216     node2 = g_hash_table_lookup(colocated_scores2, current_node2->details->id);
 217 
 218     // Compare nodes by updated scores
 219     if (node1->weight < node2->weight) {
 220         crm_trace("Assign %s (%d on %s) after %s (%d on %s)",
 221                   instance1->id, node1->weight, pe__node_name(node1),
 222                   instance2->id, node2->weight, pe__node_name(node2));
 223         rc = 1;
 224 
 225     } else if (node1->weight > node2->weight) {
 226         crm_trace("Assign %s (%d on %s) before %s (%d on %s)",
 227                   instance1->id, node1->weight, pe__node_name(node1),
 228                   instance2->id, node2->weight, pe__node_name(node2));
 229         rc = -1;
 230     }
 231 
 232     g_hash_table_destroy(colocated_scores1);
 233     g_hash_table_destroy(colocated_scores2);
 234     return rc;
 235 }
 236 
 237 /*!
 238  * \internal
 239  * \brief Check whether a resource or any of its children are failed
 240  *
 241  * \param[in] rsc  Resource to check
 242  *
 243  * \return true if \p rsc or any of its children are failed, otherwise false
 244  */
 245 static bool
 246 did_fail(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 247 {
 248     if (pcmk_is_set(rsc->flags, pcmk_rsc_failed)) {
 249         return true;
 250     }
 251     for (GList *iter = rsc->children; iter != NULL; iter = iter->next) {
 252         if (did_fail((const pcmk_resource_t *) iter->data)) {
 253             return true;
 254         }
 255     }
 256     return false;
 257 }
 258 
 259 /*!
 260  * \internal
 261  * \brief Check whether a node is allowed to run a resource
 262  *
 263  * \param[in]     rsc   Resource to check
 264  * \param[in,out] node  Node to check (will be set NULL if not allowed)
 265  *
 266  * \return true if *node is either NULL or allowed for \p rsc, otherwise false
 267  */
 268 static bool
 269 node_is_allowed(const pcmk_resource_t *rsc, pcmk_node_t **node)
     /* [previous][next][first][last][top][bottom][index][help] */
 270 {
 271     if (*node != NULL) {
 272         pcmk_node_t *allowed = g_hash_table_lookup(rsc->allowed_nodes,
 273                                                    (*node)->details->id);
 274 
 275         if ((allowed == NULL) || (allowed->weight < 0)) {
 276             pe_rsc_trace(rsc, "%s: current location (%s) is unavailable",
 277                          rsc->id, pe__node_name(*node));
 278             *node = NULL;
 279             return false;
 280         }
 281     }
 282     return true;
 283 }
 284 
 285 /*!
 286  * \internal
 287  * \brief Compare two clone or bundle instances' instance numbers
 288  *
 289  * \param[in] a  First instance to compare
 290  * \param[in] b  Second instance to compare
 291  *
 292  * \return A negative number if \p a's instance number is lower,
 293  *         a positive number if \p b's instance number is lower,
 294  *         or 0 if their instance numbers are the same
 295  */
 296 gint
 297 pcmk__cmp_instance_number(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
 298 {
 299     const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a;
 300     const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b;
 301     char *div1 = NULL;
 302     char *div2 = NULL;
 303 
 304     CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
 305 
 306     // Clone numbers are after a colon, bundle numbers after a dash
 307     div1 = strrchr(instance1->id, ':');
 308     if (div1 == NULL) {
 309         div1 = strrchr(instance1->id, '-');
 310     }
 311     div2 = strrchr(instance2->id, ':');
 312     if (div2 == NULL) {
 313         div2 = strrchr(instance2->id, '-');
 314     }
 315     CRM_ASSERT((div1 != NULL) && (div2 != NULL));
 316 
 317     return (gint) (strtol(div1 + 1, NULL, 10) - strtol(div2 + 1, NULL, 10));
 318 }
 319 
 320 /*!
 321  * \internal
 322  * \brief Compare clone or bundle instances according to assignment order
 323  *
 324  * Compare two clone or bundle instances according to the order they should be
 325  * assigned to nodes, preferring (in order):
 326  *
 327  *  - Active instance that is less multiply active
 328  *  - Instance that is not active on a disallowed node
 329  *  - Instance with higher configured priority
 330  *  - Active instance whose current node can run resources
 331  *  - Active instance whose parent is allowed on current node
 332  *  - Active instance whose current node has fewer other instances
 333  *  - Active instance
 334  *  - Instance that isn't failed
 335  *  - Instance whose colocations result in higher score on current node
 336  *  - Instance with lower ID in lexicographic order
 337  *
 338  * \param[in] a          First instance to compare
 339  * \param[in] b          Second instance to compare
 340  *
 341  * \return A negative number if \p a should be assigned first,
 342  *         a positive number if \p b should be assigned first,
 343  *         or 0 if assignment order doesn't matter
 344  */
 345 gint
 346 pcmk__cmp_instance(gconstpointer a, gconstpointer b)
     /* [previous][next][first][last][top][bottom][index][help] */
 347 {
 348     int rc = 0;
 349     pcmk_node_t *node1 = NULL;
 350     pcmk_node_t *node2 = NULL;
 351     unsigned int nnodes1 = 0;
 352     unsigned int nnodes2 = 0;
 353 
 354     bool can1 = true;
 355     bool can2 = true;
 356 
 357     const pcmk_resource_t *instance1 = (const pcmk_resource_t *) a;
 358     const pcmk_resource_t *instance2 = (const pcmk_resource_t *) b;
 359 
 360     CRM_ASSERT((instance1 != NULL) && (instance2 != NULL));
 361 
 362     node1 = instance1->fns->active_node(instance1, &nnodes1, NULL);
 363     node2 = instance2->fns->active_node(instance2, &nnodes2, NULL);
 364 
 365     /* If both instances are running and at least one is multiply
 366      * active, prefer instance that's running on fewer nodes.
 367      */
 368     if ((nnodes1 > 0) && (nnodes2 > 0)) {
 369         if (nnodes1 < nnodes2) {
 370             crm_trace("Assign %s (active on %d) before %s (active on %d): "
 371                       "less multiply active",
 372                       instance1->id, nnodes1, instance2->id, nnodes2);
 373             return -1;
 374 
 375         } else if (nnodes1 > nnodes2) {
 376             crm_trace("Assign %s (active on %d) after %s (active on %d): "
 377                       "more multiply active",
 378                       instance1->id, nnodes1, instance2->id, nnodes2);
 379             return 1;
 380         }
 381     }
 382 
 383     /* An instance that is either inactive or active on an allowed node is
 384      * preferred over an instance that is active on a no-longer-allowed node.
 385      */
 386     can1 = node_is_allowed(instance1, &node1);
 387     can2 = node_is_allowed(instance2, &node2);
 388     if (can1 && !can2) {
 389         crm_trace("Assign %s before %s: not active on a disallowed node",
 390                   instance1->id, instance2->id);
 391         return -1;
 392 
 393     } else if (!can1 && can2) {
 394         crm_trace("Assign %s after %s: active on a disallowed node",
 395                   instance1->id, instance2->id);
 396         return 1;
 397     }
 398 
 399     // Prefer instance with higher configured priority
 400     if (instance1->priority > instance2->priority) {
 401         crm_trace("Assign %s before %s: priority (%d > %d)",
 402                   instance1->id, instance2->id,
 403                   instance1->priority, instance2->priority);
 404         return -1;
 405 
 406     } else if (instance1->priority < instance2->priority) {
 407         crm_trace("Assign %s after %s: priority (%d < %d)",
 408                   instance1->id, instance2->id,
 409                   instance1->priority, instance2->priority);
 410         return 1;
 411     }
 412 
 413     // Prefer active instance
 414     if ((node1 == NULL) && (node2 == NULL)) {
 415         crm_trace("No assignment preference for %s vs. %s: inactive",
 416                   instance1->id, instance2->id);
 417         return 0;
 418 
 419     } else if (node1 == NULL) {
 420         crm_trace("Assign %s after %s: active", instance1->id, instance2->id);
 421         return 1;
 422 
 423     } else if (node2 == NULL) {
 424         crm_trace("Assign %s before %s: active", instance1->id, instance2->id);
 425         return -1;
 426     }
 427 
 428     // Prefer instance whose current node can run resources
 429     can1 = pcmk__node_available(node1, false, false);
 430     can2 = pcmk__node_available(node2, false, false);
 431     if (can1 && !can2) {
 432         crm_trace("Assign %s before %s: current node can run resources",
 433                   instance1->id, instance2->id);
 434         return -1;
 435 
 436     } else if (!can1 && can2) {
 437         crm_trace("Assign %s after %s: current node can't run resources",
 438                   instance1->id, instance2->id);
 439         return 1;
 440     }
 441 
 442     // Prefer instance whose parent is allowed to run on instance's current node
 443     node1 = pcmk__top_allowed_node(instance1, node1);
 444     node2 = pcmk__top_allowed_node(instance2, node2);
 445     if ((node1 == NULL) && (node2 == NULL)) {
 446         crm_trace("No assignment preference for %s vs. %s: "
 447                   "parent not allowed on either instance's current node",
 448                   instance1->id, instance2->id);
 449         return 0;
 450 
 451     } else if (node1 == NULL) {
 452         crm_trace("Assign %s after %s: parent not allowed on current node",
 453                   instance1->id, instance2->id);
 454         return 1;
 455 
 456     } else if (node2 == NULL) {
 457         crm_trace("Assign %s before %s: parent allowed on current node",
 458                   instance1->id, instance2->id);
 459         return -1;
 460     }
 461 
 462     // Prefer instance whose current node is running fewer other instances
 463     if (node1->count < node2->count) {
 464         crm_trace("Assign %s before %s: fewer active instances on current node",
 465                   instance1->id, instance2->id);
 466         return -1;
 467 
 468     } else if (node1->count > node2->count) {
 469         crm_trace("Assign %s after %s: more active instances on current node",
 470                   instance1->id, instance2->id);
 471         return 1;
 472     }
 473 
 474     // Prefer instance that isn't failed
 475     can1 = did_fail(instance1);
 476     can2 = did_fail(instance2);
 477     if (!can1 && can2) {
 478         crm_trace("Assign %s before %s: not failed",
 479                   instance1->id, instance2->id);
 480         return -1;
 481     } else if (can1 && !can2) {
 482         crm_trace("Assign %s after %s: failed",
 483                   instance1->id, instance2->id);
 484         return 1;
 485     }
 486 
 487     // Prefer instance with higher cumulative colocation score on current node
 488     rc = cmp_instance_by_colocation(instance1, instance2);
 489     if (rc != 0) {
 490         return rc;
 491     }
 492 
 493     // Prefer instance with lower instance number
 494     rc = pcmk__cmp_instance_number(instance1, instance2);
 495     if (rc < 0) {
 496         crm_trace("Assign %s before %s: instance number",
 497                   instance1->id, instance2->id);
 498     } else if (rc > 0) {
 499         crm_trace("Assign %s after %s: instance number",
 500                   instance1->id, instance2->id);
 501     } else {
 502         crm_trace("No assignment preference for %s vs. %s",
 503                   instance1->id, instance2->id);
 504     }
 505     return rc;
 506 }
 507 
 508 /*!
 509  * \internal
 510  * \brief Increment the parent's instance count after assigning an instance
 511  *
 512  * An instance's parent tracks how many instances have been assigned to each
 513  * node via its pcmk_node_t:count member. After assigning an instance to a node,
 514  * find the corresponding node in the parent's allowed table and increment it.
 515  *
 516  * \param[in,out] instance     Instance whose parent to update
 517  * \param[in]     assigned_to  Node to which the instance was assigned
 518  */
 519 static void
 520 increment_parent_count(pcmk_resource_t *instance,
     /* [previous][next][first][last][top][bottom][index][help] */
 521                        const pcmk_node_t *assigned_to)
 522 {
 523     pcmk_node_t *allowed = NULL;
 524 
 525     if (assigned_to == NULL) {
 526         return;
 527     }
 528     allowed = pcmk__top_allowed_node(instance, assigned_to);
 529 
 530     if (allowed == NULL) {
 531         /* The instance is allowed on the node, but its parent isn't. This
 532          * shouldn't be possible if the resource is managed, and we won't be
 533          * able to limit the number of instances assigned to the node.
 534          */
 535         CRM_LOG_ASSERT(!pcmk_is_set(instance->flags, pcmk_rsc_managed));
 536 
 537     } else {
 538         allowed->count++;
 539     }
 540 }
 541 
 542 /*!
 543  * \internal
 544  * \brief Assign an instance to a node
 545  *
 546  * \param[in,out] instance      Clone instance or bundle replica container
 547  * \param[in]     prefer        If not NULL, attempt early assignment to this
 548  *                              node, if still the best choice; otherwise,
 549  *                              perform final assignment
 550  * \param[in]     max_per_node  Assign at most this many instances to one node
 551  *
 552  * \return Node to which \p instance is assigned
 553  */
 554 static const pcmk_node_t *
 555 assign_instance(pcmk_resource_t *instance, const pcmk_node_t *prefer,
     /* [previous][next][first][last][top][bottom][index][help] */
 556                 int max_per_node)
 557 {
 558     pcmk_node_t *chosen = NULL;
 559 
 560     pe_rsc_trace(instance, "Assigning %s (preferring %s)", instance->id,
 561                  ((prefer == NULL)? "no node" : prefer->details->uname));
 562 
 563     if (pcmk_is_set(instance->flags, pcmk_rsc_assigning)) {
 564         pe_rsc_debug(instance,
 565                      "Assignment loop detected involving %s colocations",
 566                      instance->id);
 567         return NULL;
 568     }
 569     ban_unavailable_allowed_nodes(instance, max_per_node);
 570 
 571     // Failed early assignments are reversible (stop_if_fail=false)
 572     chosen = instance->cmds->assign(instance, prefer, (prefer == NULL));
 573     increment_parent_count(instance, chosen);
 574     return chosen;
 575 }
 576 
 577 /*!
 578  * \internal
 579  * \brief Try to assign an instance to its current node early
 580  *
 581  * \param[in] rsc           Clone or bundle being assigned (for logs only)
 582  * \param[in] instance      Clone instance or bundle replica container
 583  * \param[in] current       Instance's current node
 584  * \param[in] max_per_node  Maximum number of instances per node
 585  * \param[in] available     Number of instances still available for assignment
 586  *
 587  * \return \c true if \p instance was successfully assigned to its current node,
 588  *         or \c false otherwise
 589  */
 590 static bool
 591 assign_instance_early(const pcmk_resource_t *rsc, pcmk_resource_t *instance,
     /* [previous][next][first][last][top][bottom][index][help] */
 592                       const pcmk_node_t *current, int max_per_node,
 593                       int available)
 594 {
 595     const pcmk_node_t *chosen = NULL;
 596     int reserved = 0;
 597 
 598     pcmk_resource_t *parent = instance->parent;
 599     GHashTable *allowed_orig = NULL;
 600     GHashTable *allowed_orig_parent = parent->allowed_nodes;
 601     const pcmk_node_t *allowed_node = NULL;
 602 
 603     pe_rsc_trace(instance, "Trying to assign %s to its current node %s",
 604                  instance->id, pe__node_name(current));
 605 
 606     allowed_node = g_hash_table_lookup(instance->allowed_nodes,
 607                                        current->details->id);
 608     if (!pcmk__node_available(allowed_node, true, false)) {
 609         pe_rsc_info(instance,
 610                     "Not assigning %s to current node %s: unavailable",
 611                     instance->id, pe__node_name(current));
 612         return false;
 613     }
 614 
 615     /* On each iteration, if instance gets assigned to a node other than its
 616      * current one, we reserve one instance for the chosen node, unassign
 617      * instance, restore instance's original node tables, and try again. This
 618      * way, instances are proportionally assigned to nodes based on preferences,
 619      * but shuffling of specific instances is minimized. If a node will be
 620      * assigned instances at all, it preferentially receives instances that are
 621      * currently active there.
 622      *
 623      * parent->allowed_nodes tracks the number of instances assigned to each
 624      * node. If a node already has max_per_node instances assigned,
 625      * ban_unavailable_allowed_nodes() marks it as unavailable.
 626      *
 627      * In the end, we restore the original parent->allowed_nodes to undo the
 628      * changes to counts during tentative assignments. If we successfully
 629      * assigned instance to its current node, we increment that node's counter.
 630      */
 631 
 632     // Back up the allowed node tables of instance and its children recursively
 633     pcmk__copy_node_tables(instance, &allowed_orig);
 634 
 635     // Update instances-per-node counts in a scratch table
 636     parent->allowed_nodes = pcmk__copy_node_table(parent->allowed_nodes);
 637 
 638     while (reserved < available) {
 639         chosen = assign_instance(instance, current, max_per_node);
 640 
 641         if (pe__same_node(chosen, current)) {
 642             // Successfully assigned to current node
 643             break;
 644         }
 645 
 646         // Assignment updates scores, so restore to original state
 647         pe_rsc_debug(instance, "Rolling back node scores for %s", instance->id);
 648         pcmk__restore_node_tables(instance, allowed_orig);
 649 
 650         if (chosen == NULL) {
 651             // Assignment failed, so give up
 652             pe_rsc_info(instance,
 653                         "Not assigning %s to current node %s: unavailable",
 654                         instance->id, pe__node_name(current));
 655             pe__set_resource_flags(instance, pcmk_rsc_unassigned);
 656             break;
 657         }
 658 
 659         // We prefer more strongly to assign an instance to the chosen node
 660         pe_rsc_debug(instance,
 661                      "Not assigning %s to current node %s: %s is better",
 662                      instance->id, pe__node_name(current),
 663                      pe__node_name(chosen));
 664 
 665         // Reserve one instance for the chosen node and try again
 666         if (++reserved >= available) {
 667             pe_rsc_info(instance,
 668                         "Not assigning %s to current node %s: "
 669                         "other assignments are more important",
 670                         instance->id, pe__node_name(current));
 671 
 672         } else {
 673             pe_rsc_debug(instance,
 674                          "Reserved an instance of %s for %s. Retrying "
 675                          "assignment of %s to %s",
 676                          rsc->id, pe__node_name(chosen), instance->id,
 677                          pe__node_name(current));
 678         }
 679 
 680         // Clear this assignment (frees chosen); leave instance counts in parent
 681         pcmk__unassign_resource(instance);
 682         chosen = NULL;
 683     }
 684 
 685     g_hash_table_destroy(allowed_orig);
 686 
 687     // Restore original instances-per-node counts
 688     g_hash_table_destroy(parent->allowed_nodes);
 689     parent->allowed_nodes = allowed_orig_parent;
 690 
 691     if (chosen == NULL) {
 692         // Couldn't assign instance to current node
 693         return false;
 694     }
 695     pe_rsc_trace(instance, "Assigned %s to current node %s",
 696                  instance->id, pe__node_name(current));
 697     increment_parent_count(instance, chosen);
 698     return true;
 699 }
 700 
 701 /*!
 702  * \internal
 703  * \brief Reset the node counts of a resource's allowed nodes to zero
 704  *
 705  * \param[in,out] rsc  Resource to reset
 706  *
 707  * \return Number of nodes that are available to run resources
 708  */
 709 static unsigned int
 710 reset_allowed_node_counts(pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
 711 {
 712     unsigned int available_nodes = 0;
 713     pcmk_node_t *node = NULL;
 714     GHashTableIter iter;
 715 
 716     g_hash_table_iter_init(&iter, rsc->allowed_nodes);
 717     while (g_hash_table_iter_next(&iter, NULL, (gpointer *) &node)) {
 718         node->count = 0;
 719         if (pcmk__node_available(node, false, false)) {
 720             available_nodes++;
 721         }
 722     }
 723     return available_nodes;
 724 }
 725 
 726 /*!
 727  * \internal
 728  * \brief Check whether an instance has a preferred node
 729  *
 730  * \param[in] instance          Clone instance or bundle replica container
 731  * \param[in] optimal_per_node  Optimal number of instances per node
 732  *
 733  * \return Instance's current node if still available, otherwise NULL
 734  */
 735 static const pcmk_node_t *
 736 preferred_node(const pcmk_resource_t *instance, int optimal_per_node)
     /* [previous][next][first][last][top][bottom][index][help] */
 737 {
 738     const pcmk_node_t *node = NULL;
 739     const pcmk_node_t *parent_node = NULL;
 740 
 741     // Check whether instance is active, healthy, and not yet assigned
 742     if ((instance->running_on == NULL)
 743         || !pcmk_is_set(instance->flags, pcmk_rsc_unassigned)
 744         || pcmk_is_set(instance->flags, pcmk_rsc_failed)) {
 745         return NULL;
 746     }
 747 
 748     // Check whether instance's current node can run resources
 749     node = pe__current_node(instance);
 750     if (!pcmk__node_available(node, true, false)) {
 751         pe_rsc_trace(instance, "Not assigning %s to %s early (unavailable)",
 752                      instance->id, pe__node_name(node));
 753         return NULL;
 754     }
 755 
 756     // Check whether node already has optimal number of instances assigned
 757     parent_node = pcmk__top_allowed_node(instance, node);
 758     if ((parent_node != NULL) && (parent_node->count >= optimal_per_node)) {
 759         pe_rsc_trace(instance,
 760                      "Not assigning %s to %s early "
 761                      "(optimal instances already assigned)",
 762                      instance->id, pe__node_name(node));
 763         return NULL;
 764     }
 765 
 766     return node;
 767 }
 768 
 769 /*!
 770  * \internal
 771  * \brief Assign collective instances to nodes
 772  *
 773  * \param[in,out] collective    Clone or bundle resource being assigned
 774  * \param[in,out] instances     List of clone instances or bundle containers
 775  * \param[in]     max_total     Maximum instances to assign in total
 776  * \param[in]     max_per_node  Maximum instances to assign to any one node
 777  */
 778 void
 779 pcmk__assign_instances(pcmk_resource_t *collective, GList *instances,
     /* [previous][next][first][last][top][bottom][index][help] */
 780                        int max_total, int max_per_node)
 781 {
 782     // Reuse node count to track number of assigned instances
 783     unsigned int available_nodes = reset_allowed_node_counts(collective);
 784 
 785     int optimal_per_node = 0;
 786     int assigned = 0;
 787     GList *iter = NULL;
 788     pcmk_resource_t *instance = NULL;
 789     const pcmk_node_t *current = NULL;
 790 
 791     if (available_nodes > 0) {
 792         optimal_per_node = max_total / available_nodes;
 793     }
 794     if (optimal_per_node < 1) {
 795         optimal_per_node = 1;
 796     }
 797 
 798     pe_rsc_debug(collective,
 799                  "Assigning up to %d %s instance%s to up to %u node%s "
 800                  "(at most %d per host, %d optimal)",
 801                  max_total, collective->id, pcmk__plural_s(max_total),
 802                  available_nodes, pcmk__plural_s(available_nodes),
 803                  max_per_node, optimal_per_node);
 804 
 805     // Assign as many instances as possible to their current location
 806     for (iter = instances; (iter != NULL) && (assigned < max_total);
 807          iter = iter->next) {
 808         int available = max_total - assigned;
 809 
 810         instance = iter->data;
 811         if (!pcmk_is_set(instance->flags, pcmk_rsc_unassigned)) {
 812             continue;   // Already assigned
 813         }
 814 
 815         current = preferred_node(instance, optimal_per_node);
 816         if ((current != NULL)
 817             && assign_instance_early(collective, instance, current,
 818                                      max_per_node, available)) {
 819             assigned++;
 820         }
 821     }
 822 
 823     pe_rsc_trace(collective, "Assigned %d of %d instance%s to current node",
 824                  assigned, max_total, pcmk__plural_s(max_total));
 825 
 826     for (iter = instances; iter != NULL; iter = iter->next) {
 827         instance = (pcmk_resource_t *) iter->data;
 828 
 829         if (!pcmk_is_set(instance->flags, pcmk_rsc_unassigned)) {
 830             continue; // Already assigned
 831         }
 832 
 833         if (instance->running_on != NULL) {
 834             current = pe__current_node(instance);
 835             if (pcmk__top_allowed_node(instance, current) == NULL) {
 836                 const char *unmanaged = "";
 837 
 838                 if (!pcmk_is_set(instance->flags, pcmk_rsc_managed)) {
 839                     unmanaged = "Unmanaged resource ";
 840                 }
 841                 crm_notice("%s%s is running on %s which is no longer allowed",
 842                            unmanaged, instance->id, pe__node_name(current));
 843             }
 844         }
 845 
 846         if (assigned >= max_total) {
 847             pe_rsc_debug(collective,
 848                          "Not assigning %s because maximum %d instances "
 849                          "already assigned",
 850                          instance->id, max_total);
 851             resource_location(instance, NULL, -INFINITY,
 852                               "collective_limit_reached", collective->cluster);
 853 
 854         } else if (assign_instance(instance, NULL, max_per_node) != NULL) {
 855             assigned++;
 856         }
 857     }
 858 
 859     pe_rsc_debug(collective, "Assigned %d of %d possible instance%s of %s",
 860                  assigned, max_total, pcmk__plural_s(max_total),
 861                  collective->id);
 862 }
 863 
 864 enum instance_state {
 865     instance_starting   = (1 << 0),
 866     instance_stopping   = (1 << 1),
 867 
 868     /* This indicates that some instance is restarting. It's not the same as
 869      * instance_starting|instance_stopping, which would indicate that some
 870      * instance is starting, and some instance (not necessarily the same one) is
 871      * stopping.
 872      */
 873     instance_restarting = (1 << 2),
 874 
 875     instance_active     = (1 << 3),
 876 
 877     instance_all        = instance_starting|instance_stopping
 878                           |instance_restarting|instance_active,
 879 };
 880 
 881 /*!
 882  * \internal
 883  * \brief Check whether an instance is active, starting, and/or stopping
 884  *
 885  * \param[in]     instance  Clone instance or bundle replica container
 886  * \param[in,out] state     Whether any instance is starting, stopping, etc.
 887  */
 888 static void
 889 check_instance_state(const pcmk_resource_t *instance, uint32_t *state)
     /* [previous][next][first][last][top][bottom][index][help] */
 890 {
 891     const GList *iter = NULL;
 892     uint32_t instance_state = 0; // State of just this instance
 893 
 894     // No need to check further if all conditions have already been detected
 895     if (pcmk_all_flags_set(*state, instance_all)) {
 896         return;
 897     }
 898 
 899     // If instance is a collective (a cloned group), check its children instead
 900     if (instance->variant > pcmk_rsc_variant_primitive) {
 901         for (iter = instance->children;
 902              (iter != NULL) && !pcmk_all_flags_set(*state, instance_all);
 903              iter = iter->next) {
 904             check_instance_state((const pcmk_resource_t *) iter->data, state);
 905         }
 906         return;
 907     }
 908 
 909     // If we get here, instance is a primitive
 910 
 911     if (instance->running_on != NULL) {
 912         instance_state |= instance_active;
 913     }
 914 
 915     // Check each of the instance's actions for runnable start or stop
 916     for (iter = instance->actions;
 917          (iter != NULL) && !pcmk_all_flags_set(instance_state,
 918                                                instance_starting
 919                                                |instance_stopping);
 920          iter = iter->next) {
 921 
 922         const pcmk_action_t *action = (const pcmk_action_t *) iter->data;
 923         const bool optional = pcmk_is_set(action->flags, pcmk_action_optional);
 924 
 925         if (pcmk__str_eq(PCMK_ACTION_START, action->task, pcmk__str_none)) {
 926             if (!optional
 927                 && pcmk_is_set(action->flags, pcmk_action_runnable)) {
 928 
 929                 pe_rsc_trace(instance, "Instance is starting due to %s",
 930                              action->uuid);
 931                 instance_state |= instance_starting;
 932             } else {
 933                 pe_rsc_trace(instance, "%s doesn't affect %s state (%s)",
 934                              action->uuid, instance->id,
 935                              (optional? "optional" : "unrunnable"));
 936             }
 937 
 938         } else if (pcmk__str_eq(PCMK_ACTION_STOP, action->task,
 939                                 pcmk__str_none)) {
 940             /* Only stop actions can be pseudo-actions for primitives. That
 941              * indicates that the node they are on is being fenced, so the stop
 942              * is implied rather than actually executed.
 943              */
 944             if (!optional
 945                 && pcmk_any_flags_set(action->flags, pcmk_action_pseudo
 946                                                      |pcmk_action_runnable)) {
 947                 pe_rsc_trace(instance, "Instance is stopping due to %s",
 948                              action->uuid);
 949                 instance_state |= instance_stopping;
 950             } else {
 951                 pe_rsc_trace(instance, "%s doesn't affect %s state (%s)",
 952                              action->uuid, instance->id,
 953                              (optional? "optional" : "unrunnable"));
 954             }
 955         }
 956     }
 957 
 958     if (pcmk_all_flags_set(instance_state,
 959                            instance_starting|instance_stopping)) {
 960         instance_state |= instance_restarting;
 961     }
 962     *state |= instance_state;
 963 }
 964 
 965 /*!
 966  * \internal
 967  * \brief Create actions for collective resource instances
 968  *
 969  * \param[in,out] collective    Clone or bundle resource to create actions for
 970  * \param[in,out] instances     List of clone instances or bundle containers
 971  */
 972 void
 973 pcmk__create_instance_actions(pcmk_resource_t *collective, GList *instances)
     /* [previous][next][first][last][top][bottom][index][help] */
 974 {
 975     uint32_t state = 0;
 976 
 977     pcmk_action_t *stop = NULL;
 978     pcmk_action_t *stopped = NULL;
 979 
 980     pcmk_action_t *start = NULL;
 981     pcmk_action_t *started = NULL;
 982 
 983     pe_rsc_trace(collective, "Creating collective instance actions for %s",
 984                  collective->id);
 985 
 986     // Create actions for each instance appropriate to its variant
 987     for (GList *iter = instances; iter != NULL; iter = iter->next) {
 988         pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
 989 
 990         instance->cmds->create_actions(instance);
 991         check_instance_state(instance, &state);
 992     }
 993 
 994     // Create pseudo-actions for rsc start and started
 995     start = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_START,
 996                                       !pcmk_is_set(state, instance_starting),
 997                                       true);
 998     started = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_RUNNING,
 999                                         !pcmk_is_set(state, instance_starting),
1000                                         false);
1001     started->priority = INFINITY;
1002     if (pcmk_any_flags_set(state, instance_active|instance_starting)) {
1003         pe__set_action_flags(started, pcmk_action_runnable);
1004     }
1005 
1006     // Create pseudo-actions for rsc stop and stopped
1007     stop = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOP,
1008                                      !pcmk_is_set(state, instance_stopping),
1009                                      true);
1010     stopped = pe__new_rsc_pseudo_action(collective, PCMK_ACTION_STOPPED,
1011                                         !pcmk_is_set(state, instance_stopping),
1012                                         true);
1013     stopped->priority = INFINITY;
1014     if (!pcmk_is_set(state, instance_restarting)) {
1015         pe__set_action_flags(stop, pcmk_action_migratable);
1016     }
1017 
1018     if (collective->variant == pcmk_rsc_variant_clone) {
1019         pe__create_clone_notif_pseudo_ops(collective, start, started, stop,
1020                                           stopped);
1021     }
1022 }
1023 
1024 /*!
1025  * \internal
1026  * \brief Get a list of clone instances or bundle replica containers
1027  *
1028  * \param[in] rsc  Clone or bundle resource
1029  *
1030  * \return Clone instances if \p rsc is a clone, or a newly created list of
1031  *         \p rsc's replica containers if \p rsc is a bundle
1032  * \note The caller must call free_instance_list() on the result when the list
1033  *       is no longer needed.
1034  */
1035 static inline GList *
1036 get_instance_list(const pcmk_resource_t *rsc)
     /* [previous][next][first][last][top][bottom][index][help] */
1037 {
1038     if (rsc->variant == pcmk_rsc_variant_bundle) {
1039         return pe__bundle_containers(rsc);
1040     } else {
1041         return rsc->children;
1042     }
1043 }
1044 
1045 /*!
1046  * \internal
1047  * \brief Free any memory created by get_instance_list()
1048  *
1049  * \param[in]     rsc   Clone or bundle resource passed to get_instance_list()
1050  * \param[in,out] list  Return value of get_instance_list() for \p rsc
1051  */
1052 static inline void
1053 free_instance_list(const pcmk_resource_t *rsc, GList *list)
     /* [previous][next][first][last][top][bottom][index][help] */
1054 {
1055     if (list != rsc->children) {
1056         g_list_free(list);
1057     }
1058 }
1059 
1060 /*!
1061  * \internal
1062  * \brief Check whether an instance is compatible with a role and node
1063  *
1064  * \param[in] instance  Clone instance or bundle replica container
1065  * \param[in] node      Instance must match this node
1066  * \param[in] role      If not pcmk_role_unknown, instance must match this role
1067  * \param[in] current   If true, compare instance's original node and role,
1068  *                      otherwise compare assigned next node and role
1069  *
1070  * \return true if \p instance is compatible with \p node and \p role,
1071  *         otherwise false
1072  */
1073 bool
1074 pcmk__instance_matches(const pcmk_resource_t *instance, const pcmk_node_t *node,
     /* [previous][next][first][last][top][bottom][index][help] */
1075                        enum rsc_role_e role, bool current)
1076 {
1077     pcmk_node_t *instance_node = NULL;
1078 
1079     CRM_CHECK((instance != NULL) && (node != NULL), return false);
1080 
1081     if ((role != pcmk_role_unknown)
1082         && (role != instance->fns->state(instance, current))) {
1083         pe_rsc_trace(instance,
1084                      "%s is not a compatible instance (role is not %s)",
1085                      instance->id, role2text(role));
1086         return false;
1087     }
1088 
1089     if (!is_set_recursive(instance, pcmk_rsc_blocked, true)) {
1090         // We only want instances that haven't failed
1091         instance_node = instance->fns->location(instance, NULL, current);
1092     }
1093 
1094     if (instance_node == NULL) {
1095         pe_rsc_trace(instance,
1096                      "%s is not a compatible instance (not assigned to a node)",
1097                      instance->id);
1098         return false;
1099     }
1100 
1101     if (!pe__same_node(instance_node, node)) {
1102         pe_rsc_trace(instance,
1103                      "%s is not a compatible instance (assigned to %s not %s)",
1104                      instance->id, pe__node_name(instance_node),
1105                      pe__node_name(node));
1106         return false;
1107     }
1108 
1109     return true;
1110 }
1111 
1112 /*!
1113  * \internal
1114  * \brief Find an instance that matches a given resource by node and role
1115  *
1116  * \param[in] match_rsc  Resource that instance must match (for logging only)
1117  * \param[in] rsc        Clone or bundle resource to check for matching instance
1118  * \param[in] node       Instance must match this node
1119  * \param[in] role       If not pcmk_role_unknown, instance must match this role
1120  * \param[in] current    If true, compare instance's original node and role,
1121  *                       otherwise compare assigned next node and role
1122  *
1123  * \return \p rsc instance matching \p node and \p role if any, otherwise NULL
1124  */
1125 static pcmk_resource_t *
1126 find_compatible_instance_on_node(const pcmk_resource_t *match_rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1127                                  const pcmk_resource_t *rsc,
1128                                  const pcmk_node_t *node, enum rsc_role_e role,
1129                                  bool current)
1130 {
1131     GList *instances = NULL;
1132 
1133     instances = get_instance_list(rsc);
1134     for (GList *iter = instances; iter != NULL; iter = iter->next) {
1135         pcmk_resource_t *instance = (pcmk_resource_t *) iter->data;
1136 
1137         if (pcmk__instance_matches(instance, node, role, current)) {
1138             pe_rsc_trace(match_rsc,
1139                          "Found %s %s instance %s compatible with %s on %s",
1140                          role == pcmk_role_unknown? "matching" : role2text(role),
1141                          rsc->id, instance->id, match_rsc->id,
1142                          pe__node_name(node));
1143             free_instance_list(rsc, instances); // Only frees list, not contents
1144             return instance;
1145         }
1146     }
1147     free_instance_list(rsc, instances);
1148 
1149     pe_rsc_trace(match_rsc, "No %s %s instance found compatible with %s on %s",
1150                  ((role == pcmk_role_unknown)? "matching" : role2text(role)),
1151                  rsc->id, match_rsc->id, pe__node_name(node));
1152     return NULL;
1153 }
1154 
1155 /*!
1156  * \internal
1157  * \brief Find a clone instance or bundle container compatible with a resource
1158  *
1159  * \param[in] match_rsc  Resource that instance must match
1160  * \param[in] rsc        Clone or bundle resource to check for matching instance
1161  * \param[in] role       If not pcmk_role_unknown, instance must match this role
1162  * \param[in] current    If true, compare instance's original node and role,
1163  *                       otherwise compare assigned next node and role
1164  *
1165  * \return Compatible (by \p role and \p match_rsc location) instance of \p rsc
1166  *         if any, otherwise NULL
1167  */
1168 pcmk_resource_t *
1169 pcmk__find_compatible_instance(const pcmk_resource_t *match_rsc,
     /* [previous][next][first][last][top][bottom][index][help] */
1170                                const pcmk_resource_t *rsc, enum rsc_role_e role,
1171                                bool current)
1172 {
1173     pcmk_resource_t *instance = NULL;
1174     GList *nodes = NULL;
1175     const pcmk_node_t *node = NULL;
1176 
1177     // If match_rsc has a node, check only that node
1178     node = match_rsc->fns->location(match_rsc, NULL, current);
1179     if (node != NULL) {
1180         return find_compatible_instance_on_node(match_rsc, rsc, node, role,
1181                                                 current);
1182     }
1183 
1184     // Otherwise check for an instance matching any of match_rsc's allowed nodes
1185     nodes = pcmk__sort_nodes(g_hash_table_get_values(match_rsc->allowed_nodes),
1186                              NULL);
1187     for (GList *iter = nodes; (iter != NULL) && (instance == NULL);
1188          iter = iter->next) {
1189         instance = find_compatible_instance_on_node(match_rsc, rsc,
1190                                                     (pcmk_node_t *) iter->data,
1191                                                     role, current);
1192     }
1193 
1194     if (instance == NULL) {
1195         pe_rsc_debug(rsc, "No %s instance found compatible with %s",
1196                      rsc->id, match_rsc->id);
1197     }
1198     g_list_free(nodes);
1199     return instance;
1200 }
1201 
1202 /*!
1203  * \internal
1204  * \brief Unassign an instance if mandatory ordering has no interleave match
1205  *
1206  * \param[in]     first          'First' action in an ordering
1207  * \param[in]     then           'Then' action in an ordering
1208  * \param[in,out] then_instance  'Then' instance that has no interleave match
1209  * \param[in]     type           Group of enum pcmk__action_relation_flags
1210  * \param[in]     current        If true, "then" action is stopped or demoted
1211  *
1212  * \return true if \p then_instance was unassigned, otherwise false
1213  */
1214 static bool
1215 unassign_if_mandatory(const pcmk_action_t *first, const pcmk_action_t *then,
     /* [previous][next][first][last][top][bottom][index][help] */
1216                       pcmk_resource_t *then_instance, uint32_t type,
1217                       bool current)
1218 {
1219     // Allow "then" instance to go down even without an interleave match
1220     if (current) {
1221         pe_rsc_trace(then->rsc,
1222                      "%s has no instance to order before stopping "
1223                      "or demoting %s",
1224                      first->rsc->id, then_instance->id);
1225 
1226     /* If the "first" action must be runnable, but there is no "first"
1227      * instance, the "then" instance must not be allowed to come up.
1228      */
1229     } else if (pcmk_any_flags_set(type, pcmk__ar_unrunnable_first_blocks
1230                                         |pcmk__ar_first_implies_then)) {
1231         pe_rsc_info(then->rsc,
1232                     "Inhibiting %s from being active "
1233                     "because there is no %s instance to interleave",
1234                     then_instance->id, first->rsc->id);
1235         return pcmk__assign_resource(then_instance, NULL, true, true);
1236     }
1237     return false;
1238 }
1239 
1240 /*!
1241  * \internal
1242  * \brief Find first matching action for a clone instance or bundle container
1243  *
1244  * \param[in] action       Action in an interleaved ordering
1245  * \param[in] instance     Clone instance or bundle container being interleaved
1246  * \param[in] action_name  Action to look for
1247  * \param[in] node         If not NULL, require action to be on this node
1248  * \param[in] for_first    If true, \p instance is the 'first' resource in the
1249  *                         ordering, otherwise it is the 'then' resource
1250  *
1251  * \return First action for \p instance (or in some cases if \p instance is a
1252  *         bundle container, its containerized resource) that matches
1253  *         \p action_name and \p node if any, otherwise NULL
1254  */
1255 static pcmk_action_t *
1256 find_instance_action(const pcmk_action_t *action, const pcmk_resource_t *instance,
     /* [previous][next][first][last][top][bottom][index][help] */
1257                      const char *action_name, const pcmk_node_t *node,
1258                      bool for_first)
1259 {
1260     const pcmk_resource_t *rsc = NULL;
1261     pcmk_action_t *matching_action = NULL;
1262 
1263     /* If instance is a bundle container, sometimes we should interleave the
1264      * action for the container itself, and sometimes for the containerized
1265      * resource.
1266      *
1267      * For example, given "start bundle A then bundle B", B likely requires the
1268      * service inside A's container to be active, rather than just the
1269      * container, so we should interleave the action for A's containerized
1270      * resource. On the other hand, it's possible B's container itself requires
1271      * something from A, so we should interleave the action for B's container.
1272      *
1273      * Essentially, for 'first', we should use the containerized resource for
1274      * everything except stop, and for 'then', we should use the container for
1275      * everything except promote and demote (which can only be performed on the
1276      * containerized resource).
1277      */
1278     if ((for_first && !pcmk__str_any_of(action->task, PCMK_ACTION_STOP,
1279                                         PCMK_ACTION_STOPPED, NULL))
1280 
1281         || (!for_first && pcmk__str_any_of(action->task, PCMK_ACTION_PROMOTE,
1282                                            PCMK_ACTION_PROMOTED,
1283                                            PCMK_ACTION_DEMOTE,
1284                                            PCMK_ACTION_DEMOTED, NULL))) {
1285 
1286         rsc = pe__get_rsc_in_container(instance);
1287     }
1288     if (rsc == NULL) {
1289         rsc = instance; // No containerized resource, use instance itself
1290     } else {
1291         node = NULL; // Containerized actions are on bundle-created guest
1292     }
1293 
1294     matching_action = find_first_action(rsc->actions, NULL, action_name, node);
1295     if (matching_action != NULL) {
1296         return matching_action;
1297     }
1298 
1299     if (pcmk_is_set(instance->flags, pcmk_rsc_removed)
1300         || pcmk__str_any_of(action_name, PCMK_ACTION_STOP, PCMK_ACTION_DEMOTE,
1301                             NULL)) {
1302         crm_trace("No %s action found for %s%s",
1303                   action_name,
1304                   pcmk_is_set(instance->flags, pcmk_rsc_removed)? "orphan " : "",
1305                   instance->id);
1306     } else {
1307         crm_err("No %s action found for %s to interleave (bug?)",
1308                 action_name, instance->id);
1309     }
1310     return NULL;
1311 }
1312 
1313 /*!
1314  * \internal
1315  * \brief Get the original action name of a bundle or clone action
1316  *
1317  * Given an action for a bundle or clone, get the original action name,
1318  * mapping notify to the action being notified, and if the instances are
1319  * primitives, mapping completion actions to the action that was completed
1320  * (for example, stopped to stop).
1321  *
1322  * \param[in] action  Clone or bundle action to check
1323  *
1324  * \return Original action name for \p action
1325  */
1326 static const char *
1327 orig_action_name(const pcmk_action_t *action)
     /* [previous][next][first][last][top][bottom][index][help] */
1328 {
1329     // Any instance will do
1330     const pcmk_resource_t *instance = action->rsc->children->data;
1331 
1332     char *action_type = NULL;
1333     const char *action_name = action->task;
1334     enum action_tasks orig_task = pcmk_action_unspecified;
1335 
1336     if (pcmk__strcase_any_of(action->task, PCMK_ACTION_NOTIFY,
1337                              PCMK_ACTION_NOTIFIED, NULL)) {
1338         // action->uuid is RSC_(confirmed-){pre,post}_notify_ACTION_INTERVAL
1339         CRM_CHECK(parse_op_key(action->uuid, NULL, &action_type, NULL),
1340                   return task2text(pcmk_action_unspecified));
1341         action_name = strstr(action_type, "_notify_");
1342         CRM_CHECK(action_name != NULL,
1343                   return task2text(pcmk_action_unspecified));
1344         action_name += strlen("_notify_");
1345     }
1346     orig_task = get_complex_task(instance, action_name);
1347     free(action_type);
1348     return task2text(orig_task);
1349 }
1350 
1351 /*!
1352  * \internal
1353  * \brief Update two interleaved actions according to an ordering between them
1354  *
1355  * Given information about an ordering of two interleaved actions, update the
1356  * actions' flags (and runnable_before members if appropriate) as appropriate
1357  * for the ordering. Effects may cascade to other orderings involving the
1358  * actions as well.
1359  *
1360  * \param[in,out] first     'First' action in an ordering
1361  * \param[in,out] then      'Then' action in an ordering
1362  * \param[in]     node      If not NULL, limit scope of ordering to this node
1363  * \param[in]     filter    Action flags to limit scope of certain updates (may
1364  *                          include pcmk_action_optional to affect only
1365  *                          mandatory actions, and pcmk_action_runnable to
1366  *                          affect only runnable actions)
1367  * \param[in]     type      Group of enum pcmk__action_relation_flags to apply
1368  *
1369  * \return Group of enum pcmk__updated flags indicating what was updated
1370  */
1371 static uint32_t
1372 update_interleaved_actions(pcmk_action_t *first, pcmk_action_t *then,
     /* [previous][next][first][last][top][bottom][index][help] */
1373                            const pcmk_node_t *node, uint32_t filter,
1374                            uint32_t type)
1375 {
1376     GList *instances = NULL;
1377     uint32_t changed = pcmk__updated_none;
1378     const char *orig_first_task = orig_action_name(first);
1379 
1380     // Stops and demotes must be interleaved with instance on current node
1381     bool current = pcmk__ends_with(first->uuid, "_" PCMK_ACTION_STOPPED "_0")
1382                    || pcmk__ends_with(first->uuid,
1383                                       "_" PCMK_ACTION_DEMOTED "_0");
1384 
1385     // Update the specified actions for each "then" instance individually
1386     instances = get_instance_list(then->rsc);
1387     for (GList *iter = instances; iter != NULL; iter = iter->next) {
1388         pcmk_resource_t *first_instance = NULL;
1389         pcmk_resource_t *then_instance = iter->data;
1390 
1391         pcmk_action_t *first_action = NULL;
1392         pcmk_action_t *then_action = NULL;
1393 
1394         // Find a "first" instance to interleave with this "then" instance
1395         first_instance = pcmk__find_compatible_instance(then_instance,
1396                                                         first->rsc,
1397                                                         pcmk_role_unknown,
1398                                                         current);
1399 
1400         if (first_instance == NULL) { // No instance can be interleaved
1401             if (unassign_if_mandatory(first, then, then_instance, type,
1402                                       current)) {
1403                 pcmk__set_updated_flags(changed, first, pcmk__updated_then);
1404             }
1405             continue;
1406         }
1407 
1408         first_action = find_instance_action(first, first_instance,
1409                                             orig_first_task, node, true);
1410         if (first_action == NULL) {
1411             continue;
1412         }
1413 
1414         then_action = find_instance_action(then, then_instance, then->task,
1415                                            node, false);
1416         if (then_action == NULL) {
1417             continue;
1418         }
1419 
1420         if (order_actions(first_action, then_action, type)) {
1421             pcmk__set_updated_flags(changed, first,
1422                                     pcmk__updated_first|pcmk__updated_then);
1423         }
1424 
1425         changed |= then_instance->cmds->update_ordered_actions(
1426             first_action, then_action, node,
1427             first_instance->cmds->action_flags(first_action, node), filter,
1428             type, then->rsc->cluster);
1429     }
1430     free_instance_list(then->rsc, instances);
1431     return changed;
1432 }
1433 
1434 /*!
1435  * \internal
1436  * \brief Check whether two actions in an ordering can be interleaved
1437  *
1438  * \param[in] first  'First' action in the ordering
1439  * \param[in] then   'Then' action in the ordering
1440  *
1441  * \return true if \p first and \p then can be interleaved, otherwise false
1442  */
1443 static bool
1444 can_interleave_actions(const pcmk_action_t *first, const pcmk_action_t *then)
     /* [previous][next][first][last][top][bottom][index][help] */
1445 {
1446     bool interleave = false;
1447     pcmk_resource_t *rsc = NULL;
1448 
1449     if ((first->rsc == NULL) || (then->rsc == NULL)) {
1450         crm_trace("Not interleaving %s with %s: not resource actions",
1451                   first->uuid, then->uuid);
1452         return false;
1453     }
1454 
1455     if (first->rsc == then->rsc) {
1456         crm_trace("Not interleaving %s with %s: same resource",
1457                   first->uuid, then->uuid);
1458         return false;
1459     }
1460 
1461     if ((first->rsc->variant < pcmk_rsc_variant_clone)
1462         || (then->rsc->variant < pcmk_rsc_variant_clone)) {
1463         crm_trace("Not interleaving %s with %s: not clones or bundles",
1464                   first->uuid, then->uuid);
1465         return false;
1466     }
1467 
1468     if (pcmk__ends_with(then->uuid, "_stop_0")
1469         || pcmk__ends_with(then->uuid, "_demote_0")) {
1470         rsc = first->rsc;
1471     } else {
1472         rsc = then->rsc;
1473     }
1474 
1475     interleave = crm_is_true(g_hash_table_lookup(rsc->meta,
1476                                                  XML_RSC_ATTR_INTERLEAVE));
1477     pe_rsc_trace(rsc, "'%s then %s' will %sbe interleaved (based on %s)",
1478                  first->uuid, then->uuid, (interleave? "" : "not "), rsc->id);
1479     return interleave;
1480 }
1481 
1482 /*!
1483  * \internal
1484  * \brief Update non-interleaved instance actions according to an ordering
1485  *
1486  * Given information about an ordering of two non-interleaved actions, update
1487  * the actions' flags (and runnable_before members if appropriate) as
1488  * appropriate for the ordering. Effects may cascade to other orderings
1489  * involving the actions as well.
1490  *
1491  * \param[in,out] instance  Clone instance or bundle container
1492  * \param[in,out] first     "First" action in ordering
1493  * \param[in]     then      "Then" action in ordering (for \p instance's parent)
1494  * \param[in]     node      If not NULL, limit scope of ordering to this node
1495  * \param[in]     flags     Action flags for \p first for ordering purposes
1496  * \param[in]     filter    Action flags to limit scope of certain updates (may
1497  *                          include pcmk_action_optional to affect only
1498  *                          mandatory actions, and pcmk_action_runnable to
1499  *                          affect only runnable actions)
1500  * \param[in]     type      Group of enum pcmk__action_relation_flags to apply
1501  *
1502  * \return Group of enum pcmk__updated flags indicating what was updated
1503  */
1504 static uint32_t
1505 update_noninterleaved_actions(pcmk_resource_t *instance, pcmk_action_t *first,
     /* [previous][next][first][last][top][bottom][index][help] */
1506                               const pcmk_action_t *then, const pcmk_node_t *node,
1507                               uint32_t flags, uint32_t filter, uint32_t type)
1508 {
1509     pcmk_action_t *instance_action = NULL;
1510     uint32_t instance_flags = 0;
1511     uint32_t changed = pcmk__updated_none;
1512 
1513     // Check whether instance has an equivalent of "then" action
1514     instance_action = find_first_action(instance->actions, NULL, then->task,
1515                                         node);
1516     if (instance_action == NULL) {
1517         return changed;
1518     }
1519 
1520     // Check whether action is runnable
1521     instance_flags = instance->cmds->action_flags(instance_action, node);
1522     if (!pcmk_is_set(instance_flags, pcmk_action_runnable)) {
1523         return changed;
1524     }
1525 
1526     // If so, update actions for the instance
1527     changed = instance->cmds->update_ordered_actions(first, instance_action,
1528                                                      node, flags, filter, type,
1529                                                      instance->cluster);
1530 
1531     // Propagate any changes to later actions
1532     if (pcmk_is_set(changed, pcmk__updated_then)) {
1533         for (GList *after_iter = instance_action->actions_after;
1534              after_iter != NULL; after_iter = after_iter->next) {
1535             pcmk__related_action_t *after = after_iter->data;
1536 
1537             pcmk__update_action_for_orderings(after->action, instance->cluster);
1538         }
1539     }
1540 
1541     return changed;
1542 }
1543 
1544 /*!
1545  * \internal
1546  * \brief Update two actions according to an ordering between them
1547  *
1548  * Given information about an ordering of two clone or bundle actions, update
1549  * the actions' flags (and runnable_before members if appropriate) as
1550  * appropriate for the ordering. Effects may cascade to other orderings
1551  * involving the actions as well.
1552  *
1553  * \param[in,out] first      'First' action in an ordering
1554  * \param[in,out] then       'Then' action in an ordering
1555  * \param[in]     node       If not NULL, limit scope of ordering to this node
1556  *                           (only used when interleaving instances)
1557  * \param[in]     flags      Action flags for \p first for ordering purposes
1558  * \param[in]     filter     Action flags to limit scope of certain updates (may
1559  *                           include pcmk_action_optional to affect only
1560  *                           mandatory actions, and pcmk_action_runnable to
1561  *                           affect only runnable actions)
1562  * \param[in]     type       Group of enum pcmk__action_relation_flags to apply
1563  * \param[in,out] scheduler  Scheduler data
1564  *
1565  * \return Group of enum pcmk__updated flags indicating what was updated
1566  */
1567 uint32_t
1568 pcmk__instance_update_ordered_actions(pcmk_action_t *first, pcmk_action_t *then,
     /* [previous][next][first][last][top][bottom][index][help] */
1569                                       const pcmk_node_t *node, uint32_t flags,
1570                                       uint32_t filter, uint32_t type,
1571                                       pcmk_scheduler_t *scheduler)
1572 {
1573     CRM_ASSERT((first != NULL) && (then != NULL) && (scheduler != NULL));
1574 
1575     if (then->rsc == NULL) {
1576         return pcmk__updated_none;
1577 
1578     } else if (can_interleave_actions(first, then)) {
1579         return update_interleaved_actions(first, then, node, filter, type);
1580 
1581     } else {
1582         uint32_t changed = pcmk__updated_none;
1583         GList *instances = get_instance_list(then->rsc);
1584 
1585         // Update actions for the clone or bundle resource itself
1586         changed |= pcmk__update_ordered_actions(first, then, node, flags,
1587                                                 filter, type, scheduler);
1588 
1589         // Update the 'then' clone instances or bundle containers individually
1590         for (GList *iter = instances; iter != NULL; iter = iter->next) {
1591             pcmk_resource_t *instance = iter->data;
1592 
1593             changed |= update_noninterleaved_actions(instance, first, then,
1594                                                      node, flags, filter, type);
1595         }
1596         free_instance_list(then->rsc, instances);
1597         return changed;
1598     }
1599 }
1600 
/*!
 * \internal
 * \brief Clear flags in an action-summary flag group
 *
 * Assigns to \p flags the result of pcmk__clear_flags_as(), which is given the
 * calling function and line, LOG_TRACE as the log level, the label
 * "Action summary", the ID of \p action's resource, the current value of
 * \p flags, and \p flag both as a value and as its source text.
 *
 * Every argument is parenthesized in the expansion so that compound
 * expressions (for example <tt>&act</tt> or <tt>a|b</tt>) are handled as a
 * unit rather than being re-associated by operator precedence.
 *
 * \param[in,out] flags   Modifiable flag group to update (evaluated twice)
 * \param[in]     action  Pointer to action whose resource ID is passed along
 * \param[in]     flag    Flag(s) to clear from \p flags
 */
#define pe__clear_action_summary_flags(flags, action, flag) do {            \
        (flags) = pcmk__clear_flags_as(__func__, __LINE__, LOG_TRACE,       \
                                       "Action summary",                    \
                                       (action)->rsc->id,                   \
                                       (flags), (flag), #flag);             \
    } while (0)
1606 
1607 /*!
1608  * \internal
1609  * \brief Return action flags for a given clone or bundle action
1610  *
1611  * \param[in,out] action     Action for a clone or bundle
1612  * \param[in]     instances  Clone instances or bundle containers
1613  * \param[in]     node       If not NULL, limit effects to this node
1614  *
1615  * \return Flags appropriate to \p action on \p node
1616  */
1617 uint32_t
1618 pcmk__collective_action_flags(pcmk_action_t *action, const GList *instances,
     /* [previous][next][first][last][top][bottom][index][help] */
1619                               const pcmk_node_t *node)
1620 {
1621     bool any_runnable = false;
1622     const char *action_name = orig_action_name(action);
1623 
1624     // Set original assumptions (optional and runnable may be cleared below)
1625     uint32_t flags = pcmk_action_optional
1626                      |pcmk_action_runnable
1627                      |pcmk_action_pseudo;
1628 
1629     for (const GList *iter = instances; iter != NULL; iter = iter->next) {
1630         const pcmk_resource_t *instance = iter->data;
1631         const pcmk_node_t *instance_node = NULL;
1632         pcmk_action_t *instance_action = NULL;
1633         uint32_t instance_flags;
1634 
1635         // Node is relevant only to primitive instances
1636         if (instance->variant == pcmk_rsc_variant_primitive) {
1637             instance_node = node;
1638         }
1639 
1640         instance_action = find_first_action(instance->actions, NULL,
1641                                             action_name, instance_node);
1642         if (instance_action == NULL) {
1643             pe_rsc_trace(action->rsc, "%s has no %s action on %s",
1644                          instance->id, action_name, pe__node_name(node));
1645             continue;
1646         }
1647 
1648         pe_rsc_trace(action->rsc, "%s has %s for %s on %s",
1649                      instance->id, instance_action->uuid, action_name,
1650                      pe__node_name(node));
1651 
1652         instance_flags = instance->cmds->action_flags(instance_action, node);
1653 
1654         // If any instance action is mandatory, so is the collective action
1655         if (pcmk_is_set(flags, pcmk_action_optional)
1656             && !pcmk_is_set(instance_flags, pcmk_action_optional)) {
1657             pe_rsc_trace(instance, "%s is mandatory because %s is",
1658                          action->uuid, instance_action->uuid);
1659             pe__clear_action_summary_flags(flags, action,
1660                                            pcmk_action_optional);
1661             pe__clear_action_flags(action, pcmk_action_optional);
1662         }
1663 
1664         // If any instance action is runnable, so is the collective action
1665         if (pcmk_is_set(instance_flags, pcmk_action_runnable)) {
1666             any_runnable = true;
1667         }
1668     }
1669 
1670     if (!any_runnable) {
1671         pe_rsc_trace(action->rsc,
1672                      "%s is not runnable because no instance can run %s",
1673                      action->uuid, action_name);
1674         pe__clear_action_summary_flags(flags, action, pcmk_action_runnable);
1675         if (node == NULL) {
1676             pe__clear_action_flags(action, pcmk_action_runnable);
1677         }
1678     }
1679 
1680     return flags;
1681 }

/* [previous][next][first][last][top][bottom][index][help] */