pacemaker  2.0.4-2deceaa3ae
Scalable High-Availability cluster resource manager
unpack.c
1 /*
2  * Copyright 2004-2020 the Pacemaker project contributors
3  *
4  * The version control history for this file may have further details.
5  *
6  * This source code is licensed under the GNU Lesser General Public License
7  * version 2.1 or later (LGPLv2.1+) WITHOUT ANY WARRANTY.
8  */
9 
10 #include <crm_internal.h>
11 
12 #include <stdio.h>
13 #include <string.h>
14 #include <glib.h>
15 #include <time.h>
16 
17 #include <crm/crm.h>
18 #include <crm/services.h>
19 #include <crm/msg_xml.h>
20 #include <crm/common/xml.h>
21 
22 #include <crm/common/util.h>
23 #include <crm/pengine/rules.h>
24 #include <crm/pengine/internal.h>
26 #include <pe_status_private.h>
27 
29 
30 #define set_config_flag(data_set, option, flag) do { \
31  const char *tmp = pe_pref(data_set->config_hash, option); \
32  if(tmp) { \
33  if(crm_is_true(tmp)) { \
34  set_bit(data_set->flags, flag); \
35  } else { \
36  clear_bit(data_set->flags, flag); \
37  } \
38  } \
39  } while(0)
40 
41 static void unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
42  xmlNode **last_failure,
43  enum action_fail_response *failed,
44  pe_working_set_t *data_set);
45 static void determine_remote_online_status(pe_working_set_t *data_set,
46  pe_node_t *this_node);
47 static void add_node_attrs(xmlNode *attrs, pe_node_t *node, bool overwrite,
48  pe_working_set_t *data_set);
49 static void determine_online_status(xmlNode *node_state, pe_node_t *this_node,
50  pe_working_set_t *data_set);
51 
52 static void unpack_lrm_resources(pe_node_t *node, xmlNode *lrm_state,
53  pe_working_set_t *data_set);
54 
55 
56 // Bitmask for warnings we only want to print once
57 uint32_t pe_wo = 0;
58 
59 static gboolean
60 is_dangling_guest_node(pe_node_t *node)
61 {
62  /* we are looking for a remote-node that was supposed to be mapped to a
63  * container resource, but all traces of that container have disappeared
64  * from both the config and the status section. */
65  if (pe__is_guest_or_remote_node(node) &&
66  node->details->remote_rsc &&
67  node->details->remote_rsc->container == NULL &&
68  is_set(node->details->remote_rsc->flags, pe_rsc_orphan_container_filler)) {
69  return TRUE;
70  }
71 
72  return FALSE;
73 }
74 
75 
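76 /* Schedule fencing (or the equivalent recovery) for a node.
77  *
78  * Guest nodes are "fenced" by marking their container resource as failed
79  * and to be stopped, remote nodes by flagging their connection for reset,
80  * and cluster nodes by marking them unclean and creating a fence action.
81  * priority_delay indicates whether priority-fencing-delay should be
82  * considered for the fencing operation.
83  */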
84 void
85 pe_fence_node(pe_working_set_t * data_set, pe_node_t * node,
86  const char *reason, bool priority_delay)
87 {
88  CRM_CHECK(node, return);
89 
90  /* A guest node is fenced by marking its container as failed */
91  if (pe__is_guest_node(node)) {
92  pe_resource_t *rsc = node->details->remote_rsc->container;
93 
94  if (is_set(rsc->flags, pe_rsc_failed) == FALSE) {
95  if (!is_set(rsc->flags, pe_rsc_managed)) {
96  crm_notice("Not fencing guest node %s "
97  "(otherwise would because %s): "
98  "its guest resource %s is unmanaged",
99  node->details->uname, reason, rsc->id);
100  } else {
101  crm_warn("Guest node %s will be fenced "
102  "(by recovering its guest resource %s): %s",
103  node->details->uname, rsc->id, reason);
104 
105  /* We don't mark the node as unclean because that would prevent the
106  * node from running resources. We want to allow it to run resources
107  * in this transition if the recovery succeeds.
108  */
109  node->details->remote_requires_reset = TRUE;
110  set_bit(rsc->flags, pe_rsc_failed);
111  set_bit(rsc->flags, pe_rsc_stop);
112  }
113  }
114 
115  } else if (is_dangling_guest_node(node)) {
116  crm_info("Cleaning up dangling connection for guest node %s: "
117  "fencing was already done because %s, "
118  "and guest resource no longer exists",
119  node->details->uname, reason);
120  set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
121  set_bit(node->details->remote_rsc->flags, pe_rsc_stop);
122 
123  } else if (pe__is_remote_node(node)) {
124  pe_resource_t *rsc = node->details->remote_rsc;
125 
126  if (rsc && (!is_set(rsc->flags, pe_rsc_managed))) {
127  crm_notice("Not fencing remote node %s "
128  "(otherwise would because %s): connection is unmanaged",
129  node->details->uname, reason);
130  } else if(node->details->remote_requires_reset == FALSE) {
131  node->details->remote_requires_reset = TRUE;
132  crm_warn("Remote node %s %s: %s",
133  node->details->uname,
134  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
135  reason);
136  }
137  node->details->unclean = TRUE;
138  // No need to apply `priority-fencing-delay` for remote nodes
139  pe_fence_op(node, NULL, TRUE, reason, FALSE, data_set);
140 
141  } else if (node->details->unclean) {
142  crm_trace("Cluster node %s %s because %s",
143  node->details->uname,
144  pe_can_fence(data_set, node)? "would also be fenced" : "also is unclean",
145  reason);
146 
147  } else {
148  crm_warn("Cluster node %s %s: %s",
149  node->details->uname,
150  pe_can_fence(data_set, node)? "will be fenced" : "is unclean",
151  reason);
152  node->details->unclean = TRUE;
153  pe_fence_op(node, NULL, TRUE, reason, priority_delay, data_set);
154  }
155 }
156 
157 // @TODO xpaths can't handle templates, rules, or id-refs
158 
159 // nvpair with provides or requires set to unfencing
160 #define XPATH_UNFENCING_NVPAIR XML_CIB_TAG_NVPAIR \
161  "[(@" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_PROVIDES "'" \
162  "or @" XML_NVPAIR_ATTR_NAME "='" XML_RSC_ATTR_REQUIRES "') " \
163  "and @" XML_NVPAIR_ATTR_VALUE "='unfencing']"
164 
165 // unfencing in rsc_defaults or any resource
166 #define XPATH_ENABLE_UNFENCING \
167  "/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RESOURCES \
168  "//" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR \
169  "|/" XML_TAG_CIB "/" XML_CIB_TAG_CONFIGURATION "/" XML_CIB_TAG_RSCCONFIG \
170  "/" XML_TAG_META_SETS "/" XPATH_UNFENCING_NVPAIR
171 
172 static
173 void set_if_xpath(unsigned long long flag, const char *xpath,
174  pe_working_set_t *data_set)
175 {
176  xmlXPathObjectPtr result = NULL;
177 
178  if (is_not_set(data_set->flags, flag)) {
179  result = xpath_search(data_set->input, xpath);
180  if (result && (numXpathResults(result) > 0)) {
181  set_bit(data_set->flags, flag);
182  }
183  freeXpathObject(result);
184  }
185 }
186 
187 gboolean
188 unpack_config(xmlNode * config, pe_working_set_t * data_set)
189 {
190  const char *value = NULL;
191  GHashTable *config_hash = crm_str_table_new();
192 
193  data_set->config_hash = config_hash;
194 
195  pe__unpack_dataset_nvpairs(config, XML_CIB_TAG_PROPSET, NULL, config_hash,
196  CIB_OPTIONS_FIRST, FALSE, data_set);
197 
198  verify_pe_options(data_set->config_hash);
199 
200  set_config_flag(data_set, "enable-startup-probes", pe_flag_startup_probes);
201  if(is_not_set(data_set->flags, pe_flag_startup_probes)) {
202  crm_info("Startup probes: disabled (dangerous)");
203  }
204 
205  value = pe_pref(data_set->config_hash, XML_ATTR_HAVE_WATCHDOG);
206  if (value && crm_is_true(value)) {
207  crm_notice("Watchdog will be used via SBD if fencing is required "
208  "and stonith-watchdog-timeout is nonzero");
209  set_bit(data_set->flags, pe_flag_have_stonith_resource);
210  }
211 
212  /* Set certain flags via xpath here, so they can be used before the relevant
213  * configuration sections are unpacked.
214  */
215  set_if_xpath(pe_flag_enable_unfencing, XPATH_ENABLE_UNFENCING, data_set);
216 
217  value = pe_pref(data_set->config_hash, "stonith-timeout");
218  data_set->stonith_timeout = (int) crm_parse_interval_spec(value);
219  crm_debug("STONITH timeout: %d", data_set->stonith_timeout);
220 
221  set_config_flag(data_set, "stonith-enabled", pe_flag_stonith_enabled);
222  crm_debug("STONITH of failed nodes is %s",
223  is_set(data_set->flags, pe_flag_stonith_enabled) ? "enabled" : "disabled");
224 
225  data_set->stonith_action = pe_pref(data_set->config_hash, "stonith-action");
226  if (!strcmp(data_set->stonith_action, "poweroff")) {
227  pe_warn_once(pe_wo_poweroff,
228  "Support for stonith-action of 'poweroff' is deprecated "
229  "and will be removed in a future release (use 'off' instead)");
230  data_set->stonith_action = "off";
231  }
232  crm_trace("STONITH will %s nodes", data_set->stonith_action);
233 
234  set_config_flag(data_set, "concurrent-fencing", pe_flag_concurrent_fencing);
235  crm_debug("Concurrent fencing is %s",
236  is_set(data_set->flags, pe_flag_concurrent_fencing) ? "enabled" : "disabled");
237 
238  value = pe_pref(data_set->config_hash,
239  XML_CONFIG_ATTR_PRIORITY_FENCING_DELAY);
240  if (value) {
241  data_set->priority_fencing_delay = crm_parse_interval_spec(value) / 1000;
242  crm_trace("Priority fencing delay is %ds", data_set->priority_fencing_delay);
243  }
244 
245  set_config_flag(data_set, "stop-all-resources", pe_flag_stop_everything);
246  crm_debug("Stop all active resources: %s",
247  is_set(data_set->flags, pe_flag_stop_everything) ? "true" : "false");
248 
249  set_config_flag(data_set, "symmetric-cluster", pe_flag_symmetric_cluster);
250  if (is_set(data_set->flags, pe_flag_symmetric_cluster)) {
251  crm_debug("Cluster is symmetric" " - resources can run anywhere by default");
252  }
253 
254  value = pe_pref(data_set->config_hash, "no-quorum-policy");
255 
256  if (safe_str_eq(value, "ignore")) {
257  data_set->no_quorum_policy = no_quorum_ignore;
258 
259  } else if (safe_str_eq(value, "freeze")) {
260  data_set->no_quorum_policy = no_quorum_freeze;
261 
262  } else if (safe_str_eq(value, "demote")) {
263  data_set->no_quorum_policy = no_quorum_demote;
264 
265  } else if (safe_str_eq(value, "suicide")) {
266  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
267  int do_panic = 0;
268 
269  crm_element_value_int(data_set->input, XML_ATTR_QUORUM_PANIC,
270  &do_panic);
271  if (do_panic || is_set(data_set->flags, pe_flag_have_quorum)) {
272  data_set->no_quorum_policy = no_quorum_suicide;
273  } else {
274  crm_notice("Resetting no-quorum-policy to 'stop': cluster has never had quorum");
275  data_set->no_quorum_policy = no_quorum_stop;
276  }
277  } else {
278  pcmk__config_err("Resetting no-quorum-policy to 'stop' because "
279  "fencing is disabled");
280  data_set->no_quorum_policy = no_quorum_stop;
281  }
282 
283  } else {
284  data_set->no_quorum_policy = no_quorum_stop;
285  }
286 
287  switch (data_set->no_quorum_policy) {
288  case no_quorum_freeze:
289  crm_debug("On loss of quorum: Freeze resources");
290  break;
291  case no_quorum_stop:
292  crm_debug("On loss of quorum: Stop ALL resources");
293  break;
294  case no_quorum_demote:
295  crm_debug("On loss of quorum: "
296  "Demote promotable resources and stop other resources");
297  break;
298  case no_quorum_suicide:
299  crm_notice("On loss of quorum: Fence all remaining nodes");
300  break;
301  case no_quorum_ignore:
302  crm_notice("On loss of quorum: Ignore");
303  break;
304  }
305 
306  set_config_flag(data_set, "stop-orphan-resources", pe_flag_stop_rsc_orphans);
307  crm_trace("Orphan resources are %s",
308  is_set(data_set->flags, pe_flag_stop_rsc_orphans) ? "stopped" : "ignored");
309 
310  set_config_flag(data_set, "stop-orphan-actions", pe_flag_stop_action_orphans);
311  crm_trace("Orphan resource actions are %s",
312  is_set(data_set->flags, pe_flag_stop_action_orphans) ? "stopped" : "ignored");
313 
314  set_config_flag(data_set, "remove-after-stop", pe_flag_remove_after_stop);
315  crm_trace("Stopped resources are removed from the status section: %s",
316  is_set(data_set->flags, pe_flag_remove_after_stop) ? "true" : "false");
317 
318  set_config_flag(data_set, "maintenance-mode", pe_flag_maintenance_mode);
319  crm_trace("Maintenance mode: %s",
320  is_set(data_set->flags, pe_flag_maintenance_mode) ? "true" : "false");
321 
322  set_config_flag(data_set, "start-failure-is-fatal", pe_flag_start_failure_fatal);
323  crm_trace("Start failures are %s",
324  is_set(data_set->flags,
325  pe_flag_start_failure_fatal) ? "always fatal" : "handled by failcount");
326 
327  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
328  set_config_flag(data_set, "startup-fencing", pe_flag_startup_fencing);
329  }
330  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
331  crm_trace("Unseen nodes will be fenced");
332  } else {
333  pe_warn_once(pe_wo_blind, "Blind faith: not fencing unseen nodes");
334  }
335 
336  pcmk__score_red = char2score(pe_pref(data_set->config_hash, "node-health-red"));
337  pcmk__score_green = char2score(pe_pref(data_set->config_hash, "node-health-green"));
338  pcmk__score_yellow = char2score(pe_pref(data_set->config_hash, "node-health-yellow"));
339 
340  crm_debug("Node scores: 'red' = %s, 'yellow' = %s, 'green' = %s",
341  pe_pref(data_set->config_hash, "node-health-red"),
342  pe_pref(data_set->config_hash, "node-health-yellow"),
343  pe_pref(data_set->config_hash, "node-health-green"));
344 
345  data_set->placement_strategy = pe_pref(data_set->config_hash, "placement-strategy");
346  crm_trace("Placement strategy: %s", data_set->placement_strategy);
347 
348  set_config_flag(data_set, "shutdown-lock", pe_flag_shutdown_lock);
349  crm_trace("Resources will%s be locked to cleanly shut down nodes",
350  (is_set(data_set->flags, pe_flag_shutdown_lock)? "" : " not"));
351  if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
352  value = pe_pref(data_set->config_hash,
353  XML_CONFIG_ATTR_SHUTDOWN_LOCK_LIMIT);
354  data_set->shutdown_lock = crm_parse_interval_spec(value) / 1000;
355  crm_trace("Shutdown locks expire after %us", data_set->shutdown_lock);
356  }
357 
358  return TRUE;
359 }
360 
361 static void
362 destroy_digest_cache(gpointer ptr)
363 {
364  op_digest_cache_t *data = ptr;
365 
366  free_xml(data->params_all);
367  free_xml(data->params_secure);
368  free_xml(data->params_restart);
369 
370  free(data->digest_all_calc);
371  free(data->digest_restart_calc);
372  free(data->digest_secure_calc);
373 
374  free(data);
375 }
376 
377 pe_node_t *
378 pe_create_node(const char *id, const char *uname, const char *type,
379  const char *score, pe_working_set_t * data_set)
380 {
381  pe_node_t *new_node = NULL;
382 
383  if (pe_find_node(data_set->nodes, uname) != NULL) {
384  pcmk__config_warn("More than one node entry has name '%s'", uname);
385  }
386 
387  new_node = calloc(1, sizeof(pe_node_t));
388  if (new_node == NULL) {
389  return NULL;
390  }
391 
392  new_node->weight = char2score(score);
393  new_node->fixed = FALSE;
394  new_node->details = calloc(1, sizeof(struct pe_node_shared_s));
395 
396  if (new_node->details == NULL) {
397  free(new_node);
398  return NULL;
399  }
400 
401  crm_trace("Creating node for entry %s/%s", uname, id);
402  new_node->details->id = id;
403  new_node->details->uname = uname;
404  new_node->details->online = FALSE;
405  new_node->details->shutdown = FALSE;
406  new_node->details->rsc_discovery_enabled = TRUE;
407  new_node->details->running_rsc = NULL;
408  new_node->details->type = node_ping;
409 
410  if (safe_str_eq(type, "remote")) {
411  new_node->details->type = node_remote;
412  set_bit(data_set->flags, pe_flag_have_remote_nodes);
413  } else if ((type == NULL) || safe_str_eq(type, "member")) {
414  new_node->details->type = node_member;
415  }
416 
417  new_node->details->attrs = crm_str_table_new();
418 
419  if (pe__is_guest_or_remote_node(new_node)) {
420  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
421  strdup("remote"));
422  } else {
423  g_hash_table_insert(new_node->details->attrs, strdup(CRM_ATTR_KIND),
424  strdup("cluster"));
425  }
426 
427  new_node->details->utilization = crm_str_table_new();
428 
429  new_node->details->digest_cache = g_hash_table_new_full(crm_str_hash,
430  g_str_equal, free,
431  destroy_digest_cache);
432 
433  data_set->nodes = g_list_insert_sorted(data_set->nodes, new_node, sort_node_uname);
434  return new_node;
435 }
436 
437 static const char *
438 expand_remote_rsc_meta(xmlNode *xml_obj, xmlNode *parent, pe_working_set_t *data)
439 {
440  xmlNode *attr_set = NULL;
441  xmlNode *attr = NULL;
442 
443  const char *container_id = ID(xml_obj);
444  const char *remote_name = NULL;
445  const char *remote_server = NULL;
446  const char *remote_port = NULL;
447  const char *connect_timeout = "60s";
448  const char *remote_allow_migrate=NULL;
449  const char *is_managed = NULL;
450 
451  for (attr_set = __xml_first_child_element(xml_obj); attr_set != NULL;
452  attr_set = __xml_next_element(attr_set)) {
453  if (safe_str_neq((const char *)attr_set->name, XML_TAG_META_SETS)) {
454  continue;
455  }
456 
457  for (attr = __xml_first_child_element(attr_set); attr != NULL;
458  attr = __xml_next_element(attr)) {
459  const char *value = crm_element_value(attr, XML_NVPAIR_ATTR_VALUE);
460  const char *name = crm_element_value(attr, XML_NVPAIR_ATTR_NAME);
461 
462  if (safe_str_eq(name, XML_RSC_ATTR_REMOTE_NODE)) {
463  remote_name = value;
464  } else if (safe_str_eq(name, "remote-addr")) {
465  remote_server = value;
466  } else if (safe_str_eq(name, "remote-port")) {
467  remote_port = value;
468  } else if (safe_str_eq(name, "remote-connect-timeout")) {
469  connect_timeout = value;
470  } else if (safe_str_eq(name, "remote-allow-migrate")) {
471  remote_allow_migrate=value;
472  } else if (safe_str_eq(name, XML_RSC_ATTR_MANAGED)) {
473  is_managed = value;
474  }
475  }
476  }
477 
478  if (remote_name == NULL) {
479  return NULL;
480  }
481 
482  if (pe_find_resource(data->resources, remote_name) != NULL) {
483  return NULL;
484  }
485 
486  pe_create_remote_xml(parent, remote_name, container_id,
487  remote_allow_migrate, is_managed,
488  connect_timeout, remote_server, remote_port);
489  return remote_name;
490 }
491 
492 static void
493 handle_startup_fencing(pe_working_set_t *data_set, pe_node_t *new_node)
494 {
495  if ((new_node->details->type == node_remote) && (new_node->details->remote_rsc == NULL)) {
496  /* Ignore fencing for remote nodes that don't have a connection resource
497  * associated with them. This happens when remote node entries get left
498  * in the nodes section after the connection resource is removed.
499  */
500  return;
501  }
502 
503  if (is_set(data_set->flags, pe_flag_startup_fencing)) {
504  // All nodes are unclean until we've seen their status entry
505  new_node->details->unclean = TRUE;
506 
507  } else {
508  // Blind faith ...
509  new_node->details->unclean = FALSE;
510  }
511 
512  /* We need to be able to determine if a node's status section
513  * exists or not separate from whether the node is unclean. */
514  new_node->details->unseen = TRUE;
515 }
516 
517 gboolean
518 unpack_nodes(xmlNode * xml_nodes, pe_working_set_t * data_set)
519 {
520  xmlNode *xml_obj = NULL;
521  pe_node_t *new_node = NULL;
522  const char *id = NULL;
523  const char *uname = NULL;
524  const char *type = NULL;
525  const char *score = NULL;
526 
527  for (xml_obj = __xml_first_child_element(xml_nodes); xml_obj != NULL;
528  xml_obj = __xml_next_element(xml_obj)) {
529 
530  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_NODE, TRUE)) {
531  new_node = NULL;
532 
533  id = crm_element_value(xml_obj, XML_ATTR_ID);
534  uname = crm_element_value(xml_obj, XML_ATTR_UNAME);
535  type = crm_element_value(xml_obj, XML_ATTR_TYPE);
536  score = crm_element_value(xml_obj, XML_RULE_ATTR_SCORE);
537  crm_trace("Processing node %s/%s", uname, id);
538 
539  if (id == NULL) {
540  pcmk__config_err("Ignoring <" XML_CIB_TAG_NODE
541  "> entry in configuration without id");
542  continue;
543  }
544  new_node = pe_create_node(id, uname, type, score, data_set);
545 
546  if (new_node == NULL) {
547  return FALSE;
548  }
549 
550 /* if(data_set->have_quorum == FALSE */
551 /* && data_set->no_quorum_policy == no_quorum_stop) { */
552 /* /\* start shutting resources down *\/ */
553 /* new_node->weight = -INFINITY; */
554 /* } */
555 
556  handle_startup_fencing(data_set, new_node);
557 
558  add_node_attrs(xml_obj, new_node, FALSE, data_set);
559  pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_UTILIZATION, NULL,
560  new_node->details->utilization, NULL,
561  FALSE, data_set);
562 
563  crm_trace("Done with node %s", crm_element_value(xml_obj, XML_ATTR_UNAME));
564  }
565  }
566 
567  if (data_set->localhost && pe_find_node(data_set->nodes, data_set->localhost) == NULL) {
568  crm_info("Creating a fake local node");
569  pe_create_node(data_set->localhost, data_set->localhost, NULL, 0,
570  data_set);
571  }
572 
573  return TRUE;
574 }
575 
576 static void
577 setup_container(pe_resource_t * rsc, pe_working_set_t * data_set)
578 {
579  const char *container_id = NULL;
580 
581  if (rsc->children) {
582  GListPtr gIter = rsc->children;
583 
584  for (; gIter != NULL; gIter = gIter->next) {
585  pe_resource_t *child_rsc = (pe_resource_t *) gIter->data;
586 
587  setup_container(child_rsc, data_set);
588  }
589  return;
590  }
591 
592  container_id = g_hash_table_lookup(rsc->meta, XML_RSC_ATTR_CONTAINER);
593  if (container_id && safe_str_neq(container_id, rsc->id)) {
594  pe_resource_t *container = pe_find_resource(data_set->resources, container_id);
595 
596  if (container) {
597  rsc->container = container;
598  set_bit(container->flags, pe_rsc_is_container);
599  container->fillers = g_list_append(container->fillers, rsc);
600  pe_rsc_trace(rsc, "Resource %s's container is %s", rsc->id, container_id);
601  } else {
602  pe_err("Resource %s: Unknown resource container (%s)", rsc->id, container_id);
603  }
604  }
605 }
606 
607 gboolean
608 unpack_remote_nodes(xmlNode * xml_resources, pe_working_set_t * data_set)
609 {
610  xmlNode *xml_obj = NULL;
611 
612  /* Create remote nodes and guest nodes from the resource configuration
613  * before unpacking resources.
614  */
615  for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
616  xml_obj = __xml_next_element(xml_obj)) {
617 
618  const char *new_node_id = NULL;
619 
620  /* Check for remote nodes, which are defined by ocf:pacemaker:remote
621  * primitives.
622  */
623  if (xml_contains_remote_node(xml_obj)) {
624  new_node_id = ID(xml_obj);
625  /* The "pe_find_node" check is here to make sure we don't iterate over
626  * an expanded node that has already been added to the node list. */
627  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
628  crm_trace("Found remote node %s defined by resource %s",
629  new_node_id, ID(xml_obj));
630  pe_create_node(new_node_id, new_node_id, "remote", NULL,
631  data_set);
632  }
633  continue;
634  }
635 
636  /* Check for guest nodes, which are defined by special meta-attributes
637  * of a primitive of any type (for example, VirtualDomain or Xen).
638  */
639  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RESOURCE, TRUE)) {
640  /* This will add an ocf:pacemaker:remote primitive to the
641  * configuration for the guest node's connection, to be unpacked
642  * later.
643  */
644  new_node_id = expand_remote_rsc_meta(xml_obj, xml_resources, data_set);
645  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
646  crm_trace("Found guest node %s in resource %s",
647  new_node_id, ID(xml_obj));
648  pe_create_node(new_node_id, new_node_id, "remote", NULL,
649  data_set);
650  }
651  continue;
652  }
653 
654  /* Check for guest nodes inside a group. Clones are currently not
655  * supported as guest nodes.
656  */
657  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_GROUP, TRUE)) {
658  xmlNode *xml_obj2 = NULL;
659  for (xml_obj2 = __xml_first_child_element(xml_obj); xml_obj2 != NULL;
660  xml_obj2 = __xml_next_element(xml_obj2)) {
661 
662  new_node_id = expand_remote_rsc_meta(xml_obj2, xml_resources, data_set);
663 
664  if (new_node_id && pe_find_node(data_set->nodes, new_node_id) == NULL) {
665  crm_trace("Found guest node %s in resource %s inside group %s",
666  new_node_id, ID(xml_obj2), ID(xml_obj));
667  pe_create_node(new_node_id, new_node_id, "remote", NULL,
668  data_set);
669  }
670  }
671  }
672  }
673  return TRUE;
674 }
675 
676 /* Call this after all the nodes and resources have been
677  * unpacked, but before the status section is read.
678  *
679  * A remote node's online status is reflected by the state
680  * of the remote node's connection resource. We need to link
681  * the remote node to this connection resource so we can have
682  * easy access to the connection resource during the scheduler calculations.
683  */
684 static void
685 link_rsc2remotenode(pe_working_set_t *data_set, pe_resource_t *new_rsc)
686 {
687  pe_node_t *remote_node = NULL;
688 
689  if (new_rsc->is_remote_node == FALSE) {
690  return;
691  }
692 
693  if (is_set(data_set->flags, pe_flag_quick_location)) {
694  /* remote_nodes and remote_resources are not linked in quick location calculations */
695  return;
696  }
697 
698  remote_node = pe_find_node(data_set->nodes, new_rsc->id);
699  CRM_CHECK(remote_node != NULL, return;);
700 
701  pe_rsc_trace(new_rsc, "Linking remote connection resource %s to node %s",
702  new_rsc->id, remote_node->details->uname);
703  remote_node->details->remote_rsc = new_rsc;
704 
705  if (new_rsc->container == NULL) {
706  /* Handle start-up fencing for remote nodes (as opposed to guest nodes)
707  * the same as is done for cluster nodes.
708  */
709  handle_startup_fencing(data_set, remote_node);
710 
711  } else {
712  /* pe_create_node() marks the new node as "remote" or "cluster"; now
713  * that we know the node is a guest node, update it correctly.
714  */
715  g_hash_table_replace(remote_node->details->attrs, strdup(CRM_ATTR_KIND),
716  strdup("container"));
717  }
718 }
719 
720 static void
721 destroy_tag(gpointer data)
722 {
723  pe_tag_t *tag = data;
724 
725  if (tag) {
726  free(tag->id);
727  g_list_free_full(tag->refs, free);
728  free(tag);
729  }
730 }
731 
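732 /* Unpack the CIB <resources> section into data_set->resources.
733  *
734  * Template IDs are remembered in data_set->template_rsc_sets, invalid
735  * resource definitions are ignored with a configuration error, container
736  * relationships and remote-node connections are linked up, and the
737  * resource list is sorted by priority. A configuration error is also
738  * logged when fencing is enabled but no STONITH resources are defined.
739  */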
744 gboolean
745 unpack_resources(xmlNode * xml_resources, pe_working_set_t * data_set)
746 {
747  xmlNode *xml_obj = NULL;
748  GListPtr gIter = NULL;
749 
750  data_set->template_rsc_sets = g_hash_table_new_full(crm_str_hash,
751  g_str_equal, free,
752  destroy_tag);
753 
754  for (xml_obj = __xml_first_child_element(xml_resources); xml_obj != NULL;
755  xml_obj = __xml_next_element(xml_obj)) {
756 
757  pe_resource_t *new_rsc = NULL;
758 
759  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_RSC_TEMPLATE, TRUE)) {
760  const char *template_id = ID(xml_obj);
761 
762  if (template_id && g_hash_table_lookup_extended(data_set->template_rsc_sets,
763  template_id, NULL, NULL) == FALSE) {
764  /* Record the template's ID for the knowledge of its existence anyway. */
765  g_hash_table_insert(data_set->template_rsc_sets, strdup(template_id), NULL);
766  }
767  continue;
768  }
769 
770  crm_trace("Beginning unpack... <%s id=%s... >", crm_element_name(xml_obj), ID(xml_obj));
771  if (common_unpack(xml_obj, &new_rsc, NULL, data_set) && (new_rsc != NULL)) {
772  data_set->resources = g_list_append(data_set->resources, new_rsc);
773  pe_rsc_trace(new_rsc, "Added resource %s", new_rsc->id);
774 
775  } else {
776  pcmk__config_err("Ignoring <%s> resource '%s' "
777  "because configuration is invalid",
778  crm_element_name(xml_obj), crm_str(ID(xml_obj)));
779  if (new_rsc != NULL && new_rsc->fns != NULL) {
780  new_rsc->fns->free(new_rsc);
781  }
782  }
783  }
784 
785  for (gIter = data_set->resources; gIter != NULL; gIter = gIter->next) {
786  pe_resource_t *rsc = (pe_resource_t *) gIter->data;
787 
788  setup_container(rsc, data_set);
789  link_rsc2remotenode(data_set, rsc);
790  }
791 
792  data_set->resources = g_list_sort(data_set->resources, sort_rsc_priority);
793  if (is_set(data_set->flags, pe_flag_quick_location)) {
794  /* Ignore */
795 
796  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)
797  && is_set(data_set->flags, pe_flag_have_stonith_resource) == FALSE) {
798 
799  pcmk__config_err("Resource start-up disabled since no STONITH resources have been defined");
800  pcmk__config_err("Either configure some or disable STONITH with the stonith-enabled option");
801  pcmk__config_err("NOTE: Clusters with shared data need STONITH to ensure data integrity");
802  }
803 
804  return TRUE;
805 }
806 
807 gboolean
808 unpack_tags(xmlNode * xml_tags, pe_working_set_t * data_set)
809 {
810  xmlNode *xml_tag = NULL;
811 
812  data_set->tags = g_hash_table_new_full(crm_str_hash, g_str_equal, free,
813  destroy_tag);
814 
815  for (xml_tag = __xml_first_child_element(xml_tags); xml_tag != NULL;
816  xml_tag = __xml_next_element(xml_tag)) {
817 
818  xmlNode *xml_obj_ref = NULL;
819  const char *tag_id = ID(xml_tag);
820 
821  if (crm_str_eq((const char *)xml_tag->name, XML_CIB_TAG_TAG, TRUE) == FALSE) {
822  continue;
823  }
824 
825  if (tag_id == NULL) {
826  pcmk__config_err("Ignoring <%s> without " XML_ATTR_ID,
827  crm_element_name(xml_tag));
828  continue;
829  }
830 
831  for (xml_obj_ref = __xml_first_child_element(xml_tag); xml_obj_ref != NULL;
832  xml_obj_ref = __xml_next_element(xml_obj_ref)) {
833 
834  const char *obj_ref = ID(xml_obj_ref);
835 
836  if (crm_str_eq((const char *)xml_obj_ref->name, XML_CIB_TAG_OBJ_REF, TRUE) == FALSE) {
837  continue;
838  }
839 
840  if (obj_ref == NULL) {
841  pcmk__config_err("Ignoring <%s> for tag '%s' without " XML_ATTR_ID,
842  crm_element_name(xml_obj_ref), tag_id);
843  continue;
844  }
845 
846  if (add_tag_ref(data_set->tags, tag_id, obj_ref) == FALSE) {
847  return FALSE;
848  }
849  }
850  }
851 
852  return TRUE;
853 }
854 
855 /* The ticket state section:
856  * "/cib/status/tickets/ticket_state" */
857 static gboolean
858 unpack_ticket_state(xmlNode * xml_ticket, pe_working_set_t * data_set)
859 {
860  const char *ticket_id = NULL;
861  const char *granted = NULL;
862  const char *last_granted = NULL;
863  const char *standby = NULL;
864  xmlAttrPtr xIter = NULL;
865 
866  pe_ticket_t *ticket = NULL;
867 
868  ticket_id = ID(xml_ticket);
869  if (ticket_id == NULL || strlen(ticket_id) == 0) {
870  return FALSE;
871  }
872 
873  crm_trace("Processing ticket state for %s", ticket_id);
874 
875  ticket = g_hash_table_lookup(data_set->tickets, ticket_id);
876  if (ticket == NULL) {
877  ticket = ticket_new(ticket_id, data_set);
878  if (ticket == NULL) {
879  return FALSE;
880  }
881  }
882 
883  for (xIter = xml_ticket->properties; xIter; xIter = xIter->next) {
884  const char *prop_name = (const char *)xIter->name;
885  const char *prop_value = crm_element_value(xml_ticket, prop_name);
886 
887  if (crm_str_eq(prop_name, XML_ATTR_ID, TRUE)) {
888  continue;
889  }
890  g_hash_table_replace(ticket->state, strdup(prop_name), strdup(prop_value));
891  }
892 
893  granted = g_hash_table_lookup(ticket->state, "granted");
894  if (granted && crm_is_true(granted)) {
895  ticket->granted = TRUE;
896  crm_info("We have ticket '%s'", ticket->id);
897  } else {
898  ticket->granted = FALSE;
899  crm_info("We do not have ticket '%s'", ticket->id);
900  }
901 
902  last_granted = g_hash_table_lookup(ticket->state, "last-granted");
903  if (last_granted) {
904  ticket->last_granted = crm_parse_int(last_granted, 0);
905  }
906 
907  standby = g_hash_table_lookup(ticket->state, "standby");
908  if (standby && crm_is_true(standby)) {
909  ticket->standby = TRUE;
910  if (ticket->granted) {
911  crm_info("Granted ticket '%s' is in standby-mode", ticket->id);
912  }
913  } else {
914  ticket->standby = FALSE;
915  }
916 
917  crm_trace("Done with ticket state for %s", ticket_id);
918 
919  return TRUE;
920 }
921 
922 static gboolean
923 unpack_tickets_state(xmlNode * xml_tickets, pe_working_set_t * data_set)
924 {
925  xmlNode *xml_obj = NULL;
926 
927  for (xml_obj = __xml_first_child_element(xml_tickets); xml_obj != NULL;
928  xml_obj = __xml_next_element(xml_obj)) {
929 
930  if (crm_str_eq((const char *)xml_obj->name, XML_CIB_TAG_TICKET_STATE, TRUE) == FALSE) {
931  continue;
932  }
933  unpack_ticket_state(xml_obj, data_set);
934  }
935 
936  return TRUE;
937 }
938 
939 static void
940 unpack_handle_remote_attrs(pe_node_t *this_node, xmlNode *state, pe_working_set_t * data_set)
941 {
942  const char *resource_discovery_enabled = NULL;
943  xmlNode *attrs = NULL;
944  pe_resource_t *rsc = NULL;
945 
946  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
947  return;
948  }
949 
950  if ((this_node == NULL) || !pe__is_guest_or_remote_node(this_node)) {
951  return;
952  }
953  crm_trace("Processing remote node id=%s, uname=%s", this_node->details->id, this_node->details->uname);
954 
955  this_node->details->remote_maintenance =
956  crm_atoi(crm_element_value(state, XML_NODE_IS_MAINTENANCE), "0");
957 
958  rsc = this_node->details->remote_rsc;
959  if (this_node->details->remote_requires_reset == FALSE) {
960  this_node->details->unclean = FALSE;
961  this_node->details->unseen = FALSE;
962  }
963  attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
964  add_node_attrs(attrs, this_node, TRUE, data_set);
965 
966  if (pe__shutdown_requested(this_node)) {
967  crm_info("Node %s is shutting down", this_node->details->uname);
968  this_node->details->shutdown = TRUE;
969  if (rsc) {
970  rsc->next_role = RSC_ROLE_STOPPED;
971  }
972  }
973 
974  if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
975  crm_info("Node %s is in standby-mode", this_node->details->uname);
976  this_node->details->standby = TRUE;
977  }
978 
979  if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance")) ||
980  (rsc && !is_set(rsc->flags, pe_rsc_managed))) {
981  crm_info("Node %s is in maintenance-mode", this_node->details->uname);
982  this_node->details->maintenance = TRUE;
983  }
984 
985  resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
986  if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
987  if (pe__is_remote_node(this_node)
988  && is_not_set(data_set->flags, pe_flag_stonith_enabled)) {
989  crm_warn("Ignoring %s attribute on remote node %s because stonith is disabled",
990  XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
991  } else {
992  /* This is either a remote node with fencing enabled, or a guest
993  * node. We don't care whether fencing is enabled when fencing guest
994  * nodes, because they are "fenced" by recovering their containing
995  * resource.
996  */
997  crm_info("Node %s has resource discovery disabled", this_node->details->uname);
998  this_node->details->rsc_discovery_enabled = FALSE;
999  }
1000  }
1001 }
1002 
1003 static bool
1004 unpack_node_loop(xmlNode * status, bool fence, pe_working_set_t * data_set)
1005 {
1006  bool changed = false;
1007  xmlNode *lrm_rsc = NULL;
1008 
1009  for (xmlNode *state = __xml_first_child_element(status); state != NULL;
1010  state = __xml_next_element(state)) {
1011 
1012  const char *id = NULL;
1013  const char *uname = NULL;
1014  pe_node_t *this_node = NULL;
1015  bool process = FALSE;
1016 
1017  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE) == FALSE) {
1018  continue;
1019  }
1020 
1021  id = crm_element_value(state, XML_ATTR_ID);
1022  uname = crm_element_value(state, XML_ATTR_UNAME);
1023  this_node = pe_find_node_any(data_set->nodes, id, uname);
1024 
1025  if (this_node == NULL) {
1026  crm_info("Node %s is unknown", id);
1027  continue;
1028 
1029  } else if (this_node->details->unpacked) {
1030  crm_trace("Node %s was already processed", id);
1031  continue;
1032 
1033  } else if (!pe__is_guest_or_remote_node(this_node)
1034  && is_set(data_set->flags, pe_flag_stonith_enabled)) {
1035  // A redundant test, but preserves the order for regression tests
1036  process = TRUE;
1037 
1038  } else if (pe__is_guest_or_remote_node(this_node)) {
1039  bool check = FALSE;
1040  pe_resource_t *rsc = this_node->details->remote_rsc;
1041 
1042  if(fence) {
1043  check = TRUE;
1044 
1045  } else if(rsc == NULL) {
1046  /* Not ready yet */
1047 
1048  } else if (pe__is_guest_node(this_node)
1049  && rsc->role == RSC_ROLE_STARTED
1050  && rsc->container->role == RSC_ROLE_STARTED) {
1051  /* Both the connection and its containing resource need to be
1052  * known to be up before we process resources running in it.
1053  */
1054  check = TRUE;
1055  crm_trace("Checking node %s/%s/%s status %d/%d/%d", id, rsc->id, rsc->container->id, fence, rsc->role, RSC_ROLE_STARTED);
1056 
1057  } else if (!pe__is_guest_node(this_node)
1058  && ((rsc->role == RSC_ROLE_STARTED)
1059  || is_set(data_set->flags, pe_flag_shutdown_lock))) {
1060  check = TRUE;
1061  crm_trace("Checking node %s/%s status %d/%d/%d", id, rsc->id, fence, rsc->role, RSC_ROLE_STARTED);
1062  }
1063 
1064  if (check) {
1065  determine_remote_online_status(data_set, this_node);
1066  unpack_handle_remote_attrs(this_node, state, data_set);
1067  process = TRUE;
1068  }
1069 
1070  } else if (this_node->details->online) {
1071  process = TRUE;
1072 
1073  } else if (fence) {
1074  process = TRUE;
1075 
1076  } else if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
1077  process = TRUE;
1078  }
1079 
1080  if(process) {
1081  crm_trace("Processing lrm resource entries on %shealthy%s node: %s",
1082  fence?"un":"",
1083  (pe__is_guest_or_remote_node(this_node)? " remote" : ""),
1084  this_node->details->uname);
1085  changed = TRUE;
1086  this_node->details->unpacked = TRUE;
1087 
1088  lrm_rsc = find_xml_node(state, XML_CIB_TAG_LRM, FALSE);
1089  lrm_rsc = find_xml_node(lrm_rsc, XML_LRM_TAG_RESOURCES, FALSE);
1090  unpack_lrm_resources(this_node, lrm_rsc, data_set);
1091  }
1092  }
1093  return changed;
1094 }
1095 
1096 /* remove nodes that are down, stopping */
1097 /* create positive rsc_to_node constraints between resources and the nodes they are running on */
1098 /* anything else? */
1099 gboolean
1100 unpack_status(xmlNode * status, pe_working_set_t * data_set)
1101 {
1102  const char *id = NULL;
1103  const char *uname = NULL;
1104 
1105  xmlNode *state = NULL;
1106  pe_node_t *this_node = NULL;
1107 
1108  crm_trace("Beginning unpack");
1109 
1110  if (data_set->tickets == NULL) {
1111  data_set->tickets = g_hash_table_new_full(crm_str_hash, g_str_equal,
1112  free, destroy_ticket);
1113  }
1114 
1115  for (state = __xml_first_child_element(status); state != NULL;
1116  state = __xml_next_element(state)) {
1117 
1118  if (crm_str_eq((const char *)state->name, XML_CIB_TAG_TICKETS, TRUE)) {
1119  unpack_tickets_state((xmlNode *) state, data_set);
1120 
1121  } else if (crm_str_eq((const char *)state->name, XML_CIB_TAG_STATE, TRUE)) {
1122  xmlNode *attrs = NULL;
1123  const char *resource_discovery_enabled = NULL;
1124 
1125  id = crm_element_value(state, XML_ATTR_ID);
1126  uname = crm_element_value(state, XML_ATTR_UNAME);
1127  this_node = pe_find_node_any(data_set->nodes, id, uname);
1128 
1129  if (uname == NULL) {
1130  /* error */
1131  continue;
1132 
1133  } else if (this_node == NULL) {
1134  pcmk__config_warn("Ignoring recorded node status for '%s' "
1135  "because no longer in configuration", uname);
1136  continue;
1137 
1138  } else if (pe__is_guest_or_remote_node(this_node)) {
1139  /* online state for remote nodes is determined by the
1140  * rsc state after all the unpacking is done. we do however
1141  * need to mark whether or not the node has been fenced as this plays
1142  * a role during unpacking cluster node resource state */
1143  this_node->details->remote_was_fenced =
1144  crm_atoi(crm_element_value(state, XML_NODE_IS_FENCED), "0");
1145  continue;
1146  }
1147 
1148  crm_trace("Processing node id=%s, uname=%s", id, uname);
1149 
1150  /* Mark the node as provisionally clean
1151  * - at least we have seen it in the current cluster's lifetime
1152  */
1153  this_node->details->unclean = FALSE;
1154  this_node->details->unseen = FALSE;
1155  attrs = find_xml_node(state, XML_TAG_TRANSIENT_NODEATTRS, FALSE);
1156  add_node_attrs(attrs, this_node, TRUE, data_set);
1157 
1158  if (crm_is_true(pe_node_attribute_raw(this_node, "standby"))) {
1159  crm_info("Node %s is in standby-mode", this_node->details->uname);
1160  this_node->details->standby = TRUE;
1161  }
1162 
1163  if (crm_is_true(pe_node_attribute_raw(this_node, "maintenance"))) {
1164  crm_info("Node %s is in maintenance-mode", this_node->details->uname);
1165  this_node->details->maintenance = TRUE;
1166  }
1167 
1168  resource_discovery_enabled = pe_node_attribute_raw(this_node, XML_NODE_ATTR_RSC_DISCOVERY);
1169  if (resource_discovery_enabled && !crm_is_true(resource_discovery_enabled)) {
1170  crm_warn("ignoring %s attribute on node %s, disabling resource discovery is not allowed on cluster nodes",
1171  XML_NODE_ATTR_RSC_DISCOVERY, this_node->details->uname);
1172  }
1173 
1174  crm_trace("determining node state");
1175  determine_online_status(state, this_node, data_set);
1176 
1177  if (is_not_set(data_set->flags, pe_flag_have_quorum)
1178  && this_node->details->online
1179  && (data_set->no_quorum_policy == no_quorum_suicide)) {
1180  /* Everything else should flow from this automatically
1181  * (at least until the scheduler becomes able to migrate off
1182  * healthy resources)
1183  */
1184  pe_fence_node(data_set, this_node, "cluster does not have quorum", FALSE);
1185  }
1186  }
1187  }
1188 
1189 
1190  while(unpack_node_loop(status, FALSE, data_set)) {
1191  crm_trace("Start another loop");
1192  }
1193 
1194  // Now catch any nodes we didn't see
1195  unpack_node_loop(status, is_set(data_set->flags, pe_flag_stonith_enabled), data_set);
1196 
1197  /* Now that we know where resources are, we can schedule stops of containers
1198  * with failed bundle connections
1199  */
1200  if (data_set->stop_needed != NULL) {
1201  for (GList *item = data_set->stop_needed; item; item = item->next) {
1202  pe_resource_t *container = item->data;
1203  pe_node_t *node = pe__current_node(container);
1204 
1205  if (node) {
1206  stop_action(container, node, FALSE);
1207  }
1208  }
1209  g_list_free(data_set->stop_needed);
1210  data_set->stop_needed = NULL;
1211  }
1212 
1213  for (GListPtr gIter = data_set->nodes; gIter != NULL; gIter = gIter->next) {
1214  pe_node_t *this_node = gIter->data;
1215 
1216  if (this_node == NULL) {
1217  continue;
1218  } else if (!pe__is_guest_or_remote_node(this_node)) {
1219  continue;
1220  } else if(this_node->details->unpacked) {
1221  continue;
1222  }
1223  determine_remote_online_status(data_set, this_node);
1224  }
1225 
1226  return TRUE;
1227 }
1228 
1229 static gboolean
1230 determine_online_status_no_fencing(pe_working_set_t * data_set, xmlNode * node_state,
1231  pe_node_t * this_node)
1232 {
1233  gboolean online = FALSE;
1234  const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
1235  const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
1236  const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
1237  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1238 
1239  if (!crm_is_true(in_cluster)) {
1240  crm_trace("Node is down: in_cluster=%s", crm_str(in_cluster));
1241 
1242  } else if (safe_str_eq(is_peer, ONLINESTATUS)) {
1243  if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
1244  online = TRUE;
1245  } else {
1246  crm_debug("Node is not ready to run resources: %s", join);
1247  }
1248 
1249  } else if (this_node->details->expected_up == FALSE) {
1250  crm_trace("Controller is down: in_cluster=%s", crm_str(in_cluster));
1251  crm_trace("\tis_peer=%s, join=%s, expected=%s",
1252  crm_str(is_peer), crm_str(join), crm_str(exp_state));
1253 
1254  } else {
1255  /* mark it unclean */
1256  pe_fence_node(data_set, this_node, "peer is unexpectedly down", FALSE);
1257  crm_info("\tin_cluster=%s, is_peer=%s, join=%s, expected=%s",
1258  crm_str(in_cluster), crm_str(is_peer), crm_str(join), crm_str(exp_state));
1259  }
1260  return online;
1261 }
1262 
1263 static gboolean
1264 determine_online_status_fencing(pe_working_set_t * data_set, xmlNode * node_state,
1265  pe_node_t * this_node)
1266 {
1267  gboolean online = FALSE;
1268  gboolean do_terminate = FALSE;
1269  bool crmd_online = FALSE;
1270  const char *join = crm_element_value(node_state, XML_NODE_JOIN_STATE);
1271  const char *is_peer = crm_element_value(node_state, XML_NODE_IS_PEER);
1272  const char *in_cluster = crm_element_value(node_state, XML_NODE_IN_CLUSTER);
1273  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1274  const char *terminate = pe_node_attribute_raw(this_node, "terminate");
1275 
1276 /*
1277  - XML_NODE_IN_CLUSTER ::= true|false
1278  - XML_NODE_IS_PEER ::= online|offline
1279  - XML_NODE_JOIN_STATE ::= member|down|pending|banned
1280  - XML_NODE_EXPECTED ::= member|down
1281 */
1282 
1283  if (crm_is_true(terminate)) {
1284  do_terminate = TRUE;
1285 
1286  } else if (terminate != NULL && strlen(terminate) > 0) {
1287  /* could be a time() value */
1288  char t = terminate[0];
1289 
1290  if (t != '0' && isdigit(t)) {
1291  do_terminate = TRUE;
1292  }
1293  }
1294 
1295  crm_trace("%s: in_cluster=%s, is_peer=%s, join=%s, expected=%s, term=%d",
1296  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
1297  crm_str(join), crm_str(exp_state), do_terminate);
1298 
1299  online = crm_is_true(in_cluster);
1300  crmd_online = safe_str_eq(is_peer, ONLINESTATUS);
1301  if (exp_state == NULL) {
1302  exp_state = CRMD_JOINSTATE_DOWN;
1303  }
1304 
1305  if (this_node->details->shutdown) {
1306  crm_debug("%s is shutting down", this_node->details->uname);
1307 
1308  /* Slightly different criteria since we can't shut down a dead peer */
1309  online = crmd_online;
1310 
1311  } else if (in_cluster == NULL) {
1312  pe_fence_node(data_set, this_node, "peer has not been seen by the cluster", FALSE);
1313 
1314  } else if (safe_str_eq(join, CRMD_JOINSTATE_NACK)) {
1315  pe_fence_node(data_set, this_node, "peer failed the pacemaker membership criteria", FALSE);
1316 
1317  } else if (do_terminate == FALSE && safe_str_eq(exp_state, CRMD_JOINSTATE_DOWN)) {
1318 
1319  if (crm_is_true(in_cluster) || crmd_online) {
1320  crm_info("- Node %s is not ready to run resources", this_node->details->uname);
1321  this_node->details->standby = TRUE;
1322  this_node->details->pending = TRUE;
1323 
1324  } else {
1325  crm_trace("%s is down or still coming up", this_node->details->uname);
1326  }
1327 
1328  } else if (do_terminate && safe_str_eq(join, CRMD_JOINSTATE_DOWN)
1329  && crm_is_true(in_cluster) == FALSE && !crmd_online) {
1330  crm_info("Node %s was just shot", this_node->details->uname);
1331  online = FALSE;
1332 
1333  } else if (crm_is_true(in_cluster) == FALSE) {
1334  // Consider `priority-fencing-delay` for lost nodes
1335  pe_fence_node(data_set, this_node, "peer is no longer part of the cluster", TRUE);
1336 
1337  } else if (!crmd_online) {
1338  pe_fence_node(data_set, this_node, "peer process is no longer available", FALSE);
1339 
1340  /* Everything is running at this point, now check join state */
1341  } else if (do_terminate) {
1342  pe_fence_node(data_set, this_node, "termination was requested", FALSE);
1343 
1344  } else if (safe_str_eq(join, CRMD_JOINSTATE_MEMBER)) {
1345  crm_info("Node %s is active", this_node->details->uname);
1346 
1347  } else if (safe_str_eq(join, CRMD_JOINSTATE_PENDING)
1348  || safe_str_eq(join, CRMD_JOINSTATE_DOWN)) {
1349  crm_info("Node %s is not ready to run resources", this_node->details->uname);
1350  this_node->details->standby = TRUE;
1351  this_node->details->pending = TRUE;
1352 
1353  } else {
1354  pe_fence_node(data_set, this_node, "peer was in an unknown state", FALSE);
1355  crm_warn("%s: in-cluster=%s, is-peer=%s, join=%s, expected=%s, term=%d, shutdown=%d",
1356  this_node->details->uname, crm_str(in_cluster), crm_str(is_peer),
1357  crm_str(join), crm_str(exp_state), do_terminate, this_node->details->shutdown);
1358  }
1359 
1360  return online;
1361 }
1362 
1363 static void
1364 determine_remote_online_status(pe_working_set_t * data_set, pe_node_t * this_node)
1365 {
1366  pe_resource_t *rsc = this_node->details->remote_rsc;
1367  pe_resource_t *container = NULL;
1368  pe_node_t *host = NULL;
1369 
1370  /* If there is a node state entry for a (former) Pacemaker Remote node
1371  * but no resource creating that node, the node's connection resource will
1372  * be NULL. Consider it an offline remote node in that case.
1373  */
1374  if (rsc == NULL) {
1375  this_node->details->online = FALSE;
1376  goto remote_online_done;
1377  }
1378 
1379  container = rsc->container;
1380 
1381  if (container && pcmk__list_of_1(rsc->running_on)) {
1382  host = rsc->running_on->data;
1383  }
1384 
1385  /* If the resource is currently started, mark it online. */
1386  if (rsc->role == RSC_ROLE_STARTED) {
1387  crm_trace("%s node %s presumed ONLINE because connection resource is started",
1388  (container? "Guest" : "Remote"), this_node->details->id);
1389  this_node->details->online = TRUE;
1390  }
1391 
1392  /* consider this node shutting down if transitioning start->stop */
1393  if (rsc->role == RSC_ROLE_STARTED && rsc->next_role == RSC_ROLE_STOPPED) {
1394  crm_trace("%s node %s shutting down because connection resource is stopping",
1395  (container? "Guest" : "Remote"), this_node->details->id);
1396  this_node->details->shutdown = TRUE;
1397  }
1398 
1399  /* Now check all the failure conditions. */
1400  if(container && is_set(container->flags, pe_rsc_failed)) {
1401  crm_trace("Guest node %s UNCLEAN because guest resource failed",
1402  this_node->details->id);
1403  this_node->details->online = FALSE;
1404  this_node->details->remote_requires_reset = TRUE;
1405 
1406  } else if(is_set(rsc->flags, pe_rsc_failed)) {
1407  crm_trace("%s node %s OFFLINE because connection resource failed",
1408  (container? "Guest" : "Remote"), this_node->details->id);
1409  this_node->details->online = FALSE;
1410 
1411  } else if (rsc->role == RSC_ROLE_STOPPED
1412  || (container && container->role == RSC_ROLE_STOPPED)) {
1413 
1414  crm_trace("%s node %s OFFLINE because its resource is stopped",
1415  (container? "Guest" : "Remote"), this_node->details->id);
1416  this_node->details->online = FALSE;
1417  this_node->details->remote_requires_reset = FALSE;
1418 
1419  } else if (host && (host->details->online == FALSE)
1420  && host->details->unclean) {
1421  crm_trace("Guest node %s UNCLEAN because host is unclean",
1422  this_node->details->id);
1423  this_node->details->online = FALSE;
1424  this_node->details->remote_requires_reset = TRUE;
1425  }
1426 
1427 remote_online_done:
1428  crm_trace("Remote node %s online=%s",
1429  this_node->details->id, this_node->details->online ? "TRUE" : "FALSE");
1430 }
1431 
1432 static void
1433 determine_online_status(xmlNode * node_state, pe_node_t * this_node, pe_working_set_t * data_set)
1434 {
1435  gboolean online = FALSE;
1436  const char *exp_state = crm_element_value(node_state, XML_NODE_EXPECTED);
1437 
1438  CRM_CHECK(this_node != NULL, return);
1439 
1440  this_node->details->shutdown = FALSE;
1441  this_node->details->expected_up = FALSE;
1442 
1443  if (pe__shutdown_requested(this_node)) {
1444  this_node->details->shutdown = TRUE;
1445 
1446  } else if (safe_str_eq(exp_state, CRMD_JOINSTATE_MEMBER)) {
1447  this_node->details->expected_up = TRUE;
1448  }
1449 
1450  if (this_node->details->type == node_ping) {
1451  this_node->details->unclean = FALSE;
1452  online = FALSE; /* As far as resource management is concerned,
1453  * the node is safely offline.
1454  * Anyone caught abusing this logic will be shot
1455  */
1456 
1457  } else if (is_set(data_set->flags, pe_flag_stonith_enabled) == FALSE) {
1458  online = determine_online_status_no_fencing(data_set, node_state, this_node);
1459 
1460  } else {
1461  online = determine_online_status_fencing(data_set, node_state, this_node);
1462  }
1463 
1464  if (online) {
1465  this_node->details->online = TRUE;
1466 
1467  } else {
1468  /* remove node from contention */
1469  this_node->fixed = TRUE;
1470  this_node->weight = -INFINITY;
1471  }
1472 
1473  if (online && this_node->details->shutdown) {
1474  /* don't run resources here */
1475  this_node->fixed = TRUE;
1476  this_node->weight = -INFINITY;
1477  }
1478 
1479  if (this_node->details->type == node_ping) {
1480  crm_info("Node %s is not a pacemaker node", this_node->details->uname);
1481 
1482  } else if (this_node->details->unclean) {
1483  pe_proc_warn("Node %s is unclean", this_node->details->uname);
1484 
1485  } else if (this_node->details->online) {
1486  crm_info("Node %s is %s", this_node->details->uname,
1487  this_node->details->shutdown ? "shutting down" :
1488  this_node->details->pending ? "pending" :
1489  this_node->details->standby ? "standby" :
1490  this_node->details->maintenance ? "maintenance" : "online");
1491 
1492  } else {
1493  crm_trace("Node %s is offline", this_node->details->uname);
1494  }
1495 }
1496 
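1497 /* Return a pointer to the last character of a resource ID's base name,
1498  * ignoring any trailing ":<number>" clone suffix, or NULL if the ID is
1499  * empty.
1500  */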
1505 const char *
1506 pe_base_name_end(const char *id)
1507 {
1508  if (!pcmk__str_empty(id)) {
1509  const char *end = id + strlen(id) - 1;
1510 
1511  for (const char *s = end; s > id; --s) {
1512  switch (*s) {
1513  case '0':
1514  case '1':
1515  case '2':
1516  case '3':
1517  case '4':
1518  case '5':
1519  case '6':
1520  case '7':
1521  case '8':
1522  case '9':
1523  break;
1524  case ':':
1525  return (s == end)? s : (s - 1);
1526  default:
1527  return end;
1528  }
1529  }
1530  return end;
1531  }
1532  return NULL;
1533 }
1534 
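1535 /* Return a newly allocated copy of a resource ID with any ":<number>"
1536  * clone suffix stripped. The caller is responsible for freeing the
1537  * result.
1538  */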
1545 char *
1546 clone_strip(const char *last_rsc_id)
1547 {
1548  const char *end = pe_base_name_end(last_rsc_id);
1549  char *basename = NULL;
1550 
1551  CRM_ASSERT(end);
1552  basename = strndup(last_rsc_id, end - last_rsc_id + 1);
1553  CRM_ASSERT(basename);
1554  return basename;
1555 }
1556 
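1557 /* Return a newly allocated string consisting of a resource ID's base
1558  * name with a ":0" clone suffix appended. The caller is responsible for
1559  * freeing the result.
1560  */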
1567 char *
1568 clone_zero(const char *last_rsc_id)
1569 {
1570  const char *end = pe_base_name_end(last_rsc_id);
1571  size_t base_name_len = end - last_rsc_id + 1;
1572  char *zero = NULL;
1573 
1574  CRM_ASSERT(end);
1575  zero = calloc(base_name_len + 3, sizeof(char));
1576  CRM_ASSERT(zero);
1577  memcpy(zero, last_rsc_id, base_name_len);
1578  zero[base_name_len] = ':';
1579  zero[base_name_len + 1] = '0';
1580  return zero;
1581 }
1582 
1583 static pe_resource_t *
1584 create_fake_resource(const char *rsc_id, xmlNode * rsc_entry, pe_working_set_t * data_set)
1585 {
1586  pe_resource_t *rsc = NULL;
1587  xmlNode *xml_rsc = create_xml_node(NULL, XML_CIB_TAG_RESOURCE);
1588 
1589  copy_in_properties(xml_rsc, rsc_entry);
1590  crm_xml_add(xml_rsc, XML_ATTR_ID, rsc_id);
1591  crm_log_xml_debug(xml_rsc, "Orphan resource");
1592 
1593  if (!common_unpack(xml_rsc, &rsc, NULL, data_set)) {
1594  return NULL;
1595  }
1596 
1597  if (xml_contains_remote_node(xml_rsc)) {
1598  pe_node_t *node;
1599 
1600  crm_debug("Detected orphaned remote node %s", rsc_id);
1601  node = pe_find_node(data_set->nodes, rsc_id);
1602  if (node == NULL) {
1603  node = pe_create_node(rsc_id, rsc_id, "remote", NULL, data_set);
1604  }
1605  link_rsc2remotenode(data_set, rsc);
1606 
1607  if (node) {
1608  crm_trace("Setting node %s as shutting down due to orphaned connection resource", rsc_id);
1609  node->details->shutdown = TRUE;
1610  }
1611  }
1612 
1613  if (crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER)) {
1614  /* This orphaned rsc needs to be mapped to a container. */
1615  crm_trace("Detected orphaned container filler %s", rsc_id);
1616  set_bit(rsc->flags, pe_rsc_orphan_container_filler);
1617  }
1618  set_bit(rsc->flags, pe_rsc_orphan);
1619  data_set->resources = g_list_append(data_set->resources, rsc);
1620  return rsc;
1621 }
1622 
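1623 /* Create a fresh instance of an anonymous clone to act as an orphan for
1624  * history entries that do not match any configured instance.
1625  */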
1627 static pe_resource_t *
1628 create_anonymous_orphan(pe_resource_t *parent, const char *rsc_id,
1629  pe_node_t *node, pe_working_set_t *data_set)
1630 {
1631  pe_resource_t *top = pe__create_clone_child(parent, data_set);
1632 
1633  // find_rsc() because we might be a cloned group
1634  pe_resource_t *orphan = top->fns->find_rsc(top, rsc_id, NULL, pe_find_clone);
1635 
1636  pe_rsc_debug(parent, "Created orphan %s for %s: %s on %s",
1637  top->id, parent->id, rsc_id, node->details->uname);
1638  return orphan;
1639 }
1640 
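1641 /* Map a resource history ID on a given node to a particular instance of
1642  * an anonymous clone (or cloned group). An instance already known to be
1643  * active or pending on that node is preferred; otherwise an unused
1644  * inactive instance is reused, and if none is suitable an orphan
1645  * instance is created.
1646  */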
1655 static pe_resource_t *
1656 find_anonymous_clone(pe_working_set_t * data_set, pe_node_t * node, pe_resource_t * parent,
1657  const char *rsc_id)
1658 {
1659  GListPtr rIter = NULL;
1660  pe_resource_t *rsc = NULL;
1661  pe_resource_t *inactive_instance = NULL;
1662  gboolean skip_inactive = FALSE;
1663 
1664  CRM_ASSERT(parent != NULL);
1665  CRM_ASSERT(pe_rsc_is_clone(parent));
1666  CRM_ASSERT(is_not_set(parent->flags, pe_rsc_unique));
1667 
1668  // Check for active (or partially active, for cloned groups) instance
1669  pe_rsc_trace(parent, "Looking for %s on %s in %s", rsc_id, node->details->uname, parent->id);
1670  for (rIter = parent->children; rsc == NULL && rIter; rIter = rIter->next) {
1671  GListPtr locations = NULL;
1672  pe_resource_t *child = rIter->data;
1673 
1674  /* Check whether this instance is already known to be active or pending
1675  * anywhere, at this stage of unpacking. Because this function is called
1676  * for a resource before the resource's individual operation history
1677  * entries are unpacked, locations will generally not contain the
1678  * desired node.
1679  *
1680  * However, there are three exceptions:
1681  * (1) when child is a cloned group and we have already unpacked the
1682  * history of another member of the group on the same node;
1683  * (2) when we've already unpacked the history of another numbered
1684  * instance on the same node (which can happen if globally-unique
1685  * was flipped from true to false); and
1686  * (3) when we re-run calculations on the same data set as part of a
1687  * simulation.
1688  */
1689  child->fns->location(child, &locations, 2);
1690  if (locations) {
1691  /* We should never associate the same numbered anonymous clone
1692  * instance with multiple nodes, and clone instances can't migrate,
1693  * so there must be only one location, regardless of history.
1694  */
1695  CRM_LOG_ASSERT(locations->next == NULL);
1696 
1697  if (((pe_node_t *)locations->data)->details == node->details) {
1698  /* This child instance is active on the requested node, so check
1699  * for a corresponding configured resource. We use find_rsc()
1700  * instead of child because child may be a cloned group, and we
1701  * need the particular member corresponding to rsc_id.
1702  *
1703  * If the history entry is orphaned, rsc will be NULL.
1704  */
1705  rsc = parent->fns->find_rsc(child, rsc_id, NULL, pe_find_clone);
1706  if (rsc) {
1707  /* If there are multiple instance history entries for an
1708  * anonymous clone in a single node's history (which can
1709  * happen if globally-unique is switched from true to
1710  * false), we want to consider the instances beyond the
1711  * first as orphans, even if there are inactive instance
1712  * numbers available.
1713  */
1714  if (rsc->running_on) {
1715  crm_notice("Active (now-)anonymous clone %s has "
1716  "multiple (orphan) instance histories on %s",
1717  parent->id, node->details->uname);
1718  skip_inactive = TRUE;
1719  rsc = NULL;
1720  } else {
1721  pe_rsc_trace(parent, "Resource %s, active", rsc->id);
1722  }
1723  }
1724  }
1725  g_list_free(locations);
1726 
1727  } else {
1728  pe_rsc_trace(parent, "Resource %s, skip inactive", child->id);
1729  if (!skip_inactive && !inactive_instance
1730  && is_not_set(child->flags, pe_rsc_block)) {
1731  // Remember one inactive instance in case we don't find active
1732  inactive_instance = parent->fns->find_rsc(child, rsc_id, NULL,
1733  pe_find_clone);
1734 
1735  /* ... but don't use it if it was already associated with a
1736  * pending action on another node
1737  */
1738  if (inactive_instance && inactive_instance->pending_node
1739  && (inactive_instance->pending_node->details != node->details)) {
1740  inactive_instance = NULL;
1741  }
1742  }
1743  }
1744  }
1745 
1746  if ((rsc == NULL) && !skip_inactive && (inactive_instance != NULL)) {
1747  pe_rsc_trace(parent, "Resource %s, empty slot", inactive_instance->id);
1748  rsc = inactive_instance;
1749  }
1750 
1751  /* If the resource has "requires" set to "quorum" or "nothing", and we don't
1752  * have a clone instance for every node, we don't want to consume a valid
1753  * instance number for unclean nodes. Such instances may appear to be active
1754  * according to the history, but should be considered inactive, so we can
1755  * start an instance elsewhere. Treat such instances as orphans.
1756  *
1757  * An exception is instances running on guest nodes -- since guest node
1758  * "fencing" is actually just a resource stop, requires shouldn't apply.
1759  *
1760  * @TODO Ideally, we'd use an inactive instance number if it is not needed
1761  * for any clean instances. However, we don't know that at this point.
1762  */
1763  if ((rsc != NULL) && is_not_set(rsc->flags, pe_rsc_needs_fencing)
1764  && (!node->details->online || node->details->unclean)
1765  && !pe__is_guest_node(node)
1766  && !pe__is_universal_clone(parent, data_set)) {
1767 
1768  rsc = NULL;
1769  }
1770 
1771  if (rsc == NULL) {
1772  rsc = create_anonymous_orphan(parent, rsc_id, node, data_set);
1773  pe_rsc_trace(parent, "Resource %s, orphan", rsc->id);
1774  }
1775  return rsc;
1776 }
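 /* For illustration (hypothetical resource names): given an anonymous clone
  * "dummy-clone" with instances dummy:0 and dummy:1, a history entry for
  * "dummy" on node1 resolves in the order shown above -- an instance already
  * known active or pending on node1, otherwise a remembered inactive
  * instance, otherwise an orphan created via create_anonymous_orphan().
  */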
1777 
1778 static pe_resource_t *
1779 unpack_find_resource(pe_working_set_t * data_set, pe_node_t * node, const char *rsc_id,
1780  xmlNode * rsc_entry)
1781 {
1782  pe_resource_t *rsc = NULL;
1783  pe_resource_t *parent = NULL;
1784 
1785  crm_trace("looking for %s", rsc_id);
1786  rsc = pe_find_resource(data_set->resources, rsc_id);
1787 
1788  if (rsc == NULL) {
1789  /* If we didn't find the resource by its name in the operation history,
1790  * check it again as a clone instance. Even when clone-max=0, we create
1791  * a single :0 orphan to match against here.
1792  */
1793  char *clone0_id = clone_zero(rsc_id);
1794  pe_resource_t *clone0 = pe_find_resource(data_set->resources, clone0_id);
1795 
1796  if (clone0 && is_not_set(clone0->flags, pe_rsc_unique)) {
1797  rsc = clone0;
1798  parent = uber_parent(clone0);
1799  crm_trace("%s found as %s (%s)", rsc_id, clone0_id, parent->id);
1800  } else {
1801  crm_trace("%s is not known as %s either (orphan)",
1802  rsc_id, clone0_id);
1803  }
1804  free(clone0_id);
1805 
1806  } else if (rsc->variant > pe_native) {
1807  crm_trace("Resource history for %s is orphaned because it is no longer primitive",
1808  rsc_id);
1809  return NULL;
1810 
1811  } else {
1812  parent = uber_parent(rsc);
1813  }
1814 
1815  if (pe_rsc_is_anon_clone(parent)) {
1816 
1817  if (pe_rsc_is_bundled(parent)) {
1818  rsc = pe__find_bundle_replica(parent->parent, node);
1819  } else {
1820  char *base = clone_strip(rsc_id);
1821 
1822  rsc = find_anonymous_clone(data_set, node, parent, base);
1823  free(base);
1824  CRM_ASSERT(rsc != NULL);
1825  }
1826  }
1827 
1828  if (rsc && safe_str_neq(rsc_id, rsc->id)
1829  && safe_str_neq(rsc_id, rsc->clone_name)) {
1830 
1831  free(rsc->clone_name);
1832  rsc->clone_name = strdup(rsc_id);
1833  pe_rsc_debug(rsc, "Internally renamed %s on %s to %s%s",
1834  rsc_id, node->details->uname, rsc->id,
1835  (is_set(rsc->flags, pe_rsc_orphan)? " (ORPHAN)" : ""));
1836  }
1837  return rsc;
1838 }
1839 
1840 static pe_resource_t *
1841 process_orphan_resource(xmlNode * rsc_entry, pe_node_t * node, pe_working_set_t * data_set)
1842 {
1843  pe_resource_t *rsc = NULL;
1844  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
1845 
1846  crm_debug("Detected orphan resource %s on %s", rsc_id, node->details->uname);
1847  rsc = create_fake_resource(rsc_id, rsc_entry, data_set);
1848 
1849  if (is_set(data_set->flags, pe_flag_stop_rsc_orphans) == FALSE) {
1850  clear_bit(rsc->flags, pe_rsc_managed);
1851 
1852  } else {
1853  CRM_CHECK(rsc != NULL, return NULL);
1854  pe_rsc_trace(rsc, "Added orphan %s", rsc->id);
1855  resource_location(rsc, NULL, -INFINITY, "__orphan_do_not_run__", data_set);
1856  }
1857  return rsc;
1858 }
1859 
1860 static void
1861 process_rsc_state(pe_resource_t * rsc, pe_node_t * node,
1862  enum action_fail_response on_fail,
1863  xmlNode * migrate_op, pe_working_set_t * data_set)
1864 {
1865  pe_node_t *tmpnode = NULL;
1866  char *reason = NULL;
1867 
1868  CRM_ASSERT(rsc);
1869  pe_rsc_trace(rsc, "Resource %s is %s on %s: on_fail=%s",
1870  rsc->id, role2text(rsc->role), node->details->uname, fail2text(on_fail));
1871 
1872  /* process current state */
1873  if (rsc->role != RSC_ROLE_UNKNOWN) {
1874  pe_resource_t *iter = rsc;
1875 
1876  while (iter) {
1877  if (g_hash_table_lookup(iter->known_on, node->details->id) == NULL) {
1878  pe_node_t *n = pe__copy_node(node);
1879 
1880  pe_rsc_trace(rsc, "%s (aka. %s) known on %s", rsc->id, rsc->clone_name,
1881  n->details->uname);
1882  g_hash_table_insert(iter->known_on, (gpointer) n->details->id, n);
1883  }
1884  if (is_set(iter->flags, pe_rsc_unique)) {
1885  break;
1886  }
1887  iter = iter->parent;
1888  }
1889  }
1890 
1891  /* If a managed resource is believed to be running, but node is down ... */
1892  if (rsc->role > RSC_ROLE_STOPPED
1893  && node->details->online == FALSE
1894  && node->details->maintenance == FALSE
1895  && is_set(rsc->flags, pe_rsc_managed)) {
1896 
1897  gboolean should_fence = FALSE;
1898 
1899  /* If this is a guest node, fence it (regardless of whether fencing is
1900  * enabled, because guest node fencing is done by recovery of the
1901  * container resource rather than by the fencer). Mark the resource
1902  * we're processing as failed. When the guest comes back up, its
1903  * operation history in the CIB will be cleared, freeing the affected
1904  * resource to run again once we are sure we know its state.
1905  */
1906  if (pe__is_guest_node(node)) {
1907  set_bit(rsc->flags, pe_rsc_failed);
1908  set_bit(rsc->flags, pe_rsc_stop);
1909  should_fence = TRUE;
1910 
1911  } else if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
1912  if (pe__is_remote_node(node) && node->details->remote_rsc
1913  && is_not_set(node->details->remote_rsc->flags, pe_rsc_failed)) {
1914 
1915  /* Setting unseen means that fencing of the remote node will
1916  * occur only if the connection resource is not going to start
1917  * somewhere. This allows connection resources on a failed
1918  * cluster node to move to another node without requiring the
1919  * remote nodes to be fenced as well.
1920  */
1921  node->details->unseen = TRUE;
1922  reason = crm_strdup_printf("%s is active there (fencing will be"
1923  " revoked if remote connection can "
1924  "be re-established elsewhere)",
1925  rsc->id);
1926  }
1927  should_fence = TRUE;
1928  }
1929 
1930  if (should_fence) {
1931  if (reason == NULL) {
1932  reason = crm_strdup_printf("%s is thought to be active there", rsc->id);
1933  }
1934  pe_fence_node(data_set, node, reason, FALSE);
1935  }
1936  free(reason);
1937  }
1938 
1939  if (node->details->unclean) {
1940  /* No extra processing needed
1941  * Also allows resources to be started again after a node is shot
1942  */
1943  on_fail = action_fail_ignore;
1944  }
1945 
1946  switch (on_fail) {
1947  case action_fail_ignore:
1948  /* nothing to do */
1949  break;
1950 
1951  case action_fail_demote:
1952  set_bit(rsc->flags, pe_rsc_failed);
1953  demote_action(rsc, node, FALSE);
1954  break;
1955 
1956  case action_fail_fence:
1957  /* treat it as if it is still running
1958  * but also mark the node as unclean
1959  */
1960  reason = crm_strdup_printf("%s failed there", rsc->id);
1961  pe_fence_node(data_set, node, reason, FALSE);
1962  free(reason);
1963  break;
1964 
1965  case action_fail_standby:
1966  node->details->standby = TRUE;
1967  node->details->standby_onfail = TRUE;
1968  break;
1969 
1970  case action_fail_block:
1971  /* is_managed == FALSE will prevent any
1972  * actions being sent for the resource
1973  */
1974  clear_bit(rsc->flags, pe_rsc_managed);
1975  set_bit(rsc->flags, pe_rsc_block);
1976  break;
1977 
1978  case action_fail_migrate:
1979  /* make sure it comes up somewhere else
1980  * or not at all
1981  */
1982  resource_location(rsc, node, -INFINITY, "__action_migration_auto__", data_set);
1983  break;
1984 
1985  case action_fail_stop:
1986  rsc->next_role = RSC_ROLE_STOPPED;
1987  break;
1988 
1989  case action_fail_recover:
1990  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
1991  set_bit(rsc->flags, pe_rsc_failed);
1992  set_bit(rsc->flags, pe_rsc_stop);
1993  stop_action(rsc, node, FALSE);
1994  }
1995  break;
1996 
1997  case action_fail_restart_container:
1998  set_bit(rsc->flags, pe_rsc_failed);
1999  set_bit(rsc->flags, pe_rsc_stop);
2000 
2001  if (rsc->container && pe_rsc_is_bundled(rsc)) {
2002  /* A bundle's remote connection can run on a different node than
2003  * the bundle's container. We don't necessarily know where the
2004  * container is running yet, so remember it and add a stop
2005  * action for it later.
2006  */
2007  data_set->stop_needed = g_list_prepend(data_set->stop_needed,
2008  rsc->container);
2009  } else if (rsc->container) {
2010  stop_action(rsc->container, node, FALSE);
2011  } else if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2012  stop_action(rsc, node, FALSE);
2013  }
2014  break;
2015 
2016  case action_fail_reset_remote:
2017  set_bit(rsc->flags, pe_rsc_failed);
2018  set_bit(rsc->flags, pe_rsc_stop);
2019  if (is_set(data_set->flags, pe_flag_stonith_enabled)) {
2020  tmpnode = NULL;
2021  if (rsc->is_remote_node) {
2022  tmpnode = pe_find_node(data_set->nodes, rsc->id);
2023  }
2024  if (tmpnode &&
2025  pe__is_remote_node(tmpnode) &&
2026  tmpnode->details->remote_was_fenced == 0) {
2027 
2028  /* The remote connection resource failed in a way that
2029  * should result in fencing the remote node.
2030  */
2031  pe_fence_node(data_set, tmpnode,
2032  "remote connection is unrecoverable", FALSE);
2033  }
2034  }
2035 
2036  /* require the stop action regardless of whether fencing is occurring */
2037  if (rsc->role > RSC_ROLE_STOPPED) {
2038  stop_action(rsc, node, FALSE);
2039  }
2040 
2041  /* if reconnect delay is in use, prevent the connection from exiting the
2042  * "STOPPED" role until the failure is cleared by the delay timeout. */
2043  if (rsc->remote_reconnect_ms) {
2044  rsc->next_role = RSC_ROLE_STOPPED;
2045  }
2046  break;
2047  }
2048 
2049  /* Ensure a remote-node connection failure forces an unclean remote node
2050  * to be fenced. By setting unseen = FALSE, the remote-node failure will
2051  * result in a fencing operation regardless of whether we attempt to
2052  * reconnect to the remote node in this transition. */
2053  if (is_set(rsc->flags, pe_rsc_failed) && rsc->is_remote_node) {
2054  tmpnode = pe_find_node(data_set->nodes, rsc->id);
2055  if (tmpnode && tmpnode->details->unclean) {
2056  tmpnode->details->unseen = FALSE;
2057  }
2058  }
2059 
2060  if (rsc->role != RSC_ROLE_STOPPED && rsc->role != RSC_ROLE_UNKNOWN) {
2061  if (is_set(rsc->flags, pe_rsc_orphan)) {
2062  if (is_set(rsc->flags, pe_rsc_managed)) {
2063  pcmk__config_warn("Detected active orphan %s running on %s",
2064  rsc->id, node->details->uname);
2065  } else {
2066  pcmk__config_warn("Resource '%s' must be stopped manually on "
2067  "%s because cluster is configured not to "
2068  "stop active orphans",
2069  rsc->id, node->details->uname);
2070  }
2071  }
2072 
2073  native_add_running(rsc, node, data_set);
2074  switch (on_fail) {
2075  case action_fail_ignore:
2076  break;
2077  case action_fail_demote:
2078  case action_fail_block:
2079  set_bit(rsc->flags, pe_rsc_failed);
2080  break;
2081  default:
2082  set_bit(rsc->flags, pe_rsc_failed);
2083  set_bit(rsc->flags, pe_rsc_stop);
2084  break;
2085  }
2086 
2087  } else if (rsc->clone_name && strchr(rsc->clone_name, ':') != NULL) {
2088  /* Only do this for older status sections that included instance numbers.
2089  * Otherwise, stopped instances will appear as orphans.
2090  */
2091  pe_rsc_trace(rsc, "Resetting clone_name %s for %s (stopped)", rsc->clone_name, rsc->id);
2092  free(rsc->clone_name);
2093  rsc->clone_name = NULL;
2094 
2095  } else {
2096  GList *possible_matches = pe__resource_actions(rsc, node, RSC_STOP,
2097  FALSE);
2098  GListPtr gIter = possible_matches;
2099 
2100  for (; gIter != NULL; gIter = gIter->next) {
2101  pe_action_t *stop = (pe_action_t *) gIter->data;
2102 
2103  stop->flags |= pe_action_optional;
2104  }
2105 
2106  g_list_free(possible_matches);
2107  }
2108 }
2109 
2110 /* create active recurring operations as optional */
2111 static void
2112 process_recurring(pe_node_t * node, pe_resource_t * rsc,
2113  int start_index, int stop_index,
2114  GListPtr sorted_op_list, pe_working_set_t * data_set)
2115 {
2116  int counter = -1;
2117  const char *task = NULL;
2118  const char *status = NULL;
2119  GListPtr gIter = sorted_op_list;
2120 
2121  CRM_ASSERT(rsc);
2122  pe_rsc_trace(rsc, "%s: Start index %d, stop index = %d", rsc->id, start_index, stop_index);
2123 
2124  for (; gIter != NULL; gIter = gIter->next) {
2125  xmlNode *rsc_op = (xmlNode *) gIter->data;
2126 
2127  guint interval_ms = 0;
2128  char *key = NULL;
2129  const char *id = ID(rsc_op);
2130 
2131  counter++;
2132 
2133  if (node->details->online == FALSE) {
2134  pe_rsc_trace(rsc, "Skipping %s/%s: node is offline", rsc->id, node->details->uname);
2135  break;
2136 
2137  /* Need to check if there's a monitor for role="Stopped" */
2138  } else if (start_index < stop_index && counter <= stop_index) {
2139  pe_rsc_trace(rsc, "Skipping %s/%s: resource is not active", id, node->details->uname);
2140  continue;
2141 
2142  } else if (counter < start_index) {
2143  pe_rsc_trace(rsc, "Skipping %s/%s: old %d", id, node->details->uname, counter);
2144  continue;
2145  }
2146 
2147  crm_element_value_ms(rsc_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
2148  if (interval_ms == 0) {
2149  pe_rsc_trace(rsc, "Skipping %s/%s: non-recurring", id, node->details->uname);
2150  continue;
2151  }
2152 
2153  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2154  if (safe_str_eq(status, "-1")) {
2155  pe_rsc_trace(rsc, "Skipping %s/%s: status", id, node->details->uname);
2156  continue;
2157  }
2158  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2159  /* create the action */
2160  key = pcmk__op_key(rsc->id, task, interval_ms);
2161  pe_rsc_trace(rsc, "Creating %s/%s", key, node->details->uname);
2162  custom_action(rsc, key, task, node, TRUE, TRUE, data_set);
2163  }
2164 }
2165 
2166 void
2167 calculate_active_ops(GListPtr sorted_op_list, int *start_index, int *stop_index)
2168 {
2169  int counter = -1;
2170  int implied_monitor_start = -1;
2171  int implied_clone_start = -1;
2172  const char *task = NULL;
2173  const char *status = NULL;
2174  GListPtr gIter = sorted_op_list;
2175 
2176  *stop_index = -1;
2177  *start_index = -1;
2178 
2179  for (; gIter != NULL; gIter = gIter->next) {
2180  xmlNode *rsc_op = (xmlNode *) gIter->data;
2181 
2182  counter++;
2183 
2184  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2185  status = crm_element_value(rsc_op, XML_LRM_ATTR_OPSTATUS);
2186 
2187  if (safe_str_eq(task, CRMD_ACTION_STOP)
2188  && safe_str_eq(status, "0")) {
2189  *stop_index = counter;
2190 
2191  } else if (safe_str_eq(task, CRMD_ACTION_START) || safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2192  *start_index = counter;
2193 
2194  } else if ((implied_monitor_start <= *stop_index) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
2195  const char *rc = crm_element_value(rsc_op, XML_LRM_ATTR_RC);
2196 
2197  if (safe_str_eq(rc, "0") || safe_str_eq(rc, "8")) {
2198  implied_monitor_start = counter;
2199  }
2200  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE) || safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
2201  implied_clone_start = counter;
2202  }
2203  }
2204 
2205  if (*start_index == -1) {
2206  if (implied_clone_start != -1) {
2207  *start_index = implied_clone_start;
2208  } else if (implied_monitor_start != -1) {
2209  *start_index = implied_monitor_start;
2210  }
2211  }
2212 }
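 /* Worked example (hypothetical history): a sorted operation list of
  *   [0] stop    (op-status "0")
  *   [1] monitor (rc "0")
  * sets *stop_index = 0; with no start or migrate_from present, *start_index
  * falls back to the successful monitor at index 1, i.e. the monitor acts as
  * an implied start.
  */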
2213 
2214 // If resource history entry has shutdown lock, remember lock node and time
2215 static void
2216 unpack_shutdown_lock(xmlNode *rsc_entry, pe_resource_t *rsc, pe_node_t *node,
2217  pe_working_set_t *data_set)
2218 {
2219  time_t lock_time = 0; // When lock started (i.e. node shutdown time)
2220 
2221  if ((crm_element_value_epoch(rsc_entry, XML_CONFIG_ATTR_SHUTDOWN_LOCK,
2222  &lock_time) == pcmk_ok) && (lock_time != 0)) {
2223 
2224  if ((data_set->shutdown_lock > 0)
2225  && (get_effective_time(data_set)
2226  > (lock_time + data_set->shutdown_lock))) {
2227  pe_rsc_info(rsc, "Shutdown lock for %s on %s expired",
2228  rsc->id, node->details->uname);
2229  pe__clear_resource_history(rsc, node, data_set);
2230  } else {
2231  rsc->lock_node = node;
2232  rsc->lock_time = lock_time;
2233  }
2234  }
2235 }
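 /* Example of the expiry check above (hypothetical values): with a shutdown
  * lock limit of 300s (data_set->shutdown_lock == 300) and a node that shut
  * down at lock_time == 1000, the lock is honored while the effective time is
  * at most 1300; once it passes 1300, the lock is treated as expired and the
  * resource's history on that node is cleared.
  */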
2236 
2237 static pe_resource_t *
2238 unpack_lrm_rsc_state(pe_node_t * node, xmlNode * rsc_entry, pe_working_set_t * data_set)
2239 {
2240  GListPtr gIter = NULL;
2241  int stop_index = -1;
2242  int start_index = -1;
2243  enum rsc_role_e req_role = RSC_ROLE_UNKNOWN;
2244 
2245  const char *task = NULL;
2246  const char *rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2247 
2248  pe_resource_t *rsc = NULL;
2249  GListPtr op_list = NULL;
2250  GListPtr sorted_op_list = NULL;
2251 
2252  xmlNode *migrate_op = NULL;
2253  xmlNode *rsc_op = NULL;
2254  xmlNode *last_failure = NULL;
2255 
2256  enum action_fail_response on_fail = action_fail_ignore;
2257  enum rsc_role_e saved_role = RSC_ROLE_UNKNOWN;
2258 
2259  crm_trace("[%s] Processing %s on %s",
2260  crm_element_name(rsc_entry), rsc_id, node->details->uname);
2261 
2262  /* extract operations */
2263  op_list = NULL;
2264  sorted_op_list = NULL;
2265 
2266  for (rsc_op = __xml_first_child_element(rsc_entry); rsc_op != NULL;
2267  rsc_op = __xml_next_element(rsc_op)) {
2268  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
2269  op_list = g_list_prepend(op_list, rsc_op);
2270  }
2271  }
2272 
2273  if (is_not_set(data_set->flags, pe_flag_shutdown_lock)) {
2274  if (op_list == NULL) {
2275  // If there are no operations, there is nothing to do
2276  return NULL;
2277  }
2278  }
2279 
2280  /* find the resource */
2281  rsc = unpack_find_resource(data_set, node, rsc_id, rsc_entry);
2282  if (rsc == NULL) {
2283  if (op_list == NULL) {
2284  // If there are no operations, there is nothing to do
2285  return NULL;
2286  } else {
2287  rsc = process_orphan_resource(rsc_entry, node, data_set);
2288  }
2289  }
2290  CRM_ASSERT(rsc != NULL);
2291 
2292  // Check whether the resource is "shutdown-locked" to this node
2293  if (is_set(data_set->flags, pe_flag_shutdown_lock)) {
2294  unpack_shutdown_lock(rsc_entry, rsc, node, data_set);
2295  }
2296 
2297  /* process operations */
2298  saved_role = rsc->role;
2299  rsc->role = RSC_ROLE_UNKNOWN;
2300  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
2301 
2302  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
2303  xmlNode *rsc_op = (xmlNode *) gIter->data;
2304 
2305  task = crm_element_value(rsc_op, XML_LRM_ATTR_TASK);
2306  if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
2307  migrate_op = rsc_op;
2308  }
2309 
2310  unpack_rsc_op(rsc, node, rsc_op, &last_failure, &on_fail, data_set);
2311  }
2312 
2313  /* create active recurring operations as optional */
2314  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
2315  process_recurring(node, rsc, start_index, stop_index, sorted_op_list, data_set);
2316 
2317  /* no need to free the contents */
2318  g_list_free(sorted_op_list);
2319 
2320  process_rsc_state(rsc, node, on_fail, migrate_op, data_set);
2321 
2322  if (get_target_role(rsc, &req_role)) {
2323  if (rsc->next_role == RSC_ROLE_UNKNOWN || req_role < rsc->next_role) {
2324  pe_rsc_debug(rsc, "%s: Overwriting calculated next role %s"
2325  " with requested next role %s",
2326  rsc->id, role2text(rsc->next_role), role2text(req_role));
2327  rsc->next_role = req_role;
2328 
2329  } else if (req_role > rsc->next_role) {
2330  pe_rsc_info(rsc, "%s: Not overwriting calculated next role %s"
2331  " with requested next role %s",
2332  rsc->id, role2text(rsc->next_role), role2text(req_role));
2333  }
2334  }
2335 
2336  if (saved_role > rsc->role) {
2337  rsc->role = saved_role;
2338  }
2339 
2340  return rsc;
2341 }
2342 
2343 static void
2344 handle_orphaned_container_fillers(xmlNode * lrm_rsc_list, pe_working_set_t * data_set)
2345 {
2346  xmlNode *rsc_entry = NULL;
2347  for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
2348  rsc_entry = __xml_next_element(rsc_entry)) {
2349 
2350  pe_resource_t *rsc;
2351  pe_resource_t *container;
2352  const char *rsc_id;
2353  const char *container_id;
2354 
2355  if (safe_str_neq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE)) {
2356  continue;
2357  }
2358 
2359  container_id = crm_element_value(rsc_entry, XML_RSC_ATTR_CONTAINER);
2360  rsc_id = crm_element_value(rsc_entry, XML_ATTR_ID);
2361  if (container_id == NULL || rsc_id == NULL) {
2362  continue;
2363  }
2364 
2365  container = pe_find_resource(data_set->resources, container_id);
2366  if (container == NULL) {
2367  continue;
2368  }
2369 
2370  rsc = pe_find_resource(data_set->resources, rsc_id);
2371  if (rsc == NULL ||
2372  is_set(rsc->flags, pe_rsc_orphan_container_filler) == FALSE ||
2373  rsc->container != NULL) {
2374  continue;
2375  }
2376 
2377  pe_rsc_trace(rsc, "Mapped container of orphaned resource %s to %s",
2378  rsc->id, container_id);
2379  rsc->container = container;
2380  container->fillers = g_list_append(container->fillers, rsc);
2381  }
2382 }
2383 
2384 static void
2385 unpack_lrm_resources(pe_node_t *node, xmlNode *lrm_rsc_list,
2386  pe_working_set_t *data_set)
2387 {
2388  xmlNode *rsc_entry = NULL;
2389  gboolean found_orphaned_container_filler = FALSE;
2390 
2391  for (rsc_entry = __xml_first_child_element(lrm_rsc_list); rsc_entry != NULL;
2392  rsc_entry = __xml_next_element(rsc_entry)) {
2393 
2394  if (crm_str_eq((const char *)rsc_entry->name, XML_LRM_TAG_RESOURCE, TRUE)) {
2395  pe_resource_t *rsc = unpack_lrm_rsc_state(node, rsc_entry, data_set);
2396  if (!rsc) {
2397  continue;
2398  }
2399  if (is_set(rsc->flags, pe_rsc_orphan_container_filler)) {
2400  found_orphaned_container_filler = TRUE;
2401  }
2402  }
2403  }
2404 
2405  /* now that all the resource state has been unpacked for this node
2406  * we have to go back and map any orphaned container fillers to their
2407  * container resource */
2408  if (found_orphaned_container_filler) {
2409  handle_orphaned_container_fillers(lrm_rsc_list, data_set);
2410  }
2411 }
2412 
2413 static void
2414 set_active(pe_resource_t * rsc)
2415 {
2416  pe_resource_t *top = uber_parent(rsc);
2417 
2418  if (top && is_set(top->flags, pe_rsc_promotable)) {
2419  rsc->role = RSC_ROLE_SLAVE;
2420  } else {
2421  rsc->role = RSC_ROLE_STARTED;
2422  }
2423 }
2424 
2425 static void
2426 set_node_score(gpointer key, gpointer value, gpointer user_data)
2427 {
2428  pe_node_t *node = value;
2429  int *score = user_data;
2430 
2431  node->weight = *score;
2432 }
2433 
2434 #define STATUS_PATH_MAX 1024
2435 static xmlNode *
2436 find_lrm_op(const char *resource, const char *op, const char *node, const char *source,
2437  bool success_only, pe_working_set_t *data_set)
2438 {
2439  int offset = 0;
2440  char xpath[STATUS_PATH_MAX];
2441  xmlNode *xml = NULL;
2442 
2443  offset += snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//node_state[@uname='%s']", node);
2444  offset +=
2445  snprintf(xpath + offset, STATUS_PATH_MAX - offset, "//" XML_LRM_TAG_RESOURCE "[@id='%s']",
2446  resource);
2447 
2448  /* Need to check against transition_magic too? */
2449  if (source && safe_str_eq(op, CRMD_ACTION_MIGRATE)) {
2450  offset +=
2451  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2452  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_target='%s']", op,
2453  source);
2454  } else if (source && safe_str_eq(op, CRMD_ACTION_MIGRATED)) {
2455  offset +=
2456  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2457  "/" XML_LRM_TAG_RSC_OP "[@operation='%s' and @migrate_source='%s']", op,
2458  source);
2459  } else {
2460  offset +=
2461  snprintf(xpath + offset, STATUS_PATH_MAX - offset,
2462  "/" XML_LRM_TAG_RSC_OP "[@operation='%s']", op);
2463  }
2464 
2465  CRM_LOG_ASSERT(offset > 0);
2466  xml = get_xpath_object(xpath, data_set->input, LOG_DEBUG);
2467 
2468  if (xml && success_only) {
2469  int rc = PCMK_OCF_UNKNOWN_ERROR;
2470  int status = PCMK_LRM_OP_ERROR;
2471 
2472  crm_element_value_int(xml, XML_LRM_ATTR_RC, &rc);
2473  crm_element_value_int(xml, XML_LRM_ATTR_OPSTATUS, &status);
2474  if ((rc != PCMK_OCF_OK) || (status != PCMK_LRM_OP_DONE)) {
2475  return NULL;
2476  }
2477  }
2478  return xml;
2479 }
2480 
2481 static int
2482 pe__call_id(xmlNode *op_xml)
2483 {
2484  int id = 0;
2485 
2486  if (op_xml) {
2487  crm_element_value_int(op_xml, XML_LRM_ATTR_CALLID, &id);
2488  }
2489  return id;
2490 }
2491 
2508 static bool
2509 stop_happened_after(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2510  pe_working_set_t *data_set)
2511 {
2512  xmlNode *stop_op = find_lrm_op(rsc->id, CRMD_ACTION_STOP,
2513  node->details->uname, NULL, TRUE, data_set);
2514 
2515  return (stop_op && (pe__call_id(stop_op) > pe__call_id(xml_op)));
2516 }
2517 
2518 static void
2519 unpack_migrate_to_success(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2520  pe_working_set_t *data_set)
2521 {
2522  /* A successful migration sequence is:
2523  * migrate_to on source node
2524  * migrate_from on target node
2525  * stop on source node
2526  *
2527  * If a migrate_to is followed by a stop, the entire migration (successful
2528  * or failed) is complete, and we don't care what happened on the target.
2529  *
2530  * If no migrate_from has happened, the migration is considered to be
2531  * "partial". If the migrate_from failed, make sure the resource gets
2532  * stopped on both source and target (if up).
2533  *
2534  * If the migrate_to and migrate_from both succeeded (which also implies the
2535  * resource is no longer running on the source), but there is no stop, the
2536  * migration is considered to be "dangling". Schedule a stop on the source
2537  * in this case.
2538  */
2539  int from_rc = 0;
2540  int from_status = 0;
2541  pe_node_t *target_node = NULL;
2542  pe_node_t *source_node = NULL;
2543  xmlNode *migrate_from = NULL;
2544  const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2545  const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2546 
2547  // Sanity check
2548  CRM_CHECK(source && target && !strcmp(source, node->details->uname), return);
2549 
2550  if (stop_happened_after(rsc, node, xml_op, data_set)) {
2551  return;
2552  }
2553 
2554  // Clones are not allowed to migrate, so role can't be master
2555  rsc->role = RSC_ROLE_STARTED;
2556 
2557  target_node = pe_find_node(data_set->nodes, target);
2558  source_node = pe_find_node(data_set->nodes, source);
2559 
2560  // Check whether there was a migrate_from action on the target
2561  migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
2562  source, FALSE, data_set);
2563  if (migrate_from) {
2564  crm_element_value_int(migrate_from, XML_LRM_ATTR_RC, &from_rc);
2565  crm_element_value_int(migrate_from, XML_LRM_ATTR_OPSTATUS, &from_status);
2566  pe_rsc_trace(rsc, "%s op on %s exited with status=%d, rc=%d",
2567  ID(migrate_from), target, from_status, from_rc);
2568  }
2569 
2570  if (migrate_from && from_rc == PCMK_OCF_OK
2571  && from_status == PCMK_LRM_OP_DONE) {
2572  /* The migrate_to and migrate_from both succeeded, so mark the migration
2573  * as "dangling". This will be used to schedule a stop action on the
2574  * source without affecting the target.
2575  */
2576  pe_rsc_trace(rsc, "Detected dangling migration op: %s on %s", ID(xml_op),
2577  source);
2578  rsc->role = RSC_ROLE_STOPPED;
2579  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2580 
2581  } else if (migrate_from && (from_status != PCMK_LRM_OP_PENDING)) { // Failed
2582  if (target_node && target_node->details->online) {
2583  pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
2584  target_node->details->online);
2585  native_add_running(rsc, target_node, data_set);
2586  }
2587 
2588  } else { // Pending, or complete but erased
2589  if (target_node && target_node->details->online) {
2590  pe_rsc_trace(rsc, "Marking active on %s %p %d", target, target_node,
2591  target_node->details->online);
2592 
2593  native_add_running(rsc, target_node, data_set);
2594  if (source_node && source_node->details->online) {
2595  /* This is a partial migration: the migrate_to completed
2596  * successfully on the source, but the migrate_from has not
2597  * completed. Remember the source and target; if the newly
2598  * chosen target remains the same when we schedule actions
2599  * later, we may continue with the migration.
2600  */
2601  rsc->partial_migration_target = target_node;
2602  rsc->partial_migration_source = source_node;
2603  }
2604  } else {
2605  /* Consider it failed here - forces a restart, prevents migration */
2606  set_bit(rsc->flags, pe_rsc_failed);
2607  set_bit(rsc->flags, pe_rsc_stop);
2608  clear_bit(rsc->flags, pe_rsc_allow_migrate);
2609  }
2610  }
2611 }
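 /* Summary of the outcomes handled above for a recorded migrate_to on the
  * source node: a later stop on the source means the migration (successful or
  * not) is complete and nothing more is inferred; a successful migrate_from
  * with no stop marks the migration "dangling", forcing a stop on the source
  * later; a failed migrate_from leaves the resource active on the target if
  * the target is online; and a missing or still-pending migrate_from is a
  * partial migration, remembered via partial_migration_source/target when
  * both nodes are up so it may be continued later.
  */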
2612 
2613 static void
2614 unpack_migrate_to_failure(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
2615  pe_working_set_t *data_set)
2616 {
2617  int target_stop_id = 0;
2618  int target_migrate_from_id = 0;
2619  xmlNode *target_stop = NULL;
2620  xmlNode *target_migrate_from = NULL;
2621  const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2622  const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2623 
2624  // Sanity check
2625  CRM_CHECK(source && target && !strcmp(source, node->details->uname), return);
2626 
2627  /* If a migration failed, we have to assume the resource is active. Clones
2628  * are not allowed to migrate, so role can't be master.
2629  */
2630  rsc->role = RSC_ROLE_STARTED;
2631 
2632  // Check for stop on the target
2633  target_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, target, NULL,
2634  TRUE, data_set);
2635  target_stop_id = pe__call_id(target_stop);
2636 
2637  // Check for migrate_from on the target
2638  target_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, target,
2639  source, TRUE, data_set);
2640  target_migrate_from_id = pe__call_id(target_migrate_from);
2641 
2642  if ((target_stop == NULL) || (target_stop_id < target_migrate_from_id)) {
2643  /* There was no stop on the source, or a stop that happened before a
2644  * migrate_from, so assume the resource is still active on the target
2645  * (if it is up).
2646  */
2647  pe_node_t *target_node = pe_find_node(data_set->nodes, target);
2648 
2649  pe_rsc_trace(rsc, "stop (%d) + migrate_from (%d)",
2650  target_stop_id, target_migrate_from_id);
2651  if (target_node && target_node->details->online) {
2652  native_add_running(rsc, target_node, data_set);
2653  }
2654 
2655  } else if (target_migrate_from == NULL) {
2656  /* We know there was a stop on the target, but there may not have been a
2657  * migrate_from (the stop could have happened before migrate_from was
2658  * scheduled or attempted).
2659  *
2660  * That means this could be a "dangling" migration. But first, check
2661  * whether there is a newer migrate_from or start on the source node --
2662  * it's possible the failed migration was followed by a successful
2663  * full restart or migration in the reverse direction, in which case we
2664  * don't want to force it to stop.
2665  */
2666  xmlNode *source_migrate_from = NULL;
2667  xmlNode *source_start = NULL;
2668  int source_migrate_to_id = pe__call_id(xml_op);
2669 
2670  source_migrate_from = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATED, source,
2671  NULL, TRUE, data_set);
2672  if (pe__call_id(source_migrate_from) > source_migrate_to_id) {
2673  return;
2674  }
2675 
2676  source_start = find_lrm_op(rsc->id, CRMD_ACTION_START, source, NULL,
2677  TRUE, data_set);
2678  if (pe__call_id(source_start) > source_migrate_to_id) {
2679  return;
2680  }
2681 
2682  // Mark node as having dangling migration so we can force a stop later
2683  rsc->dangling_migrations = g_list_prepend(rsc->dangling_migrations, node);
2684  }
2685 }
2686 
2687 static void
2688 unpack_migrate_from_failure(pe_resource_t *rsc, pe_node_t *node,
2689  xmlNode *xml_op, pe_working_set_t *data_set)
2690 {
2691  xmlNode *source_stop = NULL;
2692  xmlNode *source_migrate_to = NULL;
2693  const char *source = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_SOURCE);
2694  const char *target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
2695 
2696  // Sanity check
2697  CRM_CHECK(source && target && !strcmp(target, node->details->uname), return);
2698 
2699  /* If a migration failed, we have to assume the resource is active. Clones
2700  * are not allowed to migrate, so role can't be master.
2701  */
2702  rsc->role = RSC_ROLE_STARTED;
2703 
2704  // Check for a stop on the source
2705  source_stop = find_lrm_op(rsc->id, CRMD_ACTION_STOP, source, NULL,
2706  TRUE, data_set);
2707 
2708  // Check for a migrate_to on the source
2709  source_migrate_to = find_lrm_op(rsc->id, CRMD_ACTION_MIGRATE,
2710  source, target, TRUE, data_set);
2711 
2712  if ((source_stop == NULL)
2713  || (pe__call_id(source_stop) < pe__call_id(source_migrate_to))) {
2714  /* There was no stop on the source, or a stop that happened before
2715  * migrate_to, so assume the resource is still active on the source (if
2716  * it is up).
2717  */
2718  pe_node_t *source_node = pe_find_node(data_set->nodes, source);
2719 
2720  if (source_node && source_node->details->online) {
2721  native_add_running(rsc, source_node, data_set);
2722  }
2723  }
2724 }
2725 
2726 static void
2727 record_failed_op(xmlNode *op, const pe_node_t *node,
2728  const pe_resource_t *rsc, pe_working_set_t *data_set)
2729 {
2730  xmlNode *xIter = NULL;
2731  const char *op_key = crm_element_value(op, XML_LRM_ATTR_TASK_KEY);
2732 
2733  if (node->details->online == FALSE) {
2734  return;
2735  }
2736 
2737  for (xIter = data_set->failed->children; xIter; xIter = xIter->next) {
2738  const char *key = crm_element_value(xIter, XML_LRM_ATTR_TASK_KEY);
2739  const char *uname = crm_element_value(xIter, XML_ATTR_UNAME);
2740 
2741  if(safe_str_eq(op_key, key) && safe_str_eq(uname, node->details->uname)) {
2742  crm_trace("Skipping duplicate entry %s on %s", op_key, node->details->uname);
2743  return;
2744  }
2745  }
2746 
2747  crm_trace("Adding entry %s on %s", op_key, node->details->uname);
2748  crm_xml_add(op, XML_ATTR_UNAME, node->details->uname);
2749  crm_xml_add(op, XML_LRM_ATTR_RSCID, rsc->id);
2750  add_node_copy(data_set->failed, op);
2751 }
2752 
2753 static const char *get_op_key(xmlNode *xml_op)
2754 {
2755  const char *key = crm_element_value(xml_op, XML_LRM_ATTR_TASK_KEY);
2756  if(key == NULL) {
2757  key = ID(xml_op);
2758  }
2759  return key;
2760 }
2761 
2762 static const char *
2763 last_change_str(xmlNode *xml_op)
2764 {
2765  time_t when;
2766  const char *when_s = NULL;
2767 
2768  if (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
2769  &when) == pcmk_ok) {
2770  when_s = pcmk__epoch2str(&when);
2771  if (when_s) {
2772  // Skip day of week to make message shorter
2773  when_s = strchr(when_s, ' ');
2774  if (when_s) {
2775  ++when_s;
2776  }
2777  }
2778  }
2779  return ((when_s && *when_s)? when_s : "unknown time");
2780 }
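 /* For example, assuming pcmk__epoch2str() yields a ctime()-style string such
  * as "Wed Jun  3 14:03:37 2020", the value returned here is
  * "Jun  3 14:03:37 2020" -- only the leading day-of-week token is skipped.
  */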
2781 
2794 static int
2795 cmp_on_fail(enum action_fail_response first, enum action_fail_response second)
2796 {
2797  switch (first) {
2798  case action_fail_demote:
2799  switch (second) {
2800  case action_fail_ignore:
2801  return 1;
2802  case action_fail_demote:
2803  return 0;
2804  default:
2805  return -1;
2806  }
2807  break;
2808 
2809  case action_fail_reset_remote:
2810  switch (second) {
2811  case action_fail_ignore:
2812  case action_fail_demote:
2813  case action_fail_recover:
2814  return 1;
2815  case action_fail_reset_remote:
2816  return 0;
2817  default:
2818  return -1;
2819  }
2820  break;
2821 
2822  case action_fail_restart_container:
2823  switch (second) {
2824  case action_fail_ignore:
2825  case action_fail_demote:
2826  case action_fail_recover:
2827  case action_fail_reset_remote:
2828  return 1;
2829  case action_fail_restart_container:
2830  return 0;
2831  default:
2832  return -1;
2833  }
2834  break;
2835 
2836  default:
2837  break;
2838  }
2839  switch (second) {
2840  case action_fail_demote:
2841  return (first == action_fail_ignore)? -1 : 1;
2842 
2843  case action_fail_reset_remote:
2844  switch (first) {
2845  case action_fail_ignore:
2846  case action_fail_demote:
2847  case action_fail_recover:
2848  return -1;
2849  default:
2850  return 1;
2851  }
2852  break;
2853 
2854  case action_fail_restart_container:
2855  switch (first) {
2856  case action_fail_ignore:
2857  case action_fail_demote:
2858  case action_fail_recover:
2859  case action_fail_reset_remote:
2860  return -1;
2861  default:
2862  return 1;
2863  }
2864  break;
2865 
2866  default:
2867  break;
2868  }
2869  return first - second;
2870 }
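 /* Usage note: a negative result means the first handling is less severe, so
  * a caller such as unpack_rsc_op_failure() replaces its current on-fail
  * handling with the action's whenever cmp_on_fail() reports the action's as
  * greater. The explicit cases above are needed because action_fail_demote,
  * action_fail_reset_remote and action_fail_restart_container are not
  * positioned by severity within the enum, so the final "first - second"
  * fallback cannot order them correctly.
  */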
2871 
2872 static void
2873 unpack_rsc_op_failure(pe_resource_t * rsc, pe_node_t * node, int rc, xmlNode * xml_op, xmlNode ** last_failure,
2874  enum action_fail_response * on_fail, pe_working_set_t * data_set)
2875 {
2876  guint interval_ms = 0;
2877  bool is_probe = false;
2878  pe_action_t *action = NULL;
2879 
2880  const char *key = get_op_key(xml_op);
2881  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
2882  const char *exit_reason = crm_element_value(xml_op,
2883  XML_LRM_ATTR_EXIT_REASON);
2884 
2885  CRM_ASSERT(rsc);
2886  CRM_CHECK(task != NULL, return);
2887 
2888  *last_failure = xml_op;
2889 
2890  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
2891  if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) {
2892  is_probe = true;
2893  }
2894 
2895  if (exit_reason == NULL) {
2896  exit_reason = "";
2897  }
2898 
2899  if (is_not_set(data_set->flags, pe_flag_symmetric_cluster)
2900  && (rc == PCMK_OCF_NOT_INSTALLED)) {
2901  crm_trace("Unexpected result (%s%s%s) was recorded for "
2902  "%s of %s on %s at %s " CRM_XS " rc=%d id=%s",
2903  services_ocf_exitcode_str(rc),
2904  (*exit_reason? ": " : ""), exit_reason,
2905  (is_probe? "probe" : task), rsc->id, node->details->uname,
2906  last_change_str(xml_op), rc, ID(xml_op));
2907  } else {
2908  crm_warn("Unexpected result (%s%s%s) was recorded for "
2909  "%s of %s on %s at %s " CRM_XS " rc=%d id=%s",
2910  services_ocf_exitcode_str(rc),
2911  (*exit_reason? ": " : ""), exit_reason,
2912  (is_probe? "probe" : task), rsc->id, node->details->uname,
2913  last_change_str(xml_op), rc, ID(xml_op));
2914 
2915  if (is_probe && (rc != PCMK_OCF_OK)
2916  && (rc != PCMK_OCF_NOT_RUNNING)
2917  && (rc != PCMK_OCF_RUNNING_MASTER)) {
2918 
2919  /* A failed (not just unexpected) probe result could mean the user
2920  * didn't know resources will be probed even where they can't run.
2921  */
2922  crm_notice("If it is not possible for %s to run on %s, see "
2923  "the resource-discovery option for location constraints",
2924  rsc->id, node->details->uname);
2925  }
2926 
2927  record_failed_op(xml_op, node, rsc, data_set);
2928  }
2929 
2930  action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
2931  if (cmp_on_fail(*on_fail, action->on_fail) < 0) {
2932  pe_rsc_trace(rsc, "on-fail %s -> %s for %s (%s)", fail2text(*on_fail),
2933  fail2text(action->on_fail), action->uuid, key);
2934  *on_fail = action->on_fail;
2935  }
2936 
2937  if (!strcmp(task, CRMD_ACTION_STOP)) {
2938  resource_location(rsc, node, -INFINITY, "__stop_fail__", data_set);
2939 
2940  } else if (!strcmp(task, CRMD_ACTION_MIGRATE)) {
2941  unpack_migrate_to_failure(rsc, node, xml_op, data_set);
2942 
2943  } else if (!strcmp(task, CRMD_ACTION_MIGRATED)) {
2944  unpack_migrate_from_failure(rsc, node, xml_op, data_set);
2945 
2946  } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) {
2947  rsc->role = RSC_ROLE_MASTER;
2948 
2949  } else if (!strcmp(task, CRMD_ACTION_DEMOTE)) {
2950  if (action->on_fail == action_fail_block) {
2951  rsc->role = RSC_ROLE_MASTER;
2952  rsc->next_role = RSC_ROLE_STOPPED;
2953 
2954  } else if(rc == PCMK_OCF_NOT_RUNNING) {
2955  rsc->role = RSC_ROLE_STOPPED;
2956 
2957  } else {
2958  /* Staying in master role would put the scheduler and controller
2959  * into a loop. Setting slave role is not dangerous because the
2960  * resource will be stopped as part of recovery, and any master
2961  * promotion will be ordered after that stop.
2962  */
2963  rsc->role = RSC_ROLE_SLAVE;
2964  }
2965  }
2966 
2967  if(is_probe && rc == PCMK_OCF_NOT_INSTALLED) {
2968  /* leave stopped */
2969  pe_rsc_trace(rsc, "Leaving %s stopped", rsc->id);
2970  rsc->role = RSC_ROLE_STOPPED;
2971 
2972  } else if (rsc->role < RSC_ROLE_STARTED) {
2973  pe_rsc_trace(rsc, "Setting %s active", rsc->id);
2974  set_active(rsc);
2975  }
2976 
2977  pe_rsc_trace(rsc, "Resource %s: role=%s, unclean=%s, on_fail=%s, fail_role=%s",
2978  rsc->id, role2text(rsc->role),
2979  node->details->unclean ? "true" : "false",
2980  fail2text(action->on_fail), role2text(action->fail_role));
2981 
2982  if (action->fail_role != RSC_ROLE_STARTED && rsc->next_role < action->fail_role) {
2983  rsc->next_role = action->fail_role;
2984  }
2985 
2986  if (action->fail_role == RSC_ROLE_STOPPED) {
2987  int score = -INFINITY;
2988 
2989  pe_resource_t *fail_rsc = rsc;
2990 
2991  if (fail_rsc->parent) {
2992  pe_resource_t *parent = uber_parent(fail_rsc);
2993 
2994  if (pe_rsc_is_clone(parent)
2995  && is_not_set(parent->flags, pe_rsc_unique)) {
2996  /* For clone resources, if a child fails on an operation
2997  * with on-fail = stop, all the resources fail. Do this by preventing
2998  * the parent from coming up again. */
2999  fail_rsc = parent;
3000  }
3001  }
3002  crm_notice("%s will not be started under current conditions",
3003  fail_rsc->id);
3004  /* make sure it doesn't come up again */
3005  if (fail_rsc->allowed_nodes != NULL) {
3006  g_hash_table_destroy(fail_rsc->allowed_nodes);
3007  }
3008  fail_rsc->allowed_nodes = pe__node_list2table(data_set->nodes);
3009  g_hash_table_foreach(fail_rsc->allowed_nodes, set_node_score, &score);
3010  }
3011 
3013 }
3014 
3034 static int
3035 determine_op_status(
3036  pe_resource_t *rsc, int rc, int target_rc, pe_node_t * node, xmlNode * xml_op, enum action_fail_response * on_fail, pe_working_set_t * data_set)
3037 {
3038  guint interval_ms = 0;
3039  bool is_probe = false;
3040  int result = PCMK_LRM_OP_DONE;
3041  const char *key = get_op_key(xml_op);
3042  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
3043  const char *exit_reason = crm_element_value(xml_op,
3044  XML_LRM_ATTR_EXIT_REASON);
3045 
3046  CRM_ASSERT(rsc);
3047  CRM_CHECK(task != NULL, return PCMK_LRM_OP_ERROR);
3048 
3049  if (exit_reason == NULL) {
3050  exit_reason = "";
3051  }
3052 
3053  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
3054  if ((interval_ms == 0) && !strcmp(task, CRMD_ACTION_STATUS)) {
3055  is_probe = true;
3056  task = "probe";
3057  }
3058 
3059  if (target_rc < 0) {
3060  /* Pre-1.0 Pacemaker versions, and Pacemaker 1.1.6 or earlier with
3061  * Heartbeat 2.0.7 or earlier as the cluster layer, did not include the
3062  * target_rc in the transition key, which (along with the similar case
3063  * of a corrupted transition key in the CIB) will be reported to this
3064  * function as -1. Pacemaker 2.0+ does not support rolling upgrades from
3065  * those versions or processing of saved CIB files from those versions,
3066  * so we do not need to care much about this case.
3067  */
3068  result = PCMK_LRM_OP_ERROR;
3069  crm_warn("Expected result not found for %s on %s (corrupt or obsolete CIB?)",
3070  key, node->details->uname);
3071 
3072  } else if (target_rc != rc) {
3073  result = PCMK_LRM_OP_ERROR;
3074  pe_rsc_debug(rsc, "%s on %s: expected %d (%s), got %d (%s%s%s)",
3075  key, node->details->uname,
3076  target_rc, services_ocf_exitcode_str(target_rc),
3077  rc, services_ocf_exitcode_str(rc),
3078  (*exit_reason? ": " : ""), exit_reason);
3079  }
3080 
3081  switch (rc) {
3082  case PCMK_OCF_OK:
3083  if (is_probe && (target_rc == PCMK_OCF_NOT_RUNNING)) {
3084  result = PCMK_LRM_OP_DONE;
3085  pe_rsc_info(rsc, "Probe found %s active on %s at %s",
3086  rsc->id, node->details->uname,
3087  last_change_str(xml_op));
3088  }
3089  break;
3090 
3091  case PCMK_OCF_NOT_RUNNING:
3092  if (is_probe || target_rc == rc || is_not_set(rsc->flags, pe_rsc_managed)) {
3093  result = PCMK_LRM_OP_DONE;
3094  rsc->role = RSC_ROLE_STOPPED;
3095 
3096  /* clear any previous failure actions */
3097  *on_fail = action_fail_ignore;
3098  rsc->next_role = RSC_ROLE_UNKNOWN;
3099  }
3100  break;
3101 
3102  case PCMK_OCF_RUNNING_MASTER:
3103  if (is_probe && (rc != target_rc)) {
3104  result = PCMK_LRM_OP_DONE;
3105  pe_rsc_info(rsc,
3106  "Probe found %s active and promoted on %s at %s",
3107  rsc->id, node->details->uname,
3108  last_change_str(xml_op));
3109  }
3110  rsc->role = RSC_ROLE_MASTER;
3111  break;
3112 
3113  case PCMK_OCF_DEGRADED_MASTER:
3114  case PCMK_OCF_FAILED_MASTER:
3115  rsc->role = RSC_ROLE_MASTER;
3116  result = PCMK_LRM_OP_ERROR;
3117  break;
3118 
3119  case PCMK_OCF_NOT_CONFIGURED:
3120  result = PCMK_LRM_OP_ERROR_FATAL;
3121  break;
3122 
3123  case PCMK_OCF_UNIMPLEMENT_FEATURE:
3124  if (interval_ms > 0) {
3125  result = PCMK_LRM_OP_NOTSUPPORTED;
3126  break;
3127  }
3128  // fall through
3129  case PCMK_OCF_NOT_INSTALLED:
3130  case PCMK_OCF_INVALID_PARAM:
3131  case PCMK_OCF_INSUFFICIENT_PRIV:
3132  if (!pe_can_fence(data_set, node)
3133  && !strcmp(task, CRMD_ACTION_STOP)) {
3134  /* If a stop fails and we can't fence, there's nothing else we can do */
3135  pe_proc_err("No further recovery can be attempted for %s "
3136  "because %s on %s failed (%s%s%s) at %s "
3137  CRM_XS " rc=%d id=%s", rsc->id, task,
3138  node->details->uname, services_ocf_exitcode_str(rc),
3139  (*exit_reason? ": " : ""), exit_reason,
3140  last_change_str(xml_op), rc, ID(xml_op));
3141  clear_bit(rsc->flags, pe_rsc_managed);
3142  set_bit(rsc->flags, pe_rsc_block);
3143  }
3144  result = PCMK_LRM_OP_ERROR_HARD;
3145  break;
3146 
3147  default:
3148  if (result == PCMK_LRM_OP_DONE) {
3149  crm_info("Treating unknown exit status %d from %s of %s "
3150  "on %s at %s as failure",
3151  rc, task, rsc->id, node->details->uname,
3152  last_change_str(xml_op));
3153  result = PCMK_LRM_OP_ERROR;
3154  }
3155  break;
3156  }
3157  return result;
3158 }
3159 
3160 // return TRUE if start or monitor last failure but parameters changed
3161 static bool
3162 should_clear_for_param_change(xmlNode *xml_op, const char *task,
3163  pe_resource_t *rsc, pe_node_t *node,
3164  pe_working_set_t *data_set)
3165 {
3166  if (!strcmp(task, "start") || !strcmp(task, "monitor")) {
3167 
3168  if (pe__bundle_needs_remote_name(rsc)) {
3169  /* We haven't allocated resources yet, so we can't reliably
3170  * substitute addr parameters for the REMOTE_CONTAINER_HACK.
3171  * When that's needed, defer the check until later.
3172  */
3173  pe__add_param_check(xml_op, rsc, node, pe_check_last_failure,
3174  data_set);
3175 
3176  } else {
3177  op_digest_cache_t *digest_data = NULL;
3178 
3179  digest_data = rsc_action_digest_cmp(rsc, xml_op, node, data_set);
3180  switch (digest_data->rc) {
3181  case RSC_DIGEST_UNKNOWN:
3182  crm_trace("Resource %s history entry %s on %s"
3183  " has no digest to compare",
3184  rsc->id, get_op_key(xml_op), node->details->id);
3185  break;
3186  case RSC_DIGEST_MATCH:
3187  break;
3188  default:
3189  return TRUE;
3190  }
3191  }
3192  }
3193  return FALSE;
3194 }
3195 
3196 // Order action after fencing of remote node, given connection rsc
3197 static void
3198 order_after_remote_fencing(pe_action_t *action, pe_resource_t *remote_conn,
3199  pe_working_set_t *data_set)
3200 {
3201  pe_node_t *remote_node = pe_find_node(data_set->nodes, remote_conn->id);
3202 
3203  if (remote_node) {
3204  pe_action_t *fence = pe_fence_op(remote_node, NULL, TRUE, NULL,
3205  FALSE, data_set);
3206  FALSE, data_set);
3207  order_actions(fence, action, pe_order_implies_then);
3208  }
3209 }
3210 
3211 static bool
3212 should_ignore_failure_timeout(pe_resource_t *rsc, xmlNode *xml_op,
3213  const char *task, guint interval_ms,
3214  bool is_last_failure, pe_working_set_t *data_set)
3215 {
3216  /* Clearing failures of recurring monitors has special concerns. The
3217  * executor reports only changes in the monitor result, so if the
3218  * monitor is still active and still getting the same failure result,
3219  * that will go undetected after the failure is cleared.
3220  *
3221  * Also, the operation history will have the time when the recurring
3222  * monitor result changed to the given code, not the time when the
3223  * result last happened.
3224  *
3225  * @TODO We probably should clear such failures only when the failure
3226  * timeout has passed since the last occurrence of the failed result.
3227  * However we don't record that information. We could maybe approximate
3228  * that by clearing only if there is a more recent successful monitor or
3229  * stop result, but we don't even have that information at this point
3230  * since we are still unpacking the resource's operation history.
3231  *
3232  * This is especially important for remote connection resources with a
3233  * reconnect interval, so in that case, we skip clearing failures
3234  * if the remote node hasn't been fenced.
3235  */
3236  if (rsc->remote_reconnect_ms
3237  && is_set(data_set->flags, pe_flag_stonith_enabled)
3238  && (interval_ms != 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3239 
3240  pe_node_t *remote_node = pe_find_node(data_set->nodes, rsc->id);
3241 
3242  if (remote_node && !remote_node->details->remote_was_fenced) {
3243  if (is_last_failure) {
3244  crm_info("Waiting to clear monitor failure for remote node %s"
3245  " until fencing has occurred", rsc->id);
3246  }
3247  return TRUE;
3248  }
3249  }
3250  return FALSE;
3251 }
3252 
3275 static bool
3276 check_operation_expiry(pe_resource_t *rsc, pe_node_t *node, int rc,
3277  xmlNode *xml_op, pe_working_set_t *data_set)
3278 {
3279  bool expired = FALSE;
3280  bool is_last_failure = pcmk__ends_with(ID(xml_op), "_last_failure_0");
3281  time_t last_run = 0;
3282  guint interval_ms = 0;
3283  int unexpired_fail_count = 0;
3284  const char *task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
3285  const char *clear_reason = NULL;
3286 
3287  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
3288 
3289  if ((rsc->failure_timeout > 0)
3290  && (crm_element_value_epoch(xml_op, XML_RSC_OP_LAST_CHANGE,
3291  &last_run) == 0)) {
3292 
3293  // Resource has a failure-timeout, and history entry has a timestamp
3294 
3295  time_t now = get_effective_time(data_set);
3296  time_t last_failure = 0;
3297 
3298  // Is this particular operation history older than the failure timeout?
3299  if ((now >= (last_run + rsc->failure_timeout))
3300  && !should_ignore_failure_timeout(rsc, xml_op, task, interval_ms,
3301  is_last_failure, data_set)) {
3302  expired = TRUE;
3303  }
3304 
3305  // Does the resource as a whole have an unexpired fail count?
3306  unexpired_fail_count = pe_get_failcount(node, rsc, &last_failure,
3307  pe_fc_effective, xml_op,
3308  data_set);
3309 
3310  // Update scheduler recheck time according to *last* failure
3311  crm_trace("%s@%lld is %sexpired @%lld with unexpired_failures=%d timeout=%ds"
3312  " last-failure@%lld",
3313  ID(xml_op), (long long) last_run, (expired? "" : "not "),
3314  (long long) now, unexpired_fail_count, rsc->failure_timeout,
3315  (long long) last_failure);
3316  last_failure += rsc->failure_timeout + 1;
3317  if (unexpired_fail_count && (now < last_failure)) {
3318  pe__update_recheck_time(last_failure, data_set);
3319  }
3320  }
3321 
3322  if (expired) {
3323  if (pe_get_failcount(node, rsc, NULL, pe_fc_default, xml_op, data_set)) {
3324 
3325  // There is a fail count ignoring timeout
3326 
3327  if (unexpired_fail_count == 0) {
3328  // There is no fail count considering timeout
3329  clear_reason = "it expired";
3330 
3331  } else {
3332  /* This operation is old, but there is an unexpired fail count.
3333  * In a properly functioning cluster, this should only be
3334  * possible if this operation is not a failure (otherwise the
3335  * fail count should be expired too), so this is really just a
3336  * failsafe.
3337  */
3338  expired = FALSE;
3339  }
3340 
3341  } else if (is_last_failure && rsc->remote_reconnect_ms) {
3342  /* Clear any expired last failure when reconnect interval is set,
3343  * even if there is no fail count.
3344  */
3345  clear_reason = "reconnect interval is set";
3346  }
3347  }
3348 
3349  if (!expired && is_last_failure
3350  && should_clear_for_param_change(xml_op, task, rsc, node, data_set)) {
3351  clear_reason = "resource parameters have changed";
3352  }
3353 
3354  if (clear_reason != NULL) {
3355  // Schedule clearing of the fail count
3356  pe_action_t *clear_op = pe__clear_failcount(rsc, node, clear_reason,
3357  data_set);
3358 
3359  if (is_set(data_set->flags, pe_flag_stonith_enabled)
3360  && rsc->remote_reconnect_ms) {
3361  /* If we're clearing a remote connection due to a reconnect
3362  * interval, we want to wait until any scheduled fencing
3363  * completes.
3364  *
3365  * We could limit this to remote_node->details->unclean, but at
3366  * this point, that's always true (it won't be reliable until
3367  * after unpack_node_loop() is done).
3368  */
3369  crm_info("Clearing %s failure will wait until any scheduled "
3370  "fencing of %s completes", task, rsc->id);
3371  order_after_remote_fencing(clear_op, rsc, data_set);
3372  }
3373  }
3374 
3375  if (expired && (interval_ms == 0) && safe_str_eq(task, CRMD_ACTION_STATUS)) {
3376  switch(rc) {
3377  case PCMK_OCF_OK:
3378  case PCMK_OCF_NOT_RUNNING:
3379  case PCMK_OCF_RUNNING_MASTER:
3380  case PCMK_OCF_DEGRADED:
3381  case PCMK_OCF_DEGRADED_MASTER:
3382  // Don't expire probes that return these values
3383  expired = FALSE;
3384  break;
3385  }
3386  }
3387 
3388  return expired;
3389 }
3390 
3391 int pe__target_rc_from_xml(xmlNode *xml_op)
3392 {
3393  int target_rc = 0;
3394  const char *key = crm_element_value(xml_op, XML_ATTR_TRANSITION_KEY);
3395 
3396  if (key == NULL) {
3397  return -1;
3398  }
3399  decode_transition_key(key, NULL, NULL, NULL, &target_rc);
3400  return target_rc;
3401 }
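 /* Illustrative example (key layout assumed from decode_transition_key()):
  * a transition key roughly of the form "3:7:0:4ac7f354-..." encodes action
  * number 3, transition 7 and target rc 0, so this function would return 0.
  * A missing key yields -1, which determine_op_status() reports as a corrupt
  * or obsolete CIB entry.
  */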
3402 
3403 static enum action_fail_response
3404 get_action_on_fail(pe_resource_t *rsc, const char *key, const char *task, pe_working_set_t * data_set)
3405 {
3407  pe_action_t *action = custom_action(rsc, strdup(key), task, NULL, TRUE, FALSE, data_set);
3408 
3409  result = action->on_fail;
3411 
3412  return result;
3413 }
3414 
3415 static void
3416 update_resource_state(pe_resource_t * rsc, pe_node_t * node, xmlNode * xml_op, const char * task, int rc,
3417  xmlNode * last_failure, enum action_fail_response * on_fail, pe_working_set_t * data_set)
3418 {
3419  gboolean clear_past_failure = FALSE;
3420 
3421  CRM_ASSERT(rsc);
3422  CRM_ASSERT(xml_op);
3423 
3424  if (rc == PCMK_OCF_NOT_RUNNING) {
3425  clear_past_failure = TRUE;
3426 
3427  } else if (rc == PCMK_OCF_NOT_INSTALLED) {
3428  rsc->role = RSC_ROLE_STOPPED;
3429 
3430  } else if (safe_str_eq(task, CRMD_ACTION_STATUS)) {
3431  if (last_failure) {
3432  const char *op_key = get_op_key(xml_op);
3433  const char *last_failure_key = get_op_key(last_failure);
3434 
3435  if (safe_str_eq(op_key, last_failure_key)) {
3436  clear_past_failure = TRUE;
3437  }
3438  }
3439 
3440  if (rsc->role < RSC_ROLE_STARTED) {
3441  set_active(rsc);
3442  }
3443 
3444  } else if (safe_str_eq(task, CRMD_ACTION_START)) {
3445  rsc->role = RSC_ROLE_STARTED;
3446  clear_past_failure = TRUE;
3447 
3448  } else if (safe_str_eq(task, CRMD_ACTION_STOP)) {
3449  rsc->role = RSC_ROLE_STOPPED;
3450  clear_past_failure = TRUE;
3451 
3452  } else if (safe_str_eq(task, CRMD_ACTION_PROMOTE)) {
3453  rsc->role = RSC_ROLE_MASTER;
3454  clear_past_failure = TRUE;
3455 
3456  } else if (safe_str_eq(task, CRMD_ACTION_DEMOTE)) {
3457 
3458  if (*on_fail == action_fail_demote) {
3459  // Demote clears an error only if on-fail=demote
3460  clear_past_failure = TRUE;
3461  }
3462  rsc->role = RSC_ROLE_SLAVE;
3463 
3464  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATED)) {
3465  rsc->role = RSC_ROLE_STARTED;
3466  clear_past_failure = TRUE;
3467 
3468  } else if (safe_str_eq(task, CRMD_ACTION_MIGRATE)) {
3469  unpack_migrate_to_success(rsc, node, xml_op, data_set);
3470 
3471  } else if (rsc->role < RSC_ROLE_STARTED) {
3472  pe_rsc_trace(rsc, "%s active on %s", rsc->id, node->details->uname);
3473  set_active(rsc);
3474  }
3475 
3476  /* clear any previous failure actions */
3477  if (clear_past_failure) {
3478  switch (*on_fail) {
3479  case action_fail_stop:
3480  case action_fail_fence:
3481  case action_fail_migrate:
3482  case action_fail_standby:
3483  pe_rsc_trace(rsc, "%s.%s is not cleared by a completed stop",
3484  rsc->id, fail2text(*on_fail));
3485  break;
3486 
3487  case action_fail_block:
3488  case action_fail_ignore:
3489  case action_fail_demote:
3490  case action_fail_recover:
3491  case action_fail_restart_container:
3492  *on_fail = action_fail_ignore;
3493  rsc->next_role = RSC_ROLE_UNKNOWN;
3494  break;
3495  case action_fail_reset_remote:
3496  if (rsc->remote_reconnect_ms == 0) {
3497  /* With no reconnect interval, the connection is allowed to
3498  * start again after the remote node is fenced and
3499  * completely stopped. (With a reconnect interval, we wait
3500  * for the failure to be cleared entirely before attempting
3501  * to reconnect.)
3502  */
3503  *on_fail = action_fail_ignore;
3504  rsc->next_role = RSC_ROLE_UNKNOWN;
3505  }
3506  break;
3507  }
3508  }
3509 }
3510 
3531 static int
3532 remap_monitor_rc(int rc, xmlNode *xml_op, const pe_node_t *node,
3533  const pe_resource_t *rsc, pe_working_set_t *data_set)
3534 {
3535  int remapped_rc = rc;
3536 
3537  switch (rc) {
3538  case PCMK_OCF_DEGRADED:
3539  remapped_rc = PCMK_OCF_OK;
3540  break;
3541 
3542  case PCMK_OCF_DEGRADED_MASTER:
3543  remapped_rc = PCMK_OCF_RUNNING_MASTER;
3544  break;
3545 
3546  default:
3547  break;
3548  }
3549 
3550  if (rc != remapped_rc) {
3551  crm_trace("Remapping monitor result %d to %d", rc, remapped_rc);
3552  if (!node->details->shutdown || node->details->online) {
3553  record_failed_op(xml_op, node, rsc, data_set);
3554  }
3555  }
3556  return remapped_rc;
3557 }
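/* Example (assumed numeric values from services.h): a monitor returning
 * PCMK_OCF_DEGRADED (190) is remapped to PCMK_OCF_OK (0), and
 * PCMK_OCF_DEGRADED_MASTER (191) to PCMK_OCF_RUNNING_MASTER (8). The original
 * degraded result is still recorded via record_failed_op() (unless the node
 * has already shut down), so it remains visible to the user.
 */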
3558 
3559 static void
3560 unpack_rsc_op(pe_resource_t *rsc, pe_node_t *node, xmlNode *xml_op,
3561  xmlNode **last_failure, enum action_fail_response *on_fail,
3562  pe_working_set_t *data_set)
3563 {
3564  int rc = 0;
3565  int task_id = 0;
3566  int target_rc = 0;
3567  int status = PCMK_LRM_OP_UNKNOWN;
3568  guint interval_ms = 0;
3569  const char *task = NULL;
3570  const char *task_key = NULL;
3571  const char *exit_reason = NULL;
3572  bool expired = FALSE;
3573  pe_resource_t *parent = rsc;
3574  enum action_fail_response failure_strategy = action_fail_recover;
3575 
3576  CRM_CHECK(rsc && node && xml_op, return);
3577 
3578  target_rc = pe__target_rc_from_xml(xml_op);
3579  task_key = get_op_key(xml_op);
3580  task = crm_element_value(xml_op, XML_LRM_ATTR_TASK);
3581  exit_reason = crm_element_value(xml_op, XML_LRM_ATTR_EXIT_REASON);
3582  if (exit_reason == NULL) {
3583  exit_reason = "";
3584  }
3585 
3586  crm_element_value_int(xml_op, XML_LRM_ATTR_RC, &rc);
3587  crm_element_value_int(xml_op, XML_LRM_ATTR_CALLID, &task_id);
3588  crm_element_value_int(xml_op, XML_LRM_ATTR_OPSTATUS, &status);
3589  crm_element_value_ms(xml_op, XML_LRM_ATTR_INTERVAL_MS, &interval_ms);
3590 
3591  CRM_CHECK(task != NULL, return);
3592  CRM_CHECK(status <= PCMK_LRM_OP_INVALID, return);
3593  CRM_CHECK(status >= PCMK_LRM_OP_PENDING, return);
3594 
3595  if (!strcmp(task, CRMD_ACTION_NOTIFY) ||
3596  !strcmp(task, CRMD_ACTION_METADATA)) {
3597  /* safe to ignore these */
3598  return;
3599  }
3600 
3601  if (is_not_set(rsc->flags, pe_rsc_unique)) {
3602  parent = uber_parent(rsc);
3603  }
3604 
3605  pe_rsc_trace(rsc, "Unpacking task %s/%s (call_id=%d, status=%d, rc=%d) on %s (role=%s)",
3606  task_key, task, task_id, status, rc, node->details->uname, role2text(rsc->role));
3607 
3608  if (node->details->unclean) {
3609  pe_rsc_trace(rsc, "Node %s (where %s is running) is unclean."
3610  " Further action depends on the value of the stop's on-fail attribute",
3611  node->details->uname, rsc->id);
3612  }
3613 
3614  /* It should be possible to call remap_monitor_rc() first then call
3615  * check_operation_expiry() only if rc != target_rc, because there should
3616  * never be a fail count without at least one unexpected result in the
3617  * resource history. That would be more efficient by avoiding having to call
3618  * check_operation_expiry() for expected results.
3619  *
3620  * However, we do have such configurations in the scheduler regression
3621  * tests, even if it shouldn't be possible with the current code. It's
3622  * probably a good idea anyway, but that would require updating the test
3623  * inputs to something currently possible.
3624  */
3625 
3626  if ((status != PCMK_LRM_OP_NOT_INSTALLED)
3627  && check_operation_expiry(rsc, node, rc, xml_op, data_set)) {
3628  expired = TRUE;
3629  }
3630 
3631  if (!strcmp(task, CRMD_ACTION_STATUS)) {
3632  rc = remap_monitor_rc(rc, xml_op, node, rsc, data_set);
3633  }
3634 
3635  if (expired && (rc != target_rc)) {
3636  const char *magic = crm_element_value(xml_op, XML_ATTR_TRANSITION_MAGIC);
3637 
3638  if (interval_ms == 0) {
3639  crm_notice("Ignoring expired %s failure on %s "
3640  CRM_XS " actual=%d expected=%d magic=%s",
3641  task_key, node->details->uname, rc, target_rc, magic);
3642  goto done;
3643 
3644  } else if(node->details->online && node->details->unclean == FALSE) {
3645  /* Reschedule the recurring monitor. CancelXmlOp() won't work at
3646  * this stage, so as a hacky workaround, forcibly change the restart
3647  * digest so check_action_definition() does what we want later.
3648  *
3649  * @TODO We should skip this if there is a newer successful monitor.
3650  * Also, this causes rescheduling only if the history entry
3651  * has an op-digest (which the expire-non-blocked-failure
3652  * scheduler regression test doesn't, but that may not be a
3653  * realistic scenario in production).
3654  */
3655  crm_notice("Rescheduling %s after failure expired on %s "
3656  CRM_XS " actual=%d expected=%d magic=%s",
3657  task_key, node->details->uname, rc, target_rc, magic);
3658  crm_xml_add(xml_op, XML_LRM_ATTR_RESTART_DIGEST, "calculated-failure-timeout");
3659  goto done;
3660  }
3661  }
3662 
3663  /* If the executor reported an operation status of anything but done or
3664  * error, consider that final. But for done or error, we know better whether
3665  * it should be treated as a failure or not, because we know the expected
3666  * result.
3667  */
3668  if(status == PCMK_LRM_OP_DONE || status == PCMK_LRM_OP_ERROR) {
3669  status = determine_op_status(rsc, rc, target_rc, node, xml_op, on_fail, data_set);
3670  pe_rsc_trace(rsc, "Remapped %s status to %d", task_key, status);
3671  }
3672 
3673  switch (status) {
3674  case PCMK_LRM_OP_CANCELLED:
3675  // Should never happen
3676  pe_err("Resource history contains cancellation '%s' "
3677  "(%s of %s on %s at %s)",
3678  ID(xml_op), task, rsc->id, node->details->uname,
3679  last_change_str(xml_op));
3680  break;
3681 
3682  case PCMK_LRM_OP_PENDING:
3683  if (!strcmp(task, CRMD_ACTION_START)) {
3684  set_bit(rsc->flags, pe_rsc_start_pending);
3685  set_active(rsc);
3686 
3687  } else if (!strcmp(task, CRMD_ACTION_PROMOTE)) {
3688  rsc->role = RSC_ROLE_MASTER;
3689 
3690  } else if (!strcmp(task, CRMD_ACTION_MIGRATE) && node->details->unclean) {
3691  /* If a pending migrate_to action is out on an unclean node,
3692  * we have to force the stop action on the target. */
3693  const char *migrate_target = crm_element_value(xml_op, XML_LRM_ATTR_MIGRATE_TARGET);
3694  pe_node_t *target = pe_find_node(data_set->nodes, migrate_target);
3695  if (target) {
3696  stop_action(rsc, target, FALSE);
3697  }
3698  }
3699 
3700  if (rsc->pending_task == NULL) {
3701  if ((interval_ms != 0) || strcmp(task, CRMD_ACTION_STATUS)) {
3702  rsc->pending_task = strdup(task);
3703  rsc->pending_node = node;
3704  } else {
3705  /* Pending probes are not printed, even if pending
3706  * operations are requested. If someone ever requests that
3707  * behavior, enable the below and the corresponding part of
3708  * native.c:native_pending_task().
3709  */
3710 #if 0
3711  rsc->pending_task = strdup("probe");
3712  rsc->pending_node = node;
3713 #endif
3714  }
3715  }
3716  break;
3717 
3718  case PCMK_LRM_OP_DONE:
3719  pe_rsc_trace(rsc, "%s of %s on %s completed at %s " CRM_XS " id=%s",
3720  task, rsc->id, node->details->uname,
3721  last_change_str(xml_op), ID(xml_op));
3722  update_resource_state(rsc, node, xml_op, task, rc, *last_failure, on_fail, data_set);
3723  break;
3724 
3725  case PCMK_LRM_OP_NOT_INSTALLED:
3726  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3727  if (failure_strategy == action_fail_ignore) {
3728  crm_warn("Cannot ignore failed %s of %s on %s: "
3729  "Resource agent doesn't exist "
3730  CRM_XS " status=%d rc=%d id=%s",
3731  task, rsc->id, node->details->uname, status, rc,
3732  ID(xml_op));
3733  /* Also for printing it as "FAILED" by marking it as pe_rsc_failed later */
3734  *on_fail = action_fail_migrate;
3735  }
3736  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3737  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3738  break;
3739 
3740  case PCMK_LRM_OP_NOT_CONNECTED:
3741  if (pe__is_guest_or_remote_node(node)
3742  && is_set(node->details->remote_rsc->flags, pe_rsc_managed)) {
3743  /* We should never get into a situation where a managed remote
3744  * connection resource is considered OK but a resource action
3745  * behind the connection gets a "not connected" status. But as a
3746  * fail-safe in case a bug or unusual circumstances do lead to
3747  * that, ensure the remote connection is considered failed.
3748  */
3749  set_bit(node->details->remote_rsc->flags, pe_rsc_failed);
3750  set_bit(node->details->remote_rsc->flags, pe_rsc_stop);
3751  }
3752 
3753  // fall through
3754 
3755  case PCMK_LRM_OP_ERROR:
3756  case PCMK_LRM_OP_ERROR_HARD:
3757  case PCMK_LRM_OP_ERROR_FATAL:
3758  case PCMK_LRM_OP_TIMEOUT:
3759  case PCMK_LRM_OP_NOTSUPPORTED:
3760  case PCMK_LRM_OP_INVALID:
3761 
3762  failure_strategy = get_action_on_fail(rsc, task_key, task, data_set);
3763  if ((failure_strategy == action_fail_ignore)
3764  || (failure_strategy == action_fail_restart_container
3765  && !strcmp(task, CRMD_ACTION_STOP))) {
3766 
3767  crm_warn("Pretending failed %s (%s%s%s) of %s on %s at %s "
3768  "succeeded " CRM_XS " rc=%d id=%s",
3769  task, services_ocf_exitcode_str(rc),
3770  (*exit_reason? ": " : ""), exit_reason, rsc->id,
3771  node->details->uname, last_change_str(xml_op), rc,
3772  ID(xml_op));
3773 
3774  update_resource_state(rsc, node, xml_op, task, target_rc, *last_failure, on_fail, data_set);
3775  crm_xml_add(xml_op, XML_ATTR_UNAME, node->details->uname);
3776  set_bit(rsc->flags, pe_rsc_failure_ignored);
3777 
3778  record_failed_op(xml_op, node, rsc, data_set);
3779 
3780  if ((failure_strategy == action_fail_restart_container)
3781  && cmp_on_fail(*on_fail, action_fail_recover) <= 0) {
3782  *on_fail = failure_strategy;
3783  }
3784 
3785  } else {
3786  unpack_rsc_op_failure(rsc, node, rc, xml_op, last_failure, on_fail, data_set);
3787 
3788  if(status == PCMK_LRM_OP_ERROR_HARD) {
3789  do_crm_log(rc != PCMK_OCF_NOT_INSTALLED?LOG_ERR:LOG_NOTICE,
3790  "Preventing %s from restarting on %s because "
3791  "of hard failure (%s%s%s)" CRM_XS " rc=%d id=%s",
3792  parent->id, node->details->uname,
3793  services_ocf_exitcode_str(rc),
3794  (*exit_reason? ": " : ""), exit_reason,
3795  rc, ID(xml_op));
3796  resource_location(parent, node, -INFINITY, "hard-error", data_set);
3797 
3798  } else if(status == PCMK_LRM_OP_ERROR_FATAL) {
3799  crm_err("Preventing %s from restarting anywhere because "
3800  "of fatal failure (%s%s%s) " CRM_XS " rc=%d id=%s",
3801  parent->id, services_ocf_exitcode_str(rc),
3802  (*exit_reason? ": " : ""), exit_reason,
3803  rc, ID(xml_op));
3804  resource_location(parent, NULL, -INFINITY, "fatal-error", data_set);
3805  }
3806  }
3807  break;
3808  }
3809 
3810  done:
3811  pe_rsc_trace(rsc, "Resource %s after %s: role=%s, next=%s",
3812  rsc->id, task, role2text(rsc->role),
3813  role2text(rsc->next_role));
3814 }
3815 
3816 static void
3817 add_node_attrs(xmlNode *xml_obj, pe_node_t *node, bool overwrite,
3818  pe_working_set_t *data_set)
3819 {
3820  const char *cluster_name = NULL;
3821 
3822  g_hash_table_insert(node->details->attrs,
3823  strdup(CRM_ATTR_UNAME), strdup(node->details->uname));
3824 
3825  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_ID),
3826  strdup(node->details->id));
3827  if (safe_str_eq(node->details->id, data_set->dc_uuid)) {
3828  data_set->dc_node = node;
3829  node->details->is_dc = TRUE;
3830  g_hash_table_insert(node->details->attrs,
3831  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_TRUE));
3832  } else {
3833  g_hash_table_insert(node->details->attrs,
3834  strdup(CRM_ATTR_IS_DC), strdup(XML_BOOLEAN_FALSE));
3835  }
3836 
3837  cluster_name = g_hash_table_lookup(data_set->config_hash, "cluster-name");
3838  if (cluster_name) {
3839  g_hash_table_insert(node->details->attrs, strdup(CRM_ATTR_CLUSTER_NAME),
3840  strdup(cluster_name));
3841  }
3842 
3843  pe__unpack_dataset_nvpairs(xml_obj, XML_TAG_ATTR_SETS, NULL,
3844  node->details->attrs, NULL, overwrite, data_set);
3845 
3846  if (pe_node_attribute_raw(node, CRM_ATTR_SITE_NAME) == NULL) {
3847  const char *site_name = pe_node_attribute_raw(node, "site-name");
3848 
3849  if (site_name) {
3850  g_hash_table_insert(node->details->attrs,
3851  strdup(CRM_ATTR_SITE_NAME),
3852  strdup(site_name));
3853 
3854  } else if (cluster_name) {
3855  /* Default to cluster-name if unset */
3856  g_hash_table_insert(node->details->attrs,
3857  strdup(CRM_ATTR_SITE_NAME),
3858  strdup(cluster_name));
3859  }
3860  }
3861 }
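/* Example (hypothetical cluster name; attribute names are the assumed values
 * of the CRM_ATTR_* constants): after add_node_attrs() runs for the DC of a
 * cluster named "mycluster" with no explicit site-name attribute, rule
 * expressions can match built-in node attributes such as #uname, #id,
 * #is_dc ("true" only on the DC), #cluster-name ("mycluster"), and
 * #site-name (defaulted to the cluster name, here "mycluster").
 */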
3862 
3863 static GListPtr
3864 extract_operations(const char *node, const char *rsc, xmlNode * rsc_entry, gboolean active_filter)
3865 {
3866  int counter = -1;
3867  int stop_index = -1;
3868  int start_index = -1;
3869 
3870  xmlNode *rsc_op = NULL;
3871 
3872  GListPtr gIter = NULL;
3873  GListPtr op_list = NULL;
3874  GListPtr sorted_op_list = NULL;
3875 
3876  /* extract operations */
3877  op_list = NULL;
3878  sorted_op_list = NULL;
3879 
3880  for (rsc_op = __xml_first_child_element(rsc_entry);
3881  rsc_op != NULL; rsc_op = __xml_next_element(rsc_op)) {
3882  if (crm_str_eq((const char *)rsc_op->name, XML_LRM_TAG_RSC_OP, TRUE)) {
3883  crm_xml_add(rsc_op, "resource", rsc);
3884  crm_xml_add(rsc_op, XML_ATTR_UNAME, node);
3885  op_list = g_list_prepend(op_list, rsc_op);
3886  }
3887  }
3888 
3889  if (op_list == NULL) {
3890  /* if there are no operations, there is nothing to do */
3891  return NULL;
3892  }
3893 
3894  sorted_op_list = g_list_sort(op_list, sort_op_by_callid);
3895 
3896  /* create active recurring operations as optional */
3897  if (active_filter == FALSE) {
3898  return sorted_op_list;
3899  }
3900 
3901  op_list = NULL;
3902 
3903  calculate_active_ops(sorted_op_list, &start_index, &stop_index);
3904 
3905  for (gIter = sorted_op_list; gIter != NULL; gIter = gIter->next) {
3906  xmlNode *rsc_op = (xmlNode *) gIter->data;
3907 
3908  counter++;
3909 
3910  if (start_index < stop_index) {
3911  crm_trace("Skipping %s: not active", ID(rsc_entry));
3912  break;
3913 
3914  } else if (counter < start_index) {
3915  crm_trace("Skipping %s: old", ID(rsc_op));
3916  continue;
3917  }
3918  op_list = g_list_append(op_list, rsc_op);
3919  }
3920 
3921  g_list_free(sorted_op_list);
3922  return op_list;
3923 }
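/* Example: given a sorted history of stop (call 3), start (call 5), and
 * monitor (call 6) for one resource, calculate_active_ops() points
 * start_index at the start entry, so with active_filter the stop is skipped
 * as "old" and only the start and monitor operations are returned.
 */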
3924 
3925 GListPtr
3926 find_operations(const char *rsc, const char *node, gboolean active_filter,
3927  pe_working_set_t * data_set)
3928 {
3929  GListPtr output = NULL;
3930  GListPtr intermediate = NULL;
3931 
3932  xmlNode *tmp = NULL;
3933  xmlNode *status = find_xml_node(data_set->input, XML_CIB_TAG_STATUS, TRUE);
3934 
3935  pe_node_t *this_node = NULL;
3936 
3937  xmlNode *node_state = NULL;
3938 
3939  for (node_state = __xml_first_child_element(status); node_state != NULL;
3940  node_state = __xml_next_element(node_state)) {
3941 
3942  if (crm_str_eq((const char *)node_state->name, XML_CIB_TAG_STATE, TRUE)) {
3943  const char *uname = crm_element_value(node_state, XML_ATTR_UNAME);
3944 
3945  if (node != NULL && safe_str_neq(uname, node)) {
3946  continue;
3947  }
3948 
3949  this_node = pe_find_node(data_set->nodes, uname);
3950  if(this_node == NULL) {
3951  CRM_LOG_ASSERT(this_node != NULL);
3952  continue;
3953 
3954  } else if (pe__is_guest_or_remote_node(this_node)) {
3955  determine_remote_online_status(data_set, this_node);
3956 
3957  } else {
3958  determine_online_status(node_state, this_node, data_set);
3959  }
3960 
3961  if (this_node->details->online || is_set(data_set->flags, pe_flag_stonith_enabled)) {
3962  /* offline nodes run no resources...
3963  * unless stonith is enabled in which case we need to
3964  * make sure rsc start events happen after the stonith
3965  */
3966  xmlNode *lrm_rsc = NULL;
3967 
3968  tmp = find_xml_node(node_state, XML_CIB_TAG_LRM, FALSE);
3969  tmp = find_xml_node(tmp, XML_LRM_TAG_RESOURCES, FALSE);
3970 
3971  for (lrm_rsc = __xml_first_child_element(tmp); lrm_rsc != NULL;
3972  lrm_rsc = __xml_next_element(lrm_rsc)) {
3973  if (crm_str_eq((const char *)lrm_rsc->name, XML_LRM_TAG_RESOURCE, TRUE)) {
3974 
3975  const char *rsc_id = crm_element_value(lrm_rsc, XML_ATTR_ID);
3976 
3977  if (rsc != NULL && safe_str_neq(rsc_id, rsc)) {
3978  continue;
3979  }
3980 
3981  intermediate = extract_operations(uname, rsc_id, lrm_rsc, active_filter);
3982  output = g_list_concat(output, intermediate);
3983  }
3984  }
3985  }
3986  }
3987  }
3988 
3989  return output;
3990 }
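/* Usage sketch (hypothetical caller and names, not part of this file): list
 * the active operations recorded for resource "vip" on node "node1".
 *
 *     GListPtr ops = find_operations("vip", "node1", TRUE, data_set);
 *
 *     for (GListPtr iter = ops; iter != NULL; iter = iter->next) {
 *         xmlNode *op = (xmlNode *) iter->data;
 *
 *         crm_info("Found operation %s", ID(op));
 *     }
 *     g_list_free(ops);  // entries point into the CIB, so free only the list
 */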