pacemaker  1.1.14-70404b0
Scalable High-Availability cluster resource manager
legacy.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2004 Andrew Beekhof <andrew@beekhof.net>
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
19 #include <crm_internal.h>
20 #include <crm/cluster/internal.h>
21 #include <bzlib.h>
22 #include <crm/common/ipc.h>
23 #include <crm/cluster.h>
24 #include <crm/common/mainloop.h>
25 #include <sys/utsname.h>
26 #include <sys/socket.h>
27 #include <netdb.h>
28 
29 #if SUPPORT_COROSYNC
30 # include <corosync/confdb.h>
31 # include <corosync/corodefs.h>
32 # include <corosync/cpg.h>
33 # include <corosync/cfg.h>
34 #endif
35 
36 #if HAVE_CMAP
37 # include <corosync/cmap.h>
38 #endif
39 
40 #if SUPPORT_CMAN
41 # include <libcman.h>
42 cman_handle_t pcmk_cman_handle = NULL;
43 #endif
44 
46 gboolean ais_membership_force = FALSE;
47 int plugin_dispatch(gpointer user_data);
48 
49 int ais_fd_sync = -1;
50 int ais_fd_async = -1; /* never send messages via this channel */
51 void *ais_ipc_ctx = NULL;
52 
53 hdb_handle_t ais_ipc_handle = 0;
54 
55 static bool valid_cman_name(const char *name, uint32_t nodeid)
56 {
57  bool rc = TRUE;
58 
59  /* Yes, %d, because that's what CMAN does */
60  char *fakename = crm_strdup_printf("Node%d", nodeid);
61 
62  if(crm_str_eq(fakename, name, TRUE)) {
63  rc = FALSE;
64  crm_notice("Ignoring inferred name from cman: %s", fakename);
65  }
66  free(fakename);
67  return rc;
68 }
69 
70 static gboolean
71 plugin_get_details(uint32_t * id, char **uname)
72 {
73  struct iovec iov;
74  int retries = 0;
75  int rc = CS_OK;
77  struct crm_ais_nodeid_resp_s answer;
78 
79  static uint32_t local_id = 0;
80  static char *local_uname = NULL;
81 
82  if(local_id) {
83  if(id) *id = local_id;
84  if(uname) *uname = strdup(local_uname);
85  return TRUE;
86  }
87 
88  header.error = CS_OK;
89  header.id = crm_class_nodeid;
90  header.size = sizeof(cs_ipc_header_response_t);
91 
92  iov.iov_base = &header;
93  iov.iov_len = header.size;
94 
95  retry:
96  errno = 0;
97  rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, &iov, 1, &answer, sizeof(answer));
98  if (rc == CS_OK) {
99  CRM_CHECK(answer.header.size == sizeof(struct crm_ais_nodeid_resp_s),
100  crm_err("Odd message: id=%d, size=%d, error=%d",
101  answer.header.id, answer.header.size, answer.header.error));
102  CRM_CHECK(answer.header.id == crm_class_nodeid,
103  crm_err("Bad response id: %d", answer.header.id));
104  }
105 
106  if ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20) {
107  retries++;
108  crm_info("Peer overloaded: Re-sending message (Attempt %d of 20)", retries);
109  sleep(retries); /* Proportional back off */
110  goto retry;
111  }
112 
113  if (rc != CS_OK) {
114  crm_err("Sending nodeid request: FAILED (rc=%d): %s", rc, ais_error2text(rc));
115  return FALSE;
116 
117  } else if (answer.header.error != CS_OK) {
118  crm_err("Bad response from peer: (rc=%d): %s", rc, ais_error2text(rc));
119  return FALSE;
120  }
121 
122  crm_info("Server details: id=%u uname=%s cname=%s", answer.id, answer.uname, answer.cname);
123 
124  local_id = answer.id;
125  local_uname = strdup(answer.uname);
126 
127  if(id) *id = local_id;
128  if(uname) *uname = strdup(local_uname);
129  return TRUE;
130 }
131 
132 bool
133 send_plugin_text(int class, struct iovec *iov)
134 {
135  int rc = CS_OK;
136  int retries = 0;
137  int buf_len = sizeof(cs_ipc_header_response_t);
138  char *buf = malloc(buf_len);
139  AIS_Message *ais_msg = (AIS_Message*)iov[0].iov_base;
140  cs_ipc_header_response_t *header = (cs_ipc_header_response_t *)(void*)buf;
141 
142  CRM_ASSERT(buf != NULL);
143  /* There are only 6 handlers registered to crm_lib_service in plugin.c */
144  CRM_CHECK(class < 6, crm_err("Invalid message class: %d", class);
145  return FALSE);
146 
147  do {
148  if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {
149  retries++;
150  crm_info("Peer overloaded or membership in flux:"
151  " Re-sending message (Attempt %d of 20)", retries);
152  sleep(retries); /* Proportional back off */
153  }
154 
155  errno = 0;
156  rc = coroipcc_msg_send_reply_receive(ais_ipc_handle, iov, 1, buf, buf_len);
157 
158  } while ((rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) && retries < 20);
159 
160  if (rc == CS_OK) {
161  CRM_CHECK(header->size == sizeof(cs_ipc_header_response_t),
162  crm_err("Odd message: id=%d, size=%d, class=%d, error=%d",
163  header->id, header->size, class, header->error));
164 
165  CRM_ASSERT(buf_len >= header->size);
166  CRM_CHECK(header->id == CRM_MESSAGE_IPC_ACK,
167  crm_err("Bad response id (%d) for request (%d)", header->id,
168  ais_msg->header.id));
169  CRM_CHECK(header->error == CS_OK, rc = header->error);
170 
171  } else {
172  crm_perror(LOG_ERR, "Sending plugin message %d FAILED: %s (%d)",
173  ais_msg->id, ais_error2text(rc), rc);
174  }
175 
176  free(iov[0].iov_base);
177  free(iov);
178  free(buf);
179 
180  return (rc == CS_OK);
181 }
182 
183 void
185 {
186  crm_notice("Disconnecting from Corosync");
187 
188  if (is_classic_ais_cluster()) {
189  if (ais_ipc_handle) {
190  crm_trace("Disconnecting plugin");
191  coroipcc_service_disconnect(ais_ipc_handle);
192  ais_ipc_handle = 0;
193  } else {
194  crm_info("No plugin connection");
195  }
196  }
197  cluster_disconnect_cpg(cluster);
198 
199 # if SUPPORT_CMAN
200  if (is_cman_cluster()) {
201  if (pcmk_cman_handle) {
202  crm_info("Disconnecting cman");
203  if (cman_stop_notification(pcmk_cman_handle) >= 0) {
204  crm_info("Destroying cman");
205  cman_finish(pcmk_cman_handle);
206  }
207 
208  } else {
209  crm_info("No cman connection");
210  }
211  }
212 # endif
213  ais_fd_async = -1;
214  ais_fd_sync = -1;
215 }
216 
217 void
219 {
220  if (msg->header.id == crm_class_members || msg->header.id == crm_class_quorum) {
221  xmlNode *member = NULL;
222  const char *value = NULL;
223  gboolean quorate = FALSE;
224  xmlNode *xml = string2xml(msg->data);
225 
226  if (xml == NULL) {
227  crm_err("Invalid membership update: %s", msg->data);
228  return;
229  }
230 
231  value = crm_element_value(xml, "quorate");
232  CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No quorum value:"); return);
233  if (crm_is_true(value)) {
234  quorate = TRUE;
235  }
236 
237  value = crm_element_value(xml, "id");
238  CRM_CHECK(value != NULL, crm_log_xml_err(xml, "No membership id"); return);
239  crm_peer_seq = crm_int_helper(value, NULL);
240 
241  if (quorate != crm_have_quorum) {
242  crm_notice("Membership %s: quorum %s", value, quorate ? "acquired" : "lost");
244 
245  } else {
246  crm_info("Membership %s: quorum %s", value, quorate ? "retained" : "still lost");
247  }
248 
249  for (member = __xml_first_child(xml); member != NULL; member = __xml_next(member)) {
250  const char *id_s = crm_element_value(member, "id");
251  const char *addr = crm_element_value(member, "addr");
252  const char *uname = crm_element_value(member, "uname");
253  const char *state = crm_element_value(member, "state");
254  const char *born_s = crm_element_value(member, "born");
255  const char *seen_s = crm_element_value(member, "seen");
256  const char *votes_s = crm_element_value(member, "votes");
257  const char *procs_s = crm_element_value(member, "processes");
258 
259  int votes = crm_int_helper(votes_s, NULL);
260  unsigned int id = crm_int_helper(id_s, NULL);
261  unsigned int procs = crm_int_helper(procs_s, NULL);
262 
263  /* TODO: These values will contain garbage if version < 0.7.1 */
264  uint64_t born = crm_int_helper(born_s, NULL);
265  uint64_t seen = crm_int_helper(seen_s, NULL);
266 
267  crm_update_peer(__FUNCTION__, id, born, seen, votes, procs, uname, uname, addr, state);
268  }
269  free_xml(xml);
270  }
271 }
272 
273 static void
274 plugin_default_deliver_message(cpg_handle_t handle,
275  const struct cpg_name *groupName,
276  uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len)
277 {
278  uint32_t kind = 0;
279  const char *from = NULL;
280  char *data = pcmk_message_common_cs(handle, nodeid, pid, msg, &kind, &from);
281 
282  free(data);
283 }
284 
285 int
286 plugin_dispatch(gpointer user_data)
287 {
288  int rc = CS_OK;
289  crm_cluster_t *cluster = (crm_cluster_t *) user_data;
290 
291  do {
292  char *buffer = NULL;
293 
294  rc = coroipcc_dispatch_get(ais_ipc_handle, (void **)&buffer, 0);
295  if (rc == CS_ERR_TRY_AGAIN || rc == CS_ERR_QUEUE_FULL) {
296  return 0;
297  }
298  if (rc != CS_OK) {
299  crm_perror(LOG_ERR, "Receiving message body failed: (%d) %s", rc, ais_error2text(rc));
300  return -1;
301  }
302  if (buffer == NULL) {
303  /* NULL is a legal "no message afterall" value */
304  return 0;
305  }
306  /*
307  cpg_deliver_fn_t(cpg_handle_t handle, const struct cpg_name *group_name,
308  uint32_t nodeid, uint32_t pid, void *msg, size_t msg_len);
309  */
310  if (cluster && cluster->cpg.cpg_deliver_fn) {
311  cluster->cpg.cpg_deliver_fn(0, NULL, 0, 0, buffer, 0);
312 
313  } else {
314  plugin_default_deliver_message(0, NULL, 0, 0, buffer, 0);
315  }
316 
317  coroipcc_dispatch_put(ais_ipc_handle);
318 
319  } while (ais_ipc_handle);
320 
321  return 0;
322 }
323 
324 static void
325 plugin_destroy(gpointer user_data)
326 {
327  crm_err("AIS connection terminated");
328  ais_fd_sync = -1;
329  crm_exit(ENOTCONN);
330 }
331 
332 # if SUPPORT_CMAN
333 
334 static int
335 pcmk_cman_dispatch(gpointer user_data)
336 {
337  int rc = cman_dispatch(pcmk_cman_handle, CMAN_DISPATCH_ALL);
338 
339  if (rc < 0) {
340  crm_err("Connection to cman failed: %d", rc);
341  pcmk_cman_handle = 0;
342  return FALSE;
343  }
344  return TRUE;
345 }
346 
347 # define MAX_NODES 256
348 
349 static void
350 cman_event_callback(cman_handle_t handle, void *privdata, int reason, int arg)
351 {
352  int rc = 0, lpc = 0, node_count = 0;
353 
354  cman_cluster_t cluster;
355  static cman_node_t cman_nodes[MAX_NODES];
356 
357  gboolean(*dispatch) (unsigned long long, gboolean) = privdata;
358 
359  switch (reason) {
360  case CMAN_REASON_STATECHANGE:
361 
362  memset(&cluster, 0, sizeof(cluster));
363  rc = cman_get_cluster(pcmk_cman_handle, &cluster);
364  if (rc < 0) {
365  crm_err("Couldn't query cman cluster details: %d %d", rc, errno);
366  return;
367  }
368 
369  crm_peer_seq = cluster.ci_generation;
370  if (arg != crm_have_quorum) {
371  crm_notice("Membership %llu: quorum %s", crm_peer_seq, arg ? "acquired" : "lost");
372  crm_have_quorum = arg;
373 
374  } else {
375  crm_info("Membership %llu: quorum %s", crm_peer_seq,
376  arg ? "retained" : "still lost");
377  }
378 
379  memset(cman_nodes, 0, MAX_NODES * sizeof(cman_node_t));
380  rc = cman_get_nodes(pcmk_cman_handle, MAX_NODES, &node_count, cman_nodes);
381  if (rc < 0) {
382  crm_err("Couldn't query cman node list: %d %d", rc, errno);
383  return;
384  }
385 
386  for (lpc = 0; lpc < node_count; lpc++) {
387  crm_node_t *peer = NULL;
388  const char *name = NULL;
389 
390  if (cman_nodes[lpc].cn_nodeid == 0) {
391  /* Never allow node ID 0 to be considered a member #315711 */
392  /* Skip entirely, its a qdisk */
393  continue;
394  }
395 
396  if(valid_cman_name(cman_nodes[lpc].cn_name, cman_nodes[lpc].cn_nodeid)) {
397  name = cman_nodes[lpc].cn_name;
398  }
399 
400  peer = crm_get_peer(cman_nodes[lpc].cn_nodeid, name);
401  if(cman_nodes[lpc].cn_member) {
402  crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_MEMBER, crm_peer_seq);
403 
404  } else if(peer->state) {
405  crm_update_peer_state(__FUNCTION__, peer, CRM_NODE_LOST, 0);
406 
407  } else {
408  crm_info("State of node %s[%u] is still unknown", peer->uname, peer->id);
409  }
410  }
411 
412  if (dispatch) {
413  dispatch(crm_peer_seq, crm_have_quorum);
414  }
415  break;
416 
417  case CMAN_REASON_TRY_SHUTDOWN:
418  /* Always reply with a negative - pacemaker needs to be stopped first */
419  crm_notice("CMAN wants to shut down: %s", arg ? "forced" : "optional");
420  cman_replyto_shutdown(pcmk_cman_handle, 0);
421  break;
422 
423  case CMAN_REASON_CONFIG_UPDATE:
424  /* Ignore */
425  break;
426  }
427 }
428 # endif
429 
430 gboolean
431 init_cman_connection(gboolean(*dispatch) (unsigned long long, gboolean), void (*destroy) (gpointer))
432 {
433 # if SUPPORT_CMAN
434  int rc = -1, fd = -1;
435  cman_cluster_t cluster;
436 
437  struct mainloop_fd_callbacks cman_fd_callbacks = {
438  .dispatch = pcmk_cman_dispatch,
439  .destroy = destroy,
440  };
441 
442  crm_info("Configuring Pacemaker to obtain quorum from cman");
443 
444  memset(&cluster, 0, sizeof(cluster));
445 
446  pcmk_cman_handle = cman_init(dispatch);
447  if (pcmk_cman_handle == NULL || cman_is_active(pcmk_cman_handle) == FALSE) {
448  crm_err("Couldn't connect to cman");
449  goto cman_bail;
450  }
451 
452  rc = cman_start_notification(pcmk_cman_handle, cman_event_callback);
453  if (rc < 0) {
454  crm_err("Couldn't register for cman notifications: %d %d", rc, errno);
455  goto cman_bail;
456  }
457 
458  /* Get the current membership state */
459  cman_event_callback(pcmk_cman_handle, dispatch, CMAN_REASON_STATECHANGE,
460  cman_is_quorate(pcmk_cman_handle));
461 
462  fd = cman_get_fd(pcmk_cman_handle);
463 
464  mainloop_add_fd("cman", G_PRIORITY_MEDIUM, fd, dispatch, &cman_fd_callbacks);
465 
466  cman_bail:
467  if (rc < 0) {
468  cman_finish(pcmk_cman_handle);
469  return FALSE;
470  }
471 # else
472  crm_err("cman qorum is not supported in this build");
474 # endif
475  return TRUE;
476 }
477 
478 # ifdef SUPPORT_COROSYNC
479 
480 gboolean
481 cluster_connect_quorum(gboolean(*dispatch) (unsigned long long, gboolean),
482  void (*destroy) (gpointer))
483 {
484  crm_err("The Corosync quorum API is not supported in this build");
486  return TRUE;
487 }
488 
489 static gboolean
490 init_cs_connection_classic(crm_cluster_t * cluster)
491 {
492  int rc;
493  int pid = 0;
494  char *pid_s = NULL;
495  const char *name = NULL;
496  crm_node_t *peer = NULL;
497  enum crm_proc_flag proc = 0;
498 
499  struct mainloop_fd_callbacks ais_fd_callbacks = {
501  .destroy = cluster->destroy,
502  };
503 
504  crm_info("Creating connection to our Corosync plugin");
505  rc = coroipcc_service_connect(COROSYNC_SOCKET_NAME, PCMK_SERVICE_ID,
507  &ais_ipc_handle);
508  if (ais_ipc_handle) {
509  coroipcc_fd_get(ais_ipc_handle, &ais_fd_async);
510  } else {
511  crm_info("Connection to our Corosync plugin (%d) failed: %s (%d)",
512  PCMK_SERVICE_ID, strerror(errno), errno);
513  return FALSE;
514  }
515  if (ais_fd_async <= 0 && rc == CS_OK) {
516  crm_err("No context created, but connection reported 'ok'");
517  rc = CS_ERR_LIBRARY;
518  }
519  if (rc != CS_OK) {
520  crm_info("Connection to our Corosync plugin (%d) failed: %s (%d)", PCMK_SERVICE_ID,
521  ais_error2text(rc), rc);
522  }
523 
524  if (rc != CS_OK) {
525  return FALSE;
526  }
527 
528  if (ais_fd_callbacks.destroy == NULL) {
529  ais_fd_callbacks.destroy = plugin_destroy;
530  }
531 
532  mainloop_add_fd("corosync-plugin", G_PRIORITY_MEDIUM, ais_fd_async, cluster, &ais_fd_callbacks);
533  crm_info("AIS connection established");
534 
535  pid = getpid();
536  pid_s = crm_itoa(pid);
537  send_cluster_text(crm_class_cluster, pid_s, TRUE, NULL, crm_msg_ais);
538  free(pid_s);
539 
540  cluster->nodeid = get_local_nodeid(0);
541 
542  name = get_local_node_name();
543  plugin_get_details(NULL, &(cluster->uname));
544  if (safe_str_neq(name, cluster->uname)) {
545  crm_crit("Node name mismatch! Corosync supplied %s but our lookup returned %s",
546  cluster->uname, name);
547  crm_notice
548  ("Node name mismatches usually occur when assigned automatically by DHCP servers");
550  }
551 
552  proc = text2proc(crm_system_name);
553  peer = crm_get_peer(cluster->nodeid, cluster->uname);
554  crm_update_peer_proc(__FUNCTION__, peer, proc|crm_proc_plugin, ONLINESTATUS);
555 
556  return TRUE;
557 }
558 
559 static int
560 pcmk_mcp_dispatch(const char *buffer, ssize_t length, gpointer userdata)
561 {
562  xmlNode *msg = string2xml(buffer);
563 
564  if (msg && is_classic_ais_cluster()) {
565  xmlNode *node = NULL;
566 
567  for (node = __xml_first_child(msg); node != NULL; node = __xml_next(node)) {
568  int id = 0;
569  int children = 0;
570  const char *uname = crm_element_value(node, "uname");
571 
572  crm_element_value_int(node, "id", &id);
573  crm_element_value_int(node, "processes", &children);
574  if (id == 0) {
575  crm_log_xml_err(msg, "Bad Update");
576  } else {
577  crm_node_t *peer = crm_get_peer(id, uname);
578 
579  crm_update_peer_proc(__FUNCTION__, peer, children, NULL);
580  }
581  }
582  }
583 
584  free_xml(msg);
585  return 0;
586 }
587 
588 static void
589 pcmk_mcp_destroy(gpointer user_data)
590 {
591  void (*callback) (gpointer data) = user_data;
592 
593  if (callback) {
594  callback(NULL);
595  }
596 }
597 
598 gboolean
600 {
601  int retries = 0;
602 
603  static struct ipc_client_callbacks mcp_callbacks = {
604  .dispatch = pcmk_mcp_dispatch,
605  .destroy = pcmk_mcp_destroy
606  };
607 
608  while (retries < 5) {
609  int rc = init_cs_connection_once(cluster);
610 
611  retries++;
612  switch (rc) {
613  case CS_OK:
614  if (getenv("HA_mcp") && get_cluster_type() != pcmk_cluster_cman) {
615  xmlNode *poke = create_xml_node(NULL, "poke");
616  mainloop_io_t *ipc =
618  cluster->destroy, &mcp_callbacks);
619 
620  crm_ipc_send(mainloop_get_ipc_client(ipc), poke, 0, 0, NULL);
621  free_xml(poke);
622  }
623  return TRUE;
624  break;
625  case CS_ERR_TRY_AGAIN:
626  case CS_ERR_QUEUE_FULL:
627  sleep(retries);
628  break;
629  default:
630  return FALSE;
631  }
632  }
633 
634  crm_err("Retry count exceeded: %d", retries);
635  return FALSE;
636 }
637 
/*
 * Node-name lookup for the classic (plugin) stack.
 *
 * Always returns NULL, whatever nodeid is given.
 */
char *
classic_node_name(uint32_t nodeid)
{
    /* Always use the uname() default for localhost. No way to look up peers */
    (void) nodeid;
    return NULL;
}
643 
/*
 * Look up the name cman has for a node id.
 *
 * Opens a temporary cman connection for the query. Returns a strdup()'d
 * name, or NULL when cman support is not built in, cman is not active, or
 * cman only knows a placeholder name for the node.
 */
char *
cman_node_name(uint32_t nodeid)
{
    char *result = NULL;

# if SUPPORT_CMAN
    cman_node_t node_info;
    cman_handle_t conn = cman_init(NULL);

    if (conn != NULL && cman_is_active(conn)) {
        memset(&node_info, 0, sizeof(cman_node_t));
        cman_get_node(conn, nodeid, &node_info);

        if (valid_cman_name(node_info.cn_name, nodeid)) {
            result = strdup(node_info.cn_name);
            crm_info("Using CMAN node name %s for %u", result, nodeid);
        }
    }

    cman_finish(conn);
# endif

    if (result == NULL) {
        crm_debug("Unable to get node name for nodeid %u", nodeid);
    }
    return result;
}
671 
672 extern int set_cluster_type(enum cluster_type_e type);
673 
674 gboolean
676 {
677  crm_node_t *peer = NULL;
678  enum cluster_type_e stack = get_cluster_type();
679 
680  crm_peer_init();
681 
682  /* Here we just initialize comms */
683  switch (stack) {
685  if (init_cs_connection_classic(cluster) == FALSE) {
686  return FALSE;
687  }
688  break;
689  case pcmk_cluster_cman:
690  if (cluster_connect_cpg(cluster) == FALSE) {
691  return FALSE;
692  }
693  break;
695  crm_info("Could not find an active corosync based cluster");
696  return FALSE;
697  break;
698  default:
699  crm_err("Invalid cluster type: %s (%d)", name_for_cluster_type(stack), stack);
700  return FALSE;
701  break;
702  }
703 
704  crm_info("Connection to '%s': established", name_for_cluster_type(stack));
705 
706  cluster->nodeid = get_local_nodeid(0);
707  if(cluster->nodeid == 0) {
708  crm_err("Could not establish local nodeid");
709  return FALSE;
710  }
711 
712  cluster->uname = get_node_name(0);
713  if(cluster->uname == NULL) {
714  crm_err("Could not establish local node name");
715  return FALSE;
716  }
717 
718  /* Ensure the local node always exists */
719  peer = crm_get_peer(cluster->nodeid, cluster->uname);
720  cluster->uuid = get_corosync_uuid(peer);
721 
722  return TRUE;
723 }
724 
725 gboolean
726 check_message_sanity(const AIS_Message * msg, const char *data)
727 {
728  gboolean sane = TRUE;
729  int dest = msg->host.type;
730  int tmp_size = msg->header.size - sizeof(AIS_Message);
731 
732  if (sane && msg->header.size == 0) {
733  crm_warn("Message with no size");
734  sane = FALSE;
735  }
736 
737  if (sane && msg->header.error != CS_OK) {
738  crm_warn("Message header contains an error: %d", msg->header.error);
739  sane = FALSE;
740  }
741 
742  if (sane && ais_data_len(msg) != tmp_size) {
743  crm_warn("Message payload size is incorrect: expected %d, got %d", ais_data_len(msg),
744  tmp_size);
745  sane = TRUE;
746  }
747 
748  if (sane && ais_data_len(msg) == 0) {
749  crm_warn("Message with no payload");
750  sane = FALSE;
751  }
752 
753  if (sane && data && msg->is_compressed == FALSE) {
754  int str_size = strlen(data) + 1;
755 
756  if (ais_data_len(msg) != str_size) {
757  int lpc = 0;
758 
759  crm_warn("Message payload is corrupted: expected %d bytes, got %d",
760  ais_data_len(msg), str_size);
761  sane = FALSE;
762  for (lpc = (str_size - 10); lpc < msg->size; lpc++) {
763  if (lpc < 0) {
764  lpc = 0;
765  }
766  crm_debug("bad_data[%d]: %d / '%c'", lpc, data[lpc], data[lpc]);
767  }
768  }
769  }
770 
771  if (sane == FALSE) {
772  crm_err("Invalid message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
773  msg->id, ais_dest(&(msg->host)), msg_type2text(dest),
774  ais_dest(&(msg->sender)), msg_type2text(msg->sender.type),
775  msg->sender.pid, msg->is_compressed, ais_data_len(msg), msg->header.size);
776 
777  } else {
778  crm_trace
779  ("Verified message %d: (dest=%s:%s, from=%s:%s.%d, compressed=%d, size=%d, total=%d)",
780  msg->id, ais_dest(&(msg->host)), msg_type2text(dest), ais_dest(&(msg->sender)),
781  msg_type2text(msg->sender.type), msg->sender.pid, msg->is_compressed,
782  ais_data_len(msg), msg->header.size);
783  }
784 
785  return sane;
786 }
787 #endif
788 
789 static int
790 get_config_opt(confdb_handle_t config,
791  hdb_handle_t object_handle, const char *key, char **value, const char *fallback)
792 {
793  size_t len = 0;
794  char *env_key = NULL;
795  const char *env_value = NULL;
796  char buffer[256];
797 
798  if (*value) {
799  free(*value);
800  *value = NULL;
801  }
802 
803  if (object_handle > 0) {
804  if (CS_OK == confdb_key_get(config, object_handle, key, strlen(key), &buffer, &len)) {
805  *value = strdup(buffer);
806  }
807  }
808 
809  if (*value) {
810  crm_info("Found '%s' for option: %s", *value, key);
811  return 0;
812  }
813 
814  env_key = crm_concat("HA", key, '_');
815  env_value = getenv(env_key);
816  free(env_key);
817 
818  if (*value) {
819  crm_info("Found '%s' in ENV for option: %s", *value, key);
820  *value = strdup(env_value);
821  return 0;
822  }
823 
824  if (fallback) {
825  crm_info("Defaulting to '%s' for option: %s", fallback, key);
826  *value = strdup(fallback);
827 
828  } else {
829  crm_info("No default for option: %s", key);
830  }
831 
832  return -1;
833 }
834 
835 static confdb_handle_t
836 config_find_init(confdb_handle_t config)
837 {
838  cs_error_t rc = CS_OK;
839  confdb_handle_t local_handle = OBJECT_PARENT_HANDLE;
840 
841  rc = confdb_object_find_start(config, local_handle);
842  if (rc == CS_OK) {
843  return local_handle;
844  } else {
845  crm_err("Couldn't create search context: %d", rc);
846  }
847  return 0;
848 }
849 
850 static hdb_handle_t
851 config_find_next(confdb_handle_t config, const char *name, confdb_handle_t top_handle)
852 {
853  cs_error_t rc = CS_OK;
854  hdb_handle_t local_handle = 0;
855 
856  if (top_handle == 0) {
857  crm_err("Couldn't search for %s: no valid context", name);
858  return 0;
859  }
860 
861  crm_trace("Searching for %s in " HDB_X_FORMAT, name, top_handle);
862  rc = confdb_object_find(config, top_handle, name, strlen(name), &local_handle);
863  if (rc != CS_OK) {
864  crm_info("No additional configuration supplied for: %s", name);
865  local_handle = 0;
866  } else {
867  crm_info("Processing additional %s options...", name);
868  }
869  return local_handle;
870 }
871 
872 enum cluster_type_e
874 {
875  confdb_handle_t config;
877 
878  int rc;
879  char *value = NULL;
880  confdb_handle_t top_handle = 0;
881  hdb_handle_t local_handle = 0;
882  static confdb_callbacks_t callbacks = { };
883 
884  rc = confdb_initialize(&config, &callbacks);
885  if (rc != CS_OK) {
886  crm_debug("Could not initialize Cluster Configuration Database API instance error %d", rc);
887  return found;
888  }
889 
890  top_handle = config_find_init(config);
891  local_handle = config_find_next(config, "service", top_handle);
892  while (local_handle) {
893  get_config_opt(config, local_handle, "name", &value, NULL);
894  if (safe_str_eq("pacemaker", value)) {
895  found = pcmk_cluster_classic_ais;
896 
897  get_config_opt(config, local_handle, "ver", &value, "0");
898  crm_trace("Found Pacemaker plugin version: %s", value);
899  break;
900  }
901 
902  local_handle = config_find_next(config, "service", top_handle);
903  }
904 
905  if (found == pcmk_cluster_unknown) {
906  top_handle = config_find_init(config);
907  local_handle = config_find_next(config, "quorum", top_handle);
908  get_config_opt(config, local_handle, "provider", &value, NULL);
909 
910  if (safe_str_eq("quorum_cman", value)) {
911  crm_trace("Found CMAN quorum provider");
912  found = pcmk_cluster_cman;
913  }
914  }
915  free(value);
916 
917  confdb_finalize(config);
918  if (found == pcmk_cluster_unknown) {
919  crm_err
920  ("Corosync is running, but Pacemaker could not find the CMAN or Pacemaker plugin loaded");
921  found = pcmk_cluster_invalid;
922  }
923  return found;
924 }
925 
926 gboolean
928 {
929  enum crm_proc_flag proc = crm_proc_none;
930 
931  if (node == NULL) {
932  crm_trace("NULL");
933  return FALSE;
934 
935  } else if (safe_str_neq(node->state, CRM_NODE_MEMBER)) {
936  crm_trace("%s: state=%s", node->uname, node->state);
937  return FALSE;
938 
939  } else if (is_cman_cluster() && (node->processes & crm_proc_cpg)) {
940  /* If we can still talk to our peer process on that node,
941  * then its also part of the corosync membership
942  */
943  crm_trace("%s: processes=%.8x", node->uname, node->processes);
944  return TRUE;
945 
946  } else if (is_classic_ais_cluster()) {
947  if (node->processes < crm_proc_none) {
948  crm_debug("%s: unknown process list, assuming active for now", node->uname);
949  return TRUE;
950 
951  } else if (is_set(node->processes, crm_proc_none)) {
952  crm_debug("%s: all processes are inactive", node->uname);
953  return FALSE;
954 
955  } else if (is_not_set(node->processes, crm_proc_plugin)) {
956  crm_trace("%s: processes=%.8x", node->uname, node->processes);
957  return FALSE;
958  }
959  }
960 
961  proc = text2proc(crm_system_name);
962  if (proc > crm_proc_none && (node->processes & proc) == 0) {
963  crm_trace("%s: proc %.8x not in %.8x", node->uname, proc, node->processes);
964  return FALSE;
965  }
966 
967  return TRUE;
968 }
bool send_plugin_text(int class, struct iovec *iov)
Definition: legacy.c:133
uint32_t votes
Definition: internal.h:50
enum crm_ais_msg_types type
Definition: internal.h:39
#define CRM_CHECK(expr, failure_action)
Definition: logging.h:164
gboolean init_cman_connection(gboolean(*dispatch)(unsigned long long, gboolean), void(*destroy)(gpointer))
Definition: legacy.c:431
char data[0]
Definition: internal.h:56
void terminate_cs_connection(crm_cluster_t *cluster)
Definition: legacy.c:184
#define crm_notice(fmt, args...)
Definition: logging.h:250
#define CRM_NODE_LOST
Definition: cluster.h:43
gboolean is_compressed
Definition: internal.h:48
uint32_t size
Definition: internal.h:53
#define crm_crit(fmt, args...)
Definition: logging.h:247
gboolean safe_str_neq(const char *a, const char *b)
Definition: utils.c:659
mainloop_io_t * mainloop_add_fd(const char *name, int priority, int fd, void *userdata, struct mainloop_fd_callbacks *callbacks)
Definition: mainloop.c:806
hdb_handle_t config_find_init(struct corosync_api_v1 *config, char *name)
Definition: utils.c:620
uint32_t nodeid
Definition: cluster.h:94
uint32_t quorate
Definition: internal.h:52
uint32_t id
Definition: cluster.h:70
void(* destroy)(gpointer userdata)
Definition: mainloop.h:91
gboolean crm_have_quorum
Definition: membership.c:38
char * get_corosync_uuid(crm_node_t *peer)
Definition: cluster.c:106
const char * get_local_node_name(void)
Definition: cluster.c:289
void crm_peer_init(void)
Definition: membership.c:262
int ais_membership_timer
Definition: legacy.c:45
void(* destroy)(gpointer)
Definition: cluster.h:96
struct mainloop_io_s mainloop_io_t
Definition: mainloop.h:35
int plugin_dispatch(gpointer user_data)
Definition: legacy.c:286
crm_node_t * crm_get_peer(unsigned int id, const char *uname)
Definition: membership.c:519
char * crm_system_name
Definition: utils.c:65
char * uuid
Definition: cluster.h:92
uint32_t pid
Definition: internal.h:49
char * get_node_name(uint32_t nodeid)
Definition: cluster.c:301
char * strerror(int errnum)
enum cluster_type_e find_corosync_variant(void)
Definition: legacy.c:873
char * pcmk_message_common_cs(cpg_handle_t handle, uint32_t nodeid, uint32_t pid, void *content, uint32_t *kind, const char **from)
Definition: cpg.c:255
#define AIS_IPC_MESSAGE_SIZE
Definition: internal.h:25
gboolean cluster_connect_quorum(gboolean(*dispatch)(unsigned long long, gboolean), void(*destroy)(gpointer))
Definition: corosync.c:261
Wrappers for and extensions to glib mainloop.
char uname[MAX_NAME]
Definition: internal.h:64
xmlNode * string2xml(const char *input)
Definition: xml.c:2957
crm_node_t * crm_update_peer(const char *source, unsigned int id, uint64_t born, uint64_t seen, int32_t votes, uint32_t children, const char *uuid, const char *uname, const char *addr, const char *state)
Definition: membership.c:598
gboolean init_cs_connection(crm_cluster_t *cluster)
Definition: corosync.c:342
#define CRM_SYSTEM_MCP
Definition: crm.h:89
hdb_handle_t ais_ipc_handle
Definition: legacy.c:53
#define PCMK_SERVICE_ID
Definition: config.h:547
void plugin_handle_membership(AIS_Message *msg)
Definition: legacy.c:218
void cluster_disconnect_cpg(crm_cluster_t *cluster)
Definition: cpg.c:51
int(* dispatch)(gpointer userdata)
Definition: mainloop.h:90
char uname[MAX_NAME]
Definition: internal.h:53
#define crm_warn(fmt, args...)
Definition: logging.h:249
uint32_t processes
Definition: cluster.h:76
int ais_fd_sync
Definition: legacy.c:49
#define crm_debug(fmt, args...)
Definition: logging.h:253
int get_config_opt(struct corosync_api_v1 *config, hdb_handle_t object_service_handle, char *key, char **value, const char *fallback)
Definition: utils.c:660
cluster_type_e
Definition: cluster.h:206
#define crm_trace(fmt, args...)
Definition: logging.h:254
crm_node_t * crm_update_peer_proc(const char *source, crm_node_t *peer, uint32_t flag, const char *status)
Definition: membership.c:723
#define CRM_MESSAGE_IPC_ACK
Definition: internal.h:26
AIS_Host sender
Definition: internal.h:51
uint32_t id
Definition: internal.h:47
gboolean crm_is_corosync_peer_active(const crm_node_t *node)
Definition: legacy.c:927
gboolean send_cluster_text(int class, const char *data, gboolean local, crm_node_t *node, enum crm_ais_msg_types dest)
Definition: cpg.c:644
gboolean check_message_sanity(const AIS_Message *msg, const char *data)
Definition: plugin.c:1372
xmlNode * create_xml_node(xmlNode *parent, const char *name)
Definition: xml.c:2793
struct crm_ais_msg_s AIS_Message
Definition: internal.h:33
int crm_element_value_int(xmlNode *data, const char *name, int *dest)
Definition: xml.c:4006
const char * crm_element_value(xmlNode *data, const char *name)
Definition: xml.c:5839
#define ais_data_len(msg)
Definition: internal.h:210
struct qb_ipc_response_header cs_ipc_header_response_t
Definition: crm_internal.h:325
#define CRM_NODE_MEMBER
Definition: cluster.h:44
unsigned long long crm_peer_seq
Definition: membership.c:37
gboolean is_cman_cluster(void)
Definition: cluster.c:601
void free_xml(xmlNode *child)
Definition: xml.c:2848
gboolean crm_str_eq(const char *a, const char *b, gboolean use_case)
Definition: utils.c:1428
const char * name_for_cluster_type(enum cluster_type_e type)
Definition: cluster.c:457
int set_cluster_type(enum cluster_type_e type)
Definition: cluster.c:482
#define DAEMON_RESPAWN_STOP
Definition: crm.h:67
gboolean init_cs_connection_once(crm_cluster_t *cluster)
Definition: corosync.c:369
hdb_handle_t config_find_next(struct corosync_api_v1 *config, char *name, hdb_handle_t top_handle)
Definition: utils.c:633
crm_node_t * crm_update_peer_state(const char *source, crm_node_t *node, const char *state, int membership)
Update a node's state and membership information.
Definition: membership.c:906
char * local_uname
Definition: plugin.c:61
char * uname
Definition: cluster.h:93
#define crm_log_xml_err(xml, text)
Definition: logging.h:257
#define crm_perror(level, fmt, args...)
Log a system error message.
Definition: logging.h:226
uint32_t get_local_nodeid(cpg_handle_t handle)
Definition: cpg.c:65
char cname[MAX_NAME]
Definition: internal.h:65
crm_ipc_t * mainloop_get_ipc_client(mainloop_io_t *client)
Definition: mainloop.c:797
#define crm_err(fmt, args...)
Definition: logging.h:248
#define G_PRIORITY_MEDIUM
Definition: mainloop.h:124
#define ENOTUNIQ
Definition: portability.h:227
int crm_ipc_send(crm_ipc_t *client, xmlNode *message, enum crm_ipc_flags flags, int32_t ms_timeout, xmlNode **reply)
Definition: ipc.c:1161
void * ais_ipc_ctx
Definition: legacy.c:51
#define uint32_t
Definition: stdint.in.h:158
#define CRM_ASSERT(expr)
Definition: error.h:35
char data[0]
Definition: internal.h:58
int crm_exit(int rc)
Definition: utils.c:78
char * state
Definition: cluster.h:81
int ais_fd_async
Definition: legacy.c:50
Wrappers for and extensions to libqb IPC.
uint32_t pid
Definition: internal.h:37
char * uname
Definition: cluster.h:79
gboolean ais_membership_force
Definition: legacy.c:46
gboolean crm_is_true(const char *s)
Definition: utils.c:674
mainloop_io_t * mainloop_add_ipc_client(const char *name, int priority, size_t max_size, void *userdata, struct ipc_client_callbacks *callbacks)
Definition: mainloop.c:763
AIS_Host host
Definition: internal.h:50
char * crm_concat(const char *prefix, const char *suffix, char join)
Definition: utils.c:394
char * crm_itoa(int an_int)
Definition: utils.c:432
#define safe_str_eq(a, b)
Definition: util.h:74
#define ONLINESTATUS
Definition: util.h:48
char * crm_strdup_printf(char const *format,...) __attribute__((__format__(__printf__
long long crm_int_helper(const char *text, char **end_text)
Definition: utils.c:588
#define crm_info(fmt, args...)
Definition: logging.h:251
crm_proc_flag
Definition: internal.h:77
int(* dispatch)(const char *buffer, ssize_t length, gpointer userdata)
Definition: mainloop.h:73
gboolean cluster_connect_cpg(crm_cluster_t *cluster)
Definition: cpg.c:537
gboolean is_classic_ais_cluster(void)
Definition: cluster.c:613
enum crm_ais_msg_types type
Definition: internal.h:51
enum cluster_type_e get_cluster_type(void)
Definition: cluster.c:502