Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ AC_CHECK_LIB(crmcluster, crm_peer_init, , missing="yes")
AC_CHECK_LIB(uuid, uuid_unparse, , missing="yes")
AC_CHECK_LIB(cmap, cmap_initialize, , HAVE_cmap=0)
AC_CHECK_LIB(votequorum, votequorum_getinfo, , HAVE_votequorum=0)
AC_CHECK_LIB(crmcommon, pcmk_pacemakerd_api_ping, HAVE_pacemakerd_api=1, HAVE_pacemakerd_api=0)

dnl pacemaker >= 1.1.8
AC_CHECK_HEADERS(crm/cluster.h)
Expand Down Expand Up @@ -153,6 +154,9 @@ AM_CONDITIONAL(CHECK_QDEVICE_SYNC_TIMEOUT,
test "$HAVE_DECL_VOTEQUORUM_QDEVICE_DEFAULT_SYNC_TIMEOUT" = "1" &&
test "$HAVE_cmap" = "1")

AC_DEFINE_UNQUOTED(USE_PACEMAKERD_API, $HAVE_pacemakerd_api, Turn on synchronization between sbd & pacemakerd)
AM_CONDITIONAL(USE_PACEMAKERD_API, test "$HAVE_pacemakerd_api" = "1")

CONFIGDIR=""
AC_ARG_WITH(configdir,
[ --with-configdir=DIR
Expand Down
20 changes: 20 additions & 0 deletions src/sbd-inquisitor.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ bool do_flush = true;
char timeout_sysrq_char = 'b';
bool move_to_root_cgroup = true;
bool enforce_moving_to_root_cgroup = false;
bool sync_resource_startup = false;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be nice if the default were true when USE_PACEMAKERD_API is set, but pacemaker would have no way of knowing that. I can think of a couple of ways around that:

  • distros can change the sysconfig setting when they know their versions are compatible
  • or sysconfig could be a .in file substituted at configure time

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, a pity that the default has to be false to serve its purpose :-(
it was the intention to change the sysconfig-setting from the distro-spec-file.
there is already a sed-line for s390x. having a configure switch
(set by the distro-spec-file + maybe some additional auto-magic derived
from what is seen in pacemaker) would probably be more elegant though.


int parse_device_line(const char *line);

Expand Down Expand Up @@ -964,6 +965,25 @@ int main(int argc, char **argv, char **envp)
}
}

value = getenv("SBD_SYNC_RESOURCE_STARTUP");
if(value) {
sync_resource_startup = crm_is_true(value);
}
#if !USE_PACEMAKERD_API
if (sync_resource_startup) {
fprintf(stderr, "Failed to sync resource-startup as "
"SBD was built against pacemaker not supporting pacemakerd-API.\n");

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fprintf() or cl_log()?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that is an arguable point. using fprintf is at least consistent as all early-error-exits are using fprintf so far. if changed I guess it should be done consistently as an own commit.

exit_status = -1;
goto out;
}
#else
if (!sync_resource_startup) {
cl_log(LOG_WARNING, "SBD built against pacemaker supporting "
"pacemakerd-API. Should think about enabling "
"SBD_SYNC_RESOURCE_STARTUP.");
}
#endif

while ((c = getopt(argc, argv, "czC:DPRTWZhvw:d:n:p:1:2:3:4:5:t:I:F:S:s:r:")) != -1) {
switch (c) {
case 'D':
Expand Down
134 changes: 123 additions & 11 deletions src/sbd-pacemaker.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,62 @@ pe_free_working_set(pe_working_set_t *data_set)

#endif

/* Forward declaration: the pacemakerd reply callback below calls
 * clean_up() ahead of its definition further down in this file.
 */
static void clean_up(int rc);

#if USE_PACEMAKERD_API
#include <crm/common/ipc_pacemakerd.h>

/* IPC API object used to talk to pacemakerd. It is created with
 * pcmk_new_ipc_api() in servant_pcmk() and released (and reset to
 * NULL) in clean_up().
 */
static pcmk_ipc_api_t *pacemakerd_api = NULL;
/* 'last_good' timestamp taken from the most recent accepted ping reply
 * of pacemakerd; stays (time_t) 0 until such a reply has been seen.
 * mon_timer_notify() compares it against the current time and
 * timeout_watchdog.
 */
static time_t last_ok = (time_t) 0;

/*
 * Event callback for the pacemakerd IPC connection (registered through
 * pcmk_register_ipc_callback() in servant_pcmk()).
 *
 * Only reply events are evaluated: a disconnect is logged, every other
 * event type is ignored. A reply is used only if its exit status is
 * CRM_EX_OK, it is a ping reply, it carries a non-zero last_good
 * timestamp and its ping status is pcmk_rc_ok. For such a reply:
 *   - state running / shutting_down: remember last_good in last_ok
 *   - state shutdown_complete: leave via
 *     clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN)
 *   - any other state: nothing is done
 *
 * pacemakerd_api and user_data are not used here.
 */
static void
pacemakerd_event_cb(pcmk_ipc_api_t *pacemakerd_api,
                    enum pcmk_ipc_event event_type, crm_exit_t status,
                    void *event_data, void *user_data)
{
    pcmk_pacemakerd_api_reply_t *ping_reply = event_data;

    if (event_type == pcmk_ipc_event_disconnect) {
        /* Unexpected */
        cl_log(LOG_ERR, "Lost connection to pacemakerd\n");
        return;
    }

    if (event_type != pcmk_ipc_event_reply) {
        /* not an event we care about */
        return;
    }

    if (status != CRM_EX_OK) {
        cl_log(LOG_ERR, "Bad reply from pacemakerd: %s",
               crm_exit_str(status));
        return;
    }

    if (ping_reply->reply_type != pcmk_pacemakerd_reply_ping) {
        cl_log(LOG_ERR, "Unknown reply type %d from pacemakerd\n",
               ping_reply->reply_type);
        return;
    }

    /* ignore pings without a usable timestamp or with a bad status */
    if ((ping_reply->data.ping.last_good == (time_t) 0) ||
        (ping_reply->data.ping.status != pcmk_rc_ok)) {
        return;
    }

    switch (ping_reply->data.ping.state) {
        case pcmk_pacemakerd_state_shutdown_complete:
            clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN);
            break;

        case pcmk_pacemakerd_state_running:
        case pcmk_pacemakerd_state_shutting_down:
            last_ok = ping_reply->data.ping.last_good;
            break;

        default:
            break;
    }
}
#endif

extern int disk_count;

static void clean_up(int rc);
Expand Down Expand Up @@ -133,10 +189,13 @@ mon_cib_connection_destroy(gpointer user_data)
cib->cmds->signoff(cib);
/* retrigger as last one might have been skipped */
mon_refresh_state(NULL);
if (pcmk_clean_shutdown) {


if ((pcmk_clean_shutdown) && (!sync_resource_startup)) {
/* assume a graceful pacemaker-shutdown */
clean_up(EXIT_PCMK_SERVANT_GRACEFUL_SHUTDOWN);
}

/* getting here we aren't sure about the pacemaker-state
so try to use the timeout to reconnect and get
everything sorted out again
Expand Down Expand Up @@ -196,6 +255,13 @@ mon_timer_notify(gpointer data)
g_source_remove(timer_id_notify);
}

#if USE_PACEMAKERD_API
{
time_t now = time(NULL);

if ((last_ok <= now) && (now - last_ok < timeout_watchdog)) {
#endif

if (cib_connected) {
if (counter == counter_max) {
mon_retrieve_current_cib();
Expand All @@ -207,6 +273,16 @@ mon_timer_notify(gpointer data)
counter++;
}
}

#if USE_PACEMAKERD_API
}
}
if (pcmk_connect_ipc(pacemakerd_api,
pcmk_ipc_dispatch_main) == pcmk_rc_ok) {
pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name);
}
#endif

timer_id_notify = g_timeout_add(timeout_loop * 1000, mon_timer_notify, NULL);
return FALSE;
}
Expand Down Expand Up @@ -526,6 +602,14 @@ clean_up(int rc)
cib = NULL;
}

#if USE_PACEMAKERD_API
if (pacemakerd_api != NULL) {
pcmk_ipc_api_t *capi = pacemakerd_api;
pacemakerd_api = NULL; // Ensure we can't free this twice
pcmk_free_ipc_api(capi);
}
#endif

if (rc >= 0) {
exit(rc);
}
Expand All @@ -535,11 +619,11 @@ clean_up(int rc)
int
servant_pcmk(const char *diskname, int mode, const void* argp)
{
int exit_code = 0;
int exit_code = 0;

crm_system_name = strdup("sbd:pcmk");
cl_log(LOG_NOTICE, "Monitoring Pacemaker health");
set_proc_title("sbd: watcher: Pacemaker");
crm_system_name = strdup("sbd:pcmk");
cl_log(LOG_NOTICE, "Monitoring Pacemaker health");
set_proc_title("sbd: watcher: Pacemaker");
setenv("PCMK_watchdog", "true", 1);

if(debug == 0) {
Expand All @@ -548,12 +632,40 @@ servant_pcmk(const char *diskname, int mode, const void* argp)
}


if (data_set == NULL) {
data_set = pe_new_working_set();
}
if (data_set == NULL) {
return -1;
}
if (data_set == NULL) {
data_set = pe_new_working_set();
}
if (data_set == NULL) {
return -1;
}

#if USE_PACEMAKERD_API
{
int rc;

rc = pcmk_new_ipc_api(&pacemakerd_api, pcmk_ipc_pacemakerd);
if (pacemakerd_api == NULL) {
cl_log(LOG_ERR, "Could not connect to pacemakerd: %s\n",
pcmk_rc_str(rc));
return -1;
}
pcmk_register_ipc_callback(pacemakerd_api, pacemakerd_event_cb, NULL);
do {
rc = pcmk_connect_ipc(pacemakerd_api, pcmk_ipc_dispatch_main);
if (rc != pcmk_rc_ok) {
cl_log(LOG_DEBUG, "Could not connect to pacemakerd: %s\n",
pcmk_rc_str(rc));
sleep(reconnect_msec / 1000);
}
} while (rc != pcmk_rc_ok);
/* send a ping to pacemakerd to wake it up */
pcmk_pacemakerd_api_ping(pacemakerd_api, crm_system_name);
/* cib should come up now as well so it's time
* to have the inquisitor have a closer look
*/
notify_parent();
}
#endif

if (current_cib == NULL) {
cib = cib_new();
Expand Down
1 change: 1 addition & 0 deletions src/sbd.h
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ extern bool do_flush;
extern char timeout_sysrq_char;
extern bool move_to_root_cgroup;
extern bool enforce_moving_to_root_cgroup;
extern bool sync_resource_startup;

/* Global, non-tunable variables: */
extern int sector_size;
Expand Down
14 changes: 14 additions & 0 deletions src/sbd.sysconfig
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,20 @@ SBD_TIMEOUT_ACTION=flush,reboot
#
SBD_MOVE_TO_ROOT_CGROUP=auto

## Type: yesno
## Default: no
#
# If resource startup syncing is enabled then pacemakerd is
# going to wait to be pinged via IPC before it starts resources.
# On shutdown pacemakerd is going to wait in a state where it
# has cleanly shut down resources till sbd fetches that state.
#
# Default is 'no' to prevent pacemaker from waiting for a
# ping that will never come when working together with an sbd
# version that doesn't support the feature.
#
SBD_SYNC_RESOURCE_STARTUP=no

## Type: string
## Default: ""
#
Expand Down