Skip to content

Commit eb73f22

Browse files
author
Ken Gaillot
authored
Merge pull request #2027 from gao-yan/combine-priority-fencing-delay
Feature: any delays from pcmk_delay_base/max are added to priority-fencing-delay
2 parents 0a67b34 + 6ab6d38 commit eb73f22

File tree

14 files changed

+79
-84
lines changed

14 files changed

+79
-84
lines changed

cts/cts-fencing.in

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1126,22 +1126,23 @@ class Tests(object):
11261126
test.add_stonith_neg_log_pattern("does not advertise support for 'reboot', performing 'off'")
11271127
test.add_stonith_log_pattern("with device 'true1' returned: 0 (OK)")
11281128

1129-
# make sure enforced fencing delay is applied only for the first device in the first level
1129+
# make sure requested fencing delay is applied only for the first device in the first level
1130+
# make sure static delay from pcmk_delay_base is added
11301131
for test_type in test_types:
11311132
if test_type["use_cpg"] == 0:
11321133
continue
11331134

11341135
test = self.new_test("%s_topology_delay" % test_type["prefix"],
1135-
"Verify enforced fencing delay is applied only for the first device in the first level.",
1136+
"Verify requested fencing delay is applied only for the first device in the first level and pcmk_delay_base is added.",
11361137
test_type["use_cpg"])
11371138
test.add_cmd("stonith_admin",
1138-
"--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
1139+
"--output-as=xml -R true1 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"")
11391140
test.add_cmd("stonith_admin",
1140-
"--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\"")
1141+
"--output-as=xml -R false1 -a fence_dummy -o \"mode=fail\" -o \"pcmk_host_list=node1 node2 node3\" -o \"pcmk_delay_base=1\"")
11411142
test.add_cmd("stonith_admin",
1142-
"--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
1143+
"--output-as=xml -R true2 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
11431144
test.add_cmd("stonith_admin",
1144-
"--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
1145+
"--output-as=xml -R true3 -a fence_dummy -o \"mode=pass\" -o \"pcmk_host_list=node1 node2 node3\"")
11451146

11461147
test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v true1")
11471148
test.add_cmd("stonith_admin", "--output-as=xml -r node3 -i 1 -v false1")
@@ -1150,8 +1151,8 @@ class Tests(object):
11501151

11511152
test.add_cmd("stonith_admin", "--output-as=xml -F node3 --delay 1")
11521153

1153-
test.add_stonith_log_pattern("Delaying 'off' action targeting node3 on true1 for enforced 1s")
1154-
test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on false1")
1154+
test.add_stonith_log_pattern("Delaying 'off' action targeting node3 on true1 for 2s (timeout=120s, requested_delay=1s, base=1s, max=1s)")
1155+
test.add_stonith_log_pattern("Delaying 'off' action targeting node3 on false1 for 1s (timeout=120s, requested_delay=0s, base=1s, max=1s)")
11551156
test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on true2")
11561157
test.add_stonith_neg_log_pattern("Delaying 'off' action targeting node3 on true3")
11571158

daemons/controld/controld_fencing.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -848,7 +848,7 @@ te_fence_node(crm_graph_t *graph, crm_action_t *action)
848848

849849
rc = stonith_api->cmds->fence_with_delay(stonith_api, options, target, type,
850850
(int) (transition_graph->stonith_timeout / 1000),
851-
0, crm_atoi(priority_delay, "-1"));
851+
0, crm_atoi(priority_delay, "0"));
852852

853853
transition_key = pcmk__transition_key(transition_graph->id, action->id, 0,
854854
te_uuid),

daemons/fenced/fenced_commands.c

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -241,8 +241,7 @@ create_async_command(xmlNode * msg)
241241
crm_element_value_int(msg, F_STONITH_CALLOPTS, &(cmd->options));
242242
crm_element_value_int(msg, F_STONITH_TIMEOUT, &(cmd->default_timeout));
243243
cmd->timeout = cmd->default_timeout;
244-
// Default value -1 means no enforced fencing delay
245-
cmd->start_delay = -1;
244+
// Value -1 means disable any static/random fencing delays
246245
crm_element_value_int(msg, F_STONITH_DELAY, &(cmd->start_delay));
247246

248247
cmd->origin = crm_element_value_copy(msg, F_ORIG);
@@ -465,7 +464,7 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
465464
{
466465
int delay_max = 0;
467466
int delay_base = 0;
468-
bool delay_enforced = (cmd->start_delay >= 0);
467+
int requested_delay = cmd->start_delay;
469468

470469
CRM_CHECK(cmd != NULL, return);
471470
CRM_CHECK(device != NULL, return);
@@ -498,35 +497,36 @@ schedule_stonith_command(async_command_t * cmd, stonith_device_t * device)
498497
device->pending_ops = g_list_append(device->pending_ops, cmd);
499498
mainloop_set_trigger(device->work);
500499

501-
// No enforced fencing delay
502-
if (delay_enforced == FALSE) {
503-
delay_max = get_action_delay_max(device, cmd->action);
504-
delay_base = get_action_delay_base(device, cmd->action);
505-
if (delay_max == 0) {
506-
delay_max = delay_base;
507-
}
508-
if (delay_max < delay_base) {
509-
crm_warn("Base-delay (%ds) is larger than max-delay (%ds) "
510-
"for %s on %s - limiting to max-delay",
511-
delay_base, delay_max, cmd->action, device->id);
512-
delay_base = delay_max;
513-
}
514-
if (delay_max > 0) {
515-
// coverity[dont_call] We're not using rand() for security
516-
cmd->start_delay =
517-
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
518-
+ delay_base;
519-
}
500+
// Value -1 means disable any static/random fencing delays
501+
if (requested_delay < 0) {
502+
return;
503+
}
504+
505+
delay_max = get_action_delay_max(device, cmd->action);
506+
delay_base = get_action_delay_base(device, cmd->action);
507+
if (delay_max == 0) {
508+
delay_max = delay_base;
509+
}
510+
if (delay_max < delay_base) {
511+
crm_warn("Base-delay (%ds) is larger than max-delay (%ds) "
512+
"for %s on %s - limiting to max-delay",
513+
delay_base, delay_max, cmd->action, device->id);
514+
delay_base = delay_max;
515+
}
516+
if (delay_max > 0) {
517+
// coverity[dont_call] We're not using rand() for security
518+
cmd->start_delay +=
519+
((delay_max != delay_base)?(rand() % (delay_max - delay_base)):0)
520+
+ delay_base;
520521
}
521522

522523
if (cmd->start_delay > 0) {
523-
crm_notice("Delaying '%s' action%s%s on %s for %s%ds (timeout=%ds, base=%ds, "
524-
"max=%ds)",
524+
crm_notice("Delaying '%s' action%s%s on %s for %ds (timeout=%ds, "
525+
"requested_delay=%ds, base=%ds, max=%ds)",
525526
cmd->action,
526527
cmd->victim ? " targeting " : "", cmd->victim ? cmd->victim : "",
527-
device->id, delay_enforced ? "enforced " : "",
528-
cmd->start_delay, cmd->timeout,
529-
delay_base, delay_max);
528+
device->id, cmd->start_delay, cmd->timeout,
529+
requested_delay, delay_base, delay_max);
530530
cmd->delay_id =
531531
g_timeout_add_seconds(cmd->start_delay, start_delay_helper, cmd);
532532
}

daemons/fenced/fenced_remote.c

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -842,7 +842,7 @@ stonith_topology_next(remote_fencing_op_t * op)
842842
op->client_name, op->originator, op->id);
843843
set_op_device_list(op, tp->levels[op->level]);
844844

845-
// The enforced delay has been applied for the first fencing level
845+
// The requested delay has been applied for the first fencing level
846846
if (op->level > 1 && op->delay > 0) {
847847
op->delay = 0;
848848
}
@@ -1004,9 +1004,7 @@ create_remote_stonith_op(const char *client, xmlNode * request, gboolean peer)
10041004
op = calloc(1, sizeof(remote_fencing_op_t));
10051005

10061006
crm_element_value_int(request, F_STONITH_TIMEOUT, &(op->base_timeout));
1007-
1008-
// Default value -1 means no enforced fencing delay
1009-
op->delay = -1;
1007+
// Value -1 means disable any static/random fencing delays
10101008
crm_element_value_int(request, F_STONITH_DELAY, &(op->delay));
10111009

10121010
if (peer && dev) {
@@ -1458,7 +1456,7 @@ advance_op_topology(remote_fencing_op_t *op, const char *device, xmlNode *msg,
14581456
crm_trace("Next targeting %s on behalf of %s@%s (rc was %d)",
14591457
op->target, op->originator, op->client_name, rc);
14601458

1461-
// The enforced delay has been applied for the first device
1459+
// The requested delay has been applied for the first device
14621460
if (op->delay > 0) {
14631461
op->delay = 0;
14641462
}
@@ -1517,10 +1515,7 @@ call_remote_stonith(remote_fencing_op_t * op, st_query_result_t * peer)
15171515
crm_xml_add(remote_op, F_STONITH_CLIENTNAME, op->client_name);
15181516
crm_xml_add_int(remote_op, F_STONITH_TIMEOUT, timeout);
15191517
crm_xml_add_int(remote_op, F_STONITH_CALLOPTS, op->call_options);
1520-
1521-
if (op->delay >= 0) {
1522-
crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
1523-
}
1518+
crm_xml_add_int(remote_op, F_STONITH_DELAY, op->delay);
15241519

15251520
if (device) {
15261521
timeout_one = TIMEOUT_MULTIPLY_FACTOR *

daemons/fenced/pacemaker-fenced.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ typedef struct remote_fencing_op_s {
113113
* values associated with the devices this fencing operation may call */
114114
gint total_timeout;
115115

116-
/*! Enforced fencing delay.
117-
* Default value -1 means no enforced fencing delay. */
116+
/*! Requested fencing delay.
117+
* Value -1 means disable any static/random fencing delays. */
118118
int delay;
119119

120120
/*! Delegate is the node being asked to perform a fencing action

doc/Pacemaker_Explained/en-US/Ch-Options.txt

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -292,21 +292,21 @@ are +stop+ to attempt to immediately stop pacemaker and stay stopped, or
292292
on failure. The default is likely to be changed to +panic+ in a future release.
293293
'(since 2.0.3)'
294294

295-
| priority-fencing-delay | |
295+
| priority-fencing-delay | 0 |
296296
indexterm:[priority-fencing-delay,Cluster Option]
297297
indexterm:[Cluster,Option,priority-fencing-delay]
298-
Enforce specified delay for the fencings that are targeting the lost
298+
Apply the specified delay to fencing actions that target the lost
299299
nodes with the highest total resource priority in case we don't
300300
have the majority of the nodes in our cluster partition, so that
301301
the more significant nodes potentially win any fencing match,
302302
which is especially meaningful under split-brain of 2-node
303303
cluster. A promoted resource instance takes the base priority + 1
304-
on calculation if the base priority is not 0. If all the nodes
305-
have equal priority, then any pcmk_delay_base/max configured for
306-
the corresponding fencing resources will be applied. Otherwise as
307-
long as it's set, even if to 0, it takes precedence over any
308-
configured pcmk_delay_base/max. By default, priority fencing
309-
delay is disabled. '(since 2.0.4)'
304+
on calculation if the base priority is not 0. Any static/random
305+
delays that are introduced by `pcmk_delay_base/max` configured
306+
for the corresponding fencing resources will be added to this
307+
delay. This delay should be significantly greater than (to be safe, at
308+
least twice) the maximum `pcmk_delay_base/max`. By default, priority
309+
fencing delay is disabled. '(since 2.0.4)'
310310

311311
| cluster-delay | 60s |
312312
indexterm:[cluster-delay,Cluster Option]

include/crm/pengine/pe_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ struct pe_working_set_s {
176176
time_t recheck_by; // Hint to controller to re-run scheduler by this time
177177
int ninstances; // Total number of resource instances
178178
guint shutdown_lock;// How long (seconds) to lock resources to shutdown node
179-
int priority_fencing_delay; // Enforced priority fencing delay
179+
int priority_fencing_delay; // Priority fencing delay
180180
};
181181

182182
enum pe_check_parameters {

include/crm/stonith-ng.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@ typedef struct stonith_api_operations_s
393393
char **error_output);
394394

395395
/*!
396-
* \brief Issue a fencing action against a node with enforced fencing delay.
396+
* \brief Issue a fencing action against a node with requested fencing delay.
397397
*
398398
* \note Possible actions are, 'on', 'off', and 'reboot'.
399399
*
@@ -403,7 +403,8 @@ typedef struct stonith_api_operations_s
403403
* \param action, The fencing action to take
404404
* \param timeout, The default per device timeout to use with each device
405405
* capable of fencing the target.
406-
* \param delay, Any enforced fencing delay. -1 to disable
406+
* \param delay, Apply a fencing delay. Value -1 disables all delays, including
407+
* any static/random fencing delays from pcmk_delay_base/max
407408
*
408409
* \retval 0 success
409410
* \retval negative error code on failure.

include/pcmki/pcmki_fence.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
* \param[in] tolerance If a successful action for \p target happened within
2727
* this many ms, return 0 without performing the action
2828
* again.
29-
* \param[in] delay Enforce a fencing delay. Value -1 means disabled.
29+
* \param[in] delay Apply a fencing delay. Value -1 disables all delays, including
30+
* any static/random fencing delays from pcmk_delay_base/max
3031
*
3132
* \return Standard Pacemaker return code
3233
*/

lib/fencing/st_client.c

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,10 +1080,7 @@ stonith_api_fence_with_delay(stonith_t * stonith, int call_options, const char *
10801080
crm_xml_add(data, F_STONITH_ACTION, action);
10811081
crm_xml_add_int(data, F_STONITH_TIMEOUT, timeout);
10821082
crm_xml_add_int(data, F_STONITH_TOLERANCE, tolerance);
1083-
1084-
if (delay >= 0) {
1085-
crm_xml_add_int(data, F_STONITH_DELAY, delay);
1086-
}
1083+
crm_xml_add_int(data, F_STONITH_DELAY, delay);
10871084

10881085
rc = stonith_send_command(stonith, STONITH_OP_FENCE, data, NULL, call_options, timeout);
10891086
free_xml(data);
@@ -1096,7 +1093,7 @@ stonith_api_fence(stonith_t * stonith, int call_options, const char *node, const
10961093
int timeout, int tolerance)
10971094
{
10981095
return stonith_api_fence_with_delay(stonith, call_options, node, action,
1099-
timeout, tolerance, -1);
1096+
timeout, tolerance, 0);
11001097
}
11011098

11021099
static int

0 commit comments

Comments
 (0)