| From 03e90f29073d0f2f51e3dfb49ff0d99ea2a992ce Mon Sep 17 00:00:00 2001 |
| From: Lars Ellenberg <lars.ellenberg@linbit.com> |
| Date: Thu, 20 Dec 2018 17:23:41 +0100 |
| Subject: drbd: skip spurious timeout (ping-timeo) when failing promote |
| |
| [ Upstream commit 9848b6ddd8c92305252f94592c5e278574e7a6ac ] |
| |
| If you try to promote a Secondary while connected to a Primary |
| and allow-two-primaries is NOT set, we will wait for "ping-timeout" |
| to give this node a chance to detect a dead primary, |
| in case the cluster manager noticed faster than we did. |
| |
| But if we then are *still* connected to a Primary, |
| we fail (after an additional timeout of ping-timeout). |
| |
| This change skips the spurious second timeout. |
| |
| Most people won't really notice, |
| since "ping-timeout" is half a second by default. |
| |
| But in some installations, ping-timeout may be 10 or 20 seconds or more, |
| and spuriously delaying the error return becomes annoying. |
| |
| Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> |
| Signed-off-by: Jens Axboe <axboe@kernel.dk> |
| Signed-off-by: Sasha Levin <sashal@kernel.org> |
| --- |
| drivers/block/drbd/drbd_nl.c | 15 ++++++++------- |
| 1 file changed, 8 insertions(+), 7 deletions(-) |
| |
| diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c |
| index a12f77e6891e..ad13ec66c8e4 100644 |
| --- a/drivers/block/drbd/drbd_nl.c |
| +++ b/drivers/block/drbd/drbd_nl.c |
| @@ -668,14 +668,15 @@ drbd_set_role(struct drbd_device *const device, enum drbd_role new_role, int for |
| if (rv == SS_TWO_PRIMARIES) { |
| /* Maybe the peer is detected as dead very soon... |
| retry at most once more in this case. */ |
| - int timeo; |
| - rcu_read_lock(); |
| - nc = rcu_dereference(connection->net_conf); |
| - timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; |
| - rcu_read_unlock(); |
| - schedule_timeout_interruptible(timeo); |
| - if (try < max_tries) |
| + if (try < max_tries) { |
| + int timeo; |
| try = max_tries - 1; |
| + rcu_read_lock(); |
| + nc = rcu_dereference(connection->net_conf); |
| + timeo = nc ? (nc->ping_timeo + 1) * HZ / 10 : 1; |
| + rcu_read_unlock(); |
| + schedule_timeout_interruptible(timeo); |
| + } |
| continue; |
| } |
| if (rv < SS_SUCCESS) { |
| -- |
| 2.19.1 |
| |