From: piaojun <piaojun@huawei.com>
Date: Thu, 5 Apr 2018 16:19:11 -0700
Subject: ocfs2/dlm: wait for dlm recovery done when migrating all lock
 resources

commit 60c7ec9ee4a3410c2cb08850102d363c7e207f48 upstream.

Wait for dlm recovery to finish when migrating all lock resources;
otherwise a new lock resource may be left behind after the node has
already left the dlm domain, and that leftover lock resource will cause
the other nodes to BUG.  The scenario is shown below; a condensed
sketch of the fix follows the diagram.

NodeA                       NodeB                      NodeC

umount:
  dlm_unregister_domain()
    dlm_migrate_all_locks()

                            NodeB down

do recovery for NodeB
and collect a new lockres
from other live nodes:

  dlm_do_recovery
    dlm_remaster_locks
      dlm_request_all_locks:

  dlm_mig_lockres_handler
    dlm_new_lockres
      __dlm_insert_lockres

at last NodeA becomes the
master of the new lockres
and leaves the domain:
  dlm_leave_domain()

                                                  mount:
                                                    dlm_join_domain()

                                                  touch file and request
                                                  for the owner of the new
                                                  lockres, but all the
                                                  other nodes said 'NO',
                                                  so NodeC decides to be
                                                  the owner, and sends an
                                                  assert master msg to the
                                                  other nodes:
                                                    dlmlock()
                                                      dlm_get_lock_resource()
                                                        dlm_do_assert_master()

                                                  other nodes receive the msg
                                                  and find two masters exist,
                                                  which finally hits the BUG in
                                                  dlm_assert_master_handler()
                                                    -->BUG();
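
The change itself is small; as a condensed sketch (not the literal
hunks, which are in the diff below), the idea is:

  /* dlm_migrate_all_locks(): after the hash scan finds nothing left,
   * only declare migration finished if no recovery is in flight;
   * otherwise retry, since recovery may still insert a freshly
   * collected lockres into the hash. */
  if (!num) {
          if (dlm->reco.state & DLM_RECO_STATE_ACTIVE)
                  ret = -EAGAIN;          /* rescan after recovery */
          else
                  dlm->migrate_done = 1;  /* no recovery from now on */
  }

  /* dlm_do_recovery(): bail out early once migration is done, so no
   * new lockres can be created after this node leaves the domain. */
  if (dlm->migrate_done) {
          spin_unlock(&dlm->spinlock);
          return 0;
  }

dlm_begin_recovery() is also changed to expect dlm->spinlock to be held
by the caller, which keeps the migrate_done check and the transition to
DLM_RECO_STATE_ACTIVE serialized under the same lock.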

Link: http://lkml.kernel.org/r/5AAA6E25.7090303@huawei.com
Fixes: bc9838c4d44a ("dlm: allow dlm do recovery during shutdown")
Signed-off-by: Jun Piao <piaojun@huawei.com>
Reviewed-by: Alex Chen <alex.chen@huawei.com>
Reviewed-by: Yiwen Jiang <jiangyiwen@huawei.com>
Acked-by: Joseph Qi <jiangqi903@gmail.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
---
 fs/ocfs2/dlm/dlmcommon.h   |  1 +
 fs/ocfs2/dlm/dlmdomain.c   | 15 +++++++++++++++
 fs/ocfs2/dlm/dlmrecovery.c | 13 ++++++++++---
 3 files changed, 26 insertions(+), 3 deletions(-)

--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -140,6 +140,7 @@ struct dlm_ctxt
         u8 node_num;
         u32 key;
         u8 joining_node;
+        u8 migrate_done; /* set to 1 means node has migrated all lock resources */
         wait_queue_head_t dlm_join_events;
         unsigned long live_nodes_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
         unsigned long domain_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -460,6 +460,19 @@ redo_bucket:
                         cond_resched_lock(&dlm->spinlock);
                         num += n;
                 }
+
+        if (!num) {
+                if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) {
+                        mlog(0, "%s: perhaps there are more lock resources "
+                             "need to be migrated after dlm recovery\n", dlm->name);
+                        ret = -EAGAIN;
+                } else {
+                        mlog(0, "%s: we won't do dlm recovery after migrating "
+                             "all lock resources\n", dlm->name);
+                        dlm->migrate_done = 1;
+                }
+        }
+
         spin_unlock(&dlm->spinlock);
         wake_up(&dlm->dlm_thread_wq);

@@ -2063,6 +2076,8 @@ static struct dlm_ctxt *dlm_alloc_ctxt(c
         dlm->joining_node = DLM_LOCK_RES_OWNER_UNKNOWN;
         init_waitqueue_head(&dlm->dlm_join_events);

+        dlm->migrate_done = 0;
+
         dlm->reco.new_master = O2NM_INVALID_NODE_NUM;
         dlm->reco.dead_node = O2NM_INVALID_NODE_NUM;

--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -423,12 +423,11 @@ void dlm_wait_for_recovery(struct dlm_ct

 static void dlm_begin_recovery(struct dlm_ctxt *dlm)
 {
-        spin_lock(&dlm->spinlock);
+        assert_spin_locked(&dlm->spinlock);
         BUG_ON(dlm->reco.state & DLM_RECO_STATE_ACTIVE);
         printk(KERN_NOTICE "o2dlm: Begin recovery on domain %s for node %u\n",
                dlm->name, dlm->reco.dead_node);
         dlm->reco.state |= DLM_RECO_STATE_ACTIVE;
-        spin_unlock(&dlm->spinlock);
 }

 static void dlm_end_recovery(struct dlm_ctxt *dlm)
@@ -456,6 +455,13 @@ static int dlm_do_recovery(struct dlm_ct

         spin_lock(&dlm->spinlock);

+        if (dlm->migrate_done) {
+                mlog(0, "%s: no need do recovery after migrating all "
+                     "lock resources\n", dlm->name);
+                spin_unlock(&dlm->spinlock);
+                return 0;
+        }
+
         /* check to see if the new master has died */
         if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM &&
             test_bit(dlm->reco.new_master, dlm->recovery_map)) {
@@ -490,12 +496,13 @@ static int dlm_do_recovery(struct dlm_ct
         mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n",
              dlm->name, task_pid_nr(dlm->dlm_reco_thread_task),
              dlm->reco.dead_node);
-        spin_unlock(&dlm->spinlock);

         /* take write barrier */
         /* (stops the list reshuffling thread, proxy ast handling) */
         dlm_begin_recovery(dlm);

+        spin_unlock(&dlm->spinlock);
+
         if (dlm->reco.new_master == dlm->node_num)
                 goto master_here;
