patches/md-raid1-fix-test-for-was-read-error-from-last-working-device.patch - pub/scm/linux/kernel/git/lizf/linux-3.4.y-queue - Git at Google

 From 34cab6f42003cb06f48f86a86652984dec338ae9 Mon Sep 17 00:00:00 2001
 From: NeilBrown <neilb@suse.com>
 Date: Fri, 24 Jul 2015 09:22:16 +1000
 Subject: md/raid1: fix test for 'was read error from last working device'.

 commit 34cab6f42003cb06f48f86a86652984dec338ae9 upstream.

 When we get a read error from the last working device, we don't
 try to repair it, and don't fail the device.  We simply report a
 read error to the caller.

 However the current test for 'is this the last working device' is
 wrong.
 When there is only one fully working device, it assumes that a
 non-faulty device is that device.  However a spare which is rebuilding
 would be non-faulty but is not the only working device.

 So change the test from "!Faulty" to "In_sync".  If ->degraded says
 there is only one fully working device and this device is in_sync,
 this must be the one.

 This bug has existed since we allowed read_balance to read from
 a recovering spare in v3.0

 Reported-and-tested-by: Alexander Lyakas <alex.bolshoy@gmail.com>
 Fixes: 76073054c95b ("md/raid1: clean up read_balance.")
 Signed-off-by: NeilBrown <neilb@suse.com>
 Signed-off-by: Zefan Li <lizefan@huawei.com>
 ---
  drivers/md/raid1.c |    2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 --- a/drivers/md/raid1.c
 +++ b/drivers/md/raid1.c
 @@ -314,7 +314,7 @@ static void raid1_end_read_request(struc
  		spin_lock_irqsave(&conf->device_lock, flags);
  		if (r1_bio->mddev->degraded == conf->raid_disks ||
  		    (r1_bio->mddev->degraded == conf->raid_disks-1 &&
 -		     !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
 +		     test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
  			uptodate = 1;
  		spin_unlock_irqrestore(&conf->device_lock, flags);
  	}
	From 34cab6f42003cb06f48f86a86652984dec338ae9 Mon Sep 17 00:00:00 2001
	From: NeilBrown <neilb@suse.com>
	Date: Fri, 24 Jul 2015 09:22:16 +1000
	Subject: md/raid1: fix test for 'was read error from last working device'.

	commit 34cab6f42003cb06f48f86a86652984dec338ae9 upstream.

	When we get a read error from the last working device, we don't
	try to repair it, and don't fail the device. We simply report a
	read error to the caller.

	However the current test for 'is this the last working device' is
	wrong.
	When there is only one fully working device, it assumes that a
	non-faulty device is that device. However a spare which is rebuilding
	would be non-faulty but is not the only working device.

	So change the test from "!Faulty" to "In_sync". If ->degraded says
	there is only one fully working device and this device is in_sync,
	this must be the one.

	This bug has existed since we allowed read_balance to read from
	a recovering spare in v3.0

	Reported-and-tested-by: Alexander Lyakas <alex.bolshoy@gmail.com>
	Fixes: 76073054c95b ("md/raid1: clean up read_balance.")
	Signed-off-by: NeilBrown <neilb@suse.com>
	Signed-off-by: Zefan Li <lizefan@huawei.com>
	---
	drivers/md/raid1.c | 2 +-
	1 file changed, 1 insertion(+), 1 deletion(-)

	--- a/drivers/md/raid1.c
	+++ b/drivers/md/raid1.c
	@@ -314,7 +314,7 @@ static void raid1_end_read_request(struc
	spin_lock_irqsave(&conf->device_lock, flags);
	if (r1_bio->mddev->degraded == conf->raid_disks ||
	(r1_bio->mddev->degraded == conf->raid_disks-1 &&
	- !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags)))
	+ test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
	uptodate = 1;
	spin_unlock_irqrestore(&conf->device_lock, flags);
	}