| From ea038f63ac52439e7816295fa6064fe95e6c1f51 Mon Sep 17 00:00:00 2001 |
| From: James Bottomley <James.Bottomley@suse.de> |
| Date: Fri, 21 Aug 2009 09:47:54 -0600 |
| Subject: SCSI: fix oops during scsi scanning |
| |
| From: James Bottomley <James.Bottomley@suse.de> |
| |
| commit ea038f63ac52439e7816295fa6064fe95e6c1f51 upstream. |
| |
| Chris Webb reported: |
| p0# uname -a |
| Linux f7ea8425-d45b-490f-a738-d181d0df6963.host.elastichosts.com 2.6.30.4-elastic-lon-p #2 SMP PREEMPT Thu Aug 20 14:30:50 BST 2009 x86_64 Intel(R) Xeon(R) CPU E5420 @ 2.50GHz GenuineIntel GNU/Linux |
| p0# zgrep SCAN_ASYNC /proc/config.gz |
| # CONFIG_SCSI_SCAN_ASYNC is not set |
| |
| p0# cat /var/log/kern/2009-08-20 |
| [...] |
| 15:27:10.485 kernel: scsi9 : iSCSI Initiator over TCP/IP |
| 15:27:11.493 kernel: scsi 9:0:0:0: RAID IET Controller 0001 PQ: 0 ANSI: 5 |
| 15:27:11.493 kernel: scsi 9:0:0:0: Attached scsi generic sg6 type 12 |
| 15:27:11.495 kernel: scsi 9:0:0:1: Direct-Access IET VIRTUAL-DISK 0001 PQ: 0 ANSI: 5 |
| 15:27:11.495 kernel: sd 9:0:0:1: Attached scsi generic sg7 type 0 |
| 15:27:11.495 kernel: sd 9:0:0:1: [sdg] 4194304 512-byte hardware sectors: (2.14 GB/2.00 GiB) |
| 15:27:11.495 kernel: sd 9:0:0:1: [sdg] Write Protect is off |
| 15:27:11.495 kernel: sd 9:0:0:1: [sdg] Write cache: disabled, read cache: enabled, doesn't support DPO or FUA |
| 15:27:13.012 kernel: sdg:<6>scsi 9:0:0:1: [sdg] Unhandled error code |
| 15:27:13.012 kernel: scsi 9:0:0:1: [sdg] Result: hostbyte=0x07 driverbyte=0x00 |
| 15:27:13.012 kernel: end_request: I/O error, dev sdg, sector 0 |
| 15:27:13.012 kernel: Buffer I/O error on device sdg, logical block 0 |
| 15:27:13.012 kernel: ldm_validate_partition_table(): Disk read failed. |
| 15:27:13.012 kernel: unable to read partition table |
| 15:27:13.014 kernel: BUG: unable to handle kernel NULL pointer dereference at 0000000000000010 |
| 15:27:13.014 kernel: IP: [<ffffffff803f0d77>] disk_part_iter_next+0x74/0xfd |
| 15:27:13.014 kernel: PGD 82ad0b067 PUD 82cd7e067 PMD 0 |
| 15:27:13.014 kernel: Oops: 0000 [#1] PREEMPT SMP |
| 15:27:13.014 kernel: last sysfs file: /sys/devices/platform/host9/session4/iscsi_session/session4/ifacename |
| 15:27:13.014 kernel: CPU 5 |
| 15:27:13.014 kernel: Modules linked in: |
| 15:27:13.014 kernel: Pid: 13999, comm: async/0 Not tainted 2.6.30.4-elastic-lon-p #2 X7DBN |
| 15:27:13.014 kernel: RIP: 0010:[<ffffffff803f0d77>] [<ffffffff803f0d77>] disk_part_iter_next+0x74/0xfd |
| 15:27:13.014 kernel: RSP: 0018:ffff88066afa3dd0 EFLAGS: 00010246 |
| 15:27:13.014 kernel: RAX: ffff88082b58a000 RBX: ffff88066afa3e00 RCX: 0000000000000000 |
| 15:27:13.014 kernel: RDX: 0000000000000000 RSI: ffff88082b58a000 RDI: 0000000000000000 |
| 15:27:13.014 kernel: RBP: ffff88066afa3df0 R08: ffff88066afa2000 R09: ffff8806a204f000 |
| 15:27:13.014 kernel: R10: 000000fb12c7d274 R11: ffff8806c2bf0628 R12: ffff88066afa3e00 |
| 15:27:13.014 kernel: R13: ffff88082c829a00 R14: 0000000000000000 R15: ffff8806bc50c920 |
| 15:27:13.014 kernel: FS: 0000000000000000(0000) GS:ffff88002818a000(0000) knlGS:0000000000000000 |
| 15:27:13.014 kernel: CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b |
| 15:27:13.014 kernel: CR2: 0000000000000010 CR3: 000000082ade3000 CR4: 00000000000426e0 |
| 15:27:13.014 kernel: DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 |
| 15:27:13.014 kernel: DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 |
| 15:27:13.014 kernel: Process async/0 (pid: 13999, threadinfo ffff88066afa2000, task ffff8806c2bf05e0) |
| 15:27:13.014 kernel: Stack: |
| 15:27:13.014 kernel: 0000000000000000 ffff88066afa3e00 ffff88066afa3e00 ffff88082c829a00 |
| 15:27:13.014 kernel: ffff88066afa3e40 ffffffff80306feb ffff88082b58a000 0000000000000000 |
| 15:27:13.014 kernel: 0000000000000001 ffff8806bc50c920 ffff88066afa3e40 ffff88082b58a000 |
| 15:27:13.014 kernel: Call Trace: |
| 15:27:13.014 kernel: [<ffffffff80306feb>] register_disk+0x122/0x13a |
| 15:27:13.014 kernel: [<ffffffff803f0b0f>] add_disk+0xaa/0x106 |
| 15:27:13.014 kernel: [<ffffffff80493609>] sd_probe_async+0x198/0x25b |
| 15:27:13.014 kernel: [<ffffffff80270482>] async_thread+0x10c/0x20d |
| 15:27:13.014 kernel: [<ffffffff802545ff>] ? default_wake_function+0x0/0xf |
| 15:27:13.014 kernel: [<ffffffff80270376>] ? async_thread+0x0/0x20d |
| 15:27:13.014 kernel: [<ffffffff8026ad89>] kthread+0x55/0x80 |
| 15:27:13.014 kernel: [<ffffffff8022be6a>] child_rip+0xa/0x20 |
| 15:27:13.014 kernel: [<ffffffff8026ad34>] ? kthread+0x0/0x80 |
| 15:27:13.014 kernel: [<ffffffff8022be60>] ? child_rip+0x0/0x20 |
| 15:27:13.014 kernel: Code: c8 ff 80 e1 0c b9 00 00 00 00 0f 44 c1 41 83 cd ff 48 8d 7a 20 48 be ff ff ff ff 08 00 00 00 48 b9 00 00 00 00 08 00 00 00 eb 50 <8b> 42 10 41 bd 01 00 00 00 eb db 4c 63 c2 4e 8d 04 c7 4d 8b 20 |
| 15:27:13.015 kernel: RIP [<ffffffff803f0d77>] disk_part_iter_next+0x74/0xfd |
| 15:27:13.015 kernel: RSP <ffff88066afa3dd0> |
| 15:27:13.015 kernel: CR2: 0000000000000010 |
| 15:27:13.015 kernel: ---[ end trace 6104b56ef5590e25 ]--- |
| |
| The problem is caused because the async scanning split in sd.c doesn't hold |
| any reference to the device when it kicks off the async piece. What's |
| happening is that an iSCSI disconnect is destorying the device again *before* |
| the async sd scanning thread even starts. Fix this by taking a reference |
| before starting the thread and dropping it again when the thread completes. |
| |
| Reported-by: Chris Webb <chris@arachsys.com> |
| Signed-off-by: James Bottomley <James.Bottomley@suse.de> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> |
| |
| --- |
| drivers/scsi/sd.c | 2 ++ |
| 1 file changed, 2 insertions(+) |
| |
| --- a/drivers/scsi/sd.c |
| +++ b/drivers/scsi/sd.c |
| @@ -2021,6 +2021,7 @@ static void sd_probe_async(void *data, a |
| |
| sd_printk(KERN_NOTICE, sdkp, "Attached SCSI %sdisk\n", |
| sdp->removable ? "removable " : ""); |
| + put_device(&sdkp->dev); |
| } |
| |
| /** |
| @@ -2106,6 +2107,7 @@ static int sd_probe(struct device *dev) |
| |
| get_device(&sdp->sdev_gendev); |
| |
| + get_device(&sdkp->dev); /* prevent release before async_schedule */ |
| async_schedule(sd_probe_async, sdkp); |
| |
| return 0; |