drivers/scsi/scsi_error.c - pub/scm/linux/kernel/git/joro/linux - Git at Google

 /*
  *  scsi_error.c Copyright (C) 1997 Eric Youngdale
  *
  *  SCSI error/timeout handling
  *      Initial versions: Eric Youngdale.  Based upon conversations with
  *                        Leonard Zubkoff and David Miller at Linux Expo,
  *                        ideas originating from all over the place.
  *
  */

 #include <linux/module.h>

 #include <linux/sched.h>
 #include <linux/timer.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/ioport.h>
 #include <linux/kernel.h>
 #include <linux/stat.h>
 #include <linux/blk.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/smp_lock.h>

 #define __KERNEL_SYSCALLS__

 #include <linux/unistd.h>

 #include <asm/system.h>
 #include <asm/irq.h>
 #include <asm/dma.h>

 #include "scsi.h"
 #include "hosts.h"
 #include "constants.h"

 /*
  * We must always allow SHUTDOWN_SIGS.  Even if we are not a module,
  * the host drivers that we are using may be loaded as modules, and
  * when we unload these,  we need to ensure that the error handler thread
  * can be shut down.
  *
  * Note - when we unload a module, we send a SIGHUP.  We mustn't
  * enable SIGTERM, as this is how the init shuts things down when you
  * go to single-user mode.  For that matter, init also sends SIGKILL,
  * so we mustn't enable that one either.  We use SIGHUP instead.  Other
  * options would be SIGPWR, I suppose.
  */
 #define SHUTDOWN_SIGS	(sigmask(SIGHUP))

 #ifdef DEBUG
 #define SENSE_TIMEOUT SCSI_TIMEOUT
 #define ABORT_TIMEOUT SCSI_TIMEOUT
 #define RESET_TIMEOUT SCSI_TIMEOUT
 #else
 #define SENSE_TIMEOUT (10*HZ)
 #define RESET_TIMEOUT (2*HZ)
 #define ABORT_TIMEOUT (15*HZ)
 #endif

 #define STATIC

 /*
  * These should *probably* be handled by the host itself.
  * Since it is allowed to sleep, it probably should.
  */
 #define BUS_RESET_SETTLE_TIME   5*HZ
 #define HOST_RESET_SETTLE_TIME  10*HZ


 static const char RCSid[] = "$Header: /mnt/ide/home/eric/CVSROOT/linux/drivers/scsi/scsi_error.c,v 1.10 1997/12/08 04:50:35 eric Exp $";

 STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt);
 STATIC int scsi_request_sense(Scsi_Cmnd *);
 STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout);
 STATIC int scsi_try_to_abort_command(Scsi_Cmnd *, int);
 STATIC int scsi_test_unit_ready(Scsi_Cmnd *);
 STATIC int scsi_try_bus_device_reset(Scsi_Cmnd *, int timeout);
 STATIC int scsi_try_bus_reset(Scsi_Cmnd *);
 STATIC int scsi_try_host_reset(Scsi_Cmnd *);
 STATIC int scsi_unit_is_ready(Scsi_Cmnd *);
 STATIC void scsi_eh_action_done(Scsi_Cmnd *, int);
 STATIC int scsi_eh_retry_command(Scsi_Cmnd *);
 STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt);
 STATIC void scsi_restart_operations(struct Scsi_Host *);
 STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt);


 /*
  * Function:    scsi_add_timer()
  *
  * Purpose:     Start timeout timer for a single scsi command.
  *
  * Arguments:   SCset   - command that is about to start running.
  *              timeout - amount of time to allow this command to run.
  *              complete - timeout function to call if timer isn't
  *                      canceled.
  *
  * Returns:     Nothing
  *
  * Notes:       This should be turned into an inline function.
  *
  * More Notes:  Each scsi command has it's own timer, and as it is added to
  *              the queue, we set up the timer.  When the command completes,
  *              we cancel the timer.  Pretty simple, really, especially
  *              compared to the old way of handling this crap.
  */
 void scsi_add_timer(Scsi_Cmnd * SCset,
 		    int timeout,
 		    void (*complete) (Scsi_Cmnd *))
 {

 	/*
 	 * If the clock was already running for this command, then
 	 * first delete the timer.  The timer handling code gets rather
 	 * confused if we don't do this.
 	 */
 	if (SCset->eh_timeout.function != NULL) {
 		del_timer(&SCset->eh_timeout);
 	}
 	SCset->eh_timeout.data = (unsigned long) SCset;
 	SCset->eh_timeout.expires = jiffies + timeout;
 	SCset->eh_timeout.function = (void (*)(unsigned long)) complete;

 	SCset->done_late = 0;

 	SCSI_LOG_ERROR_RECOVERY(5, printk("Adding timer for command %p at %d (%p)\n", SCset, timeout, complete));

 	add_timer(&SCset->eh_timeout);

 }

 /*
  * Function:    scsi_delete_timer()
  *
  * Purpose:     Delete/cancel timer for a given function.
  *
  * Arguments:   SCset   - command that we are canceling timer for.
  *
  * Returns:     1 if we were able to detach the timer.  0 if we
  *              blew it, and the timer function has already started
  *              to run.
  *
  * Notes:       This should be turned into an inline function.
  */
 int scsi_delete_timer(Scsi_Cmnd * SCset)
 {
 	int rtn;

 	rtn = del_timer(&SCset->eh_timeout);

 	SCSI_LOG_ERROR_RECOVERY(5, printk("Clearing timer for command %p %d\n", SCset, rtn));

 	SCset->eh_timeout.data = (unsigned long) NULL;
 	SCset->eh_timeout.function = NULL;

 	return rtn;
 }

 /*
  * Function:    scsi_times_out()
  *
  * Purpose:     Timeout function for normal scsi commands..
  *
  * Arguments:   SCpnt   - command that is timing out.
  *
  * Returns:     Nothing.
  *
  * Notes:       We do not need to lock this.  There is the potential for
  *              a race only in that the normal completion handling might
  *              run, but if the normal completion function determines
  *              that the timer has already fired, then it mustn't do
  *              anything.
  */
 void scsi_times_out(Scsi_Cmnd * SCpnt)
 {
 	/*
 	 * Notify the low-level code that this operation failed and we are
 	 * reposessing the command.
 	 */
 #ifdef ERIC_neverdef
 	/*
 	 * FIXME(eric)
 	 * Allow the host adapter to push a queue ordering tag
 	 * out to the bus to force the command in question to complete.
 	 * If the host wants to do this, then we just restart the timer
 	 * for the command.  Before we really do this, some real thought
 	 * as to the optimum way to handle this should be done.  We *do*
 	 * need to force ordering every so often to ensure that all requests
 	 * do eventually complete, but I am not sure if this is the best way
 	 * to actually go about it.
 	 *
 	 * Better yet, force a sync here, but don't block since we are in an
 	 * interrupt.
 	 */
 	if (SCpnt->host->hostt->eh_ordered_queue_tag) {
 		if ((*SCpnt->host->hostt->eh_ordered_queue_tag) (SCpnt)) {
 			scsi_add_timer(SCpnt, SCpnt->internal_timeout,
 				       scsi_times_out);
 			return;
 		}
 	}
 	/*
 	 * FIXME(eric) - add a second special interface to handle this
 	 * case.  Ideally that interface can also be used to request
 	 * a queu
 	 */
 	if (SCpnt->host->can_queue) {
 		SCpnt->host->hostt->queuecommand(SCpnt, NULL);
 	}
 #endif

 	/* Set the serial_number_at_timeout to the current serial_number */
 	SCpnt->serial_number_at_timeout = SCpnt->serial_number;

 	SCpnt->eh_state = FAILED;
 	SCpnt->state = SCSI_STATE_TIMEOUT;
 	SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;

 	SCpnt->host->in_recovery = 1;
 	SCpnt->host->host_failed++;

 	SCSI_LOG_TIMEOUT(3, printk("Command timed out active=%d busy=%d failed=%d\n",
 				   atomic_read(&SCpnt->host->host_active),
 				   SCpnt->host->host_busy,
 				   SCpnt->host->host_failed));

 	/*
 	 * If the host is having troubles, then look to see if this was the last
 	 * command that might have failed.  If so, wake up the error handler.
 	 */
 	if( SCpnt->host->eh_wait == NULL ) {
 		panic("Error handler thread not present at %p %p %s %d",
 		      SCpnt, SCpnt->host, __FILE__, __LINE__);
 	}
 	if (SCpnt->host->host_busy == SCpnt->host->host_failed) {
 		up(SCpnt->host->eh_wait);
 	}
 }

 /*
  * Function     scsi_block_when_processing_errors
  *
  * Purpose:     Prevent more commands from being queued while error recovery
  *              is taking place.
  *
  * Arguments:   SDpnt - device on which we are performing recovery.
  *
  * Returns:     FALSE   The device was taken offline by error recovery.
  *              TRUE    OK to proceed.
  *
  * Notes:       We block until the host is out of error recovery, and then
  *              check to see whether the host or the device is offline.
  */
 int scsi_block_when_processing_errors(Scsi_Device * SDpnt)
 {

 	SCSI_SLEEP(&SDpnt->host->host_wait, SDpnt->host->in_recovery);

 	SCSI_LOG_ERROR_RECOVERY(5, printk("Open returning %d\n", SDpnt->online));

 	return SDpnt->online;
 }

 /*
  * Function:    scsi_eh_times_out()
  *
  * Purpose:     Timeout function for error handling.
  *
  * Arguments:   SCpnt   - command that is timing out.
  *
  * Returns:     Nothing.
  *
  * Notes:       During error handling, the kernel thread will be sleeping
  *              waiting for some action to complete on the device.  Our only
  *              job is to record that it timed out, and to wake up the
  *              thread.
  */
 STATIC
 void scsi_eh_times_out(Scsi_Cmnd * SCpnt)
 {
 	SCpnt->eh_state = SCSI_STATE_TIMEOUT;
 	SCSI_LOG_ERROR_RECOVERY(5, printk("In scsi_eh_times_out %p\n", SCpnt));

 	if (SCpnt->host->eh_action != NULL)
 		up(SCpnt->host->eh_action);
 	else
 		printk("Missing scsi error handler thread\n");
 }


 /*
  * Function:    scsi_eh_done()
  *
  * Purpose:     Completion function for error handling.
  *
  * Arguments:   SCpnt   - command that is timing out.
  *
  * Returns:     Nothing.
  *
  * Notes:       During error handling, the kernel thread will be sleeping
  *              waiting for some action to complete on the device.  Our only
  *              job is to record that the action completed, and to wake up the
  *              thread.
  */
 STATIC
 void scsi_eh_done(Scsi_Cmnd * SCpnt)
 {
 	int     rtn;

 	/*
 	 * If the timeout handler is already running, then just set the
 	 * flag which says we finished late, and return.  We have no
 	 * way of stopping the timeout handler from running, so we must
 	 * always defer to it.
 	 */
 	rtn = del_timer(&SCpnt->eh_timeout);
 	if (!rtn) {
 		SCpnt->done_late = 1;
 		return;
 	}

 	SCpnt->request.rq_status = RQ_SCSI_DONE;

 	SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
 	SCpnt->eh_state = SUCCESS;

 	SCSI_LOG_ERROR_RECOVERY(5, printk("In eh_done %p result:%x\n", SCpnt,
 					  SCpnt->result));

 	if (SCpnt->host->eh_action != NULL)
 		up(SCpnt->host->eh_action);
 }

 /*
  * Function:    scsi_eh_action_done()
  *
  * Purpose:     Completion function for error handling.
  *
  * Arguments:   SCpnt   - command that is timing out.
  *              answer  - boolean that indicates whether operation succeeded.
  *
  * Returns:     Nothing.
  *
  * Notes:       This callback is only used for abort and reset operations.
  */
 STATIC
 void scsi_eh_action_done(Scsi_Cmnd * SCpnt, int answer)
 {
 	SCpnt->request.rq_status = RQ_SCSI_DONE;

 	SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;
 	SCpnt->eh_state = (answer ? SUCCESS : FAILED);

 	if (SCpnt->host->eh_action != NULL)
 		up(SCpnt->host->eh_action);
 }

 /*
  * Function:  scsi_sense_valid()
  *
  * Purpose:     Determine whether a host has automatically obtained sense
  *              information or not.  If we have it, then give a recommendation
  *              as to what we should do next.
  */
 int scsi_sense_valid(Scsi_Cmnd * SCpnt)
 {
 	if (((SCpnt->sense_buffer[0] & 0x70) >> 4) != 7) {
 		return FALSE;
 	}
 	return TRUE;
 }

 /*
  * Function:  scsi_eh_retry_command()
  *
  * Purpose:     Retry the original command
  *
  * Returns:     SUCCESS - we were able to get the sense data.
  *              FAILED  - we were not able to get the sense data.
  *
  * Notes:       This function will *NOT* return until the command either
  *              times out, or it completes.
  */
 STATIC int scsi_eh_retry_command(Scsi_Cmnd * SCpnt)
 {
 	memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
 	       sizeof(SCpnt->data_cmnd));
 	SCpnt->request_buffer = SCpnt->buffer;
 	SCpnt->request_bufflen = SCpnt->bufflen;
 	SCpnt->use_sg = SCpnt->old_use_sg;
 	SCpnt->cmd_len = SCpnt->old_cmd_len;
 	SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
 	SCpnt->underflow = SCpnt->old_underflow;

 	scsi_send_eh_cmnd(SCpnt, SCpnt->timeout_per_command);

 	/*
 	 * Hey, we are done.  Let's look to see what happened.
 	 */
 	return SCpnt->eh_state;
 }

 /*
  * Function:  scsi_request_sense()
  *
  * Purpose:     Request sense data from a particular target.
  *
  * Returns:     SUCCESS - we were able to get the sense data.
  *              FAILED  - we were not able to get the sense data.
  *
  * Notes:       Some hosts automatically obtain this information, others
  *              require that we obtain it on our own.
  *
  *              This function will *NOT* return until the command either
  *              times out, or it completes.
  */
 STATIC int scsi_request_sense(Scsi_Cmnd * SCpnt)
 {
 	static unsigned char generic_sense[6] =
 	{REQUEST_SENSE, 0, 0, 0, 255, 0};
 	unsigned char scsi_result0[256], *scsi_result = NULL;
 	int saved_result;

 	memcpy((void *) SCpnt->cmnd, (void *) generic_sense,
 	       sizeof(generic_sense));

 	if (SCpnt->device->scsi_level <= SCSI_2)
 		SCpnt->cmnd[1] = SCpnt->lun << 5;

 	scsi_result = (!SCpnt->host->hostt->unchecked_isa_dma)
 	    ? &scsi_result0[0] : kmalloc(512, GFP_ATOMIC | GFP_DMA);

 	if (scsi_result == NULL) {
 		printk("cannot allocate scsi_result in scsi_request_sense.\n");
 		return FAILED;
 	}
 	/*
 	 * Zero the sense buffer.  Some host adapters automatically always request
 	 * sense, so it is not a good idea that SCpnt->request_buffer and
 	 * SCpnt->sense_buffer point to the same address (DB).
 	 * 0 is not a valid sense code.
 	 */
 	memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));
 	memset((void *) scsi_result, 0, 256);

 	saved_result = SCpnt->result;
 	SCpnt->request_buffer = scsi_result;
 	SCpnt->request_bufflen = 256;
 	SCpnt->use_sg = 0;
 	SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
 	SCpnt->sc_data_direction = SCSI_DATA_READ;
 	SCpnt->underflow = 0;

 	scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);

 	/* Last chance to have valid sense data */
 	if (!scsi_sense_valid(SCpnt))
 		memcpy((void *) SCpnt->sense_buffer,
 		       SCpnt->request_buffer,
 		       sizeof(SCpnt->sense_buffer));

 	if (scsi_result != &scsi_result0[0] && scsi_result != NULL)
 		kfree(scsi_result);

 	/*
 	 * When we eventually call scsi_finish, we really wish to complete
 	 * the original request, so let's restore the original data. (DB)
 	 */
 	memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
 	       sizeof(SCpnt->data_cmnd));
 	SCpnt->result = saved_result;
 	SCpnt->request_buffer = SCpnt->buffer;
 	SCpnt->request_bufflen = SCpnt->bufflen;
 	SCpnt->use_sg = SCpnt->old_use_sg;
 	SCpnt->cmd_len = SCpnt->old_cmd_len;
 	SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
 	SCpnt->underflow = SCpnt->old_underflow;

 	/*
 	 * Hey, we are done.  Let's look to see what happened.
 	 */
 	return SCpnt->eh_state;
 }

 /*
  * Function:  scsi_test_unit_ready()
  *
  * Purpose:     Run test unit ready command to see if the device is talking to us or not.
  *
  */
 STATIC int scsi_test_unit_ready(Scsi_Cmnd * SCpnt)
 {
 	static unsigned char tur_command[6] =
 	{TEST_UNIT_READY, 0, 0, 0, 0, 0};

 	memcpy((void *) SCpnt->cmnd, (void *) tur_command,
 	       sizeof(tur_command));

 	if (SCpnt->device->scsi_level <= SCSI_2)
 		SCpnt->cmnd[1] = SCpnt->lun << 5;

 	/*
 	 * Zero the sense buffer.  The SCSI spec mandates that any
 	 * untransferred sense data should be interpreted as being zero.
 	 */
 	memset((void *) SCpnt->sense_buffer, 0, sizeof(SCpnt->sense_buffer));

 	SCpnt->request_buffer = NULL;
 	SCpnt->request_bufflen = 0;
 	SCpnt->use_sg = 0;
 	SCpnt->cmd_len = COMMAND_SIZE(SCpnt->cmnd[0]);
 	SCpnt->underflow = 0;
 	SCpnt->sc_data_direction = SCSI_DATA_NONE;

 	scsi_send_eh_cmnd(SCpnt, SENSE_TIMEOUT);

 	/*
 	 * When we eventually call scsi_finish, we really wish to complete
 	 * the original request, so let's restore the original data. (DB)
 	 */
 	memcpy((void *) SCpnt->cmnd, (void *) SCpnt->data_cmnd,
 	       sizeof(SCpnt->data_cmnd));
 	SCpnt->request_buffer = SCpnt->buffer;
 	SCpnt->request_bufflen = SCpnt->bufflen;
 	SCpnt->use_sg = SCpnt->old_use_sg;
 	SCpnt->cmd_len = SCpnt->old_cmd_len;
 	SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
 	SCpnt->underflow = SCpnt->old_underflow;

 	/*
 	 * Hey, we are done.  Let's look to see what happened.
 	 */
 	SCSI_LOG_ERROR_RECOVERY(3,
 		printk("scsi_test_unit_ready: SCpnt %p eh_state %x\n",
 		SCpnt, SCpnt->eh_state));
 	return SCpnt->eh_state;
 }

 /*
  * This would normally need to get the IO request lock,
  * but as it doesn't actually touch anything that needs
  * to be locked we can avoid the lock here..
  */
 STATIC
 void scsi_sleep_done(struct semaphore *sem)
 {
 	if (sem != NULL) {
 		up(sem);
 	}
 }

 void scsi_sleep(int timeout)
 {
 	DECLARE_MUTEX_LOCKED(sem);
 	struct timer_list timer;

 	init_timer(&timer);
 	timer.data = (unsigned long) &sem;
 	timer.expires = jiffies + timeout;
 	timer.function = (void (*)(unsigned long)) scsi_sleep_done;

 	SCSI_LOG_ERROR_RECOVERY(5, printk("Sleeping for timer tics %d\n", timeout));

 	add_timer(&timer);

 	down(&sem);
 	del_timer(&timer);
 }

 /*
  * Function:  scsi_send_eh_cmnd
  *
  * Purpose:     Send a command out to a device as part of error recovery.
  *
  * Notes:       The initialization of the structures is quite a bit different
  *              in this case, and furthermore, there is a different completion
  *              handler.
  */
 STATIC void scsi_send_eh_cmnd(Scsi_Cmnd * SCpnt, int timeout)
 {
 	unsigned long flags;
 	struct Scsi_Host *host = SCpnt->host;

 	ASSERT_LOCK(host->host_lock, 0);

 retry:
 	/*
 	 * We will use a queued command if possible, otherwise we will
 	 * emulate the queuing and calling of completion function ourselves.
 	 */
 	SCpnt->owner = SCSI_OWNER_LOWLEVEL;

 	if (host->can_queue) {
 		DECLARE_MUTEX_LOCKED(sem);

 		SCpnt->eh_state = SCSI_STATE_QUEUED;

 		scsi_add_timer(SCpnt, timeout, scsi_eh_times_out);

 		/*
 		 * Set up the semaphore so we wait for the command to complete.
 		 */
 		SCpnt->host->eh_action = &sem;
 		SCpnt->request.rq_status = RQ_SCSI_BUSY;

 		spin_lock_irqsave(SCpnt->host->host_lock, flags);
 		host->hostt->queuecommand(SCpnt, scsi_eh_done);
 		spin_unlock_irqrestore(SCpnt->host->host_lock, flags);

 		down(&sem);

 		SCpnt->host->eh_action = NULL;

 		/*
 		 * See if timeout.  If so, tell the host to forget about it.
 		 * In other words, we don't want a callback any more.
 		 */
 		if (SCpnt->eh_state == SCSI_STATE_TIMEOUT) {
                         SCpnt->owner = SCSI_OWNER_LOWLEVEL;

 			/*
 			 * As far as the low level driver is
 			 * concerned, this command is still active, so
 			 * we must give the low level driver a chance
 			 * to abort it. (DB)
 			 *
 			 * FIXME(eric) - we are not tracking whether we could
 			 * abort a timed out command or not.  Not sure how
 			 * we should treat them differently anyways.
 			 */
 			spin_lock_irqsave(SCpnt->host->host_lock, flags);
 			if (SCpnt->host->hostt->eh_abort_handler)
 				SCpnt->host->hostt->eh_abort_handler(SCpnt);
 			spin_unlock_irqrestore(SCpnt->host->host_lock, flags);

 			SCpnt->request.rq_status = RQ_SCSI_DONE;
 			SCpnt->owner = SCSI_OWNER_ERROR_HANDLER;

 			SCpnt->eh_state = FAILED;
 		}
 		SCSI_LOG_ERROR_RECOVERY(5, printk("send_eh_cmnd: %p eh_state:%x\n",
 						SCpnt, SCpnt->eh_state));
 	} else {
 		int temp;

 		/*
 		 * We damn well had better never use this code.  There is no
 		 * timeout protection here, since we would end up waiting in
 		 * the actual low level driver, we don't know how to wake it up.
 		 */
 		spin_lock_irqsave(host->host_lock, flags);
 		temp = host->hostt->command(SCpnt);
 		spin_unlock_irqrestore(host->host_lock, flags);

 		SCpnt->result = temp;
 		/* Fall through to code below to examine status. */
 		SCpnt->eh_state = SUCCESS;
 	}

 	/*
 	 * Now examine the actual status codes to see whether the command
 	 * actually did complete normally.
 	 */
 	if (SCpnt->eh_state == SUCCESS) {
 		int ret = scsi_eh_completed_normally(SCpnt);
 		SCSI_LOG_ERROR_RECOVERY(3,
 			printk("scsi_send_eh_cmnd: scsi_eh_completed_normally %x\n", ret));
 		switch (ret) {
 		case SUCCESS:
 			SCpnt->eh_state = SUCCESS;
 			break;
 		case NEEDS_RETRY:
 			goto retry;
 		case FAILED:
 		default:
 			SCpnt->eh_state = FAILED;
 			break;
 		}
 	} else {
 		SCpnt->eh_state = FAILED;
 	}
 }

 /*
  * Function:  scsi_unit_is_ready()
  *
  * Purpose:     Called after TEST_UNIT_READY is run, to test to see if
  *              the unit responded in a way that indicates it is ready.
  */
 STATIC int scsi_unit_is_ready(Scsi_Cmnd * SCpnt)
 {
 	if (SCpnt->result) {
 		if (((driver_byte(SCpnt->result) & DRIVER_SENSE) ||
 		     (status_byte(SCpnt->result) & CHECK_CONDITION)) &&
 		    ((SCpnt->sense_buffer[0] & 0x70) >> 4) == 7) {
 			if (((SCpnt->sense_buffer[2] & 0xf) != NOT_READY) &&
 			    ((SCpnt->sense_buffer[2] & 0xf) != UNIT_ATTENTION) &&
 			    ((SCpnt->sense_buffer[2] & 0xf) != ILLEGAL_REQUEST)) {
 				return 0;
 			}
 		}
 	}
 	return 1;
 }

 /*
  * Function:    scsi_eh_finish_command
  *
  * Purpose:     Handle a command that we are finished with WRT error handling.
  *
  * Arguments:   SClist - pointer to list into which we are putting completed commands.
  *              SCpnt  - command that is completing
  *
  * Notes:       We don't want to use the normal command completion while we are
  *              are still handling errors - it may cause other commands to be queued,
  *              and that would disturb what we are doing.  Thus we really want to keep
  *              a list of pending commands for final completion, and once we
  *              are ready to leave error handling we handle completion for real.
  */
 STATIC void scsi_eh_finish_command(Scsi_Cmnd ** SClist, Scsi_Cmnd * SCpnt)
 {
 	SCpnt->state = SCSI_STATE_BHQUEUE;
 	SCpnt->bh_next = *SClist;
 	/*
 	 * Set this back so that the upper level can correctly free up
 	 * things.
 	 */
 	SCpnt->use_sg = SCpnt->old_use_sg;
 	SCpnt->sc_data_direction = SCpnt->sc_old_data_direction;
 	SCpnt->underflow = SCpnt->old_underflow;
 	*SClist = SCpnt;
 }

 /*
  * Function:  scsi_try_to_abort_command
  *
  * Purpose:     Ask host adapter to abort a running command.
  *
  * Returns:     FAILED          Operation failed or not supported.
  *              SUCCESS         Succeeded.
  *
  * Notes:       This function will not return until the user's completion
  *              function has been called.  There is no timeout on this
  *              operation.  If the author of the low-level driver wishes
  *              this operation to be timed, they can provide this facility
  *              themselves.  Helper functions in scsi_error.c can be supplied
  *              to make this easier to do.
  *
  * Notes:       It may be possible to combine this with all of the reset
  *              handling to eliminate a lot of code duplication.  I don't
  *              know what makes more sense at the moment - this is just a
  *              prototype.
  */
 STATIC int scsi_try_to_abort_command(Scsi_Cmnd * SCpnt, int timeout)
 {
 	int rtn;
 	unsigned long flags;

 	SCpnt->eh_state = FAILED;	/* Until we come up with something better */

 	if (SCpnt->host->hostt->eh_abort_handler == NULL) {
 		return FAILED;
 	}
 	/*
 	 * scsi_done was called just after the command timed out and before
 	 * we had a chance to process it. (DB)
 	 */
 	if (SCpnt->serial_number == 0)
 		return SUCCESS;

 	SCpnt->owner = SCSI_OWNER_LOWLEVEL;

 	spin_lock_irqsave(SCpnt->host->host_lock, flags);
 	rtn = SCpnt->host->hostt->eh_abort_handler(SCpnt);
 	spin_unlock_irqrestore(SCpnt->host->host_lock, flags);
 	return rtn;
 }

 /*
  * Function:  scsi_try_bus_device_reset
  *
  * Purpose:     Ask host adapter to perform a bus device reset for a given
  *              device.
  *
  * Returns:     FAILED          Operation failed or not supported.
  *              SUCCESS         Succeeded.
  *
  * Notes:       There is no timeout for this operation.  If this operation is
  *              unreliable for a given host, then the host itself needs to put a
  *              timer on it, and set the host back to a consistent state prior
  *              to returning.
  */
 STATIC int scsi_try_bus_device_reset(Scsi_Cmnd * SCpnt, int timeout)
 {
 	unsigned long flags;
 	int rtn;

 	SCpnt->eh_state = FAILED;	/* Until we come up with something better */

 	if (SCpnt->host->hostt->eh_device_reset_handler == NULL) {
 		return FAILED;
 	}
 	SCpnt->owner = SCSI_OWNER_LOWLEVEL;

 	spin_lock_irqsave(SCpnt->host->host_lock, flags);
 	rtn = SCpnt->host->hostt->eh_device_reset_handler(SCpnt);
 	spin_unlock_irqrestore(SCpnt->host->host_lock, flags);

 	if (rtn == SUCCESS)
 		SCpnt->eh_state = SUCCESS;

 	return SCpnt->eh_state;
 }

 /*
  * Function:  scsi_try_bus_reset
  *
  * Purpose:     Ask host adapter to perform a bus reset for a host.
  *
  * Returns:     FAILED          Operation failed or not supported.
  *              SUCCESS         Succeeded.
  *
  * Notes:
  */
 STATIC int scsi_try_bus_reset(Scsi_Cmnd * SCpnt)
 {
 	unsigned long flags;
 	int rtn;

 	SCpnt->eh_state = FAILED;	/* Until we come up with something better */
 	SCpnt->owner = SCSI_OWNER_LOWLEVEL;
 	SCpnt->serial_number_at_timeout = SCpnt->serial_number;

 	if (SCpnt->host->hostt->eh_bus_reset_handler == NULL) {
 		return FAILED;
 	}

 	spin_lock_irqsave(SCpnt->host->host_lock, flags);
 	rtn = SCpnt->host->hostt->eh_bus_reset_handler(SCpnt);
 	spin_unlock_irqrestore(SCpnt->host->host_lock, flags);

 	if (rtn == SUCCESS)
 		SCpnt->eh_state = SUCCESS;

 	/*
 	 * If we had a successful bus reset, mark the command blocks to expect
 	 * a condition code of unit attention.
 	 */
 	scsi_sleep(BUS_RESET_SETTLE_TIME);
 	if (SCpnt->eh_state == SUCCESS) {
 		Scsi_Device *SDloop;
 		for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
 			if (SCpnt->channel == SDloop->channel) {
 				SDloop->was_reset = 1;
 				SDloop->expecting_cc_ua = 1;
 			}
 		}
 	}
 	return SCpnt->eh_state;
 }

 /*
  * Function:  scsi_try_host_reset
  *
  * Purpose:     Ask host adapter to reset itself, and the bus.
  *
  * Returns:     FAILED          Operation failed or not supported.
  *              SUCCESS         Succeeded.
  *
  * Notes:
  */
 STATIC int scsi_try_host_reset(Scsi_Cmnd * SCpnt)
 {
 	unsigned long flags;
 	int rtn;

 	SCpnt->eh_state = FAILED;	/* Until we come up with something better */
 	SCpnt->owner = SCSI_OWNER_LOWLEVEL;
 	SCpnt->serial_number_at_timeout = SCpnt->serial_number;

 	if (SCpnt->host->hostt->eh_host_reset_handler == NULL) {
 		return FAILED;
 	}
 	spin_lock_irqsave(SCpnt->host->host_lock, flags);
 	rtn = SCpnt->host->hostt->eh_host_reset_handler(SCpnt);
 	spin_unlock_irqrestore(SCpnt->host->host_lock, flags);

 	if (rtn == SUCCESS)
 		SCpnt->eh_state = SUCCESS;

 	/*
 	 * If we had a successful host reset, mark the command blocks to expect
 	 * a condition code of unit attention.
 	 */
 	scsi_sleep(HOST_RESET_SETTLE_TIME);
 	if (SCpnt->eh_state == SUCCESS) {
 		Scsi_Device *SDloop;
 		for (SDloop = SCpnt->host->host_queue; SDloop; SDloop = SDloop->next) {
 			SDloop->was_reset = 1;
 			SDloop->expecting_cc_ua = 1;
 		}
 	}
 	return SCpnt->eh_state;
 }

 /*
  * Function:  scsi_decide_disposition
  *
  * Purpose:     Examine a command block that has come back from the low-level
  *              and figure out what to do next.
  *
  * Returns:     SUCCESS         - pass on to upper level.
  *              FAILED          - pass on to error handler thread.
  *              RETRY           - command should be retried.
  *              SOFTERR         - command succeeded, but we need to log
  *                                a soft error.
  *
  * Notes:       This is *ONLY* called when we are examining the status
  *              after sending out the actual data command.  Any commands
  *              that are queued for error recovery (i.e. TEST_UNIT_READY)
  *              do *NOT* come through here.
  *
  *              NOTE - When this routine returns FAILED, it means the error
  *              handler thread is woken.  In cases where the error code
  *              indicates an error that doesn't require the error handler
  *              thread (i.e. we don't need to abort/reset), then this function
  *              should return SUCCESS.
  */
 int scsi_decide_disposition(Scsi_Cmnd * SCpnt)
 {
 	int rtn;

 	/*
 	 * If the device is offline, then we clearly just pass the result back
 	 * up to the top level.
 	 */
 	if (SCpnt->device->online == FALSE) {
 		SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: device offline - report as SUCCESS\n"));
 		return SUCCESS;
 	}
 	/*
 	 * First check the host byte, to see if there is anything in there
 	 * that would indicate what we need to do.
 	 */

 	switch (host_byte(SCpnt->result)) {
 	case DID_PASSTHROUGH:
 		/*
 		 * No matter what, pass this through to the upper layer.
 		 * Nuke this special code so that it looks like we are saying
 		 * DID_OK.
 		 */
 		SCpnt->result &= 0xff00ffff;
 		return SUCCESS;
 	case DID_OK:
 		/*
 		 * Looks good.  Drop through, and check the next byte.
 		 */
 		break;
 	case DID_NO_CONNECT:
 	case DID_BAD_TARGET:
 	case DID_ABORT:
 		/*
 		 * Note - this means that we just report the status back to the
 		 * top level driver, not that we actually think that it indicates
 		 * success.
 		 */
 		return SUCCESS;
 		/*
 		 * When the low level driver returns DID_SOFT_ERROR,
 		 * it is responsible for keeping an internal retry counter
 		 * in order to avoid endless loops (DB)
 		 *
 		 * Actually this is a bug in this function here.  We should
 		 * be mindful of the maximum number of retries specified
 		 * and not get stuck in a loop.
 		 */
 	case DID_SOFT_ERROR:
 		goto maybe_retry;

 	case DID_ERROR:
 		if (msg_byte(SCpnt->result) == COMMAND_COMPLETE &&
 		    status_byte(SCpnt->result) == RESERVATION_CONFLICT)
 			/*
 			 * execute reservation conflict processing code
 			 * lower down
 			 */
 			break;
 		/* FALLTHROUGH */

 	case DID_BUS_BUSY:
 	case DID_PARITY:
 		goto maybe_retry;
 	case DID_TIME_OUT:
 		/*
 		 * When we scan the bus, we get timeout messages for
 		 * these commands if there is no device available.
 		 * Other hosts report DID_NO_CONNECT for the same thing.
 		 */
 		if ((SCpnt->cmnd[0] == TEST_UNIT_READY ||
 		     SCpnt->cmnd[0] == INQUIRY)) {
 			return SUCCESS;
 		} else {
 			return FAILED;
 		}
 	case DID_RESET:
 		/*
 		 * In the normal case where we haven't initiated a reset, this is
 		 * a failure.
 		 */
 		if (SCpnt->flags & IS_RESETTING) {
 			SCpnt->flags &= ~IS_RESETTING;
 			goto maybe_retry;
 		}
 		return SUCCESS;
 	default:
 		return FAILED;
 	}

 	/*
 	 * Next, check the message byte.
 	 */
 	if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
 		return FAILED;
 	}
 	/*
 	 * Now, check the status byte to see if this indicates anything special.
 	 */
 	switch (status_byte(SCpnt->result)) {
 	case QUEUE_FULL:
 		/*
 		 * The case of trying to send too many commands to a tagged queueing
 		 * device.
 		 */
 		return ADD_TO_MLQUEUE;
 	case GOOD:
 	case COMMAND_TERMINATED:
 		return SUCCESS;
 	case CHECK_CONDITION:
 		rtn = scsi_check_sense(SCpnt);
 		if (rtn == NEEDS_RETRY) {
 			goto maybe_retry;
 		}
 		return rtn;
 	case CONDITION_GOOD:
 	case INTERMEDIATE_GOOD:
 	case INTERMEDIATE_C_GOOD:
 		/*
 		 * Who knows?  FIXME(eric)
 		 */
 		return SUCCESS;
 	case BUSY:
 		goto maybe_retry;

 	case RESERVATION_CONFLICT:
 		printk("scsi%d (%d,%d,%d) : RESERVATION CONFLICT\n",
 		       SCpnt->host->host_no, SCpnt->channel,
 		       SCpnt->device->id, SCpnt->device->lun);
 		return SUCCESS; /* causes immediate I/O error */
 	default:
 		return FAILED;
 	}
 	return FAILED;

       maybe_retry:

 	if ((++SCpnt->retries) < SCpnt->allowed) {
 		return NEEDS_RETRY;
 	} else {
                 /*
                  * No more retries - report this one back to upper level.
                  */
 		return SUCCESS;
 	}
 }

 /*
  * Function:  scsi_eh_completed_normally
  *
  * Purpose:     Examine a command block that has come back from the low-level
  *              and figure out what to do next.
  *
  * Returns:     SUCCESS         - pass on to upper level.
  *              FAILED          - pass on to error handler thread.
  *              RETRY           - command should be retried.
  *              SOFTERR         - command succeeded, but we need to log
  *                                a soft error.
  *
  * Notes:       This is *ONLY* called when we are examining the status
  *              of commands queued during error recovery.  The main
  *              difference here is that we don't allow for the possibility
  *              of retries here, and we are a lot more restrictive about what
  *              we consider acceptable.
  */
 STATIC int scsi_eh_completed_normally(Scsi_Cmnd * SCpnt)
 {
 	/*
 	 * First check the host byte, to see if there is anything in there
 	 * that would indicate what we need to do.
 	 */
 	if (host_byte(SCpnt->result) == DID_RESET) {
 		if (SCpnt->flags & IS_RESETTING) {
 			/*
 			 * OK, this is normal.  We don't know whether in fact the
 			 * command in question really needs to be rerun or not -
 			 * if this was the original data command then the answer is yes,
 			 * otherwise we just flag it as success.
 			 */
 			SCpnt->flags &= ~IS_RESETTING;
 			return NEEDS_RETRY;
 		}
 		/*
 		 * Rats.  We are already in the error handler, so we now get to try
 		 * and figure out what to do next.  If the sense is valid, we have
 		 * a pretty good idea of what to do.  If not, we mark it as failed.
 		 */
 		return scsi_check_sense(SCpnt);
 	}
 	if (host_byte(SCpnt->result) != DID_OK) {
 		return FAILED;
 	}
 	/*
 	 * Next, check the message byte.
 	 */
 	if (msg_byte(SCpnt->result) != COMMAND_COMPLETE) {
 		return FAILED;
 	}
 	/*
 	 * Now, check the status byte to see if this indicates anything special.
 	 */
 	switch (status_byte(SCpnt->result)) {
 	case GOOD:
 	case COMMAND_TERMINATED:
 		return SUCCESS;
 	case CHECK_CONDITION:
 		return scsi_check_sense(SCpnt);
 	case CONDITION_GOOD:
 	case INTERMEDIATE_GOOD:
 	case INTERMEDIATE_C_GOOD:
 		/*
 		 * Who knows?  FIXME(eric)
 		 */
 		return SUCCESS;
 	case BUSY:
 	case QUEUE_FULL:
 	case RESERVATION_CONFLICT:
 	default:
 		return FAILED;
 	}
 	return FAILED;
 }

 /*
  * Function:  scsi_check_sense
  *
  * Purpose:     Examine sense information - give suggestion as to what
  *              we should do with it.
  */
 STATIC int scsi_check_sense(Scsi_Cmnd * SCpnt)
 {
 	if (!scsi_sense_valid(SCpnt)) {
 		return FAILED;
 	}
 	if (SCpnt->sense_buffer[2] & 0xe0)
 		return SUCCESS;

 	switch (SCpnt->sense_buffer[2] & 0xf) {
 	case NO_SENSE:
 		return SUCCESS;
 	case RECOVERED_ERROR:
 		return /* SOFT_ERROR */ SUCCESS;

 	case ABORTED_COMMAND:
 		return NEEDS_RETRY;
 	case NOT_READY:
 	case UNIT_ATTENTION:
 		/*
 		 * If we are expecting a CC/UA because of a bus reset that we
 		 * performed, treat this just as a retry.  Otherwise this is
 		 * information that we should pass up to the upper-level driver
 		 * so that we can deal with it there.
 		 */
 		if (SCpnt->device->expecting_cc_ua) {
 			SCpnt->device->expecting_cc_ua = 0;
 			return NEEDS_RETRY;
 		}
 		/*
 		 * If the device is in the process of becoming ready, we
 		 * should retry.
 		 */
 		if ((SCpnt->sense_buffer[12] == 0x04) &&
 			(SCpnt->sense_buffer[13] == 0x01)) {
 			return NEEDS_RETRY;
 		}
 		return SUCCESS;

 		/* these three are not supported */
 	case COPY_ABORTED:
 	case VOLUME_OVERFLOW:
 	case MISCOMPARE:
 		return SUCCESS;

 	case MEDIUM_ERROR:
 		return NEEDS_RETRY;

 	case ILLEGAL_REQUEST:
 	case BLANK_CHECK:
 	case DATA_PROTECT:
 	case HARDWARE_ERROR:
 	default:
 		return SUCCESS;
 	}
 }


 /*
  * Function:  scsi_restart_operations
  *
  * Purpose:     Restart IO operations to the specified host.
  *
  * Arguments:   host  - host that we are restarting
  *
  * Lock status: Assumed that locks are not held upon entry.
  *
  * Returns:     Nothing
  *
  * Notes:       When we entered the error handler, we blocked all further
  *              I/O to this device.  We need to 'reverse' this process.
  */
 STATIC void scsi_restart_operations(struct Scsi_Host *host)
 {
 	Scsi_Device *SDpnt;
 	unsigned long flags;

 	ASSERT_LOCK(host->host_lock, 0);

 	/*
 	 * Next free up anything directly waiting upon the host.  This will be
 	 * requests for character device operations, and also for ioctls to queued
 	 * block devices.
 	 */
 	SCSI_LOG_ERROR_RECOVERY(5, printk("scsi_error.c: Waking up host to restart\n"));

 	wake_up(&host->host_wait);

 	/*
 	 * Finally we need to re-initiate requests that may be pending.  We will
 	 * have had everything blocked while error handling is taking place, and
 	 * now that error recovery is done, we will need to ensure that these
 	 * requests are started.
 	 */
 	spin_lock_irqsave(host->host_lock, flags);
 	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
 		request_queue_t *q = &SDpnt->request_queue;

 		if ((host->can_queue > 0 && (host->host_busy >= host->can_queue))
 		    || (host->host_blocked)
 		    || (host->host_self_blocked)
 		    || (SDpnt->device_blocked)) {
 			break;
 		}

 		q->request_fn(q);
 	}
 	spin_unlock_irqrestore(host->host_lock, flags);
 }

 /*
  * Function:  scsi_unjam_host
  *
  * Purpose:     Attempt to fix a host which has a command that failed for
  *              some reason.
  *
  * Arguments:   host    - host that needs unjamming.
  *
  * Returns:     Nothing
  *
  * Notes:       When we come in here, we *know* that all commands on the
  *              bus have either completed, failed or timed out.  We also
  *              know that no further commands are being sent to the host,
  *              so things are relatively quiet and we have freedom to
  *              fiddle with things as we wish.
  *
  * Additional note:  This is only the *default* implementation.  It is possible
  *              for individual drivers to supply their own version of this
  *              function, and if the maintainer wishes to do this, it is
  *              strongly suggested that this function be taken as a template
  *              and modified.  This function was designed to correctly handle
  *              problems for about 95% of the different cases out there, and
  *              it should always provide at least a reasonable amount of error
  *              recovery.
  *
  * Note3:       Any command marked 'FAILED' or 'TIMEOUT' must eventually
  *              have scsi_finish_command() called for it.  We do all of
  *              the retry stuff here, so when we restart the host after we
  *              return it should have an empty queue.
  */
 STATIC int scsi_unjam_host(struct Scsi_Host *host)
 {
 	int devices_failed;
 	int numfailed;
 	int ourrtn;
 	int rtn = FALSE;
 	int result;
 	Scsi_Cmnd *SCloop;
 	Scsi_Cmnd *SCpnt;
 	Scsi_Device *SDpnt;
 	Scsi_Device *SDloop;
 	Scsi_Cmnd *SCdone;
 	int timed_out;

 	ASSERT_LOCK(host->host_lock, 0);

 	SCdone = NULL;

 	/*
 	 * First, protect against any sort of race condition.  If any of the outstanding
 	 * commands are in states that indicate that we are not yet blocked (i.e. we are
 	 * not in a quiet state) then we got woken up in error.  If we ever end up here,
 	 * we need to re-examine some of the assumptions.
 	 */
 	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
 		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
 			if (SCpnt->state == SCSI_STATE_FAILED
 			    || SCpnt->state == SCSI_STATE_TIMEOUT
 			    || SCpnt->state == SCSI_STATE_INITIALIZING
 			    || SCpnt->state == SCSI_STATE_UNUSED) {
 				continue;
 			}
 			/*
 			 * Rats.  Something is still floating around out there.  This could
 			 * be the result of the fact that the upper level drivers are still frobbing
 			 * commands that might have succeeded.  There are two outcomes.  One is that
 			 * the command block will eventually be freed, and the other one is that
 			 * the command will be queued and will be finished along the way.
 			 */
 			SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler prematurely woken - commands still active (%p %x %d)\n", SCpnt, SCpnt->state, SCpnt->target));

 /*
  *        panic("SCSI Error handler woken too early\n");
  *
  * This is no longer a problem, since now the code cares only about
  * SCSI_STATE_TIMEOUT and SCSI_STATE_FAILED.
  * Other states are useful only to release active commands when devices are
  * set offline. If (host->host_active == host->host_busy) we can safely assume
  * that there are no commands in state other then TIMEOUT od FAILED. (DB)
  *
  * FIXME:
  * It is not easy to release correctly commands according to their state when
  * devices are set offline, when the state is neither TIMEOUT nor FAILED.
  * When a device is set offline, we can have some command with
  * rq_status=RQ_SCSY_BUSY, owner=SCSI_STATE_HIGHLEVEL,
  * state=SCSI_STATE_INITIALIZING and the driver module cannot be released.
  * (DB, 17 May 1998)
  */
 		}
 	}

 	/*
 	 * Next, see if we need to request sense information.  if so,
 	 * then get it now, so we have a better idea of what to do.
 	 * FIXME(eric) this has the unfortunate side effect that if a host
 	 * adapter does not automatically request sense information, that we end
 	 * up shutting it down before we request it.  All hosts should be doing this
 	 * anyways, so for now all I have to say is tough noogies if you end up in here.
 	 * On second thought, this is probably a good idea.  We *really* want to give
 	 * authors an incentive to automatically request this.
 	 */
 	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we need to request sense\n"));

 	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
 		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
 			if (SCpnt->state != SCSI_STATE_FAILED || scsi_sense_valid(SCpnt)) {
 				continue;
 			}
 			SCSI_LOG_ERROR_RECOVERY(2, printk("scsi_unjam_host: Requesting sense for %d\n",
 							  SCpnt->target));
 			rtn = scsi_request_sense(SCpnt);
 			if (rtn != SUCCESS) {
 				continue;
 			}
 			SCSI_LOG_ERROR_RECOVERY(3, printk("Sense requested for %p - result %x\n",
 						  SCpnt, SCpnt->result));
 			SCSI_LOG_ERROR_RECOVERY(3, print_sense("bh", SCpnt));

 			result = scsi_decide_disposition(SCpnt);

 			/*
 			 * If the result was normal, then just pass it along to the
 			 * upper level.
 			 */
 			if (result == SUCCESS) {
 				SCpnt->host->host_failed--;
 				scsi_eh_finish_command(&SCdone, SCpnt);
 			}
 			if (result != NEEDS_RETRY) {
 				continue;
 			}
 			/*
 			 * We only come in here if we want to retry a
 			 * command.  The test to see whether the command
 			 * should be retried should be keeping track of the
 			 * number of tries, so we don't end up looping, of
 			 * course.
 			 */
 			SCpnt->state = NEEDS_RETRY;
 			rtn = scsi_eh_retry_command(SCpnt);
 			if (rtn != SUCCESS) {
 				continue;
 			}
 			/*
 			 * We eventually hand this one back to the top level.
 			 */
 			SCpnt->host->host_failed--;
 			scsi_eh_finish_command(&SCdone, SCpnt);
 		}
 	}

 	/*
 	 * Go through the list of commands and figure out where we stand and how bad things
 	 * really are.
 	 */
 	numfailed = 0;
 	timed_out = 0;
 	devices_failed = 0;
 	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
 		unsigned int device_error = 0;

 		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
 			if (SCpnt->state == SCSI_STATE_FAILED) {
 				SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d failed\n",
 							 SCpnt->target));
 				numfailed++;
 				device_error++;
 			}
 			if (SCpnt->state == SCSI_STATE_TIMEOUT) {
 				SCSI_LOG_ERROR_RECOVERY(5, printk("Command to ID %d timedout\n",
 							 SCpnt->target));
 				timed_out++;
 				device_error++;
 			}
 		}
 		if (device_error > 0) {
 			devices_failed++;
 		}
 	}

 	SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d+%d commands on %d devices require eh work\n",
 				  numfailed, timed_out, devices_failed));

 	if (host->host_failed == 0) {
 		ourrtn = TRUE;
 		goto leave;
 	}
 	/*
 	 * Next, try and see whether or not it makes sense to try and abort
 	 * the running command.  This only works out to be the case if we have
 	 * one command that has timed out.  If the command simply failed, it
 	 * makes no sense to try and abort the command, since as far as the
 	 * host adapter is concerned, it isn't running.
 	 */

 	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try abort\n"));

 	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
 		for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
 			if (SCloop->state != SCSI_STATE_TIMEOUT) {
 				continue;
 			}
 			rtn = scsi_try_to_abort_command(SCloop, ABORT_TIMEOUT);
 			if (rtn == SUCCESS) {
 				rtn = scsi_test_unit_ready(SCloop);

 				if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
 					rtn = scsi_eh_retry_command(SCloop);

 					if (rtn == SUCCESS) {
 						SCloop->host->host_failed--;
 						scsi_eh_finish_command(&SCdone, SCloop);
 					}
 				}
 			}
 		}
 	}

 	/*
 	 * If we have corrected all of the problems, then we are done.
 	 */
 	if (host->host_failed == 0) {
 		ourrtn = TRUE;
 		goto leave;
 	}
 	/*
 	 * Either the abort wasn't appropriate, or it didn't succeed.
 	 * Now try a bus device reset.  Still, look to see whether we have
 	 * multiple devices that are jammed or not - if we have multiple devices,
 	 * it makes no sense to try BUS_DEVICE_RESET - we really would need
 	 * to try a BUS_RESET instead.
 	 *
 	 * Does this make sense - should we try BDR on each device individually?
 	 * Yes, definitely.
 	 */
 	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Checking to see if we want to try BDR\n"));

 	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
 		for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
 			if (SCloop->state == SCSI_STATE_FAILED
 			    || SCloop->state == SCSI_STATE_TIMEOUT) {
 				break;
 			}
 		}

 		if (SCloop == NULL) {
 			continue;
 		}
 		/*
 		 * OK, we have a device that is having problems.  Try and send
 		 * a bus device reset to it.
 		 *
 		 * FIXME(eric) - make sure we handle the case where multiple
 		 * commands to the same device have failed. They all must
 		 * get properly restarted.
 		 */
 		rtn = scsi_try_bus_device_reset(SCloop, RESET_TIMEOUT);

 		if (rtn == SUCCESS) {
 			rtn = scsi_test_unit_ready(SCloop);

 			if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
 				rtn = scsi_eh_retry_command(SCloop);

 				if (rtn == SUCCESS) {
 					SCloop->host->host_failed--;
 					scsi_eh_finish_command(&SCdone, SCloop);
 				}
 			}
 		}
 	}

 	if (host->host_failed == 0) {
 		ourrtn = TRUE;
 		goto leave;
 	}
 	/*
 	 * If we ended up here, we have serious problems.  The only thing left
 	 * to try is a full bus reset.  If someone has grabbed the bus and isn't
 	 * letting go, then perhaps this will help.
 	 */
 	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard bus reset\n"));

 	/*
 	 * We really want to loop over the various channels, and do this on
 	 * a channel by channel basis.  We should also check to see if any
 	 * of the failed commands are on soft_reset devices, and if so, skip
 	 * the reset.
 	 */
 	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
 	      next_device:
 		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
 			if (SCpnt->state != SCSI_STATE_FAILED
 			    && SCpnt->state != SCSI_STATE_TIMEOUT) {
 				continue;
 			}
 			/*
 			 * We have a failed command.  Make sure there are no other failed
 			 * commands on the same channel that are timed out and implement a
 			 * soft reset.
 			 */
 			for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
 				for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
 					if (SCloop->channel != SCpnt->channel) {
 						continue;
 					}
 					if (SCloop->state != SCSI_STATE_FAILED
 					    && SCloop->state != SCSI_STATE_TIMEOUT) {
 						continue;
 					}
 					if (SDloop->soft_reset && SCloop->state == SCSI_STATE_TIMEOUT) {
 						/*
 						 * If this device uses the soft reset option, and this
 						 * is one of the devices acting up, then our only
 						 * option is to wait a bit, since the command is
 						 * supposedly still running.
 						 *
 						 * FIXME(eric) - right now we will just end up falling
 						 * through to the 'take device offline' case.
 						 *
 						 * FIXME(eric) - It is possible that the command completed
 						 * *after* the error recovery procedure started, and if this
 						 * is the case, we are worrying about nothing here.
 						 */

 						scsi_sleep(1 * HZ);
 						goto next_device;
 					}
 				}
 			}

 			/*
 			 * We now know that we are able to perform a reset for the
 			 * bus that SCpnt points to.  There are no soft-reset devices
 			 * with outstanding timed out commands.
 			 */
 			rtn = scsi_try_bus_reset(SCpnt);
 			if (rtn == SUCCESS) {
 				for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
 					for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
 						if (SCloop->channel != SCpnt->channel) {
 							continue;
 						}
 						if (SCloop->state != SCSI_STATE_FAILED
 						    && SCloop->state != SCSI_STATE_TIMEOUT) {
 							continue;
 						}
 						rtn = scsi_test_unit_ready(SCloop);

 						if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
 							rtn = scsi_eh_retry_command(SCloop);

 							if (rtn == SUCCESS) {
 								SCpnt->host->host_failed--;
 								scsi_eh_finish_command(&SCdone, SCloop);
 							}
 						}
 						/*
 						 * If the bus reset worked, but we are still unable to
 						 * talk to the device, take it offline.
 						 * FIXME(eric) - is this really the correct thing to do?
 						 */
 						if (rtn != SUCCESS) {
 							printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after bus reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);

 							SDloop->online = FALSE;
 							SDloop->host->host_failed--;
 							scsi_eh_finish_command(&SCdone, SCloop);
 						}
 					}
 				}
 			}
 		}
 	}

 	if (host->host_failed == 0) {
 		ourrtn = TRUE;
 		goto leave;
 	}
 	/*
 	 * If we ended up here, we have serious problems.  The only thing left
 	 * to try is a full host reset - perhaps the firmware on the device
 	 * crashed, or something like that.
 	 *
 	 * It is assumed that a succesful host reset will cause *all* information
 	 * about the command to be flushed from both the host adapter *and* the
 	 * device.
 	 *
 	 * FIXME(eric) - it isn't clear that devices that implement the soft reset
 	 * option can ever be cleared except via cycling the power.  The problem is
 	 * that sending the host reset command will cause the host to forget
 	 * about the pending command, but the device won't forget.  For now, we
 	 * skip the host reset option if any of the failed devices are configured
 	 * to use the soft reset option.
 	 */
 	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
 	      next_device2:
 		for (SCpnt = SDpnt->device_queue; SCpnt; SCpnt = SCpnt->next) {
 			if (SCpnt->state != SCSI_STATE_FAILED
 			    && SCpnt->state != SCSI_STATE_TIMEOUT) {
 				continue;
 			}
 			if (SDpnt->soft_reset && SCpnt->state == SCSI_STATE_TIMEOUT) {
 				/*
 				 * If this device uses the soft reset option, and this
 				 * is one of the devices acting up, then our only
 				 * option is to wait a bit, since the command is
 				 * supposedly still running.
 				 *
 				 * FIXME(eric) - right now we will just end up falling
 				 * through to the 'take device offline' case.
 				 */
 				SCSI_LOG_ERROR_RECOVERY(3,
 							printk("scsi_unjam_host: Unable to try hard host reset\n"));

 				/*
 				 * Due to the spinlock, we will never get out of this
 				 * loop without a proper wait. (DB)
 				 */
 				scsi_sleep(1 * HZ);

 				goto next_device2;
 			}
 			SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Try hard host reset\n"));

 			/*
 			 * FIXME(eric) - we need to obtain a valid SCpnt to perform this call.
 			 */
 			rtn = scsi_try_host_reset(SCpnt);
 			if (rtn == SUCCESS) {
 				/*
 				 * FIXME(eric) we assume that all commands are flushed from the
 				 * controller.  We should get a DID_RESET for all of the commands
 				 * that were pending.  We should ignore these so that we can
 				 * guarantee that we are in a consistent state.
 				 *
 				 * I believe this to be the case right now, but this needs to be
 				 * tested.
 				 */
 				for (SDloop = host->host_queue; SDloop; SDloop = SDloop->next) {
 					for (SCloop = SDloop->device_queue; SCloop; SCloop = SCloop->next) {
 						if (SCloop->state != SCSI_STATE_FAILED
 						    && SCloop->state != SCSI_STATE_TIMEOUT) {
 							continue;
 						}
 						rtn = scsi_test_unit_ready(SCloop);

 						if (rtn == SUCCESS && scsi_unit_is_ready(SCloop)) {
 							rtn = scsi_eh_retry_command(SCloop);

 							if (rtn == SUCCESS) {
 								SCpnt->host->host_failed--;
 								scsi_eh_finish_command(&SCdone, SCloop);
 							}
 						}
 						if (rtn != SUCCESS) {
 							printk(KERN_INFO "scsi: device set offline - not ready or command retry failed after host reset: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
 							SDloop->online = FALSE;
 							SDloop->host->host_failed--;
 							scsi_eh_finish_command(&SCdone, SCloop);
 						}
 					}
 				}
 			}
 		}
 	}

 	/*
 	 * If we solved all of the problems, then let's rev up the engines again.
 	 */
 	if (host->host_failed == 0) {
 		ourrtn = TRUE;
 		goto leave;
 	}
 	/*
 	 * If the HOST RESET failed, then for now we assume that the entire host
 	 * adapter is too hosed to be of any use.  For our purposes, however, it is
 	 * easier to simply take the devices offline that correspond to commands
 	 * that failed.
 	 */
 	SCSI_LOG_ERROR_RECOVERY(1, printk("scsi_unjam_host: Take device offline\n"));

 	for (SDpnt = host->host_queue; SDpnt; SDpnt = SDpnt->next) {
 		for (SCloop = SDpnt->device_queue; SCloop; SCloop = SCloop->next) {
 			if (SCloop->state == SCSI_STATE_FAILED || SCloop->state == SCSI_STATE_TIMEOUT) {
 				SDloop = SCloop->device;
 				if (SDloop->online == TRUE) {
 					printk(KERN_INFO "scsi: device set offline - command error recover failed: host %d channel %d id %d lun %d\n", SDloop->host->host_no, SDloop->channel, SDloop->id, SDloop->lun);
 					SDloop->online = FALSE;
 				}

 				/*
 				 * This should pass the failure up to the top level driver, and
 				 * it will have to try and do something intelligent with it.
 				 */
 				SCloop->host->host_failed--;

 				if (SCloop->state == SCSI_STATE_TIMEOUT) {
 					SCloop->result |= (DRIVER_TIMEOUT << 24);
 				}
 				SCSI_LOG_ERROR_RECOVERY(3, printk("Finishing command for device %d %x\n",
 				    SDloop->id, SCloop->result));

 				scsi_eh_finish_command(&SCdone, SCloop);
 			}
 		}
 	}

 	if (host->host_failed != 0) {
 		panic("scsi_unjam_host: Miscount of number of failed commands.\n");
 	}
 	SCSI_LOG_ERROR_RECOVERY(3, printk("scsi_unjam_host: Returning\n"));

 	ourrtn = FALSE;

       leave:

 	/*
 	 * We should have a list of commands that we 'finished' during the course of
 	 * error recovery.  This should be the same as the list of commands that timed out
 	 * or failed.  We are currently holding these things in a linked list - we didn't
 	 * put them in the bottom half queue because we wanted to keep things quiet while
 	 * we were working on recovery, and passing them up to the top level could easily
 	 * cause the top level to try and queue something else again.
 	 *
 	 * Start by marking that the host is no longer in error recovery.
 	 */
 	host->in_recovery = 0;

 	/*
 	 * Take the list of commands, and stick them in the bottom half queue.
 	 * The current implementation of scsi_done will do this for us - if need
 	 * be we can create a special version of this function to do the
 	 * same job for us.
 	 */
 	for (SCpnt = SCdone; SCpnt != NULL; SCpnt = SCdone) {
 		SCdone = SCpnt->bh_next;
 		SCpnt->bh_next = NULL;
                 /*
                  * Oh, this is a vile hack.  scsi_done() expects a timer
                  * to be running on the command.  If there isn't, it assumes
                  * that the command has actually timed out, and a timer
                  * handler is running.  That may well be how we got into
                  * this fix, but right now things are stable.  We add
                  * a timer back again so that we can report completion.
                  * scsi_done() will immediately remove said timer from
                  * the command, and then process it.
                  */
 		scsi_add_timer(SCpnt, 100, scsi_eh_times_out);
 		scsi_done(SCpnt);
 	}

 	return (ourrtn);
 }


 /*
  * Function:  scsi_error_handler
  *
  * Purpose:     Handle errors/timeouts of scsi commands, try and clean up
  *              and unjam the bus, and restart things.
  *
  * Arguments:   host    - host for which we are running.
  *
  * Returns:     Never returns.
  *
  * Notes:       This is always run in the context of a kernel thread.  The
  *              idea is that we start this thing up when the kernel starts
  *              up (one per host that we detect), and it immediately goes to
  *              sleep and waits for some event (i.e. failure).  When this
  *              takes place, we have the job of trying to unjam the bus
  *              and restarting things.
  *
  */
 void scsi_error_handler(void *data)
 {
 	struct Scsi_Host *host = (struct Scsi_Host *) data;
 	int rtn;
 	DECLARE_MUTEX_LOCKED(sem);

         /*
          * We only listen to signals if the HA was loaded as a module.
          * If the HA was compiled into the kernel, then we don't listen
          * to any signals.
          */
 	siginitsetinv(&current->blocked, SHUTDOWN_SIGS);

 	lock_kernel();

 	/*
 	 *    Flush resources
 	 */

 	daemonize();

 	/*
 	 * Set the name of this process.
 	 */

 	sprintf(current->comm, "scsi_eh_%d", host->host_no);

 	host->eh_wait = &sem;
 	host->ehandler = current;

 	unlock_kernel();

 	/*
 	 * Wake up the thread that created us.
 	 */
 	SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent %d\n", host->eh_notify->count.counter));

 	up(host->eh_notify);

 	while (1) {
 		/*
 		 * If we get a signal, it means we are supposed to go
 		 * away and die.  This typically happens if the user is
 		 * trying to unload a module.
 		 */
 		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler sleeping\n"));

 		/*
 		 * Note - we always use down_interruptible with the semaphore
 		 * even if the module was loaded as part of the kernel.  The
 		 * reason is that down() will cause this thread to be counted
 		 * in the load average as a running process, and down
 		 * interruptible doesn't.  Given that we need to allow this
 		 * thread to die if the driver was loaded as a module, using
 		 * semaphores isn't unreasonable.
 		 */
 		down_interruptible(&sem);
 		if (signal_pending(current))
 			break;

 		SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler waking up\n"));

 		host->eh_active = 1;

 		/*
 		 * We have a host that is failing for some reason.  Figure out
 		 * what we need to do to get it up and online again (if we can).
 		 * If we fail, we end up taking the thing offline.
 		 */
 		if (host->hostt->eh_strategy_handler != NULL) {
 			rtn = host->hostt->eh_strategy_handler(host);
 		} else {
 			rtn = scsi_unjam_host(host);
 		}

 		host->eh_active = 0;

 		/*
 		 * Note - if the above fails completely, the action is to take
 		 * individual devices offline and flush the queue of any
 		 * outstanding requests that may have been pending.  When we
 		 * restart, we restart any I/O to any other devices on the bus
 		 * which are still online.
 		 */
 		scsi_restart_operations(host);

 	}

 	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler exiting\n"));

 	/*
 	 * Make sure that nobody tries to wake us up again.
 	 */
 	host->eh_wait = NULL;

 	/*
 	 * Knock this down too.  From this point on, the host is flying
 	 * without a pilot.  If this is because the module is being unloaded,
 	 * that's fine.  If the user sent a signal to this thing, we are
 	 * potentially in real danger.
 	 */
 	host->in_recovery = 0;
 	host->eh_active = 0;
 	host->ehandler = NULL;

 	/*
 	 * If anyone is waiting for us to exit (i.e. someone trying to unload
 	 * a driver), then wake up that process to let them know we are on
 	 * the way out the door.  This may be overkill - I *think* that we
 	 * could probably just unload the driver and send the signal, and when
 	 * the error handling thread wakes up that it would just exit without
 	 * needing to touch any memory associated with the driver itself.
 	 */
 	if (host->eh_notify != NULL)
 		up(host->eh_notify);
 }

 /*
  * Function:	scsi_new_reset
  *
  * Purpose:	Send requested reset to a bus or device at any phase.
  *
  * Arguments:	SCpnt	- command ptr to send reset with (usually a dummy)
  *		flag - reset type (see scsi.h)
  *
  * Returns:	SUCCESS/FAILURE.
  *
  * Notes:	This is used by the SCSI Generic driver to provide
  *		Bus/Device reset capability.
  */
 int
 scsi_new_reset(Scsi_Cmnd *SCpnt, int flag)
 {
 	int rtn;

 	switch(flag) {
 	case SCSI_TRY_RESET_DEVICE:
 		rtn = scsi_try_bus_device_reset(SCpnt, 0);
 		if (rtn == SUCCESS)
 			break;
 		/* FALLTHROUGH */
 	case SCSI_TRY_RESET_BUS:
 		rtn = scsi_try_bus_reset(SCpnt);
 		if (rtn == SUCCESS)
 			break;
 		/* FALLTHROUGH */
 	case SCSI_TRY_RESET_HOST:
 		rtn = scsi_try_host_reset(SCpnt);
 		break;
 	default:
 		rtn = FAILED;
 	}

 	return rtn;
 }


 /*
  * Overrides for Emacs so that we follow Linus's tabbing style.
  * Emacs will notice this stuff at the end of the file and automatically
  * adjust the settings for this buffer only.  This must remain at the end
  * of the file.
  * ---------------------------------------------------------------------------
  * Local variables:
  * c-indent-level: 4
  * c-brace-imaginary-offset: 0
  * c-brace-offset: -4
  * c-argdecl-indent: 4
  * c-label-offset: -4
  * c-continued-statement-offset: 4
  * c-continued-brace-offset: 0
  * indent-tabs-mode: nil
  * tab-width: 8
  * End:
  */