Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending

Pull SCSI target updates from Nicholas Bellinger:
 "This contains the usual set of updates and bugfixes to target-core +
  existing fabric module code, along with a handful of the patches
  destined for v3.3 stable.

  It also contains the necessary target-core infrastructure pieces
  required to run using tcm_qla2xxx.ko WWPNs with the new Qlogic Fibre
  Channel fabric module currently queued in target-pending/for-next-merge,
  and coming for round 2.

  The highlights for this series include:

   - Add target_submit_tmr() helper function for fabric task management
     (andy)
   - Convert tcm_fc to use target_submit_tmr() (andy)
   - Replace various target core cmd flags with a transport state (hch)
   - Convert loopback to use workqueue submission (hch)
   - Convert target core to use array_zalloc for tpg_lun_list (joern)
   - Convert target core to use array_zalloc for device_list (joern)
   - Add target core support for TMR_ABORT_TASK (nab)
   - Add target core se_sess->sess_kref + get/put helpers (nab)
   - Add target core se_node_acl->acl_kref for ->acl_free_comp usage
     (nab)
   - Convert iscsi-target to use target_put_session + sess_kref (nab)
   - Fix tcm_fc fc_exch memory leak in ft_send_resp_status (nab)
   - Fix ib_srpt srpt_handle_cmd send_ioctx->ioctx_kref leak on
     exception (nab)
   - Fix up target core handling of short INQUIRY buffers (roland)
   - Untangle target-core front-end and back-end meanings of max_sectors
     attribute (roland)
   - Set loopback residual field for SCSI commands (roland)
   - Fix target-core 16-bit target ports for SET TARGET PORT GROUPS
     emulation (roland)

  Thanks again to Andy, Christoph, Joern, Roland, and everyone who has
  contributed this round!"

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/nab/target-pending: (64 commits)
  ib_srpt: Fix srpt_handle_cmd send_ioctx->ioctx_kref leak on exception
  loopback: Fix transport_generic_allocate_tasks error handling
  iscsi-target: remove improper externs
  iscsi-target: Remove unused variables in iscsi_target_parameters.c
  target: remove obvious warnings
  target: Use array_zalloc for device_list
  target: Use array_zalloc for tpg_lun_list
  target: Fix sense code for unsupported SERVICE ACTION IN
  target: Remove hack to make READ CAPACITY(10) lie if thin provisioning is enabled
  target: Bump core version to v4.1.0-rc2-ml + fabric versions
  tcm_fc: Fix fc_exch memory leak in ft_send_resp_status
  target: Drop unused legacy target_core_fabric_ops API callers
  iscsi-target: Convert to use target_put_session + sess_kref
  target: Convert se_node_acl->acl_group removal to use ->acl_kref
  target: Add se_node_acl->acl_kref for ->acl_free_comp usage
  target: Add se_node_acl->acl_free_comp for NodeACL release path
  target: Add se_sess->sess_kref + get/put helpers
  target: Convert session_lock to irqsave
  target: Fix typo in drivers/target
  iscsi-target: Fix dynamic -> explicit NodeACL pointer reference
  ...
diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index ebe33d9..69e2ad0 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c
@@ -1378,7 +1378,9 @@
 		break;
 	case SRPT_STATE_NEED_DATA:
 		/* DMA_TO_DEVICE (write) - RDMA read error. */
-		atomic_set(&ioctx->cmd.transport_lun_stop, 1);
+		spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
+		ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
+		spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
 		transport_generic_handle_data(&ioctx->cmd);
 		break;
 	case SRPT_STATE_CMD_RSP_SENT:
@@ -1387,7 +1389,9 @@
 		 * not been received in time.
 		 */
 		srpt_unmap_sg_to_ib_sge(ioctx->ch, ioctx);
-		atomic_set(&ioctx->cmd.transport_lun_stop, 1);
+		spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
+		ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
+		spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
 		kref_put(&ioctx->kref, srpt_put_send_ioctx_kref);
 		break;
 	case SRPT_STATE_MGMT_RSP_SENT:
@@ -1494,6 +1498,7 @@
 {
 	struct se_cmd *cmd;
 	enum srpt_command_state state;
+	unsigned long flags;
 
 	cmd = &ioctx->cmd;
 	state = srpt_get_cmd_state(ioctx);
@@ -1513,7 +1518,9 @@
 			       __func__, __LINE__, state);
 		break;
 	case SRPT_RDMA_WRITE_LAST:
-		atomic_set(&ioctx->cmd.transport_lun_stop, 1);
+		spin_lock_irqsave(&ioctx->cmd.t_state_lock, flags);
+		ioctx->cmd.transport_state |= CMD_T_LUN_STOP;
+		spin_unlock_irqrestore(&ioctx->cmd.t_state_lock, flags);
 		break;
 	default:
 		printk(KERN_ERR "%s[%d]: opcode = %u\n", __func__,
@@ -1750,6 +1757,7 @@
 		       srp_cmd->tag);
 		cmd->se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
 		cmd->scsi_sense_reason = TCM_INVALID_CDB_FIELD;
+		kref_put(&send_ioctx->kref, srpt_put_send_ioctx_kref);
 		goto send_sense;
 	}
 
@@ -1757,15 +1765,19 @@
 	cmd->data_direction = dir;
 	unpacked_lun = srpt_unpack_lun((uint8_t *)&srp_cmd->lun,
 				       sizeof(srp_cmd->lun));
-	if (transport_lookup_cmd_lun(cmd, unpacked_lun) < 0)
+	if (transport_lookup_cmd_lun(cmd, unpacked_lun) < 0) {
+		kref_put(&send_ioctx->kref, srpt_put_send_ioctx_kref);
 		goto send_sense;
+	}
 	ret = transport_generic_allocate_tasks(cmd, srp_cmd->cdb);
-	if (cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
-		srpt_queue_status(cmd);
-	else if (cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION)
-		goto send_sense;
-	else
-		WARN_ON_ONCE(ret);
+	if (ret < 0) {
+		kref_put(&send_ioctx->kref, srpt_put_send_ioctx_kref);
+		if (cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT) {
+			srpt_queue_status(cmd);
+			return 0;
+		} else
+			goto send_sense;
+	}
 
 	transport_handle_cdb_direct(cmd);
 	return 0;
@@ -1871,8 +1883,8 @@
 			TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED;
 		goto process_tmr;
 	}
-	cmd->se_tmr_req = core_tmr_alloc_req(cmd, NULL, tcm_tmr, GFP_KERNEL);
-	if (!cmd->se_tmr_req) {
+	res = core_tmr_alloc_req(cmd, NULL, tcm_tmr, GFP_KERNEL);
+	if (res < 0) {
 		send_ioctx->cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
 		send_ioctx->cmd.se_tmr_req->response = TMR_FUNCTION_REJECTED;
 		goto process_tmr;
@@ -3514,25 +3526,6 @@
 }
 
 /**
- * To do: Find out whether stop_session() has a meaning for transports
- * other than iSCSI.
- */
-static void srpt_stop_session(struct se_session *se_sess, int sess_sleep,
-			      int conn_sleep)
-{
-}
-
-static void srpt_reset_nexus(struct se_session *sess)
-{
-	printk(KERN_ERR "This is the SRP protocol, not iSCSI\n");
-}
-
-static int srpt_sess_logged_in(struct se_session *se_sess)
-{
-	return true;
-}
-
-/**
  * srpt_sess_get_index() - Return the value of scsiAttIntrPortIndex (SCSI-MIB).
  *
  * A quote from RFC 4455 (SCSI-MIB) about this MIB object:
@@ -3576,11 +3569,6 @@
 	return 0;
 }
 
-static int srpt_is_state_remove(struct se_cmd *se_cmd)
-{
-	return 0;
-}
-
 /**
  * srpt_parse_i_port_id() - Parse an initiator port ID.
  * @name: ASCII representation of a 128-bit initiator port ID.
@@ -3950,9 +3938,6 @@
 	.check_stop_free		= srpt_check_stop_free,
 	.shutdown_session		= srpt_shutdown_session,
 	.close_session			= srpt_close_session,
-	.stop_session			= srpt_stop_session,
-	.fall_back_to_erl0		= srpt_reset_nexus,
-	.sess_logged_in			= srpt_sess_logged_in,
 	.sess_get_index			= srpt_sess_get_index,
 	.sess_get_initiator_sid		= NULL,
 	.write_pending			= srpt_write_pending,
@@ -3965,7 +3950,6 @@
 	.queue_tm_rsp			= srpt_queue_response,
 	.get_fabric_sense_len		= srpt_get_fabric_sense_len,
 	.set_fabric_sense_len		= srpt_set_fabric_sense_len,
-	.is_state_remove		= srpt_is_state_remove,
 	/*
 	 * Setup function pointers for generic logic in
 	 * target_core_fabric_configfs.c
diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index 84a78af..e897ce9 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c
@@ -1682,9 +1682,7 @@
 
 	memset(fcp_cmnd, 0, sizeof(struct fcp_cmnd));
 
-	int_to_scsilun(sc_cmd->device->lun,
-			(struct scsi_lun *) fcp_cmnd->fc_lun);
-
+	int_to_scsilun(sc_cmd->device->lun, &fcp_cmnd->fc_lun);
 
 	fcp_cmnd->fc_dl = htonl(io_req->data_xfer_len);
 	memcpy(fcp_cmnd->fc_cdb, sc_cmd->cmnd, sc_cmd->cmd_len);
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index b577c90..f735730 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -1074,8 +1074,7 @@
 	fsp->cdb_cmd.fc_dl = htonl(fsp->data_len);
 	fsp->cdb_cmd.fc_flags = fsp->req_flags & ~FCP_CFL_LEN_MASK;
 
-	int_to_scsilun(fsp->cmd->device->lun,
-		       (struct scsi_lun *)fsp->cdb_cmd.fc_lun);
+	int_to_scsilun(fsp->cmd->device->lun, &fsp->cdb_cmd.fc_lun);
 	memcpy(fsp->cdb_cmd.fc_cdb, fsp->cmd->cmnd, fsp->cmd->cmd_len);
 
 	spin_lock_irqsave(&si->scsi_queue_lock, flags);
@@ -1257,7 +1256,7 @@
 
 	fsp->cdb_cmd.fc_dl = htonl(fsp->data_len);
 	fsp->cdb_cmd.fc_tm_flags = FCP_TMF_LUN_RESET;
-	int_to_scsilun(lun, (struct scsi_lun *)fsp->cdb_cmd.fc_lun);
+	int_to_scsilun(lun, &fsp->cdb_cmd.fc_lun);
 
 	fsp->wait_for_comp = 1;
 	init_completion(&fsp->tm_done);
diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 1c6f700..8b1d5e6 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c
@@ -781,7 +781,7 @@
 	struct scatterlist *sgl;
 	u32 length = cmd->se_cmd.data_length;
 	int nents = DIV_ROUND_UP(length, PAGE_SIZE);
-	int i = 0, ret;
+	int i = 0, j = 0, ret;
 	/*
 	 * If no SCSI payload is present, allocate the default iovecs used for
 	 * iSCSI PDU Header
@@ -822,17 +822,15 @@
 	 */
         ret = iscsit_allocate_iovecs(cmd);
         if (ret < 0)
-		goto page_alloc_failed;
+		return -ENOMEM;
 
 	return 0;
 
 page_alloc_failed:
-	while (i >= 0) {
-		__free_page(sg_page(&sgl[i]));
-		i--;
-	}
-	kfree(cmd->t_mem_sg);
-	cmd->t_mem_sg = NULL;
+	while (j < i)
+		__free_page(sg_page(&sgl[j++]));
+
+	kfree(sgl);
 	return -ENOMEM;
 }
 
@@ -1007,8 +1005,8 @@
 	/*
 	 * The CDB is going to an se_device_t.
 	 */
-	ret = iscsit_get_lun_for_cmd(cmd, hdr->cdb,
-				get_unaligned_le64(&hdr->lun));
+	ret = transport_lookup_cmd_lun(&cmd->se_cmd,
+				       scsilun_to_int(&hdr->lun));
 	if (ret < 0) {
 		if (cmd->se_cmd.scsi_sense_reason == TCM_NON_EXISTENT_LUN) {
 			pr_debug("Responding to non-acl'ed,"
@@ -1364,7 +1362,7 @@
 		 * outstanding_r2ts reaches zero, go ahead and send the delayed
 		 * TASK_ABORTED status.
 		 */
-		if (atomic_read(&se_cmd->t_transport_aborted) != 0) {
+		if (se_cmd->transport_state & CMD_T_ABORTED) {
 			if (hdr->flags & ISCSI_FLAG_CMD_FINAL)
 				if (--cmd->outstanding_r2ts < 1) {
 					iscsit_stop_dataout_timer(cmd);
@@ -1472,14 +1470,12 @@
 	unsigned char *ping_data = NULL;
 	int cmdsn_ret, niov = 0, ret = 0, rx_got, rx_size;
 	u32 checksum, data_crc, padding = 0, payload_length;
-	u64 lun;
 	struct iscsi_cmd *cmd = NULL;
 	struct kvec *iov = NULL;
 	struct iscsi_nopout *hdr;
 
 	hdr			= (struct iscsi_nopout *) buf;
 	payload_length		= ntoh24(hdr->dlength);
-	lun			= get_unaligned_le64(&hdr->lun);
 	hdr->itt		= be32_to_cpu(hdr->itt);
 	hdr->ttt		= be32_to_cpu(hdr->ttt);
 	hdr->cmdsn		= be32_to_cpu(hdr->cmdsn);
@@ -1689,13 +1685,11 @@
 	struct se_tmr_req *se_tmr;
 	struct iscsi_tmr_req *tmr_req;
 	struct iscsi_tm *hdr;
-	u32 payload_length;
 	int out_of_order_cmdsn = 0;
 	int ret;
 	u8 function;
 
 	hdr			= (struct iscsi_tm *) buf;
-	payload_length		= ntoh24(hdr->dlength);
 	hdr->itt		= be32_to_cpu(hdr->itt);
 	hdr->rtt		= be32_to_cpu(hdr->rtt);
 	hdr->cmdsn		= be32_to_cpu(hdr->cmdsn);
@@ -1747,8 +1741,8 @@
 	 * Locate the struct se_lun for all TMRs not related to ERL=2 TASK_REASSIGN
 	 */
 	if (function != ISCSI_TM_FUNC_TASK_REASSIGN) {
-		ret = iscsit_get_lun_for_tmr(cmd,
-				get_unaligned_le64(&hdr->lun));
+		ret = transport_lookup_tmr_lun(&cmd->se_cmd,
+					       scsilun_to_int(&hdr->lun));
 		if (ret < 0) {
 			cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION;
 			se_tmr->response = ISCSI_TMF_RSP_NO_LUN;
@@ -2207,14 +2201,10 @@
 	struct iscsi_conn *conn,
 	unsigned char *buf)
 {
-	u32 unpacked_lun;
-	u64 lun;
 	struct iscsi_snack *hdr;
 
 	hdr			= (struct iscsi_snack *) buf;
 	hdr->flags		&= ~ISCSI_FLAG_CMD_FINAL;
-	lun			= get_unaligned_le64(&hdr->lun);
-	unpacked_lun		= scsilun_to_int((struct scsi_lun *)&lun);
 	hdr->itt		= be32_to_cpu(hdr->itt);
 	hdr->ttt		= be32_to_cpu(hdr->ttt);
 	hdr->exp_statsn		= be32_to_cpu(hdr->exp_statsn);
@@ -3514,7 +3504,6 @@
 	struct iscsi_cmd *cmd = NULL;
 	struct iscsi_conn *conn;
 	struct iscsi_queue_req *qr = NULL;
-	struct se_cmd *se_cmd;
 	struct iscsi_thread_set *ts = arg;
 	/*
 	 * Allow ourselves to be interrupted by SIGINT so that a
@@ -3697,8 +3686,6 @@
 				goto transport_err;
 			}
 
-			se_cmd = &cmd->se_cmd;
-
 			if (map_sg && !conn->conn_ops->IFMarker) {
 				if (iscsit_fe_sendpage_sg(cmd, conn) < 0) {
 					conn->tx_response_queue = 0;
@@ -4171,7 +4158,7 @@
 	if (!atomic_read(&sess->session_reinstatement) &&
 	     atomic_read(&sess->session_fall_back_to_erl0)) {
 		spin_unlock_bh(&sess->conn_lock);
-		iscsit_close_session(sess);
+		target_put_session(sess->se_sess);
 
 		return 0;
 	} else if (atomic_read(&sess->session_logout)) {
@@ -4292,7 +4279,7 @@
 	iscsit_dec_conn_usage_count(conn);
 	iscsit_stop_session(sess, 1, 1);
 	iscsit_dec_session_usage_count(sess);
-	iscsit_close_session(sess);
+	target_put_session(sess->se_sess);
 }
 
 static void iscsit_logout_post_handler_samecid(
@@ -4458,7 +4445,7 @@
 	} else
 		spin_unlock_bh(&sess->conn_lock);
 
-	iscsit_close_session(sess);
+	target_put_session(sess->se_sess);
 	return 0;
 }
 
diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c
index 6b35b37..00c58cc 100644
--- a/drivers/target/iscsi/iscsi_target_configfs.c
+++ b/drivers/target/iscsi/iscsi_target_configfs.c
@@ -812,9 +812,6 @@
 	if (!se_nacl_new)
 		return ERR_PTR(-ENOMEM);
 
-	acl = container_of(se_nacl_new, struct iscsi_node_acl,
-				se_node_acl);
-
 	cmdsn_depth = ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth;
 	/*
 	 * se_nacl_new may be released by core_tpg_add_initiator_node_acl()
@@ -825,7 +822,8 @@
 	if (IS_ERR(se_nacl))
 		return se_nacl;
 
-	stats_cg = &acl->se_node_acl.acl_fabric_stat_group;
+	acl = container_of(se_nacl, struct iscsi_node_acl, se_node_acl);
+	stats_cg = &se_nacl->acl_fabric_stat_group;
 
 	stats_cg->default_groups = kzalloc(sizeof(struct config_group) * 2,
 				GFP_KERNEL);
@@ -1505,28 +1503,6 @@
 	return cmd->i_state;
 }
 
-static int iscsi_is_state_remove(struct se_cmd *se_cmd)
-{
-	struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
-
-	return (cmd->i_state == ISTATE_REMOVE);
-}
-
-static int lio_sess_logged_in(struct se_session *se_sess)
-{
-	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
-	int ret;
-	/*
-	 * Called with spin_lock_bh(&tpg_lock); and
-	 * spin_lock(&se_tpg->session_lock); held.
-	 */
-	spin_lock(&sess->conn_lock);
-	ret = (sess->session_state != TARG_SESS_STATE_LOGGED_IN);
-	spin_unlock(&sess->conn_lock);
-
-	return ret;
-}
-
 static u32 lio_sess_get_index(struct se_session *se_sess)
 {
 	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
@@ -1700,8 +1676,8 @@
 	atomic_set(&sess->session_reinstatement, 1);
 	spin_unlock(&sess->conn_lock);
 
-	iscsit_inc_session_usage_count(sess);
 	iscsit_stop_time2retain_timer(sess);
+	iscsit_stop_session(sess, 1, 1);
 
 	return 1;
 }
@@ -1717,28 +1693,9 @@
 	 * If the iSCSI Session for the iSCSI Initiator Node exists,
 	 * forcefully shutdown the iSCSI NEXUS.
 	 */
-	iscsit_stop_session(sess, 1, 1);
-	iscsit_dec_session_usage_count(sess);
 	iscsit_close_session(sess);
 }
 
-static void lio_tpg_stop_session(
-	struct se_session *se_sess,
-	int sess_sleep,
-	int conn_sleep)
-{
-	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
-
-	iscsit_stop_session(sess, sess_sleep, conn_sleep);
-}
-
-static void lio_tpg_fall_back_to_erl0(struct se_session *se_sess)
-{
-	struct iscsi_session *sess = se_sess->fabric_sess_ptr;
-
-	iscsit_fall_back_to_erl0(sess);
-}
-
 static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg)
 {
 	struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr;
@@ -1802,9 +1759,6 @@
 	fabric->tf_ops.release_cmd = &lio_release_cmd;
 	fabric->tf_ops.shutdown_session = &lio_tpg_shutdown_session;
 	fabric->tf_ops.close_session = &lio_tpg_close_session;
-	fabric->tf_ops.stop_session = &lio_tpg_stop_session;
-	fabric->tf_ops.fall_back_to_erl0 = &lio_tpg_fall_back_to_erl0;
-	fabric->tf_ops.sess_logged_in = &lio_sess_logged_in;
 	fabric->tf_ops.sess_get_index = &lio_sess_get_index;
 	fabric->tf_ops.sess_get_initiator_sid = &lio_sess_get_initiator_sid;
 	fabric->tf_ops.write_pending = &lio_write_pending;
@@ -1818,7 +1772,6 @@
 	fabric->tf_ops.queue_tm_rsp = &lio_queue_tm_rsp;
 	fabric->tf_ops.set_fabric_sense_len = &lio_set_fabric_sense_len;
 	fabric->tf_ops.get_fabric_sense_len = &lio_get_fabric_sense_len;
-	fabric->tf_ops.is_state_remove = &iscsi_is_state_remove;
 	/*
 	 * Setup function pointers for generic logic in target_core_fabric_configfs.c
 	 */
diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h
index 0ec3b77..2aaee7e 100644
--- a/drivers/target/iscsi/iscsi_target_core.h
+++ b/drivers/target/iscsi/iscsi_target_core.h
@@ -9,7 +9,7 @@
 #include <scsi/iscsi_proto.h>
 #include <target/target_core_base.h>
 
-#define ISCSIT_VERSION			"v4.1.0-rc1"
+#define ISCSIT_VERSION			"v4.1.0-rc2"
 #define ISCSI_MAX_DATASN_MISSING_COUNT	16
 #define ISCSI_TX_THREAD_TCP_TIMEOUT	2
 #define ISCSI_RX_THREAD_TCP_TIMEOUT	2
diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c
index f63ea35..bcc4098 100644
--- a/drivers/target/iscsi/iscsi_target_device.c
+++ b/drivers/target/iscsi/iscsi_target_device.c
@@ -28,25 +28,6 @@
 #include "iscsi_target_tpg.h"
 #include "iscsi_target_util.h"
 
-int iscsit_get_lun_for_tmr(
-	struct iscsi_cmd *cmd,
-	u64 lun)
-{
-	u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
-
-	return transport_lookup_tmr_lun(&cmd->se_cmd, unpacked_lun);
-}
-
-int iscsit_get_lun_for_cmd(
-	struct iscsi_cmd *cmd,
-	unsigned char *cdb,
-	u64 lun)
-{
-	u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
-
-	return transport_lookup_cmd_lun(&cmd->se_cmd, unpacked_lun);
-}
-
 void iscsit_determine_maxcmdsn(struct iscsi_session *sess)
 {
 	struct se_node_acl *se_nacl;
diff --git a/drivers/target/iscsi/iscsi_target_device.h b/drivers/target/iscsi/iscsi_target_device.h
index bef1cad..a0e2df9 100644
--- a/drivers/target/iscsi/iscsi_target_device.h
+++ b/drivers/target/iscsi/iscsi_target_device.h
@@ -1,8 +1,6 @@
 #ifndef ISCSI_TARGET_DEVICE_H
 #define ISCSI_TARGET_DEVICE_H
 
-extern int iscsit_get_lun_for_tmr(struct iscsi_cmd *, u64);
-extern int iscsit_get_lun_for_cmd(struct iscsi_cmd *, unsigned char *, u64);
 extern void iscsit_determine_maxcmdsn(struct iscsi_session *);
 extern void iscsit_increment_maxcmdsn(struct iscsi_cmd *, struct iscsi_session *);
 
diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c
index 4784511..1ab0560 100644
--- a/drivers/target/iscsi/iscsi_target_erl0.c
+++ b/drivers/target/iscsi/iscsi_target_erl0.c
@@ -783,7 +783,7 @@
 	}
 
 	spin_unlock_bh(&se_tpg->session_lock);
-	iscsit_close_session(sess);
+	target_put_session(sess->se_sess);
 }
 
 extern void iscsit_start_time2retain_handler(struct iscsi_session *sess)
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
index 27901e3..006f605 100644
--- a/drivers/target/iscsi/iscsi_target_erl1.c
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -416,7 +416,7 @@
 	struct iscsi_datain_req *dr;
 	struct se_cmd *se_cmd = &cmd->se_cmd;
 
-	if (!atomic_read(&se_cmd->t_transport_complete)) {
+	if (!(se_cmd->transport_state & CMD_T_COMPLETE)) {
 		pr_err("Ignoring ITT: 0x%08x Data SNACK\n",
 				cmd->init_task_tag);
 		return 0;
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 1ee33a8..a3656c9 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -181,14 +181,16 @@
 	if (sess->session_state == TARG_SESS_STATE_FAILED) {
 		spin_unlock_bh(&sess->conn_lock);
 		iscsit_dec_session_usage_count(sess);
-		return iscsit_close_session(sess);
+		target_put_session(sess->se_sess);
+		return 0;
 	}
 	spin_unlock_bh(&sess->conn_lock);
 
 	iscsit_stop_session(sess, 1, 1);
 	iscsit_dec_session_usage_count(sess);
 
-	return iscsit_close_session(sess);
+	target_put_session(sess->se_sess);
+	return 0;
 }
 
 static void iscsi_login_set_conn_values(
@@ -881,7 +883,7 @@
 static int __iscsi_target_login_thread(struct iscsi_np *np)
 {
 	u8 buffer[ISCSI_HDR_LEN], iscsi_opcode, zero_tsih = 0;
-	int err, ret = 0, ip_proto, sock_type, set_sctp_conn_flag, stop;
+	int err, ret = 0, set_sctp_conn_flag, stop;
 	struct iscsi_conn *conn = NULL;
 	struct iscsi_login *login;
 	struct iscsi_portal_group *tpg = NULL;
@@ -894,8 +896,6 @@
 	flush_signals(current);
 	set_sctp_conn_flag = 0;
 	sock = np->np_socket;
-	ip_proto = np->np_ip_proto;
-	sock_type = np->np_sock_type;
 
 	spin_lock_bh(&np->np_thread_lock);
 	if (np->np_thread_state == ISCSI_NP_THREAD_RESET) {
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index e89fa74..2dba448 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -90,7 +90,7 @@
 		return -1;
 
 	if (len > max_length) {
-		pr_err("Length of input: %d exeeds max_length:"
+		pr_err("Length of input: %d exceeds max_length:"
 			" %d\n", len, max_length);
 		return -1;
 	}
@@ -173,13 +173,11 @@
 	struct iscsi_conn *conn,
 	struct iscsi_login *login)
 {
-	int req_csg, req_nsg, rsp_csg, rsp_nsg;
+	int req_csg, req_nsg;
 	u32 payload_length;
 	struct iscsi_login_req *login_req;
-	struct iscsi_login_rsp *login_rsp;
 
 	login_req = (struct iscsi_login_req *) login->req;
-	login_rsp = (struct iscsi_login_rsp *) login->rsp;
 	payload_length = ntoh24(login_req->dlength);
 
 	switch (login_req->opcode & ISCSI_OPCODE_MASK) {
@@ -203,9 +201,7 @@
 	}
 
 	req_csg = (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
-	rsp_csg = (login_rsp->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
 	req_nsg = (login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
-	rsp_nsg = (login_rsp->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK);
 
 	if (req_csg != login->current_stage) {
 		pr_err("Initiator unexpectedly changed login stage"
@@ -753,12 +749,10 @@
 	struct iscsi_session *sess = conn->sess;
 	struct iscsi_tiqn *tiqn;
 	struct iscsi_login_req *login_req;
-	struct iscsi_targ_login_rsp *login_rsp;
 	u32 payload_length;
 	int sessiontype = 0, ret = 0;
 
 	login_req = (struct iscsi_login_req *) login->req;
-	login_rsp = (struct iscsi_targ_login_rsp *) login->rsp;
 	payload_length = ntoh24(login_req->dlength);
 
 	login->first_request	= 1;
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c
index b3c699c..11dc293 100644
--- a/drivers/target/iscsi/iscsi_target_nodeattrib.c
+++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c
@@ -49,7 +49,7 @@
 	a->default_erl = NA_DEFAULT_ERL;
 }
 
-extern int iscsit_na_dataout_timeout(
+int iscsit_na_dataout_timeout(
 	struct iscsi_node_acl *acl,
 	u32 dataout_timeout)
 {
@@ -74,7 +74,7 @@
 	return 0;
 }
 
-extern int iscsit_na_dataout_timeout_retries(
+int iscsit_na_dataout_timeout_retries(
 	struct iscsi_node_acl *acl,
 	u32 dataout_timeout_retries)
 {
@@ -100,7 +100,7 @@
 	return 0;
 }
 
-extern int iscsit_na_nopin_timeout(
+int iscsit_na_nopin_timeout(
 	struct iscsi_node_acl *acl,
 	u32 nopin_timeout)
 {
@@ -155,7 +155,7 @@
 	return 0;
 }
 
-extern int iscsit_na_nopin_response_timeout(
+int iscsit_na_nopin_response_timeout(
 	struct iscsi_node_acl *acl,
 	u32 nopin_response_timeout)
 {
@@ -181,7 +181,7 @@
 	return 0;
 }
 
-extern int iscsit_na_random_datain_pdu_offsets(
+int iscsit_na_random_datain_pdu_offsets(
 	struct iscsi_node_acl *acl,
 	u32 random_datain_pdu_offsets)
 {
@@ -201,7 +201,7 @@
 	return 0;
 }
 
-extern int iscsit_na_random_datain_seq_offsets(
+int iscsit_na_random_datain_seq_offsets(
 	struct iscsi_node_acl *acl,
 	u32 random_datain_seq_offsets)
 {
@@ -221,7 +221,7 @@
 	return 0;
 }
 
-extern int iscsit_na_random_r2t_offsets(
+int iscsit_na_random_r2t_offsets(
 	struct iscsi_node_acl *acl,
 	u32 random_r2t_offsets)
 {
@@ -241,7 +241,7 @@
 	return 0;
 }
 
-extern int iscsit_na_default_erl(
+int iscsit_na_default_erl(
 	struct iscsi_node_acl *acl,
 	u32 default_erl)
 {
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
index 5b77316..eb05c9d 100644
--- a/drivers/target/iscsi/iscsi_target_parameters.c
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -874,8 +874,8 @@
 static int iscsi_check_numerical_range_value(struct iscsi_param *param, char *value)
 {
 	char *left_val_ptr = NULL, *right_val_ptr = NULL;
-	char *tilde_ptr = NULL, *tmp_ptr = NULL;
-	u32 left_val, right_val, local_left_val, local_right_val;
+	char *tilde_ptr = NULL;
+	u32 left_val, right_val, local_left_val;
 
 	if (strcmp(param->name, IFMARKINT) &&
 	    strcmp(param->name, OFMARKINT)) {
@@ -903,8 +903,8 @@
 	if (iscsi_check_numerical_value(param, right_val_ptr) < 0)
 		return -1;
 
-	left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
-	right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+	left_val = simple_strtoul(left_val_ptr, NULL, 0);
+	right_val = simple_strtoul(right_val_ptr, NULL, 0);
 	*tilde_ptr = '~';
 
 	if (right_val < left_val) {
@@ -928,8 +928,7 @@
 	left_val_ptr = param->value;
 	right_val_ptr = param->value + strlen(left_val_ptr) + 1;
 
-	local_left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
-	local_right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
+	local_left_val = simple_strtoul(left_val_ptr, NULL, 0);
 	*tilde_ptr = '~';
 
 	if (param->set_param) {
@@ -1189,7 +1188,7 @@
 	if (IS_TYPE_NUMBER_RANGE(param)) {
 		u32 left_val = 0, right_val = 0, recieved_value = 0;
 		char *left_val_ptr = NULL, *right_val_ptr = NULL;
-		char *tilde_ptr = NULL, *tmp_ptr = NULL;
+		char *tilde_ptr = NULL;
 
 		if (!strcmp(value, IRRELEVANT) || !strcmp(value, REJECT)) {
 			if (iscsi_update_param_value(param, value) < 0)
@@ -1213,9 +1212,9 @@
 
 		left_val_ptr = param->value;
 		right_val_ptr = param->value + strlen(left_val_ptr) + 1;
-		left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0);
-		right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0);
-		recieved_value = simple_strtoul(value, &tmp_ptr, 0);
+		left_val = simple_strtoul(left_val_ptr, NULL, 0);
+		right_val = simple_strtoul(right_val_ptr, NULL, 0);
+		recieved_value = simple_strtoul(value, NULL, 0);
 
 		*tilde_ptr = '~';
 
diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c
index 255ed35..e01da9d 100644
--- a/drivers/target/iscsi/iscsi_target_tmr.c
+++ b/drivers/target/iscsi/iscsi_target_tmr.c
@@ -250,7 +250,7 @@
 	 * so if we have received all DataOUT we can safety ignore Initiator.
 	 */
 	if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
-		if (!atomic_read(&cmd->se_cmd.t_transport_sent)) {
+		if (!(cmd->se_cmd.transport_state & CMD_T_SENT)) {
 			pr_debug("WRITE ITT: 0x%08x: t_state: %d"
 				" never sent to transport\n",
 				cmd->init_task_tag, cmd->se_cmd.t_state);
@@ -314,7 +314,7 @@
 		cmd->acked_data_sn = (tmr_req->exp_data_sn - 1);
 	}
 
-	if (!atomic_read(&cmd->se_cmd.t_transport_sent)) {
+	if (!(cmd->se_cmd.transport_state & CMD_T_SENT)) {
 		pr_debug("READ ITT: 0x%08x: t_state: %d never sent to"
 			" transport\n", cmd->init_task_tag,
 			cmd->se_cmd.t_state);
@@ -322,7 +322,7 @@
 		return 0;
 	}
 
-	if (!atomic_read(&se_cmd->t_transport_complete)) {
+	if (!(se_cmd->transport_state & CMD_T_COMPLETE)) {
 		pr_err("READ ITT: 0x%08x: t_state: %d, never returned"
 			" from transport\n", cmd->init_task_tag,
 			cmd->se_cmd.t_state);
diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c
index 0baac5b..977e1cf 100644
--- a/drivers/target/iscsi/iscsi_target_tq.c
+++ b/drivers/target/iscsi/iscsi_target_tq.c
@@ -536,12 +536,6 @@
 		return -ENOMEM;
 	}
 
-	spin_lock_init(&active_ts_lock);
-	spin_lock_init(&inactive_ts_lock);
-	spin_lock_init(&ts_bitmap_lock);
-	INIT_LIST_HEAD(&active_ts_list);
-	INIT_LIST_HEAD(&inactive_ts_list);
-
 	return 0;
 }
 
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 11287e1..4eba86d 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -229,6 +229,7 @@
 {
 	struct iscsi_cmd *cmd;
 	struct se_cmd *se_cmd;
+	int rc;
 	u8 tcm_function;
 
 	cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
@@ -286,10 +287,8 @@
 		goto out;
 	}
 
-	se_cmd->se_tmr_req = core_tmr_alloc_req(se_cmd,
-				cmd->tmr_req, tcm_function,
-				GFP_KERNEL);
-	if (!se_cmd->se_tmr_req)
+	rc = core_tmr_alloc_req(se_cmd, cmd->tmr_req, tcm_function, GFP_KERNEL);
+	if (rc < 0)
 		goto out;
 
 	cmd->tmr_req->se_tmr_req = se_cmd->se_tmr_req;
diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index c47ff7f..a9b4eee 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c
@@ -44,138 +44,12 @@
 /* Local pointer to allocated TCM configfs fabric module */
 static struct target_fabric_configfs *tcm_loop_fabric_configfs;
 
+static struct workqueue_struct *tcm_loop_workqueue;
 static struct kmem_cache *tcm_loop_cmd_cache;
 
 static int tcm_loop_hba_no_cnt;
 
-/*
- * Allocate a tcm_loop cmd descriptor from target_core_mod code
- *
- * Can be called from interrupt context in tcm_loop_queuecommand() below
- */
-static struct se_cmd *tcm_loop_allocate_core_cmd(
-	struct tcm_loop_hba *tl_hba,
-	struct se_portal_group *se_tpg,
-	struct scsi_cmnd *sc)
-{
-	struct se_cmd *se_cmd;
-	struct se_session *se_sess;
-	struct tcm_loop_nexus *tl_nexus = tl_hba->tl_nexus;
-	struct tcm_loop_cmd *tl_cmd;
-	int sam_task_attr;
-
-	if (!tl_nexus) {
-		scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus"
-				" does not exist\n");
-		set_host_byte(sc, DID_ERROR);
-		return NULL;
-	}
-	se_sess = tl_nexus->se_sess;
-
-	tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_ATOMIC);
-	if (!tl_cmd) {
-		pr_err("Unable to allocate struct tcm_loop_cmd\n");
-		set_host_byte(sc, DID_ERROR);
-		return NULL;
-	}
-	se_cmd = &tl_cmd->tl_se_cmd;
-	/*
-	 * Save the pointer to struct scsi_cmnd *sc
-	 */
-	tl_cmd->sc = sc;
-	/*
-	 * Locate the SAM Task Attr from struct scsi_cmnd *
-	 */
-	if (sc->device->tagged_supported) {
-		switch (sc->tag) {
-		case HEAD_OF_QUEUE_TAG:
-			sam_task_attr = MSG_HEAD_TAG;
-			break;
-		case ORDERED_QUEUE_TAG:
-			sam_task_attr = MSG_ORDERED_TAG;
-			break;
-		default:
-			sam_task_attr = MSG_SIMPLE_TAG;
-			break;
-		}
-	} else
-		sam_task_attr = MSG_SIMPLE_TAG;
-
-	/*
-	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
-	 */
-	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess,
-			scsi_bufflen(sc), sc->sc_data_direction, sam_task_attr,
-			&tl_cmd->tl_sense_buf[0]);
-
-	if (scsi_bidi_cmnd(sc))
-		se_cmd->se_cmd_flags |= SCF_BIDI;
-
-	/*
-	 * Locate the struct se_lun pointer and attach it to struct se_cmd
-	 */
-	if (transport_lookup_cmd_lun(se_cmd, tl_cmd->sc->device->lun) < 0) {
-		kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
-		set_host_byte(sc, DID_NO_CONNECT);
-		return NULL;
-	}
-
-	return se_cmd;
-}
-
-/*
- * Called by struct target_core_fabric_ops->new_cmd_map()
- *
- * Always called in process context.  A non zero return value
- * here will signal to handle an exception based on the return code.
- */
-static int tcm_loop_new_cmd_map(struct se_cmd *se_cmd)
-{
-	struct tcm_loop_cmd *tl_cmd = container_of(se_cmd,
-				struct tcm_loop_cmd, tl_se_cmd);
-	struct scsi_cmnd *sc = tl_cmd->sc;
-	struct scatterlist *sgl_bidi = NULL;
-	u32 sgl_bidi_count = 0;
-	int ret;
-	/*
-	 * Allocate the necessary tasks to complete the received CDB+data
-	 */
-	ret = transport_generic_allocate_tasks(se_cmd, sc->cmnd);
-	if (ret != 0)
-		return ret;
-	/*
-	 * For BIDI commands, pass in the extra READ buffer
-	 * to transport_generic_map_mem_to_cmd() below..
-	 */
-	if (se_cmd->se_cmd_flags & SCF_BIDI) {
-		struct scsi_data_buffer *sdb = scsi_in(sc);
-
-		sgl_bidi = sdb->table.sgl;
-		sgl_bidi_count = sdb->table.nents;
-	}
-	/*
-	 * Because some userspace code via scsi-generic do not memset their
-	 * associated read buffers, go ahead and do that here for type
-	 * SCF_SCSI_CONTROL_SG_IO_CDB.  Also note that this is currently
-	 * guaranteed to be a single SGL for SCF_SCSI_CONTROL_SG_IO_CDB
-	 * by target core in transport_generic_allocate_tasks() ->
-	 * transport_generic_cmd_sequencer().
-	 */
-	if (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_SG_IO_CDB &&
-	    se_cmd->data_direction == DMA_FROM_DEVICE) {
-		struct scatterlist *sg = scsi_sglist(sc);
-		unsigned char *buf = kmap(sg_page(sg)) + sg->offset;
-
-		if (buf != NULL) {
-			memset(buf, 0, sg->length);
-			kunmap(sg_page(sg));
-		}
-	}
-
-	/* Tell the core about our preallocated memory */
-	return transport_generic_map_mem_to_cmd(se_cmd, scsi_sglist(sc),
-			scsi_sg_count(sc), sgl_bidi, sgl_bidi_count);
-}
+static int tcm_loop_queue_status(struct se_cmd *se_cmd);
 
 /*
  * Called from struct target_core_fabric_ops->check_stop_free()
@@ -187,7 +61,7 @@
 	 * pointer.  These will be released directly in tcm_loop_device_reset()
 	 * with transport_generic_free_cmd().
 	 */
-	if (se_cmd->se_tmr_req)
+	if (se_cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
 		return 0;
 	/*
 	 * Release the struct se_cmd, which will make a callback to release
@@ -263,50 +137,152 @@
 }
 
 /*
- * Main entry point from struct scsi_host_template for incoming SCSI CDB+Data
- * from Linux/SCSI subsystem for SCSI low level device drivers (LLDs)
+ * Locate the SAM Task Attr from struct scsi_cmnd *
  */
-static int tcm_loop_queuecommand(
-	struct Scsi_Host *sh,
-	struct scsi_cmnd *sc)
+static int tcm_loop_sam_attr(struct scsi_cmnd *sc)
 {
-	struct se_cmd *se_cmd;
-	struct se_portal_group *se_tpg;
+	/* Without tagged queueing support only the simple attribute applies */
+	if (!sc->device->tagged_supported)
+		return MSG_SIMPLE_TAG;
+
+	switch (sc->tag) {
+	case HEAD_OF_QUEUE_TAG:
+		return MSG_HEAD_TAG;
+	case ORDERED_QUEUE_TAG:
+		return MSG_ORDERED_TAG;
+	default:
+		return MSG_SIMPLE_TAG;
+	}
+}
+
+/* Work item handler: set up the se_cmd for tl_cmd->sc and submit it */
+static void tcm_loop_submission_work(struct work_struct *work)
+{
+	struct tcm_loop_cmd *tl_cmd =
+		container_of(work, struct tcm_loop_cmd, work);
+	struct se_cmd *se_cmd = &tl_cmd->tl_se_cmd;
+	struct scsi_cmnd *sc = tl_cmd->sc;
+	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_hba *tl_hba;
 	struct tcm_loop_tpg *tl_tpg;
+	struct scatterlist *sgl_bidi = NULL;
+	u32 sgl_bidi_count = 0;
+	int ret;
 
-	pr_debug("tcm_loop_queuecommand() %d:%d:%d:%d got CDB: 0x%02x"
-		" scsi_buf_len: %u\n", sc->device->host->host_no,
-		sc->device->id, sc->device->channel, sc->device->lun,
-		sc->cmnd[0], scsi_bufflen(sc));
-	/*
-	 * Locate the tcm_loop_hba_t pointer
-	 */
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
 	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
+
 	/*
 	 * Ensure that this tl_tpg reference from the incoming sc->device->id
 	 * has already been configured via tcm_loop_make_naa_tpg().
 	 */
 	if (!tl_tpg->tl_hba) {
 		set_host_byte(sc, DID_NO_CONNECT);
-		sc->scsi_done(sc);
-		return 0;
+		goto out_done;
 	}
-	se_tpg = &tl_tpg->tl_se_tpg;
+
+	tl_nexus = tl_hba->tl_nexus;
+	if (!tl_nexus) {
+		scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus"
+				" does not exist\n");
+		set_host_byte(sc, DID_ERROR);
+		goto out_done;
+	}
+
+	transport_init_se_cmd(se_cmd, tl_tpg->tl_se_tpg.se_tpg_tfo,
+			tl_nexus->se_sess,
+			scsi_bufflen(sc), sc->sc_data_direction,
+			tcm_loop_sam_attr(sc), &tl_cmd->tl_sense_buf[0]);
+
+	if (scsi_bidi_cmnd(sc)) {
+		struct scsi_data_buffer *sdb = scsi_in(sc);
+
+		sgl_bidi = sdb->table.sgl;
+		sgl_bidi_count = sdb->table.nents;
+		se_cmd->se_cmd_flags |= SCF_BIDI;
+	}
+
+	if (transport_lookup_cmd_lun(se_cmd, tl_cmd->sc->device->lun) < 0) {
+		set_host_byte(sc, DID_NO_CONNECT);
+		goto out_done;
+	}
+
 	/*
-	 * Determine the SAM Task Attribute and allocate tl_cmd and
-	 * tl_cmd->tl_se_cmd from TCM infrastructure
+	 * Because some userspace code via scsi-generic do not memset their
+	 * associated read buffers, go ahead and do that here for type
+	 * SCF_SCSI_CONTROL_SG_IO_CDB.  Also note that this is currently
+	 * guaranteed to be a single SGL for SCF_SCSI_CONTROL_SG_IO_CDB
+	 * by target core in transport_generic_allocate_tasks() ->
+	 * transport_generic_cmd_sequencer().
 	 */
-	se_cmd = tcm_loop_allocate_core_cmd(tl_hba, se_tpg, sc);
-	if (!se_cmd) {
+	if (se_cmd->se_cmd_flags & SCF_SCSI_CONTROL_SG_IO_CDB &&
+	    se_cmd->data_direction == DMA_FROM_DEVICE) {
+		struct scatterlist *sg = scsi_sglist(sc);
+		unsigned char *buf = kmap(sg_page(sg)) + sg->offset;
+
+		if (buf != NULL) {
+			memset(buf, 0, sg->length);
+			kunmap(sg_page(sg));
+		}
+	}
+
+	ret = transport_generic_allocate_tasks(se_cmd, sc->cmnd);
+	if (ret == -ENOMEM) {
+		transport_send_check_condition_and_sense(se_cmd,
+				TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
+		transport_generic_free_cmd(se_cmd, 0);
+		return;
+	} else if (ret < 0) {
+		if (se_cmd->se_cmd_flags & SCF_SCSI_RESERVATION_CONFLICT)
+			tcm_loop_queue_status(se_cmd);
+		else
+			transport_send_check_condition_and_sense(se_cmd,
+					se_cmd->scsi_sense_reason, 0);
+		transport_generic_free_cmd(se_cmd, 0);
+		return;
+	}
+
+	ret = transport_generic_map_mem_to_cmd(se_cmd, scsi_sglist(sc),
+			scsi_sg_count(sc), sgl_bidi, sgl_bidi_count);
+	if (ret) {
+		transport_send_check_condition_and_sense(se_cmd,
+					se_cmd->scsi_sense_reason, 0);
+		transport_generic_free_cmd(se_cmd, 0);
+		return;
+	}
+	transport_handle_cdb_direct(se_cmd);
+	return;
+
+out_done:
+	/* tl_cmd was never released through transport_generic_free_cmd() */
+	kmem_cache_free(tcm_loop_cmd_cache, tl_cmd);
+	sc->scsi_done(sc);
+}
+
+/*
+ * ->queuecommand can be and usually is called from interrupt context, so
+ * defer the actual submission to a workqueue.
+ */
+static int tcm_loop_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
+{
+	struct tcm_loop_cmd *tl_cmd;
+
+	pr_debug("tcm_loop_queuecommand() %d:%d:%d:%d got CDB: 0x%02x"
+		" scsi_buf_len: %u\n", sc->device->host->host_no,
+		sc->device->id, sc->device->channel, sc->device->lun,
+		sc->cmnd[0], scsi_bufflen(sc));
+	/* GFP_ATOMIC because this may be called from interrupt context */
+	tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_ATOMIC);
+	if (!tl_cmd) {
+		pr_err("Unable to allocate struct tcm_loop_cmd\n");
+		set_host_byte(sc, DID_ERROR);
 		sc->scsi_done(sc);
 		return 0;
 	}
-	/*
-	 * Queue up the newly allocated to be processed in TCM thread context.
-	*/
-	transport_generic_handle_cdb_map(se_cmd);
+	/* Record the scsi_cmnd and hand off to tcm_loop_submission_work() */
+	tl_cmd->sc = sc;
+	INIT_WORK(&tl_cmd->work, tcm_loop_submission_work);
+	queue_work(tcm_loop_workqueue, &tl_cmd->work);
 	return 0;
 }
 
@@ -324,7 +300,7 @@
 	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_tmr *tl_tmr = NULL;
 	struct tcm_loop_tpg *tl_tpg;
-	int ret = FAILED;
+	int ret = FAILED, rc;
 	/*
 	 * Locate the tcm_loop_hba_t pointer
 	 */
@@ -365,12 +341,9 @@
 	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess, 0,
 				DMA_NONE, MSG_SIMPLE_TAG,
 				&tl_cmd->tl_sense_buf[0]);
-	/*
-	 * Allocate the LUN_RESET TMR
-	 */
-	se_cmd->se_tmr_req = core_tmr_alloc_req(se_cmd, tl_tmr,
-						TMR_LUN_RESET, GFP_KERNEL);
-	if (IS_ERR(se_cmd->se_tmr_req))
+
+	rc = core_tmr_alloc_req(se_cmd, tl_tmr, TMR_LUN_RESET, GFP_KERNEL);
+	if (rc < 0)
 		goto release;
 	/*
 	 * Locate the underlying TCM struct se_lun from sc->device->lun
@@ -762,22 +735,6 @@
 	return 1;
 }
 
-static int tcm_loop_is_state_remove(struct se_cmd *se_cmd)
-{
-	/*
-	 * Assume struct scsi_cmnd is not in remove state..
-	 */
-	return 0;
-}
-
-static int tcm_loop_sess_logged_in(struct se_session *se_sess)
-{
-	/*
-	 * Assume that TL Nexus is always active
-	 */
-	return 1;
-}
-
 static u32 tcm_loop_sess_get_index(struct se_session *se_sess)
 {
 	return 1;
@@ -811,19 +768,6 @@
 	return;
 };
 
-static void tcm_loop_stop_session(
-	struct se_session *se_sess,
-	int sess_sleep,
-	int conn_sleep)
-{
-	return;
-}
-
-static void tcm_loop_fall_back_to_erl0(struct se_session *se_sess)
-{
-	return;
-}
-
 static int tcm_loop_write_pending(struct se_cmd *se_cmd)
 {
 	/*
@@ -855,6 +799,9 @@
 
 	sc->result = SAM_STAT_GOOD;
 	set_host_byte(sc, DID_OK);
+	if ((se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) ||
+	    (se_cmd->se_cmd_flags & SCF_UNDERFLOW_BIT))
+		scsi_set_resid(sc, se_cmd->residual_count);
 	sc->scsi_done(sc);
 	return 0;
 }
@@ -880,6 +827,9 @@
 		sc->result = se_cmd->scsi_status;
 
 	set_host_byte(sc, DID_OK);
+	if ((se_cmd->se_cmd_flags & SCF_OVERFLOW_BIT) ||
+	    (se_cmd->se_cmd_flags & SCF_UNDERFLOW_BIT))
+		scsi_set_resid(sc, se_cmd->residual_count);
 	sc->scsi_done(sc);
 	return 0;
 }
@@ -1361,7 +1311,6 @@
 static int tcm_loop_register_configfs(void)
 {
 	struct target_fabric_configfs *fabric;
-	struct config_group *tf_cg;
 	int ret;
 	/*
 	 * Set the TCM Loop HBA counter to zero
@@ -1407,14 +1356,10 @@
 	/*
 	 * Used for setting up remaining TCM resources in process context
 	 */
-	fabric->tf_ops.new_cmd_map = &tcm_loop_new_cmd_map;
 	fabric->tf_ops.check_stop_free = &tcm_loop_check_stop_free;
 	fabric->tf_ops.release_cmd = &tcm_loop_release_cmd;
 	fabric->tf_ops.shutdown_session = &tcm_loop_shutdown_session;
 	fabric->tf_ops.close_session = &tcm_loop_close_session;
-	fabric->tf_ops.stop_session = &tcm_loop_stop_session;
-	fabric->tf_ops.fall_back_to_erl0 = &tcm_loop_fall_back_to_erl0;
-	fabric->tf_ops.sess_logged_in = &tcm_loop_sess_logged_in;
 	fabric->tf_ops.sess_get_index = &tcm_loop_sess_get_index;
 	fabric->tf_ops.sess_get_initiator_sid = NULL;
 	fabric->tf_ops.write_pending = &tcm_loop_write_pending;
@@ -1431,9 +1376,7 @@
 	fabric->tf_ops.queue_tm_rsp = &tcm_loop_queue_tm_rsp;
 	fabric->tf_ops.set_fabric_sense_len = &tcm_loop_set_fabric_sense_len;
 	fabric->tf_ops.get_fabric_sense_len = &tcm_loop_get_fabric_sense_len;
-	fabric->tf_ops.is_state_remove = &tcm_loop_is_state_remove;
 
-	tf_cg = &fabric->tf_group;
 	/*
 	 * Setup function pointers for generic logic in target_core_fabric_configfs.c
 	 */
@@ -1490,7 +1433,11 @@
 
 static int __init tcm_loop_fabric_init(void)
 {
-	int ret;
+	int ret = -ENOMEM;
+
+	tcm_loop_workqueue = alloc_workqueue("tcm_loop", 0, 0);
+	if (!tcm_loop_workqueue)
+		goto out;
 
 	tcm_loop_cmd_cache = kmem_cache_create("tcm_loop_cmd_cache",
 				sizeof(struct tcm_loop_cmd),
@@ -1499,20 +1446,27 @@
 	if (!tcm_loop_cmd_cache) {
 		pr_debug("kmem_cache_create() for"
 			" tcm_loop_cmd_cache failed\n");
-		return -ENOMEM;
+		goto out_destroy_workqueue;
 	}
 
 	ret = tcm_loop_alloc_core_bus();
 	if (ret)
-		return ret;
+		goto out_destroy_cache;
 
 	ret = tcm_loop_register_configfs();
-	if (ret) {
-		tcm_loop_release_core_bus();
-		return ret;
-	}
+	if (ret)
+		goto out_release_core_bus;
 
 	return 0;
+
+out_release_core_bus:
+	tcm_loop_release_core_bus();
+out_destroy_cache:
+	kmem_cache_destroy(tcm_loop_cmd_cache);
+out_destroy_workqueue:
+	destroy_workqueue(tcm_loop_workqueue);
+out:
+	return ret;
 }
 
 static void __exit tcm_loop_fabric_exit(void)
@@ -1520,6 +1474,7 @@
 	tcm_loop_deregister_configfs();
 	tcm_loop_release_core_bus();
 	kmem_cache_destroy(tcm_loop_cmd_cache);
+	destroy_workqueue(tcm_loop_workqueue);
 }
 
 MODULE_DESCRIPTION("TCM loopback virtual Linux/SCSI fabric module");
diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h
index 15a0364..7b54893 100644
--- a/drivers/target/loopback/tcm_loop.h
+++ b/drivers/target/loopback/tcm_loop.h
@@ -1,4 +1,4 @@
-#define TCM_LOOP_VERSION		"v2.1-rc1"
+#define TCM_LOOP_VERSION		"v2.1-rc2"
 #define TL_WWN_ADDR_LEN			256
 #define TL_TPGS_PER_HBA			32
 
@@ -12,9 +12,9 @@
 	u32 sc_cmd_state;
 	/* Pointer to the CDB+Data descriptor from Linux/SCSI subsystem */
 	struct scsi_cmnd *sc;
-	struct list_head *tl_cmd_list;
 	/* The TCM I/O descriptor that is accessed via container_of() */
 	struct se_cmd tl_se_cmd;
+	struct work_struct work;
 	/* Sense buffer that will be mapped into outgoing status */
 	unsigned char tl_sense_buf[TRANSPORT_SENSE_BUFFER];
 };
diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c
index 01a2691..c7746a3 100644
--- a/drivers/target/target_core_alua.c
+++ b/drivers/target/target_core_alua.c
@@ -30,6 +30,7 @@
 #include <linux/export.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
+#include <asm/unaligned.h>
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
@@ -267,8 +268,7 @@
 		 * changed.
 		 */
 		if (primary) {
-			tg_pt_id = ((ptr[2] << 8) & 0xff);
-			tg_pt_id |= (ptr[3] & 0xff);
+			tg_pt_id = get_unaligned_be16(ptr + 2);
 			/*
 			 * Locate the matching target port group ID from
 			 * the global tg_pt_gp list
@@ -312,8 +312,7 @@
 			 * the Target Port in question for the the incoming
 			 * SET_TARGET_PORT_GROUPS op.
 			 */
-			rtpi = ((ptr[2] << 8) & 0xff);
-			rtpi |= (ptr[3] & 0xff);
+			rtpi = get_unaligned_be16(ptr + 2);
 			/*
 			 * Locate the matching relative target port identifer
 			 * for the struct se_device storage object.
diff --git a/drivers/target/target_core_cdb.c b/drivers/target/target_core_cdb.c
index f3d71fa..30a6770 100644
--- a/drivers/target/target_core_cdb.c
+++ b/drivers/target/target_core_cdb.c
@@ -66,32 +66,15 @@
 }
 
 static int
-target_emulate_inquiry_std(struct se_cmd *cmd)
+target_emulate_inquiry_std(struct se_cmd *cmd, char *buf)
 {
 	struct se_lun *lun = cmd->se_lun;
 	struct se_device *dev = cmd->se_dev;
-	struct se_portal_group *tpg = lun->lun_sep->sep_tpg;
-	unsigned char *buf;
 
-	/*
-	 * Make sure we at least have 6 bytes of INQUIRY response
-	 * payload going back for EVPD=0
-	 */
-	if (cmd->data_length < 6) {
-		pr_err("SCSI Inquiry payload length: %u"
-			" too small for EVPD=0\n", cmd->data_length);
-		return -EINVAL;
-	}
+	/* Set RMB (removable media) for tape devices */
+	if (dev->transport->get_device_type(dev) == TYPE_TAPE)
+		buf[1] = 0x80;
 
-	buf = transport_kmap_data_sg(cmd);
-
-	if (dev == tpg->tpg_virt_lun0.lun_se_dev) {
-		buf[0] = 0x3f; /* Not connected */
-	} else {
-		buf[0] = dev->transport->get_device_type(dev);
-		if (buf[0] == TYPE_TAPE)
-			buf[1] = 0x80;
-	}
 	buf[2] = dev->transport->get_device_rev(dev);
 
 	/*
@@ -112,29 +95,13 @@
 	if (dev->se_sub_dev->t10_alua.alua_type == SPC3_ALUA_EMULATED)
 		target_fill_alua_data(lun->lun_sep, buf);
 
-	if (cmd->data_length < 8) {
-		buf[4] = 1; /* Set additional length to 1 */
-		goto out;
-	}
-
-	buf[7] = 0x32; /* Sync=1 and CmdQue=1 */
-
-	/*
-	 * Do not include vendor, product, reversion info in INQUIRY
-	 * response payload for cdbs with a small allocation length.
-	 */
-	if (cmd->data_length < 36) {
-		buf[4] = 3; /* Set additional length to 3 */
-		goto out;
-	}
+	buf[7] = 0x2; /* CmdQue=1 */
 
 	snprintf(&buf[8], 8, "LIO-ORG");
 	snprintf(&buf[16], 16, "%s", dev->se_sub_dev->t10_wwn.model);
 	snprintf(&buf[32], 4, "%s", dev->se_sub_dev->t10_wwn.revision);
 	buf[4] = 31; /* Set additional length to 31 */
 
-out:
-	transport_kunmap_data_sg(cmd);
 	return 0;
 }
 
@@ -152,12 +119,6 @@
 		unit_serial_len = strlen(dev->se_sub_dev->t10_wwn.unit_serial);
 		unit_serial_len++; /* For NULL Terminator */
 
-		if (((len + 4) + unit_serial_len) > cmd->data_length) {
-			len += unit_serial_len;
-			buf[2] = ((len >> 8) & 0xff);
-			buf[3] = (len & 0xff);
-			return 0;
-		}
 		len += sprintf(&buf[4], "%s",
 			dev->se_sub_dev->t10_wwn.unit_serial);
 		len++; /* Extra Byte for NULL Terminator */
@@ -229,9 +190,6 @@
 	if (!(dev->se_sub_dev->su_dev_flags & SDF_EMULATED_VPD_UNIT_SERIAL))
 		goto check_t10_vend_desc;
 
-	if (off + 20 > cmd->data_length)
-		goto check_t10_vend_desc;
-
 	/* CODE SET == Binary */
 	buf[off++] = 0x1;
 
@@ -283,12 +241,6 @@
 			strlen(&dev->se_sub_dev->t10_wwn.unit_serial[0]);
 		unit_serial_len++; /* For NULL Terminator */
 
-		if ((len + (id_len + 4) +
-		    (prod_len + unit_serial_len)) >
-				cmd->data_length) {
-			len += (prod_len + unit_serial_len);
-			goto check_port;
-		}
 		id_len += sprintf(&buf[off+12], "%s:%s", prod,
 				&dev->se_sub_dev->t10_wwn.unit_serial[0]);
 	}
@@ -306,7 +258,6 @@
 	/*
 	 * struct se_port is only set for INQUIRY VPD=1 through $FABRIC_MOD
 	 */
-check_port:
 	port = lun->lun_sep;
 	if (port) {
 		struct t10_alua_lu_gp *lu_gp;
@@ -323,10 +274,6 @@
 		 * Get the PROTOCOL IDENTIFIER as defined by spc4r17
 		 * section 7.5.1 Table 362
 		 */
-		if (((len + 4) + 8) > cmd->data_length) {
-			len += 8;
-			goto check_tpgi;
-		}
 		buf[off] =
 			(tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4);
 		buf[off++] |= 0x1; /* CODE SET == Binary */
@@ -350,15 +297,10 @@
 		 * Get the PROTOCOL IDENTIFIER as defined by spc4r17
 		 * section 7.5.1 Table 362
 		 */
-check_tpgi:
 		if (dev->se_sub_dev->t10_alua.alua_type !=
 				SPC3_ALUA_EMULATED)
 			goto check_scsi_name;
 
-		if (((len + 4) + 8) > cmd->data_length) {
-			len += 8;
-			goto check_lu_gp;
-		}
 		tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem;
 		if (!tg_pt_gp_mem)
 			goto check_lu_gp;
@@ -391,10 +333,6 @@
 		 * section 7.7.3.8
 		 */
 check_lu_gp:
-		if (((len + 4) + 8) > cmd->data_length) {
-			len += 8;
-			goto check_scsi_name;
-		}
 		lu_gp_mem = dev->dev_alua_lu_gp_mem;
 		if (!lu_gp_mem)
 			goto check_scsi_name;
@@ -435,10 +373,6 @@
 		/* Header size + Designation descriptor */
 		scsi_name_len += 4;
 
-		if (((len + 4) + scsi_name_len) > cmd->data_length) {
-			len += scsi_name_len;
-			goto set_len;
-		}
 		buf[off] =
 			(tpg->se_tpg_tfo->get_fabric_proto_ident(tpg) << 4);
 		buf[off++] |= 0x3; /* CODE SET == UTF-8 */
@@ -474,7 +408,6 @@
 		/* Header size + Designation descriptor */
 		len += (scsi_name_len + 4);
 	}
-set_len:
 	buf[2] = ((len >> 8) & 0xff);
 	buf[3] = (len & 0xff); /* Page Length for VPD 0x83 */
 	return 0;
@@ -484,9 +417,6 @@
 static int
 target_emulate_evpd_86(struct se_cmd *cmd, unsigned char *buf)
 {
-	if (cmd->data_length < 60)
-		return 0;
-
 	buf[3] = 0x3c;
 	/* Set HEADSUP, ORDSUP, SIMPSUP */
 	buf[5] = 0x07;
@@ -512,20 +442,6 @@
 	if (dev->se_sub_dev->se_dev_attrib.emulate_tpu || dev->se_sub_dev->se_dev_attrib.emulate_tpws)
 		have_tp = 1;
 
-	if (cmd->data_length < (0x10 + 4)) {
-		pr_debug("Received data_length: %u"
-			" too small for EVPD 0xb0\n",
-			cmd->data_length);
-		return -EINVAL;
-	}
-
-	if (have_tp && cmd->data_length < (0x3c + 4)) {
-		pr_debug("Received data_length: %u"
-			" too small for TPE=1 EVPD 0xb0\n",
-			cmd->data_length);
-		have_tp = 0;
-	}
-
 	buf[0] = dev->transport->get_device_type(dev);
 	buf[3] = have_tp ? 0x3c : 0x10;
 
@@ -540,7 +456,7 @@
 	/*
 	 * Set MAXIMUM TRANSFER LENGTH
 	 */
-	put_unaligned_be32(dev->se_sub_dev->se_dev_attrib.max_sectors, &buf[8]);
+	put_unaligned_be32(dev->se_sub_dev->se_dev_attrib.fabric_max_sectors, &buf[8]);
 
 	/*
 	 * Set OPTIMAL TRANSFER LENGTH
@@ -548,10 +464,9 @@
 	put_unaligned_be32(dev->se_sub_dev->se_dev_attrib.optimal_sectors, &buf[12]);
 
 	/*
-	 * Exit now if we don't support TP or the initiator sent a too
-	 * short buffer.
+	 * Exit now if we don't support TP.
 	 */
-	if (!have_tp || cmd->data_length < (0x3c + 4))
+	if (!have_tp)
 		return 0;
 
 	/*
@@ -589,10 +504,7 @@
 
 	buf[0] = dev->transport->get_device_type(dev);
 	buf[3] = 0x3c;
-
-	if (cmd->data_length >= 5 &&
-	    dev->se_sub_dev->se_dev_attrib.is_nonrot)
-		buf[5] = 1;
+	buf[5] = dev->se_sub_dev->se_dev_attrib.is_nonrot ? 1 : 0;
 
 	return 0;
 }
@@ -671,8 +583,6 @@
 {
 	int p;
 
-	if (cmd->data_length < 8)
-		return 0;
 	/*
 	 * Only report the INQUIRY EVPD=1 pages after a valid NAA
 	 * Registered Extended LUN WWN has been set via ConfigFS
@@ -681,8 +591,7 @@
 	if (cmd->se_dev->se_sub_dev->su_dev_flags &
 			SDF_EMULATED_VPD_UNIT_SERIAL) {
 		buf[3] = ARRAY_SIZE(evpd_handlers);
-		for (p = 0; p < min_t(int, ARRAY_SIZE(evpd_handlers),
-				      cmd->data_length - 4); ++p)
+		for (p = 0; p < ARRAY_SIZE(evpd_handlers); ++p)
 			buf[p + 4] = evpd_handlers[p].page;
 	}
 
@@ -693,45 +602,54 @@
 {
 	struct se_cmd *cmd = task->task_se_cmd;
 	struct se_device *dev = cmd->se_dev;
-	unsigned char *buf;
+	struct se_portal_group *tpg = cmd->se_lun->lun_sep->sep_tpg;
+	unsigned char *buf, *map_buf;
 	unsigned char *cdb = cmd->t_task_cdb;
 	int p, ret;
 
+	map_buf = transport_kmap_data_sg(cmd);
+	/*
+	 * If SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC is not set, then we
+	 * know we actually allocated a full page.  Otherwise, if the
+	 * data buffer is too small, allocate a temporary buffer so we
+	 * don't have to worry about overruns in all our INQUIRY
+	 * emulation handling.
+	 */
+	if (cmd->data_length < SE_INQUIRY_BUF &&
+	    (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC)) {
+		buf = kzalloc(SE_INQUIRY_BUF, GFP_KERNEL);
+		if (!buf) {
+			transport_kunmap_data_sg(cmd);
+			cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+			return -ENOMEM;
+		}
+	} else {
+		buf = map_buf;
+	}
+
+	if (dev == tpg->tpg_virt_lun0.lun_se_dev)
+		buf[0] = 0x3f; /* Not connected */
+	else
+		buf[0] = dev->transport->get_device_type(dev);
+
 	if (!(cdb[1] & 0x1)) {
 		if (cdb[2]) {
 			pr_err("INQUIRY with EVPD==0 but PAGE CODE=%02x\n",
 			       cdb[2]);
 			cmd->scsi_sense_reason = TCM_INVALID_CDB_FIELD;
-			return -EINVAL;
+			ret = -EINVAL;
+			goto out;
 		}
 
-		ret = target_emulate_inquiry_std(cmd);
+		ret = target_emulate_inquiry_std(cmd, buf);
 		goto out;
 	}
 
-	/*
-	 * Make sure we at least have 4 bytes of INQUIRY response
-	 * payload for 0x00 going back for EVPD=1.  Note that 0x80
-	 * and 0x83 will check for enough payload data length and
-	 * jump to set_len: label when there is not enough inquiry EVPD
-	 * payload length left for the next outgoing EVPD metadata
-	 */
-	if (cmd->data_length < 4) {
-		pr_err("SCSI Inquiry payload length: %u"
-			" too small for EVPD=1\n", cmd->data_length);
-		cmd->scsi_sense_reason = TCM_INVALID_CDB_FIELD;
-		return -EINVAL;
-	}
-
-	buf = transport_kmap_data_sg(cmd);
-
-	buf[0] = dev->transport->get_device_type(dev);
-
 	for (p = 0; p < ARRAY_SIZE(evpd_handlers); ++p) {
 		if (cdb[2] == evpd_handlers[p].page) {
 			buf[1] = cdb[2];
 			ret = evpd_handlers[p].emulate(cmd, buf);
-			goto out_unmap;
+			goto out;
 		}
 	}
 
@@ -739,9 +657,13 @@
 	cmd->scsi_sense_reason = TCM_INVALID_CDB_FIELD;
 	ret = -EINVAL;
 
-out_unmap:
-	transport_kunmap_data_sg(cmd);
 out:
+	if (buf != map_buf) {
+		memcpy(map_buf, buf, cmd->data_length);
+		kfree(buf);
+	}
+	transport_kunmap_data_sg(cmd);
+
 	if (!ret) {
 		task->task_scsi_status = GOOD;
 		transport_complete_task(task, 1);
@@ -772,11 +694,6 @@
 	buf[5] = (dev->se_sub_dev->se_dev_attrib.block_size >> 16) & 0xff;
 	buf[6] = (dev->se_sub_dev->se_dev_attrib.block_size >> 8) & 0xff;
 	buf[7] = dev->se_sub_dev->se_dev_attrib.block_size & 0xff;
-	/*
-	 * Set max 32-bit blocks to signal SERVICE ACTION READ_CAPACITY_16
-	*/
-	if (dev->se_sub_dev->se_dev_attrib.emulate_tpu || dev->se_sub_dev->se_dev_attrib.emulate_tpws)
-		put_unaligned_be32(0xFFFFFFFF, &buf[0]);
 
 	transport_kunmap_data_sg(cmd);
 
diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 6e043ee..cbb6653 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -52,8 +52,8 @@
 
 extern struct t10_alua_lu_gp *default_lu_gp;
 
-static struct list_head g_tf_list;
-static struct mutex g_tf_lock;
+static LIST_HEAD(g_tf_list);
+static DEFINE_MUTEX(g_tf_lock);
 
 struct target_core_configfs_attribute {
 	struct configfs_attribute attr;
@@ -421,18 +421,6 @@
 		pr_err("Missing tfo->close_session()\n");
 		return -EINVAL;
 	}
-	if (!tfo->stop_session) {
-		pr_err("Missing tfo->stop_session()\n");
-		return -EINVAL;
-	}
-	if (!tfo->fall_back_to_erl0) {
-		pr_err("Missing tfo->fall_back_to_erl0()\n");
-		return -EINVAL;
-	}
-	if (!tfo->sess_logged_in) {
-		pr_err("Missing tfo->sess_logged_in()\n");
-		return -EINVAL;
-	}
 	if (!tfo->sess_get_index) {
 		pr_err("Missing tfo->sess_get_index()\n");
 		return -EINVAL;
@@ -477,10 +465,6 @@
 		pr_err("Missing tfo->get_fabric_sense_len()\n");
 		return -EINVAL;
 	}
-	if (!tfo->is_state_remove) {
-		pr_err("Missing tfo->is_state_remove()\n");
-		return -EINVAL;
-	}
 	/*
 	 * We at least require tfo->fabric_make_wwn(), tfo->fabric_drop_wwn()
 	 * tfo->fabric_make_tpg() and tfo->fabric_drop_tpg() in
@@ -702,6 +686,9 @@
 DEF_DEV_ATTRIB(max_sectors);
 SE_DEV_ATTR(max_sectors, S_IRUGO | S_IWUSR);
 
+DEF_DEV_ATTRIB(fabric_max_sectors);
+SE_DEV_ATTR(fabric_max_sectors, S_IRUGO | S_IWUSR);
+
 DEF_DEV_ATTRIB(optimal_sectors);
 SE_DEV_ATTR(optimal_sectors, S_IRUGO | S_IWUSR);
 
@@ -741,6 +728,7 @@
 	&target_core_dev_attrib_block_size.attr,
 	&target_core_dev_attrib_hw_max_sectors.attr,
 	&target_core_dev_attrib_max_sectors.attr,
+	&target_core_dev_attrib_fabric_max_sectors.attr,
 	&target_core_dev_attrib_optimal_sectors.attr,
 	&target_core_dev_attrib_hw_queue_depth.attr,
 	&target_core_dev_attrib_queue_depth.attr,
@@ -2304,7 +2292,7 @@
 
 	if (!(tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_IMPLICT_ALUA)) {
 		pr_err("Unable to process implict configfs ALUA"
-			" transition while TPGS_IMPLICT_ALUA is diabled\n");
+			" transition while TPGS_IMPLICT_ALUA is disabled\n");
 		return -EINVAL;
 	}
 
@@ -2865,7 +2853,6 @@
 	struct se_subsystem_dev *se_dev = container_of(to_config_group(item),
 				struct se_subsystem_dev, se_dev_group);
 	struct se_hba *hba;
-	struct se_subsystem_api *t;
 	struct config_item *df_item;
 	struct config_group *dev_cg, *tg_pt_gp_cg, *dev_stat_grp;
 	int i;
@@ -2873,7 +2860,6 @@
 	hba = item_to_hba(&se_dev->se_dev_hba->hba_group.cg_item);
 
 	mutex_lock(&hba->hba_access_mutex);
-	t = hba->transport;
 
 	dev_stat_grp = &se_dev->dev_stat_grps.stat_group;
 	for (i = 0; dev_stat_grp->default_groups[i]; i++) {
@@ -3117,8 +3103,6 @@
 	config_group_init(&subsys->su_group);
 	mutex_init(&subsys->su_mutex);
 
-	INIT_LIST_HEAD(&g_tf_list);
-	mutex_init(&g_tf_lock);
 	ret = init_se_kmem_caches();
 	if (ret < 0)
 		return ret;
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index edbcabb..aa62677 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -72,7 +72,7 @@
 	}
 
 	spin_lock_irqsave(&se_sess->se_node_acl->device_list_lock, flags);
-	se_cmd->se_deve = &se_sess->se_node_acl->device_list[unpacked_lun];
+	se_cmd->se_deve = se_sess->se_node_acl->device_list[unpacked_lun];
 	if (se_cmd->se_deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) {
 		struct se_dev_entry *deve = se_cmd->se_deve;
 
@@ -159,13 +159,8 @@
 		dev->read_bytes += se_cmd->data_length;
 	spin_unlock_irqrestore(&dev->stats_lock, flags);
 
-	/*
-	 * Add the iscsi_cmd_t to the struct se_lun's cmd list.  This list is used
-	 * for tracking state of struct se_cmds during LUN shutdown events.
-	 */
 	spin_lock_irqsave(&se_lun->lun_cmd_lock, flags);
 	list_add_tail(&se_cmd->se_lun_node, &se_lun->lun_cmd_list);
-	atomic_set(&se_cmd->transport_lun_active, 1);
 	spin_unlock_irqrestore(&se_lun->lun_cmd_lock, flags);
 
 	return 0;
@@ -187,7 +182,7 @@
 	}
 
 	spin_lock_irqsave(&se_sess->se_node_acl->device_list_lock, flags);
-	se_cmd->se_deve = &se_sess->se_node_acl->device_list[unpacked_lun];
+	se_cmd->se_deve = se_sess->se_node_acl->device_list[unpacked_lun];
 	deve = se_cmd->se_deve;
 
 	if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS) {
@@ -245,7 +240,7 @@
 
 	spin_lock_irq(&nacl->device_list_lock);
 	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		deve = &nacl->device_list[i];
+		deve = nacl->device_list[i];
 
 		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
 			continue;
@@ -291,7 +286,7 @@
 
 	spin_lock_irq(&nacl->device_list_lock);
 	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		deve = &nacl->device_list[i];
+		deve = nacl->device_list[i];
 
 		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
 			continue;
@@ -311,7 +306,7 @@
 	}
 	spin_unlock_irq(&nacl->device_list_lock);
 
-	kfree(nacl->device_list);
+	array_free(nacl->device_list, TRANSPORT_MAX_LUNS_PER_TPG);
 	nacl->device_list = NULL;
 
 	return 0;
@@ -323,7 +318,7 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&se_nacl->device_list_lock, flags);
-	deve = &se_nacl->device_list[se_cmd->orig_fe_lun];
+	deve = se_nacl->device_list[se_cmd->orig_fe_lun];
 	deve->deve_cmds--;
 	spin_unlock_irqrestore(&se_nacl->device_list_lock, flags);
 }
@@ -336,7 +331,7 @@
 	struct se_dev_entry *deve;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[mapped_lun];
+	deve = nacl->device_list[mapped_lun];
 	if (lun_access & TRANSPORT_LUNFLAGS_READ_WRITE) {
 		deve->lun_flags &= ~TRANSPORT_LUNFLAGS_READ_ONLY;
 		deve->lun_flags |= TRANSPORT_LUNFLAGS_READ_WRITE;
@@ -361,7 +356,7 @@
 	int enable)
 {
 	struct se_port *port = lun->lun_sep;
-	struct se_dev_entry *deve = &nacl->device_list[mapped_lun];
+	struct se_dev_entry *deve = nacl->device_list[mapped_lun];
 	int trans = 0;
 	/*
 	 * If the MappedLUN entry is being disabled, the entry in
@@ -475,7 +470,7 @@
 
 		spin_lock_irq(&nacl->device_list_lock);
 		for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-			deve = &nacl->device_list[i];
+			deve = nacl->device_list[i];
 			if (lun != deve->se_lun)
 				continue;
 			spin_unlock_irq(&nacl->device_list_lock);
@@ -652,12 +647,13 @@
 {
 	struct se_cmd *se_cmd = se_task->task_se_cmd;
 	struct se_dev_entry *deve;
-	struct se_lun *se_lun;
 	struct se_session *se_sess = se_cmd->se_sess;
 	unsigned char *buf;
-	u32 cdb_offset = 0, lun_count = 0, offset = 8, i;
+	u32 lun_count = 0, offset = 8, i;
 
-	buf = (unsigned char *) transport_kmap_data_sg(se_cmd);
+	buf = transport_kmap_data_sg(se_cmd);
+	if (!buf)
+		return -ENOMEM;
 
 	/*
 	 * If no struct se_session pointer is present, this struct se_cmd is
@@ -672,22 +668,20 @@
 
 	spin_lock_irq(&se_sess->se_node_acl->device_list_lock);
 	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		deve = &se_sess->se_node_acl->device_list[i];
+		deve = se_sess->se_node_acl->device_list[i];
 		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
 			continue;
-		se_lun = deve->se_lun;
 		/*
 		 * We determine the correct LUN LIST LENGTH even once we
 		 * have reached the initial allocation length.
 		 * See SPC2-R20 7.19.
 		 */
 		lun_count++;
-		if ((cdb_offset + 8) >= se_cmd->data_length)
+		if ((offset + 8) > se_cmd->data_length)
 			continue;
 
 		int_to_scsilun(deve->mapped_lun, (struct scsi_lun *)&buf[offset]);
 		offset += 8;
-		cdb_offset += 8;
 	}
 	spin_unlock_irq(&se_sess->se_node_acl->device_list_lock);
 
@@ -695,12 +689,12 @@
 	 * See SPC3 r07, page 159.
 	 */
 done:
-	transport_kunmap_data_sg(se_cmd);
 	lun_count *= 8;
 	buf[0] = ((lun_count >> 24) & 0xff);
 	buf[1] = ((lun_count >> 16) & 0xff);
 	buf[2] = ((lun_count >> 8) & 0xff);
 	buf[3] = (lun_count & 0xff);
+	transport_kunmap_data_sg(se_cmd);
 
 	se_task->task_scsi_status = GOOD;
 	transport_complete_task(se_task, 1);
@@ -894,10 +888,15 @@
 						limits->logical_block_size);
 	dev->se_sub_dev->se_dev_attrib.max_sectors = limits->max_sectors;
 	/*
-	 * Set optimal_sectors from max_sectors, which can be lowered via
-	 * configfs.
+	 * Set fabric_max_sectors, which is reported in block limits
+	 * VPD page (B0h).
 	 */
-	dev->se_sub_dev->se_dev_attrib.optimal_sectors = limits->max_sectors;
+	dev->se_sub_dev->se_dev_attrib.fabric_max_sectors = DA_FABRIC_MAX_SECTORS;
+	/*
+	 * Set optimal_sectors from fabric_max_sectors, which can be
+	 * lowered via configfs.
+	 */
+	dev->se_sub_dev->se_dev_attrib.optimal_sectors = DA_FABRIC_MAX_SECTORS;
 	/*
 	 * queue_depth is based on subsystem plugin dependent requirements.
 	 */
@@ -1229,6 +1228,54 @@
 	return 0;
 }
 
+/* Validate and store the fabric_max_sectors attribute; 0 or -EINVAL */
+int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors)
+{
+	/* Not changeable while dev_export_obj has a non-zero access count */
+	if (atomic_read(&dev->dev_export_obj.obj_access_count)) {
+		pr_err("dev[%p]: Unable to change SE Device"
+			" fabric_max_sectors while dev_export_obj: %d count exists\n",
+			dev, atomic_read(&dev->dev_export_obj.obj_access_count));
+		return -EINVAL;
+	}
+	if (!fabric_max_sectors) {
+		pr_err("dev[%p]: Illegal ZERO value for"
+			" fabric_max_sectors\n", dev);
+		return -EINVAL;
+	}
+	if (fabric_max_sectors < DA_STATUS_MAX_SECTORS_MIN) {
+		pr_err("dev[%p]: Passed fabric_max_sectors: %u less than"
+			" DA_STATUS_MAX_SECTORS_MIN: %u\n", dev, fabric_max_sectors,
+				DA_STATUS_MAX_SECTORS_MIN);
+		return -EINVAL;
+	}
+	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
+		if (fabric_max_sectors > dev->se_sub_dev->se_dev_attrib.hw_max_sectors) {
+			pr_err("dev[%p]: Passed fabric_max_sectors: %u"
+				" greater than TCM/SE_Device hw_max_sectors:"
+				" %u\n", dev, fabric_max_sectors,
+				dev->se_sub_dev->se_dev_attrib.hw_max_sectors);
+			return -EINVAL;
+		}
+	} else {
+		if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) {
+			pr_err("dev[%p]: Passed fabric_max_sectors: %u"
+				" greater than DA_STATUS_MAX_SECTORS_MAX:"
+				" %u\n", dev, fabric_max_sectors,
+				DA_STATUS_MAX_SECTORS_MAX);
+			return -EINVAL;
+		}
+	}
+	/* Align down to PAGE_SIZE to follow transport_allocate_data_tasks() */
+	fabric_max_sectors = se_dev_align_max_sectors(fabric_max_sectors,
+						      dev->se_sub_dev->se_dev_attrib.block_size);
+
+	dev->se_sub_dev->se_dev_attrib.fabric_max_sectors = fabric_max_sectors;
+	pr_debug("dev[%p]: SE Device fabric_max_sectors changed to %u\n",
+			dev, fabric_max_sectors);
+	return 0;
+}
+
 int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors)
 {
 	if (atomic_read(&dev->dev_export_obj.obj_access_count)) {
@@ -1242,10 +1289,10 @@
 				" changed for TCM/pSCSI\n", dev);
 		return -EINVAL;
 	}
-	if (optimal_sectors > dev->se_sub_dev->se_dev_attrib.max_sectors) {
+	if (optimal_sectors > dev->se_sub_dev->se_dev_attrib.fabric_max_sectors) {
 		pr_err("dev[%p]: Passed optimal_sectors %u cannot be"
-			" greater than max_sectors: %u\n", dev,
-			optimal_sectors, dev->se_sub_dev->se_dev_attrib.max_sectors);
+			" greater than fabric_max_sectors: %u\n", dev,
+			optimal_sectors, dev->se_sub_dev->se_dev_attrib.fabric_max_sectors);
 		return -EINVAL;
 	}
 
@@ -1380,7 +1427,7 @@
 		spin_unlock(&tpg->tpg_lun_lock);
 		return NULL;
 	}
-	lun = &tpg->tpg_lun_list[unpacked_lun];
+	lun = tpg->tpg_lun_list[unpacked_lun];
 
 	if (lun->lun_status != TRANSPORT_LUN_STATUS_FREE) {
 		pr_err("%s Logical Unit Number: %u is not free on"
@@ -1413,7 +1460,7 @@
 		spin_unlock(&tpg->tpg_lun_lock);
 		return NULL;
 	}
-	lun = &tpg->tpg_lun_list[unpacked_lun];
+	lun = tpg->tpg_lun_list[unpacked_lun];
 
 	if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) {
 		pr_err("%s Logical Unit Number: %u is not active on"
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 9a2ce11..405cc98 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -108,7 +108,7 @@
 	 * tpg_1/attrib/demo_mode_write_protect=1
 	 */
 	spin_lock_irq(&lacl->se_lun_nacl->device_list_lock);
-	deve = &lacl->se_lun_nacl->device_list[lacl->mapped_lun];
+	deve = lacl->se_lun_nacl->device_list[lacl->mapped_lun];
 	if (deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS)
 		lun_access = deve->lun_flags;
 	else
@@ -137,7 +137,7 @@
 	struct se_lun_acl *lacl = container_of(to_config_group(lun_acl_ci),
 			struct se_lun_acl, se_lun_group);
 	struct se_node_acl *nacl = lacl->se_lun_nacl;
-	struct se_dev_entry *deve = &nacl->device_list[lacl->mapped_lun];
+	struct se_dev_entry *deve = nacl->device_list[lacl->mapped_lun];
 	struct se_portal_group *se_tpg;
 	/*
 	 * Determine if the underlying MappedLUN has already been released..
@@ -168,7 +168,7 @@
 	ssize_t len;
 
 	spin_lock_irq(&se_nacl->device_list_lock);
-	deve = &se_nacl->device_list[lacl->mapped_lun];
+	deve = se_nacl->device_list[lacl->mapped_lun];
 	len = sprintf(page, "%d\n",
 			(deve->lun_flags & TRANSPORT_LUNFLAGS_READ_ONLY) ?
 			1 : 0);
diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 8572eae..2ec299e 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -46,6 +46,9 @@
 
 #include "target_core_iblock.h"
 
+#define IBLOCK_MAX_BIO_PER_TASK	 32	/* max # of bios to submit at a time */
+#define IBLOCK_BIO_POOL_SIZE	128
+
 static struct se_subsystem_api iblock_template;
 
 static void iblock_bio_done(struct bio *, int);
@@ -56,51 +59,25 @@
  */
 static int iblock_attach_hba(struct se_hba *hba, u32 host_id)
 {
-	struct iblock_hba *ib_host;
-
-	ib_host = kzalloc(sizeof(struct iblock_hba), GFP_KERNEL);
-	if (!ib_host) {
-		pr_err("Unable to allocate memory for"
-				" struct iblock_hba\n");
-		return -ENOMEM;
-	}
-
-	ib_host->iblock_host_id = host_id;
-
-	hba->hba_ptr = ib_host;
-
 	pr_debug("CORE_HBA[%d] - TCM iBlock HBA Driver %s on"
 		" Generic Target Core Stack %s\n", hba->hba_id,
 		IBLOCK_VERSION, TARGET_CORE_MOD_VERSION);
-
-	pr_debug("CORE_HBA[%d] - Attached iBlock HBA: %u to Generic\n",
-		hba->hba_id, ib_host->iblock_host_id);
-
 	return 0;
 }
 
 static void iblock_detach_hba(struct se_hba *hba)
 {
-	struct iblock_hba *ib_host = hba->hba_ptr;
-
-	pr_debug("CORE_HBA[%d] - Detached iBlock HBA: %u from Generic"
-		" Target Core\n", hba->hba_id, ib_host->iblock_host_id);
-
-	kfree(ib_host);
-	hba->hba_ptr = NULL;
 }
 
 static void *iblock_allocate_virtdevice(struct se_hba *hba, const char *name)
 {
 	struct iblock_dev *ib_dev = NULL;
-	struct iblock_hba *ib_host = hba->hba_ptr;
 
 	ib_dev = kzalloc(sizeof(struct iblock_dev), GFP_KERNEL);
 	if (!ib_dev) {
 		pr_err("Unable to allocate struct iblock_dev\n");
 		return NULL;
 	}
-	ib_dev->ibd_host = ib_host;
 
 	pr_debug( "IBLOCK: Allocated ib_dev for %s\n", name);
 
@@ -126,10 +103,8 @@
 		return ERR_PTR(ret);
 	}
 	memset(&dev_limits, 0, sizeof(struct se_dev_limits));
-	/*
-	 * These settings need to be made tunable..
-	 */
-	ib_dev->ibd_bio_set = bioset_create(32, 0);
+
+	ib_dev->ibd_bio_set = bioset_create(IBLOCK_BIO_POOL_SIZE, 0);
 	if (!ib_dev->ibd_bio_set) {
 		pr_err("IBLOCK: Unable to create bioset()\n");
 		return ERR_PTR(-ENOMEM);
@@ -155,8 +130,8 @@
 	q = bdev_get_queue(bd);
 	limits = &dev_limits.limits;
 	limits->logical_block_size = bdev_logical_block_size(bd);
-	limits->max_hw_sectors = queue_max_hw_sectors(q);
-	limits->max_sectors = queue_max_sectors(q);
+	limits->max_hw_sectors = UINT_MAX;
+	limits->max_sectors = UINT_MAX;
 	dev_limits.hw_queue_depth = q->nr_requests;
 	dev_limits.queue_depth = q->nr_requests;
 
@@ -230,7 +205,7 @@
 		return NULL;
 	}
 
-	atomic_set(&ib_req->ib_bio_cnt, 0);
+	atomic_set(&ib_req->pending, 1);
 	return &ib_req->ib_task;
 }
 
@@ -510,24 +485,35 @@
 	bio->bi_destructor = iblock_bio_destructor;
 	bio->bi_end_io = &iblock_bio_done;
 	bio->bi_sector = lba;
-	atomic_inc(&ib_req->ib_bio_cnt);
+	atomic_inc(&ib_req->pending);
 
 	pr_debug("Set bio->bi_sector: %llu\n", (unsigned long long)bio->bi_sector);
-	pr_debug("Set ib_req->ib_bio_cnt: %d\n",
-			atomic_read(&ib_req->ib_bio_cnt));
+	pr_debug("Set ib_req->pending: %d\n", atomic_read(&ib_req->pending));
 	return bio;
 }
 
+static void iblock_submit_bios(struct bio_list *list, int rw)
+{
+	struct blk_plug plug;	/* started before, finished after, the drain loop */
+	struct bio *next;
+
+	blk_start_plug(&plug);
+	for (next = bio_list_pop(list); next; next = bio_list_pop(list))
+		submit_bio(rw, next);	/* each popped bio is submitted with @rw */
+	blk_finish_plug(&plug);
+}
+
 static int iblock_do_task(struct se_task *task)
 {
 	struct se_cmd *cmd = task->task_se_cmd;
 	struct se_device *dev = cmd->se_dev;
+	struct iblock_req *ibr = IBLOCK_REQ(task);
 	struct bio *bio;
 	struct bio_list list;
 	struct scatterlist *sg;
 	u32 i, sg_num = task->task_sg_nents;
 	sector_t block_lba;
-	struct blk_plug plug;
+	unsigned bio_cnt;
 	int rw;
 
 	if (task->task_data_direction == DMA_TO_DEVICE) {
@@ -572,6 +558,7 @@
 
 	bio_list_init(&list);
 	bio_list_add(&list, bio);
+	bio_cnt = 1;
 
 	for_each_sg(task->task_sg, sg, task->task_sg_nents, i) {
 		/*
@@ -581,10 +568,16 @@
 		 */
 		while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
 				!= sg->length) {
+			if (bio_cnt >= IBLOCK_MAX_BIO_PER_TASK) {
+				iblock_submit_bios(&list, rw);
+				bio_cnt = 0;
+			}
+
 			bio = iblock_get_bio(task, block_lba, sg_num);
 			if (!bio)
 				goto fail;
 			bio_list_add(&list, bio);
+			bio_cnt++;
 		}
 
 		/* Always in 512 byte units for Linux/Block */
@@ -592,11 +585,12 @@
 		sg_num--;
 	}
 
-	blk_start_plug(&plug);
-	while ((bio = bio_list_pop(&list)))
-		submit_bio(rw, bio);
-	blk_finish_plug(&plug);
+	iblock_submit_bios(&list, rw);
 
+	if (atomic_dec_and_test(&ibr->pending)) {
+		transport_complete_task(task,
+				!atomic_read(&ibr->ib_bio_err_cnt));
+	}
 	return 0;
 
 fail:
@@ -648,7 +642,7 @@
 
 	bio_put(bio);
 
-	if (!atomic_dec_and_test(&ibr->ib_bio_cnt))
+	if (!atomic_dec_and_test(&ibr->pending))
 		return;
 
 	pr_debug("done[%p] bio: %p task_lba: %llu bio_lba: %llu err=%d\n",
diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h
index 5cf1860..e929370 100644
--- a/drivers/target/target_core_iblock.h
+++ b/drivers/target/target_core_iblock.h
@@ -8,7 +8,7 @@
 
 struct iblock_req {
 	struct se_task ib_task;
-	atomic_t ib_bio_cnt;
+	atomic_t pending;
 	atomic_t ib_bio_err_cnt;
 } ____cacheline_aligned;
 
@@ -19,11 +19,6 @@
 	u32	ibd_flags;
 	struct bio_set	*ibd_bio_set;
 	struct block_device *ibd_bd;
-	struct iblock_hba *ibd_host;
-} ____cacheline_aligned;
-
-struct iblock_hba {
-	int		iblock_host_id;
 } ____cacheline_aligned;
 
 #endif /* TARGET_CORE_IBLOCK_H */
diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index 4500136..21c0563 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h
@@ -53,6 +53,7 @@
 int	se_dev_set_emulate_rest_reord(struct se_device *dev, int);
 int	se_dev_set_queue_depth(struct se_device *, u32);
 int	se_dev_set_max_sectors(struct se_device *, u32);
+int	se_dev_set_fabric_max_sectors(struct se_device *, u32);
 int	se_dev_set_optimal_sectors(struct se_device *, u32);
 int	se_dev_set_block_size(struct se_device *, u32);
 struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_hba *,
@@ -75,6 +76,8 @@
 int	core_delete_hba(struct se_hba *);
 
 /* target_core_tmr.c */
+void	core_tmr_abort_task(struct se_device *, struct se_tmr_req *,
+			struct se_session *);
 int	core_tmr_lun_reset(struct se_device *, struct se_tmr_req *,
 		struct list_head *, struct se_cmd *);
 
diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 63e703b..86f0c3b 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c
@@ -338,7 +338,7 @@
 		return core_scsi2_reservation_seq_non_holder(cmd,
 					cdb, pr_reg_type);
 
-	se_deve = &se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
+	se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
 	/*
 	 * Determine if the registration should be ignored due to
 	 * non-matching ISIDs in core_scsi3_pr_reservation_check().
@@ -1000,7 +1000,7 @@
 {
 	struct se_subsystem_dev *su_dev = dev->se_sub_dev;
 	struct se_node_acl *nacl = lun_acl->se_lun_nacl;
-	struct se_dev_entry *deve = &nacl->device_list[lun_acl->mapped_lun];
+	struct se_dev_entry *deve = nacl->device_list[lun_acl->mapped_lun];
 
 	if (su_dev->t10_pr.res_type != SPC3_PERSISTENT_RESERVATIONS)
 		return 0;
@@ -1497,7 +1497,7 @@
 	struct se_dev_entry *dest_se_deve = NULL, *local_se_deve;
 	struct t10_pr_registration *dest_pr_reg, *local_pr_reg, *pr_reg_e;
 	struct t10_pr_registration *pr_reg_tmp, *pr_reg_tmp_safe;
-	struct list_head tid_dest_list;
+	LIST_HEAD(tid_dest_list);
 	struct pr_transport_id_holder *tidh_new, *tidh, *tidh_tmp;
 	struct target_core_fabric_ops *tmp_tf_ops;
 	unsigned char *buf;
@@ -1508,9 +1508,8 @@
 	u32 dest_rtpi = 0;
 
 	memset(dest_iport, 0, 64);
-	INIT_LIST_HEAD(&tid_dest_list);
 
-	local_se_deve = &se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
+	local_se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
 	/*
 	 * Allocate a struct pr_transport_id_holder and setup the
 	 * local_node_acl and local_se_deve pointers and add to
@@ -2127,7 +2126,7 @@
 		return -EINVAL;
 	}
 	se_tpg = se_sess->se_tpg;
-	se_deve = &se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
+	se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
 
 	if (se_tpg->se_tpg_tfo->sess_get_initiator_sid) {
 		memset(&isid_buf[0], 0, PR_REG_ISID_LEN);
@@ -2427,9 +2426,7 @@
 	u64 res_key)
 {
 	struct se_session *se_sess = cmd->se_sess;
-	struct se_dev_entry *se_deve;
 	struct se_lun *se_lun = cmd->se_lun;
-	struct se_portal_group *se_tpg;
 	struct t10_pr_registration *pr_reg, *pr_res_holder;
 	struct t10_reservation *pr_tmpl = &dev->se_sub_dev->t10_pr;
 	char i_buf[PR_REG_ISID_ID_LEN];
@@ -2442,8 +2439,6 @@
 		cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		return -EINVAL;
 	}
-	se_tpg = se_sess->se_tpg;
-	se_deve = &se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
 	/*
 	 * Locate the existing *pr_reg via struct se_node_acl pointers
 	 */
@@ -3001,10 +2996,9 @@
 	int abort)
 {
 	struct se_device *dev = cmd->se_dev;
-	struct se_dev_entry *se_deve;
 	struct se_node_acl *pr_reg_nacl;
 	struct se_session *se_sess = cmd->se_sess;
-	struct list_head preempt_and_abort_list;
+	LIST_HEAD(preempt_and_abort_list);
 	struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_reg_n, *pr_res_holder;
 	struct t10_reservation *pr_tmpl = &dev->se_sub_dev->t10_pr;
 	u32 pr_res_mapped_lun = 0;
@@ -3016,7 +3010,6 @@
 		return -EINVAL;
 	}
 
-	se_deve = &se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
 	pr_reg_n = core_scsi3_locate_pr_reg(cmd->se_dev, se_sess->se_node_acl,
 				se_sess);
 	if (!pr_reg_n) {
@@ -3037,7 +3030,6 @@
 		cmd->scsi_sense_reason = TCM_INVALID_PARAMETER_LIST;
 		return -EINVAL;
 	}
-	INIT_LIST_HEAD(&preempt_and_abort_list);
 
 	spin_lock(&dev->dev_reservation_lock);
 	pr_res_holder = dev->dev_pr_res_holder;
@@ -3353,7 +3345,7 @@
 {
 	struct se_session *se_sess = cmd->se_sess;
 	struct se_device *dev = cmd->se_dev;
-	struct se_dev_entry *se_deve, *dest_se_deve = NULL;
+	struct se_dev_entry *dest_se_deve = NULL;
 	struct se_lun *se_lun = cmd->se_lun;
 	struct se_node_acl *pr_res_nacl, *pr_reg_nacl, *dest_node_acl = NULL;
 	struct se_port *se_port;
@@ -3378,7 +3370,6 @@
 	memset(i_buf, 0, PR_REG_ISID_ID_LEN);
 	se_tpg = se_sess->se_tpg;
 	tf_ops = se_tpg->se_tpg_tfo;
-	se_deve = &se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
 	/*
 	 * Follow logic from spc4r17 Section 5.7.8, Table 50 --
 	 *	Register behaviors for a REGISTER AND MOVE service action
diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 8d4def3..94c905f 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c
@@ -69,7 +69,7 @@
 		return -ENOMEM;
 	}
 	phv->phv_host_id = host_id;
-	phv->phv_mode = PHV_VIRUTAL_HOST_ID;
+	phv->phv_mode = PHV_VIRTUAL_HOST_ID;
 
 	hba->hba_ptr = phv;
 
@@ -114,7 +114,7 @@
 			return 0;
 
 		phv->phv_lld_host = NULL;
-		phv->phv_mode = PHV_VIRUTAL_HOST_ID;
+		phv->phv_mode = PHV_VIRTUAL_HOST_ID;
 
 		pr_debug("CORE_HBA[%d] - Disabled pSCSI HBA Passthrough"
 			" %s\n", hba->hba_id, (sh->hostt->name) ?
@@ -531,7 +531,7 @@
 			return ERR_PTR(-ENODEV);
 		}
 		/*
-		 * For the newer PHV_VIRUTAL_HOST_ID struct scsi_device
+		 * For the newer PHV_VIRTUAL_HOST_ID struct scsi_device
 		 * reference, we enforce that udev_path has been set
 		 */
 		if (!(se_dev->su_dev_flags & SDF_USING_UDEV_PATH)) {
@@ -540,7 +540,7 @@
 			return ERR_PTR(-EINVAL);
 		}
 		/*
-		 * If no scsi_host_id= was passed for PHV_VIRUTAL_HOST_ID,
+		 * If no scsi_host_id= was passed for PHV_VIRTUAL_HOST_ID,
 		 * use the original TCM hba ID to reference Linux/SCSI Host No
 		 * and enable for PHV_LLD_SCSI_HOST_NO mode.
 		 */
@@ -569,8 +569,8 @@
 			}
 		}
 	} else {
-		if (phv->phv_mode == PHV_VIRUTAL_HOST_ID) {
-			pr_err("pSCSI: PHV_VIRUTAL_HOST_ID set while"
+		if (phv->phv_mode == PHV_VIRTUAL_HOST_ID) {
+			pr_err("pSCSI: PHV_VIRTUAL_HOST_ID set while"
 				" struct Scsi_Host exists\n");
 			return ERR_PTR(-EEXIST);
 		}
@@ -600,7 +600,7 @@
 		}
 
 		if (!dev) {
-			if (phv->phv_mode == PHV_VIRUTAL_HOST_ID)
+			if (phv->phv_mode == PHV_VIRTUAL_HOST_ID)
 				scsi_host_put(sh);
 			else if (legacy_mode_enable) {
 				pscsi_pmode_enable_hba(hba, 0);
@@ -616,7 +616,7 @@
 	pr_err("pSCSI: Unable to locate %d:%d:%d:%d\n", sh->host_no,
 		pdv->pdv_channel_id,  pdv->pdv_target_id, pdv->pdv_lun_id);
 
-	if (phv->phv_mode == PHV_VIRUTAL_HOST_ID)
+	if (phv->phv_mode == PHV_VIRTUAL_HOST_ID)
 		scsi_host_put(sh);
 	else if (legacy_mode_enable) {
 		pscsi_pmode_enable_hba(hba, 0);
@@ -898,7 +898,7 @@
 	ssize_t bl;
 	int i;
 
-	if (phv->phv_mode == PHV_VIRUTAL_HOST_ID)
+	if (phv->phv_mode == PHV_VIRTUAL_HOST_ID)
 		snprintf(host_id, 16, "%d", pdv->pdv_host_id);
 	else
 		snprintf(host_id, 16, "PHBA Mode");
diff --git a/drivers/target/target_core_pscsi.h b/drivers/target/target_core_pscsi.h
index fdc17b6..43f1c41 100644
--- a/drivers/target/target_core_pscsi.h
+++ b/drivers/target/target_core_pscsi.h
@@ -49,7 +49,7 @@
 } ____cacheline_aligned;
 
 typedef enum phv_modes {
-	PHV_VIRUTAL_HOST_ID,
+	PHV_VIRTUAL_HOST_ID,
 	PHV_LLD_SCSI_HOST_NO
 } phv_modes_t;
 
diff --git a/drivers/target/target_core_stat.c b/drivers/target/target_core_stat.c
index f8c2d2c..3d44beb 100644
--- a/drivers/target/target_core_stat.c
+++ b/drivers/target/target_core_stat.c
@@ -954,7 +954,6 @@
 {
 	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
 	struct se_port *sep;
-	struct se_portal_group *tpg;
 	ssize_t ret;
 
 	spin_lock(&lun->lun_sep_lock);
@@ -963,7 +962,6 @@
 		spin_unlock(&lun->lun_sep_lock);
 		return -ENODEV;
 	}
-	tpg = sep->sep_tpg;
 
 	ret = snprintf(page, PAGE_SIZE, "%llu\n", sep->sep_stats.cmd_pdus);
 	spin_unlock(&lun->lun_sep_lock);
@@ -976,7 +974,6 @@
 {
 	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
 	struct se_port *sep;
-	struct se_portal_group *tpg;
 	ssize_t ret;
 
 	spin_lock(&lun->lun_sep_lock);
@@ -985,7 +982,6 @@
 		spin_unlock(&lun->lun_sep_lock);
 		return -ENODEV;
 	}
-	tpg = sep->sep_tpg;
 
 	ret = snprintf(page, PAGE_SIZE, "%u\n",
 			(u32)(sep->sep_stats.rx_data_octets >> 20));
@@ -999,7 +995,6 @@
 {
 	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
 	struct se_port *sep;
-	struct se_portal_group *tpg;
 	ssize_t ret;
 
 	spin_lock(&lun->lun_sep_lock);
@@ -1008,7 +1003,6 @@
 		spin_unlock(&lun->lun_sep_lock);
 		return -ENODEV;
 	}
-	tpg = sep->sep_tpg;
 
 	ret = snprintf(page, PAGE_SIZE, "%u\n",
 			(u32)(sep->sep_stats.tx_data_octets >> 20));
@@ -1022,7 +1016,6 @@
 {
 	struct se_lun *lun = container_of(pgrps, struct se_lun, port_stat_grps);
 	struct se_port *sep;
-	struct se_portal_group *tpg;
 	ssize_t ret;
 
 	spin_lock(&lun->lun_sep_lock);
@@ -1031,7 +1024,6 @@
 		spin_unlock(&lun->lun_sep_lock);
 		return -ENODEV;
 	}
-	tpg = sep->sep_tpg;
 
 	/* FIXME: scsiTgtPortHsInCommands */
 	ret = snprintf(page, PAGE_SIZE, "%u\n", 0);
@@ -1253,7 +1245,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1275,16 +1267,14 @@
 	struct se_node_acl *nacl = lacl->se_lun_nacl;
 	struct se_dev_entry *deve;
 	struct se_lun *lun;
-	struct se_portal_group *tpg;
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
 	}
-	tpg = nacl->se_tpg;
 	lun = deve->se_lun;
 	/* scsiDeviceIndex */
 	ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_se_dev->dev_index);
@@ -1304,7 +1294,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1327,7 +1317,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1349,7 +1339,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1371,7 +1361,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1393,7 +1383,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1415,7 +1405,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1437,7 +1427,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1459,7 +1449,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1481,7 +1471,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1503,7 +1493,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1525,7 +1515,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1548,7 +1538,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1621,7 +1611,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1643,16 +1633,14 @@
 	struct se_node_acl *nacl = lacl->se_lun_nacl;
 	struct se_dev_entry *deve;
 	struct se_lun *lun;
-	struct se_portal_group *tpg;
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
 	}
-	tpg = nacl->se_tpg;
 	lun = deve->se_lun;
 	/* scsiDeviceIndex */
 	ret = snprintf(page, PAGE_SIZE, "%u\n", lun->lun_se_dev->dev_index);
@@ -1672,7 +1660,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
@@ -1721,7 +1709,7 @@
 	ssize_t ret;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[lacl->mapped_lun];
+	deve = nacl->device_list[lacl->mapped_lun];
 	if (!deve->se_lun || !deve->se_lun_acl) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -ENODEV;
diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c
index dcb0618..f015839 100644
--- a/drivers/target/target_core_tmr.c
+++ b/drivers/target/target_core_tmr.c
@@ -40,7 +40,7 @@
 #include "target_core_alua.h"
 #include "target_core_pr.h"
 
-struct se_tmr_req *core_tmr_alloc_req(
+int core_tmr_alloc_req(
 	struct se_cmd *se_cmd,
 	void *fabric_tmr_ptr,
 	u8 function,
@@ -48,17 +48,20 @@
 {
 	struct se_tmr_req *tmr;
 
-	tmr = kmem_cache_zalloc(se_tmr_req_cache, gfp_flags);
+	tmr = kzalloc(sizeof(struct se_tmr_req), gfp_flags);
 	if (!tmr) {
 		pr_err("Unable to allocate struct se_tmr_req\n");
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 	}
+
+	se_cmd->se_cmd_flags |= SCF_SCSI_TMR_CDB;
+	se_cmd->se_tmr_req = tmr;
 	tmr->task_cmd = se_cmd;
 	tmr->fabric_tmr_ptr = fabric_tmr_ptr;
 	tmr->function = function;
 	INIT_LIST_HEAD(&tmr->tmr_list);
 
-	return tmr;
+	return 0;
 }
 EXPORT_SYMBOL(core_tmr_alloc_req);
 
@@ -69,7 +72,7 @@
 	unsigned long flags;
 
 	if (!dev) {
-		kmem_cache_free(se_tmr_req_cache, tmr);
+		kfree(tmr);
 		return;
 	}
 
@@ -77,7 +80,7 @@
 	list_del(&tmr->tmr_list);
 	spin_unlock_irqrestore(&dev->se_tmr_lock, flags);
 
-	kmem_cache_free(se_tmr_req_cache, tmr);
+	kfree(tmr);
 }
 
 static void core_tmr_handle_tas_abort(
@@ -115,6 +118,89 @@
 	return 1;
 }
 
+/*
+ * core_tmr_abort_task - handle an ABORT_TASK request for one session
+ * @dev: only commands whose se_dev equals this device are considered
+ * @tmr: request carrying ref_task_tag; ->response is filled in here
+ * @se_sess: session whose sess_cmd_list is searched
+ *
+ * Walks se_sess->sess_cmd_list under sess_cmd_lock for the first command on
+ * @dev whose fabric task tag equals tmr->ref_task_tag.  If that command is
+ * not yet CMD_T_COMPLETE it is marked CMD_T_ABORTED, unlinked from the
+ * session list, waited on, answered via transport_send_task_abort() and
+ * tmr->response is set to TMR_FUNCTION_COMPLETE.  When no command matches,
+ * or the match has already completed, tmr->response is set to
+ * TMR_TASK_DOES_NOT_EXIST.
+ */
+void core_tmr_abort_task(
+	struct se_device *dev,
+	struct se_tmr_req *tmr,
+	struct se_session *se_sess)
+{
+	struct se_cmd *se_cmd, *tmp_cmd;
+	unsigned long flags;
+	int ref_tag;
+
+	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
+	list_for_each_entry_safe(se_cmd, tmp_cmd,
+			&se_sess->sess_cmd_list, se_cmd_list) {
+
+		if (dev != se_cmd->se_dev)
+			continue;
+		ref_tag = se_cmd->se_tfo->get_task_tag(se_cmd);
+		if (tmr->ref_task_tag != ref_tag)
+			continue;
+
+		printk("ABORT_TASK: Found referenced %s task_tag: %u\n",
+			se_cmd->se_tfo->get_fabric_name(), ref_tag);
+
+		/*
+		 * sess_cmd_lock was taken with irqsave above, so use the plain
+		 * lock variants here: spin_unlock_irq() would unconditionally
+		 * re-enable interrupts while sess_cmd_lock is still held.
+		 */
+		spin_lock(&se_cmd->t_state_lock);
+		if (se_cmd->transport_state & CMD_T_COMPLETE) {
+			printk("ABORT_TASK: ref_tag: %u already complete, skipping\n", ref_tag);
+			spin_unlock(&se_cmd->t_state_lock);
+			spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+			goto out;
+		}
+		se_cmd->transport_state |= CMD_T_ABORTED;
+		spin_unlock(&se_cmd->t_state_lock);
+
+		list_del_init(&se_cmd->se_cmd_list);
+		kref_get(&se_cmd->cmd_kref);
+		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+
+		cancel_work_sync(&se_cmd->work);
+		transport_wait_for_tasks(se_cmd);
+		/*
+		 * Now send SAM_STAT_TASK_ABORTED status for the referenced
+		 * se_cmd descriptor..
+		 */
+		transport_send_task_abort(se_cmd);
+		/*
+		 * Also deal with possible extra acknowledge reference..
+		 */
+		if (se_cmd->se_cmd_flags & SCF_ACK_KREF)
+			target_put_sess_cmd(se_sess, se_cmd);
+
+		target_put_sess_cmd(se_sess, se_cmd);
+
+		printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for"
+				" ref_tag: %d\n", ref_tag);
+		tmr->response = TMR_FUNCTION_COMPLETE;
+		return;
+	}
+	spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
+
+out:
+	printk("ABORT_TASK: Sending TMR_TASK_DOES_NOT_EXIST for ref_tag: %d\n",
+			tmr->ref_task_tag);
+	tmr->response = TMR_TASK_DOES_NOT_EXIST;
+}
+
 static void core_tmr_drain_tmr_list(
 	struct se_device *dev,
 	struct se_tmr_req *tmr,
@@ -150,7 +217,7 @@
 			continue;
 
 		spin_lock(&cmd->t_state_lock);
-		if (!atomic_read(&cmd->t_transport_active)) {
+		if (!(cmd->transport_state & CMD_T_ACTIVE)) {
 			spin_unlock(&cmd->t_state_lock);
 			continue;
 		}
@@ -255,15 +322,15 @@
 			cmd->t_task_cdb[0]);
 		pr_debug("LUN_RESET: ITT[0x%08x] - pr_res_key: 0x%016Lx"
 			" t_task_cdbs: %d t_task_cdbs_left: %d"
-			" t_task_cdbs_sent: %d -- t_transport_active: %d"
-			" t_transport_stop: %d t_transport_sent: %d\n",
+			" t_task_cdbs_sent: %d -- CMD_T_ACTIVE: %d"
+			" CMD_T_STOP: %d CMD_T_SENT: %d\n",
 			cmd->se_tfo->get_task_tag(cmd), cmd->pr_res_key,
 			cmd->t_task_list_num,
 			atomic_read(&cmd->t_task_cdbs_left),
 			atomic_read(&cmd->t_task_cdbs_sent),
-			atomic_read(&cmd->t_transport_active),
-			atomic_read(&cmd->t_transport_stop),
-			atomic_read(&cmd->t_transport_sent));
+			(cmd->transport_state & CMD_T_ACTIVE) != 0,
+			(cmd->transport_state & CMD_T_STOP) != 0,
+			(cmd->transport_state & CMD_T_SENT) != 0);
 
 		/*
 		 * If the command may be queued onto a workqueue cancel it now.
@@ -287,19 +354,20 @@
 		}
 		fe_count = atomic_read(&cmd->t_fe_count);
 
-		if (atomic_read(&cmd->t_transport_active)) {
-			pr_debug("LUN_RESET: got t_transport_active = 1 for"
+		/* Branch taken when CMD_T_ACTIVE is set, as the message below reports */
+		if (cmd->transport_state & CMD_T_ACTIVE) {
+			pr_debug("LUN_RESET: got CMD_T_ACTIVE for"
 				" task: %p, t_fe_count: %d dev: %p\n", task,
 				fe_count, dev);
-			atomic_set(&cmd->t_transport_aborted, 1);
+			cmd->transport_state |= CMD_T_ABORTED;
 			spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
 			core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
 			continue;
 		}
-		pr_debug("LUN_RESET: Got t_transport_active = 0 for task: %p,"
+		pr_debug("LUN_RESET: Got !CMD_T_ACTIVE for task: %p,"
 			" t_fe_count: %d dev: %p\n", task, fe_count, dev);
-		atomic_set(&cmd->t_transport_aborted, 1);
+		cmd->transport_state |= CMD_T_ABORTED;
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
 		core_tmr_handle_tas_abort(tmr_nacl, cmd, tas, fe_count);
@@ -339,7 +406,7 @@
 		if (prout_cmd == cmd)
 			continue;
 
-		atomic_set(&cmd->t_transport_queue_active, 0);
+		cmd->transport_state &= ~CMD_T_QUEUED;
 		atomic_dec(&qobj->queue_cnt);
 		list_move_tail(&cmd->se_queue_node, &drain_cmd_list);
 	}
diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c
index 06336ec..70c3ffb 100644
--- a/drivers/target/target_core_tpg.c
+++ b/drivers/target/target_core_tpg.c
@@ -64,7 +64,7 @@
 
 	spin_lock_irq(&nacl->device_list_lock);
 	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		deve = &nacl->device_list[i];
+		deve = nacl->device_list[i];
 
 		if (!(deve->lun_flags & TRANSPORT_LUNFLAGS_INITIATOR_ACCESS))
 			continue;
@@ -163,7 +163,7 @@
 
 	spin_lock(&tpg->tpg_lun_lock);
 	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		lun = &tpg->tpg_lun_list[i];
+		lun = tpg->tpg_lun_list[i];
 		if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE)
 			continue;
 
@@ -222,6 +222,58 @@
 	return 0;
 }
 
+/*
+ * array_free - release a pointer table built by array_zalloc()
+ * @array: table of @n element pointers, or NULL
+ * @n: number of slots in @array
+ *
+ * Every slot is handed to kfree() before the table itself; a NULL @array is
+ * ignored so callers may invoke this on a table that was never allocated.
+ */
+void array_free(void *array, int n)
+{
+	void **a = array;
+	int i;
+
+	if (!a)
+		return;
+
+	for (i = 0; i < n; i++)
+		kfree(a[i]);
+	kfree(a);
+}
+
+/*
+ * array_zalloc - allocate a table of @n pointers to zeroed elements
+ * @n: number of elements
+ * @size: size in bytes of each element
+ * @flags: gfp flags used for the table and for every element
+ *
+ * kcalloc() is used for the table so that an @n large enough to overflow
+ * n * sizeof(void *) yields NULL instead of an undersized allocation.
+ * If any element allocation fails, the whole table is released through
+ * array_free() (unfilled slots are still NULL) and NULL is returned.
+ *
+ * Returns the table on success, NULL on failure.
+ */
+static void *array_zalloc(int n, size_t size, gfp_t flags)
+{
+	void **a;
+	int i;
+
+	a = kcalloc(n, sizeof(void *), flags);
+	if (!a)
+		return NULL;
+	for (i = 0; i < n; i++) {
+		a[i] = kzalloc(size, flags);
+		if (!a[i]) {
+			array_free(a, n);
+			return NULL;
+		}
+	}
+	return a;
+}
+
 /*      core_create_device_list_for_node():
  *
  *
@@ -231,15 +259,15 @@
 	struct se_dev_entry *deve;
 	int i;
 
-	nacl->device_list = kzalloc(sizeof(struct se_dev_entry) *
-				TRANSPORT_MAX_LUNS_PER_TPG, GFP_KERNEL);
+	nacl->device_list = array_zalloc(TRANSPORT_MAX_LUNS_PER_TPG,
+			sizeof(struct se_dev_entry), GFP_KERNEL);
 	if (!nacl->device_list) {
 		pr_err("Unable to allocate memory for"
 			" struct se_node_acl->device_list\n");
 		return -ENOMEM;
 	}
 	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		deve = &nacl->device_list[i];
+		deve = nacl->device_list[i];
 
 		atomic_set(&deve->ua_count, 0);
 		atomic_set(&deve->pr_ref_count, 0);
@@ -274,6 +302,8 @@
 
 	INIT_LIST_HEAD(&acl->acl_list);
 	INIT_LIST_HEAD(&acl->acl_sess_list);
+	kref_init(&acl->acl_kref);
+	init_completion(&acl->acl_free_comp);
 	spin_lock_init(&acl->device_list_lock);
 	spin_lock_init(&acl->nacl_sess_lock);
 	atomic_set(&acl->acl_pr_ref_count, 0);
@@ -329,19 +359,19 @@
 
 void core_tpg_clear_object_luns(struct se_portal_group *tpg)
 {
-	int i, ret;
+	int i;
 	struct se_lun *lun;
 
 	spin_lock(&tpg->tpg_lun_lock);
 	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		lun = &tpg->tpg_lun_list[i];
+		lun = tpg->tpg_lun_list[i];
 
 		if ((lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) ||
 		    (lun->lun_se_dev == NULL))
 			continue;
 
 		spin_unlock(&tpg->tpg_lun_lock);
-		ret = core_dev_del_lun(tpg, lun->unpacked_lun);
+		core_dev_del_lun(tpg, lun->unpacked_lun);
 		spin_lock(&tpg->tpg_lun_lock);
 	}
 	spin_unlock(&tpg->tpg_lun_lock);
@@ -402,6 +432,8 @@
 
 	INIT_LIST_HEAD(&acl->acl_list);
 	INIT_LIST_HEAD(&acl->acl_sess_list);
+	kref_init(&acl->acl_kref);
+	init_completion(&acl->acl_free_comp);
 	spin_lock_init(&acl->device_list_lock);
 	spin_lock_init(&acl->nacl_sess_lock);
 	atomic_set(&acl->acl_pr_ref_count, 0);
@@ -448,39 +480,47 @@
 	struct se_node_acl *acl,
 	int force)
 {
+	LIST_HEAD(sess_list);
 	struct se_session *sess, *sess_tmp;
-	int dynamic_acl = 0;
+	unsigned long flags;
+	int rc;
 
 	spin_lock_irq(&tpg->acl_node_lock);
 	if (acl->dynamic_node_acl) {
 		acl->dynamic_node_acl = 0;
-		dynamic_acl = 1;
 	}
 	list_del(&acl->acl_list);
 	tpg->num_node_acls--;
 	spin_unlock_irq(&tpg->acl_node_lock);
 
-	spin_lock_bh(&tpg->session_lock);
-	list_for_each_entry_safe(sess, sess_tmp,
-				&tpg->tpg_sess_list, sess_list) {
-		if (sess->se_node_acl != acl)
-			continue;
-		/*
-		 * Determine if the session needs to be closed by our context.
-		 */
-		if (!tpg->se_tpg_tfo->shutdown_session(sess))
+	spin_lock_irqsave(&acl->nacl_sess_lock, flags);
+	acl->acl_stop = 1;
+
+	list_for_each_entry_safe(sess, sess_tmp, &acl->acl_sess_list,
+				sess_acl_list) {
+		if (sess->sess_tearing_down != 0)
 			continue;
 
-		spin_unlock_bh(&tpg->session_lock);
-		/*
-		 * If the $FABRIC_MOD session for the Initiator Node ACL exists,
-		 * forcefully shutdown the $FABRIC_MOD session/nexus.
-		 */
-		tpg->se_tpg_tfo->close_session(sess);
-
-		spin_lock_bh(&tpg->session_lock);
+		target_get_session(sess);
+		list_move(&sess->sess_acl_list, &sess_list);
 	}
-	spin_unlock_bh(&tpg->session_lock);
+	spin_unlock_irqrestore(&acl->nacl_sess_lock, flags);
+
+	list_for_each_entry_safe(sess, sess_tmp, &sess_list, sess_acl_list) {
+		list_del(&sess->sess_acl_list);
+
+		rc = tpg->se_tpg_tfo->shutdown_session(sess);
+		target_put_session(sess);
+		if (!rc)
+			continue;
+		target_put_session(sess);
+	}
+	target_put_nacl(acl);
+	/*
+	 * Wait for last target_put_nacl() to complete in target_complete_nacl()
+	 * for active fabric session transport_deregister_session() callbacks.
+	 */
+	wait_for_completion(&acl->acl_free_comp);
 
 	core_tpg_wait_for_nacl_pr_ref(acl);
 	core_clear_initiator_node_from_tpg(acl, tpg);
@@ -507,6 +547,7 @@
 {
 	struct se_session *sess, *init_sess = NULL;
 	struct se_node_acl *acl;
+	unsigned long flags;
 	int dynamic_acl = 0;
 
 	spin_lock_irq(&tpg->acl_node_lock);
@@ -525,7 +566,7 @@
 	}
 	spin_unlock_irq(&tpg->acl_node_lock);
 
-	spin_lock_bh(&tpg->session_lock);
+	spin_lock_irqsave(&tpg->session_lock, flags);
 	list_for_each_entry(sess, &tpg->tpg_sess_list, sess_list) {
 		if (sess->se_node_acl != acl)
 			continue;
@@ -537,7 +578,7 @@
 				" depth and force session reinstatement"
 				" use the \"force=1\" parameter.\n",
 				tpg->se_tpg_tfo->get_fabric_name(), initiatorname);
-			spin_unlock_bh(&tpg->session_lock);
+			spin_unlock_irqrestore(&tpg->session_lock, flags);
 
 			spin_lock_irq(&tpg->acl_node_lock);
 			if (dynamic_acl)
@@ -567,7 +608,7 @@
 	acl->queue_depth = queue_depth;
 
 	if (core_set_queue_depth_for_node(tpg, acl) < 0) {
-		spin_unlock_bh(&tpg->session_lock);
+		spin_unlock_irqrestore(&tpg->session_lock, flags);
 		/*
 		 * Force session reinstatement if
 		 * core_set_queue_depth_for_node() failed, because we assume
@@ -583,7 +624,7 @@
 		spin_unlock_irq(&tpg->acl_node_lock);
 		return -EINVAL;
 	}
-	spin_unlock_bh(&tpg->session_lock);
+	spin_unlock_irqrestore(&tpg->session_lock, flags);
 	/*
 	 * If the $FABRIC_MOD session for the Initiator Node ACL exists,
 	 * forcefully shutdown the $FABRIC_MOD session/nexus.
@@ -647,8 +688,8 @@
 	struct se_lun *lun;
 	u32 i;
 
-	se_tpg->tpg_lun_list = kzalloc((sizeof(struct se_lun) *
-				TRANSPORT_MAX_LUNS_PER_TPG), GFP_KERNEL);
+	se_tpg->tpg_lun_list = array_zalloc(TRANSPORT_MAX_LUNS_PER_TPG,
+			sizeof(struct se_lun), GFP_KERNEL);
 	if (!se_tpg->tpg_lun_list) {
 		pr_err("Unable to allocate struct se_portal_group->"
 				"tpg_lun_list\n");
@@ -656,7 +697,7 @@
 	}
 
 	for (i = 0; i < TRANSPORT_MAX_LUNS_PER_TPG; i++) {
-		lun = &se_tpg->tpg_lun_list[i];
+		lun = se_tpg->tpg_lun_list[i];
 		lun->unpacked_lun = i;
 		lun->lun_status = TRANSPORT_LUN_STATUS_FREE;
 		atomic_set(&lun->lun_acl_count, 0);
@@ -742,7 +783,7 @@
 		core_tpg_release_virtual_lun0(se_tpg);
 
 	se_tpg->se_tpg_fabric_ptr = NULL;
-	kfree(se_tpg->tpg_lun_list);
+	array_free(se_tpg->tpg_lun_list, TRANSPORT_MAX_LUNS_PER_TPG);
 	return 0;
 }
 EXPORT_SYMBOL(core_tpg_deregister);
@@ -763,7 +804,7 @@
 	}
 
 	spin_lock(&tpg->tpg_lun_lock);
-	lun = &tpg->tpg_lun_list[unpacked_lun];
+	lun = tpg->tpg_lun_list[unpacked_lun];
 	if (lun->lun_status == TRANSPORT_LUN_STATUS_ACTIVE) {
 		pr_err("TPG Logical Unit Number: %u is already active"
 			" on %s Target Portal Group: %u, ignoring request.\n",
@@ -821,7 +862,7 @@
 	}
 
 	spin_lock(&tpg->tpg_lun_lock);
-	lun = &tpg->tpg_lun_list[unpacked_lun];
+	lun = tpg->tpg_lun_list[unpacked_lun];
 	if (lun->lun_status != TRANSPORT_LUN_STATUS_ACTIVE) {
 		pr_err("%s Logical Unit Number: %u is not active on"
 			" Target Portal Group: %u, ignoring request.\n",
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 929cc93..443704f 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -37,6 +37,7 @@
 #include <linux/in.h>
 #include <linux/cdrom.h>
 #include <linux/module.h>
+#include <linux/ratelimit.h>
 #include <asm/unaligned.h>
 #include <net/sock.h>
 #include <net/tcp.h>
@@ -58,7 +59,6 @@
 
 static struct workqueue_struct *target_completion_wq;
 static struct kmem_cache *se_sess_cache;
-struct kmem_cache *se_tmr_req_cache;
 struct kmem_cache *se_ua_cache;
 struct kmem_cache *t10_pr_reg_cache;
 struct kmem_cache *t10_alua_lu_gp_cache;
@@ -77,26 +77,17 @@
 static void transport_put_cmd(struct se_cmd *cmd);
 static void transport_remove_cmd_from_queue(struct se_cmd *cmd);
 static int transport_set_sense_codes(struct se_cmd *cmd, u8 asc, u8 ascq);
-static void transport_generic_request_failure(struct se_cmd *);
 static void target_complete_ok_work(struct work_struct *work);
 
 int init_se_kmem_caches(void)
 {
-	se_tmr_req_cache = kmem_cache_create("se_tmr_cache",
-			sizeof(struct se_tmr_req), __alignof__(struct se_tmr_req),
-			0, NULL);
-	if (!se_tmr_req_cache) {
-		pr_err("kmem_cache_create() for struct se_tmr_req"
-				" failed\n");
-		goto out;
-	}
 	se_sess_cache = kmem_cache_create("se_sess_cache",
 			sizeof(struct se_session), __alignof__(struct se_session),
 			0, NULL);
 	if (!se_sess_cache) {
 		pr_err("kmem_cache_create() for struct se_session"
 				" failed\n");
-		goto out_free_tmr_req_cache;
+		goto out;
 	}
 	se_ua_cache = kmem_cache_create("se_ua_cache",
 			sizeof(struct se_ua), __alignof__(struct se_ua),
@@ -169,8 +160,6 @@
 	kmem_cache_destroy(se_ua_cache);
 out_free_sess_cache:
 	kmem_cache_destroy(se_sess_cache);
-out_free_tmr_req_cache:
-	kmem_cache_destroy(se_tmr_req_cache);
 out:
 	return -ENOMEM;
 }
@@ -178,7 +167,6 @@
 void release_se_kmem_caches(void)
 {
 	destroy_workqueue(target_completion_wq);
-	kmem_cache_destroy(se_tmr_req_cache);
 	kmem_cache_destroy(se_sess_cache);
 	kmem_cache_destroy(se_ua_cache);
 	kmem_cache_destroy(t10_pr_reg_cache);
@@ -258,13 +246,14 @@
 	INIT_LIST_HEAD(&se_sess->sess_cmd_list);
 	INIT_LIST_HEAD(&se_sess->sess_wait_list);
 	spin_lock_init(&se_sess->sess_cmd_lock);
+	kref_init(&se_sess->sess_kref);
 
 	return se_sess;
 }
 EXPORT_SYMBOL(transport_init_session);
 
 /*
- * Called with spin_lock_bh(&struct se_portal_group->session_lock called.
+ * Called with spin_lock_irqsave(&struct se_portal_group->session_lock) held.
  */
 void __transport_register_session(
 	struct se_portal_group *se_tpg,
@@ -293,6 +282,8 @@
 					&buf[0], PR_REG_ISID_LEN);
 			se_sess->sess_bin_isid = get_unaligned_be64(&buf[0]);
 		}
+		kref_get(&se_nacl->acl_kref);
+
 		spin_lock_irq(&se_nacl->nacl_sess_lock);
 		/*
 		 * The se_nacl->nacl_sess pointer will be set to the
@@ -317,12 +308,48 @@
 	struct se_session *se_sess,
 	void *fabric_sess_ptr)
 {
-	spin_lock_bh(&se_tpg->session_lock);
+	unsigned long flags;
+
+	spin_lock_irqsave(&se_tpg->session_lock, flags);
 	__transport_register_session(se_tpg, se_nacl, se_sess, fabric_sess_ptr);
-	spin_unlock_bh(&se_tpg->session_lock);
+	spin_unlock_irqrestore(&se_tpg->session_lock, flags);
 }
 EXPORT_SYMBOL(transport_register_session);
 
+static void target_release_session(struct kref *kref)
+{
+	struct se_session *se_sess = container_of(kref,
+			struct se_session, sess_kref);
+	struct se_portal_group *se_tpg = se_sess->se_tpg;
+
+	se_tpg->se_tpg_tfo->close_session(se_sess);
+}
+
+void target_get_session(struct se_session *se_sess)
+{
+	kref_get(&se_sess->sess_kref);
+}
+EXPORT_SYMBOL(target_get_session);
+
+int target_put_session(struct se_session *se_sess)
+{
+	return kref_put(&se_sess->sess_kref, target_release_session);
+}
+EXPORT_SYMBOL(target_put_session);
+
+static void target_complete_nacl(struct kref *kref)
+{
+	struct se_node_acl *nacl = container_of(kref,
+				struct se_node_acl, acl_kref);
+
+	complete(&nacl->acl_free_comp);
+}
+
+void target_put_nacl(struct se_node_acl *nacl)
+{
+	kref_put(&nacl->acl_kref, target_complete_nacl);
+}
+
 void transport_deregister_session_configfs(struct se_session *se_sess)
 {
 	struct se_node_acl *se_nacl;
@@ -333,7 +360,8 @@
 	se_nacl = se_sess->se_node_acl;
 	if (se_nacl) {
 		spin_lock_irqsave(&se_nacl->nacl_sess_lock, flags);
-		list_del(&se_sess->sess_acl_list);
+		if (se_nacl->acl_stop == 0)
+			list_del(&se_sess->sess_acl_list);
 		/*
 		 * If the session list is empty, then clear the pointer.
 		 * Otherwise, set the struct se_session pointer from the tail
@@ -360,13 +388,16 @@
 void transport_deregister_session(struct se_session *se_sess)
 {
 	struct se_portal_group *se_tpg = se_sess->se_tpg;
+	struct target_core_fabric_ops *se_tfo;
 	struct se_node_acl *se_nacl;
 	unsigned long flags;
+	bool comp_nacl = true;
 
 	if (!se_tpg) {
 		transport_free_session(se_sess);
 		return;
 	}
+	se_tfo = se_tpg->se_tpg_tfo;
 
 	spin_lock_irqsave(&se_tpg->session_lock, flags);
 	list_del(&se_sess->sess_list);
@@ -379,29 +410,34 @@
 	 * struct se_node_acl if it had been previously dynamically generated.
 	 */
 	se_nacl = se_sess->se_node_acl;
-	if (se_nacl) {
-		spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
-		if (se_nacl->dynamic_node_acl) {
-			if (!se_tpg->se_tpg_tfo->tpg_check_demo_mode_cache(
-					se_tpg)) {
-				list_del(&se_nacl->acl_list);
-				se_tpg->num_node_acls--;
-				spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
 
-				core_tpg_wait_for_nacl_pr_ref(se_nacl);
-				core_free_device_list_for_node(se_nacl, se_tpg);
-				se_tpg->se_tpg_tfo->tpg_release_fabric_acl(se_tpg,
-						se_nacl);
-				spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
-			}
+	spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
+	if (se_nacl && se_nacl->dynamic_node_acl) {
+		if (!se_tfo->tpg_check_demo_mode_cache(se_tpg)) {
+			list_del(&se_nacl->acl_list);
+			se_tpg->num_node_acls--;
+			spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
+			core_tpg_wait_for_nacl_pr_ref(se_nacl);
+			core_free_device_list_for_node(se_nacl, se_tpg);
+			se_tfo->tpg_release_fabric_acl(se_tpg, se_nacl);
+
+			comp_nacl = false;
+			spin_lock_irqsave(&se_tpg->acl_node_lock, flags);
 		}
-		spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
 	}
-
-	transport_free_session(se_sess);
+	spin_unlock_irqrestore(&se_tpg->acl_node_lock, flags);
 
 	pr_debug("TARGET_CORE[%s]: Deregistered fabric_sess\n",
 		se_tpg->se_tpg_tfo->get_fabric_name());
+	/*
+	 * If last kref is dropping now for an explicit NodeACL, awake sleeping
+	 * ->acl_free_comp caller to wakeup configfs se_node_acl->acl_group
+	 * removal context.
+	 */
+	if (se_nacl && comp_nacl == true)
+		target_put_nacl(se_nacl);
+
+	transport_free_session(se_sess);
 }
 EXPORT_SYMBOL(transport_deregister_session);
 
@@ -437,7 +473,7 @@
 
 /*	transport_cmd_check_stop():
  *
- *	'transport_off = 1' determines if t_transport_active should be cleared.
+ *	'transport_off = 1' determines if CMD_T_ACTIVE should be cleared.
  *	'transport_off = 2' determines if task_dev_state should be removed.
  *
  *	A non-zero u8 t_state sets cmd->t_state.
@@ -455,12 +491,11 @@
 	 * Determine if IOCTL context caller in requesting the stopping of this
 	 * command for LUN shutdown purposes.
 	 */
-	if (atomic_read(&cmd->transport_lun_stop)) {
-		pr_debug("%s:%d atomic_read(&cmd->transport_lun_stop)"
-			" == TRUE for ITT: 0x%08x\n", __func__, __LINE__,
-			cmd->se_tfo->get_task_tag(cmd));
+	if (cmd->transport_state & CMD_T_LUN_STOP) {
+		pr_debug("%s:%d CMD_T_LUN_STOP for ITT: 0x%08x\n",
+			__func__, __LINE__, cmd->se_tfo->get_task_tag(cmd));
 
-		atomic_set(&cmd->t_transport_active, 0);
+		cmd->transport_state &= ~CMD_T_ACTIVE;
 		if (transport_off == 2)
 			transport_all_task_dev_remove_state(cmd);
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
@@ -472,9 +507,9 @@
 	 * Determine if frontend context caller is requesting the stopping of
 	 * this command for frontend exceptions.
 	 */
-	if (atomic_read(&cmd->t_transport_stop)) {
-		pr_debug("%s:%d atomic_read(&cmd->t_transport_stop) =="
-			" TRUE for ITT: 0x%08x\n", __func__, __LINE__,
+	if (cmd->transport_state & CMD_T_STOP) {
+		pr_debug("%s:%d CMD_T_STOP for ITT: 0x%08x\n",
+			__func__, __LINE__,
 			cmd->se_tfo->get_task_tag(cmd));
 
 		if (transport_off == 2)
@@ -492,7 +527,7 @@
 		return 1;
 	}
 	if (transport_off) {
-		atomic_set(&cmd->t_transport_active, 0);
+		cmd->transport_state &= ~CMD_T_ACTIVE;
 		if (transport_off == 2) {
 			transport_all_task_dev_remove_state(cmd);
 			/*
@@ -540,31 +575,21 @@
 		return;
 
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	if (!atomic_read(&cmd->transport_dev_active)) {
-		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		goto check_lun;
+	if (cmd->transport_state & CMD_T_DEV_ACTIVE) {
+		cmd->transport_state &= ~CMD_T_DEV_ACTIVE;
+		transport_all_task_dev_remove_state(cmd);
 	}
-	atomic_set(&cmd->transport_dev_active, 0);
-	transport_all_task_dev_remove_state(cmd);
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
-
-check_lun:
 	spin_lock_irqsave(&lun->lun_cmd_lock, flags);
-	if (atomic_read(&cmd->transport_lun_active)) {
-		list_del(&cmd->se_lun_node);
-		atomic_set(&cmd->transport_lun_active, 0);
-#if 0
-		pr_debug("Removed ITT: 0x%08x from LUN LIST[%d]\n"
-			cmd->se_tfo->get_task_tag(cmd), lun->unpacked_lun);
-#endif
-	}
+	if (!list_empty(&cmd->se_lun_node))
+		list_del_init(&cmd->se_lun_node);
 	spin_unlock_irqrestore(&lun->lun_cmd_lock, flags);
 }
 
 void transport_cmd_finish_abort(struct se_cmd *cmd, int remove)
 {
-	if (!cmd->se_tmr_req)
+	if (!(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB))
 		transport_lun_remove_cmd(cmd);
 
 	if (transport_cmd_check_stop_to_fabric(cmd))
@@ -585,7 +610,7 @@
 	if (t_state) {
 		spin_lock_irqsave(&cmd->t_state_lock, flags);
 		cmd->t_state = t_state;
-		atomic_set(&cmd->t_transport_active, 1);
+		cmd->transport_state |= CMD_T_ACTIVE;
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 	}
 
@@ -601,7 +626,7 @@
 		list_add(&cmd->se_queue_node, &qobj->qobj_list);
 	else
 		list_add_tail(&cmd->se_queue_node, &qobj->qobj_list);
-	atomic_set(&cmd->t_transport_queue_active, 1);
+	cmd->transport_state |= CMD_T_QUEUED;
 	spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
 
 	wake_up_interruptible(&qobj->thread_wq);
@@ -620,8 +645,7 @@
 	}
 	cmd = list_first_entry(&qobj->qobj_list, struct se_cmd, se_queue_node);
 
-	atomic_set(&cmd->t_transport_queue_active, 0);
-
+	cmd->transport_state &= ~CMD_T_QUEUED;
 	list_del_init(&cmd->se_queue_node);
 	atomic_dec(&qobj->queue_cnt);
 	spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
@@ -635,20 +659,14 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&qobj->cmd_queue_lock, flags);
-	if (!atomic_read(&cmd->t_transport_queue_active)) {
+	if (!(cmd->transport_state & CMD_T_QUEUED)) {
 		spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
 		return;
 	}
-	atomic_set(&cmd->t_transport_queue_active, 0);
+	cmd->transport_state &= ~CMD_T_QUEUED;
 	atomic_dec(&qobj->queue_cnt);
 	list_del_init(&cmd->se_queue_node);
 	spin_unlock_irqrestore(&qobj->cmd_queue_lock, flags);
-
-	if (atomic_read(&cmd->t_transport_queue_active)) {
-		pr_err("ITT: 0x%08x t_transport_queue_active: %d\n",
-			cmd->se_tfo->get_task_tag(cmd),
-			atomic_read(&cmd->t_transport_queue_active));
-	}
 }
 
 /*
@@ -719,7 +737,7 @@
 	}
 
 	if (!success)
-		cmd->t_tasks_failed = 1;
+		cmd->transport_state |= CMD_T_FAILED;
 
 	/*
 	 * Decrement the outstanding t_task_cdbs_left count.  The last
@@ -730,17 +748,24 @@
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 		return;
 	}
-
-	if (cmd->t_tasks_failed) {
+	/*
+	 * Check for case where an explicit ABORT_TASK has been received
+	 * and transport_wait_for_tasks() will be waiting for completion.
+	 */
+	if (cmd->transport_state & CMD_T_ABORTED &&
+	    cmd->transport_state & CMD_T_STOP) {
+		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
+		complete(&cmd->t_transport_stop_comp);
+		return;
+	} else if (cmd->transport_state & CMD_T_FAILED) {
 		cmd->scsi_sense_reason = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		INIT_WORK(&cmd->work, target_complete_failure_work);
 	} else {
-		atomic_set(&cmd->t_transport_complete, 1);
 		INIT_WORK(&cmd->work, target_complete_ok_work);
 	}
 
 	cmd->t_state = TRANSPORT_COMPLETE;
-	atomic_set(&cmd->t_transport_active, 1);
+	cmd->transport_state |= (CMD_T_COMPLETE | CMD_T_ACTIVE);
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
 	queue_work(target_completion_wq, &cmd->work);
@@ -1488,7 +1513,7 @@
 	init_completion(&cmd->t_transport_stop_comp);
 	init_completion(&cmd->cmd_wait_comp);
 	spin_lock_init(&cmd->t_state_lock);
-	atomic_set(&cmd->transport_dev_active, 1);
+	cmd->transport_state = CMD_T_DEV_ACTIVE;
 
 	cmd->se_tfo = tfo;
 	cmd->se_sess = se_sess;
@@ -1618,7 +1643,7 @@
 		return -EINVAL;
 	}
 	/*
-	 * Set TRANSPORT_NEW_CMD state and cmd->t_transport_active=1 following
+	 * Set TRANSPORT_NEW_CMD state and CMD_T_ACTIVE following
 	 * transport_generic_handle_cdb*() -> transport_add_cmd_to_queue()
 	 * in existing usage to ensure that outstanding descriptors are handled
 	 * correctly during shutdown via transport_wait_for_tasks()
@@ -1627,7 +1652,8 @@
 	 * this to be called for initial descriptor submission.
 	 */
 	cmd->t_state = TRANSPORT_NEW_CMD;
-	atomic_set(&cmd->t_transport_active, 1);
+	cmd->transport_state |= CMD_T_ACTIVE;
+
 	/*
 	 * transport_generic_new_cmd() is already handling QUEUE_FULL,
 	 * so follow TRANSPORT_NEW_CMD processing thread context usage
@@ -1716,6 +1742,74 @@
 }
 EXPORT_SYMBOL(target_submit_cmd);
 
+static void target_complete_tmr_failure(struct work_struct *work)
+{
+	struct se_cmd *se_cmd = container_of(work, struct se_cmd, work);
+
+	se_cmd->se_tmr_req->response = TMR_LUN_DOES_NOT_EXIST;
+	se_cmd->se_tfo->queue_tm_rsp(se_cmd);
+	transport_generic_free_cmd(se_cmd, 0);
+}
+
+/**
+ * target_submit_tmr - lookup unpacked lun and submit uninitialized se_cmd
+ *                     for TMR CDBs
+ *
+ * @se_cmd: command descriptor to submit
+ * @se_sess: associated se_sess for endpoint
+ * @sense: pointer to SCSI sense buffer
+ * @unpacked_lun: unpacked LUN to reference for struct se_lun
+ * @fabric_tmr_ptr: fabric context for TMR req
+ * @tm_type: Type of TM request
+ * @gfp: gfp type for caller
+ * @tag: referenced task tag for TMR_ABORT_TASK
+ * @flags: submit cmd flags
+ *
+ * Callable from all contexts.
+ **/
+
+int target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
+		unsigned char *sense, u32 unpacked_lun,
+		void *fabric_tmr_ptr, unsigned char tm_type,
+		gfp_t gfp, unsigned int tag, int flags)
+{
+	struct se_portal_group *se_tpg;
+	int ret;
+
+	se_tpg = se_sess->se_tpg;
+	BUG_ON(!se_tpg);
+
+	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess,
+			      0, DMA_NONE, MSG_SIMPLE_TAG, sense);
+	/*
+	 * FIXME: Currently expect caller to handle se_cmd->se_tmr_req
+	 * allocation failure.
+	 */
+	ret = core_tmr_alloc_req(se_cmd, fabric_tmr_ptr, tm_type, gfp);
+	if (ret < 0)
+		return -ENOMEM;
+
+	if (tm_type == TMR_ABORT_TASK)
+		se_cmd->se_tmr_req->ref_task_tag = tag;
+
+	/* See target_submit_cmd for commentary */
+	target_get_sess_cmd(se_sess, se_cmd, (flags & TARGET_SCF_ACK_KREF));
+
+	ret = transport_lookup_tmr_lun(se_cmd, unpacked_lun);
+	if (ret) {
+		/*
+		 * For callback during failure handling, push this work off
+		 * to process context with TMR_LUN_DOES_NOT_EXIST status.
+		 */
+		INIT_WORK(&se_cmd->work, target_complete_tmr_failure);
+		schedule_work(&se_cmd->work);
+		return 0;
+	}
+	transport_generic_handle_tmr(se_cmd);
+	return 0;
+}
+EXPORT_SYMBOL(target_submit_tmr);
+
 /*
  * Used by fabric module frontends defining a TFO->new_cmd_map() caller
  * to  queue up a newly setup se_cmd w/ TRANSPORT_NEW_CMD_MAP in order to
@@ -1847,7 +1941,7 @@
 /*
  * Handle SAM-esque emulation for generic transport request failures.
  */
-static void transport_generic_request_failure(struct se_cmd *cmd)
+void transport_generic_request_failure(struct se_cmd *cmd)
 {
 	int ret = 0;
 
@@ -1859,14 +1953,14 @@
 		cmd->t_state, cmd->scsi_sense_reason);
 	pr_debug("-----[ t_tasks: %d t_task_cdbs_left: %d"
 		" t_task_cdbs_sent: %d t_task_cdbs_ex_left: %d --"
-		" t_transport_active: %d t_transport_stop: %d"
-		" t_transport_sent: %d\n", cmd->t_task_list_num,
+		" CMD_T_ACTIVE: %d CMD_T_STOP: %d CMD_T_SENT: %d\n",
+		cmd->t_task_list_num,
 		atomic_read(&cmd->t_task_cdbs_left),
 		atomic_read(&cmd->t_task_cdbs_sent),
 		atomic_read(&cmd->t_task_cdbs_ex_left),
-		atomic_read(&cmd->t_transport_active),
-		atomic_read(&cmd->t_transport_stop),
-		atomic_read(&cmd->t_transport_sent));
+		(cmd->transport_state & CMD_T_ACTIVE) != 0,
+		(cmd->transport_state & CMD_T_STOP) != 0,
+		(cmd->transport_state & CMD_T_SENT) != 0);
 
 	/*
 	 * For SAM Task Attribute emulation for failed struct se_cmd
@@ -1939,6 +2033,7 @@
 	cmd->t_state = TRANSPORT_COMPLETE_QF_OK;
 	transport_handle_queue_full(cmd, cmd->se_dev);
 }
+EXPORT_SYMBOL(transport_generic_request_failure);
 
 static inline u32 transport_lba_21(unsigned char *cdb)
 {
@@ -2125,7 +2220,7 @@
 
 	if (atomic_read(&cmd->t_task_cdbs_sent) ==
 	    cmd->t_task_list_num)
-		atomic_set(&cmd->t_transport_sent, 1);
+		cmd->transport_state |= CMD_T_SENT;
 
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
@@ -2136,8 +2231,9 @@
 	if (error != 0) {
 		spin_lock_irqsave(&cmd->t_state_lock, flags);
 		task->task_flags &= ~TF_ACTIVE;
+		cmd->transport_state &= ~CMD_T_SENT;
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
-		atomic_set(&cmd->t_transport_sent, 0);
+
 		transport_stop_tasks_for_cmd(cmd);
 		transport_generic_request_failure(cmd);
 	}
@@ -2847,7 +2943,7 @@
 
 			pr_err("Unsupported SA: 0x%02x\n",
 				cmd->t_task_cdb[1] & 0x1f);
-			goto out_unsupported_cdb;
+			goto out_invalid_cdb_field;
 		}
 		/*FALLTHROUGH*/
 	case ACCESS_CONTROL_IN:
@@ -2929,7 +3025,7 @@
 		cmd->se_cmd_flags |= SCF_SCSI_NON_DATA_CDB;
 		break;
 	case SYNCHRONIZE_CACHE:
-	case 0x91: /* SYNCHRONIZE_CACHE_16: */
+	case SYNCHRONIZE_CACHE_16:
 		/*
 		 * Extract LBA and range to be flushed for emulated SYNCHRONIZE_CACHE
 		 */
@@ -3081,6 +3177,13 @@
 		cmd->data_length = size;
 	}
 
+	if (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB &&
+	    sectors > dev->se_sub_dev->se_dev_attrib.fabric_max_sectors) {
+		printk_ratelimited(KERN_ERR "SCSI OP %02xh with too big sectors %u\n",
+				   cdb[0], sectors);
+		goto out_invalid_cdb_field;
+	}
+
 	/* reject any command that we don't have a handler for */
 	if (!(passthrough || cmd->execute_task ||
 	     (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB)))
@@ -3384,7 +3487,7 @@
 {
 	BUG_ON(!cmd->se_tfo);
 
-	if (cmd->se_tmr_req)
+	if (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)
 		core_tmr_release_req(cmd->se_tmr_req);
 	if (cmd->t_task_cdb != cmd->__t_task_cdb)
 		kfree(cmd->t_task_cdb);
@@ -3421,8 +3524,8 @@
 			goto out_busy;
 	}
 
-	if (atomic_read(&cmd->transport_dev_active)) {
-		atomic_set(&cmd->transport_dev_active, 0);
+	if (cmd->transport_state & CMD_T_DEV_ACTIVE) {
+		cmd->transport_state &= ~CMD_T_DEV_ACTIVE;
 		transport_all_task_dev_remove_state(cmd);
 		free_tasks = 1;
 	}
@@ -3527,10 +3630,12 @@
 
 void transport_kunmap_data_sg(struct se_cmd *cmd)
 {
-	if (!cmd->t_data_nents)
+	if (!cmd->t_data_nents) {
 		return;
-	else if (cmd->t_data_nents == 1)
+	} else if (cmd->t_data_nents == 1) {
 		kunmap(sg_page(cmd->t_data_sg));
+		return;
+	}
 
 	vunmap(cmd->t_data_vmap);
 	cmd->t_data_vmap = NULL;
@@ -3860,8 +3965,10 @@
 	if (task_cdbs < 0)
 		goto out_fail;
 	else if (!task_cdbs && (cmd->se_cmd_flags & SCF_SCSI_DATA_SG_IO_CDB)) {
+		spin_lock_irq(&cmd->t_state_lock);
 		cmd->t_state = TRANSPORT_COMPLETE;
-		atomic_set(&cmd->t_transport_active, 1);
+		cmd->transport_state |= CMD_T_ACTIVE;
+		spin_unlock_irq(&cmd->t_state_lock);
 
 		if (cmd->t_task_cdb[0] == REQUEST_SENSE) {
 			u8 ua_asc = 0, ua_ascq = 0;
@@ -3942,9 +4049,9 @@
 
 	/*
 	 * Clear the se_cmd for WRITE_PENDING status in order to set
-	 * cmd->t_transport_active=0 so that transport_generic_handle_data
-	 * can be called from HW target mode interrupt code.  This is safe
-	 * to be called with transport_off=1 before the cmd->se_tfo->write_pending
+	 * CMD_T_ACTIVE=0 so that transport_generic_handle_data can be called
+	 * from HW target mode interrupt code.  This is safe to be called
+	 * with transport_off=1 before the cmd->se_tfo->write_pending
 	 * because the se_cmd->se_lun pointer is not being cleared.
 	 */
 	transport_cmd_check_stop(cmd, 1, 0);
@@ -3971,7 +4078,7 @@
 void transport_generic_free_cmd(struct se_cmd *cmd, int wait_for_tasks)
 {
 	if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD)) {
-		if (wait_for_tasks && cmd->se_tmr_req)
+		if (wait_for_tasks && (cmd->se_cmd_flags & SCF_SCSI_TMR_CDB))
 			 transport_wait_for_tasks(cmd);
 
 		transport_release_cmd(cmd);
@@ -4007,8 +4114,10 @@
 	 * fabric acknowledgement that requires two target_put_sess_cmd()
 	 * invocations before se_cmd descriptor release.
 	 */
-	if (ack_kref == true)
+	if (ack_kref == true) {
 		kref_get(&se_cmd->cmd_kref);
+		se_cmd->se_cmd_flags |= SCF_ACK_KREF;
+	}
 
 	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
 	list_add_tail(&se_cmd->se_cmd_list, &se_sess->sess_cmd_list);
@@ -4026,7 +4135,7 @@
 	spin_lock_irqsave(&se_sess->sess_cmd_lock, flags);
 	if (list_empty(&se_cmd->se_cmd_list)) {
 		spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags);
-		WARN_ON(1);
+		se_cmd->se_tfo->release_cmd(se_cmd);
 		return;
 	}
 	if (se_sess->sess_tearing_down && se_cmd->cmd_wait_set) {
@@ -4130,15 +4239,16 @@
 	 * be stopped, we can safely ignore this struct se_cmd.
 	 */
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	if (atomic_read(&cmd->t_transport_stop)) {
-		atomic_set(&cmd->transport_lun_stop, 0);
-		pr_debug("ConfigFS ITT[0x%08x] - t_transport_stop =="
-			" TRUE, skipping\n", cmd->se_tfo->get_task_tag(cmd));
+	if (cmd->transport_state & CMD_T_STOP) {
+		cmd->transport_state &= ~CMD_T_LUN_STOP;
+
+		pr_debug("ConfigFS ITT[0x%08x] - CMD_T_STOP, skipping\n",
+			 cmd->se_tfo->get_task_tag(cmd));
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 		transport_cmd_check_stop(cmd, 1, 0);
 		return -EPERM;
 	}
-	atomic_set(&cmd->transport_lun_fe_stop, 1);
+	cmd->transport_state |= CMD_T_LUN_FE_STOP;
 	spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 
 	wake_up_interruptible(&cmd->se_dev->dev_queue_obj.thread_wq);
@@ -4171,9 +4281,8 @@
 	while (!list_empty(&lun->lun_cmd_list)) {
 		cmd = list_first_entry(&lun->lun_cmd_list,
 		       struct se_cmd, se_lun_node);
-		list_del(&cmd->se_lun_node);
+		list_del_init(&cmd->se_lun_node);
 
-		atomic_set(&cmd->transport_lun_active, 0);
 		/*
 		 * This will notify iscsi_target_transport.c:
 		 * transport_cmd_check_stop() that a LUN shutdown is in
@@ -4184,7 +4293,7 @@
 			"_lun_stop for  ITT: 0x%08x\n",
 			cmd->se_lun->unpacked_lun,
 			cmd->se_tfo->get_task_tag(cmd));
-		atomic_set(&cmd->transport_lun_stop, 1);
+		cmd->transport_state |= CMD_T_LUN_STOP;
 		spin_unlock(&cmd->t_state_lock);
 
 		spin_unlock_irqrestore(&lun->lun_cmd_lock, lun_flags);
@@ -4214,11 +4323,11 @@
 			cmd->se_tfo->get_task_tag(cmd));
 
 		spin_lock_irqsave(&cmd->t_state_lock, cmd_flags);
-		if (!atomic_read(&cmd->transport_dev_active)) {
+		if (!(cmd->transport_state & CMD_T_DEV_ACTIVE)) {
 			spin_unlock_irqrestore(&cmd->t_state_lock, cmd_flags);
 			goto check_cond;
 		}
-		atomic_set(&cmd->transport_dev_active, 0);
+		cmd->transport_state &= ~CMD_T_DEV_ACTIVE;
 		transport_all_task_dev_remove_state(cmd);
 		spin_unlock_irqrestore(&cmd->t_state_lock, cmd_flags);
 
@@ -4238,7 +4347,7 @@
 		 * finished accessing it.
 		 */
 		spin_lock_irqsave(&cmd->t_state_lock, cmd_flags);
-		if (atomic_read(&cmd->transport_lun_fe_stop)) {
+		if (cmd->transport_state & CMD_T_LUN_FE_STOP) {
 			pr_debug("SE_LUN[%d] - Detected FE stop for"
 				" struct se_cmd: %p ITT: 0x%08x\n",
 				lun->unpacked_lun,
@@ -4297,7 +4406,8 @@
 	unsigned long flags;
 
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) && !(cmd->se_tmr_req)) {
+	if (!(cmd->se_cmd_flags & SCF_SE_LUN_CMD) &&
+	    !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 		return false;
 	}
@@ -4305,7 +4415,8 @@
 	 * Only perform a possible wait_for_tasks if SCF_SUPPORTED_SAM_OPCODE
 	 * has been set in transport_set_supported_SAM_opcode().
 	 */
-	if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) && !cmd->se_tmr_req) {
+	if (!(cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) &&
+	    !(cmd->se_cmd_flags & SCF_SCSI_TMR_CDB)) {
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 		return false;
 	}
@@ -4316,8 +4427,7 @@
 	 * transport_clear_lun_from_sessions() once the ConfigFS context caller
 	 * has completed its operation on the struct se_cmd.
 	 */
-	if (atomic_read(&cmd->transport_lun_stop)) {
-
+	if (cmd->transport_state & CMD_T_LUN_STOP) {
 		pr_debug("wait_for_tasks: Stopping"
 			" wait_for_completion(&cmd->t_tasktransport_lun_fe"
 			"_stop_comp); for ITT: 0x%08x\n",
@@ -4345,18 +4455,18 @@
 			"stop_comp); for ITT: 0x%08x\n",
 			cmd->se_tfo->get_task_tag(cmd));
 
-		atomic_set(&cmd->transport_lun_stop, 0);
+		cmd->transport_state &= ~CMD_T_LUN_STOP;
 	}
-	if (!atomic_read(&cmd->t_transport_active) ||
-	     atomic_read(&cmd->t_transport_aborted)) {
+
+	if (!(cmd->transport_state & CMD_T_ACTIVE)) {
 		spin_unlock_irqrestore(&cmd->t_state_lock, flags);
 		return false;
 	}
 
-	atomic_set(&cmd->t_transport_stop, 1);
+	cmd->transport_state |= CMD_T_STOP;
 
 	pr_debug("wait_for_tasks: Stopping %p ITT: 0x%08x"
-		" i_state: %d, t_state: %d, t_transport_stop = TRUE\n",
+		" i_state: %d, t_state: %d, CMD_T_STOP\n",
 		cmd, cmd->se_tfo->get_task_tag(cmd),
 		cmd->se_tfo->get_cmd_state(cmd), cmd->t_state);
 
@@ -4367,8 +4477,7 @@
 	wait_for_completion(&cmd->t_transport_stop_comp);
 
 	spin_lock_irqsave(&cmd->t_state_lock, flags);
-	atomic_set(&cmd->t_transport_active, 0);
-	atomic_set(&cmd->t_transport_stop, 0);
+	cmd->transport_state &= ~(CMD_T_ACTIVE | CMD_T_STOP);
 
 	pr_debug("wait_for_tasks: Stopped wait_for_compltion("
 		"&cmd->t_transport_stop_comp) for ITT: 0x%08x\n",
@@ -4597,7 +4706,7 @@
 {
 	int ret = 0;
 
-	if (atomic_read(&cmd->t_transport_aborted) != 0) {
+	if (cmd->transport_state & CMD_T_ABORTED) {
 		if (!send_status ||
 		     (cmd->se_cmd_flags & SCF_SENT_DELAYED_TAS))
 			return 1;
@@ -4634,7 +4743,7 @@
 	 */
 	if (cmd->data_direction == DMA_TO_DEVICE) {
 		if (cmd->se_tfo->write_pending_status(cmd) != 0) {
-			atomic_inc(&cmd->t_transport_aborted);
+			cmd->transport_state |= CMD_T_ABORTED;
 			smp_mb__after_atomic_inc();
 		}
 	}
@@ -4655,7 +4764,7 @@
 
 	switch (tmr->function) {
 	case TMR_ABORT_TASK:
-		tmr->response = TMR_FUNCTION_REJECTED;
+		core_tmr_abort_task(dev, tmr, cmd->se_sess);
 		break;
 	case TMR_ABORT_TASK_SET:
 	case TMR_CLEAR_ACA:
diff --git a/drivers/target/target_core_ua.c b/drivers/target/target_core_ua.c
index 3e12f6b..6666a0c 100644
--- a/drivers/target/target_core_ua.c
+++ b/drivers/target/target_core_ua.c
@@ -53,7 +53,7 @@
 	if (!nacl)
 		return 0;
 
-	deve = &nacl->device_list[cmd->orig_fe_lun];
+	deve = nacl->device_list[cmd->orig_fe_lun];
 	if (!atomic_read(&deve->ua_count))
 		return 0;
 	/*
@@ -110,7 +110,7 @@
 	ua->ua_ascq = ascq;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[unpacked_lun];
+	deve = nacl->device_list[unpacked_lun];
 
 	spin_lock(&deve->ua_lock);
 	list_for_each_entry_safe(ua_p, ua_tmp, &deve->ua_list, ua_nacl_list) {
@@ -220,7 +220,7 @@
 		return;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[cmd->orig_fe_lun];
+	deve = nacl->device_list[cmd->orig_fe_lun];
 	if (!atomic_read(&deve->ua_count)) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return;
@@ -289,7 +289,7 @@
 		return -EINVAL;
 
 	spin_lock_irq(&nacl->device_list_lock);
-	deve = &nacl->device_list[cmd->orig_fe_lun];
+	deve = nacl->device_list[cmd->orig_fe_lun];
 	if (!atomic_read(&deve->ua_count)) {
 		spin_unlock_irq(&nacl->device_list_lock);
 		return -EPERM;
diff --git a/drivers/target/tcm_fc/tcm_fc.h b/drivers/target/tcm_fc/tcm_fc.h
index e05c551..8306579 100644
--- a/drivers/target/tcm_fc/tcm_fc.h
+++ b/drivers/target/tcm_fc/tcm_fc.h
@@ -17,7 +17,7 @@
 #ifndef __TCM_FC_H__
 #define __TCM_FC_H__
 
-#define FT_VERSION "0.3"
+#define FT_VERSION "0.4"
 
 #define FT_NAMELEN 32		/* length of ASCII WWPNs including pad */
 #define FT_TPG_NAMELEN 32	/* max length of TPG name */
@@ -113,12 +113,10 @@
  * Commands
  */
 struct ft_cmd {
-	u32 lun;                        /* LUN from request */
 	struct ft_sess *sess;		/* session held for cmd */
 	struct fc_seq *seq;		/* sequence in exchange mgr */
 	struct se_cmd se_cmd;		/* Local TCM I/O descriptor */
 	struct fc_frame *req_frame;
-	unsigned char *cdb;		/* pointer to CDB inside frame */
 	u32 write_data_len;		/* data received on writes */
 	struct work_struct work;
 	/* Local sense buffer */
@@ -143,11 +141,8 @@
 void ft_sess_put(struct ft_sess *);
 int ft_sess_shutdown(struct se_session *);
 void ft_sess_close(struct se_session *);
-void ft_sess_stop(struct se_session *, int, int);
-int ft_sess_logged_in(struct se_session *);
 u32 ft_sess_get_index(struct se_session *);
 u32 ft_sess_get_port_name(struct se_session *, unsigned char *, u32);
-void ft_sess_set_erl0(struct se_session *);
 
 void ft_lport_add(struct fc_lport *, void *);
 void ft_lport_del(struct fc_lport *, void *);
@@ -165,7 +160,6 @@
 u32 ft_get_task_tag(struct se_cmd *);
 int ft_get_cmd_state(struct se_cmd *);
 int ft_queue_tm_resp(struct se_cmd *);
-int ft_is_state_remove(struct se_cmd *);
 
 /*
  * other internal functions.
diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index 9e7e26c..62dec97 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c
@@ -59,9 +59,6 @@
 	se_cmd = &cmd->se_cmd;
 	pr_debug("%s: cmd %p sess %p seq %p se_cmd %p\n",
 		caller, cmd, cmd->sess, cmd->seq, se_cmd);
-	pr_debug("%s: cmd %p cdb %p\n",
-		caller, cmd, cmd->cdb);
-	pr_debug("%s: cmd %p lun %d\n", caller, cmd, cmd->lun);
 
 	pr_debug("%s: cmd %p data_nents %u len %u se_cmd_flags <0x%x>\n",
 		caller, cmd, se_cmd->t_data_nents,
@@ -81,8 +78,6 @@
 			caller, cmd, ep->sid, ep->did, ep->oxid, ep->rxid,
 			sp->id, ep->esb_stat);
 	}
-	print_hex_dump(KERN_INFO, "ft_dump_cmd ", DUMP_PREFIX_NONE,
-		16, 4, cmd->cdb, MAX_COMMAND_SIZE, 0);
 }
 
 static void ft_free_cmd(struct ft_cmd *cmd)
@@ -249,11 +244,6 @@
 	return 0;
 }
 
-int ft_is_state_remove(struct se_cmd *se_cmd)
-{
-	return 0;	/* XXX TBD */
-}
-
 /*
  * FC sequence response handler for follow-on sequences (data) and aborts.
  */
@@ -325,10 +315,12 @@
 
 	fc_fill_reply_hdr(fp, rx_fp, FC_RCTL_DD_CMD_STATUS, 0);
 	sp = fr_seq(fp);
-	if (sp)
+	if (sp) {
 		lport->tt.seq_send(lport, sp, fp);
-	else
+		lport->tt.exch_done(sp);
+	} else {
 		lport->tt.frame_send(lport, fp);
+	}
 }
 
 /*
@@ -358,16 +350,10 @@
  */
 static void ft_send_tm(struct ft_cmd *cmd)
 {
-	struct se_tmr_req *tmr;
 	struct fcp_cmnd *fcp;
-	struct ft_sess *sess;
+	int rc;
 	u8 tm_func;
 
-	transport_init_se_cmd(&cmd->se_cmd, &ft_configfs->tf_ops,
-			cmd->sess->se_sess, 0, DMA_NONE, 0,
-			&cmd->ft_sense_buffer[0]);
-	target_get_sess_cmd(cmd->sess->se_sess, &cmd->se_cmd, false);
-
 	fcp = fc_frame_payload_get(cmd->req_frame, sizeof(*fcp));
 
 	switch (fcp->fc_tm_flags) {
@@ -396,44 +382,12 @@
 		return;
 	}
 
-	pr_debug("alloc tm cmd fn %d\n", tm_func);
-	tmr = core_tmr_alloc_req(&cmd->se_cmd, cmd, tm_func, GFP_KERNEL);
-	if (!tmr) {
-		pr_debug("alloc failed\n");
+	/* FIXME: Add referenced task tag for ABORT_TASK */
+	rc = target_submit_tmr(&cmd->se_cmd, cmd->sess->se_sess,
+		&cmd->ft_sense_buffer[0], scsilun_to_int(&fcp->fc_lun),
+		cmd, tm_func, GFP_KERNEL, 0, 0);
+	if (rc < 0)
 		ft_send_resp_code_and_free(cmd, FCP_TMF_FAILED);
-		return;
-	}
-	cmd->se_cmd.se_tmr_req = tmr;
-
-	switch (fcp->fc_tm_flags) {
-	case FCP_TMF_LUN_RESET:
-		cmd->lun = scsilun_to_int((struct scsi_lun *)fcp->fc_lun);
-		if (transport_lookup_tmr_lun(&cmd->se_cmd, cmd->lun) < 0) {
-			/*
-			 * Make sure to clean up newly allocated TMR request
-			 * since "unable to  handle TMR request because failed
-			 * to get to LUN"
-			 */
-			pr_debug("Failed to get LUN for TMR func %d, "
-				  "se_cmd %p, unpacked_lun %d\n",
-				  tm_func, &cmd->se_cmd, cmd->lun);
-			ft_dump_cmd(cmd, __func__);
-			sess = cmd->sess;
-			transport_send_check_condition_and_sense(&cmd->se_cmd,
-				cmd->se_cmd.scsi_sense_reason, 0);
-			ft_sess_put(sess);
-			return;
-		}
-		break;
-	case FCP_TMF_TGT_RESET:
-	case FCP_TMF_CLR_TASK_SET:
-	case FCP_TMF_ABT_TASK_SET:
-	case FCP_TMF_CLR_ACA:
-		break;
-	default:
-		return;
-	}
-	transport_generic_handle_tmr(&cmd->se_cmd);
 }
 
 /*
@@ -538,7 +492,6 @@
 	struct fc_frame_header *fh = fc_frame_header_get(cmd->req_frame);
 	struct fcp_cmnd *fcp;
 	int data_dir = 0;
-	u32 data_len;
 	int task_attr;
 
 	fcp = fc_frame_payload_get(cmd->req_frame, sizeof(*fcp));
@@ -548,47 +501,6 @@
 	if (fcp->fc_flags & FCP_CFL_LEN_MASK)
 		goto err;		/* not handling longer CDBs yet */
 
-	if (fcp->fc_tm_flags) {
-		task_attr = FCP_PTA_SIMPLE;
-		data_dir = DMA_NONE;
-		data_len = 0;
-	} else {
-		switch (fcp->fc_flags & (FCP_CFL_RDDATA | FCP_CFL_WRDATA)) {
-		case 0:
-			data_dir = DMA_NONE;
-			break;
-		case FCP_CFL_RDDATA:
-			data_dir = DMA_FROM_DEVICE;
-			break;
-		case FCP_CFL_WRDATA:
-			data_dir = DMA_TO_DEVICE;
-			break;
-		case FCP_CFL_WRDATA | FCP_CFL_RDDATA:
-			goto err;	/* TBD not supported by tcm_fc yet */
-		}
-		/*
-		 * Locate the SAM Task Attr from fc_pri_ta
-		 */
-		switch (fcp->fc_pri_ta & FCP_PTA_MASK) {
-		case FCP_PTA_HEADQ:
-			task_attr = MSG_HEAD_TAG;
-			break;
-		case FCP_PTA_ORDERED:
-			task_attr = MSG_ORDERED_TAG;
-			break;
-		case FCP_PTA_ACA:
-			task_attr = MSG_ACA_TAG;
-			break;
-		case FCP_PTA_SIMPLE: /* Fallthrough */
-		default:
-			task_attr = MSG_SIMPLE_TAG;
-		}
-
-
-		task_attr = fcp->fc_pri_ta & FCP_PTA_MASK;
-		data_len = ntohl(fcp->fc_dl);
-		cmd->cdb = fcp->fc_cdb;
-	}
 	/*
 	 * Check for FCP task management flags
 	 */
@@ -596,15 +508,46 @@
 		ft_send_tm(cmd);
 		return;
 	}
+
+	switch (fcp->fc_flags & (FCP_CFL_RDDATA | FCP_CFL_WRDATA)) {
+	case 0:
+		data_dir = DMA_NONE;
+		break;
+	case FCP_CFL_RDDATA:
+		data_dir = DMA_FROM_DEVICE;
+		break;
+	case FCP_CFL_WRDATA:
+		data_dir = DMA_TO_DEVICE;
+		break;
+	case FCP_CFL_WRDATA | FCP_CFL_RDDATA:
+		goto err;	/* TBD not supported by tcm_fc yet */
+	}
+	/*
+	 * Locate the SAM Task Attr from fc_pri_ta
+	 */
+	switch (fcp->fc_pri_ta & FCP_PTA_MASK) {
+	case FCP_PTA_HEADQ:
+		task_attr = MSG_HEAD_TAG;
+		break;
+	case FCP_PTA_ORDERED:
+		task_attr = MSG_ORDERED_TAG;
+		break;
+	case FCP_PTA_ACA:
+		task_attr = MSG_ACA_TAG;
+		break;
+	case FCP_PTA_SIMPLE: /* Fallthrough */
+	default:
+		task_attr = MSG_SIMPLE_TAG;
+	}
+
 	fc_seq_exch(cmd->seq)->lp->tt.seq_set_resp(cmd->seq, ft_recv_seq, cmd);
-	cmd->lun = scsilun_to_int((struct scsi_lun *)fcp->fc_lun);
 	/*
 	 * Use a single se_cmd->cmd_kref as we expect to release se_cmd
 	 * directly from ft_check_stop_free callback in response path.
 	 */
-	target_submit_cmd(&cmd->se_cmd, cmd->sess->se_sess, cmd->cdb,
-				&cmd->ft_sense_buffer[0], cmd->lun, data_len,
-				task_attr, data_dir, 0);
+	target_submit_cmd(&cmd->se_cmd, cmd->sess->se_sess, fcp->fc_cdb,
+			&cmd->ft_sense_buffer[0], scsilun_to_int(&fcp->fc_lun),
+			ntohl(fcp->fc_dl), task_attr, data_dir, 0);
 	pr_debug("r_ctl %x alloc target_submit_cmd\n", fh->fh_r_ctl);
 	return;
 
diff --git a/drivers/target/tcm_fc/tfc_conf.c b/drivers/target/tcm_fc/tfc_conf.c
index 73852fb..f357039 100644
--- a/drivers/target/tcm_fc/tfc_conf.c
+++ b/drivers/target/tcm_fc/tfc_conf.c
@@ -529,9 +529,6 @@
 	.release_cmd =			ft_release_cmd,
 	.shutdown_session =		ft_sess_shutdown,
 	.close_session =		ft_sess_close,
-	.stop_session =			ft_sess_stop,
-	.fall_back_to_erl0 =		ft_sess_set_erl0,
-	.sess_logged_in =		ft_sess_logged_in,
 	.sess_get_index =		ft_sess_get_index,
 	.sess_get_initiator_sid =	NULL,
 	.write_pending =		ft_write_pending,
@@ -544,7 +541,6 @@
 	.queue_tm_rsp =			ft_queue_tm_resp,
 	.get_fabric_sense_len =		ft_get_fabric_sense_len,
 	.set_fabric_sense_len =		ft_set_fabric_sense_len,
-	.is_state_remove =		ft_is_state_remove,
 	/*
 	 * Setup function pointers for generic logic in
 	 * target_core_fabric_configfs.c
diff --git a/drivers/target/tcm_fc/tfc_sess.c b/drivers/target/tcm_fc/tfc_sess.c
index eff512b..cb99da9 100644
--- a/drivers/target/tcm_fc/tfc_sess.c
+++ b/drivers/target/tcm_fc/tfc_sess.c
@@ -309,11 +309,9 @@
 void ft_sess_close(struct se_session *se_sess)
 {
 	struct ft_sess *sess = se_sess->fabric_sess_ptr;
-	struct fc_lport *lport;
 	u32 port_id;
 
 	mutex_lock(&ft_lport_lock);
-	lport = sess->tport->lport;
 	port_id = sess->port_id;
 	if (port_id == -1) {
 		mutex_unlock(&ft_lport_lock);
@@ -328,20 +326,6 @@
 	synchronize_rcu();		/* let transport deregister happen */
 }
 
-void ft_sess_stop(struct se_session *se_sess, int sess_sleep, int conn_sleep)
-{
-	struct ft_sess *sess = se_sess->fabric_sess_ptr;
-
-	pr_debug("port_id %x\n", sess->port_id);
-}
-
-int ft_sess_logged_in(struct se_session *se_sess)
-{
-	struct ft_sess *sess = se_sess->fabric_sess_ptr;
-
-	return sess->port_id != -1;
-}
-
 u32 ft_sess_get_index(struct se_session *se_sess)
 {
 	struct ft_sess *sess = se_sess->fabric_sess_ptr;
@@ -357,11 +341,6 @@
 	return ft_format_wwn(buf, len, sess->port_name);
 }
 
-void ft_sess_set_erl0(struct se_session *se_sess)
-{
-	/* XXX TBD called when out of memory */
-}
-
 /*
  * libfc ops involving sessions.
  */
diff --git a/include/scsi/fc/fc_fcp.h b/include/scsi/fc/fc_fcp.h
index 652dec2..0d7d67e 100644
--- a/include/scsi/fc/fc_fcp.h
+++ b/include/scsi/fc/fc_fcp.h
@@ -20,6 +20,8 @@
 #ifndef _FC_FCP_H_
 #define	_FC_FCP_H_
 
+#include <scsi/scsi.h>
+
 /*
  * Fibre Channel Protocol for SCSI.
  * From T10 FCP-3, T10 project 1560-D Rev 4, Sept. 13, 2005.
@@ -45,7 +47,7 @@
  * FCP_CMND IU Payload.
  */
 struct fcp_cmnd {
-	__u8		fc_lun[8];	/* logical unit number */
+	struct scsi_lun	fc_lun;		/* logical unit number */
 	__u8		fc_cmdref;	/* command reference number */
 	__u8		fc_pri_ta;	/* priority and task attribute */
 	__u8		fc_tm_flags;	/* task management flags */
@@ -57,7 +59,7 @@
 #define	FCP_CMND_LEN	32	/* expected length of structure */
 
 struct fcp_cmnd32 {
-	__u8		fc_lun[8];	/* logical unit number */
+	struct scsi_lun	fc_lun;		/* logical unit number */
 	__u8		fc_cmdref;	/* command reference number */
 	__u8		fc_pri_ta;	/* priority and task attribute */
 	__u8		fc_tm_flags;	/* task management flags */
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index 8001ae4..f34a5a8 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -143,6 +143,7 @@
 #define READ_ATTRIBUTE        0x8c
 #define WRITE_ATTRIBUTE	      0x8d
 #define VERIFY_16	      0x8f
+#define SYNCHRONIZE_CACHE_16  0x91
 #define WRITE_SAME_16	      0x93
 #define SERVICE_ACTION_IN     0x9e
 /* values for service action in */
diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index e5e6ff9..8c9ff1b 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h
@@ -62,4 +62,6 @@
 void	*transport_kmap_data_sg(struct se_cmd *);
 void	transport_kunmap_data_sg(struct se_cmd *);
 
+void	array_free(void *array, int n);
+
 #endif /* TARGET_CORE_BACKEND_H */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index dc4e345..aaccc5f 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -9,7 +9,7 @@
 #include <net/sock.h>
 #include <net/tcp.h>
 
-#define TARGET_CORE_MOD_VERSION		"v4.1.0-rc1-ml"
+#define TARGET_CORE_MOD_VERSION		"v4.1.0-rc2-ml"
 #define TARGET_CORE_VERSION		TARGET_CORE_MOD_VERSION
 
 /* Maximum Number of LUNs per Target Portal Group */
@@ -86,6 +86,8 @@
 #define DA_UNMAP_GRANULARITY_DEFAULT		0
 /* Default unmap_granularity_alignment */
 #define DA_UNMAP_GRANULARITY_ALIGNMENT_DEFAULT	0
+/* Default max transfer length */
+#define DA_FABRIC_MAX_SECTORS			8192
 /* Emulation for Direct Page Out */
 #define DA_EMULATE_DPO				0
 /* Emulation for Forced Unit Access WRITEs */
@@ -118,9 +120,9 @@
 /* Queue Algorithm Modifier default for restricted reordering in control mode page */
 #define DA_EMULATE_REST_REORD			0
 
+#define SE_INQUIRY_BUF				512
 #define SE_MODE_PAGE_BUF			512
 
-
 /* struct se_hba->hba_flags */
 enum hba_flags_table {
 	HBA_FLAGS_INTERNAL_USE	= 0x01,
@@ -169,7 +171,8 @@
 	SCF_EMULATED_TASK_SENSE		= 0x00000004,
 	SCF_SCSI_DATA_SG_IO_CDB		= 0x00000008,
 	SCF_SCSI_CONTROL_SG_IO_CDB	= 0x00000010,
-	SCF_SCSI_NON_DATA_CDB		= 0x00000040,
+	SCF_SCSI_NON_DATA_CDB		= 0x00000020,
+	SCF_SCSI_TMR_CDB		= 0x00000040,
 	SCF_SCSI_CDB_EXCEPTION		= 0x00000080,
 	SCF_SCSI_RESERVATION_CONFLICT	= 0x00000100,
 	SCF_FUA				= 0x00000200,
@@ -183,7 +186,8 @@
 	SCF_ALUA_NON_OPTIMIZED		= 0x00040000,
 	SCF_DELAYED_CMD_FROM_SAM_ATTR	= 0x00080000,
 	SCF_UNUSED			= 0x00100000,
-	SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00400000,
+	SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00200000,
+	SCF_ACK_KREF			= 0x00400000,
 };
 
 /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
@@ -474,12 +478,6 @@
 	struct t10_reservation_ops pr_ops;
 };
 
-struct se_queue_req {
-	int			state;
-	struct se_cmd		*cmd;
-	struct list_head	qr_list;
-};
-
 struct se_queue_obj {
 	atomic_t		queue_cnt;
 	spinlock_t		cmd_queue_lock;
@@ -504,6 +502,24 @@
 	struct completion	task_stop_comp;
 };
 
+struct se_tmr_req {
+	/* Task Management function to be performed (TMR_* code, e.g. TMR_ABORT_TASK) */
+	u8			function;
+	/* Task Management response to send (e.g. TMR_FUNCTION_REJECTED) */
+	u8			response;
+	int			call_transport;
+	/* Initiator task tag (ITT) of the task this TMR should be performed on */
+	u32			ref_task_tag;
+	/* 64-bit encoded SAM LUN from $FABRIC_MOD TMR header */
+	u64			ref_task_lun;
+	void 			*fabric_tmr_ptr; /* opaque fabric module cookie -- TODO confirm */
+	struct se_cmd		*task_cmd; /* presumably the se_cmd carrying this TMR -- confirm */
+	struct se_cmd		*ref_cmd; /* presumably the command named by ref_task_tag -- confirm */
+	struct se_device	*tmr_dev; /* presumably the device the TMR targets -- confirm */
+	struct se_lun		*tmr_lun; /* presumably the LUN the TMR targets -- confirm */
+	struct list_head	tmr_list;
+};
+
 struct se_cmd {
 	/* SAM response code being sent to initiator */
 	u8			scsi_status;
@@ -555,23 +571,23 @@
 	unsigned char		*t_task_cdb;
 	unsigned char		__t_task_cdb[TCM_MAX_COMMAND_SIZE];
 	unsigned long long	t_task_lba;
-	int			t_tasks_failed;
 	u32			t_tasks_sg_chained_no;
 	atomic_t		t_fe_count;
 	atomic_t		t_se_count;
 	atomic_t		t_task_cdbs_left;
 	atomic_t		t_task_cdbs_ex_left;
 	atomic_t		t_task_cdbs_sent;
-	atomic_t		t_transport_aborted;
-	atomic_t		t_transport_active;
-	atomic_t		t_transport_complete;
-	atomic_t		t_transport_queue_active;
-	atomic_t		t_transport_sent;
-	atomic_t		t_transport_stop;
-	atomic_t		transport_dev_active;
-	atomic_t		transport_lun_active;
-	atomic_t		transport_lun_fe_stop;
-	atomic_t		transport_lun_stop;
+	unsigned int		transport_state;
+#define CMD_T_ABORTED		(1 << 0)
+#define CMD_T_ACTIVE		(1 << 1)
+#define CMD_T_COMPLETE		(1 << 2)
+#define CMD_T_QUEUED		(1 << 3)
+#define CMD_T_SENT		(1 << 4)
+#define CMD_T_STOP		(1 << 5)
+#define CMD_T_FAILED		(1 << 6)
+#define CMD_T_LUN_STOP		(1 << 7)
+#define CMD_T_LUN_FE_STOP	(1 << 8)
+#define CMD_T_DEV_ACTIVE	(1 << 9)
 	spinlock_t		t_state_lock;
 	struct completion	t_transport_stop_comp;
 	struct completion	transport_lun_fe_stop_comp;
@@ -592,24 +608,6 @@
 
 };
 
-struct se_tmr_req {
-	/* Task Management function to be preformed */
-	u8			function;
-	/* Task Management response to send */
-	u8			response;
-	int			call_transport;
-	/* Reference to ITT that Task Mgmt should be preformed */
-	u32			ref_task_tag;
-	/* 64-bit encoded SAM LUN from $FABRIC_MOD TMR header */
-	u64			ref_task_lun;
-	void 			*fabric_tmr_ptr;
-	struct se_cmd		*task_cmd;
-	struct se_cmd		*ref_cmd;
-	struct se_device	*tmr_dev;
-	struct se_lun		*tmr_lun;
-	struct list_head	tmr_list;
-};
-
 struct se_ua {
 	u8			ua_asc;
 	u8			ua_ascq;
@@ -622,6 +620,7 @@
 	char			initiatorname[TRANSPORT_IQN_LEN];
 	/* Used to signal demo mode created ACL, disabled by default */
 	bool			dynamic_node_acl;
+	bool			acl_stop:1;
 	u32			queue_depth;
 	u32			acl_index;
 	u64			num_cmds;
@@ -630,7 +629,7 @@
 	spinlock_t		stats_lock;
 	/* Used for PR SPEC_I_PT=1 and REGISTER_AND_MOVE */
 	atomic_t		acl_pr_ref_count;
-	struct se_dev_entry	*device_list;
+	struct se_dev_entry	**device_list;
 	struct se_session	*nacl_sess;
 	struct se_portal_group *se_tpg;
 	spinlock_t		device_list_lock;
@@ -643,6 +642,8 @@
 	struct config_group	*acl_default_groups[5];
 	struct list_head	acl_list;
 	struct list_head	acl_sess_list;
+	struct completion	acl_free_comp;
+	struct kref		acl_kref;
 };
 
 struct se_session {
@@ -656,6 +657,7 @@
 	struct list_head	sess_cmd_list;
 	struct list_head	sess_wait_list;
 	spinlock_t		sess_cmd_lock;
+	struct kref		sess_kref;
 };
 
 struct se_device;
@@ -730,6 +732,7 @@
 	u32		block_size;
 	u32		hw_max_sectors;
 	u32		max_sectors;
+	u32		fabric_max_sectors;
 	u32		optimal_sectors;
 	u32		hw_queue_depth;
 	u32		queue_depth;
@@ -931,7 +934,7 @@
 	struct list_head	se_tpg_node;
 	/* linked list for initiator ACL list */
 	struct list_head	acl_node_list;
-	struct se_lun		*tpg_lun_list;
+	struct se_lun		**tpg_lun_list;
 	struct se_lun		tpg_virt_lun0;
 	/* List of TCM sessions associated wth this TPG */
 	struct list_head	tpg_sess_list;
diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h
index d36fad3..10c6908 100644
--- a/include/target/target_core_fabric.h
+++ b/include/target/target_core_fabric.h
@@ -58,9 +58,6 @@
 	 */
 	int (*shutdown_session)(struct se_session *);
 	void (*close_session)(struct se_session *);
-	void (*stop_session)(struct se_session *, int, int);
-	void (*fall_back_to_erl0)(struct se_session *);
-	int (*sess_logged_in)(struct se_session *);
 	u32 (*sess_get_index)(struct se_session *);
 	/*
 	 * Used only for SCSI fabrics that contain multi-value TransportIDs
@@ -78,7 +75,6 @@
 	int (*queue_tm_rsp)(struct se_cmd *);
 	u16 (*set_fabric_sense_len)(struct se_cmd *, u32);
 	u16 (*get_fabric_sense_len)(void);
-	int (*is_state_remove)(struct se_cmd *);
 	/*
 	 * fabric module calls for target_core_fabric_configfs.c
 	 */
@@ -105,7 +101,10 @@
 		struct se_node_acl *, struct se_session *, void *);
 void	transport_register_session(struct se_portal_group *,
 		struct se_node_acl *, struct se_session *, void *);
+void	target_get_session(struct se_session *);
+int	target_put_session(struct se_session *);
 void	transport_free_session(struct se_session *);
+void	target_put_nacl(struct se_node_acl *);
 void	transport_deregister_session_configfs(struct se_session *);
 void	transport_deregister_session(struct se_session *);
 
@@ -116,6 +115,10 @@
 int	transport_generic_allocate_tasks(struct se_cmd *, unsigned char *);
 void	target_submit_cmd(struct se_cmd *, struct se_session *, unsigned char *,
 		unsigned char *, u32, u32, int, int, int);
+int	target_submit_tmr(struct se_cmd *se_cmd, struct se_session *se_sess,
+		unsigned char *sense, u32 unpacked_lun,
+		void *fabric_tmr_ptr, unsigned char tm_type,
+		gfp_t gfp, unsigned int tag, int flags);
 int	transport_handle_cdb_direct(struct se_cmd *);
 int	transport_generic_handle_cdb_map(struct se_cmd *);
 int	transport_generic_handle_data(struct se_cmd *);
@@ -139,9 +142,10 @@
 
 int	core_alua_check_nonop_delay(struct se_cmd *);
 
-struct se_tmr_req *core_tmr_alloc_req(struct se_cmd *, void *, u8, gfp_t);
+int	core_tmr_alloc_req(struct se_cmd *, void *, u8, gfp_t);
 void	core_tmr_release_req(struct se_tmr_req *);
 int	transport_generic_handle_tmr(struct se_cmd *);
+void	transport_generic_request_failure(struct se_cmd *);
 int	transport_lookup_tmr_lun(struct se_cmd *, u32);
 
 struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *,