Merge tag 'arc-4.9-final' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC fixes from Vineet Gupta:

 - fix PAE40 crash [Yuriy]

 - disable IO-Coherency by default

 - use a different inline asm constraint for Zero Overhead loops

* tag 'arc-4.9-final' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
  ARC: mm: PAE40: Fix crash at munmap
  ARC: mm: IOC: Don't enable IOC by default
  ARC: Don't use "+l" inline asm constraint
diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
index 9669fc7..74f4c66 100644
--- a/drivers/ata/ahci.c
+++ b/drivers/ata/ahci.c
@@ -1436,13 +1436,6 @@
 				"ahci: MRSM is on, fallback to single MSI\n");
 			pci_free_irq_vectors(pdev);
 		}
-
-		/*
-		 * -ENOSPC indicated we don't have enough vectors.  Don't bother
-		 * trying a single vectors for any other error:
-		 */
-		if (nvec < 0 && nvec != -ENOSPC)
-			return nvec;
 	}
 
 	/*
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 9cceb4a..c4eb4ae 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1088,7 +1088,7 @@
 		desc[1] = tf->command; /* status */
 		desc[2] = tf->device;
 		desc[3] = tf->nsect;
-		desc[0] = 0;
+		desc[7] = 0;
 		if (tf->flags & ATA_TFLAG_LBA48)  {
 			desc[8] |= 0x80;
 			if (tf->hob_nsect)
diff --git a/drivers/scsi/be2iscsi/be_mgmt.c b/drivers/scsi/be2iscsi/be_mgmt.c
index aebc4dd..ac05317 100644
--- a/drivers/scsi/be2iscsi/be_mgmt.c
+++ b/drivers/scsi/be2iscsi/be_mgmt.c
@@ -1083,7 +1083,7 @@
 	nonemb_cmd = &phba->boot_struct.nonemb_cmd;
 	nonemb_cmd->size = sizeof(*resp);
 	nonemb_cmd->va = pci_alloc_consistent(phba->ctrl.pdev,
-					      sizeof(nonemb_cmd->size),
+					      nonemb_cmd->size,
 					      &nonemb_cmd->dma);
 	if (!nonemb_cmd->va) {
 		mutex_unlock(&ctrl->mbox_lock);
diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index d007ec1..a1d6ab7 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -2009,7 +2009,7 @@
 
 static int hpsa_slave_alloc(struct scsi_device *sdev)
 {
-	struct hpsa_scsi_dev_t *sd;
+	struct hpsa_scsi_dev_t *sd = NULL;
 	unsigned long flags;
 	struct ctlr_info *h;
 
@@ -2026,7 +2026,8 @@
 			sd->target = sdev_id(sdev);
 			sd->lun = sdev->lun;
 		}
-	} else
+	}
+	if (!sd)
 		sd = lookup_hpsa_scsi_dev(h, sdev_channel(sdev),
 					sdev_id(sdev), sdev->lun);
 
@@ -3840,6 +3841,7 @@
 		sizeof(this_device->vendor));
 	memcpy(this_device->model, &inq_buff[16],
 		sizeof(this_device->model));
+	this_device->rev = inq_buff[2];
 	memset(this_device->device_id, 0,
 		sizeof(this_device->device_id));
 	if (hpsa_get_device_id(h, scsi3addr, this_device->device_id, 8,
@@ -3929,10 +3931,14 @@
 
 	if (!is_logical_dev_addr_mode(lunaddrbytes)) {
 		/* physical device, target and lun filled in later */
-		if (is_hba_lunid(lunaddrbytes))
+		if (is_hba_lunid(lunaddrbytes)) {
+			int bus = HPSA_HBA_BUS;
+
+			if (!device->rev)
+				bus = HPSA_LEGACY_HBA_BUS;
 			hpsa_set_bus_target_lun(device,
-					HPSA_HBA_BUS, 0, lunid & 0x3fff);
-		else
+					bus, 0, lunid & 0x3fff);
+		} else
 			/* defer target, lun assignment for physical devices */
 			hpsa_set_bus_target_lun(device,
 					HPSA_PHYSICAL_DEVICE_BUS, -1, -1);
diff --git a/drivers/scsi/hpsa.h b/drivers/scsi/hpsa.h
index 82cdfad..9ea162d 100644
--- a/drivers/scsi/hpsa.h
+++ b/drivers/scsi/hpsa.h
@@ -69,6 +69,7 @@
 	u64 sas_address;
 	unsigned char vendor[8];        /* bytes 8-15 of inquiry data */
 	unsigned char model[16];        /* bytes 16-31 of inquiry data */
+	unsigned char rev;		/* byte 2 of inquiry data */
 	unsigned char raid_level;	/* from inquiry page 0xC1 */
 	unsigned char volume_offline;	/* discovered via TUR or VPD */
 	u16 queue_depth;		/* max queue_depth for this device */
@@ -402,6 +403,7 @@
 #define HPSA_RAID_VOLUME_BUS		1
 #define HPSA_EXTERNAL_RAID_VOLUME_BUS	2
 #define HPSA_HBA_BUS			0
+#define HPSA_LEGACY_HBA_BUS		3
 
 /*
 	Send the command to the hardware
diff --git a/drivers/scsi/libfc/fc_lport.c b/drivers/scsi/libfc/fc_lport.c
index 04ce7cf..50c7167 100644
--- a/drivers/scsi/libfc/fc_lport.c
+++ b/drivers/scsi/libfc/fc_lport.c
@@ -308,7 +308,7 @@
 	fc_stats = &lport->host_stats;
 	memset(fc_stats, 0, sizeof(struct fc_host_statistics));
 
-	fc_stats->seconds_since_last_reset = (lport->boot_time - jiffies) / HZ;
+	fc_stats->seconds_since_last_reset = (jiffies - lport->boot_time) / HZ;
 
 	for_each_possible_cpu(cpu) {
 		struct fc_stats *stats;
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 91b70bc..1c4744e 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -3885,6 +3885,11 @@
 	}
 }
 
+static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
+{
+	return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
+}
+
 /**
  * _scsih_flush_running_cmds - completing outstanding commands.
  * @ioc: per adapter object
@@ -3906,6 +3911,9 @@
 		if (!scmd)
 			continue;
 		count++;
+		if (ata_12_16_cmd(scmd))
+			scsi_internal_device_unblock(scmd->device,
+							SDEV_RUNNING);
 		mpt3sas_base_free_smid(ioc, smid);
 		scsi_dma_unmap(scmd);
 		if (ioc->pci_error_recovery)
@@ -4010,11 +4018,6 @@
 	    SAM_STAT_CHECK_CONDITION;
 }
 
-static inline bool ata_12_16_cmd(struct scsi_cmnd *scmd)
-{
-	return (scmd->cmnd[0] == ATA_12 || scmd->cmnd[0] == ATA_16);
-}
-
 /**
  * scsih_qcmd - main scsi request entry point
  * @scmd: pointer to scsi command object
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 86eb199..c7cc803 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -791,8 +791,10 @@
 	slot->slot_tag = tag;
 
 	slot->buf = pci_pool_alloc(mvi->dma_pool, GFP_ATOMIC, &slot->buf_dma);
-	if (!slot->buf)
+	if (!slot->buf) {
+		rc = -ENOMEM;
 		goto err_out_tag;
+	}
 	memset(slot->buf, 0, MVS_SLOT_BUF_SZ);
 
 	tei.task = task;
diff --git a/drivers/scsi/qlogicpti.h b/drivers/scsi/qlogicpti.h
index 4377e87..892a0b0 100644
--- a/drivers/scsi/qlogicpti.h
+++ b/drivers/scsi/qlogicpti.h
@@ -356,8 +356,8 @@
 
 	/* The rest of the elements are unimportant for performance. */
 	struct qlogicpti         *next;
-	__u32                     res_dvma;             /* Ptr to RESPONSE bufs (DVMA)*/
-	__u32                     req_dvma;             /* Ptr to REQUEST bufs (DVMA) */
+	dma_addr_t                res_dvma;             /* Ptr to RESPONSE bufs (DVMA)*/
+	dma_addr_t                req_dvma;             /* Ptr to REQUEST bufs (DVMA) */
 	u_char	                  fware_majrev, fware_minrev, fware_micrev;
 	struct Scsi_Host         *qhost;
 	int                       qpti_id;
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 8347c90..5eb0412 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -808,7 +808,11 @@
 	struct crypto_skcipher *tfm_arc4;
 	struct scatterlist sgin, sgout;
 	struct skcipher_request *req;
-	unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
+	unsigned char *sec_key;
+
+	sec_key = kmalloc(CIFS_SESS_KEY_SIZE, GFP_KERNEL);
+	if (sec_key == NULL)
+		return -ENOMEM;
 
 	get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
 
@@ -816,7 +820,7 @@
 	if (IS_ERR(tfm_arc4)) {
 		rc = PTR_ERR(tfm_arc4);
 		cifs_dbg(VFS, "could not allocate crypto API arc4\n");
-		return rc;
+		goto out;
 	}
 
 	rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response,
@@ -854,7 +858,8 @@
 
 out_free_cipher:
 	crypto_free_skcipher(tfm_arc4);
-
+out:
+	kfree(sec_key);
 	return rc;
 }
 
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 3f3185f..e3fed92 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3427,6 +3427,7 @@
 	__u16 rc = 0;
 	struct cifs_posix_acl *cifs_acl = (struct cifs_posix_acl *)parm_data;
 	struct posix_acl_xattr_header *local_acl = (void *)pACL;
+	struct posix_acl_xattr_entry *ace = (void *)(local_acl + 1);
 	int count;
 	int i;
 
@@ -3453,8 +3454,7 @@
 		return 0;
 	}
 	for (i = 0; i < count; i++) {
-		rc = convert_ace_to_cifs_ace(&cifs_acl->ace_array[i],
-			(struct posix_acl_xattr_entry *)(local_acl + 1));
+		rc = convert_ace_to_cifs_ace(&cifs_acl->ace_array[i], &ace[i]);
 		if (rc != 0) {
 			/* ACE not converted */
 			break;
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index aab5227..4547aed 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -412,6 +412,9 @@
 		}
 	} while (server->tcpStatus == CifsNeedReconnect);
 
+	if (server->tcpStatus == CifsNeedNegotiate)
+		mod_delayed_work(cifsiod_wq, &server->echo, 0);
+
 	return rc;
 }
 
@@ -421,17 +424,25 @@
 	int rc;
 	struct TCP_Server_Info *server = container_of(work,
 					struct TCP_Server_Info, echo.work);
-	unsigned long echo_interval = server->echo_interval;
+	unsigned long echo_interval;
 
 	/*
-	 * We cannot send an echo if it is disabled or until the
-	 * NEGOTIATE_PROTOCOL request is done, which is indicated by
-	 * server->ops->need_neg() == true. Also, no need to ping if
-	 * we got a response recently.
+	 * If we need to renegotiate, set echo interval to zero to
+	 * immediately call echo service where we can renegotiate.
+	 */
+	if (server->tcpStatus == CifsNeedNegotiate)
+		echo_interval = 0;
+	else
+		echo_interval = server->echo_interval;
+
+	/*
+	 * We cannot send an echo if it is disabled.
+	 * Also, no need to ping if we got a response recently.
 	 */
 
 	if (server->tcpStatus == CifsNeedReconnect ||
-	    server->tcpStatus == CifsExiting || server->tcpStatus == CifsNew ||
+	    server->tcpStatus == CifsExiting ||
+	    server->tcpStatus == CifsNew ||
 	    (server->ops->can_echo && !server->ops->can_echo(server)) ||
 	    time_before(jiffies, server->lstrp + echo_interval - HZ))
 		goto requeue_echo;
@@ -442,7 +453,7 @@
 			 server->hostname);
 
 requeue_echo:
-	queue_delayed_work(cifsiod_wq, &server->echo, echo_interval);
+	queue_delayed_work(cifsiod_wq, &server->echo, server->echo_interval);
 }
 
 static bool
diff --git a/init/Kconfig b/init/Kconfig
index c4fbc1e..34407f1 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1945,7 +1945,6 @@
 
 config MODVERSIONS
 	bool "Module versioning support"
-	depends on BROKEN
 	help
 	  Usually, you have to use modules compiled with your kernel.
 	  Saying Y here makes it sometimes possible to use modules
diff --git a/kernel/module.c b/kernel/module.c
index f57dd63..0e54d5b 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1301,8 +1301,9 @@
 		goto bad_version;
 	}
 
-	pr_warn("%s: no symbol version for %s\n", mod->name, symname);
-	return 0;
+	/* Broken toolchain. Warn once, then let it go.. */
+	pr_warn_once("%s: no symbol version for %s\n", mod->name, symname);
+	return 1;
 
 bad_version:
 	pr_warn("%s: disagrees about version of symbol %s\n",
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index eff3de3..d4a6e40 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1456,9 +1456,9 @@
 		new_ptl = pmd_lockptr(mm, new_pmd);
 		if (new_ptl != old_ptl)
 			spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
-		if (pmd_present(*old_pmd) && pmd_dirty(*old_pmd))
-			force_flush = true;
 		pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd);
+		if (pmd_present(pmd) && pmd_dirty(pmd))
+			force_flush = true;
 		VM_BUG_ON(!pmd_none(*new_pmd));
 
 		if (pmd_move_must_withdraw(new_ptl, old_ptl) &&
diff --git a/mm/mremap.c b/mm/mremap.c
index 6ccecc0..30d7d24 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -149,14 +149,18 @@
 		if (pte_none(*old_pte))
 			continue;
 
-		/*
-		 * We are remapping a dirty PTE, make sure to
-		 * flush TLB before we drop the PTL for the
-		 * old PTE or we may race with page_mkclean().
-		 */
-		if (pte_present(*old_pte) && pte_dirty(*old_pte))
-			force_flush = true;
 		pte = ptep_get_and_clear(mm, old_addr, old_pte);
+		/*
+		 * If we are remapping a dirty PTE, make sure
+		 * to flush TLB before we drop the PTL for the
+		 * old PTE or we may race with page_mkclean().
+		 *
+		 * This check has to be done after we removed the
+		 * old PTE from page tables or another thread may
+		 * dirty it after the check and before the removal.
+		 */
+		if (pte_present(pte) && pte_dirty(pte))
+			force_flush = true;
 		pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
 		pte = move_soft_dirty_pte(pte);
 		set_pte_at(mm, new_addr, new_pte, pte);
diff --git a/sound/sparc/dbri.c b/sound/sparc/dbri.c
index 0190cb6..3fe4468 100644
--- a/sound/sparc/dbri.c
+++ b/sound/sparc/dbri.c
@@ -304,7 +304,7 @@
 	spinlock_t lock;
 
 	struct dbri_dma *dma;	/* Pointer to our DMA block */
-	u32 dma_dvma;		/* DBRI visible DMA address */
+	dma_addr_t dma_dvma;	/* DBRI visible DMA address */
 
 	void __iomem *regs;	/* dbri HW regs */
 	int dbri_irqp;		/* intr queue pointer */
@@ -657,12 +657,14 @@
  */
 static s32 *dbri_cmdlock(struct snd_dbri *dbri, int len)
 {
+	u32 dvma_addr = (u32)dbri->dma_dvma;
+
 	/* Space for 2 WAIT cmds (replaced later by 1 JUMP cmd) */
 	len += 2;
 	spin_lock(&dbri->cmdlock);
 	if (dbri->cmdptr - dbri->dma->cmd + len < DBRI_NO_CMDS - 2)
 		return dbri->cmdptr + 2;
-	else if (len < sbus_readl(dbri->regs + REG8) - dbri->dma_dvma)
+	else if (len < sbus_readl(dbri->regs + REG8) - dvma_addr)
 		return dbri->dma->cmd;
 	else
 		printk(KERN_ERR "DBRI: no space for commands.");
@@ -680,6 +682,7 @@
  */
 static void dbri_cmdsend(struct snd_dbri *dbri, s32 *cmd, int len)
 {
+	u32 dvma_addr = (u32)dbri->dma_dvma;
 	s32 tmp, addr;
 	static int wait_id = 0;
 
@@ -689,7 +692,7 @@
 	*(cmd+1) = DBRI_CMD(D_WAIT, 1, wait_id);
 
 	/* Replace the last command with JUMP */
-	addr = dbri->dma_dvma + (cmd - len - dbri->dma->cmd) * sizeof(s32);
+	addr = dvma_addr + (cmd - len - dbri->dma->cmd) * sizeof(s32);
 	*(dbri->cmdptr+1) = addr;
 	*(dbri->cmdptr) = DBRI_CMD(D_JUMP, 0, 0);
 
@@ -747,6 +750,7 @@
 /* Lock must not be held before calling this */
 static void dbri_initialize(struct snd_dbri *dbri)
 {
+	u32 dvma_addr = (u32)dbri->dma_dvma;
 	s32 *cmd;
 	u32 dma_addr;
 	unsigned long flags;
@@ -764,7 +768,7 @@
 	/*
 	 * Initialize the interrupt ring buffer.
 	 */
-	dma_addr = dbri->dma_dvma + dbri_dma_off(intr, 0);
+	dma_addr = dvma_addr + dbri_dma_off(intr, 0);
 	dbri->dma->intr[0] = dma_addr;
 	dbri->dbri_irqp = 1;
 	/*
@@ -778,7 +782,7 @@
 	dbri->cmdptr = cmd;
 	*(cmd++) = DBRI_CMD(D_WAIT, 1, 0);
 	*(cmd++) = DBRI_CMD(D_WAIT, 1, 0);
-	dma_addr = dbri->dma_dvma + dbri_dma_off(cmd, 0);
+	dma_addr = dvma_addr + dbri_dma_off(cmd, 0);
 	sbus_writel(dma_addr, dbri->regs + REG8);
 	spin_unlock(&dbri->cmdlock);
 
@@ -1077,6 +1081,7 @@
 static int setup_descs(struct snd_dbri *dbri, int streamno, unsigned int period)
 {
 	struct dbri_streaminfo *info = &dbri->stream_info[streamno];
+	u32 dvma_addr = (u32)dbri->dma_dvma;
 	__u32 dvma_buffer;
 	int desc;
 	int len;
@@ -1177,7 +1182,7 @@
 		else {
 			dbri->next_desc[last_desc] = desc;
 			dbri->dma->desc[last_desc].nda =
-			    dbri->dma_dvma + dbri_dma_off(desc, desc);
+			    dvma_addr + dbri_dma_off(desc, desc);
 		}
 
 		last_desc = desc;
@@ -1192,7 +1197,7 @@
 	}
 
 	dbri->dma->desc[last_desc].nda =
-	    dbri->dma_dvma + dbri_dma_off(desc, first_desc);
+	    dvma_addr + dbri_dma_off(desc, first_desc);
 	dbri->next_desc[last_desc] = first_desc;
 	dbri->pipes[info->pipe].first_desc = first_desc;
 	dbri->pipes[info->pipe].desc = first_desc;
@@ -1697,6 +1702,7 @@
 static void xmit_descs(struct snd_dbri *dbri)
 {
 	struct dbri_streaminfo *info;
+	u32 dvma_addr = (u32)dbri->dma_dvma;
 	s32 *cmd;
 	unsigned long flags;
 	int first_td;
@@ -1718,7 +1724,7 @@
 			*(cmd++) = DBRI_CMD(D_SDP, 0,
 					    dbri->pipes[info->pipe].sdp
 					    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
-			*(cmd++) = dbri->dma_dvma +
+			*(cmd++) = dvma_addr +
 				   dbri_dma_off(desc, first_td);
 			dbri_cmdsend(dbri, cmd, 2);
 
@@ -1740,7 +1746,7 @@
 			*(cmd++) = DBRI_CMD(D_SDP, 0,
 					    dbri->pipes[info->pipe].sdp
 					    | D_SDP_P | D_SDP_EVERY | D_SDP_C);
-			*(cmd++) = dbri->dma_dvma +
+			*(cmd++) = dvma_addr +
 				   dbri_dma_off(desc, first_td);
 			dbri_cmdsend(dbri, cmd, 2);
 
@@ -2539,7 +2545,7 @@
 	if (!dbri->dma)
 		return -ENOMEM;
 
-	dprintk(D_GEN, "DMA Cmd Block 0x%p (0x%08x)\n",
+	dprintk(D_GEN, "DMA Cmd Block 0x%p (%pad)\n",
 		dbri->dma, dbri->dma_dvma);
 
 	/* Map the registers into memory. */