Merge tag 'v1.1.1' into stable-1.1

* tag 'v1.1.1': (34 commits)
  update VERSION for v1.1.1
  s390x: fix s390 virtio aliases
  rtl8139: validate rx ring before receiving packets
  ahci: SATA FIS is 20 bytes, not 0x20
  qemu-img: document qed format on qemu-img man page
  virtio: Fix compiler warning for non Linux hosts
  sheepdog: fix return value of do_load_save_vm_state
  qemu/xendisk: set maximum number of grants to be used
  build: install qmp-commands.txt
  fdc: fix implied seek while there is no media in drive
  qcow2: fix autoclear image header update
  Prevent disk data loss when closing qemu
  qcow2: fix endianness conversion
  pci_bridge_dev: fix error path in pci_bridge_dev_initfn()
  qdev: release parent properties on dc->init failure
  intel-hda: Fix reset of MSI function
  ahci: Fix reset of MSI function
  rtl8139: honor RxOverflow flag in can_receive method
  configure: Fix build for some versions of glibc (9pfs)
  monitor: Fix memory leak with readline completion
  ...

Signed-off-by: Avi Kivity <avi@redhat.com>
diff --git a/Makefile b/Makefile
index 9b7a85e..9707fa0 100644
--- a/Makefile
+++ b/Makefile
@@ -271,6 +271,7 @@
 install-doc: $(DOCS)
 	$(INSTALL_DIR) "$(DESTDIR)$(qemu_docdir)"
 	$(INSTALL_DATA) qemu-doc.html  qemu-tech.html "$(DESTDIR)$(qemu_docdir)"
+	$(INSTALL_DATA) QMP/qmp-commands.txt "$(DESTDIR)$(qemu_docdir)"
 ifdef CONFIG_POSIX
 	$(INSTALL_DIR) "$(DESTDIR)$(mandir)/man1"
 	$(INSTALL_DATA) qemu.1 qemu-img.1 "$(DESTDIR)$(mandir)/man1"
diff --git a/VERSION b/VERSION
index 9084fa2..524cb55 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.1.0
+1.1.1
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index 4b3345b..c173fcd 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -471,6 +471,8 @@
                 QCOW_OFLAG_COMPRESSED | QCOW_OFLAG_ZERO);
         *cluster_offset &= L2E_OFFSET_MASK;
         break;
+    default:
+        abort();
     }
 
     qcow2_cache_put(bs, s->l2_table_cache, (void**) &l2_table);
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
index 812c93c..443c021 100644
--- a/block/qcow2-refcount.c
+++ b/block/qcow2-refcount.c
@@ -367,7 +367,7 @@
     }
 
     for(i = 0; i < table_size; i++) {
-        cpu_to_be64s(&new_table[i]);
+        be64_to_cpus(&new_table[i]);
     }
 
     /* Hook up the new refcount table in the qcow2 header */
diff --git a/block/qcow2.c b/block/qcow2.c
index c2e49cd..79201fc 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -298,14 +298,6 @@
         goto fail;
     }
 
-    if (!bs->read_only && s->autoclear_features != 0) {
-        s->autoclear_features = 0;
-        ret = qcow2_update_header(bs);
-        if (ret < 0) {
-            goto fail;
-        }
-    }
-
     /* Check support for various header values */
     if (header.refcount_order != 4) {
         report_unsupported(bs, "%d bit reference counts",
@@ -411,6 +403,15 @@
         goto fail;
     }
 
+    /* Clear unknown autoclear feature bits */
+    if (!bs->read_only && s->autoclear_features != 0) {
+        s->autoclear_features = 0;
+        ret = qcow2_update_header(bs);
+        if (ret < 0) {
+            goto fail;
+        }
+    }
+
     /* Initialise locks */
     qemu_co_mutex_init(&s->lock);
 
diff --git a/block/sheepdog.c b/block/sheepdog.c
index 6d52277..f46ca8f 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -1957,7 +1957,7 @@
                                 int64_t pos, int size, int load)
 {
     int fd, create;
-    int ret = 0;
+    int ret = 0, remaining = size;
     unsigned int data_len;
     uint64_t vmstate_oid;
     uint32_t vdi_index;
@@ -1968,11 +1968,11 @@
         return fd;
     }
 
-    while (size) {
+    while (remaining) {
         vdi_index = pos / SD_DATA_OBJ_SIZE;
         offset = pos % SD_DATA_OBJ_SIZE;
 
-        data_len = MIN(size, SD_DATA_OBJ_SIZE);
+        data_len = MIN(remaining, SD_DATA_OBJ_SIZE);
 
         vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index);
 
@@ -1993,9 +1993,9 @@
         }
 
         pos += data_len;
-        size -= data_len;
-        ret += data_len;
+        remaining -= data_len;
     }
+    ret = size;
 cleanup:
     closesocket(fd);
     return ret;
diff --git a/configure b/configure
index 38dafec..c304f68 100755
--- a/configure
+++ b/configure
@@ -2828,7 +2828,11 @@
 open_by_hande_at=no
 cat > $TMPC << EOF
 #include <fcntl.h>
+#if !defined(AT_EMPTY_PATH)
+# error missing definition
+#else
 int main(void) { struct file_handle fh; return open_by_handle_at(0, &fh, 0); }
+#endif
 EOF
 if compile_prog "" "" ; then
     open_by_handle_at=yes
@@ -2932,7 +2936,8 @@
       tools="$tools fsdev/virtfs-proxy-helper\$(EXESUF)"
     else
       if test "$virtfs" = yes; then
-        feature_not_found "virtfs"
+        echo "VirtFS is supported only on Linux and requires libcap-devel and libattr-devel"
+        exit 1
       fi
       virtfs=no
     fi
diff --git a/exec.c b/exec.c
index a0494c7..0a67f07 100644
--- a/exec.c
+++ b/exec.c
@@ -1492,7 +1492,8 @@
 
 static void breakpoint_invalidate(CPUArchState *env, target_ulong pc)
 {
-    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc));
+    tb_invalidate_phys_addr(cpu_get_phys_page_debug(env, pc) |
+            (pc & ~TARGET_PAGE_MASK));
 }
 #endif
 #endif /* TARGET_HAS_ICE */
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index a883a92..2d7d03d 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -462,7 +462,7 @@
 
 static void ahci_init_d2h(AHCIDevice *ad)
 {
-    uint8_t init_fis[0x20];
+    uint8_t init_fis[20];
     IDEState *ide_state = &ad->port.ifs[0];
 
     memset(init_fis, 0, sizeof(init_fis));
@@ -619,7 +619,7 @@
     d2h_fis[11] = cmd_fis[11];
     d2h_fis[12] = cmd_fis[12];
     d2h_fis[13] = cmd_fis[13];
-    for (i = 14; i < 0x20; i++) {
+    for (i = 14; i < 20; i++) {
         d2h_fis[i] = 0;
     }
 
diff --git a/hw/ide/ich.c b/hw/ide/ich.c
index 560ae37..242254e 100644
--- a/hw/ide/ich.c
+++ b/hw/ide/ich.c
@@ -84,6 +84,14 @@
     .unmigratable = 1,
 };
 
+static void pci_ich9_reset(void *opaque)
+{
+    struct AHCIPCIState *d = opaque;
+
+    msi_reset(&d->card);
+    ahci_reset(opaque);
+}
+
 static int pci_ich9_ahci_init(PCIDevice *dev)
 {
     struct AHCIPCIState *d;
@@ -102,7 +110,7 @@
     /* XXX Software should program this register */
     d->card.config[0x90]   = 1 << 6; /* Address Map Register - AHCI mode */
 
-    qemu_register_reset(ahci_reset, d);
+    qemu_register_reset(pci_ich9_reset, d);
 
     msi_init(dev, 0x50, 1, true, false);
     d->ahci.irq = d->card.irq[0];
@@ -133,7 +141,7 @@
     d = DO_UPCAST(struct AHCIPCIState, card, dev);
 
     msi_uninit(dev);
-    qemu_unregister_reset(ahci_reset, d);
+    qemu_unregister_reset(pci_ich9_reset, d);
     ahci_uninit(&d->ahci);
 
     return 0;
diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index bb11af2..e38861e 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -1107,6 +1107,9 @@
     DeviceState *qdev;
     HDACodecDevice *cdev;
 
+    if (d->msi) {
+        msi_reset(&d->pci);
+    }
     intel_hda_regs_reset(d);
     d->wall_base_ns = qemu_get_clock_ns(vm_clock);
 
diff --git a/hw/kvm/apic.c b/hw/kvm/apic.c
index ffe7a52..a0ab503 100644
--- a/hw/kvm/apic.c
+++ b/hw/kvm/apic.c
@@ -29,7 +29,7 @@
     APICCommonState *s = DO_UPCAST(APICCommonState, busdev.qdev, d);
     int i;
 
-    memset(kapic, 0, sizeof(kapic));
+    memset(kapic, 0, sizeof(*kapic));
     kvm_apic_set_reg(kapic, 0x2, s->id << 24);
     kvm_apic_set_reg(kapic, 0x8, s->tpr);
     kvm_apic_set_reg(kapic, 0xd, s->log_dest << 24);
diff --git a/hw/pci_bridge_dev.c b/hw/pci_bridge_dev.c
index eccaa58..ad63703 100644
--- a/hw/pci_bridge_dev.c
+++ b/hw/pci_bridge_dev.c
@@ -52,7 +52,7 @@
 {
     PCIBridge *br = DO_UPCAST(PCIBridge, dev, dev);
     PCIBridgeDev *bridge_dev = DO_UPCAST(PCIBridgeDev, bridge, br);
-    int err;
+    int err, ret;
     pci_bridge_map_irq(br, NULL, pci_bridge_dev_map_irq_fn);
     err = pci_bridge_initfn(dev);
     if (err) {
@@ -86,6 +86,8 @@
     shpc_cleanup(dev, &bridge_dev->bar);
 shpc_error:
     memory_region_destroy(&bridge_dev->bar);
+    ret = pci_bridge_exitfn(dev);
+    assert(!ret);
 bridge_error:
     return err;
 }
diff --git a/hw/qdev-monitor.c b/hw/qdev-monitor.c
index eed781d..405697e 100644
--- a/hw/qdev-monitor.c
+++ b/hw/qdev-monitor.c
@@ -20,6 +20,7 @@
 #include "qdev.h"
 #include "monitor.h"
 #include "qmp-commands.h"
+#include "arch_init.h"
 
 /*
  * Aliases were a bad idea from the start.  Let's keep them
@@ -29,16 +30,18 @@
 {
     const char *typename;
     const char *alias;
+    uint32_t arch_mask;
 } QDevAlias;
 
 static const QDevAlias qdev_alias_table[] = {
-    { "virtio-blk-pci", "virtio-blk" },
-    { "virtio-net-pci", "virtio-net" },
-    { "virtio-serial-pci", "virtio-serial" },
-    { "virtio-balloon-pci", "virtio-balloon" },
-    { "virtio-blk-s390", "virtio-blk" },
-    { "virtio-net-s390", "virtio-net" },
-    { "virtio-serial-s390", "virtio-serial" },
+    { "virtio-blk-pci", "virtio-blk", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
+    { "virtio-net-pci", "virtio-net", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
+    { "virtio-serial-pci", "virtio-serial", QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
+    { "virtio-balloon-pci", "virtio-balloon",
+            QEMU_ARCH_ALL & ~QEMU_ARCH_S390X },
+    { "virtio-blk-s390", "virtio-blk", QEMU_ARCH_S390X },
+    { "virtio-net-s390", "virtio-net", QEMU_ARCH_S390X },
+    { "virtio-serial-s390", "virtio-serial", QEMU_ARCH_S390X },
     { "lsi53c895a", "lsi" },
     { "ich9-ahci", "ahci" },
     { }
@@ -50,6 +53,11 @@
     int i;
 
     for (i = 0; qdev_alias_table[i].typename; i++) {
+        if (qdev_alias_table[i].arch_mask &&
+            !(qdev_alias_table[i].arch_mask & arch_type)) {
+            continue;
+        }
+
         if (strcmp(qdev_alias_table[i].typename, typename) == 0) {
             return qdev_alias_table[i].alias;
         }
@@ -110,6 +118,11 @@
     int i;
 
     for (i = 0; qdev_alias_table[i].alias; i++) {
+        if (qdev_alias_table[i].arch_mask &&
+            !(qdev_alias_table[i].arch_mask & arch_type)) {
+            continue;
+        }
+
         if (strcmp(qdev_alias_table[i].alias, alias) == 0) {
             return qdev_alias_table[i].typename;
         }
diff --git a/hw/qdev.c b/hw/qdev.c
index 6a8f6bd..af419b9 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -150,6 +150,7 @@
 
     rc = dc->init(dev);
     if (rc < 0) {
+        object_unparent(OBJECT(dev));
         qdev_free(dev);
         return rc;
     }
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index 2413bc3..7b15047 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -781,6 +781,13 @@
 #endif
 }
 
+/* Workaround for buggy guest driver such as linux who allocates rx
+ * rings after the receiver were enabled. */
+static bool rtl8139_cp_rx_valid(RTL8139State *s)
+{
+    return !(s->RxRingAddrLO == 0 && s->RxRingAddrHI == 0);
+}
+
 static int rtl8139_can_receive(VLANClientState *nc)
 {
     RTL8139State *s = DO_UPCAST(NICState, nc, nc)->opaque;
@@ -792,14 +799,14 @@
     if (!rtl8139_receiver_enabled(s))
       return 1;
 
-    if (rtl8139_cp_receiver_enabled(s)) {
+    if (rtl8139_cp_receiver_enabled(s) && rtl8139_cp_rx_valid(s)) {
         /* ??? Flow control not implemented in c+ mode.
            This is a hack to work around slirp deficiencies anyway.  */
         return 1;
     } else {
         avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr,
                      s->RxBufferSize);
-        return (avail == 0 || avail >= 1514);
+        return (avail == 0 || avail >= 1514 || (s->IntrMask & RxOverflow));
     }
 }
 
@@ -937,6 +944,10 @@
 
     if (rtl8139_cp_receiver_enabled(s))
     {
+        if (!rtl8139_cp_rx_valid(s)) {
+            return size;
+        }
+
         DPRINTF("in C+ Rx mode ================\n");
 
         /* begin C+ receiver mode */
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index f9e1896..fe07746 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -147,9 +147,11 @@
 
 static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 {
+#ifdef __linux__
     int ret;
-    int status = VIRTIO_BLK_S_OK;
     int i;
+#endif
+    int status = VIRTIO_BLK_S_OK;
 
     /*
      * We require at least one output segment each for the virtio_blk_outhdr
@@ -489,7 +491,22 @@
     stw_raw(&blkcfg.min_io_size, s->conf->min_io_size / blk_size);
     stw_raw(&blkcfg.opt_io_size, s->conf->opt_io_size / blk_size);
     blkcfg.heads = heads;
-    blkcfg.sectors = secs & ~s->sector_mask;
+    /*
+     * We must ensure that the block device capacity is a multiple of
+     * the logical block size. If that is not the case, lets use
+     * sector_mask to adopt the geometry to have a correct picture.
+     * For those devices where the capacity is ok for the given geometry
+     * we dont touch the sector value of the geometry, since some devices
+     * (like s390 dasd) need a specific value. Here the capacity is already
+     * cyls*heads*secs*blk_size and the sector value is not block size
+     * divided by 512 - instead it is the amount of blk_size blocks
+     * per track (cylinder).
+     */
+    if (bdrv_getlength(s->bs) /  heads / secs % blk_size) {
+        blkcfg.sectors = secs & ~s->sector_mask;
+    } else {
+        blkcfg.sectors = secs;
+    }
     blkcfg.size_max = 0;
     blkcfg.physical_block_exp = get_physical_block_exp(s->conf);
     blkcfg.alignment_offset = 0;
diff --git a/hw/xen_disk.c b/hw/xen_disk.c
index 07594bc..de7e8a4 100644
--- a/hw/xen_disk.c
+++ b/hw/xen_disk.c
@@ -537,6 +537,15 @@
     blk_handle_requests(blkdev);
 }
 
+/*
+ * We need to account for the grant allocations requiring contiguous
+ * chunks; the worst case number would be
+ *     max_req * max_seg + (max_req - 1) * (max_seg - 1) + 1,
+ * but in order to keep things simple just use
+ *     2 * max_req * max_seg.
+ */
+#define MAX_GRANTS(max_req, max_seg) (2 * (max_req) * (max_seg))
+
 static void blk_alloc(struct XenDevice *xendev)
 {
     struct XenBlkDev *blkdev = container_of(xendev, struct XenBlkDev, xendev);
@@ -548,6 +557,11 @@
     if (xen_mode != XEN_EMULATE) {
         batch_maps = 1;
     }
+    if (xc_gnttab_set_max_grants(xendev->gnttabdev,
+            MAX_GRANTS(max_requests, BLKIF_MAX_SEGMENTS_PER_REQUEST)) < 0) {
+        xen_be_printf(xendev, 0, "xc_gnttab_set_max_grants failed: %s\n",
+                      strerror(errno));
+    }
 }
 
 static int blk_init(struct XenDevice *xendev)
diff --git a/os-win32.c b/os-win32.c
index ad76370..13892ba 100644
--- a/os-win32.c
+++ b/os-win32.c
@@ -57,7 +57,13 @@
 
 static BOOL WINAPI qemu_ctrl_handler(DWORD type)
 {
-    exit(STATUS_CONTROL_C_EXIT);
+    qemu_system_shutdown_request();
+    /* Windows 7 kills application when the function returns.
+       Sleep here to give QEMU a try for closing.
+       Sleep period is 10000ms because Windows kills the program
+       after 10 seconds anyway. */
+    Sleep(10000);
+
     return TRUE;
 }
 
diff --git a/qemu-img.texi b/qemu-img.texi
index 6fc3c28..9cee2ea 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -232,6 +232,29 @@
 
 @end table
 
+@item qed
+Image format with support for backing files and compact image files (when your
+filesystem or transport medium does not support holes).  Good performance due
+to less metadata than the more featureful qcow2 format, especially with
+cache=writethrough or cache=directsync.  Consider using qcow2 which will soon
+have a similar optimization and is most actively developed.
+
+Supported options:
+@table @code
+@item backing_file
+File name of a base image (see @option{create} subcommand).
+@item backing_fmt
+Image file format of backing file (optional).  Useful if the format cannot be
+autodetected because it has no header, like some vhd/vpc files.
+@item cluster_size
+Changes the cluster size (must be power-of-2 between 4K and 64K). Smaller
+cluster sizes can improve the image file size whereas larger cluster sizes
+generally provide better performance.
+@item table_size
+Changes the number of clusters per L1/L2 table (must be power-of-2 between 1
+and 16).  There is normally no need to change this value but this option can be
+used for performance benchmarking.
+@end table
 
 @item qcow
 Old QEMU image format. Left for compatibility.
diff --git a/readline.c b/readline.c
index a6c0039..540cd8a 100644
--- a/readline.c
+++ b/readline.c
@@ -337,6 +337,9 @@
         }
         readline_show_prompt(rs);
     }
+    for (i = 0; i < rs->nb_completions; i++) {
+        g_free(rs->completions[i]);
+    }
 }
 
 /* return true if command handled */
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 6d0ea7c..d5b50d1 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -370,9 +370,12 @@
         uint32_t *vpn, uint32_t wi, uint32_t *ei);
 int xtensa_tlb_lookup(const CPUXtensaState *env, uint32_t addr, bool dtlb,
         uint32_t *pwi, uint32_t *pei, uint8_t *pring);
+void xtensa_tlb_set_entry_mmu(const CPUXtensaState *env,
+        xtensa_tlb_entry *entry, bool dtlb,
+        unsigned wi, unsigned ei, uint32_t vpn, uint32_t pte);
 void xtensa_tlb_set_entry(CPUXtensaState *env, bool dtlb,
         unsigned wi, unsigned ei, uint32_t vpn, uint32_t pte);
-int xtensa_get_physical_addr(CPUXtensaState *env,
+int xtensa_get_physical_addr(CPUXtensaState *env, bool update_tlb,
         uint32_t vaddr, int is_write, int mmu_idx,
         uint32_t *paddr, uint32_t *page_size, unsigned *access);
 void reset_mmu(CPUXtensaState *env);
diff --git a/target-xtensa/helper.c b/target-xtensa/helper.c
index 2094227..8ebef72 100644
--- a/target-xtensa/helper.c
+++ b/target-xtensa/helper.c
@@ -135,11 +135,11 @@
     uint32_t page_size;
     unsigned access;
 
-    if (xtensa_get_physical_addr(env, addr, 0, 0,
+    if (xtensa_get_physical_addr(env, false, addr, 0, 0,
                 &paddr, &page_size, &access) == 0) {
         return paddr;
     }
-    if (xtensa_get_physical_addr(env, addr, 2, 0,
+    if (xtensa_get_physical_addr(env, false, addr, 2, 0,
                 &paddr, &page_size, &access) == 0) {
         return paddr;
     }
@@ -448,30 +448,48 @@
     }
 }
 
-static int autorefill_mmu(CPUXtensaState *env, uint32_t vaddr, bool dtlb,
-        uint32_t *wi, uint32_t *ei, uint8_t *ring);
+static int get_pte(CPUXtensaState *env, uint32_t vaddr, uint32_t *pte);
 
-static int get_physical_addr_mmu(CPUXtensaState *env,
+static int get_physical_addr_mmu(CPUXtensaState *env, bool update_tlb,
         uint32_t vaddr, int is_write, int mmu_idx,
-        uint32_t *paddr, uint32_t *page_size, unsigned *access)
+        uint32_t *paddr, uint32_t *page_size, unsigned *access,
+        bool may_lookup_pt)
 {
     bool dtlb = is_write != 2;
     uint32_t wi;
     uint32_t ei;
     uint8_t ring;
+    uint32_t vpn;
+    uint32_t pte;
+    const xtensa_tlb_entry *entry = NULL;
+    xtensa_tlb_entry tmp_entry;
     int ret = xtensa_tlb_lookup(env, vaddr, dtlb, &wi, &ei, &ring);
 
     if ((ret == INST_TLB_MISS_CAUSE || ret == LOAD_STORE_TLB_MISS_CAUSE) &&
-            (mmu_idx != 0 || ((vaddr ^ env->sregs[PTEVADDR]) & 0xffc00000)) &&
-            autorefill_mmu(env, vaddr, dtlb, &wi, &ei, &ring) == 0) {
+            may_lookup_pt && get_pte(env, vaddr, &pte) == 0) {
+        ring = (pte >> 4) & 0x3;
+        wi = 0;
+        split_tlb_entry_spec_way(env, vaddr, dtlb, &vpn, wi, &ei);
+
+        if (update_tlb) {
+            wi = ++env->autorefill_idx & 0x3;
+            xtensa_tlb_set_entry(env, dtlb, wi, ei, vpn, pte);
+            env->sregs[EXCVADDR] = vaddr;
+            qemu_log("%s: autorefill(%08x): %08x -> %08x\n",
+                    __func__, vaddr, vpn, pte);
+        } else {
+            xtensa_tlb_set_entry_mmu(env, &tmp_entry, dtlb, wi, ei, vpn, pte);
+            entry = &tmp_entry;
+        }
         ret = 0;
     }
     if (ret != 0) {
         return ret;
     }
 
-    const xtensa_tlb_entry *entry =
-        xtensa_tlb_get_entry(env, dtlb, wi, ei);
+    if (entry == NULL) {
+        entry = xtensa_tlb_get_entry(env, dtlb, wi, ei);
+    }
 
     if (ring < mmu_idx) {
         return dtlb ?
@@ -494,30 +512,21 @@
     return 0;
 }
 
-static int autorefill_mmu(CPUXtensaState *env, uint32_t vaddr, bool dtlb,
-        uint32_t *wi, uint32_t *ei, uint8_t *ring)
+static int get_pte(CPUXtensaState *env, uint32_t vaddr, uint32_t *pte)
 {
     uint32_t paddr;
     uint32_t page_size;
     unsigned access;
     uint32_t pt_vaddr =
         (env->sregs[PTEVADDR] | (vaddr >> 10)) & 0xfffffffc;
-    int ret = get_physical_addr_mmu(env, pt_vaddr, 0, 0,
-            &paddr, &page_size, &access);
+    int ret = get_physical_addr_mmu(env, false, pt_vaddr, 0, 0,
+            &paddr, &page_size, &access, false);
 
     qemu_log("%s: trying autorefill(%08x) -> %08x\n", __func__,
             vaddr, ret ? ~0 : paddr);
 
     if (ret == 0) {
-        uint32_t vpn;
-        uint32_t pte = ldl_phys(paddr);
-
-        *ring = (pte >> 4) & 0x3;
-        *wi = (++env->autorefill_idx) & 0x3;
-        split_tlb_entry_spec_way(env, vaddr, dtlb, &vpn, *wi, ei);
-        xtensa_tlb_set_entry(env, dtlb, *wi, *ei, vpn, pte);
-        qemu_log("%s: autorefill(%08x): %08x -> %08x\n",
-                __func__, vaddr, vpn, pte);
+        *pte = ldl_phys(paddr);
     }
     return ret;
 }
@@ -553,13 +562,13 @@
  *
  * \return 0 if ok, exception cause code otherwise
  */
-int xtensa_get_physical_addr(CPUXtensaState *env,
+int xtensa_get_physical_addr(CPUXtensaState *env, bool update_tlb,
         uint32_t vaddr, int is_write, int mmu_idx,
         uint32_t *paddr, uint32_t *page_size, unsigned *access)
 {
     if (xtensa_option_enabled(env->config, XTENSA_OPTION_MMU)) {
-        return get_physical_addr_mmu(env, vaddr, is_write, mmu_idx,
-                paddr, page_size, access);
+        return get_physical_addr_mmu(env, update_tlb,
+                vaddr, is_write, mmu_idx, paddr, page_size, access, true);
     } else if (xtensa_option_bits_enabled(env->config,
                 XTENSA_OPTION_BIT(XTENSA_OPTION_REGION_PROTECTION) |
                 XTENSA_OPTION_BIT(XTENSA_OPTION_REGION_TRANSLATION))) {
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 364dc19..41107ff 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -79,7 +79,7 @@
         uint32_t paddr;
         uint32_t page_size;
         unsigned access;
-        int ret = xtensa_get_physical_addr(env, vaddr, is_write, mmu_idx,
+        int ret = xtensa_get_physical_addr(env, true, vaddr, is_write, mmu_idx,
                 &paddr, &page_size, &access);
 
         qemu_log("%s(%08x, %d, %d) -> %08x, ret = %d\n", __func__,
@@ -103,7 +103,7 @@
     uint32_t paddr;
     uint32_t page_size;
     unsigned access;
-    int ret = xtensa_get_physical_addr(env, vaddr, 2, 0,
+    int ret = xtensa_get_physical_addr(env, false, vaddr, 2, 0,
             &paddr, &page_size, &access);
     if (ret == 0) {
         tb_invalidate_phys_addr(paddr);
@@ -655,6 +655,16 @@
     }
 }
 
+void xtensa_tlb_set_entry_mmu(const CPUXtensaState *env,
+        xtensa_tlb_entry *entry, bool dtlb,
+        unsigned wi, unsigned ei, uint32_t vpn, uint32_t pte)
+{
+    entry->vaddr = vpn;
+    entry->paddr = pte & xtensa_tlb_get_addr_mask(env, dtlb, wi);
+    entry->asid = (env->sregs[RASID] >> ((pte >> 1) & 0x18)) & 0xff;
+    entry->attr = pte & 0xf;
+}
+
 void xtensa_tlb_set_entry(CPUXtensaState *env, bool dtlb,
         unsigned wi, unsigned ei, uint32_t vpn, uint32_t pte)
 {
@@ -665,10 +675,8 @@
             if (entry->asid) {
                 tlb_flush_page(env, entry->vaddr);
             }
-            entry->vaddr = vpn;
-            entry->paddr = pte & xtensa_tlb_get_addr_mask(env, dtlb, wi);
-            entry->asid = (env->sregs[RASID] >> ((pte >> 1) & 0x18)) & 0xff;
-            entry->attr = pte & 0xf;
+            xtensa_tlb_set_entry_mmu(env, entry, dtlb, wi, ei, vpn, pte);
+            tlb_flush_page(env, entry->vaddr);
         } else {
             qemu_log("%s %d, %d, %d trying to set immutable entry\n",
                     __func__, dtlb, wi, ei);
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 521c0e6..a542a31 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -388,6 +388,7 @@
             dc->next_pc == dc->lend) {
         int label = gen_new_label();
 
+        gen_advance_ccount(dc);
         tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_SR[LCOUNT], 0, label);
         tcg_gen_subi_i32(cpu_SR[LCOUNT], cpu_SR[LCOUNT], 1);
         gen_jumpi(dc, dc->lbeg, slot);
@@ -410,6 +411,7 @@
 {
     int label = gen_new_label();
 
+    gen_advance_ccount(dc);
     tcg_gen_brcond_i32(cond, t0, t1, label);
     gen_jumpi_check_loop_end(dc, 0);
     gen_set_label(label);
diff --git a/tests/tcg/xtensa/test_mmu.S b/tests/tcg/xtensa/test_mmu.S
index 52d5774..5d87fbb 100644
--- a/tests/tcg/xtensa/test_mmu.S
+++ b/tests/tcg/xtensa/test_mmu.S
@@ -293,26 +293,219 @@
     assert  eq, a2, a3
 test_end
 
-test dtlb_autoload
-    set_vector kernel, 0
-
-    movi    a2, 0xd4000000
+/* Set up page table entry vaddr->paddr, ring=pte_ring, attr=pte_attr
+ * and DTLB way 7 to cover this PTE, ring=pt_ring, attr=pt_attr
+ */
+.macro pt_setup pt_ring, pt_attr, pte_ring, vaddr, paddr, pte_attr
+    movi    a2, 0x80000000
     wsr     a2, ptevaddr
-    movi    a3, 0x00001013
-    s32i    a3, a2, 4
+
+    movi    a3, 0x80000007 | (((\vaddr) >> 10) & 0xfffff000) /* way 7 */
+    movi    a4, 0x04000003 | ((\pt_ring) << 4) /* PADDR 64M */
+    wdtlb   a4, a3
+    isync
+
+    movi    a3, ((\paddr) & 0xfffff000) | ((\pte_ring) << 4) | (\pte_attr)
+    movi    a1, ((\vaddr) >> 12) << 2
+    add     a2, a1, a2
+    s32i    a3, a2, 0
+
+    movi    a3, 0x80000007 | (((\vaddr) >> 10) & 0xfffff000) /* way 7 */
+    movi    a4, 0x04000000 | ((\pt_ring) << 4) | (\pt_attr) /* PADDR 64M */
+    wdtlb   a4, a3
+    isync
+
+    movi    a3, (\vaddr)
+.endm
+
+/* out: PS.RING=ring, PS.EXCM=excm, a3=vaddr */
+.macro go_ring ring, excm, vaddr
+    movi    a3, 10f
+    pitlb   a3, a3
+    ritlb1  a2, a3
+    movi    a1, 0x10
+    or      a2, a2, a1
+    movi    a1, 0x000ff000
+    and     a3, a3, a1
+    movi    a1, 4
+    or      a3, a3, a1
+    witlb   a2, a3
+    movi    a3, 10f
+    movi    a1, 0x000fffff
+    and     a1, a3, a1
+
+    movi    a2, 0
+    wsr     a2, excvaddr
+
+    movi    a3, \vaddr
+    movi    a2, 0x4000f | ((\ring) << 6) | ((\excm) << 4)
+    jx      a1
+10:
+    wsr     a2, ps
+    isync
+.endm
+
+/* in: a3 -- virtual address to test */
+.macro assert_auto_tlb
+    movi    a2, 0x4000f
+    wsr     a2, ps
+    isync
+    pdtlb   a2, a3
+    movi    a1, 0xfffff01f
+    and     a2, a2, a1
+    movi    a1, 0xfffff000
+    and     a1, a1, a3
+    xor     a1, a1, a2
+    assert  gei, a1, 0x10
+    movi    a2, 0x14
+    assert  lt, a1, a2
+.endm
+
+/* in: a3 -- virtual address to test */
+.macro assert_no_auto_tlb
+    movi    a2, 0x4000f
+    wsr     a2, ps
+    isync
     pdtlb   a2, a3
     movi    a1, 0x10
     and     a1, a1, a2
     assert  eqi, a1, 0
+.endm
+
+.macro assert_sr sr, v
+    rsr     a2, \sr
+    movi    a1, (\v)
+    assert  eq, a1, a2
+.endm
+
+.macro assert_epc1_1m vaddr
+    movi    a2, (\vaddr)
+    movi    a1, 0xfffff
+    and     a1, a1, a2
+    rsr     a2, epc1
+    assert  eq, a1, a2
+.endm
+
+test dtlb_autoload
+    set_vector kernel, 0
+
+    pt_setup    0, 3, 1, 0x1000, 0x1000, 3
+    assert_no_auto_tlb
+
     l8ui    a1, a3, 0
-    pdtlb   a2, a3
-    movi    a1, 0xfffff010
-    and     a1, a1, a2
-    movi    a3, 0x00001010
-    assert  eq, a1, a3
-    movi    a1, 0xf
-    and     a1, a1, a2
-    assert  lti, a1, 4
+
+    rsr     a2, excvaddr
+    assert  eq, a2, a3
+
+    assert_auto_tlb
+test_end
+
+test autoload_load_store_privilege
+    set_vector kernel, 0
+    set_vector double, 2f
+
+    pt_setup    0, 3, 0, 0x2000, 0x2000, 3
+    movi    a3, 0x2004
+    assert_no_auto_tlb
+
+    movi    a2, 0x4005f    /* ring 1 + excm => cring == 0 */
+    wsr     a2, ps
+    isync
+1:
+    l32e    a2, a3, -4     /* ring used */
+    test_fail
+2:
+    rsr     a2, excvaddr
+    addi    a1, a3, -4
+    assert  eq, a1, a2
+
+    assert_auto_tlb
+    assert_sr depc, 1b
+    assert_sr exccause, 26
+test_end
+
+test autoload_pte_load_prohibited
+    set_vector kernel, 2f
+
+    pt_setup    0, 3, 0, 0x3000, 0, 0xc
+    assert_no_auto_tlb
+1:
+    l32i    a2, a3, 0
+    test_fail
+2:
+    rsr     a2, excvaddr
+    assert  eq, a2, a3
+
+    assert_auto_tlb
+    assert_sr epc1, 1b
+    assert_sr exccause, 28
+test_end
+
+test autoload_pt_load_prohibited
+    set_vector kernel, 2f
+
+    pt_setup    0, 0xc, 0, 0x4000, 0x4000, 3
+    assert_no_auto_tlb
+1:
+    l32i    a2, a3, 0
+    test_fail
+2:
+    rsr     a2, excvaddr
+    assert  eq, a2, a3
+
+    assert_no_auto_tlb
+    assert_sr epc1, 1b
+    assert_sr exccause, 24
+test_end
+
+test autoload_pt_privilege
+    set_vector  kernel, 2f
+    pt_setup    0, 3, 1, 0x5000, 0, 3
+    go_ring     1, 0, 0x5001
+
+    l8ui    a2, a3, 0
+1:
+    syscall
+2:
+    rsr     a2, excvaddr
+    assert  eq, a2, a3
+
+    assert_auto_tlb
+    assert_epc1_1m 1b
+    assert_sr exccause, 1
+test_end
+
+test autoload_pte_privilege
+    set_vector  kernel, 2f
+    pt_setup    0, 3, 0, 0x6000, 0, 3
+    go_ring     1, 0, 0x6001
+1:
+    l8ui    a2, a3, 0
+    syscall
+2:
+    rsr     a2, excvaddr
+    assert  eq, a2, a3
+
+    assert_auto_tlb
+    assert_epc1_1m 1b
+    assert_sr exccause, 26
+test_end
+
+test autoload_3_level_pt
+    set_vector  kernel, 2f
+    pt_setup    1, 3, 1, 0x00400000, 0, 3
+    pt_setup    1, 3, 1, 0x80001000, 0x2000000, 3
+    go_ring     1, 0, 0x00400001
+1:
+    l8ui    a2, a3, 0
+    syscall
+2:
+    rsr     a2, excvaddr
+    assert  eq, a2, a3
+
+    assert_no_auto_tlb
+    assert_epc1_1m 1b
+    assert_sr exccause, 24
 test_end
 
 test_suite_end
diff --git a/trace/simple.c b/trace/simple.c
index 33ae486..b4a3c6e 100644
--- a/trace/simple.c
+++ b/trace/simple.c
@@ -161,8 +161,11 @@
     }
 
     timestamp = get_clock();
-
+#if GLIB_CHECK_VERSION(2, 30, 0)
+    idx = g_atomic_int_add((gint *)&trace_idx, 1) % TRACE_BUF_LEN;
+#else
     idx = g_atomic_int_exchange_and_add((gint *)&trace_idx, 1) % TRACE_BUF_LEN;
+#endif
     trace_buf[idx] = (TraceRecord){
         .event = event,
         .timestamp_ns = timestamp,