Merge branch 'upstream-merge'

* upstream-merge: (575 commits)
  ssi: Add slave autoconnect helper
  MAINTAINERS: Added maintainerships for SSI
  xilinx_zynq: Added SPI controllers + flashes
  xilinx_spips: Xilinx Zynq SPI cntrlr device model
  petalogix-ml605: added SPI controller with n25q128
  xilinx_spi: Initial impl. of Xilinx SPI controller
  m25p80: Initial implementation of SPI flash device
  hw: Added generic FIFO API.
  stellaris: Removed SSI mux
  qdev: allow multiple qdev_init_gpio_in() calls
  ssi: Added create_slave_no_init()
  ssi: Implemented CS behaviour
  ssi: Support for multiple attached devices
  qemu-barrier: Fix compilation on i386 hosts
  target-sparc: Optimize conditionals using SUBCC
  target-sparc: Fall through from not-taken trap
  target-sparc: Cleanup "global" temporary allocation
  target-sparc: Use movcond for FMOV*R
  target-sparc: Use movcond in mulscc
  target-sparc: Move taddcctv and tsubcctv out of line
  ...

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
diff --git a/.gitignore b/.gitignore
index 824c0d2..bd6ba1c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,8 +12,6 @@
 *-linux-user
 *-bsd-user
 libdis*
-libhw32
-libhw64
 libuser
 linux-headers/asm
 qapi-generated
diff --git a/HACKING b/HACKING
index 471cf1d..dddd617 100644
--- a/HACKING
+++ b/HACKING
@@ -91,10 +91,11 @@
 
 4. String manipulation
 
-Do not use the strncpy function.  According to the man page, it does
-*not* guarantee a NULL-terminated buffer, which makes it extremely dangerous
-to use.  Instead, use functionally equivalent function:
-void pstrcpy(char *buf, int buf_size, const char *str)
+Do not use the strncpy function.  As mentioned in the man page, it does *not*
+guarantee a NULL-terminated buffer, which makes it extremely dangerous to use.
+It also zeros trailing destination bytes out to the specified length.  Instead,
+use this similar function when possible, but note its different signature:
+void pstrcpy(char *dest, int dest_buf_size, const char *src)
 
 Don't use strcat because it can't check for buffer overflows, but:
 char *pstrcat(char *buf, int buf_size, const char *s)
diff --git a/MAINTAINERS b/MAINTAINERS
index 61f8b45..f1f9250 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -268,6 +268,7 @@
 F: hw/xilinx_zynq.c
 F: hw/zynq_slcr.c
 F: hw/cadence_*
+F: hw/xilinx_spips.c
 
 CRIS Machines
 -------------
@@ -349,9 +350,31 @@
 405
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
-S: Maintained
+S: Odd Fixes
 F: hw/ppc405_boards.c
 
+Bamboo
+M: Alexander Graf <agraf@suse.de>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/ppc440_bamboo.c
+
+e500
+M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/ppc/e500.[hc]
+F: hw/ppc/e500plat.c
+
+mpc8544ds
+M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/ppc/mpc8544ds.c
+F: hw/mpc8544_guts.c
+
 New World
 M: Alexander Graf <agraf@suse.de>
 L: qemu-ppc@nongnu.org
@@ -374,6 +397,19 @@
 F: hw/ppc_prep.c
 F: hw/prep_pci.[hc]
 
+sPAPR
+M: David Gibson <david@gibson.dropbear.id.au>
+M: Alexander Graf <agraf@suse.de>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/spapr*
+
+virtex_ml507
+M: Edgar E. Iglesias <edgar.iglesias@gmail.com>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/virtex_ml507.c
+
 SH4 Machines
 ------------
 R2D
@@ -457,6 +493,19 @@
 F: hw/pci*
 F: hw/piix*
 
+ppc4xx
+M: Alexander Graf <agraf@suse.de>
+L: qemu-ppc@nongnu.org
+S: Odd Fixes
+F: hw/ppc4xx*.[hc]
+
+ppce500
+M: Alexander Graf <agraf@suse.de>
+M: Scott Wood <scottwood@freescale.com>
+L: qemu-ppc@nongnu.org
+S: Supported
+F: hw/ppce500_*
+
 SCSI
 M: Paolo Bonzini <pbonzini@redhat.com>
 S: Supported
@@ -469,11 +518,22 @@
 S: Odd Fixes
 F: hw/lsi53c895a.c
 
+SSI
+M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
+S: Maintained
+F: hw/ssi.*
+F: hw/m25p80.c
+
 USB
 M: Gerd Hoffmann <kraxel@redhat.com>
 S: Maintained
 F: hw/usb*
 
+VFIO
+M: Alex Williamson <alex.williamson@redhat.com>
+S: Supported
+F: hw/vfio*
+
 vhost
 M: Michael S. Tsirkin <mst@redhat.com>
 S: Supported
@@ -512,6 +572,7 @@
 F: hw/xilinx_ethlite.c
 F: hw/xilinx_timer.c
 F: hw/xilinx.h
+F: hw/xilinx_spi.c
 
 Subsystems
 ----------
@@ -531,6 +592,12 @@
 S: Maintained
 F: qemu-char.c
 
+CPU
+M: Andreas Färber <afaerber@suse.de>
+S: Supported
+F: qom/cpu.c
+F: include/qemu/cpu.h
+
 Device Tree
 M: Peter Crosthwaite <peter.crosthwaite@petalogix.com>
 M: Alexander Graf <agraf@suse.de>
diff --git a/Makefile b/Makefile
index 1cd5bc8..a9c22bf 100644
--- a/Makefile
+++ b/Makefile
@@ -52,8 +52,13 @@
 SUBDIR_DEVICES_MAK=$(patsubst %, %/config-devices.mak, $(TARGET_DIRS))
 SUBDIR_DEVICES_MAK_DEP=$(patsubst %, %/config-devices.mak.d, $(TARGET_DIRS))
 
+ifeq ($(SUBDIR_DEVICES_MAK),)
+config-all-devices.mak:
+	$(call quiet-command,echo '# no devices' > $@,"  GEN   $@")
+else
 config-all-devices.mak: $(SUBDIR_DEVICES_MAK)
 	$(call quiet-command,cat $(SUBDIR_DEVICES_MAK) | grep =y | sort -u > $@,"  GEN   $@")
+endif
 
 -include $(SUBDIR_DEVICES_MAK_DEP)
 
@@ -157,7 +162,8 @@
 	iohandler.o cutils.o iov.o async.o
 tools-obj-$(CONFIG_POSIX) += compatfd.o
 
-qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y)
+qemu-img$(EXESUF): qemu-img.o $(tools-obj-y) $(block-obj-y) $(qapi-obj-y) \
+                              qapi-visit.o qapi-types.o
 qemu-nbd$(EXESUF): qemu-nbd.o $(tools-obj-y) $(block-obj-y)
 qemu-io$(EXESUF): qemu-io.o cmd.o $(tools-obj-y) $(block-obj-y)
 
@@ -208,7 +214,7 @@
 
 qemu-ga$(EXESUF): qemu-ga.o $(qga-obj-y) $(tools-obj-y) $(qapi-obj-y) $(qobject-obj-y) $(version-obj-y)
 
-QEMULIBS=libhw32 libhw64 libuser libdis libdis-user
+QEMULIBS=libuser libdis libdis-user
 
 clean:
 # avoid old build problems by removing potentially incorrect old files
@@ -297,7 +303,6 @@
 
 install-sysconfig: install-datadir install-confdir
 	$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/target-x86_64.conf "$(DESTDIR)$(qemu_confdir)"
-	$(INSTALL_DATA) $(SRC_PATH)/sysconfigs/target/cpus-x86_64.conf "$(DESTDIR)$(qemu_datadir)"
 
 install: all $(if $(BUILD_DOCS),install-doc) install-sysconfig install-datadir
 	$(INSTALL_DIR) "$(DESTDIR)$(bindir)"
diff --git a/Makefile.hw b/Makefile.hw
deleted file mode 100644
index 59f5b48..0000000
--- a/Makefile.hw
+++ /dev/null
@@ -1,23 +0,0 @@
-# Makefile for qemu target independent devices.
-
-include ../config-host.mak
-include ../config-all-devices.mak
-include config.mak
-include $(SRC_PATH)/rules.mak
-
-.PHONY: all
-
-$(call set-vpath, $(SRC_PATH))
-
-QEMU_CFLAGS+=-I..
-QEMU_CFLAGS += -I$(SRC_PATH)/include
-
-include $(SRC_PATH)/Makefile.objs
-
-all: $(hw-obj-y)
-# Dummy command so that make thinks it has done something
-	@true
-
-clean:
-	rm -f $(addsuffix *.o, $(sort $(dir $(hw-obj-y))))
-	rm -f $(addsuffix *.d, $(sort $(dir $(hw-obj-y))))
diff --git a/Makefile.objs b/Makefile.objs
index 4412757..74b3542 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -42,7 +42,8 @@
 # block-obj-y is code used by both qemu system emulation and qemu-img
 
 block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o
-block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o
+block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o
+block-obj-y += qemu-progress.o qemu-sockets.o uri.o
 block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y)
 block-obj-$(CONFIG_POSIX) += posix-aio-compat.o
 block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
@@ -59,7 +60,7 @@
 # suppress *all* target specific code in case of system emulation, i.e. a
 # single QEMU executable should support all CPUs and machines.
 
-common-obj-y = $(block-obj-y) blockdev.o
+common-obj-y = $(block-obj-y) blockdev.o block/
 common-obj-y += net.o net/
 common-obj-y += qom/
 common-obj-y += readline.o console.o cursor.o
@@ -89,10 +90,13 @@
 common-obj-y += ui/
 common-obj-y += bt-host.o bt-vhci.o
 
+common-obj-y += dma-helpers.o
 common-obj-y += iov.o acl.o
 common-obj-$(CONFIG_POSIX) += compatfd.o
 common-obj-y += notify.o event_notifier.o
 common-obj-y += qemu-timer.o qemu-timer-common.o
+common-obj-y += qtest.o
+common-obj-y += vl.o
 
 common-obj-$(CONFIG_SLIRP) += slirp/
 
@@ -115,11 +119,6 @@
 user-obj-y += qom/
 
 ######################################################################
-# libhw
-
-hw-obj-y = vl.o dma-helpers.o qtest.o hw/
-
-######################################################################
 # libdis
 # NOTE: the disassembler code is only needed for debugging
 
@@ -239,7 +238,6 @@
 QEMU_CFLAGS+=$(GLIB_CFLAGS)
 
 nested-vars += \
-	hw-obj-y \
 	qga-obj-y \
 	block-obj-y \
 	qom-obj-y \
diff --git a/Makefile.target b/Makefile.target
index 7892a8d..3822bc5 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -4,9 +4,6 @@
 include config-devices.mak
 include config-target.mak
 include $(SRC_PATH)/rules.mak
-ifneq ($(HWDIR),)
-include $(HWDIR)/config.mak
-endif
 
 $(call set-vpath, $(SRC_PATH))
 ifdef CONFIG_LINUX
@@ -80,14 +77,6 @@
 
 tci-dis.o: QEMU_CFLAGS += -I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/tci
 
-# HELPER_CFLAGS is used for all the legacy code compiled with static register
-# variables
-user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
-
-# Note: this is a workaround. The real fix is to avoid compiling
-# cpu_signal_handler() in user-exec.c.
-%/signal.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
-
 #########################################################
 # Linux user emulator target
 
@@ -165,7 +154,6 @@
 ifdef CONFIG_SOFTMMU
 all-obj-y += $(addprefix ../, $(common-obj-y))
 all-obj-y += $(addprefix ../libdis/, $(libdis-y))
-all-obj-y += $(addprefix $(HWDIR)/, $(hw-obj-y))
 all-obj-y += $(addprefix ../, $(trace-obj-y))
 else
 all-obj-y += $(addprefix ../libuser/, $(user-obj-y))
diff --git a/QMP/qemu-ga-client b/QMP/qemu-ga-client
new file mode 100755
index 0000000..46676c3
--- /dev/null
+++ b/QMP/qemu-ga-client
@@ -0,0 +1,299 @@
+#!/usr/bin/python
+
+# QEMU Guest Agent Client
+#
+# Copyright (C) 2012 Ryota Ozaki <ozaki.ryota@gmail.com>
+#
+# This work is licensed under the terms of the GNU GPL, version 2.  See
+# the COPYING file in the top-level directory.
+#
+# Usage:
+#
+# Start QEMU with:
+#
+# # qemu [...] -chardev socket,path=/tmp/qga.sock,server,nowait,id=qga0 \
+#   -device virtio-serial -device virtserialport,chardev=qga0,name=org.qemu.guest_agent.0
+#
+# Run the script:
+#
+# $ qemu-ga-client --address=/tmp/qga.sock <command> [args...]
+#
+# or
+#
+# $ export QGA_CLIENT_ADDRESS=/tmp/qga.sock
+# $ qemu-ga-client <command> [args...]
+#
+# For example:
+#
+# $ qemu-ga-client cat /etc/resolv.conf
+# # Generated by NetworkManager
+# nameserver 10.0.2.3
+# $ qemu-ga-client fsfreeze status
+# thawed
+# $ qemu-ga-client fsfreeze freeze
+# 2 filesystems frozen
+#
+# See also: http://wiki.qemu.org/Features/QAPI/GuestAgent
+#
+
+import base64
+import random
+
+import qmp
+
+
+class QemuGuestAgent(qmp.QEMUMonitorProtocol):
+    def __getattr__(self, name):
+        def wrapper(**kwds):
+            return self.command('guest-' + name.replace('_', '-'), **kwds)
+        return wrapper
+
+
+class QemuGuestAgentClient:
+    error = QemuGuestAgent.error
+
+    def __init__(self, address):
+        self.qga = QemuGuestAgent(address)
+        self.qga.connect(negotiate=False)
+
+    def sync(self, timeout=3):
+        # Avoid being blocked forever
+        if not self.ping(timeout):
+            raise EnvironmentError('Agent seems not alive')
+        uid = random.randint(0, (1 << 32) - 1)
+        while True:
+            ret = self.qga.sync(id=uid)
+            if isinstance(ret, int) and int(ret) == uid:
+                break
+
+    def __file_read_all(self, handle):
+        eof = False
+        data = ''
+        while not eof:
+            ret = self.qga.file_read(handle=handle, count=1024)
+            _data = base64.b64decode(ret['buf-b64'])
+            data += _data
+            eof = ret['eof']
+        return data
+
+    def read(self, path):
+        handle = self.qga.file_open(path=path)
+        try:
+            data = self.__file_read_all(handle)
+        finally:
+            self.qga.file_close(handle=handle)
+        return data
+
+    def info(self):
+        info = self.qga.info()
+
+        msgs = []
+        msgs.append('version: ' + info['version'])
+        msgs.append('supported_commands:')
+        enabled = [c['name'] for c in info['supported_commands'] if c['enabled']]
+        msgs.append('\tenabled: ' + ', '.join(enabled))
+        disabled = [c['name'] for c in info['supported_commands'] if not c['enabled']]
+        msgs.append('\tdisabled: ' + ', '.join(disabled))
+
+        return '\n'.join(msgs)
+
+    def __gen_ipv4_netmask(self, prefixlen):
+        mask = int('1' * prefixlen + '0' * (32 - prefixlen), 2)
+        return '.'.join([str(mask >> 24),
+                         str((mask >> 16) & 0xff),
+                         str((mask >> 8) & 0xff),
+                         str(mask & 0xff)])
+
+    def ifconfig(self):
+        nifs = self.qga.network_get_interfaces()
+
+        msgs = []
+        for nif in nifs:
+            msgs.append(nif['name'] + ':')
+            if 'ip-addresses' in nif:
+                for ipaddr in nif['ip-addresses']:
+                    if ipaddr['ip-address-type'] == 'ipv4':
+                        addr = ipaddr['ip-address']
+                        mask = self.__gen_ipv4_netmask(int(ipaddr['prefix']))
+                        msgs.append("\tinet %s  netmask %s" % (addr, mask))
+                    elif ipaddr['ip-address-type'] == 'ipv6':
+                        addr = ipaddr['ip-address']
+                        prefix = ipaddr['prefix']
+                        msgs.append("\tinet6 %s  prefixlen %s" % (addr, prefix))
+            if nif['hardware-address'] != '00:00:00:00:00:00':
+                msgs.append("\tether " + nif['hardware-address'])
+
+        return '\n'.join(msgs)
+
+    def ping(self, timeout):
+        self.qga.settimeout(timeout)
+        try:
+            self.qga.ping()
+        except self.qga.timeout:
+            return False
+        return True
+
+    def fsfreeze(self, cmd):
+        if cmd not in ['status', 'freeze', 'thaw']:
+            raise StandardError('Invalid command: ' + cmd)
+
+        return getattr(self.qga, 'fsfreeze' + '_' + cmd)()
+
+    def fstrim(self, minimum=0):
+        return getattr(self.qga, 'fstrim')(minimum=minimum)
+
+    def suspend(self, mode):
+        if mode not in ['disk', 'ram', 'hybrid']:
+            raise StandardError('Invalid mode: ' + mode)
+
+        try:
+            getattr(self.qga, 'suspend' + '_' + mode)()
+            # On error exception will raise
+        except self.qga.timeout:
+            # On success command will timed out
+            return
+
+    def shutdown(self, mode='powerdown'):
+        if mode not in ['powerdown', 'halt', 'reboot']:
+            raise StandardError('Invalid mode: ' + mode)
+
+        try:
+            self.qga.shutdown(mode=mode)
+        except self.qga.timeout:
+            return
+
+
+def _cmd_cat(client, args):
+    if len(args) != 1:
+        print('Invalid argument')
+        print('Usage: cat <file>')
+        sys.exit(1)
+    print(client.read(args[0]))
+
+
+def _cmd_fsfreeze(client, args):
+    usage = 'Usage: fsfreeze status|freeze|thaw'
+    if len(args) != 1:
+        print('Invalid argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['status', 'freeze', 'thaw']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    cmd = args[0]
+    ret = client.fsfreeze(cmd)
+    if cmd == 'status':
+        print(ret)
+    elif cmd == 'freeze':
+        print("%d filesystems frozen" % ret)
+    else:
+        print("%d filesystems thawed" % ret)
+
+
+def _cmd_fstrim(client, args):
+    if len(args) == 0:
+        minimum = 0
+    else:
+        minimum = int(args[0])
+    print(client.fstrim(minimum))
+
+
+def _cmd_ifconfig(client, args):
+    print(client.ifconfig())
+
+
+def _cmd_info(client, args):
+    print(client.info())
+
+
+def _cmd_ping(client, args):
+    if len(args) == 0:
+        timeout = 3
+    else:
+        timeout = float(args[0])
+    alive = client.ping(timeout)
+    if not alive:
+        print("Not responded in %s sec" % args[0])
+        sys.exit(1)
+
+
+def _cmd_suspend(client, args):
+    usage = 'Usage: suspend disk|ram|hybrid'
+    if len(args) != 1:
+        print('Less argument')
+        print(usage)
+        sys.exit(1)
+    if args[0] not in ['disk', 'ram', 'hybrid']:
+        print('Invalid command: ' + args[0])
+        print(usage)
+        sys.exit(1)
+    client.suspend(args[0])
+
+
+def _cmd_shutdown(client, args):
+    client.shutdown()
+_cmd_powerdown = _cmd_shutdown
+
+
+def _cmd_halt(client, args):
+    client.shutdown('halt')
+
+
+def _cmd_reboot(client, args):
+    client.shutdown('reboot')
+
+
+commands = [m.replace('_cmd_', '') for m in dir() if '_cmd_' in m]
+
+
+def main(address, cmd, args):
+    if not os.path.exists(address):
+        print('%s not found' % address)
+        sys.exit(1)
+
+    if cmd not in commands:
+        print('Invalid command: ' + cmd)
+        print('Available commands: ' + ', '.join(commands))
+        sys.exit(1)
+
+    try:
+        client = QemuGuestAgentClient(address)
+    except QemuGuestAgent.error, e:
+        import errno
+
+        print(e)
+        if e.errno == errno.ECONNREFUSED:
+            print('Hint: qemu is not running?')
+        sys.exit(1)
+
+    if cmd != 'ping':
+        client.sync()
+
+    globals()['_cmd_' + cmd](client, args)
+
+
+if __name__ == '__main__':
+    import sys
+    import os
+    import optparse
+
+    address = os.environ['QGA_CLIENT_ADDRESS'] if 'QGA_CLIENT_ADDRESS' in os.environ else None
+
+    usage = "%prog [--address=<unix_path>|<ipv4_address>] <command> [args...]\n"
+    usage += '<command>: ' + ', '.join(commands)
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option('--address', action='store', type='string',
+                      default=address, help='Specify a ip:port pair or a unix socket path')
+    options, args = parser.parse_args()
+
+    address = options.address
+    if address is None:
+        parser.error('address is not specified')
+        sys.exit(1)
+
+    if len(args) == 0:
+        parser.error('Less argument')
+        sys.exit(1)
+
+    main(address, args[0], args[1:])
diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt
index 2878058..987c575 100644
--- a/QMP/qmp-events.txt
+++ b/QMP/qmp-events.txt
@@ -50,7 +50,8 @@
 
 Data:
 
-- "type":     Job type ("stream" for image streaming, json-string)
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
 - "device":   Device name (json-string)
 - "len":      Maximum progress value (json-int)
 - "offset":   Current progress value (json-int)
@@ -73,7 +74,8 @@
 
 Data:
 
-- "type":     Job type ("stream" for image streaming, json-string)
+- "type":     Job type (json-string; "stream" for image streaming
+                                     "commit" for block commit)
 - "device":   Device name (json-string)
 - "len":      Maximum progress value (json-int)
 - "offset":   Current progress value (json-int)
@@ -94,6 +96,28 @@
                "speed": 0 },
      "timestamp": { "seconds": 1267061043, "microseconds": 959568 } }
 
+BLOCK_JOB_ERROR
+---------------
+
+Emitted when a block job encounters an error.
+
+Data:
+
+- "device": device name (json-string)
+- "operation": I/O operation (json-string, "read" or "write")
+- "action": action that has been taken, it's one of the following (json-string):
+    "ignore": error has been ignored, the job may fail later
+    "report": error will be reported and the job canceled
+    "stop": error caused job to be paused
+
+Example:
+
+{ "event": "BLOCK_JOB_ERROR",
+    "data": { "device": "ide0-hd1",
+              "operation": "write",
+              "action": "stop" },
+    "timestamp": { "seconds": 1265044230, "microseconds": 450486 } }
+
 DEVICE_TRAY_MOVED
 -----------------
 
diff --git a/QMP/qmp.py b/QMP/qmp.py
index 36ecc1d..33c7d36 100644
--- a/QMP/qmp.py
+++ b/QMP/qmp.py
@@ -49,7 +49,6 @@
         return socket.socket(family, socket.SOCK_STREAM)
 
     def __negotiate_capabilities(self):
-        self.__sockfile = self.__sock.makefile()
         greeting = self.__json_read()
         if greeting is None or not greeting.has_key('QMP'):
             raise QMPConnectError
@@ -73,7 +72,7 @@
 
     error = socket.error
 
-    def connect(self):
+    def connect(self, negotiate=True):
         """
         Connect to the QMP Monitor and perform capabilities negotiation.
 
@@ -83,7 +82,9 @@
         @raise QMPCapabilitiesError if fails to negotiate capabilities
         """
         self.__sock.connect(self.__address)
-        return self.__negotiate_capabilities()
+        self.__sockfile = self.__sock.makefile()
+        if negotiate:
+            return self.__negotiate_capabilities()
 
     def accept(self):
         """
@@ -161,3 +162,8 @@
     def close(self):
         self.__sock.close()
         self.__sockfile.close()
+
+    timeout = socket.timeout
+
+    def settimeout(self, timeout):
+        self.__sock.settimeout(timeout)
diff --git a/aio.c b/aio.c
index 0a9eb10..c738a4e 100644
--- a/aio.c
+++ b/aio.c
@@ -119,7 +119,7 @@
         return true;
     }
 
-    walking_handlers = 1;
+    walking_handlers++;
 
     FD_ZERO(&rdfds);
     FD_ZERO(&wrfds);
@@ -147,7 +147,7 @@
         }
     }
 
-    walking_handlers = 0;
+    walking_handlers--;
 
     /* No AIO operations?  Get us out of here */
     if (!busy) {
@@ -159,14 +159,14 @@
 
     /* if we have any readable fds, dispatch event */
     if (ret > 0) {
-        walking_handlers = 1;
-
         /* we have to walk very carefully in case
          * qemu_aio_set_fd_handler is called while we're walking */
         node = QLIST_FIRST(&aio_handlers);
         while (node) {
             AioHandler *tmp;
 
+            walking_handlers++;
+
             if (!node->deleted &&
                 FD_ISSET(node->fd, &rdfds) &&
                 node->io_read) {
@@ -181,13 +181,13 @@
             tmp = node;
             node = QLIST_NEXT(node, node);
 
-            if (tmp->deleted) {
+            walking_handlers--;
+
+            if (!walking_handlers && tmp->deleted) {
                 QLIST_REMOVE(tmp, node);
                 g_free(tmp);
             }
         }
-
-        walking_handlers = 0;
     }
 
     return true;
diff --git a/arch_init.c b/arch_init.c
index 5a1173e..9904f95 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -136,7 +136,6 @@
     /* Indicates it is an user config file (disabled by -no-user-config) */
     bool userconfig;
 } default_config_files[] = {
-    { CONFIG_QEMU_DATADIR "/cpus-" TARGET_ARCH ".conf",  false },
     { CONFIG_QEMU_CONFDIR "/qemu.conf",                   true },
     { CONFIG_QEMU_CONFDIR "/target-" TARGET_ARCH ".conf", true },
     { NULL }, /* end of list */
@@ -562,7 +561,7 @@
         if ((i & 63) == 0) {
             uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 1000000;
             if (t1 > MAX_WAIT) {
-                DPRINTF("big wait: " PRIu64 " milliseconds, %d iterations\n",
+                DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n",
                         t1, i);
                 break;
             }
@@ -587,7 +586,7 @@
 
     expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
 
-    DPRINTF("ram_save_live: expected(" PRIu64 ") <= max(" PRIu64 ")?\n",
+    DPRINTF("ram_save_live: expected(%" PRIu64 ") <= max(%" PRIu64 ")?\n",
             expected_time, migrate_max_downtime());
 
     if (expected_time <= migrate_max_downtime()) {
@@ -799,8 +798,8 @@
     } while (!(flags & RAM_SAVE_FLAG_EOS));
 
 done:
-    DPRINTF("Completed load of VM with exit code %d seq iteration " PRIu64 "\n",
-            ret, seq_iter);
+    DPRINTF("Completed load of VM with exit code %d seq iteration "
+            "%" PRIu64 "\n", ret, seq_iter);
     return ret;
 }
 
@@ -922,11 +921,16 @@
     if (is_help_option(optarg)) {
     show_valid_cards:
 
+#ifdef HAS_AUDIO_CHOICE
         printf("Valid sound card names (comma separated):\n");
         for (c = soundhw; c->name; ++c) {
             printf ("%-11s %s\n", c->name, c->descr);
         }
         printf("\n-soundhw all will enable all of the above\n");
+#else
+        printf("Machine has no user-selectable audio hardware "
+               "(it may or may not have always-present audio hardware).\n");
+#endif
         exit(!is_help_option(optarg));
     }
     else {
diff --git a/audio/audio_template.h b/audio/audio_template.h
index 519432a..16f7880 100644
--- a/audio/audio_template.h
+++ b/audio/audio_template.h
@@ -410,15 +410,15 @@
     SW *old_sw = NULL;
 #endif
 
-    ldebug ("open %s, freq %d, nchannels %d, fmt %d\n",
-            name, as->freq, as->nchannels, as->fmt);
-
     if (audio_bug (AUDIO_FUNC, !card || !name || !callback_fn || !as)) {
         dolog ("card=%p name=%p callback_fn=%p as=%p\n",
                card, name, callback_fn, as);
         goto fail;
     }
 
+    ldebug ("open %s, freq %d, nchannels %d, fmt %d\n",
+            name, as->freq, as->nchannels, as->fmt);
+
     if (audio_bug (AUDIO_FUNC, audio_validate_settings (as))) {
         audio_print_settings (as);
         goto fail;
diff --git a/block-migration.c b/block-migration.c
index 7def8ab..ed93301 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -519,6 +519,8 @@
     BlkMigDevState *bmds;
     BlkMigBlock *blk;
 
+    bdrv_drain_all();
+
     set_dirty_tracking(0);
 
     while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) {
diff --git a/block.c b/block.c
index 470bdcc..e95f613 100644
--- a/block.c
+++ b/block.c
@@ -26,8 +26,10 @@
 #include "trace.h"
 #include "monitor.h"
 #include "block_int.h"
+#include "blockjob.h"
 #include "module.h"
 #include "qjson.h"
+#include "sysemu.h"
 #include "qemu-coroutine.h"
 #include "qmp-commands.h"
 #include "qemu-timer.h"
@@ -433,7 +435,11 @@
         return -EOVERFLOW;
     }
     fd = mkstemp(filename);
-    if (fd < 0 || close(fd)) {
+    if (fd < 0) {
+        return -errno;
+    }
+    if (close(fd) != 0) {
+        unlink(filename);
         return -errno;
     }
     return 0;
@@ -664,7 +670,7 @@
         open_flags |= BDRV_O_RDWR;
     }
 
-    bs->keep_read_only = bs->read_only = !(open_flags & BDRV_O_RDWR);
+    bs->read_only = !(open_flags & BDRV_O_RDWR);
 
     /* Open the image, either directly or using a protocol */
     if (drv->bdrv_file_open) {
@@ -804,6 +810,10 @@
         goto unlink_and_fail;
     }
 
+    if (flags & BDRV_O_RDWR) {
+        flags |= BDRV_O_ALLOW_RDWR;
+    }
+
     /* Open the image */
     ret = bdrv_open_common(bs, filename, flags, drv);
     if (ret < 0) {
@@ -833,12 +843,6 @@
             bdrv_close(bs);
             return ret;
         }
-        if (bs->is_temporary) {
-            bs->backing_hd->keep_read_only = !(flags & BDRV_O_RDWR);
-        } else {
-            /* base image inherits from "parent" */
-            bs->backing_hd->keep_read_only = bs->keep_read_only;
-        }
     }
 
     if (!bdrv_key_required(bs)) {
@@ -859,6 +863,238 @@
     return ret;
 }
 
+typedef struct BlockReopenQueueEntry {
+     bool prepared;
+     BDRVReopenState state;
+     QSIMPLEQ_ENTRY(BlockReopenQueueEntry) entry;
+} BlockReopenQueueEntry;
+
+/*
+ * Adds a BlockDriverState to a simple queue for an atomic, transactional
+ * reopen of multiple devices.
+ *
+ * bs_queue can either be an existing BlockReopenQueue that has had QSIMPLE_INIT
+ * already performed, or alternatively may be NULL a new BlockReopenQueue will
+ * be created and initialized. This newly created BlockReopenQueue should be
+ * passed back in for subsequent calls that are intended to be of the same
+ * atomic 'set'.
+ *
+ * bs is the BlockDriverState to add to the reopen queue.
+ *
+ * flags contains the open flags for the associated bs
+ *
+ * returns a pointer to bs_queue, which is either the newly allocated
+ * bs_queue, or the existing bs_queue being used.
+ *
+ */
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+                                    BlockDriverState *bs, int flags)
+{
+    assert(bs != NULL);
+
+    BlockReopenQueueEntry *bs_entry;
+    if (bs_queue == NULL) {
+        bs_queue = g_new0(BlockReopenQueue, 1);
+        QSIMPLEQ_INIT(bs_queue);
+    }
+
+    if (bs->file) {
+        bdrv_reopen_queue(bs_queue, bs->file, flags);
+    }
+
+    bs_entry = g_new0(BlockReopenQueueEntry, 1);
+    QSIMPLEQ_INSERT_TAIL(bs_queue, bs_entry, entry);
+
+    bs_entry->state.bs = bs;
+    bs_entry->state.flags = flags;
+
+    return bs_queue;
+}
+
+/*
+ * Reopen multiple BlockDriverStates atomically & transactionally.
+ *
+ * The queue passed in (bs_queue) must have been built up previous
+ * via bdrv_reopen_queue().
+ *
+ * Reopens all BDS specified in the queue, with the appropriate
+ * flags.  All devices are prepared for reopen, and failure of any
+ * device will cause all device changes to be abandonded, and intermediate
+ * data cleaned up.
+ *
+ * If all devices prepare successfully, then the changes are committed
+ * to all devices.
+ *
+ */
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp)
+{
+    int ret = -1;
+    BlockReopenQueueEntry *bs_entry, *next;
+    Error *local_err = NULL;
+
+    assert(bs_queue != NULL);
+
+    bdrv_drain_all();
+
+    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+        if (bdrv_reopen_prepare(&bs_entry->state, bs_queue, &local_err)) {
+            error_propagate(errp, local_err);
+            goto cleanup;
+        }
+        bs_entry->prepared = true;
+    }
+
+    /* If we reach this point, we have success and just need to apply the
+     * changes
+     */
+    QSIMPLEQ_FOREACH(bs_entry, bs_queue, entry) {
+        bdrv_reopen_commit(&bs_entry->state);
+    }
+
+    ret = 0;
+
+cleanup:
+    QSIMPLEQ_FOREACH_SAFE(bs_entry, bs_queue, entry, next) {
+        if (ret && bs_entry->prepared) {
+            bdrv_reopen_abort(&bs_entry->state);
+        }
+        g_free(bs_entry);
+    }
+    g_free(bs_queue);
+    return ret;
+}
+
+
+/* Reopen a single BlockDriverState with the specified flags. */
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp)
+{
+    int ret = -1;
+    Error *local_err = NULL;
+    BlockReopenQueue *queue = bdrv_reopen_queue(NULL, bs, bdrv_flags);
+
+    ret = bdrv_reopen_multiple(queue, &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+    }
+    return ret;
+}
+
+
+/*
+ * Prepares a BlockDriverState for reopen. All changes are staged in the
+ * 'opaque' field of the BDRVReopenState, which is used and allocated by
+ * the block driver layer .bdrv_reopen_prepare()
+ *
+ * bs is the BlockDriverState to reopen
+ * flags are the new open flags
+ * queue is the reopen queue
+ *
+ * Returns 0 on success, non-zero on error.  On error errp will be set
+ * as well.
+ *
+ * On failure, bdrv_reopen_abort() will be called to clean up any data.
+ * It is the responsibility of the caller to then call the abort() or
+ * commit() for any other BDS that have been left in a prepare() state
+ *
+ */
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
+                        Error **errp)
+{
+    int ret = -1;
+    Error *local_err = NULL;
+    BlockDriver *drv;
+
+    assert(reopen_state != NULL);
+    assert(reopen_state->bs->drv != NULL);
+    drv = reopen_state->bs->drv;
+
+    /* if we are to stay read-only, do not allow permission change
+     * to r/w */
+    if (!(reopen_state->bs->open_flags & BDRV_O_ALLOW_RDWR) &&
+        reopen_state->flags & BDRV_O_RDWR) {
+        error_set(errp, QERR_DEVICE_IS_READ_ONLY,
+                  reopen_state->bs->device_name);
+        goto error;
+    }
+
+
+    ret = bdrv_flush(reopen_state->bs);
+    if (ret) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR, "Error (%s) flushing drive",
+                  strerror(-ret));
+        goto error;
+    }
+
+    if (drv->bdrv_reopen_prepare) {
+        ret = drv->bdrv_reopen_prepare(reopen_state, queue, &local_err);
+        if (ret) {
+            if (local_err != NULL) {
+                error_propagate(errp, local_err);
+            } else {
+                error_set(errp, QERR_OPEN_FILE_FAILED,
+                          reopen_state->bs->filename);
+            }
+            goto error;
+        }
+    } else {
+        /* It is currently mandatory to have a bdrv_reopen_prepare()
+         * handler for each supported drv. */
+        error_set(errp, QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED,
+                  drv->format_name, reopen_state->bs->device_name,
+                 "reopening of file");
+        ret = -1;
+        goto error;
+    }
+
+    ret = 0;
+
+error:
+    return ret;
+}
+
+/*
+ * Takes the staged changes for the reopen from bdrv_reopen_prepare(), and
+ * makes them final by swapping the staging BlockDriverState contents into
+ * the active BlockDriverState contents.
+ */
+void bdrv_reopen_commit(BDRVReopenState *reopen_state)
+{
+    BlockDriver *drv;
+
+    assert(reopen_state != NULL);
+    drv = reopen_state->bs->drv;
+    assert(drv != NULL);
+
+    /* If there are any driver level actions to take */
+    if (drv->bdrv_reopen_commit) {
+        drv->bdrv_reopen_commit(reopen_state);
+    }
+
+    /* set BDS specific flags now */
+    reopen_state->bs->open_flags         = reopen_state->flags;
+    reopen_state->bs->enable_write_cache = !!(reopen_state->flags &
+                                              BDRV_O_CACHE_WB);
+    reopen_state->bs->read_only = !(reopen_state->flags & BDRV_O_RDWR);
+}
+
+/*
+ * Abort the reopen, and delete and free the staged changes in
+ * reopen_state
+ */
+void bdrv_reopen_abort(BDRVReopenState *reopen_state)
+{
+    BlockDriver *drv;
+
+    assert(reopen_state != NULL);
+    drv = reopen_state->bs->drv;
+    assert(drv != NULL);
+
+    if (drv->bdrv_reopen_abort) {
+        drv->bdrv_reopen_abort(reopen_state);
+    }
+}
+
+
 void bdrv_close(BlockDriverState *bs)
 {
     bdrv_flush(bs);
@@ -897,10 +1133,10 @@
             bdrv_delete(bs->file);
             bs->file = NULL;
         }
-
-        bdrv_dev_change_media_cb(bs, false);
     }
 
+    bdrv_dev_change_media_cb(bs, false);
+
     /*throttling disk I/O limits*/
     if (bs->io_limits_enabled) {
         bdrv_io_limits_disable(bs);
@@ -1152,7 +1388,8 @@
 }
 
 void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockQMPEventAction action, int is_read)
+                               enum MonitorEvent ev,
+                               BlockErrorAction action, bool is_read)
 {
     QObject *data;
     const char *action_str;
@@ -1175,7 +1412,7 @@
                               bdrv->device_name,
                               action_str,
                               is_read ? "read" : "write");
-    monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data);
+    monitor_protocol_event(ev, data);
 
     qobject_decref(data);
 }
@@ -1265,13 +1502,11 @@
 int bdrv_commit(BlockDriverState *bs)
 {
     BlockDriver *drv = bs->drv;
-    BlockDriver *backing_drv;
     int64_t sector, total_sectors;
     int n, ro, open_flags;
-    int ret = 0, rw_ret = 0;
+    int ret = 0;
     uint8_t *buf;
-    char filename[1024];
-    BlockDriverState *bs_rw, *bs_ro;
+    char filename[PATH_MAX];
 
     if (!drv)
         return -ENOMEDIUM;
@@ -1280,42 +1515,19 @@
         return -ENOTSUP;
     }
 
-    if (bs->backing_hd->keep_read_only) {
-        return -EACCES;
-    }
-
     if (bdrv_in_use(bs) || bdrv_in_use(bs->backing_hd)) {
         return -EBUSY;
     }
 
-    backing_drv = bs->backing_hd->drv;
     ro = bs->backing_hd->read_only;
-    strncpy(filename, bs->backing_hd->filename, sizeof(filename));
+    /* Use pstrcpy (not strncpy): filename must be NUL-terminated. */
+    pstrcpy(filename, sizeof(filename), bs->backing_hd->filename);
     open_flags =  bs->backing_hd->open_flags;
 
     if (ro) {
-        /* re-open as RW */
-        bdrv_delete(bs->backing_hd);
-        bs->backing_hd = NULL;
-        bs_rw = bdrv_new("");
-        rw_ret = bdrv_open(bs_rw, filename, open_flags | BDRV_O_RDWR,
-            backing_drv);
-        if (rw_ret < 0) {
-            bdrv_delete(bs_rw);
-            /* try to re-open read-only */
-            bs_ro = bdrv_new("");
-            ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
-                backing_drv);
-            if (ret < 0) {
-                bdrv_delete(bs_ro);
-                /* drive not functional anymore */
-                bs->drv = NULL;
-                return ret;
-            }
-            bs->backing_hd = bs_ro;
-            return rw_ret;
+        if (bdrv_reopen(bs->backing_hd, open_flags | BDRV_O_RDWR, NULL)) {
+            return -EACCES;
         }
-        bs->backing_hd = bs_rw;
     }
 
     total_sectors = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
@@ -1352,20 +1564,8 @@
     g_free(buf);
 
     if (ro) {
-        /* re-open as RO */
-        bdrv_delete(bs->backing_hd);
-        bs->backing_hd = NULL;
-        bs_ro = bdrv_new("");
-        ret = bdrv_open(bs_ro, filename, open_flags & ~BDRV_O_RDWR,
-            backing_drv);
-        if (ret < 0) {
-            bdrv_delete(bs_ro);
-            /* drive not functional anymore */
-            bs->drv = NULL;
-            return ret;
-        }
-        bs->backing_hd = bs_ro;
-        bs->backing_hd->keep_read_only = 0;
+        /* ignoring error return here */
+        bdrv_reopen(bs->backing_hd, open_flags & ~BDRV_O_RDWR, NULL);
     }
 
     return ret;
@@ -1528,6 +1728,149 @@
     return ret;
 }
 
+/*
+ * Finds the image layer in the chain that has 'bs' as its backing file.
+ *
+ * active is the current topmost image.
+ *
+ * Returns NULL if bs is not found in active's image chain,
+ * or if active == bs.
+ */
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+                                    BlockDriverState *bs)
+{
+    BlockDriverState *overlay = NULL;
+    BlockDriverState *intermediate;
+
+    assert(active != NULL);
+    assert(bs != NULL);
+
+    /* if bs is the same as active, then by definition it has no overlay
+     */
+    if (active == bs) {
+        return NULL;
+    }
+
+    intermediate = active;
+    while (intermediate->backing_hd) {
+        if (intermediate->backing_hd == bs) {
+            overlay = intermediate;
+            break;
+        }
+        intermediate = intermediate->backing_hd;
+    }
+
+    return overlay;
+}
+
+typedef struct BlkIntermediateStates {
+    BlockDriverState *bs;
+    QSIMPLEQ_ENTRY(BlkIntermediateStates) entry;
+} BlkIntermediateStates;
+
+
+/*
+ * Drops images above 'base' up to and including 'top', and sets the image
+ * above 'top' to have base as its backing file.
+ *
+ * Requires that the overlay to 'top' is opened r/w, so that the backing file
+ * information in 'bs' can be properly updated.
+ *
+ * E.g., this will convert the following chain:
+ * bottom <- base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * bottom <- base <- active
+ *
+ * It is allowed for bottom==base, in which case it converts:
+ *
+ * base <- intermediate <- top <- active
+ *
+ * to
+ *
+ * base <- active
+ *
+ * Error conditions:
+ *  if active == top, that is considered an error
+ *
+ */
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+                           BlockDriverState *base)
+{
+    BlockDriverState *intermediate;
+    BlockDriverState *base_bs = NULL;
+    BlockDriverState *new_top_bs = NULL;
+    BlkIntermediateStates *intermediate_state, *next;
+    int ret = -EIO;
+
+    QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete;
+    QSIMPLEQ_INIT(&states_to_delete);
+
+    if (!top->drv || !base->drv) {
+        goto exit;
+    }
+
+    new_top_bs = bdrv_find_overlay(active, top);
+
+    if (new_top_bs == NULL) {
+        /* we could not find the image above 'top', this is an error */
+        goto exit;
+    }
+
+    /* special case of new_top_bs->backing_hd already pointing to base - nothing
+     * to do, no intermediate images */
+    if (new_top_bs->backing_hd == base) {
+        ret = 0;
+        goto exit;
+    }
+
+    intermediate = top;
+
+    /* now we will go down through the list, and add each BDS we find
+     * into our deletion queue, until we hit the 'base'
+     */
+    while (intermediate) {
+        intermediate_state = g_malloc0(sizeof(BlkIntermediateStates));
+        intermediate_state->bs = intermediate;
+        QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry);
+
+        if (intermediate->backing_hd == base) {
+            base_bs = intermediate->backing_hd;
+            break;
+        }
+        intermediate = intermediate->backing_hd;
+    }
+    if (base_bs == NULL) {
+        /* something went wrong, we did not end at the base. safely
+         * unravel everything, and exit with error */
+        goto exit;
+    }
+
+    /* success - we can delete the intermediate states, and link top->base */
+    ret = bdrv_change_backing_file(new_top_bs, base_bs->filename,
+                                   base_bs->drv ? base_bs->drv->format_name : "");
+    if (ret) {
+        goto exit;
+    }
+    new_top_bs->backing_hd = base_bs;
+
+
+    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+        /* so that bdrv_close() does not recursively close the chain */
+        intermediate_state->bs->backing_hd = NULL;
+        bdrv_delete(intermediate_state->bs);
+    }
+    ret = 0;
+
+exit:
+    QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) {
+        g_free(intermediate_state);
+    }
+    return ret;
+}
+
+
 static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset,
                                    size_t size)
 {
@@ -2134,18 +2477,51 @@
     bs->io_limits_enabled = bdrv_io_limits_enabled(bs);
 }
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
-                       BlockErrorAction on_write_error)
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+                       BlockdevOnError on_write_error)
 {
     bs->on_read_error = on_read_error;
     bs->on_write_error = on_write_error;
 }
 
-BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read)
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read)
 {
     return is_read ? bs->on_read_error : bs->on_write_error;
 }
 
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error)
+{
+    BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error;
+
+    switch (on_err) {
+    case BLOCKDEV_ON_ERROR_ENOSPC:
+        return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_STOP:
+        return BDRV_ACTION_STOP;
+    case BLOCKDEV_ON_ERROR_REPORT:
+        return BDRV_ACTION_REPORT;
+    case BLOCKDEV_ON_ERROR_IGNORE:
+        return BDRV_ACTION_IGNORE;
+    default:
+        abort();
+    }
+}
+
+/* This is done by device models because, while the block layer knows
+ * about the error, it does not know whether an operation comes from
+ * the device or the block layer (from a job, for example).
+ */
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+                       bool is_read, int error)
+{
+    assert(error >= 0);
+    bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read);
+    if (action == BDRV_ACTION_STOP) {
+        vm_stop(RUN_STATE_IO_ERROR);
+        bdrv_iostatus_set_err(bs, error);
+    }
+}
+
 int bdrv_is_read_only(BlockDriverState *bs)
 {
     return bs->read_only;
@@ -2164,6 +2540,13 @@
 void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce)
 {
     bs->enable_write_cache = wce;
+
+    /* so a reopen() will preserve wce */
+    if (wce) {
+        bs->open_flags |= BDRV_O_CACHE_WB;
+    } else {
+        bs->open_flags &= ~BDRV_O_CACHE_WB;
+    }
 }
 
 int bdrv_is_encrypted(BlockDriverState *bs)
@@ -2771,6 +3154,22 @@
     return 1 + bdrv_get_backing_file_depth(bs->backing_hd);
 }
 
+BlockDriverState *bdrv_find_base(BlockDriverState *bs)
+{
+    BlockDriverState *curr_bs = NULL;
+
+    if (!bs) {
+        return NULL;
+    }
+
+    curr_bs = bs;
+
+    while (curr_bs->backing_hd) {
+        curr_bs = curr_bs->backing_hd;
+    }
+    return curr_bs;
+}
+
 #define NB_SUFFIXES 4
 
 char *get_human_readable_size(char *buf, int buf_size, int64_t size)
@@ -3846,9 +4245,9 @@
 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs)
 {
     return (bs->iostatus_enabled &&
-           (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC ||
-            bs->on_write_error == BLOCK_ERR_STOP_ANY    ||
-            bs->on_read_error == BLOCK_ERR_STOP_ANY));
+           (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC ||
+            bs->on_write_error == BLOCKDEV_ON_ERROR_STOP   ||
+            bs->on_read_error == BLOCKDEV_ON_ERROR_STOP));
 }
 
 void bdrv_iostatus_disable(BlockDriverState *bs)
@@ -3863,14 +4262,10 @@
     }
 }
 
-/* XXX: Today this is set by device models because it makes the implementation
-   quite simple. However, the block layer knows about the error, so it's
-   possible to implement this without device models being involved */
 void bdrv_iostatus_set_err(BlockDriverState *bs, int error)
 {
-    if (bdrv_iostatus_is_enabled(bs) &&
-        bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
-        assert(error >= 0);
+    assert(bdrv_iostatus_is_enabled(bs));
+    if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
         bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
                                          BLOCK_DEVICE_IO_STATUS_FAILED;
     }
@@ -4044,130 +4439,3 @@
 
     return ret;
 }
-
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
-                       int64_t speed, BlockDriverCompletionFunc *cb,
-                       void *opaque, Error **errp)
-{
-    BlockJob *job;
-
-    if (bs->job || bdrv_in_use(bs)) {
-        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
-        return NULL;
-    }
-    bdrv_set_in_use(bs, 1);
-
-    job = g_malloc0(job_type->instance_size);
-    job->job_type      = job_type;
-    job->bs            = bs;
-    job->cb            = cb;
-    job->opaque        = opaque;
-    job->busy          = true;
-    bs->job = job;
-
-    /* Only set speed when necessary to avoid NotSupported error */
-    if (speed != 0) {
-        Error *local_err = NULL;
-
-        block_job_set_speed(job, speed, &local_err);
-        if (error_is_set(&local_err)) {
-            bs->job = NULL;
-            g_free(job);
-            bdrv_set_in_use(bs, 0);
-            error_propagate(errp, local_err);
-            return NULL;
-        }
-    }
-    return job;
-}
-
-void block_job_complete(BlockJob *job, int ret)
-{
-    BlockDriverState *bs = job->bs;
-
-    assert(bs->job == job);
-    job->cb(job->opaque, ret);
-    bs->job = NULL;
-    g_free(job);
-    bdrv_set_in_use(bs, 0);
-}
-
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
-{
-    Error *local_err = NULL;
-
-    if (!job->job_type->set_speed) {
-        error_set(errp, QERR_NOT_SUPPORTED);
-        return;
-    }
-    job->job_type->set_speed(job, speed, &local_err);
-    if (error_is_set(&local_err)) {
-        error_propagate(errp, local_err);
-        return;
-    }
-
-    job->speed = speed;
-}
-
-void block_job_cancel(BlockJob *job)
-{
-    job->cancelled = true;
-    if (job->co && !job->busy) {
-        qemu_coroutine_enter(job->co, NULL);
-    }
-}
-
-bool block_job_is_cancelled(BlockJob *job)
-{
-    return job->cancelled;
-}
-
-struct BlockCancelData {
-    BlockJob *job;
-    BlockDriverCompletionFunc *cb;
-    void *opaque;
-    bool cancelled;
-    int ret;
-};
-
-static void block_job_cancel_cb(void *opaque, int ret)
-{
-    struct BlockCancelData *data = opaque;
-
-    data->cancelled = block_job_is_cancelled(data->job);
-    data->ret = ret;
-    data->cb(data->opaque, ret);
-}
-
-int block_job_cancel_sync(BlockJob *job)
-{
-    struct BlockCancelData data;
-    BlockDriverState *bs = job->bs;
-
-    assert(bs->job == job);
-
-    /* Set up our own callback to store the result and chain to
-     * the original callback.
-     */
-    data.job = job;
-    data.cb = job->cb;
-    data.opaque = job->opaque;
-    data.ret = -EINPROGRESS;
-    job->cb = block_job_cancel_cb;
-    job->opaque = &data;
-    block_job_cancel(job);
-    while (data.ret == -EINPROGRESS) {
-        qemu_aio_wait();
-    }
-    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
-}
-
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
-{
-    /* Check cancellation *before* setting busy = false, too!  */
-    if (!block_job_is_cancelled(job)) {
-        job->busy = false;
-        co_sleep_ns(clock, ns);
-        job->busy = true;
-    }
-}
diff --git a/block.h b/block.h
index 2e2be11..e2d89d7 100644
--- a/block.h
+++ b/block.h
@@ -6,9 +6,11 @@
 #include "qemu-option.h"
 #include "qemu-coroutine.h"
 #include "qobject.h"
+#include "qapi-types.h"
 
 /* block.c */
 typedef struct BlockDriver BlockDriver;
+typedef struct BlockJob BlockJob;
 
 typedef struct BlockDriverInfo {
     /* in bytes, 0 if irrelevant */
@@ -80,6 +82,7 @@
 #define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
 #define BDRV_O_INCOMING    0x0800  /* consistency hint for incoming migration */
 #define BDRV_O_CHECK       0x1000  /* open solely for consistency check */
+#define BDRV_O_ALLOW_RDWR  0x2000  /* allow reopen to change from r/o to r/w */
 
 #define BDRV_O_CACHE_MASK  (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH)
 
@@ -88,21 +91,23 @@
 #define BDRV_SECTOR_MASK   ~(BDRV_SECTOR_SIZE - 1)
 
 typedef enum {
-    BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC,
-    BLOCK_ERR_STOP_ANY
+    BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
 } BlockErrorAction;
 
-typedef enum {
-    BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP
-} BlockQMPEventAction;
+typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
+
+typedef struct BDRVReopenState {
+    BlockDriverState *bs;
+    int flags;
+    void *opaque;
+} BDRVReopenState;
+
 
 void bdrv_iostatus_enable(BlockDriverState *bs);
 void bdrv_iostatus_reset(BlockDriverState *bs);
 void bdrv_iostatus_disable(BlockDriverState *bs);
 bool bdrv_iostatus_is_enabled(const BlockDriverState *bs);
 void bdrv_iostatus_set_err(BlockDriverState *bs, int error);
-void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
-                               BlockQMPEventAction action, int is_read);
 void bdrv_info_print(Monitor *mon, const QObject *data);
 void bdrv_info(Monitor *mon, QObject **ret_data);
 void bdrv_stats_print(Monitor *mon, const QObject *data);
@@ -130,6 +135,14 @@
 int bdrv_file_open(BlockDriverState **pbs, const char *filename, int flags);
 int bdrv_open(BlockDriverState *bs, const char *filename, int flags,
               BlockDriver *drv);
+BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
+                                    BlockDriverState *bs, int flags);
+int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
+int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp);
+int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
+                        BlockReopenQueue *queue, Error **errp);
+void bdrv_reopen_commit(BDRVReopenState *reopen_state);
+void bdrv_reopen_abort(BDRVReopenState *reopen_state);
 void bdrv_close(BlockDriverState *bs);
 int bdrv_attach_dev(BlockDriverState *bs, void *dev);
 void bdrv_attach_dev_nofail(BlockDriverState *bs, void *dev);
@@ -185,6 +198,11 @@
 int bdrv_change_backing_file(BlockDriverState *bs,
     const char *backing_file, const char *backing_fmt);
 void bdrv_register(BlockDriver *bdrv);
+int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top,
+                           BlockDriverState *base);
+BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
+                                    BlockDriverState *bs);
+BlockDriverState *bdrv_find_base(BlockDriverState *bs);
 
 
 typedef struct BdrvCheckResult {
@@ -259,9 +277,12 @@
 int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors,
                       int *pnum);
 
-void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error,
-                       BlockErrorAction on_write_error);
-BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read);
+void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error,
+                       BlockdevOnError on_write_error);
+BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read);
+BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error);
+void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action,
+                       bool is_read, int error);
 int bdrv_is_read_only(BlockDriverState *bs);
 int bdrv_is_sg(BlockDriverState *bs);
 int bdrv_enable_write_cache(BlockDriverState *bs);
diff --git a/block/Makefile.objs b/block/Makefile.objs
index b5754d3..554f429 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -3,9 +3,12 @@
 block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-obj-y += qed-check.o
 block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
-block-obj-y += stream.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
 block-obj-$(CONFIG_LIBISCSI) += iscsi.o
 block-obj-$(CONFIG_CURL) += curl.o
 block-obj-$(CONFIG_RBD) += rbd.o
+block-obj-$(CONFIG_GLUSTERFS) += gluster.o
+
+common-obj-y += stream.o
+common-obj-y += commit.o
diff --git a/block/blkdebug.c b/block/blkdebug.c
index 59dcea0..1206d52 100644
--- a/block/blkdebug.c
+++ b/block/blkdebug.c
@@ -28,6 +28,7 @@
 
 typedef struct BDRVBlkdebugState {
     int state;
+    int new_state;
     QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX];
     QSIMPLEQ_HEAD(, BlkdebugRule) active_rules;
 } BDRVBlkdebugState;
@@ -403,12 +404,12 @@
 }
 
 static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule,
-    int old_state, bool injected)
+    bool injected)
 {
     BDRVBlkdebugState *s = bs->opaque;
 
     /* Only process rules for the current state */
-    if (rule->state && rule->state != old_state) {
+    if (rule->state && rule->state != s->state) {
         return injected;
     }
 
@@ -423,7 +424,7 @@
         break;
 
     case ACTION_SET_STATE:
-        s->state = rule->options.set_state.new_state;
+        s->new_state = rule->options.set_state.new_state;
         break;
     }
     return injected;
@@ -433,15 +434,16 @@
 {
     BDRVBlkdebugState *s = bs->opaque;
     struct BlkdebugRule *rule;
-    int old_state = s->state;
     bool injected;
 
     assert((int)event >= 0 && event < BLKDBG_EVENT_MAX);
 
     injected = false;
+    s->new_state = s->state;
     QLIST_FOREACH(rule, &s->rules[event], next) {
-        injected = process_rule(bs, rule, old_state, injected);
+        injected = process_rule(bs, rule, injected);
     }
+    s->state = s->new_state;
 }
 
 static int64_t blkdebug_getlength(BlockDriverState *bs)
diff --git a/block/commit.c b/block/commit.c
new file mode 100644
index 0000000..733c914
--- /dev/null
+++ b/block/commit.c
@@ -0,0 +1,268 @@
+/*
+ * Live block commit
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Jeff Cody   <jcody@redhat.com>
+ *  Based on stream.c by Stefan Hajnoczi
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "block_int.h"
+#include "blockjob.h"
+#include "qemu/ratelimit.h"
+
+enum {
+    /*
+     * Size of data buffer for populating the image file.  This should be large
+     * enough to process multiple clusters in a single call, so that populating
+     * contiguous regions of the image is efficient.
+     */
+    COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
+};
+
+#define SLICE_TIME 100000000ULL /* ns */
+
+typedef struct CommitBlockJob {
+    BlockJob common;
+    RateLimit limit;
+    BlockDriverState *active;
+    BlockDriverState *top;
+    BlockDriverState *base;
+    BlockdevOnError on_error;
+    int base_flags;
+    int orig_overlay_flags;
+} CommitBlockJob;
+
+static int coroutine_fn commit_populate(BlockDriverState *bs,
+                                        BlockDriverState *base,
+                                        int64_t sector_num, int nb_sectors,
+                                        void *buf)
+{
+    int ret = 0;
+
+    ret = bdrv_read(bs, sector_num, buf, nb_sectors);
+    if (ret) {
+        return ret;
+    }
+
+    ret = bdrv_write(base, sector_num, buf, nb_sectors);
+    if (ret) {
+        return ret;
+    }
+
+    return 0;
+}
+
+static void coroutine_fn commit_run(void *opaque)
+{
+    CommitBlockJob *s = opaque;
+    BlockDriverState *active = s->active;
+    BlockDriverState *top = s->top;
+    BlockDriverState *base = s->base;
+    BlockDriverState *overlay_bs = NULL;
+    int64_t sector_num, end;
+    int ret = 0;
+    int n = 0;
+    void *buf;
+    int bytes_written = 0;
+    int64_t base_len;
+
+    ret = s->common.len = bdrv_getlength(top);
+
+
+    if (s->common.len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    ret = base_len = bdrv_getlength(base);
+    if (base_len < 0) {
+        goto exit_restore_reopen;
+    }
+
+    if (base_len < s->common.len) {
+        ret = bdrv_truncate(base, s->common.len);
+        if (ret) {
+            goto exit_restore_reopen;
+        }
+    }
+
+    overlay_bs = bdrv_find_overlay(active, top);
+
+    end = s->common.len >> BDRV_SECTOR_BITS;
+    buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE);
+
+    for (sector_num = 0; sector_num < end; sector_num += n) {
+        uint64_t delay_ns = 0;
+        bool copy;
+
+wait:
+        /* Note that even when no rate limit is applied we need to yield
+         * with no pending I/O here so that qemu_aio_flush() returns.
+         */
+        block_job_sleep_ns(&s->common, rt_clock, delay_ns);
+        if (block_job_is_cancelled(&s->common)) {
+            break;
+        }
+        /* Copy if allocated above the base */
+        ret = bdrv_co_is_allocated_above(top, base, sector_num,
+                                         COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
+                                         &n);
+        copy = (ret == 1);
+        trace_commit_one_iteration(s, sector_num, n, ret);
+        if (copy) {
+            if (s->common.speed) {
+                delay_ns = ratelimit_calculate_delay(&s->limit, n);
+                if (delay_ns > 0) {
+                    goto wait;
+                }
+            }
+            ret = commit_populate(top, base, sector_num, n, buf);
+            bytes_written += n * BDRV_SECTOR_SIZE;
+        }
+        if (ret < 0) {
+            if (s->on_error == BLOCKDEV_ON_ERROR_STOP ||
+                s->on_error == BLOCKDEV_ON_ERROR_REPORT||
+                (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) {
+                goto exit_free_buf;
+            } else {
+                n = 0;
+                continue;
+            }
+        }
+        /* Publish progress */
+        s->common.offset += n * BDRV_SECTOR_SIZE;
+    }
+
+    ret = 0;
+
+    if (!block_job_is_cancelled(&s->common) && sector_num == end) {
+        /* success */
+        ret = bdrv_drop_intermediate(active, top, base);
+    }
+
+exit_free_buf:
+    qemu_vfree(buf);
+
+exit_restore_reopen:
+    /* restore base open flags here if appropriate (e.g., change the base back
+     * to r/o). These reopens do not need to be atomic, since we won't abort
+     * even on failure here */
+    if (s->base_flags != bdrv_get_flags(base)) {
+        bdrv_reopen(base, s->base_flags, NULL);
+    }
+    if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
+        bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
+    }
+
+    block_job_complete(&s->common, ret);
+}
+
+static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    CommitBlockJob *s = container_of(job, CommitBlockJob, common);
+
+    if (speed < 0) {
+        error_set(errp, QERR_INVALID_PARAMETER, "speed");
+        return;
+    }
+    ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
+}
+
+static BlockJobType commit_job_type = {
+    .instance_size = sizeof(CommitBlockJob),
+    .job_type      = "commit",
+    .set_speed     = commit_set_speed,
+};
+
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+                  BlockDriverState *top, int64_t speed,
+                  BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+                  void *opaque, Error **errp)
+{
+    CommitBlockJob *s;
+    BlockReopenQueue *reopen_queue = NULL;
+    int orig_overlay_flags;
+    int orig_base_flags;
+    BlockDriverState *overlay_bs;
+    Error *local_err = NULL;
+
+    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER_COMBINATION);
+        return;
+    }
+
+    /* Once we support top == active layer, remove this check */
+    if (top == bs) {
+        error_setg(errp,
+                   "Top image as the active layer is currently unsupported");
+        return;
+    }
+
+    if (top == base) {
+        error_setg(errp, "Invalid files for merge: top and base are the same");
+        return;
+    }
+
+    /* top and base may be valid, but let's make sure that base is reachable
+     * from top */
+    if (bdrv_find_backing_image(top, base->filename) != base) {
+        error_setg(errp,
+                   "Base (%s) is not reachable from top (%s)",
+                   base->filename, top->filename);
+        return;
+    }
+
+    overlay_bs = bdrv_find_overlay(bs, top);
+
+    if (overlay_bs == NULL) {
+        error_setg(errp, "Could not find overlay image for %s:", top->filename);
+        return;
+    }
+
+    orig_base_flags    = bdrv_get_flags(base);
+    orig_overlay_flags = bdrv_get_flags(overlay_bs);
+
+    /* convert base & overlay_bs to r/w, if necessary */
+    if (!(orig_base_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, base,
+                                         orig_base_flags | BDRV_O_RDWR);
+    }
+    if (!(orig_overlay_flags & BDRV_O_RDWR)) {
+        reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs,
+                                         orig_overlay_flags | BDRV_O_RDWR);
+    }
+    if (reopen_queue) {
+        bdrv_reopen_multiple(reopen_queue, &local_err);
+        if (local_err != NULL) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+
+
+    s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp);
+    if (!s) {
+        return;
+    }
+
+    s->base   = base;
+    s->top    = top;
+    s->active = bs;
+
+    s->base_flags          = orig_base_flags;
+    s->orig_overlay_flags  = orig_overlay_flags;
+
+    s->on_error = on_error;
+    s->common.co = qemu_coroutine_create(commit_run);
+
+    trace_commit_start(bs, base, top, s, s->common.co, opaque);
+    qemu_coroutine_enter(s->common.co, s);
+}
diff --git a/block/curl.c b/block/curl.c
index e7c3634..c1074cd 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -542,8 +542,7 @@
     }
     if (s->multi)
         curl_multi_cleanup(s->multi);
-    if (s->url)
-        free(s->url);
+    g_free(s->url);
 }
 
 static int64_t curl_getlength(BlockDriverState *bs)
diff --git a/block/gluster.c b/block/gluster.c
new file mode 100644
index 0000000..3588d73
--- /dev/null
+++ b/block/gluster.c
@@ -0,0 +1,624 @@
+/*
+ * GlusterFS backend for QEMU
+ *
+ * Copyright (C) 2012 Bharata B Rao <bharata@linux.vnet.ibm.com>
+ *
+ * Pipe handling mechanism in AIO implementation is derived from
+ * block/rbd.c. Hence,
+ *
+ * Copyright (C) 2010-2011 Christian Brunner <chb@muc.de>,
+ *                         Josh Durgin <josh.durgin@dreamhost.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include <glusterfs/api/glfs.h>
+#include "block_int.h"
+#include "qemu_socket.h"
+#include "uri.h"
+
+typedef struct GlusterAIOCB {
+    BlockDriverAIOCB common;
+    int64_t size;
+    int ret;
+    bool *finished;
+    QEMUBH *bh;
+} GlusterAIOCB;
+
+typedef struct BDRVGlusterState {
+    struct glfs *glfs;
+    int fds[2];
+    struct glfs_fd *fd;
+    int qemu_aio_count;
+    int event_reader_pos;
+    GlusterAIOCB *event_acb;
+} BDRVGlusterState;
+
+#define GLUSTER_FD_READ  0
+#define GLUSTER_FD_WRITE 1
+
+typedef struct GlusterConf {
+    char *server;
+    int port;
+    char *volname;
+    char *image;
+    char *transport;
+} GlusterConf;
+
+static void qemu_gluster_gconf_free(GlusterConf *gconf)
+{
+    g_free(gconf->server);
+    g_free(gconf->volname);
+    g_free(gconf->image);
+    g_free(gconf->transport);
+    g_free(gconf);
+}
+
+static int parse_volume_options(GlusterConf *gconf, char *path)
+{
+    char *p, *q;
+
+    if (!path) {
+        return -EINVAL;
+    }
+
+    /* volume */
+    p = q = path + strspn(path, "/");
+    p += strcspn(p, "/");
+    if (*p == '\0') {
+        return -EINVAL;
+    }
+    gconf->volname = g_strndup(q, p - q);
+
+    /* image */
+    p += strspn(p, "/");
+    if (*p == '\0') {
+        return -EINVAL;
+    }
+    gconf->image = g_strdup(p);
+    return 0;
+}
+
+/*
+ * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...]
+ *
+ * 'gluster' is the protocol.
+ *
+ * 'transport' specifies the transport type used to connect to gluster
+ * management daemon (glusterd). Valid transport types are
+ * tcp, unix and rdma. If a transport type isn't specified, then tcp
+ * type is assumed.
+ *
+ * 'server' specifies the server where the volume file specification for
+ * the given volume resides. This can be either hostname, ipv4 address
+ * or ipv6 address. ipv6 address needs to be within square brackets [ ].
+ * If transport type is 'unix', then 'server' field should not be specifed.
+ * The 'socket' field needs to be populated with the path to unix domain
+ * socket.
+ *
+ * 'port' is the port number on which glusterd is listening. This is optional
+ * and if not specified, QEMU will send 0 which will make gluster to use the
+ * default port. If the transport type is unix, then 'port' should not be
+ * specified.
+ *
+ * 'volname' is the name of the gluster volume which contains the VM image.
+ *
+ * 'image' is the path to the actual VM image that resides on gluster volume.
+ *
+ * Examples:
+ *
+ * file=gluster://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4/testvol/a.img
+ * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img
+ * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img
+ * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img
+ * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket
+ * file=gluster+rdma://1.2.3.4:24007/testvol/a.img
+ */
+static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename)
+{
+    URI *uri;
+    QueryParams *qp = NULL;
+    bool is_unix = false;
+    int ret = 0;
+
+    uri = uri_parse(filename);
+    if (!uri) {
+        return -EINVAL;
+    }
+
+    /* transport */
+    if (!strcmp(uri->scheme, "gluster")) {
+        gconf->transport = g_strdup("tcp");
+    } else if (!strcmp(uri->scheme, "gluster+tcp")) {
+        gconf->transport = g_strdup("tcp");
+    } else if (!strcmp(uri->scheme, "gluster+unix")) {
+        gconf->transport = g_strdup("unix");
+        is_unix = true;
+    } else if (!strcmp(uri->scheme, "gluster+rdma")) {
+        gconf->transport = g_strdup("rdma");
+    } else {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = parse_volume_options(gconf, uri->path);
+    if (ret < 0) {
+        goto out;
+    }
+
+    qp = query_params_parse(uri->query);
+    if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    if (is_unix) {
+        if (uri->server || uri->port) {
+            ret = -EINVAL;
+            goto out;
+        }
+        if (strcmp(qp->p[0].name, "socket")) {
+            ret = -EINVAL;
+            goto out;
+        }
+        gconf->server = g_strdup(qp->p[0].value);
+    } else {
+        gconf->server = g_strdup(uri->server);
+        gconf->port = uri->port;
+    }
+
+out:
+    if (qp) {
+        query_params_free(qp);
+    }
+    uri_free(uri);
+    return ret;
+}
+
+static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename)
+{
+    struct glfs *glfs = NULL;
+    int ret;
+    int old_errno;
+
+    ret = qemu_gluster_parseuri(gconf, filename);
+    if (ret < 0) {
+        error_report("Usage: file=gluster[+transport]://[server[:port]]/"
+            "volname/image[?socket=...]");
+        errno = -ret;
+        goto out;
+    }
+
+    glfs = glfs_new(gconf->volname);
+    if (!glfs) {
+        goto out;
+    }
+
+    ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server,
+            gconf->port);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /*
+     * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when
+     * GlusterFS makes GF_LOG_* macros available to libgfapi users.
+     */
+    ret = glfs_set_logging(glfs, "-", 4);
+    if (ret < 0) {
+        goto out;
+    }
+
+    ret = glfs_init(glfs);
+    if (ret) {
+        error_report("Gluster connection failed for server=%s port=%d "
+             "volume=%s image=%s transport=%s\n", gconf->server, gconf->port,
+             gconf->volname, gconf->image, gconf->transport);
+        goto out;
+    }
+    return glfs;
+
+out:
+    if (glfs) {
+        old_errno = errno;
+        glfs_fini(glfs);
+        errno = old_errno;
+    }
+    return NULL;
+}
+
+static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s)
+{
+    int ret;
+    bool *finished = acb->finished;
+    BlockDriverCompletionFunc *cb = acb->common.cb;
+    void *opaque = acb->common.opaque;
+
+    if (!acb->ret || acb->ret == acb->size) {
+        ret = 0; /* Success */
+    } else if (acb->ret < 0) {
+        ret = acb->ret; /* Read/Write failed */
+    } else {
+        ret = -EIO; /* Partial read/write - fail it */
+    }
+
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    cb(opaque, ret);
+    if (finished) {
+        *finished = true;
+    }
+}
+
+static void qemu_gluster_aio_event_reader(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+    ssize_t ret;
+
+    do {
+        char *p = (char *)&s->event_acb;
+
+        ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos,
+                   sizeof(s->event_acb) - s->event_reader_pos);
+        if (ret > 0) {
+            s->event_reader_pos += ret;
+            if (s->event_reader_pos == sizeof(s->event_acb)) {
+                s->event_reader_pos = 0;
+                qemu_gluster_complete_aio(s->event_acb, s);
+            }
+        }
+    } while (ret < 0 && errno == EINTR);
+}
+
+static int qemu_gluster_aio_flush_cb(void *opaque)
+{
+    BDRVGlusterState *s = opaque;
+
+    return (s->qemu_aio_count > 0);
+}
+
+static int qemu_gluster_open(BlockDriverState *bs, const char *filename,
+    int bdrv_flags)
+{
+    BDRVGlusterState *s = bs->opaque;
+    int open_flags = O_BINARY;
+    int ret = 0;
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+    s->glfs = qemu_gluster_init(gconf, filename);
+    if (!s->glfs) {
+        ret = -errno;
+        goto out;
+    }
+
+    if (bdrv_flags & BDRV_O_RDWR) {
+        open_flags |= O_RDWR;
+    } else {
+        open_flags |= O_RDONLY;
+    }
+
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        open_flags |= O_DIRECT;
+    }
+
+    s->fd = glfs_open(s->glfs, gconf->image, open_flags);
+    if (!s->fd) {
+        ret = -errno;
+        goto out;
+    }
+
+    ret = qemu_pipe(s->fds);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    }
+    fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ],
+        qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s);
+
+out:
+    qemu_gluster_gconf_free(gconf);
+    if (!ret) {
+        return ret;
+    }
+    if (s->fd) {
+        glfs_close(s->fd);
+    }
+    if (s->glfs) {
+        glfs_fini(s->glfs);
+    }
+    return ret;
+}
+
+static int qemu_gluster_create(const char *filename,
+        QEMUOptionParameter *options)
+{
+    struct glfs *glfs;
+    struct glfs_fd *fd;
+    int ret = 0;
+    int64_t total_size = 0;
+    GlusterConf *gconf = g_malloc0(sizeof(GlusterConf));
+
+    glfs = qemu_gluster_init(gconf, filename);
+    if (!glfs) {
+        ret = -errno;
+        goto out;
+    }
+
+    while (options && options->name) {
+        if (!strcmp(options->name, BLOCK_OPT_SIZE)) {
+            total_size = options->value.n / BDRV_SECTOR_SIZE;
+        }
+        options++;
+    }
+
+    fd = glfs_creat(glfs, gconf->image,
+        O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR);
+    if (!fd) {
+        ret = -errno;
+    } else {
+        if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) {
+            ret = -errno;
+        }
+        if (glfs_close(fd) != 0) {
+            ret = -errno;
+        }
+    }
+out:
+    qemu_gluster_gconf_free(gconf);
+    if (glfs) {
+        glfs_fini(glfs);
+    }
+    return ret;
+}
+
+static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)blockacb;
+    bool finished = false;
+
+    acb->finished = &finished;
+    while (!finished) {
+        qemu_aio_wait();
+    }
+}
+
+static AIOPool gluster_aio_pool = {
+    .aiocb_size = sizeof(GlusterAIOCB),
+    .cancel = qemu_gluster_aio_cancel,
+};
+
+static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg)
+{
+    GlusterAIOCB *acb = (GlusterAIOCB *)arg;
+    BlockDriverState *bs = acb->common.bs;
+    BDRVGlusterState *s = bs->opaque;
+    int retval;
+
+    acb->ret = ret;
+    retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb));
+    if (retval != sizeof(acb)) {
+        /*
+         * Gluster AIO callback thread failed to notify the waiting
+         * QEMU thread about IO completion.
+         *
+         * Complete this IO request and make the disk inaccessible for
+         * subsequent reads and writes.
+         */
+        error_report("Gluster failed to notify QEMU about IO completion");
+
+        qemu_mutex_lock_iothread(); /* We are in gluster thread context */
+        acb->common.cb(acb->common.opaque, -EIO);
+        qemu_aio_release(acb);
+        s->qemu_aio_count--;
+        close(s->fds[GLUSTER_FD_READ]);
+        close(s->fds[GLUSTER_FD_WRITE]);
+        qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL,
+            NULL);
+        bs->drv = NULL; /* Make the disk inaccessible */
+        qemu_mutex_unlock_iothread();
+    }
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque, int write)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    BDRVGlusterState *s = bs->opaque;
+    size_t size;
+    off_t offset;
+
+    offset = sector_num * BDRV_SECTOR_SIZE;
+    size = nb_sectors * BDRV_SECTOR_SIZE;
+    s->qemu_aio_count++;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->size = size;
+    acb->ret = 0;
+    acb->finished = NULL;
+
+    if (write) {
+        ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, acb);
+    } else {
+        ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0,
+            &gluster_finish_aiocb, acb);
+    }
+
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs,
+        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1);
+}
+
+static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs,
+        BlockDriverCompletionFunc *cb, void *opaque)
+{
+    int ret;
+    GlusterAIOCB *acb;
+    BDRVGlusterState *s = bs->opaque;
+
+    acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque);
+    acb->size = 0;
+    acb->ret = 0;
+    acb->finished = NULL;
+    s->qemu_aio_count++;
+
+    ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb);
+    if (ret < 0) {
+        goto out;
+    }
+    return &acb->common;
+
+out:
+    s->qemu_aio_count--;
+    qemu_aio_release(acb);
+    return NULL;
+}
+
+static int64_t qemu_gluster_getlength(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    int64_t ret;
+
+    ret = glfs_lseek(s->fd, 0, SEEK_END);
+    if (ret < 0) {
+        return -errno;
+    } else {
+        return ret;
+    }
+}
+
+static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+    struct stat st;
+    int ret;
+
+    ret = glfs_fstat(s->fd, &st);
+    if (ret < 0) {
+        return -errno;
+    } else {
+        return st.st_blocks * 512;
+    }
+}
+
+static void qemu_gluster_close(BlockDriverState *bs)
+{
+    BDRVGlusterState *s = bs->opaque;
+
+    close(s->fds[GLUSTER_FD_READ]);
+    close(s->fds[GLUSTER_FD_WRITE]);
+    qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL);
+
+    if (s->fd) {
+        glfs_close(s->fd);
+        s->fd = NULL;
+    }
+    glfs_fini(s->glfs);
+}
+
+static QEMUOptionParameter qemu_gluster_create_options[] = {
+    {
+        .name = BLOCK_OPT_SIZE,
+        .type = OPT_SIZE,
+        .help = "Virtual disk size"
+    },
+    { NULL }
+};
+
+static BlockDriver bdrv_gluster = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_tcp = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+tcp",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_unix = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+unix",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static BlockDriver bdrv_gluster_rdma = {
+    .format_name                  = "gluster",
+    .protocol_name                = "gluster+rdma",
+    .instance_size                = sizeof(BDRVGlusterState),
+    .bdrv_file_open               = qemu_gluster_open,
+    .bdrv_close                   = qemu_gluster_close,
+    .bdrv_create                  = qemu_gluster_create,
+    .bdrv_getlength               = qemu_gluster_getlength,
+    .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size,
+    .bdrv_aio_readv               = qemu_gluster_aio_readv,
+    .bdrv_aio_writev              = qemu_gluster_aio_writev,
+    .bdrv_aio_flush               = qemu_gluster_aio_flush,
+    .create_options               = qemu_gluster_create_options,
+};
+
+static void bdrv_gluster_init(void)
+{
+    bdrv_register(&bdrv_gluster_rdma);
+    bdrv_register(&bdrv_gluster_unix);
+    bdrv_register(&bdrv_gluster_tcp);
+    bdrv_register(&bdrv_gluster);
+}
+
+block_init(bdrv_gluster_init);
diff --git a/block/iscsi.c b/block/iscsi.c
index 0b96165..d0b1a10 100644
--- a/block/iscsi.c
+++ b/block/iscsi.c
@@ -167,12 +167,6 @@
 
     }
 
-    /* If we just added an event, the callback might be delayed
-     * unless we call qemu_notify_event().
-     */
-    if (ev & ~iscsilun->events) {
-        qemu_notify_event();
-    }
     iscsilun->events = ev;
 }
 
@@ -268,10 +262,6 @@
     acb->task->xfer_dir = SCSI_XFER_WRITE;
     acb->task->cdb_size = 16;
     acb->task->cdb[0] = 0x8a;
-    if (!(bs->open_flags & BDRV_O_CACHE_WB)) {
-        /* set FUA on writes when cache mode is write through */
-        acb->task->cdb[1] |= 0x04;
-    }
     lba = sector_qemu2lun(sector_num, iscsilun);
     *(uint32_t *)&acb->task->cdb[2]  = htonl(lba >> 32);
     *(uint32_t *)&acb->task->cdb[6]  = htonl(lba & 0xffffffff);
@@ -628,9 +618,17 @@
     return &acb->common;
 }
 
+
+static void ioctl_cb(void *opaque, int status)
+{
+    int *p_status = opaque;
+    *p_status = status;
+}
+
 static int iscsi_ioctl(BlockDriverState *bs, unsigned long int req, void *buf)
 {
     IscsiLun *iscsilun = bs->opaque;
+    int status;
 
     switch (req) {
     case SG_GET_VERSION_NUM:
@@ -639,6 +637,15 @@
     case SG_GET_SCSI_ID:
         ((struct sg_scsi_id *)buf)->scsi_type = iscsilun->type;
         break;
+    case SG_IO:
+        status = -EINPROGRESS;
+        iscsi_aio_ioctl(bs, req, buf, ioctl_cb, &status);
+
+        while (status == -EINPROGRESS) {
+            qemu_aio_wait();
+        }
+
+        return 0;
     default:
         return -1;
     }
diff --git a/block/qcow.c b/block/qcow.c
index 7b5ab87..b239c82 100644
--- a/block/qcow.c
+++ b/block/qcow.c
@@ -197,6 +197,15 @@
     return ret;
 }
 
+
+/* We have nothing to do for QCOW reopen, stubs just return
+ * success */
+static int qcow_reopen_prepare(BDRVReopenState *state,
+                               BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static int qcow_set_key(BlockDriverState *bs, const char *key)
 {
     BDRVQcowState *s = bs->opaque;
@@ -868,6 +877,7 @@
     .bdrv_probe		= qcow_probe,
     .bdrv_open		= qcow_open,
     .bdrv_close		= qcow_close,
+    .bdrv_reopen_prepare = qcow_reopen_prepare,
     .bdrv_create	= qcow_create,
 
     .bdrv_co_readv          = qcow_co_readv,
diff --git a/block/qcow2.c b/block/qcow2.c
index 8f183f1..c1ff31f 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -52,6 +52,7 @@
     uint32_t magic;
     uint32_t len;
 } QCowExtension;
+
 #define  QCOW2_EXT_MAGIC_END 0
 #define  QCOW2_EXT_MAGIC_BACKING_FORMAT 0xE2792ACA
 #define  QCOW2_EXT_MAGIC_FEATURE_TABLE 0x6803f857
@@ -558,6 +559,14 @@
     return 0;
 }
 
+/* We have nothing to do for QCOW2 reopen, stubs just return
+ * success */
+static int qcow2_reopen_prepare(BDRVReopenState *state,
+                                BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static int coroutine_fn qcow2_co_is_allocated(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *pnum)
 {
@@ -1087,6 +1096,7 @@
             goto fail;
         }
 
+        /* Using strncpy is ok here, since buf is not NUL-terminated. */
         strncpy(buf, bs->backing_file, buflen);
 
         header->backing_file_offset = cpu_to_be64(buf - ((char*) header));
@@ -1679,6 +1689,7 @@
     .bdrv_probe         = qcow2_probe,
     .bdrv_open          = qcow2_open,
     .bdrv_close         = qcow2_close,
+    .bdrv_reopen_prepare  = qcow2_reopen_prepare,
     .bdrv_create        = qcow2_create,
     .bdrv_co_is_allocated = qcow2_co_is_allocated,
     .bdrv_set_key       = qcow2_set_key,
diff --git a/block/qed-table.c b/block/qed-table.c
index ce07b05..de845ec 100644
--- a/block/qed-table.c
+++ b/block/qed-table.c
@@ -103,7 +103,6 @@
 out:
     qemu_vfree(write_table_cb->table);
     gencb_complete(&write_table_cb->gencb, ret);
-    return;
 }
 
 /**
diff --git a/block/qed.c b/block/qed.c
index 21cb239..6c182ca 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -505,6 +505,14 @@
     return ret;
 }
 
+/* We have nothing to do for QED reopen, stubs just return
+ * success */
+static int bdrv_qed_reopen_prepare(BDRVReopenState *state,
+                                   BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static void bdrv_qed_close(BlockDriverState *bs)
 {
     BDRVQEDState *s = bs->opaque;
@@ -1564,6 +1572,7 @@
     .bdrv_rebind              = bdrv_qed_rebind,
     .bdrv_open                = bdrv_qed_open,
     .bdrv_close               = bdrv_qed_close,
+    .bdrv_reopen_prepare      = bdrv_qed_reopen_prepare,
     .bdrv_create              = bdrv_qed_create,
     .bdrv_co_is_allocated     = bdrv_qed_co_is_allocated,
     .bdrv_make_empty          = bdrv_qed_make_empty,
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 6be20b1..28d439f 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -133,13 +133,19 @@
     int use_aio;
     void *aio_ctx;
 #endif
-    uint8_t *aligned_buf;
-    unsigned aligned_buf_size;
 #ifdef CONFIG_XFS
     bool is_xfs : 1;
 #endif
 } BDRVRawState;
 
+typedef struct BDRVRawReopenState {
+    int fd;
+    int open_flags;
+#ifdef CONFIG_LINUX_AIO
+    int use_aio;
+#endif
+} BDRVRawReopenState;
+
 static int fd_open(BlockDriverState *bs);
 static int64_t raw_getlength(BlockDriverState *bs);
 
@@ -185,6 +191,57 @@
 }
 #endif
 
+static void raw_parse_flags(int bdrv_flags, int *open_flags)
+{
+    assert(open_flags != NULL);
+
+    *open_flags |= O_BINARY;
+    *open_flags &= ~O_ACCMODE;
+    if (bdrv_flags & BDRV_O_RDWR) {
+        *open_flags |= O_RDWR;
+    } else {
+        *open_flags |= O_RDONLY;
+    }
+
+    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
+     * and O_DIRECT for no caching. */
+    if ((bdrv_flags & BDRV_O_NOCACHE)) {
+        *open_flags |= O_DIRECT;
+    }
+}
+
+#ifdef CONFIG_LINUX_AIO
+static int raw_set_aio(void **aio_ctx, int *use_aio, int bdrv_flags)
+{
+    int ret = -1;
+    assert(aio_ctx != NULL);
+    assert(use_aio != NULL);
+    /*
+     * Currently Linux do AIO only for files opened with O_DIRECT
+     * specified so check NOCACHE flag too
+     */
+    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
+                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
+
+        /* if non-NULL, laio_init() has already been run */
+        if (*aio_ctx == NULL) {
+            *aio_ctx = laio_init();
+            if (!*aio_ctx) {
+                goto error;
+            }
+        }
+        *use_aio = 1;
+    } else {
+        *use_aio = 0;
+    }
+
+    ret = 0;
+
+error:
+    return ret;
+}
+#endif
+
 static int raw_open_common(BlockDriverState *bs, const char *filename,
                            int bdrv_flags, int open_flags)
 {
@@ -196,20 +253,8 @@
         return ret;
     }
 
-    s->open_flags = open_flags | O_BINARY;
-    s->open_flags &= ~O_ACCMODE;
-    if (bdrv_flags & BDRV_O_RDWR) {
-        s->open_flags |= O_RDWR;
-    } else {
-        s->open_flags |= O_RDONLY;
-    }
-
-    /* Use O_DSYNC for write-through caching, no flags for write-back caching,
-     * and O_DIRECT for no caching. */
-    if ((bdrv_flags & BDRV_O_NOCACHE))
-        s->open_flags |= O_DIRECT;
-    if (!(bdrv_flags & BDRV_O_CACHE_WB))
-        s->open_flags |= O_DSYNC;
+    s->open_flags = open_flags;
+    raw_parse_flags(bdrv_flags, &s->open_flags);
 
     s->fd = -1;
     fd = qemu_open(filename, s->open_flags, 0644);
@@ -220,45 +265,17 @@
         return ret;
     }
     s->fd = fd;
-    s->aligned_buf = NULL;
-
-    if ((bdrv_flags & BDRV_O_NOCACHE)) {
-        /*
-         * Allocate a buffer for read/modify/write cycles.  Chose the size
-         * pessimistically as we don't know the block size yet.
-         */
-        s->aligned_buf_size = 32 * MAX_BLOCKSIZE;
-        s->aligned_buf = qemu_memalign(MAX_BLOCKSIZE, s->aligned_buf_size);
-        if (s->aligned_buf == NULL) {
-            goto out_close;
-        }
-    }
 
     /* We're falling back to POSIX AIO in some cases so init always */
     if (paio_init() < 0) {
-        goto out_free_buf;
+        goto out_close;
     }
 
 #ifdef CONFIG_LINUX_AIO
-    /*
-     * Currently Linux do AIO only for files opened with O_DIRECT
-     * specified so check NOCACHE flag too
-     */
-    if ((bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) ==
-                      (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) {
-
-        s->aio_ctx = laio_init();
-        if (!s->aio_ctx) {
-            goto out_free_buf;
-        }
-        s->use_aio = 1;
-    } else
-#endif
-    {
-#ifdef CONFIG_LINUX_AIO
-        s->use_aio = 0;
-#endif
+    if (raw_set_aio(&s->aio_ctx, &s->use_aio, bdrv_flags)) {
+        goto out_close;
     }
+#endif
 
 #ifdef CONFIG_XFS
     if (platform_test_xfs_fd(s->fd)) {
@@ -268,8 +285,6 @@
 
     return 0;
 
-out_free_buf:
-    qemu_vfree(s->aligned_buf);
 out_close:
     qemu_close(fd);
     return -errno;
@@ -283,6 +298,109 @@
     return raw_open_common(bs, filename, flags, 0);
 }
 
+static int raw_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue, Error **errp)
+{
+    BDRVRawState *s;
+    BDRVRawReopenState *raw_s;
+    int ret = 0;
+
+    assert(state != NULL);
+    assert(state->bs != NULL);
+
+    s = state->bs->opaque;
+
+    state->opaque = g_malloc0(sizeof(BDRVRawReopenState));
+    raw_s = state->opaque;
+
+#ifdef CONFIG_LINUX_AIO
+    raw_s->use_aio = s->use_aio;
+
+    /* we can use s->aio_ctx instead of a copy, because the use_aio flag is
+     * valid in the 'false' condition even if aio_ctx is set, and raw_set_aio()
+     * won't override aio_ctx if aio_ctx is non-NULL */
+    if (raw_set_aio(&s->aio_ctx, &raw_s->use_aio, state->flags)) {
+        return -1;
+    }
+#endif
+
+    raw_parse_flags(state->flags, &raw_s->open_flags);
+
+    raw_s->fd = -1;
+
+    int fcntl_flags = O_APPEND | O_ASYNC | O_NONBLOCK;
+#ifdef O_NOATIME
+    fcntl_flags |= O_NOATIME;
+#endif
+
+    if ((raw_s->open_flags & ~fcntl_flags) == (s->open_flags & ~fcntl_flags)) {
+        /* dup the original fd */
+        /* TODO: use qemu fcntl wrapper */
+#ifdef F_DUPFD_CLOEXEC
+        raw_s->fd = fcntl(s->fd, F_DUPFD_CLOEXEC, 0);
+#else
+        raw_s->fd = dup(s->fd);
+        if (raw_s->fd != -1) {
+            qemu_set_cloexec(raw_s->fd);
+        }
+#endif
+        if (raw_s->fd >= 0) {
+            ret = fcntl_setfl(raw_s->fd, raw_s->open_flags);
+            if (ret) {
+                qemu_close(raw_s->fd);
+                raw_s->fd = -1;
+            }
+        }
+    }
+
+    /* If we cannot use fcntl, or fcntl failed, fall back to qemu_open() */
+    if (raw_s->fd == -1) {
+        assert(!(raw_s->open_flags & O_CREAT));
+        raw_s->fd = qemu_open(state->bs->filename, raw_s->open_flags);
+        if (raw_s->fd == -1) {
+            ret = -1;
+        }
+    }
+    return ret;
+}
+
+
+static void raw_reopen_commit(BDRVReopenState *state)
+{
+    BDRVRawReopenState *raw_s = state->opaque;
+    BDRVRawState *s = state->bs->opaque;
+
+    s->open_flags = raw_s->open_flags;
+
+    qemu_close(s->fd);
+    s->fd = raw_s->fd;
+#ifdef CONFIG_LINUX_AIO
+    s->use_aio = raw_s->use_aio;
+#endif
+
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+
+static void raw_reopen_abort(BDRVReopenState *state)
+{
+    BDRVRawReopenState *raw_s = state->opaque;
+
+     /* nothing to do if NULL, we didn't get far enough */
+    if (raw_s == NULL) {
+        return;
+    }
+
+    if (raw_s->fd >= 0) {
+        qemu_close(raw_s->fd);
+        raw_s->fd = -1;
+    }
+    g_free(state->opaque);
+    state->opaque = NULL;
+}
+
+
 /* XXX: use host sector size if necessary with:
 #ifdef DIOCGSECTORSIZE
         {
@@ -330,7 +448,7 @@
      * boundary.  Check if this is the case or tell the low-level
      * driver that it needs to copy the buffer.
      */
-    if (s->aligned_buf) {
+    if ((bs->open_flags & BDRV_O_NOCACHE)) {
         if (!qiov_is_aligned(bs, qiov)) {
             type |= QEMU_AIO_MISALIGNED;
 #ifdef CONFIG_LINUX_AIO
@@ -378,8 +496,6 @@
     if (s->fd >= 0) {
         qemu_close(s->fd);
         s->fd = -1;
-        if (s->aligned_buf != NULL)
-            qemu_vfree(s->aligned_buf);
     }
 }
 
@@ -735,6 +851,9 @@
     .instance_size = sizeof(BDRVRawState),
     .bdrv_probe = NULL, /* no probe for protocols */
     .bdrv_file_open = raw_open,
+    .bdrv_reopen_prepare = raw_reopen_prepare,
+    .bdrv_reopen_commit = raw_reopen_commit,
+    .bdrv_reopen_abort = raw_reopen_abort,
     .bdrv_close = raw_close,
     .bdrv_create = raw_create,
     .bdrv_co_discard = raw_co_discard,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index c56bf83..78c8306 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -77,6 +77,23 @@
 				 NULL, 0, NULL, 0, &returned, NULL);
 }
 
+static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
+{
+    assert(access_flags != NULL);
+    assert(overlapped != NULL);
+
+    if (flags & BDRV_O_RDWR) {
+        *access_flags = GENERIC_READ | GENERIC_WRITE;
+    } else {
+        *access_flags = GENERIC_READ;
+    }
+
+    *overlapped = FILE_ATTRIBUTE_NORMAL;
+    if (flags & BDRV_O_NOCACHE) {
+        *overlapped |= FILE_FLAG_NO_BUFFERING;
+    }
+}
+
 static int raw_open(BlockDriverState *bs, const char *filename, int flags)
 {
     BDRVRawState *s = bs->opaque;
@@ -85,17 +102,8 @@
 
     s->type = FTYPE_FILE;
 
-    if (flags & BDRV_O_RDWR) {
-        access_flags = GENERIC_READ | GENERIC_WRITE;
-    } else {
-        access_flags = GENERIC_READ;
-    }
+    raw_parse_flags(flags, &access_flags, &overlapped);
 
-    overlapped = FILE_ATTRIBUTE_NORMAL;
-    if (flags & BDRV_O_NOCACHE)
-        overlapped |= FILE_FLAG_NO_BUFFERING;
-    if (!(flags & BDRV_O_CACHE_WB))
-        overlapped |= FILE_FLAG_WRITE_THROUGH;
     s->hfile = CreateFile(filename, access_flags,
                           FILE_SHARE_READ, NULL,
                           OPEN_EXISTING, overlapped, NULL);
@@ -374,18 +382,10 @@
     }
     s->type = find_device_type(bs, filename);
 
-    if (flags & BDRV_O_RDWR) {
-        access_flags = GENERIC_READ | GENERIC_WRITE;
-    } else {
-        access_flags = GENERIC_READ;
-    }
+    raw_parse_flags(flags, &access_flags, &overlapped);
+
     create_flags = OPEN_EXISTING;
 
-    overlapped = FILE_ATTRIBUTE_NORMAL;
-    if (flags & BDRV_O_NOCACHE)
-        overlapped |= FILE_FLAG_NO_BUFFERING;
-    if (!(flags & BDRV_O_CACHE_WB))
-        overlapped |= FILE_FLAG_WRITE_THROUGH;
     s->hfile = CreateFile(filename, access_flags,
                           FILE_SHARE_READ, NULL,
                           create_flags, overlapped, NULL);
diff --git a/block/raw.c b/block/raw.c
index ff34ea4..253e949 100644
--- a/block/raw.c
+++ b/block/raw.c
@@ -9,6 +9,14 @@
     return 0;
 }
 
+/* We have nothing to do for raw reopen, stubs just return
+ * success */
+static int raw_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue,  Error **errp)
+{
+    return 0;
+}
+
 static int coroutine_fn raw_co_readv(BlockDriverState *bs, int64_t sector_num,
                                      int nb_sectors, QEMUIOVector *qiov)
 {
@@ -115,6 +123,8 @@
     .bdrv_open          = raw_open,
     .bdrv_close         = raw_close,
 
+    .bdrv_reopen_prepare  = raw_reopen_prepare,
+
     .bdrv_co_readv          = raw_co_readv,
     .bdrv_co_writev         = raw_co_writev,
     .bdrv_co_is_allocated   = raw_co_is_allocated,
diff --git a/block/rbd.c b/block/rbd.c
index 5a0f79f..015a9db 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -487,12 +487,6 @@
         rados_conf_set(s->cluster, "rbd_cache", "false");
     } else {
         rados_conf_set(s->cluster, "rbd_cache", "true");
-        if (!(flags & BDRV_O_CACHE_WB)) {
-            r = rados_conf_set(s->cluster, "rbd_cache_max_dirty", "0");
-            if (r < 0) {
-                rados_conf_set(s->cluster, "rbd_cache", "false");
-            }
-        }
     }
 
     if (strstr(conf, "conf=") == NULL) {
diff --git a/block/sheepdog.c b/block/sheepdog.c
index df4f441..f35ff5b 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -866,14 +866,14 @@
         s->port = 0;
     }
 
-    strncpy(vdi, p, SD_MAX_VDI_LEN);
+    pstrcpy(vdi, SD_MAX_VDI_LEN, p);
 
     p = strchr(vdi, ':');
     if (p) {
         *p++ = '\0';
         *snapid = strtoul(p, NULL, 10);
         if (*snapid == 0) {
-            strncpy(tag, p, SD_MAX_VDI_TAG_LEN);
+            pstrcpy(tag, SD_MAX_VDI_TAG_LEN, p);
         }
     } else {
         *snapid = CURRENT_VDI_ID; /* search current vdi */
@@ -900,7 +900,10 @@
         return fd;
     }
 
-    memset(buf, 0, sizeof(buf));
+    /* This pair of strncpy calls ensures that the buffer is zero-filled,
+     * which is desirable since we'll soon be sending those bytes, and
+     * don't want the send_req to read uninitialized data.
+     */
     strncpy(buf, filename, SD_MAX_VDI_LEN);
     strncpy(buf + SD_MAX_VDI_LEN, tag, SD_MAX_VDI_TAG_LEN);
 
@@ -1114,14 +1117,12 @@
         goto out;
     }
 
-    if (flags & BDRV_O_CACHE_WB) {
-        s->cache_enabled = 1;
-        s->flush_fd = connect_to_sdog(s->addr, s->port);
-        if (s->flush_fd < 0) {
-            error_report("failed to connect");
-            ret = s->flush_fd;
-            goto out;
-        }
+    s->cache_enabled = 1;
+    s->flush_fd = connect_to_sdog(s->addr, s->port);
+    if (s->flush_fd < 0) {
+        error_report("failed to connect");
+        ret = s->flush_fd;
+        goto out;
     }
 
     if (snapid || tag[0] != '\0') {
@@ -1151,7 +1152,7 @@
     s->max_dirty_data_idx = 0;
 
     bs->total_sectors = s->inode.vdi_size / SECTOR_SIZE;
-    strncpy(s->name, vdi, sizeof(s->name));
+    pstrcpy(s->name, sizeof(s->name), vdi);
     qemu_co_mutex_init(&s->lock);
     g_free(buf);
     return 0;
@@ -1179,8 +1180,11 @@
         return fd;
     }
 
+    /* FIXME: would it be better to fail (e.g., return -EIO) when filename
+     * does not fit in buf?  For now, just truncate and avoid buffer overrun.
+     */
     memset(buf, 0, sizeof(buf));
-    strncpy(buf, filename, SD_MAX_VDI_LEN);
+    pstrcpy(buf, sizeof(buf), filename);
 
     memset(&hdr, 0, sizeof(hdr));
     hdr.opcode = SD_OP_NEW_VDI;
@@ -1754,6 +1758,9 @@
 
     s->inode.vm_state_size = sn_info->vm_state_size;
     s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
+    /* It appears that inode.tag does not require a NUL terminator,
+     * which means this use of strncpy is ok.
+     */
     strncpy(s->inode.tag, sn_info->name, sizeof(s->inode.tag));
     /* we don't need to update entire object */
     datalen = SD_INODE_SIZE - sizeof(s->inode.data_vdi_id);
@@ -1813,13 +1820,13 @@
 
     memcpy(old_s, s, sizeof(BDRVSheepdogState));
 
-    memset(vdi, 0, sizeof(vdi));
-    strncpy(vdi, s->name, sizeof(vdi));
+    pstrcpy(vdi, sizeof(vdi), s->name);
 
-    memset(tag, 0, sizeof(tag));
     snapid = strtoul(snapshot_id, NULL, 10);
-    if (!snapid) {
-        strncpy(tag, s->name, sizeof(tag));
+    if (snapid) {
+        tag[0] = 0;
+    } else {
+        pstrcpy(tag, sizeof(tag), s->name);
     }
 
     ret = find_vdi_name(s, vdi, snapid, tag, &vid, 1);
@@ -1948,8 +1955,9 @@
 
             snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
                      inode.snap_id);
-            strncpy(sn_tab[found].name, inode.tag,
-                    MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)));
+            pstrcpy(sn_tab[found].name,
+                    MIN(sizeof(sn_tab[found].name), sizeof(inode.tag)),
+                    inode.tag);
             found++;
         }
     }
@@ -1986,7 +1994,7 @@
         vdi_index = pos / SD_DATA_OBJ_SIZE;
         offset = pos % SD_DATA_OBJ_SIZE;
 
-        data_len = MIN(remaining, SD_DATA_OBJ_SIZE);
+        data_len = MIN(remaining, SD_DATA_OBJ_SIZE - offset);
 
         vmstate_oid = vid_to_vmstate_oid(s->inode.vdi_id, vdi_index);
 
@@ -2007,6 +2015,7 @@
         }
 
         pos += data_len;
+        data += data_len;
         remaining -= data_len;
     }
     ret = size;
diff --git a/block/stream.c b/block/stream.c
index c4f87dd..7926652 100644
--- a/block/stream.c
+++ b/block/stream.c
@@ -13,6 +13,7 @@
 
 #include "trace.h"
 #include "block_int.h"
+#include "blockjob.h"
 #include "qemu/ratelimit.h"
 
 enum {
@@ -30,6 +31,7 @@
     BlockJob common;
     RateLimit limit;
     BlockDriverState *base;
+    BlockdevOnError on_error;
     char backing_file_id[1024];
 } StreamBlockJob;
 
@@ -77,6 +79,7 @@
     BlockDriverState *bs = s->common.bs;
     BlockDriverState *base = s->base;
     int64_t sector_num, end;
+    int error = 0;
     int ret = 0;
     int n = 0;
     void *buf;
@@ -141,7 +144,19 @@
             ret = stream_populate(bs, sector_num, n, buf);
         }
         if (ret < 0) {
-            break;
+            BlockErrorAction action =
+                block_job_error_action(&s->common, s->common.bs, s->on_error,
+                                       true, -ret);
+            if (action == BDRV_ACTION_STOP) {
+                n = 0;
+                continue;
+            }
+            if (error == 0) {
+                error = ret;
+            }
+            if (action == BDRV_ACTION_REPORT) {
+                break;
+            }
         }
         ret = 0;
 
@@ -153,6 +168,9 @@
         bdrv_disable_copy_on_read(bs);
     }
 
+    /* Do not remove the backing file if an error was there but ignored.  */
+    ret = error;
+
     if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) {
         const char *base_id = NULL, *base_fmt = NULL;
         if (base) {
@@ -188,11 +206,19 @@
 
 void stream_start(BlockDriverState *bs, BlockDriverState *base,
                   const char *base_id, int64_t speed,
+                  BlockdevOnError on_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp)
 {
     StreamBlockJob *s;
 
+    if ((on_error == BLOCKDEV_ON_ERROR_STOP ||
+         on_error == BLOCKDEV_ON_ERROR_ENOSPC) &&
+        !bdrv_iostatus_is_enabled(bs)) {
+        error_set(errp, QERR_INVALID_PARAMETER, "on-error");
+        return;
+    }
+
     s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp);
     if (!s) {
         return;
@@ -203,6 +229,7 @@
         pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id);
     }
 
+    s->on_error = on_error;
     s->common.co = qemu_coroutine_create(stream_run);
     trace_stream_start(bs, base, s, s->common.co, opaque);
     qemu_coroutine_enter(s->common.co, s);
diff --git a/block/vdi.c b/block/vdi.c
index c4f1529..f35b12e 100644
--- a/block/vdi.c
+++ b/block/vdi.c
@@ -454,6 +454,12 @@
     return -1;
 }
 
+static int vdi_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 static int coroutine_fn vdi_co_is_allocated(BlockDriverState *bs,
         int64_t sector_num, int nb_sectors, int *pnum)
 {
@@ -628,7 +634,6 @@
     VdiHeader header;
     size_t i;
     size_t bmap_size;
-    uint32_t *bmap;
 
     logout("\n");
 
@@ -693,21 +698,21 @@
         result = -errno;
     }
 
-    bmap = NULL;
     if (bmap_size > 0) {
-        bmap = (uint32_t *)g_malloc0(bmap_size);
-    }
-    for (i = 0; i < blocks; i++) {
-        if (image_type == VDI_TYPE_STATIC) {
-            bmap[i] = i;
-        } else {
-            bmap[i] = VDI_UNALLOCATED;
+        uint32_t *bmap = g_malloc0(bmap_size);
+        for (i = 0; i < blocks; i++) {
+            if (image_type == VDI_TYPE_STATIC) {
+                bmap[i] = i;
+            } else {
+                bmap[i] = VDI_UNALLOCATED;
+            }
         }
+        if (write(fd, bmap, bmap_size) < 0) {
+            result = -errno;
+        }
+        g_free(bmap);
     }
-    if (write(fd, bmap, bmap_size) < 0) {
-        result = -errno;
-    }
-    g_free(bmap);
+
     if (image_type == VDI_TYPE_STATIC) {
         if (ftruncate(fd, sizeof(header) + bmap_size + blocks * block_size)) {
             result = -errno;
@@ -762,6 +767,7 @@
     .bdrv_probe = vdi_probe,
     .bdrv_open = vdi_open,
     .bdrv_close = vdi_close,
+    .bdrv_reopen_prepare = vdi_reopen_prepare,
     .bdrv_create = vdi_create,
     .bdrv_co_is_allocated = vdi_co_is_allocated,
     .bdrv_make_empty = vdi_make_empty,
diff --git a/block/vmdk.c b/block/vmdk.c
index bba4c61..1a80e5a 100644
--- a/block/vmdk.c
+++ b/block/vmdk.c
@@ -300,6 +300,40 @@
     return 1;
 }
 
+/* Queue extents, if any, for reopen() */
+static int vmdk_reopen_prepare(BDRVReopenState *state,
+                               BlockReopenQueue *queue, Error **errp)
+{
+    BDRVVmdkState *s;
+    int ret = -1;
+    int i;
+    VmdkExtent *e;
+
+    assert(state != NULL);
+    assert(state->bs != NULL);
+
+    if (queue == NULL) {
+        error_set(errp, ERROR_CLASS_GENERIC_ERROR,
+                 "No reopen queue for VMDK extents");
+        goto exit;
+    }
+
+    s = state->bs->opaque;
+
+    assert(s != NULL);
+
+    for (i = 0; i < s->num_extents; i++) {
+        e = &s->extents[i];
+        if (e->file != state->bs->file) {
+            bdrv_reopen_queue(queue, e->file, state->flags);
+        }
+    }
+    ret = 0;
+
+exit:
+    return ret;
+}
+
 static int vmdk_parent_open(BlockDriverState *bs)
 {
     char *p_name;
@@ -1374,8 +1408,7 @@
         return -1;
     }
     if (path_is_absolute(target)) {
-        dest[dest_size - 1] = '\0';
-        strncpy(dest, target, dest_size - 1);
+        pstrcpy(dest, dest_size, target);
         return 0;
     }
     while (base[i] == target[i]) {
@@ -1646,6 +1679,7 @@
     .instance_size  = sizeof(BDRVVmdkState),
     .bdrv_probe     = vmdk_probe,
     .bdrv_open      = vmdk_open,
+    .bdrv_reopen_prepare = vmdk_reopen_prepare,
     .bdrv_read      = vmdk_co_read,
     .bdrv_write     = vmdk_co_write,
     .bdrv_close     = vmdk_close,
diff --git a/block/vpc.c b/block/vpc.c
index c0b82c4..b6bf52f 100644
--- a/block/vpc.c
+++ b/block/vpc.c
@@ -265,6 +265,12 @@
     return err;
 }
 
+static int vpc_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
 /*
  * Returns the absolute byte offset of the given sector in the image file.
  * If the sector is not allocated, -1 is returned instead.
@@ -783,6 +789,7 @@
     .bdrv_probe     = vpc_probe,
     .bdrv_open      = vpc_open,
     .bdrv_close     = vpc_close,
+    .bdrv_reopen_prepare = vpc_reopen_prepare,
     .bdrv_create    = vpc_create,
 
     .bdrv_read              = vpc_co_read,
diff --git a/block_int.h b/block_int.h
index 4452f6f..f4bae04 100644
--- a/block_int.h
+++ b/block_int.h
@@ -31,6 +31,7 @@
 #include "qemu-timer.h"
 #include "qapi-types.h"
 #include "qerror.h"
+#include "monitor.h"
 
 #define BLOCK_FLAG_ENCRYPT          1
 #define BLOCK_FLAG_COMPAT6          4
@@ -67,78 +68,18 @@
     uint64_t ios[2];
 } BlockIOBaseValue;
 
-typedef struct BlockJob BlockJob;
-
-/**
- * BlockJobType:
- *
- * A class type for block job objects.
- */
-typedef struct BlockJobType {
-    /** Derived BlockJob struct size */
-    size_t instance_size;
-
-    /** String describing the operation, part of query-block-jobs QMP API */
-    const char *job_type;
-
-    /** Optional callback for job types that support setting a speed limit */
-    void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
-} BlockJobType;
-
-/**
- * BlockJob:
- *
- * Long-running operation on a BlockDriverState.
- */
-struct BlockJob {
-    /** The job type, including the job vtable.  */
-    const BlockJobType *job_type;
-
-    /** The block device on which the job is operating.  */
-    BlockDriverState *bs;
-
-    /**
-     * The coroutine that executes the job.  If not NULL, it is
-     * reentered when busy is false and the job is cancelled.
-     */
-    Coroutine *co;
-
-    /**
-     * Set to true if the job should cancel itself.  The flag must
-     * always be tested just before toggling the busy flag from false
-     * to true.  After a job has been cancelled, it should only yield
-     * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
-     */
-    bool cancelled;
-
-    /**
-     * Set to false by the job while it is in a quiescent state, where
-     * no I/O is pending and the job has yielded on any condition
-     * that is not detected by #qemu_aio_wait, such as a timer.
-     */
-    bool busy;
-
-    /** Offset that is published by the query-block-jobs QMP API */
-    int64_t offset;
-
-    /** Length that is published by the query-block-jobs QMP API */
-    int64_t len;
-
-    /** Speed that was set with @block_job_set_speed.  */
-    int64_t speed;
-
-    /** The completion function that will be called when the job completes.  */
-    BlockDriverCompletionFunc *cb;
-
-    /** The opaque value that is passed to the completion function.  */
-    void *opaque;
-};
-
 struct BlockDriver {
     const char *format_name;
     int instance_size;
     int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename);
     int (*bdrv_probe_device)(const char *filename);
+
+    /* For handling image reopen for split or non-split files */
+    int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state,
+                               BlockReopenQueue *queue, Error **errp);
+    void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state);
+    void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state);
+
     int (*bdrv_open)(BlockDriverState *bs, int flags);
     int (*bdrv_file_open)(BlockDriverState *bs, const char *filename, int flags);
     int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num,
@@ -268,7 +209,6 @@
     int64_t total_sectors; /* if we are reading a disk image, give its
                               size in sectors */
     int read_only; /* if true, the media is read only */
-    int keep_read_only; /* if true, the media was requested to stay read only */
     int open_flags; /* flags used to open the file, re-used for re-open */
     int encrypted; /* if true, the media is encrypted */
     int valid_key; /* if true, a valid encryption key has been set */
@@ -323,7 +263,7 @@
 
     /* NOTE: the following infos are only hints for real hardware
        drivers. They are not used by the block driver */
-    BlockErrorAction on_read_error, on_write_error;
+    BlockdevOnError on_read_error, on_write_error;
     bool iostatus_enabled;
     BlockDeviceIoStatus iostatus;
     char device_name[32];
@@ -336,6 +276,7 @@
 
     /* long-running background operation */
     BlockJob *job;
+
 };
 
 int get_tmp_filename(char *filename, int size);
@@ -346,92 +287,9 @@
 #ifdef _WIN32
 int is_windows_drive(const char *filename);
 #endif
-
-/**
- * block_job_create:
- * @job_type: The class object for the newly-created job.
- * @bs: The block
- * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
- * @cb: Completion function for the job.
- * @opaque: Opaque pointer value passed to @cb.
- * @errp: Error object.
- *
- * Create a new long-running block device job and return it.  The job
- * will call @cb asynchronously when the job completes.  Note that
- * @bs may have been closed at the time the @cb it is called.  If
- * this is the case, the job may be reported as either cancelled or
- * completed.
- *
- * This function is not part of the public job interface; it should be
- * called from a wrapper that is specific to the job type.
- */
-void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
-                       int64_t speed, BlockDriverCompletionFunc *cb,
-                       void *opaque, Error **errp);
-
-/**
- * block_job_sleep_ns:
- * @job: The job that calls the function.
- * @clock: The clock to sleep on.
- * @ns: How many nanoseconds to stop for.
- *
- * Put the job to sleep (assuming that it wasn't canceled) for @ns
- * nanoseconds.  Canceling the job will interrupt the wait immediately.
- */
-void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
-
-/**
- * block_job_complete:
- * @job: The job being completed.
- * @ret: The status code.
- *
- * Call the completion function that was registered at creation time, and
- * free @job.
- */
-void block_job_complete(BlockJob *job, int ret);
-
-/**
- * block_job_set_speed:
- * @job: The job to set the speed for.
- * @speed: The new value
- * @errp: Error object.
- *
- * Set a rate-limiting parameter for the job; the actual meaning may
- * vary depending on the job type.
- */
-void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the specified job.
- */
-void block_job_cancel(BlockJob *job);
-
-/**
- * block_job_is_cancelled:
- * @job: The job being queried.
- *
- * Returns whether the job is scheduled for cancellation.
- */
-bool block_job_is_cancelled(BlockJob *job);
-
-/**
- * block_job_cancel:
- * @job: The job to be canceled.
- *
- * Asynchronously cancel the job and wait for it to reach a quiescent
- * state.  Note that the completion callback will still be called
- * asynchronously, hence it is *not* valid to call #bdrv_delete
- * immediately after #block_job_cancel_sync.  Users of block jobs
- * will usually protect the BlockDriverState objects with a reference
- * count, should this be a concern.
- *
- * Returns the return value from the job if the job actually completed
- * during the call, or -ECANCELED if it was canceled.
- */
-int block_job_cancel_sync(BlockJob *job);
+void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv,
+                               enum MonitorEvent ev,
+                               BlockErrorAction action, bool is_read);
 
 /**
  * stream_start:
@@ -441,6 +299,7 @@
  * @base_id: The file name that will be written to @bs as the new
  * backing file if the job completes.  Ignored if @base is %NULL.
  * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
  * @cb: Completion function for the job.
  * @opaque: Opaque pointer value passed to @cb.
  * @errp: Error object.
@@ -452,8 +311,24 @@
  * @base_id in the written image and to @base in the live BlockDriverState.
  */
 void stream_start(BlockDriverState *bs, BlockDriverState *base,
-                  const char *base_id, int64_t speed,
+                  const char *base_id, int64_t speed, BlockdevOnError on_error,
                   BlockDriverCompletionFunc *cb,
                   void *opaque, Error **errp);
 
+/**
+ * commit_start:
+ * @bs: Top Block device
+ * @base: Block device that will be written into, and become the new top
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @on_error: The action to take upon error.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ */
+void commit_start(BlockDriverState *bs, BlockDriverState *base,
+                 BlockDriverState *top, int64_t speed,
+                 BlockdevOnError on_error, BlockDriverCompletionFunc *cb,
+                 void *opaque, Error **errp);
+
 #endif /* BLOCK_INT_H */
diff --git a/blockdev.c b/blockdev.c
index 4a5266e..636fc56 100644
--- a/blockdev.c
+++ b/blockdev.c
@@ -9,6 +9,7 @@
 
 #include "blockdev.h"
 #include "hw/block-common.h"
+#include "blockjob.h"
 #include "monitor.h"
 #include "qerror.h"
 #include "qemu-option.h"
@@ -237,16 +238,16 @@
     qemu_bh_schedule(s->bh);
 }
 
-static int parse_block_error_action(const char *buf, int is_read)
+static int parse_block_error_action(const char *buf, bool is_read)
 {
     if (!strcmp(buf, "ignore")) {
-        return BLOCK_ERR_IGNORE;
+        return BLOCKDEV_ON_ERROR_IGNORE;
     } else if (!is_read && !strcmp(buf, "enospc")) {
-        return BLOCK_ERR_STOP_ENOSPC;
+        return BLOCKDEV_ON_ERROR_ENOSPC;
     } else if (!strcmp(buf, "stop")) {
-        return BLOCK_ERR_STOP_ANY;
+        return BLOCKDEV_ON_ERROR_STOP;
     } else if (!strcmp(buf, "report")) {
-        return BLOCK_ERR_REPORT;
+        return BLOCKDEV_ON_ERROR_REPORT;
     } else {
         error_report("'%s' invalid %s error action",
                      buf, is_read ? "read" : "write");
@@ -438,7 +439,7 @@
                 "update your scripts.\n");
     }
 
-    on_write_error = BLOCK_ERR_STOP_ENOSPC;
+    on_write_error = BLOCKDEV_ON_ERROR_ENOSPC;
     if ((buf = qemu_opt_get(opts, "werror")) != NULL) {
         if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) {
             error_report("werror is not supported by this bus type");
@@ -451,7 +452,7 @@
         }
     }
 
-    on_read_error = BLOCK_ERR_REPORT;
+    on_read_error = BLOCKDEV_ON_ERROR_REPORT;
     if ((buf = qemu_opt_get(opts, "rerror")) != NULL) {
         if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) {
             error_report("rerror is not supported by this bus type");
@@ -533,6 +534,8 @@
                      if_name[type], mediastr, unit_id);
     }
     dinfo->bdrv = bdrv_new(dinfo->id);
+    dinfo->bdrv->open_flags = snapshot ? BDRV_O_SNAPSHOT : 0;
+    dinfo->bdrv->read_only = ro;
     dinfo->devaddr = devaddr;
     dinfo->type = type;
     dinfo->bus = bus_id;
@@ -809,6 +812,11 @@
     QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) {
         /* This removes our old bs from the bdrv_states, and adds the new bs */
         bdrv_append(states->new_bs, states->old_bs);
+        /* We don't need (or want) to use the transactional
+         * bdrv_reopen_multiple() across all the entries at once, because we
+         * don't want to abort all of them if one of them fails the reopen */
+        bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR,
+                    NULL);
     }
 
     /* success */
@@ -828,7 +836,6 @@
     QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) {
         g_free(states);
     }
-    return;
 }
 
 
@@ -1069,12 +1076,12 @@
                               job->speed);
 }
 
-static void block_stream_cb(void *opaque, int ret)
+static void block_job_cb(void *opaque, int ret)
 {
     BlockDriverState *bs = opaque;
     QObject *obj;
 
-    trace_block_stream_cb(bs, bs->job, ret);
+    trace_block_job_cb(bs, bs->job, ret);
 
     assert(bs->job);
     obj = qobject_from_block_job(bs->job);
@@ -1094,13 +1101,18 @@
 }
 
 void qmp_block_stream(const char *device, bool has_base,
-                      const char *base, bool has_speed,
-                      int64_t speed, Error **errp)
+                      const char *base, bool has_speed, int64_t speed,
+                      bool has_on_error, BlockdevOnError on_error,
+                      Error **errp)
 {
     BlockDriverState *bs;
     BlockDriverState *base_bs = NULL;
     Error *local_err = NULL;
 
+    if (!has_on_error) {
+        on_error = BLOCKDEV_ON_ERROR_REPORT;
+    }
+
     bs = bdrv_find(device);
     if (!bs) {
         error_set(errp, QERR_DEVICE_NOT_FOUND, device);
@@ -1116,7 +1128,7 @@
     }
 
     stream_start(bs, base_bs, base, has_speed ? speed : 0,
-                 block_stream_cb, bs, &local_err);
+                 on_error, block_job_cb, bs, &local_err);
     if (error_is_set(&local_err)) {
         error_propagate(errp, local_err);
         return;
@@ -1130,6 +1142,64 @@
     trace_qmp_block_stream(bs, bs->job);
 }
 
+void qmp_block_commit(const char *device,
+                      bool has_base, const char *base, const char *top,
+                      bool has_speed, int64_t speed,
+                      Error **errp)
+{
+    BlockDriverState *bs;
+    BlockDriverState *base_bs, *top_bs;
+    Error *local_err = NULL;
+    /* This will be part of the QMP command, if/when the
+     * BlockdevOnError change for blkmirror makes it in
+     */
+    BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT;
+
+    /* drain all i/o before commits */
+    bdrv_drain_all();
+
+    bs = bdrv_find(device);
+    if (!bs) {
+        error_set(errp, QERR_DEVICE_NOT_FOUND, device);
+        return;
+    }
+    if (base && has_base) {
+        base_bs = bdrv_find_backing_image(bs, base);
+    } else {
+        base_bs = bdrv_find_base(bs);
+    }
+
+    if (base_bs == NULL) {
+        error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL");
+        return;
+    }
+
+    /* default top_bs is the active layer */
+    top_bs = bs;
+
+    if (top) {
+        if (strcmp(bs->filename, top) != 0) {
+            top_bs = bdrv_find_backing_image(bs, top);
+        }
+    }
+
+    if (top_bs == NULL) {
+        error_setg(errp, "Top image file %s not found", top ? top : "NULL");
+        return;
+    }
+
+    commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs,
+                &local_err);
+    if (local_err != NULL) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    /* Grab a reference so hotplug does not delete the BlockDriverState from
+     * underneath us.
+     */
+    drive_get_ref(drive_get_by_blockdev(bs));
+}
+
 static BlockJob *find_block_job(const char *device)
 {
     BlockDriverState *bs;
@@ -1146,19 +1216,28 @@
     BlockJob *job = find_block_job(device);
 
     if (!job) {
-        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
         return;
     }
 
     block_job_set_speed(job, speed, errp);
 }
 
-void qmp_block_job_cancel(const char *device, Error **errp)
+void qmp_block_job_cancel(const char *device,
+                          bool has_force, bool force, Error **errp)
 {
     BlockJob *job = find_block_job(device);
 
+    if (!has_force) {
+        force = false;
+    }
+
     if (!job) {
-        error_set(errp, QERR_DEVICE_NOT_ACTIVE, device);
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+    if (job->paused && !force) {
+        error_set(errp, QERR_BLOCK_JOB_PAUSED, device);
         return;
     }
 
@@ -1166,25 +1245,40 @@
     block_job_cancel(job);
 }
 
+void qmp_block_job_pause(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_pause(job);
+    block_job_pause(job);
+}
+
+void qmp_block_job_resume(const char *device, Error **errp)
+{
+    BlockJob *job = find_block_job(device);
+
+    if (!job) {
+        error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device);
+        return;
+    }
+
+    trace_qmp_block_job_resume(job);
+    block_job_resume(job);
+}
+
 static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs)
 {
     BlockJobInfoList **prev = opaque;
     BlockJob *job = bs->job;
 
     if (job) {
-        BlockJobInfoList *elem;
-        BlockJobInfo *info = g_new(BlockJobInfo, 1);
-        *info = (BlockJobInfo){
-            .type   = g_strdup(job->job_type->job_type),
-            .device = g_strdup(bdrv_get_device_name(bs)),
-            .len    = job->len,
-            .offset = job->offset,
-            .speed  = job->speed,
-        };
-
-        elem = g_new0(BlockJobInfoList, 1);
-        elem->value = info;
-
+        BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1);
+        elem->value = block_job_query(bs->job);
         (*prev)->next = elem;
         *prev = elem;
     }
diff --git a/blockjob.c b/blockjob.c
new file mode 100644
index 0000000..f55f55a
--- /dev/null
+++ b/blockjob.c
@@ -0,0 +1,249 @@
+/*
+ * QEMU System Emulator block driver
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "config-host.h"
+#include "qemu-common.h"
+#include "trace.h"
+#include "monitor.h"
+#include "block.h"
+#include "blockjob.h"
+#include "block_int.h"
+#include "qjson.h"
+#include "qemu-coroutine.h"
+#include "qmp-commands.h"
+#include "qemu-timer.h"
+
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       int64_t speed, BlockDriverCompletionFunc *cb,
+                       void *opaque, Error **errp)
+{
+    BlockJob *job;
+
+    if (bs->job || bdrv_in_use(bs)) {
+        error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs));
+        return NULL;
+    }
+    bdrv_set_in_use(bs, 1);
+
+    job = g_malloc0(job_type->instance_size);
+    job->job_type      = job_type;
+    job->bs            = bs;
+    job->cb            = cb;
+    job->opaque        = opaque;
+    job->busy          = true;
+    bs->job = job;
+
+    /* Only set speed when necessary to avoid NotSupported error */
+    if (speed != 0) {
+        Error *local_err = NULL;
+
+        block_job_set_speed(job, speed, &local_err);
+        if (error_is_set(&local_err)) {
+            bs->job = NULL;
+            g_free(job);
+            bdrv_set_in_use(bs, 0);
+            error_propagate(errp, local_err);
+            return NULL;
+        }
+    }
+    return job;
+}
+
+void block_job_complete(BlockJob *job, int ret)
+{
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+    job->cb(job->opaque, ret);
+    bs->job = NULL;
+    g_free(job);
+    bdrv_set_in_use(bs, 0);
+}
+
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
+{
+    Error *local_err = NULL;
+
+    if (!job->job_type->set_speed) {
+        error_set(errp, QERR_NOT_SUPPORTED);
+        return;
+    }
+    job->job_type->set_speed(job, speed, &local_err);
+    if (error_is_set(&local_err)) {
+        error_propagate(errp, local_err);
+        return;
+    }
+
+    job->speed = speed;
+}
+
+void block_job_pause(BlockJob *job)
+{
+    job->paused = true;
+}
+
+bool block_job_is_paused(BlockJob *job)
+{
+    return job->paused;
+}
+
+void block_job_resume(BlockJob *job)
+{
+    job->paused = false;
+    block_job_iostatus_reset(job);
+    if (job->co && !job->busy) {
+        qemu_coroutine_enter(job->co, NULL);
+    }
+}
+
+void block_job_cancel(BlockJob *job)
+{
+    job->cancelled = true;
+    block_job_resume(job);
+}
+
+bool block_job_is_cancelled(BlockJob *job)
+{
+    return job->cancelled;
+}
+
+void block_job_iostatus_reset(BlockJob *job)
+{
+    job->iostatus = BLOCK_DEVICE_IO_STATUS_OK;
+}
+
+struct BlockCancelData {
+    BlockJob *job;
+    BlockDriverCompletionFunc *cb;
+    void *opaque;
+    bool cancelled;
+    int ret;
+};
+
+static void block_job_cancel_cb(void *opaque, int ret)
+{
+    struct BlockCancelData *data = opaque;
+
+    data->cancelled = block_job_is_cancelled(data->job);
+    data->ret = ret;
+    data->cb(data->opaque, ret);
+}
+
+int block_job_cancel_sync(BlockJob *job)
+{
+    struct BlockCancelData data;
+    BlockDriverState *bs = job->bs;
+
+    assert(bs->job == job);
+
+    /* Set up our own callback to store the result and chain to
+     * the original callback.
+     */
+    data.job = job;
+    data.cb = job->cb;
+    data.opaque = job->opaque;
+    data.ret = -EINPROGRESS;
+    job->cb = block_job_cancel_cb;
+    job->opaque = &data;
+    block_job_cancel(job);
+    while (data.ret == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
+    return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret;
+}
+
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns)
+{
+    assert(job->busy);
+
+    /* Check cancellation *before* setting busy = false, too!  */
+    if (block_job_is_cancelled(job)) {
+        return;
+    }
+
+    job->busy = false;
+    if (block_job_is_paused(job)) {
+        qemu_coroutine_yield();
+    } else {
+        co_sleep_ns(clock, ns);
+    }
+    job->busy = true;
+}
+
+BlockJobInfo *block_job_query(BlockJob *job)
+{
+    BlockJobInfo *info = g_new0(BlockJobInfo, 1);
+    info->type      = g_strdup(job->job_type->job_type);
+    info->device    = g_strdup(bdrv_get_device_name(job->bs));
+    info->len       = job->len;
+    info->busy      = job->busy;
+    info->paused    = job->paused;
+    info->offset    = job->offset;
+    info->speed     = job->speed;
+    info->io_status = job->iostatus;
+    return info;
+}
+
+static void block_job_iostatus_set_err(BlockJob *job, int error)
+{
+    if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
+        job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE :
+                                          BLOCK_DEVICE_IO_STATUS_FAILED;
+    }
+}
+
+
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+                                        BlockdevOnError on_err,
+                                        int is_read, int error)
+{
+    BlockErrorAction action;
+
+    switch (on_err) {
+    case BLOCKDEV_ON_ERROR_ENOSPC:
+        action = (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT;
+        break;
+    case BLOCKDEV_ON_ERROR_STOP:
+        action = BDRV_ACTION_STOP;
+        break;
+    case BLOCKDEV_ON_ERROR_REPORT:
+        action = BDRV_ACTION_REPORT;
+        break;
+    case BLOCKDEV_ON_ERROR_IGNORE:
+        action = BDRV_ACTION_IGNORE;
+        break;
+    default:
+        abort();
+    }
+    bdrv_emit_qmp_error_event(job->bs, QEVENT_BLOCK_JOB_ERROR, action, is_read);
+    if (action == BDRV_ACTION_STOP) {
+        block_job_pause(job);
+        block_job_iostatus_set_err(job, error);
+        if (bs != job->bs) {
+            bdrv_iostatus_set_err(bs, error);
+        }
+    }
+    return action;
+}
diff --git a/blockjob.h b/blockjob.h
new file mode 100644
index 0000000..930cc3c
--- /dev/null
+++ b/blockjob.h
@@ -0,0 +1,243 @@
+/*
+ * Declarations for long-running block device operations
+ *
+ * Copyright (c) 2011 IBM Corp.
+ * Copyright (c) 2012 Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef BLOCKJOB_H
+#define BLOCKJOB_H 1
+
+#include "block.h"
+
+/**
+ * BlockJobType:
+ *
+ * A class type for block job objects.
+ */
+typedef struct BlockJobType {
+    /** Derived BlockJob struct size */
+    size_t instance_size;
+
+    /** String describing the operation, part of query-block-jobs QMP API */
+    const char *job_type;
+
+    /** Optional callback for job types that support setting a speed limit */
+    void (*set_speed)(BlockJob *job, int64_t speed, Error **errp);
+} BlockJobType;
+
+/**
+ * BlockJob:
+ *
+ * Long-running operation on a BlockDriverState.
+ */
+struct BlockJob {
+    /** The job type, including the job vtable.  */
+    const BlockJobType *job_type;
+
+    /** The block device on which the job is operating.  */
+    BlockDriverState *bs;
+
+    /**
+     * The coroutine that executes the job.  If not NULL, it is
+     * reentered when busy is false and the job is cancelled.
+     */
+    Coroutine *co;
+
+    /**
+     * Set to true if the job should cancel itself.  The flag must
+     * always be tested just before toggling the busy flag from false
+     * to true.  After a job has been cancelled, it should only yield
+     * if #qemu_aio_wait will ("sooner or later") reenter the coroutine.
+     */
+    bool cancelled;
+
+    /**
+     * Set to true if the job is either paused, or will pause itself
+     * as soon as possible (if busy == true).
+     */
+    bool paused;
+
+    /**
+     * Set to false by the job while it is in a quiescent state, where
+     * no I/O is pending and the job has yielded on any condition
+     * that is not detected by #qemu_aio_wait, such as a timer.
+     */
+    bool busy;
+
+    /** Status that is published by the query-block-jobs QMP API */
+    BlockDeviceIoStatus iostatus;
+
+    /** Offset that is published by the query-block-jobs QMP API */
+    int64_t offset;
+
+    /** Length that is published by the query-block-jobs QMP API */
+    int64_t len;
+
+    /** Speed that was set with @block_job_set_speed.  */
+    int64_t speed;
+
+    /** The completion function that will be called when the job completes.  */
+    BlockDriverCompletionFunc *cb;
+
+    /** The opaque value that is passed to the completion function.  */
+    void *opaque;
+};
+
+/**
+ * block_job_create:
+ * @job_type: The class object for the newly-created job.
+ * @bs: The block
+ * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
+ * @cb: Completion function for the job.
+ * @opaque: Opaque pointer value passed to @cb.
+ * @errp: Error object.
+ *
+ * Create a new long-running block device job and return it.  The job
+ * will call @cb asynchronously when the job completes.  Note that
+ * @bs may have been closed at the time the @cb it is called.  If
+ * this is the case, the job may be reported as either cancelled or
+ * completed.
+ *
+ * This function is not part of the public job interface; it should be
+ * called from a wrapper that is specific to the job type.
+ */
+void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs,
+                       int64_t speed, BlockDriverCompletionFunc *cb,
+                       void *opaque, Error **errp);
+
+/**
+ * block_job_sleep_ns:
+ * @job: The job that calls the function.
+ * @clock: The clock to sleep on.
+ * @ns: How many nanoseconds to stop for.
+ *
+ * Put the job to sleep (assuming that it wasn't canceled) for @ns
+ * nanoseconds.  Canceling the job will interrupt the wait immediately.
+ */
+void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns);
+
+/**
+ * block_job_complete:
+ * @job: The job being completed.
+ * @ret: The status code.
+ *
+ * Call the completion function that was registered at creation time, and
+ * free @job.
+ */
+void block_job_complete(BlockJob *job, int ret);
+
+/**
+ * block_job_set_speed:
+ * @job: The job to set the speed for.
+ * @speed: The new value
+ * @errp: Error object.
+ *
+ * Set a rate-limiting parameter for the job; the actual meaning may
+ * vary depending on the job type.
+ */
+void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp);
+
+/**
+ * block_job_cancel:
+ * @job: The job to be canceled.
+ *
+ * Asynchronously cancel the specified job.
+ */
+void block_job_cancel(BlockJob *job);
+
+/**
+ * block_job_is_cancelled:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is scheduled for cancellation.
+ */
+bool block_job_is_cancelled(BlockJob *job);
+
+/**
+ * block_job_query:
+ * @job: The job to get information about.
+ *
+ * Return information about a job.
+ */
+BlockJobInfo *block_job_query(BlockJob *job);
+
+/**
+ * block_job_pause:
+ * @job: The job to be paused.
+ *
+ * Asynchronously pause the specified job.
+ */
+void block_job_pause(BlockJob *job);
+
+/**
+ * block_job_resume:
+ * @job: The job to be resumed.
+ *
+ * Resume the specified job.
+ */
+void block_job_resume(BlockJob *job);
+
+/**
+ * block_job_is_paused:
+ * @job: The job being queried.
+ *
+ * Returns whether the job is currently paused, or will pause
+ * as soon as it reaches a sleeping point.
+ */
+bool block_job_is_paused(BlockJob *job);
+
+/**
+ * block_job_cancel_sync:
+ * @job: The job to be canceled.
+ *
+ * Synchronously cancel the job.  The completion callback is called
+ * before the function returns.  The job may actually complete
+ * instead of canceling itself; the circumstances under which this
+ * happens depend on the kind of job that is active.
+ *
+ * Returns the return value from the job if the job actually completed
+ * during the call, or -ECANCELED if it was canceled.
+ */
+int block_job_cancel_sync(BlockJob *job);
+
+/**
+ * block_job_iostatus_reset:
+ * @job: The job whose I/O status should be reset.
+ *
+ * Reset I/O status on @job.
+ */
+void block_job_iostatus_reset(BlockJob *job);
+
+/**
+ * block_job_error_action:
+ * @job: The job to signal an error for.
+ * @bs: The block device on which to set an I/O error.
+ * @on_err: The error action setting.
+ * @is_read: Whether the operation was a read.
+ * @error: The error that was reported.
+ *
+ * Report an I/O error for a block job and possibly stop the VM.  Return the
+ * action that was selected based on @on_err and @error.
+ */
+BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs,
+                                        BlockdevOnError on_err,
+                                        int is_read, int error);
+#endif
diff --git a/compiler.h b/compiler.h
index 07ba1f8..c734a71 100644
--- a/compiler.h
+++ b/compiler.h
@@ -44,6 +44,11 @@
    /* Use gnu_printf when supported (qemu uses standard format strings). */
 #  define GCC_ATTR __attribute__((__unused__, format(gnu_printf, 1, 2)))
 #  define GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
+#  if defined(_WIN32)
+    /* Map __printf__ to __gnu_printf__ because we want standard format strings
+     * even when MinGW or GLib include files use __printf__. */
+#   define __printf__ __gnu_printf__
+#  endif
 # endif
 #if defined(_WIN32)
 #define GCC_WEAK __attribute__((weak))
diff --git a/configure b/configure
index b67be2c..b26161d 100755
--- a/configure
+++ b/configure
@@ -111,14 +111,12 @@
 cpu=""
 interp_prefix="/usr/gnemul/qemu-%M"
 static="no"
-sparc_cpu=""
 cross_prefix=""
 audio_drv_list=""
 audio_card_list="ac97 es1370 sb16 hda"
 audio_possible_cards="ac97 es1370 sb16 cs4231a adlib gus hda"
 block_drv_whitelist=""
 host_cc="gcc"
-helper_cflags=""
 libs_softmmu=""
 libs_tools=""
 audio_pt_int=""
@@ -127,6 +125,7 @@
 libs_qga=""
 debug_info="yes"
 
+
 target_list="x86_64-softmmu"
 kvm_version() {
     local fname="$(dirname "$0")/KVM_VERSION"
@@ -137,7 +136,6 @@
         echo "qemu-kvm-devel"
     fi
 }
-
 # Default value for a variable defining feature "foo".
 #  * foo="no"  feature will only be used if --enable-foo arg is given
 #  * foo=""    feature will be searched for, and if found, will be used
@@ -230,6 +228,7 @@
 libiscsi=""
 coroutine=""
 seccomp=""
+glusterfs=""
 
 # parse CC options first
 for opt do
@@ -251,21 +250,6 @@
   ;;
   --disable-debug-info) debug_info="no"
   ;;
-  --sparc_cpu=*)
-    sparc_cpu="$optarg"
-    case $sparc_cpu in
-    v7|v8|v8plus|v8plusa)
-      cpu="sparc"
-    ;;
-    v9)
-      cpu="sparc64"
-    ;;
-    *)
-      echo "undefined SPARC architecture. Exiting";
-      exit 1
-    ;;
-    esac
-  ;;
   esac
 done
 # OS specific
@@ -353,8 +337,6 @@
 elif check_define __x86_64__ ; then
   cpu="x86_64"
 elif check_define __sparc__ ; then
-  # We can't check for 64 bit (when gcc is biarch) or V8PLUSA
-  # They must be specified using --sparc_cpu
   if check_define __arch64__ ; then
     cpu="sparc64"
   else
@@ -802,8 +784,6 @@
   ;;
   --enable-uname-release=*) uname_release="$optarg"
   ;;
-  --sparc_cpu=*)
-  ;;
   --enable-werror) werror="yes"
   ;;
   --disable-werror) werror="no"
@@ -886,41 +866,26 @@
   ;;
   --disable-seccomp) seccomp="no"
   ;;
+  --disable-glusterfs) glusterfs="no"
+  ;;
+  --enable-glusterfs) glusterfs="yes"
+  ;;
   *) echo "ERROR: unknown option $opt"; show_help="yes"
   ;;
   esac
 done
 
-#
-# If cpu ~= sparc and  sparc_cpu hasn't been defined, plug in the right
-# QEMU_CFLAGS/LDFLAGS (assume sparc_v8plus for 32-bit and sparc_v9 for 64-bit)
-#
 host_guest_base="no"
 case "$cpu" in
-    sparc) case $sparc_cpu in
-           v7|v8)
-             QEMU_CFLAGS="-mcpu=${sparc_cpu} -D__sparc_${sparc_cpu}__ $QEMU_CFLAGS"
-           ;;
-           v8plus|v8plusa)
-             QEMU_CFLAGS="-mcpu=ultrasparc -D__sparc_${sparc_cpu}__ $QEMU_CFLAGS"
-           ;;
-           *) # sparc_cpu not defined in the command line
-             QEMU_CFLAGS="-mcpu=ultrasparc -D__sparc_v8plus__ $QEMU_CFLAGS"
-           esac
+    sparc)
            LDFLAGS="-m32 $LDFLAGS"
-           QEMU_CFLAGS="-m32 -ffixed-g2 -ffixed-g3 $QEMU_CFLAGS"
-           if test "$solaris" = "no" ; then
-             QEMU_CFLAGS="-ffixed-g1 -ffixed-g6 $QEMU_CFLAGS"
-             helper_cflags="-ffixed-i0"
-           fi
+           QEMU_CFLAGS="-m32 -mcpu=ultrasparc $QEMU_CFLAGS"
+           host_guest_base="yes"
            ;;
     sparc64)
-           QEMU_CFLAGS="-m64 -mcpu=ultrasparc -D__sparc_v9__ $QEMU_CFLAGS"
            LDFLAGS="-m64 $LDFLAGS"
-           QEMU_CFLAGS="-ffixed-g5 -ffixed-g6 -ffixed-g7 $QEMU_CFLAGS"
-           if test "$solaris" != "no" ; then
-             QEMU_CFLAGS="-ffixed-g1 $QEMU_CFLAGS"
-           fi
+           QEMU_CFLAGS="-m64 -mcpu=ultrasparc $QEMU_CFLAGS"
+           host_guest_base="yes"
            ;;
     s390)
            QEMU_CFLAGS="-m31 -march=z990 $QEMU_CFLAGS"
@@ -936,7 +901,6 @@
            QEMU_CFLAGS="-m32 $QEMU_CFLAGS"
            LDFLAGS="-m32 $LDFLAGS"
            cc_i386='$(CC) -m32'
-           helper_cflags="-fomit-frame-pointer"
            host_guest_base="yes"
            ;;
     x86_64)
@@ -1178,6 +1142,8 @@
 echo "  --enable-seccomp         enables seccomp support"
 echo "  --with-coroutine=BACKEND coroutine backend. Supported options:"
 echo "                           gthread, ucontext, sigaltstack, windows"
+echo "  --enable-glusterfs       enable GlusterFS backend"
+echo "  --disable-glusterfs      disable GlusterFS backend"
 echo ""
 echo "NOTE: The object files are built at the place where configure is launched"
 exit 1
@@ -1331,10 +1297,11 @@
   exit 1
 fi
 
-if test "$target_list" = "DEFAULT" ; then
-    target_list=`echo "$default_target_list" | sed -e 's/,/ /g'`
+if test -z "${target_list+xxx}" ; then
+    target_list="$default_target_list"
+else
+    target_list=`echo "$target_list" | sed -e 's/,/ /g'`
 fi
-
 # see if system emulation was really requested
 case " $target_list " in
   *"-softmmu "*) softmmu=yes
@@ -1440,10 +1407,10 @@
         LIBS=`$pkg_config --libs libseccomp`
 	seccomp="yes"
     else
-	seccomp="no"
 	if test "$seccomp" = "yes"; then
             feature_not_found "libseccomp"
 	fi
+	seccomp="no"
     fi
 fi
 ##########################################
@@ -2352,6 +2319,29 @@
   fi
 fi
 
+##########################################
+# glusterfs probe
+if test "$glusterfs" != "no" ; then
+  cat > $TMPC <<EOF
+#include <glusterfs/api/glfs.h>
+int main(void) {
+    (void) glfs_new("volume");
+    return 0;
+}
+EOF
+  glusterfs_libs="-lgfapi -lgfrpc -lgfxdr"
+  if compile_prog "" "$glusterfs_libs" ; then
+    glusterfs=yes
+    libs_tools="$glusterfs_libs $libs_tools"
+    libs_softmmu="$glusterfs_libs $libs_softmmu"
+  else
+    if test "$glusterfs" = "yes" ; then
+      feature_not_found "GlusterFS backend support"
+    fi
+    glusterfs=no
+  fi
+fi
+
 #
 # Check for xxxat() functions when we are building linux-user
 # emulator.  This is done because older glibc versions don't
@@ -2680,17 +2670,44 @@
 
 
 ##########################################
+# Do we need libm
+cat > $TMPC << EOF
+#include <math.h>
+int main(void) { return isnan(sin(0.0)); }
+EOF
+if compile_prog "" "" ; then
+  :
+elif compile_prog "" "-lm" ; then
+  LIBS="-lm $LIBS"
+  libs_qga="-lm $libs_qga"
+else
+  echo
+  echo "Error: libm check failed"
+  echo
+  exit 1
+fi
+
+##########################################
 # Do we need librt
+# uClibc provides 2 versions of clock_gettime(), one with realtime
+# support and one without. This means that the clock_gettime() don't
+# need -lrt. We still need it for timer_create() so we check for this
+# function in addition.
 cat > $TMPC <<EOF
 #include <signal.h>
 #include <time.h>
-int main(void) { return clock_gettime(CLOCK_REALTIME, NULL); }
+int main(void) {
+  timer_create(CLOCK_REALTIME, NULL, NULL);
+  return clock_gettime(CLOCK_REALTIME, NULL);
+}
 EOF
 
 if compile_prog "" "" ; then
   :
-elif compile_prog "" "-lrt" ; then
+# we need pthread for static linking. use previous pthread test result
+elif compile_prog "" "-lrt $pthread_lib" ; then
   LIBS="-lrt $LIBS"
+  libs_qga="-lrt $libs_qga"
 fi
 
 if test "$darwin" != "yes" -a "$mingw32" != "yes" -a "$solaris" != yes -a \
@@ -2718,6 +2735,9 @@
     if $pkg_config --atleast-version=0.12.0 spice-protocol >/dev/null 2>&1; then
         spice_qxl_io_monitors_config_async="yes"
     fi
+    if $pkg_config --atleast-version=0.12.2 spice-protocol > /dev/null 2>&1; then
+        spice_qxl_client_monitors_config="yes"
+    fi
   else
     if test "$spice" = "yes" ; then
       feature_not_found "spice"
@@ -2767,12 +2787,12 @@
 
 # check for usbredirparser for usb network redirection support
 if test "$usb_redir" != "no" ; then
-    if $pkg_config --atleast-version=0.3.4 libusbredirparser >/dev/null 2>&1 ; then
+    if $pkg_config --atleast-version=0.5 libusbredirparser-0.5 >/dev/null 2>&1 ; then
         usb_redir="yes"
-        usb_redir_cflags=$($pkg_config --cflags libusbredirparser 2>/dev/null)
-        usb_redir_libs=$($pkg_config --libs libusbredirparser 2>/dev/null)
+        usb_redir_cflags=$($pkg_config --cflags libusbredirparser-0.5 2>/dev/null)
+        usb_redir_libs=$($pkg_config --libs libusbredirparser-0.5 2>/dev/null)
         QEMU_CFLAGS="$QEMU_CFLAGS $usb_redir_cflags"
-        LIBS="$LIBS $usb_redir_libs"
+        libs_softmmu="$libs_softmmu $usb_redir_libs"
     else
         if test "$usb_redir" = "yes"; then
             feature_not_found "usb-redir"
@@ -3189,6 +3209,7 @@
 echo "build guest agent $guest_agent"
 echo "seccomp support   $seccomp"
 echo "coroutine backend $coroutine_backend"
+echo "GlusterFS support $glusterfs"
 
 if test "$sdl_too_old" = "yes"; then
 echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -3465,6 +3486,10 @@
   echo "CONFIG_QXL_IO_MONITORS_CONFIG_ASYNC=y" >> $config_host_mak
 fi
 
+if test "$spice_qxl_client_monitors_config" = "yes" ; then
+  echo "CONFIG_QXL_CLIENT_MONITORS_CONFIG=y" >> $config_host_mak
+fi
+
 if test "$smartcard" = "yes" ; then
   echo "CONFIG_SMARTCARD=y" >> $config_host_mak
 fi
@@ -3531,6 +3556,10 @@
   echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak
 fi
 
+if test "$glusterfs" = "yes" ; then
+  echo "CONFIG_GLUSTERFS=y" >> $config_host_mak
+fi
+
 # USB host support
 case "$usb" in
 linux)
@@ -3599,7 +3628,6 @@
   echo "HOST_CC      := REAL_CC=\"\$(HOST_CC)\" cgcc"  >> $config_host_mak
   echo "QEMU_CFLAGS  += -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-non-pointer-null" >> $config_host_mak
 fi
-echo "HELPER_CFLAGS=$helper_cflags" >> $config_host_mak
 echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
 echo "ARLIBS_BEGIN=$arlibs_begin" >> $config_host_mak
 echo "ARLIBS_END=$arlibs_end" >> $config_host_mak
@@ -3710,7 +3738,6 @@
 
 case "$target_arch2" in
   i386)
-    target_phys_bits=64
   ;;
   x86_64)
     TARGET_BASE_ARCH=i386
@@ -3718,7 +3745,6 @@
     target_long_alignment=8
   ;;
   alpha)
-    target_phys_bits=64
     target_long_alignment=8
     target_nptl="yes"
   ;;
@@ -3727,22 +3753,18 @@
     bflt="yes"
     target_nptl="yes"
     gdb_xml_files="arm-core.xml arm-vfp.xml arm-vfp3.xml arm-neon.xml"
-    target_phys_bits=64
     target_llong_alignment=4
     target_libs_softmmu="$fdt_libs"
   ;;
   cris)
     target_nptl="yes"
-    target_phys_bits=32
   ;;
   lm32)
-    target_phys_bits=32
     target_libs_softmmu="$opengl_libs"
   ;;
   m68k)
     bflt="yes"
     gdb_xml_files="cf-core.xml cf-fp.xml"
-    target_phys_bits=32
     target_int_alignment=2
     target_long_alignment=2
     target_llong_alignment=2
@@ -3751,36 +3773,30 @@
     TARGET_ARCH=microblaze
     bflt="yes"
     target_nptl="yes"
-    target_phys_bits=32
     target_libs_softmmu="$fdt_libs"
   ;;
   mips|mipsel)
     TARGET_ARCH=mips
     echo "TARGET_ABI_MIPSO32=y" >> $config_target_mak
     target_nptl="yes"
-    target_phys_bits=64
   ;;
   mipsn32|mipsn32el)
     TARGET_ARCH=mipsn32
     TARGET_BASE_ARCH=mips
     echo "TARGET_ABI_MIPSN32=y" >> $config_target_mak
-    target_phys_bits=64
   ;;
   mips64|mips64el)
     TARGET_ARCH=mips64
     TARGET_BASE_ARCH=mips
     echo "TARGET_ABI_MIPSN64=y" >> $config_target_mak
-    target_phys_bits=64
     target_long_alignment=8
   ;;
   or32)
     TARGET_ARCH=openrisc
     TARGET_BASE_ARCH=openrisc
-    target_phys_bits=32
   ;;
   ppc)
     gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
     target_nptl="yes"
     target_libs_softmmu="$fdt_libs"
   ;;
@@ -3788,7 +3804,6 @@
     TARGET_BASE_ARCH=ppc
     TARGET_ABI_DIR=ppc
     gdb_xml_files="power-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
     target_nptl="yes"
     target_libs_softmmu="$fdt_libs"
   ;;
@@ -3796,7 +3811,6 @@
     TARGET_BASE_ARCH=ppc
     TARGET_ABI_DIR=ppc
     gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
     target_long_alignment=8
     target_libs_softmmu="$fdt_libs"
   ;;
@@ -3806,21 +3820,17 @@
     TARGET_ABI_DIR=ppc
     echo "TARGET_ABI32=y" >> $config_target_mak
     gdb_xml_files="power64-core.xml power-fpu.xml power-altivec.xml power-spe.xml"
-    target_phys_bits=64
     target_libs_softmmu="$fdt_libs"
   ;;
   sh4|sh4eb)
     TARGET_ARCH=sh4
     bflt="yes"
     target_nptl="yes"
-    target_phys_bits=32
   ;;
   sparc)
-    target_phys_bits=64
   ;;
   sparc64)
     TARGET_BASE_ARCH=sparc
-    target_phys_bits=64
     target_long_alignment=8
   ;;
   sparc32plus)
@@ -3828,11 +3838,9 @@
     TARGET_BASE_ARCH=sparc
     TARGET_ABI_DIR=sparc
     echo "TARGET_ABI32=y" >> $config_target_mak
-    target_phys_bits=64
   ;;
   s390x)
     target_nptl="yes"
-    target_phys_bits=64
     target_long_alignment=8
   ;;
   unicore32)
@@ -3840,7 +3848,6 @@
   ;;
   xtensa|xtensaeb)
     TARGET_ARCH=xtensa
-    target_phys_bits=32
   ;;
   *)
     echo "Unsupported target CPU"
@@ -3854,13 +3861,6 @@
 
 symlink "$source_path/Makefile.target" "$target_dir/Makefile"
 
-
-case "$target_arch2" in
-  alpha | i386 | or32 | s390x | sparc* | x86_64 | xtensa* | ppc*)
-    echo "CONFIG_TCG_PASS_AREG0=y" >> $config_target_mak
-  ;;
-esac
-
 upper() {
     echo "$@"| LC_ALL=C tr '[a-z]' '[A-Z]'
 }
@@ -3882,7 +3882,6 @@
 case "$target_arch2" in
   i386|x86_64)
     if test "$xen" = "yes" -a "$target_softmmu" = "yes" ; then
-      target_phys_bits=64
       echo "CONFIG_XEN=y" >> $config_target_mak
       if test "$xen_pci_passthrough" = yes; then
         echo "CONFIG_XEN_PCI_PASSTHROUGH=y" >> "$config_target_mak"
@@ -3923,11 +3922,8 @@
   echo "TARGET_WORDS_BIGENDIAN=y" >> $config_target_mak
 fi
 if test "$target_softmmu" = "yes" ; then
-  echo "TARGET_PHYS_ADDR_BITS=$target_phys_bits" >> $config_target_mak
   echo "CONFIG_SOFTMMU=y" >> $config_target_mak
   echo "LIBS+=$libs_softmmu $target_libs_softmmu" >> $config_target_mak
-  echo "HWDIR=../libhw$target_phys_bits" >> $config_target_mak
-  echo "subdir-$target: subdir-libhw$target_phys_bits" >> $config_host_mak
   if test "$smartcard_nss" = "yes" ; then
     echo "subdir-$target: subdir-libcacard" >> $config_host_mak
   fi
@@ -4113,10 +4109,6 @@
 
 if test "$target_linux_user" = "yes" -o "$target_bsd_user" = "yes" ; then
   case "$ARCH" in
-  sparc)
-    # -static is used to avoid g1/g3 usage by the dynamic linker
-    ldflags="$linker_script -static $ldflags"
-    ;;
   alpha | s390x)
     # The default placement of the application is fine.
     ;;
@@ -4173,12 +4165,6 @@
     echo "LD=$ld" >> $config_mak
 done
 
-for hwlib in 32 64; do
-  d=libhw$hwlib
-  symlink "$source_path/Makefile.hw" "$d/Makefile"
-  echo "QEMU_CFLAGS+=-DTARGET_PHYS_ADDR_BITS=$hwlib" > $d/config.mak
-done
-
 d=libuser
 symlink "$source_path/Makefile.user" "$d/Makefile"
 
diff --git a/console.c b/console.c
index c1ed5e0..3f3d254 100644
--- a/console.c
+++ b/console.c
@@ -938,8 +938,11 @@
     case TTY_STATE_CSI: /* handle escape sequence parameters */
         if (ch >= '0' && ch <= '9') {
             if (s->nb_esc_params < MAX_ESC_PARAMS) {
-                s->esc_params[s->nb_esc_params] =
-                    s->esc_params[s->nb_esc_params] * 10 + ch - '0';
+                int *param = &s->esc_params[s->nb_esc_params];
+                int digit = (ch - '0');
+
+                *param = (*param <= (INT_MAX - digit) / 10) ?
+                         *param * 10 + digit : INT_MAX;
             }
         } else {
             if (s->nb_esc_params < MAX_ESC_PARAMS)
@@ -1612,7 +1615,7 @@
     memset(&pf, 0x00, sizeof(PixelFormat));
 
     pf.bits_per_pixel = bpp;
-    pf.bytes_per_pixel = bpp / 8;
+    pf.bytes_per_pixel = DIV_ROUND_UP(bpp, 8);
     pf.depth = bpp == 32 ? 24 : bpp;
 
     switch (bpp) {
@@ -1661,13 +1664,12 @@
     memset(&pf, 0x00, sizeof(PixelFormat));
 
     pf.bits_per_pixel = bpp;
-    pf.bytes_per_pixel = bpp / 8;
+    pf.bytes_per_pixel = DIV_ROUND_UP(bpp, 8);
     pf.depth = bpp == 32 ? 24 : bpp;
 
     switch (bpp) {
         case 15:
             pf.bits_per_pixel = 16;
-            pf.bytes_per_pixel = 2;
             pf.rmask = 0x00007c00;
             pf.gmask = 0x000003E0;
             pf.bmask = 0x0000001F;
diff --git a/cpu-all.h b/cpu-all.h
index 5e07d28..2b99682 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -260,14 +260,6 @@
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
 
-#ifndef CONFIG_TCG_PASS_AREG0
-#define ldub_code(p) ldub_raw(p)
-#define ldsb_code(p) ldsb_raw(p)
-#define lduw_code(p) lduw_raw(p)
-#define ldsw_code(p) ldsw_raw(p)
-#define ldl_code(p) ldl_raw(p)
-#define ldq_code(p) ldq_raw(p)
-#else
 #define cpu_ldub_code(env1, p) ldub_raw(p)
 #define cpu_ldsb_code(env1, p) ldsb_raw(p)
 #define cpu_lduw_code(env1, p) lduw_raw(p)
@@ -296,7 +288,6 @@
 #define cpu_stw_kernel(env, addr, data) stw_raw(addr, data)
 #define cpu_stl_kernel(env, addr, data) stl_raw(addr, data)
 #define cpu_stq_kernel(env, addr, data) stq_raw(addr, data)
-#endif
 
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
@@ -313,7 +304,6 @@
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)
 
-#ifdef CONFIG_TCG_PASS_AREG0
 #define cpu_ldub_data(env, addr) ldub_raw(addr)
 #define cpu_lduw_data(env, addr) lduw_raw(addr)
 #define cpu_ldl_data(env, addr) ldl_raw(addr)
@@ -321,7 +311,6 @@
 #define cpu_stb_data(env, addr, data) stb_raw(addr, data)
 #define cpu_stw_data(env, addr, data) stw_raw(addr, data)
 #define cpu_stl_data(env, addr, data) stl_raw(addr, data)
-#endif
 #endif /* defined(CONFIG_USER_ONLY) */
 
 /* page related stuff */
@@ -367,6 +356,9 @@
 CPUArchState *qemu_get_cpu(int cpu);
 
 #define CPU_DUMP_CODE 0x00010000
+#define CPU_DUMP_FPU 0x00020000 /* dump FPU register state, not just integer */
+/* dump info about TCG QEMU's condition code optimization state */
+#define CPU_DUMP_CCOP 0x00040000
 
 void cpu_dump_state(CPUArchState *env, FILE *f, fprintf_function cpu_fprintf,
                     int flags);
diff --git a/cpu-common.h b/cpu-common.h
index 85548de..c0d27af 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -21,7 +21,7 @@
 };
 
 /* address in the RAM (different from a physical address) */
-#if defined(CONFIG_XEN_BACKEND) && TARGET_PHYS_ADDR_BITS == 64
+#if defined(CONFIG_XEN_BACKEND)
 typedef uint64_t ram_addr_t;
 #  define RAM_ADDR_MAX UINT64_MAX
 #  define RAM_ADDR_FMT "%" PRIx64
diff --git a/cpu-exec.c b/cpu-exec.c
index 134b3c4..252da86 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -552,7 +552,7 @@
 #if defined(TARGET_I386)
                     env->eflags = env->eflags | cpu_cc_compute_all(env, CC_OP)
                         | (DF & DF_MASK);
-                    log_cpu_state(env, X86_DUMP_CCOP);
+                    log_cpu_state(env, CPU_DUMP_CCOP);
                     env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
 #elif defined(TARGET_M68K)
                     cpu_m68k_flush_flags(env, env->cc_op);
diff --git a/cpus.c b/cpus.c
index e476a3c..750a76f 100644
--- a/cpus.c
+++ b/cpus.c
@@ -395,11 +395,7 @@
     fprintf(stderr, "\n");
     for(env = first_cpu; env != NULL; env = env->next_cpu) {
         fprintf(stderr, "CPU #%d:\n", env->cpu_index);
-#ifdef TARGET_I386
-        cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU);
-#else
-        cpu_dump_state(env, stderr, fprintf, 0);
-#endif
+        cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU);
     }
     va_end(ap);
     abort();
@@ -613,7 +609,7 @@
 }
 #endif /* _WIN32 */
 
-QemuMutex qemu_global_mutex;
+static QemuMutex qemu_global_mutex;
 static QemuCond qemu_io_proceeded_cond;
 static bool iothread_requesting_mutex;
 
@@ -1192,10 +1188,8 @@
 void list_cpus(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
 {
     /* XXX: implement xxx_cpu_list for targets that still miss it */
-#if defined(cpu_list_id)
-    cpu_list_id(f, cpu_fprintf, optarg);
-#elif defined(cpu_list)
-    cpu_list(f, cpu_fprintf); /* deprecated */
+#if defined(cpu_list)
+    cpu_list(f, cpu_fprintf);
 #endif
 }
 
diff --git a/cputlb.c b/cputlb.c
index d3e7b25..51b5897 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -325,11 +325,7 @@
     mmu_idx = cpu_mmu_index(env1);
     if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                  (addr & TARGET_PAGE_MASK))) {
-#ifdef CONFIG_TCG_PASS_AREG0
         cpu_ldub_code(env1, addr);
-#else
-        ldub_code(addr);
-#endif
     }
     pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
     mr = iotlb_to_region(pd);
@@ -348,7 +344,6 @@
 #define MMUSUFFIX _cmmu
 #undef GETPC
 #define GETPC() ((uintptr_t)0)
-#define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
 #define SHIFT 0
diff --git a/cutils.c b/cutils.c
index 8ef648f..8edd8fa 100644
--- a/cutils.c
+++ b/cutils.c
@@ -115,7 +115,7 @@
         m += 12;
         y--;
     }
-    t = 86400 * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + 
+    t = 86400ULL * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + 
                  y / 400 - 719469);
     t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec;
     return t;
diff --git a/def-helper.h b/def-helper.h
index b98ff69..022a9ce 100644
--- a/def-helper.h
+++ b/def-helper.h
@@ -128,6 +128,8 @@
 #define DEF_HELPER_5(name, ret, t1, t2, t3, t4, t5) \
     DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5)
 
+/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
+
 #endif /* DEF_HELPER_H */
 
 #ifndef GEN_HELPER
diff --git a/default-configs/arm-softmmu.mak b/default-configs/arm-softmmu.mak
index f335a72..2f1a5c9 100644
--- a/default-configs/arm-softmmu.mak
+++ b/default-configs/arm-softmmu.mak
@@ -22,6 +22,7 @@
 CONFIG_MAX111X=y
 CONFIG_SSI=y
 CONFIG_SSI_SD=y
+CONFIG_SSI_M25P80=y
 CONFIG_LAN9118=y
 CONFIG_SMC91C111=y
 CONFIG_DS1338=y
diff --git a/default-configs/microblaze-softmmu.mak b/default-configs/microblaze-softmmu.mak
index 64c9485..2f442e5 100644
--- a/default-configs/microblaze-softmmu.mak
+++ b/default-configs/microblaze-softmmu.mak
@@ -5,3 +5,5 @@
 CONFIG_SERIAL=y
 CONFIG_XILINX=y
 CONFIG_XILINX_AXI=y
+CONFIG_SSI=y
+CONFIG_SSI_M25P80=y
diff --git a/default-configs/microblazeel-softmmu.mak b/default-configs/microblazeel-softmmu.mak
index a962276..af9a3cd 100644
--- a/default-configs/microblazeel-softmmu.mak
+++ b/default-configs/microblazeel-softmmu.mak
@@ -5,3 +5,5 @@
 CONFIG_SERIAL=y
 CONFIG_XILINX=y
 CONFIG_XILINX_AXI=y
+CONFIG_SSI=y
+CONFIG_SSI_M25P80=y
diff --git a/device_tree.c b/device_tree.c
index d7a9b6b..a923613 100644
--- a/device_tree.c
+++ b/device_tree.c
@@ -304,3 +304,18 @@
     g_free(dupname);
     return retval;
 }
+
+void qemu_devtree_dumpdtb(void *fdt, int size)
+{
+    QemuOpts *machine_opts;
+
+    machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    if (machine_opts) {
+        const char *dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
+        if (dumpdtb) {
+            /* Dump the dtb to a file and quit */
+            exit(g_file_set_contents(dumpdtb, fdt, size, NULL) ? 0 : 1);
+        }
+    }
+
+}
diff --git a/device_tree.h b/device_tree.h
index f7a3e6c..f0b3f35 100644
--- a/device_tree.h
+++ b/device_tree.h
@@ -49,4 +49,6 @@
                              sizeof(qdt_tmp));                                \
     } while (0)
 
+void qemu_devtree_dumpdtb(void *fdt, int size);
+
 #endif /* __DEVICE_TREE_H__ */
diff --git a/disas.c b/disas.c
index 7b2acc9..b801c8f 100644
--- a/disas.c
+++ b/disas.c
@@ -316,9 +316,7 @@
     print_insn = print_insn_alpha;
 #elif defined(__sparc__)
     print_insn = print_insn_sparc;
-#if defined(__sparc_v8plus__) || defined(__sparc_v8plusa__) || defined(__sparc_v9__)
     disasm_info.mach = bfd_mach_sparc_v9b;
-#endif
 #elif defined(__arm__)
     print_insn = print_insn_arm;
 #elif defined(__MIPSEB__)
diff --git a/dma.h b/dma.h
index f35c4b6..1a33603 100644
--- a/dma.h
+++ b/dma.h
@@ -31,7 +31,7 @@
     DMAContext *dma;
 };
 
-#if defined(TARGET_PHYS_ADDR_BITS)
+#ifndef CONFIG_USER_ONLY
 
 /*
  * When an IOMMU is present, bus addresses become distinct from
diff --git a/docs/specs/ppc-spapr-hcalls.txt b/docs/specs/ppc-spapr-hcalls.txt
index 52ba8d4..667b3fa 100644
--- a/docs/specs/ppc-spapr-hcalls.txt
+++ b/docs/specs/ppc-spapr-hcalls.txt
@@ -31,7 +31,7 @@
 
 Returns:
 
-  H_SUCCESS   : Successully called the RTAS function (RTAS result
+  H_SUCCESS   : Successfully called the RTAS function (RTAS result
                 will have been stored in the parameter block)
   H_PARAMETER : Unknown token
 
diff --git a/docs/usb2.txt b/docs/usb2.txt
index d17e3c0..43dacde 100644
--- a/docs/usb2.txt
+++ b/docs/usb2.txt
@@ -58,11 +58,11 @@
 xhci controller support
 -----------------------
 
-There also is xhci host controller support available.  It got alot
+There is also xhci host controller support available.  It got a lot
 less testing than ehci and there are a bunch of known limitations, so
 ehci may work better for you.  On the other hand the xhci hardware
 design is much more virtualization-friendly, thus xhci emulation uses
-less ressources (especially cpu).  If you wanna give xhci a try
+less resources (especially cpu).  If you want to give xhci a try
 use this to add the host controller ...
 
     qemu -device nec-usb-xhci,id=xhci
diff --git a/dump.c b/dump.c
index 2bf8d8d..6b7c127 100644
--- a/dump.c
+++ b/dump.c
@@ -100,18 +100,11 @@
 static int fd_write_vmcore(void *buf, size_t size, void *opaque)
 {
     DumpState *s = opaque;
-    int fd = s->fd;
-    size_t writen_size;
+    size_t written_size;
 
-    /* The fd may be passed from user, and it can be non-blocked */
-    while (size) {
-        writen_size = qemu_write_full(fd, buf, size);
-        if (writen_size != size && errno != EAGAIN) {
-            return -1;
-        }
-
-        buf += writen_size;
-        size -= writen_size;
+    written_size = qemu_write_full(s->fd, buf, size);
+    if (written_size != size) {
+        return -1;
     }
 
     return 0;
@@ -836,9 +829,8 @@
 
 #if !defined(WIN32)
     if (strstart(file, "fd:", &p)) {
-        fd = monitor_get_fd(cur_mon, p);
+        fd = monitor_get_fd(cur_mon, p, errp);
         if (fd == -1) {
-            error_set(errp, QERR_FD_NOT_FOUND, p);
             return;
         }
     }
diff --git a/dyngen-exec.h b/dyngen-exec.h
deleted file mode 100644
index 083e20b..0000000
--- a/dyngen-exec.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  dyngen defines for micro operation code
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#if !defined(__DYNGEN_EXEC_H__)
-#define __DYNGEN_EXEC_H__
-
-#if defined(CONFIG_TCG_INTERPRETER)
-/* The TCG interpreter does not need a special register AREG0,
- * but it is possible to use one by defining AREG0.
- * On i386, register edi seems to work. */
-/* Run without special register AREG0 or use a value defined elsewhere. */
-#elif defined(__i386__)
-#define AREG0 "ebp"
-#elif defined(__x86_64__)
-#define AREG0 "r14"
-#elif defined(_ARCH_PPC)
-#define AREG0 "r27"
-#elif defined(__arm__)
-#define AREG0 "r6"
-#elif defined(__hppa__)
-#define AREG0 "r17"
-#elif defined(__mips__)
-#define AREG0 "s0"
-#elif defined(__sparc__)
-#ifdef CONFIG_SOLARIS
-#define AREG0 "g2"
-#else
-#ifdef __sparc_v9__
-#define AREG0 "g5"
-#else
-#define AREG0 "g6"
-#endif
-#endif
-#elif defined(__s390__)
-#define AREG0 "r10"
-#elif defined(__alpha__)
-/* Note $15 is the frame pointer, so anything in op-i386.c that would
-   require a frame pointer, like alloca, would probably loose.  */
-#define AREG0 "$15"
-#elif defined(__mc68000)
-#define AREG0 "%a5"
-#elif defined(__ia64__)
-#define AREG0 "r7"
-#else
-#error unsupported CPU
-#endif
-
-#if defined(AREG0)
-register CPUArchState *env asm(AREG0);
-#else
-/* TODO: Try env = cpu_single_env. */
-extern CPUArchState *env;
-#endif
-
-#endif /* !defined(__DYNGEN_EXEC_H__) */
diff --git a/exec-all.h b/exec-all.h
index c5ec8e1..6516da0 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -51,7 +51,7 @@
 #else
 #define MAX_OPC_PARAM_PER_ARG 1
 #endif
-#define MAX_OPC_PARAM_IARGS 4
+#define MAX_OPC_PARAM_IARGS 5
 #define MAX_OPC_PARAM_OARGS 1
 #define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
 
@@ -132,9 +132,10 @@
 #define CODE_GEN_AVG_BLOCK_SIZE 64
 #endif
 
-#if defined(_ARCH_PPC) || defined(__x86_64__) || defined(__arm__) || defined(__i386__)
-#define USE_DIRECT_JUMP
-#elif defined(CONFIG_TCG_INTERPRETER)
+#if defined(__arm__) || defined(_ARCH_PPC) \
+    || defined(__x86_64__) || defined(__i386__) \
+    || defined(__sparc__) \
+    || defined(CONFIG_TCG_INTERPRETER)
 #define USE_DIRECT_JUMP
 #endif
 
@@ -244,6 +245,8 @@
     __asm __volatile__ ("swi 0x9f0002" : : "r" (_beg), "r" (_end), "r" (_flg));
 #endif
 }
+#elif defined(__sparc__)
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr);
 #else
 #error tb_set_jmp_target1 is missing
 #endif
@@ -323,9 +326,6 @@
 
 #define ACCESS_TYPE (NB_MMU_MODES + 1)
 #define MEMSUFFIX _code
-#ifndef CONFIG_TCG_PASS_AREG0
-#define env cpu_single_env
-#endif
 
 #define DATA_SIZE 1
 #include "softmmu_header.h"
@@ -341,7 +341,6 @@
 
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
-#undef env
 
 #endif
 
diff --git a/exec-obsolete.h b/exec-obsolete.h
index c099256..286e2f7 100644
--- a/exec-obsolete.h
+++ b/exec-obsolete.h
@@ -24,6 +24,7 @@
 #endif
 
 #ifndef CONFIG_USER_ONLY
+#include "hw/xen.h"
 
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr);
@@ -111,6 +112,7 @@
     for (addr = start; addr < end; addr += TARGET_PAGE_SIZE) {
         cpu_physical_memory_set_dirty_flags(addr, dirty_flags);
     }
+    xen_modified_memory(addr, length);
 }
 
 static inline void cpu_physical_memory_mask_dirty_range(ram_addr_t start,
diff --git a/exec.c b/exec.c
index 5834766..7899042 100644
--- a/exec.c
+++ b/exec.c
@@ -86,7 +86,7 @@
 /* any access to the tbs or the page table must use this lock */
 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 
-#if defined(__arm__) || defined(__sparc_v9__)
+#if defined(__arm__) || defined(__sparc__)
 /* The prologue must be reachable with a direct jump. ARM and Sparc64
  have limited branch ranges (possibly also PPC) so place it in a
  section close to code segment. */
@@ -541,10 +541,9 @@
         /* Cannot map more than that */
         if (code_gen_buffer_size > (800 * 1024 * 1024))
             code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc_v9__)
+#elif defined(__sparc__) && HOST_LONG_BITS == 64
         // Map the buffer below 2G, so we can use direct calls and branches
-        flags |= MAP_FIXED;
-        start = (void *) 0x60000000UL;
+        start = (void *) 0x40000000UL;
         if (code_gen_buffer_size > (512 * 1024 * 1024))
             code_gen_buffer_size = (512 * 1024 * 1024);
 #elif defined(__arm__)
@@ -582,10 +581,9 @@
         /* Cannot map more than that */
         if (code_gen_buffer_size > (800 * 1024 * 1024))
             code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc_v9__)
+#elif defined(__sparc__) && HOST_LONG_BITS == 64
         // Map the buffer below 2G, so we can use direct calls and branches
-        flags |= MAP_FIXED;
-        addr = (void *) 0x60000000UL;
+        addr = (void *) 0x40000000UL;
         if (code_gen_buffer_size > (512 * 1024 * 1024)) {
             code_gen_buffer_size = (512 * 1024 * 1024);
         }
@@ -1744,20 +1742,12 @@
     fprintf(stderr, "qemu: fatal: ");
     vfprintf(stderr, fmt, ap);
     fprintf(stderr, "\n");
-#ifdef TARGET_I386
-    cpu_dump_state(env, stderr, fprintf, X86_DUMP_FPU | X86_DUMP_CCOP);
-#else
-    cpu_dump_state(env, stderr, fprintf, 0);
-#endif
+    cpu_dump_state(env, stderr, fprintf, CPU_DUMP_FPU | CPU_DUMP_CCOP);
     if (qemu_log_enabled()) {
         qemu_log("qemu: fatal: ");
         qemu_log_vprintf(fmt, ap2);
         qemu_log("\n");
-#ifdef TARGET_I386
-        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
-#else
-        log_cpu_state(env, 0);
-#endif
+        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
         qemu_log_flush();
         qemu_log_close();
     }
@@ -2525,6 +2515,19 @@
     }
 }
 
+static int memory_try_enable_merging(void *addr, size_t len)
+{
+    QemuOpts *opts;
+
+    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
+        /* disabled by the user */
+        return 0;
+    }
+
+    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
+}
+
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr)
 {
@@ -2544,7 +2547,7 @@
             new_block->host = file_ram_alloc(new_block, size, mem_path);
             if (!new_block->host) {
                 new_block->host = qemu_vmalloc(size);
-                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
+                memory_try_enable_merging(new_block->host, size);
             }
 #else
             fprintf(stderr, "-mem-path option unsupported\n");
@@ -2559,7 +2562,7 @@
             } else {
                 new_block->host = qemu_vmalloc(size);
             }
-            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
+            memory_try_enable_merging(new_block->host, size);
         }
     }
     new_block->length = size;
@@ -2689,7 +2692,7 @@
                             length, addr);
                     exit(1);
                 }
-                qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
+                memory_try_enable_merging(vaddr, length);
                 qemu_ram_setup_dump(vaddr, length);
             }
             return;
@@ -3406,6 +3409,19 @@
 }
 
 #else
+
+static void invalidate_and_set_dirty(target_phys_addr_t addr,
+                                     target_phys_addr_t length)
+{
+    if (!cpu_physical_memory_is_dirty(addr)) {
+        /* invalidate code */
+        tb_invalidate_phys_page_range(addr, addr + length, 0);
+        /* set dirty bit */
+        cpu_physical_memory_set_dirty_flags(addr, (0xff & ~CODE_DIRTY_FLAG));
+    }
+    xen_modified_memory(addr, length);
+}
+
 void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                             int len, int is_write)
 {
@@ -3451,13 +3467,7 @@
                 /* RAM case */
                 ptr = qemu_get_ram_ptr(addr1);
                 memcpy(ptr, buf, l);
-                if (!cpu_physical_memory_is_dirty(addr1)) {
-                    /* invalidate code */
-                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
-                    /* set dirty bit */
-                    cpu_physical_memory_set_dirty_flags(
-                        addr1, (0xff & ~CODE_DIRTY_FLAG));
-                }
+                invalidate_and_set_dirty(addr1, l);
                 qemu_put_ram_ptr(ptr);
             }
         } else {
@@ -3523,6 +3533,7 @@
             /* ROM/RAM case */
             ptr = qemu_get_ram_ptr(addr1);
             memcpy(ptr, buf, l);
+            invalidate_and_set_dirty(addr1, l);
             qemu_put_ram_ptr(ptr);
         }
         len -= l;
@@ -3648,13 +3659,7 @@
                 l = TARGET_PAGE_SIZE;
                 if (l > access_len)
                     l = access_len;
-                if (!cpu_physical_memory_is_dirty(addr1)) {
-                    /* invalidate code */
-                    tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
-                    /* set dirty bit */
-                    cpu_physical_memory_set_dirty_flags(
-                        addr1, (0xff & ~CODE_DIRTY_FLAG));
-                }
+                invalidate_and_set_dirty(addr1, l);
                 addr1 += l;
                 access_len -= l;
             }
@@ -3960,13 +3965,7 @@
             stl_p(ptr, val);
             break;
         }
-        if (!cpu_physical_memory_is_dirty(addr1)) {
-            /* invalidate code */
-            tb_invalidate_phys_page_range(addr1, addr1 + 4, 0);
-            /* set dirty bit */
-            cpu_physical_memory_set_dirty_flags(addr1,
-                (0xff & ~CODE_DIRTY_FLAG));
-        }
+        invalidate_and_set_dirty(addr1, 4);
     }
 }
 
@@ -4033,13 +4032,7 @@
             stw_p(ptr, val);
             break;
         }
-        if (!cpu_physical_memory_is_dirty(addr1)) {
-            /* invalidate code */
-            tb_invalidate_phys_page_range(addr1, addr1 + 2, 0);
-            /* set dirty bit */
-            cpu_physical_memory_set_dirty_flags(addr1,
-                (0xff & ~CODE_DIRTY_FLAG));
-        }
+        invalidate_and_set_dirty(addr1, 2);
     }
 }
 
diff --git a/fpu/softfloat-specialize.h b/fpu/softfloat-specialize.h
index 4902450..a1d489e 100644
--- a/fpu/softfloat-specialize.h
+++ b/fpu/softfloat-specialize.h
@@ -41,6 +41,13 @@
 #define SNAN_BIT_IS_ONE		0
 #endif
 
+#if defined(TARGET_XTENSA)
+/* Define for architectures which deviate from IEEE in not supporting
+ * signaling NaNs (so all NaNs are treated as quiet).
+ */
+#define NO_SIGNALING_NANS 1
+#endif
+
 /*----------------------------------------------------------------------------
 | The pattern for a default generated half-precision NaN.
 *----------------------------------------------------------------------------*/
@@ -57,7 +64,8 @@
 *----------------------------------------------------------------------------*/
 #if defined(TARGET_SPARC)
 const float32 float32_default_nan = const_float32(0x7FFFFFFF);
-#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA)
+#elif defined(TARGET_PPC) || defined(TARGET_ARM) || defined(TARGET_ALPHA) || \
+      defined(TARGET_XTENSA)
 const float32 float32_default_nan = const_float32(0x7FC00000);
 #elif SNAN_BIT_IS_ONE
 const float32 float32_default_nan = const_float32(0x7FBFFFFF);
@@ -127,6 +135,17 @@
     uint64_t high, low;
 } commonNaNT;
 
+#ifdef NO_SIGNALING_NANS
+int float16_is_quiet_nan(float16 a_)
+{
+    return float16_is_any_nan(a_);
+}
+
+int float16_is_signaling_nan(float16 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the half-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
@@ -156,6 +175,7 @@
     return (((a >> 9) & 0x3F) == 0x3E) && (a & 0x1FF);
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the half-precision floating point value `a' is a
@@ -217,6 +237,17 @@
     }
 }
 
+#ifdef NO_SIGNALING_NANS
+int float32_is_quiet_nan(float32 a_)
+{
+    return float32_is_any_nan(a_);
+}
+
+int float32_is_signaling_nan(float32 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the single-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
@@ -246,6 +277,7 @@
     return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the single-precision floating point value `a' is a
@@ -372,7 +404,7 @@
         return 1;
     }
 }
-#elif defined(TARGET_PPC)
+#elif defined(TARGET_PPC) || defined(TARGET_XTENSA)
 static int pickNaN(flag aIsQNaN, flag aIsSNaN, flag bIsQNaN, flag bIsSNaN,
                    flag aIsLargerSignificand)
 {
@@ -586,6 +618,17 @@
     }
 }
 
+#ifdef NO_SIGNALING_NANS
+int float64_is_quiet_nan(float64 a_)
+{
+    return float64_is_any_nan(a_);
+}
+
+int float64_is_signaling_nan(float64 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the double-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
@@ -619,6 +662,7 @@
         && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the double-precision floating point value `a' is a
@@ -773,6 +817,17 @@
     }
 }
 
+#ifdef NO_SIGNALING_NANS
+int floatx80_is_quiet_nan(floatx80 a_)
+{
+    return floatx80_is_any_nan(a_);
+}
+
+int floatx80_is_signaling_nan(floatx80 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the extended double-precision floating-point value `a' is a
 | quiet NaN; otherwise returns 0. This slightly differs from the same
@@ -816,6 +871,7 @@
         && ( a.low == aLow );
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the extended double-precision floating point value
@@ -929,6 +985,17 @@
     }
 }
 
+#ifdef NO_SIGNALING_NANS
+int float128_is_quiet_nan(float128 a_)
+{
+    return float128_is_any_nan(a_);
+}
+
+int float128_is_signaling_nan(float128 a_)
+{
+    return 0;
+}
+#else
 /*----------------------------------------------------------------------------
 | Returns 1 if the quadruple-precision floating-point value `a' is a quiet
 | NaN; otherwise returns 0.
@@ -964,6 +1031,7 @@
         && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
 #endif
 }
+#endif
 
 /*----------------------------------------------------------------------------
 | Returns a quiet NaN if the quadruple-precision floating point value `a' is
diff --git a/fpu/softfloat.c b/fpu/softfloat.c
index b29256a..8413146 100644
--- a/fpu/softfloat.c
+++ b/fpu/softfloat.c
@@ -1238,7 +1238,7 @@
     if ( a == 0 ) return float32_zero;
     shiftCount = countLeadingZeros64( a ) - 40;
     if ( 0 <= shiftCount ) {
-        return packFloat32( 1 > 0, 0x95 - shiftCount, a<<shiftCount );
+        return packFloat32(0, 0x95 - shiftCount, a<<shiftCount);
     }
     else {
         shiftCount += 7;
@@ -1248,7 +1248,7 @@
         else {
             a <<= shiftCount;
         }
-        return roundAndPackFloat32( 1 > 0, 0x9C - shiftCount, a STATUS_VAR );
+        return roundAndPackFloat32(0, 0x9C - shiftCount, a STATUS_VAR);
     }
 }
 
@@ -3007,7 +3007,7 @@
         if (aSig) {
             return commonNaNToFloat32(float16ToCommonNaN(a STATUS_VAR) STATUS_VAR);
         }
-        return packFloat32(aSign, 0xff, aSig << 13);
+        return packFloat32(aSign, 0xff, 0);
     }
     if (aExp == 0) {
         int8 shiftCount;
diff --git a/fpu/softfloat.h b/fpu/softfloat.h
index feec3a1..d8999b3 100644
--- a/fpu/softfloat.h
+++ b/fpu/softfloat.h
@@ -219,7 +219,7 @@
 enum {
     float_muladd_negate_c = 1,
     float_muladd_negate_product = 2,
-    float_muladd_negate_result = 3,
+    float_muladd_negate_result = 4,
 };
 
 /*----------------------------------------------------------------------------
@@ -251,6 +251,11 @@
 int float16_is_signaling_nan( float16 );
 float16 float16_maybe_silence_nan( float16 );
 
+INLINE int float16_is_any_nan(float16 a)
+{
+    return ((float16_val(a) & ~0x8000) > 0x7c00);
+}
+
 /*----------------------------------------------------------------------------
 | The pattern for a default generated half-precision NaN.
 *----------------------------------------------------------------------------*/
diff --git a/gdbstub.c b/gdbstub.c
index 5d37dd9..d02ec75 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1226,33 +1226,48 @@
 
 static int cpu_gdb_read_register(CPUSH4State *env, uint8_t *mem_buf, int n)
 {
-    if (n < 8) {
+    switch (n) {
+    case 0 ... 7:
         if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
             GET_REGL(env->gregs[n + 16]);
         } else {
             GET_REGL(env->gregs[n]);
         }
-    } else if (n < 16) {
+    case 8 ... 15:
         GET_REGL(env->gregs[n]);
-    } else if (n >= 25 && n < 41) {
-	GET_REGL(env->fregs[(n - 25) + ((env->fpscr & FPSCR_FR) ? 16 : 0)]);
-    } else if (n >= 43 && n < 51) {
-	GET_REGL(env->gregs[n - 43]);
-    } else if (n >= 51 && n < 59) {
-	GET_REGL(env->gregs[n - (51 - 16)]);
-    }
-    switch (n) {
-    case 16: GET_REGL(env->pc);
-    case 17: GET_REGL(env->pr);
-    case 18: GET_REGL(env->gbr);
-    case 19: GET_REGL(env->vbr);
-    case 20: GET_REGL(env->mach);
-    case 21: GET_REGL(env->macl);
-    case 22: GET_REGL(env->sr);
-    case 23: GET_REGL(env->fpul);
-    case 24: GET_REGL(env->fpscr);
-    case 41: GET_REGL(env->ssr);
-    case 42: GET_REGL(env->spc);
+    case 16:
+        GET_REGL(env->pc);
+    case 17:
+        GET_REGL(env->pr);
+    case 18:
+        GET_REGL(env->gbr);
+    case 19:
+        GET_REGL(env->vbr);
+    case 20:
+        GET_REGL(env->mach);
+    case 21:
+        GET_REGL(env->macl);
+    case 22:
+        GET_REGL(env->sr);
+    case 23:
+        GET_REGL(env->fpul);
+    case 24:
+        GET_REGL(env->fpscr);
+    case 25 ... 40:
+        if (env->fpscr & FPSCR_FR) {
+            stfl_p(mem_buf, env->fregs[n - 9]);
+        } else {
+            stfl_p(mem_buf, env->fregs[n - 25]);
+        }
+        return 4;
+    case 41:
+        GET_REGL(env->ssr);
+    case 42:
+        GET_REGL(env->spc);
+    case 43 ... 50:
+        GET_REGL(env->gregs[n - 43]);
+    case 51 ... 58:
+        GET_REGL(env->gregs[n - (51 - 16)]);
     }
 
     return 0;
@@ -1260,42 +1275,63 @@
 
 static int cpu_gdb_write_register(CPUSH4State *env, uint8_t *mem_buf, int n)
 {
-    uint32_t tmp;
-
-    tmp = ldl_p(mem_buf);
-
-    if (n < 8) {
-        if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
-            env->gregs[n + 16] = tmp;
-        } else {
-            env->gregs[n] = tmp;
-        }
-	return 4;
-    } else if (n < 16) {
-        env->gregs[n] = tmp;
-	return 4;
-    } else if (n >= 25 && n < 41) {
-	env->fregs[(n - 25) + ((env->fpscr & FPSCR_FR) ? 16 : 0)] = tmp;
-	return 4;
-    } else if (n >= 43 && n < 51) {
-	env->gregs[n - 43] = tmp;
-	return 4;
-    } else if (n >= 51 && n < 59) {
-	env->gregs[n - (51 - 16)] = tmp;
-	return 4;
-    }
     switch (n) {
-    case 16: env->pc = tmp; break;
-    case 17: env->pr = tmp; break;
-    case 18: env->gbr = tmp; break;
-    case 19: env->vbr = tmp; break;
-    case 20: env->mach = tmp; break;
-    case 21: env->macl = tmp; break;
-    case 22: env->sr = tmp; break;
-    case 23: env->fpul = tmp; break;
-    case 24: env->fpscr = tmp; break;
-    case 41: env->ssr = tmp; break;
-    case 42: env->spc = tmp; break;
+    case 0 ... 7:
+        if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) {
+            env->gregs[n + 16] = ldl_p(mem_buf);
+        } else {
+            env->gregs[n] = ldl_p(mem_buf);
+        }
+        break;
+    case 8 ... 15:
+        env->gregs[n] = ldl_p(mem_buf);
+        break;
+    case 16:
+        env->pc = ldl_p(mem_buf);
+        break;
+    case 17:
+        env->pr = ldl_p(mem_buf);
+        break;
+    case 18:
+        env->gbr = ldl_p(mem_buf);
+        break;
+    case 19:
+        env->vbr = ldl_p(mem_buf);
+        break;
+    case 20:
+        env->mach = ldl_p(mem_buf);
+        break;
+    case 21:
+        env->macl = ldl_p(mem_buf);
+        break;
+    case 22:
+        env->sr = ldl_p(mem_buf);
+        break;
+    case 23:
+        env->fpul = ldl_p(mem_buf);
+        break;
+    case 24:
+        env->fpscr = ldl_p(mem_buf);
+        break;
+    case 25 ... 40:
+        if (env->fpscr & FPSCR_FR) {
+            env->fregs[n - 9] = ldfl_p(mem_buf);
+        } else {
+            env->fregs[n - 25] = ldfl_p(mem_buf);
+        }
+        break;
+    case 41:
+        env->ssr = ldl_p(mem_buf);
+        break;
+    case 42:
+        env->spc = ldl_p(mem_buf);
+        break;
+    case 43 ... 50:
+        env->gregs[n - 43] = ldl_p(mem_buf);
+        break;
+    case 51 ... 58:
+        env->gregs[n - (51 - 16)] = ldl_p(mem_buf);
+        break;
     default: return 0;
     }
 
@@ -1660,6 +1696,10 @@
         GET_REG32(env->uregs[reg->targno & 0xff]);
         break;
 
+    case 4: /*f*/
+        GET_REG32(float32_val(env->fregs[reg->targno & 0x0f]));
+        break;
+
     case 8: /*a*/
         GET_REG32(env->regs[reg->targno & 0x0f]);
         break;
@@ -1700,6 +1740,10 @@
         env->uregs[reg->targno & 0xff] = tmp;
         break;
 
+    case 4: /*f*/
+        env->fregs[reg->targno & 0x0f] = make_float32(tmp);
+        break;
+
     case 8: /*a*/
         env->regs[reg->targno & 0x0f] = tmp;
         break;
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 20806b7..698e114 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -99,9 +99,10 @@
 
     {
         .name       = "block_job_cancel",
-        .args_type  = "device:B",
-        .params     = "device",
-        .help       = "stop an active background block operation",
+        .args_type  = "force:-f,device:B",
+        .params     = "[-f] device",
+        .help       = "stop an active background block operation (use -f"
+                      "\n\t\t\t if the operation is currently paused)",
         .mhandler.cmd = hmp_block_job_cancel,
     },
 
@@ -112,6 +113,34 @@
 ETEXI
 
     {
+        .name       = "block_job_pause",
+        .args_type  = "device:B",
+        .params     = "device",
+        .help       = "pause an active background block operation",
+        .mhandler.cmd = hmp_block_job_pause,
+    },
+
+STEXI
+@item block_job_pause
+@findex block_job_pause
+Pause an active block streaming operation.
+ETEXI
+
+    {
+        .name       = "block_job_resume",
+        .args_type  = "device:B",
+        .params     = "device",
+        .help       = "resume a paused background block operation",
+        .mhandler.cmd = hmp_block_job_resume,
+    },
+
+STEXI
+@item block_job_resume
+@findex block_job_resume
+Resume a paused block streaming operation.
+ETEXI
+
+    {
         .name       = "eject",
         .args_type  = "force:-f,device:B",
         .params     = "[-f] device",
@@ -914,12 +943,11 @@
 #if defined(CONFIG_HAVE_CORE_DUMP)
     {
         .name       = "dump-guest-memory",
-        .args_type  = "paging:-p,protocol:s,begin:i?,length:i?",
-        .params     = "[-p] protocol [begin] [length]",
+        .args_type  = "paging:-p,filename:F,begin:i?,length:i?",
+        .params     = "[-p] filename [begin] [length]",
         .help       = "dump guest memory to file"
                       "\n\t\t\t begin(optional): the starting physical address"
                       "\n\t\t\t length(optional): the memory size, in bytes",
-        .user_print = monitor_user_noop,
         .mhandler.cmd = hmp_dump_guest_memory,
     },
 
@@ -929,8 +957,7 @@
 @findex dump-guest-memory
 Dump guest memory to @var{protocol}. The file can be processed with crash or
 gdb.
-  protocol: destination file(started with "file:") or destination file
-            descriptor (started with "fd:")
+  filename: dump file name
     paging: do paging to get guest's memory mapping
      begin: the starting physical address. It's optional, and should be
             specified with length together.
diff --git a/hmp.c b/hmp.c
index ba6fbd3..70bdec2 100644
--- a/hmp.c
+++ b/hmp.c
@@ -930,7 +930,8 @@
     int64_t speed = qdict_get_try_int(qdict, "speed", 0);
 
     qmp_block_stream(device, base != NULL, base,
-                     qdict_haskey(qdict, "speed"), speed, &error);
+                     qdict_haskey(qdict, "speed"), speed,
+                     BLOCKDEV_ON_ERROR_REPORT, true, &error);
 
     hmp_handle_error(mon, &error);
 }
@@ -950,8 +951,29 @@
 {
     Error *error = NULL;
     const char *device = qdict_get_str(qdict, "device");
+    bool force = qdict_get_try_bool(qdict, "force", 0);
 
-    qmp_block_job_cancel(device, &error);
+    qmp_block_job_cancel(device, true, force, &error);
+
+    hmp_handle_error(mon, &error);
+}
+
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict)
+{
+    Error *error = NULL;
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_block_job_pause(device, &error);
+
+    hmp_handle_error(mon, &error);
+}
+
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict)
+{
+    Error *error = NULL;
+    const char *device = qdict_get_str(qdict, "device");
+
+    qmp_block_job_resume(device, &error);
 
     hmp_handle_error(mon, &error);
 }
@@ -1042,11 +1064,12 @@
 {
     Error *errp = NULL;
     int paging = qdict_get_try_bool(qdict, "paging", 0);
-    const char *file = qdict_get_str(qdict, "protocol");
+    const char *file = qdict_get_str(qdict, "filename");
     bool has_begin = qdict_haskey(qdict, "begin");
     bool has_length = qdict_haskey(qdict, "length");
     int64_t begin = 0;
     int64_t length = 0;
+    char *prot;
 
     if (has_begin) {
         begin = qdict_get_int(qdict, "begin");
@@ -1055,9 +1078,12 @@
         length = qdict_get_int(qdict, "length");
     }
 
-    qmp_dump_guest_memory(paging, file, has_begin, begin, has_length, length,
+    prot = g_strconcat("file:", file, NULL);
+
+    qmp_dump_guest_memory(paging, prot, has_begin, begin, has_length, length,
                           &errp);
     hmp_handle_error(mon, &errp);
+    g_free(prot);
 }
 
 void hmp_netdev_add(Monitor *mon, const QDict *qdict)
@@ -1109,13 +1135,13 @@
 void hmp_send_key(Monitor *mon, const QDict *qdict)
 {
     const char *keys = qdict_get_str(qdict, "keys");
-    QKeyCodeList *keylist, *head = NULL, *tmp = NULL;
+    KeyValueList *keylist, *head = NULL, *tmp = NULL;
     int has_hold_time = qdict_haskey(qdict, "hold-time");
     int hold_time = qdict_get_try_int(qdict, "hold-time", -1);
     Error *err = NULL;
     char keyname_buf[16];
     char *separator;
-    int keyname_len, idx;
+    int keyname_len;
 
     while (1) {
         separator = strchr(keys, '-');
@@ -1129,15 +1155,8 @@
         }
         keyname_buf[keyname_len] = 0;
 
-        idx = index_from_key(keyname_buf);
-        if (idx == Q_KEY_CODE_MAX) {
-            monitor_printf(mon, "invalid parameter: %s\n", keyname_buf);
-            break;
-        }
-
         keylist = g_malloc0(sizeof(*keylist));
-        keylist->value = idx;
-        keylist->next = NULL;
+        keylist->value = g_malloc0(sizeof(*keylist->value));
 
         if (!head) {
             head = keylist;
@@ -1147,17 +1166,39 @@
         }
         tmp = keylist;
 
+        if (strstart(keyname_buf, "0x", NULL)) {
+            char *endp;
+            int value = strtoul(keyname_buf, &endp, 0);
+            if (*endp != '\0') {
+                goto err_out;
+            }
+            keylist->value->kind = KEY_VALUE_KIND_NUMBER;
+            keylist->value->number = value;
+        } else {
+            int idx = index_from_key(keyname_buf);
+            if (idx == Q_KEY_CODE_MAX) {
+                goto err_out;
+            }
+            keylist->value->kind = KEY_VALUE_KIND_QCODE;
+            keylist->value->qcode = idx;
+        }
+
         if (!separator) {
             break;
         }
         keys = separator + 1;
     }
 
-    if (idx != Q_KEY_CODE_MAX) {
-        qmp_send_key(head, has_hold_time, hold_time, &err);
-    }
+    qmp_send_key(head, has_hold_time, hold_time, &err);
     hmp_handle_error(mon, &err);
-    qapi_free_QKeyCodeList(head);
+
+out:
+    qapi_free_KeyValueList(head);
+    return;
+
+err_out:
+    monitor_printf(mon, "invalid parameter: %s\n", keyname_buf);
+    goto out;
 }
 
 void hmp_screen_dump(Monitor *mon, const QDict *qdict)
diff --git a/hmp.h b/hmp.h
index 48b9c59..71ea384 100644
--- a/hmp.h
+++ b/hmp.h
@@ -64,6 +64,8 @@
 void hmp_block_stream(Monitor *mon, const QDict *qdict);
 void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict);
 void hmp_block_job_cancel(Monitor *mon, const QDict *qdict);
+void hmp_block_job_pause(Monitor *mon, const QDict *qdict);
+void hmp_block_job_resume(Monitor *mon, const QDict *qdict);
 void hmp_migrate(Monitor *mon, const QDict *qdict);
 void hmp_device_del(Monitor *mon, const QDict *qdict);
 void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict);
diff --git a/hw/9pfs/Makefile.objs b/hw/9pfs/Makefile.objs
index 972df24..1e9b595 100644
--- a/hw/9pfs/Makefile.objs
+++ b/hw/9pfs/Makefile.objs
@@ -1,9 +1,9 @@
-hw-obj-y  = virtio-9p.o
-hw-obj-y += virtio-9p-local.o virtio-9p-xattr.o
-hw-obj-y += virtio-9p-xattr-user.o virtio-9p-posix-acl.o
-hw-obj-y += virtio-9p-coth.o cofs.o codir.o cofile.o
-hw-obj-y += coxattr.o virtio-9p-synth.o
-hw-obj-$(CONFIG_OPEN_BY_HANDLE) +=  virtio-9p-handle.o
-hw-obj-y += virtio-9p-proxy.o
+common-obj-y  = virtio-9p.o
+common-obj-y += virtio-9p-local.o virtio-9p-xattr.o
+common-obj-y += virtio-9p-xattr-user.o virtio-9p-posix-acl.o
+common-obj-y += virtio-9p-coth.o cofs.o codir.o cofile.o
+common-obj-y += coxattr.o virtio-9p-synth.o
+common-obj-$(CONFIG_OPEN_BY_HANDLE) +=  virtio-9p-handle.o
+common-obj-y += virtio-9p-proxy.o
 
 obj-y += virtio-9p-device.o
diff --git a/hw/9pfs/virtio-9p-posix-acl.c b/hw/9pfs/virtio-9p-posix-acl.c
index a1948e3..c064017 100644
--- a/hw/9pfs/virtio-9p-posix-acl.c
+++ b/hw/9pfs/virtio-9p-posix-acl.c
@@ -44,7 +44,8 @@
         return -1;
     }
 
-    strncpy(value, ACL_ACCESS, len);
+    /* len includes the trailing NUL */
+    memcpy(value, ACL_ACCESS, len);
     return 0;
 }
 
@@ -95,7 +96,8 @@
         return -1;
     }
 
-    strncpy(value, ACL_DEFAULT, len);
+    /* len includes the trailing NUL */
+    memcpy(value, ACL_ACCESS, len);
     return 0;
 }
 
diff --git a/hw/9pfs/virtio-9p-synth.c b/hw/9pfs/virtio-9p-synth.c
index 92e0b09..e95a856 100644
--- a/hw/9pfs/virtio-9p-synth.c
+++ b/hw/9pfs/virtio-9p-synth.c
@@ -58,7 +58,7 @@
         node->attr->read  = NULL;
     }
     node->private = node;
-    strncpy(node->name, name, sizeof(node->name));
+    pstrcpy(node->name, sizeof(node->name), name);
     QLIST_INSERT_HEAD_RCU(&parent->child, node, sibling);
     return node;
 }
@@ -132,7 +132,7 @@
     node->attr->write  = write;
     node->attr->mode   = mode;
     node->private      = arg;
-    strncpy(node->name, name, sizeof(node->name));
+    pstrcpy(node->name, sizeof(node->name), name);
     QLIST_INSERT_HEAD_RCU(&parent->child, node, sibling);
     ret = 0;
 err_out:
diff --git a/hw/9pfs/virtio-9p-xattr-user.c b/hw/9pfs/virtio-9p-xattr-user.c
index 5044a3e..5bb6020 100644
--- a/hw/9pfs/virtio-9p-xattr-user.c
+++ b/hw/9pfs/virtio-9p-xattr-user.c
@@ -61,7 +61,8 @@
         return -1;
     }
 
-    strncpy(value, name, name_size);
+    /* name_size includes the trailing NUL. */
+    memcpy(value, name, name_size);
     return name_size;
 }
 
diff --git a/hw/9pfs/virtio-9p-xattr.c b/hw/9pfs/virtio-9p-xattr.c
index 7f08f6e..a839606 100644
--- a/hw/9pfs/virtio-9p-xattr.c
+++ b/hw/9pfs/virtio-9p-xattr.c
@@ -53,7 +53,8 @@
         return -1;
     }
 
-    strncpy(value, name, name_size);
+    /* no need for strncpy: name_size is strlen(name)+1 */
+    memcpy(value, name, name_size);
     return name_size;
 }
 
diff --git a/hw/9pfs/virtio-9p.c b/hw/9pfs/virtio-9p.c
index 4b52540..8b9cdc9 100644
--- a/hw/9pfs/virtio-9p.c
+++ b/hw/9pfs/virtio-9p.c
@@ -505,7 +505,6 @@
         error_report("9pfs:%s: One or more uncluncked fids "
                      "found during reset", __func__);
     }
-    return;
 }
 
 #define P9_QID_TYPE_DIR         0x80
@@ -934,7 +933,6 @@
 out:
     complete_pdu(s, pdu, offset);
     v9fs_string_free(&version);
-    return;
 }
 
 static void v9fs_attach(void *opaque)
@@ -1314,7 +1312,6 @@
         g_free(wnames);
         g_free(qids);
     }
-    return;
 }
 
 static int32_t get_iounit(V9fsPDU *pdu, V9fsPath *path)
@@ -2257,7 +2254,6 @@
         free_pdu(pdu->s, cancel_pdu);
     }
     complete_pdu(s, pdu, 7);
-    return;
 }
 
 static void v9fs_link(void *opaque)
@@ -2763,7 +2759,6 @@
     put_fid(pdu, fidp);
 out_nofid:
     complete_pdu(s, pdu, retval);
-    return;
 }
 
 static void v9fs_mknod(void *opaque)
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 850b87b..66ff868 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -1,141 +1,142 @@
-hw-obj-y = usb/ ide/
-hw-obj-y += loader.o
-hw-obj-$(CONFIG_VIRTIO) += virtio-console.o
-hw-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
-hw-obj-y += fw_cfg.o
-hw-obj-$(CONFIG_PCI) += pci.o pci_bridge.o pci_bridge_dev.o
-hw-obj-$(CONFIG_PCI) += msix.o msi.o
-hw-obj-$(CONFIG_PCI) += shpc.o
-hw-obj-$(CONFIG_PCI) += slotid_cap.o
-hw-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o
-hw-obj-$(CONFIG_PCI) += ioh3420.o xio3130_upstream.o xio3130_downstream.o
-hw-obj-y += watchdog.o
-hw-obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
-hw-obj-$(CONFIG_ECC) += ecc.o
-hw-obj-$(CONFIG_NAND) += nand.o
-hw-obj-$(CONFIG_PFLASH_CFI01) += pflash_cfi01.o
-hw-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
+common-obj-y = usb/ ide/
+common-obj-y += loader.o
+common-obj-$(CONFIG_VIRTIO) += virtio-console.o
+common-obj-$(CONFIG_VIRTIO_PCI) += virtio-pci.o
+common-obj-y += fw_cfg.o
+common-obj-$(CONFIG_PCI) += pci.o pci_bridge.o pci_bridge_dev.o
+common-obj-$(CONFIG_PCI) += msix.o msi.o
+common-obj-$(CONFIG_PCI) += shpc.o
+common-obj-$(CONFIG_PCI) += slotid_cap.o
+common-obj-$(CONFIG_PCI) += pci_host.o pcie_host.o
+common-obj-$(CONFIG_PCI) += ioh3420.o xio3130_upstream.o xio3130_downstream.o
+common-obj-y += watchdog.o
+common-obj-$(CONFIG_ISA_MMIO) += isa_mmio.o
+common-obj-$(CONFIG_ECC) += ecc.o
+common-obj-$(CONFIG_NAND) += nand.o
+common-obj-$(CONFIG_PFLASH_CFI01) += pflash_cfi01.o
+common-obj-$(CONFIG_PFLASH_CFI02) += pflash_cfi02.o
 
-hw-obj-$(CONFIG_M48T59) += m48t59.o
-hw-obj-$(CONFIG_ESCC) += escc.o
-hw-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o
+common-obj-$(CONFIG_M48T59) += m48t59.o
+common-obj-$(CONFIG_ESCC) += escc.o
+common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o
 
-hw-obj-$(CONFIG_SERIAL) += serial.o
-hw-obj-$(CONFIG_PARALLEL) += parallel.o
-hw-obj-$(CONFIG_I8254) += i8254_common.o i8254.o
-hw-obj-$(CONFIG_PCSPK) += pcspk.o
-hw-obj-$(CONFIG_PCKBD) += pckbd.o
-hw-obj-$(CONFIG_FDC) += fdc.o
-# needs fixes for cpu hotplug, so moved to Makefile.target:
-# hw-obj-$(CONFIG_ACPI) += acpi.o acpi_piix4.o
-hw-obj-$(CONFIG_APM) += pm_smbus.o apm.o
-hw-obj-$(CONFIG_DMA) += dma.o
-hw-obj-$(CONFIG_I82374) += i82374.o
-hw-obj-$(CONFIG_HPET) += hpet.o
-hw-obj-$(CONFIG_APPLESMC) += applesmc.o
-hw-obj-$(CONFIG_SMARTCARD) += ccid-card-passthru.o
-hw-obj-$(CONFIG_SMARTCARD_NSS) += ccid-card-emulated.o
-hw-obj-$(CONFIG_I8259) += i8259_common.o i8259.o
+common-obj-$(CONFIG_SERIAL) += serial.o
+common-obj-$(CONFIG_PARALLEL) += parallel.o
+common-obj-$(CONFIG_I8254) += i8254_common.o i8254.o
+common-obj-$(CONFIG_PCSPK) += pcspk.o
+common-obj-$(CONFIG_PCKBD) += pckbd.o
+common-obj-$(CONFIG_FDC) += fdc.o
+# qemu-kvm: acpi_piix4.c uses TARGET_I386 define
+#common-obj-$(CONFIG_ACPI) += acpi.o acpi_piix4.o
+common-obj-$(CONFIG_APM) += pm_smbus.o apm.o
+common-obj-$(CONFIG_DMA) += dma.o
+common-obj-$(CONFIG_I82374) += i82374.o
+common-obj-$(CONFIG_HPET) += hpet.o
+common-obj-$(CONFIG_APPLESMC) += applesmc.o
+common-obj-$(CONFIG_SMARTCARD) += ccid-card-passthru.o
+common-obj-$(CONFIG_SMARTCARD_NSS) += ccid-card-emulated.o
+common-obj-$(CONFIG_I8259) += i8259_common.o i8259.o
+common-obj-y += fifo.o
 
 # PPC devices
-hw-obj-$(CONFIG_PREP_PCI) += prep_pci.o
-hw-obj-$(CONFIG_I82378) += i82378.o
+common-obj-$(CONFIG_PREP_PCI) += prep_pci.o
+common-obj-$(CONFIG_I82378) += i82378.o
 # Mac shared devices
-hw-obj-$(CONFIG_MACIO) += macio.o
-hw-obj-$(CONFIG_CUDA) += cuda.o
-hw-obj-$(CONFIG_ADB) += adb.o
-hw-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o
-hw-obj-$(CONFIG_MAC_DBDMA) += mac_dbdma.o
+common-obj-$(CONFIG_MACIO) += macio.o
+common-obj-$(CONFIG_CUDA) += cuda.o
+common-obj-$(CONFIG_ADB) += adb.o
+common-obj-$(CONFIG_MAC_NVRAM) += mac_nvram.o
+common-obj-$(CONFIG_MAC_DBDMA) += mac_dbdma.o
 # OldWorld PowerMac
-hw-obj-$(CONFIG_HEATHROW_PIC) += heathrow_pic.o
-hw-obj-$(CONFIG_GRACKLE_PCI) += grackle_pci.o
+common-obj-$(CONFIG_HEATHROW_PIC) += heathrow_pic.o
+common-obj-$(CONFIG_GRACKLE_PCI) += grackle_pci.o
 # NewWorld PowerMac
-hw-obj-$(CONFIG_UNIN_PCI) += unin_pci.o
-hw-obj-$(CONFIG_DEC_PCI) += dec_pci.o
+common-obj-$(CONFIG_UNIN_PCI) += unin_pci.o
+common-obj-$(CONFIG_DEC_PCI) += dec_pci.o
 # PowerPC E500 boards
-hw-obj-$(CONFIG_PPCE500_PCI) += ppce500_pci.o
+common-obj-$(CONFIG_PPCE500_PCI) += ppce500_pci.o
 
 # MIPS devices
-hw-obj-$(CONFIG_PIIX4) += piix4.o
-hw-obj-$(CONFIG_G364FB) += g364fb.o
-hw-obj-$(CONFIG_JAZZ_LED) += jazz_led.o
+common-obj-$(CONFIG_PIIX4) += piix4.o
+common-obj-$(CONFIG_G364FB) += g364fb.o
+common-obj-$(CONFIG_JAZZ_LED) += jazz_led.o
 
 # Xilinx devices
-hw-obj-$(CONFIG_XILINX) += xilinx_intc.o
-hw-obj-$(CONFIG_XILINX) += xilinx_timer.o
-hw-obj-$(CONFIG_XILINX) += xilinx_uartlite.o
-hw-obj-$(CONFIG_XILINX_AXI) += xilinx_axidma.o
-hw-obj-$(CONFIG_XILINX_AXI) += xilinx_axienet.o
-hw-obj-$(CONFIG_XILINX_AXI) += stream.o
+common-obj-$(CONFIG_XILINX) += xilinx_intc.o
+common-obj-$(CONFIG_XILINX) += xilinx_timer.o
+common-obj-$(CONFIG_XILINX) += xilinx_uartlite.o
+common-obj-$(CONFIG_XILINX_AXI) += xilinx_axidma.o
+common-obj-$(CONFIG_XILINX_AXI) += xilinx_axienet.o
+common-obj-$(CONFIG_XILINX_AXI) += stream.o
 
 # PKUnity SoC devices
-hw-obj-$(CONFIG_PUV3) += puv3_intc.o
-hw-obj-$(CONFIG_PUV3) += puv3_ost.o
-hw-obj-$(CONFIG_PUV3) += puv3_gpio.o
-hw-obj-$(CONFIG_PUV3) += puv3_pm.o
-hw-obj-$(CONFIG_PUV3) += puv3_dma.o
+common-obj-$(CONFIG_PUV3) += puv3_intc.o
+common-obj-$(CONFIG_PUV3) += puv3_ost.o
+common-obj-$(CONFIG_PUV3) += puv3_gpio.o
+common-obj-$(CONFIG_PUV3) += puv3_pm.o
+common-obj-$(CONFIG_PUV3) += puv3_dma.o
 
 # ARM devices
-hw-obj-$(CONFIG_ARM_TIMER) += arm_timer.o
-hw-obj-$(CONFIG_PL011) += pl011.o
-hw-obj-$(CONFIG_PL022) += pl022.o
-hw-obj-$(CONFIG_PL031) += pl031.o
-hw-obj-$(CONFIG_PL041) += pl041.o lm4549.o
-hw-obj-$(CONFIG_PL050) += pl050.o
-hw-obj-$(CONFIG_PL061) += pl061.o
-hw-obj-$(CONFIG_PL080) += pl080.o
-hw-obj-$(CONFIG_PL110) += pl110.o
-hw-obj-$(CONFIG_PL181) += pl181.o
-hw-obj-$(CONFIG_PL190) += pl190.o
-hw-obj-$(CONFIG_PL310) += arm_l2x0.o
-hw-obj-$(CONFIG_VERSATILE_PCI) += versatile_pci.o
-hw-obj-$(CONFIG_VERSATILE_I2C) += versatile_i2c.o
-hw-obj-$(CONFIG_CADENCE) += cadence_uart.o
-hw-obj-$(CONFIG_CADENCE) += cadence_ttc.o
-hw-obj-$(CONFIG_CADENCE) += cadence_gem.o
-hw-obj-$(CONFIG_XGMAC) += xgmac.o
+common-obj-$(CONFIG_ARM_TIMER) += arm_timer.o
+common-obj-$(CONFIG_PL011) += pl011.o
+common-obj-$(CONFIG_PL022) += pl022.o
+common-obj-$(CONFIG_PL031) += pl031.o
+common-obj-$(CONFIG_PL041) += pl041.o lm4549.o
+common-obj-$(CONFIG_PL050) += pl050.o
+common-obj-$(CONFIG_PL061) += pl061.o
+common-obj-$(CONFIG_PL080) += pl080.o
+common-obj-$(CONFIG_PL110) += pl110.o
+common-obj-$(CONFIG_PL181) += pl181.o
+common-obj-$(CONFIG_PL190) += pl190.o
+common-obj-$(CONFIG_PL310) += arm_l2x0.o
+common-obj-$(CONFIG_VERSATILE_PCI) += versatile_pci.o
+common-obj-$(CONFIG_VERSATILE_I2C) += versatile_i2c.o
+common-obj-$(CONFIG_CADENCE) += cadence_uart.o
+common-obj-$(CONFIG_CADENCE) += cadence_ttc.o
+common-obj-$(CONFIG_CADENCE) += cadence_gem.o
+common-obj-$(CONFIG_XGMAC) += xgmac.o
 
 # PCI watchdog devices
-hw-obj-$(CONFIG_PCI) += wdt_i6300esb.o
+common-obj-$(CONFIG_PCI) += wdt_i6300esb.o
 
-hw-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
+common-obj-$(CONFIG_PCI) += pcie.o pcie_aer.o pcie_port.o
 
 # PCI network cards
-hw-obj-$(CONFIG_NE2000_PCI) += ne2000.o
-hw-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o
-hw-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
-hw-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
-hw-obj-$(CONFIG_E1000_PCI) += e1000.o
-hw-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
+common-obj-$(CONFIG_NE2000_PCI) += ne2000.o
+common-obj-$(CONFIG_EEPRO100_PCI) += eepro100.o
+common-obj-$(CONFIG_PCNET_PCI) += pcnet-pci.o
+common-obj-$(CONFIG_PCNET_COMMON) += pcnet.o
+common-obj-$(CONFIG_E1000_PCI) += e1000.o
+common-obj-$(CONFIG_RTL8139_PCI) += rtl8139.o
 
-hw-obj-$(CONFIG_SMC91C111) += smc91c111.o
-hw-obj-$(CONFIG_LAN9118) += lan9118.o
-hw-obj-$(CONFIG_NE2000_ISA) += ne2000-isa.o
-hw-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o
+common-obj-$(CONFIG_SMC91C111) += smc91c111.o
+common-obj-$(CONFIG_LAN9118) += lan9118.o
+common-obj-$(CONFIG_NE2000_ISA) += ne2000-isa.o
+common-obj-$(CONFIG_OPENCORES_ETH) += opencores_eth.o
 
 # SCSI layer
-hw-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
-hw-obj-$(CONFIG_MEGASAS_SCSI_PCI) += megasas.o
-hw-obj-$(CONFIG_ESP) += esp.o
-hw-obj-$(CONFIG_ESP_PCI) += esp-pci.o
+common-obj-$(CONFIG_LSI_SCSI_PCI) += lsi53c895a.o
+common-obj-$(CONFIG_MEGASAS_SCSI_PCI) += megasas.o
+common-obj-$(CONFIG_ESP) += esp.o
+common-obj-$(CONFIG_ESP_PCI) += esp-pci.o
 
-hw-obj-y += sysbus.o isa-bus.o
-hw-obj-y += qdev-addr.o
+common-obj-y += sysbus.o isa-bus.o
+common-obj-y += qdev-addr.o
 
 # VGA
-hw-obj-$(CONFIG_VGA_PCI) += vga-pci.o
-hw-obj-$(CONFIG_VGA_ISA) += vga-isa.o
-hw-obj-$(CONFIG_VGA_ISA_MM) += vga-isa-mm.o
-hw-obj-$(CONFIG_VMWARE_VGA) += vmware_vga.o
-hw-obj-$(CONFIG_VMMOUSE) += vmmouse.o
-hw-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o
+common-obj-$(CONFIG_VGA_PCI) += vga-pci.o
+common-obj-$(CONFIG_VGA_ISA) += vga-isa.o
+common-obj-$(CONFIG_VGA_ISA_MM) += vga-isa-mm.o
+common-obj-$(CONFIG_VMWARE_VGA) += vmware_vga.o
+common-obj-$(CONFIG_VMMOUSE) += vmmouse.o
+common-obj-$(CONFIG_VGA_CIRRUS) += cirrus_vga.o
 
-hw-obj-$(CONFIG_RC4030) += rc4030.o
-hw-obj-$(CONFIG_DP8393X) += dp8393x.o
-hw-obj-$(CONFIG_DS1225Y) += ds1225y.o
-hw-obj-$(CONFIG_MIPSNET) += mipsnet.o
+common-obj-$(CONFIG_RC4030) += rc4030.o
+common-obj-$(CONFIG_DP8393X) += dp8393x.o
+common-obj-$(CONFIG_DS1225Y) += ds1225y.o
+common-obj-$(CONFIG_MIPSNET) += mipsnet.o
 
-hw-obj-y += null-machine.o
+common-obj-y += null-machine.o
 
 # Sound
 sound-obj-y =
@@ -149,9 +150,9 @@
 
 $(obj)/adlib.o $(obj)/fmopl.o: QEMU_CFLAGS += -DBUILD_Y8950=0
 
-hw-obj-$(CONFIG_SOUND) += $(sound-obj-y)
+common-obj-$(CONFIG_SOUND) += $(sound-obj-y)
 
-hw-obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/
+common-obj-$(CONFIG_REALLY_VIRTFS) += 9pfs/
 
 common-obj-y += usb/
 common-obj-y += irq.o
@@ -174,6 +175,7 @@
 common-obj-y += scsi-generic.o scsi-bus.o
 common-obj-y += hid.o
 common-obj-$(CONFIG_SSI) += ssi.o
+common-obj-$(CONFIG_SSI_M25P80) += m25p80.o
 common-obj-$(CONFIG_SSI_SD) += ssi-sd.o
 common-obj-$(CONFIG_SD) += sd.o
 common-obj-y += bt.o bt-l2cap.o bt-sdp.o bt-hci.o bt-hid.o
@@ -199,7 +201,8 @@
 obj-$(CONFIG_SOFTMMU) += device-hotplug.o
 obj-$(CONFIG_XEN) += xen_domainbuild.o xen_machine_pv.o
 
-# Inter-VM PCI shared memory
+# Inter-VM PCI shared memory & VFIO PCI device assignment
 ifeq ($(CONFIG_PCI), y)
 obj-$(CONFIG_KVM) += ivshmem.o
+obj-$(CONFIG_LINUX) += vfio_pci.o
 endif
diff --git a/hw/acpi.c b/hw/acpi.c
index f7950be..f4aca49 100644
--- a/hw/acpi.c
+++ b/hw/acpi.c
@@ -61,18 +61,6 @@
     return (-sum) & 0xff;
 }
 
-/* like strncpy() but zero-fills the tail of destination */
-static void strzcpy(char *dst, const char *src, size_t size)
-{
-    size_t len = strlen(src);
-    if (len >= size) {
-        len = size;
-    } else {
-      memset(dst + len, 0, size - len);
-    }
-    memcpy(dst, src, len);
-}
-
 /* XXX fixme: this function uses obsolete argument parsing interface */
 int acpi_table_add(const char *t)
 {
@@ -157,7 +145,8 @@
     hdr._length = cpu_to_le16(len);
 
     if (get_param_value(buf, sizeof(buf), "sig", t)) {
-        strzcpy(hdr.sig, buf, sizeof(hdr.sig));
+        /* strncpy is justified: the field need not be NUL-terminated. */
+        strncpy(hdr.sig, buf, sizeof(hdr.sig));
         ++changed;
     }
 
@@ -187,12 +176,14 @@
     }
 
     if (get_param_value(buf, sizeof(buf), "oem_id", t)) {
-        strzcpy(hdr.oem_id, buf, sizeof(hdr.oem_id));
+        /* strncpy is justified: the field need not be NUL-terminated. */
+        strncpy(hdr.oem_id, buf, sizeof(hdr.oem_id));
         ++changed;
     }
 
     if (get_param_value(buf, sizeof(buf), "oem_table_id", t)) {
-        strzcpy(hdr.oem_table_id, buf, sizeof(hdr.oem_table_id));
+        /* strncpy is justified: the field need not be NUL-terminated. */
+        strncpy(hdr.oem_table_id, buf, sizeof(hdr.oem_table_id));
         ++changed;
     }
 
@@ -207,7 +198,8 @@
     }
 
     if (get_param_value(buf, sizeof(buf), "asl_compiler_id", t)) {
-        strzcpy(hdr.asl_compiler_id, buf, sizeof(hdr.asl_compiler_id));
+        /* strncpy is justified: the field need not be NUL-terminated. */
+        strncpy(hdr.asl_compiler_id, buf, sizeof(hdr.asl_compiler_id));
         ++changed;
     }
 
diff --git a/hw/acpi_piix4.c b/hw/acpi_piix4.c
index dad3887..a48187f 100644
--- a/hw/acpi_piix4.c
+++ b/hw/acpi_piix4.c
@@ -73,6 +73,7 @@
     qemu_irq smi_irq;
     int kvm_enabled;
     Notifier machine_ready;
+    Notifier powerdown_notifier;
 
     /* for pci hotplug */
     struct gpe_regs gpe_cpu;
@@ -368,9 +369,9 @@
     piix4_update_hotplug(s);
 }
 
-static void piix4_powerdown(void *opaque, int irq, int power_failing)
+static void piix4_pm_powerdown_req(Notifier *n, void *opaque)
 {
-    PIIX4PMState *s = opaque;
+    PIIX4PMState *s = container_of(n, PIIX4PMState, powerdown_notifier);
 
     assert(s != NULL);
     acpi_pm1_evt_power_down(&s->ar);
@@ -427,7 +428,8 @@
     acpi_pm_tmr_init(&s->ar, pm_tmr_timer);
     acpi_gpe_init(&s->ar, GPE_LEN);
 
-    qemu_system_powerdown = *qemu_allocate_irqs(piix4_powerdown, s, 1);
+    s->powerdown_notifier.notify = piix4_pm_powerdown_req;
+    qemu_register_powerdown_notifier(&s->powerdown_notifier);
 
     pm_smbus_init(&s->dev.qdev, &s->smb);
     s->machine_ready.notify = piix4_pm_machine_ready;
diff --git a/hw/ads7846.c b/hw/ads7846.c
index 41c7f10..2ea9e55 100644
--- a/hw/ads7846.c
+++ b/hw/ads7846.c
@@ -119,11 +119,12 @@
 
 static const VMStateDescription vmstate_ads7846 = {
     .name = "ads7846",
-    .version_id = 0,
-    .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
     .post_load = ads7856_post_load,
     .fields      = (VMStateField[]) {
+        VMSTATE_SSI_SLAVE(ssidev, ADS7846State),
         VMSTATE_INT32_ARRAY(input, ADS7846State, 8),
         VMSTATE_INT32(noise, ADS7846State),
         VMSTATE_INT32(cycle, ADS7846State),
diff --git a/hw/alpha_dp264.c b/hw/alpha_dp264.c
index 9eb939f..5ea04c7 100644
--- a/hw/alpha_dp264.c
+++ b/hw/alpha_dp264.c
@@ -77,7 +77,7 @@
     isa_create_simple(isa_bus, "i8042");
 
     /* VGA setup.  Don't bother loading the bios.  */
-    alpha_pci_vga_setup(pci_bus);
+    pci_vga_init(pci_bus);
 
     /* Serial code setup.  */
     for (i = 0; i < MAX_SERIAL_PORTS; ++i) {
diff --git a/hw/alpha_pci.c b/hw/alpha_pci.c
index ea546f8..8079a46 100644
--- a/hw/alpha_pci.c
+++ b/hw/alpha_pci.c
@@ -10,8 +10,6 @@
 #include "alpha_sys.h"
 #include "qemu-log.h"
 #include "sysemu.h"
-#include "vmware_vga.h"
-#include "vga-pci.h"
 
 
 /* PCI IO reads/writes, to byte-word addressable memory.  */
@@ -109,25 +107,3 @@
         .max_access_size = 4,
     },
 };
-
-void alpha_pci_vga_setup(PCIBus *pci_bus)
-{
-    switch (vga_interface_type) {
-#ifdef CONFIG_SPICE
-    case VGA_QXL:
-        pci_create_simple(pci_bus, -1, "qxl-vga");
-        return;
-#endif
-    case VGA_CIRRUS:
-        pci_cirrus_vga_init(pci_bus);
-        return;
-    case VGA_VMWARE:
-        pci_vmsvga_init(pci_bus);
-        return;
-    }
-    /* If VGA is enabled at all, and one of the above didn't work, then
-       fallback to Standard VGA.  */
-    if (vga_interface_type != VGA_NONE) {
-        pci_vga_init(pci_bus);
-    }
-}
diff --git a/hw/alpha_sys.h b/hw/alpha_sys.h
index de40f8b..7604d09 100644
--- a/hw/alpha_sys.h
+++ b/hw/alpha_sys.h
@@ -19,6 +19,4 @@
 extern const MemoryRegionOps alpha_pci_conf1_ops;
 extern const MemoryRegionOps alpha_pci_iack_ops;
 
-void alpha_pci_vga_setup(PCIBus *pci_bus);
-
 #endif
diff --git a/hw/arm/Makefile.objs b/hw/arm/Makefile.objs
index 2b39fb3..6d049e7 100644
--- a/hw/arm/Makefile.objs
+++ b/hw/arm/Makefile.objs
@@ -1,6 +1,7 @@
 obj-y = integratorcp.o versatilepb.o arm_pic.o
 obj-y += arm_boot.o
 obj-y += xilinx_zynq.o zynq_slcr.o
+obj-y += xilinx_spips.o
 obj-y += arm_gic.o arm_gic_common.o
 obj-y += realview_gic.o realview.o arm_sysctl.o arm11mpcore.o a9mpcore.o
 obj-y += exynos4210_gic.o exynos4210_combiner.o exynos4210.o
diff --git a/hw/armv7m_nvic.c b/hw/armv7m_nvic.c
index 6a0832e..5c09116 100644
--- a/hw/armv7m_nvic.c
+++ b/hw/armv7m_nvic.c
@@ -489,7 +489,8 @@
      */
     memory_region_init_alias(&s->gic_iomem_alias, "nvic-gic", &s->gic.iomem,
                              0x100, 0xc00);
-    memory_region_add_subregion_overlap(&s->container, 0x100, &s->gic.iomem, 1);
+    memory_region_add_subregion_overlap(&s->container, 0x100,
+                                        &s->gic_iomem_alias, 1);
     /* Map the whole thing into system memory at the location required
      * by the v7M architecture.
      */
diff --git a/hw/blizzard.c b/hw/blizzard.c
index d1c9d81..06e19b3 100644
--- a/hw/blizzard.c
+++ b/hw/blizzard.c
@@ -878,8 +878,6 @@
         len -= 2;
         buf += 2;
     }
-
-    return;
 }
 
 static void blizzard_update_display(void *opaque)
diff --git a/hw/bt-hci.c b/hw/bt-hci.c
index a3a7fb4..e54cfd7 100644
--- a/hw/bt-hci.c
+++ b/hw/bt-hci.c
@@ -786,7 +786,6 @@
     memcpy(&params.dev_class, &link->host->class, sizeof(params.dev_class));
     params.link_type	= ACL_LINK;
     bt_hci_event(hci, EVT_CONN_REQUEST, &params, EVT_CONN_REQUEST_SIZE);
-    return;
 }
 
 static void bt_hci_conn_accept_timeout(void *opaque)
@@ -943,7 +942,6 @@
 {
     struct bt_device_s *slave;
     evt_remote_name_req_complete params;
-    int len;
 
     for (slave = hci->device.net->slave; slave; slave = slave->next)
         if (slave->page_scan && !bacmp(&slave->bd_addr, bdaddr))
@@ -955,9 +953,7 @@
 
     params.status       = HCI_SUCCESS;
     bacpy(&params.bdaddr, &slave->bd_addr);
-    len = snprintf(params.name, sizeof(params.name),
-                    "%s", slave->lmp_name ?: "");
-    memset(params.name + len, 0, sizeof(params.name) - len);
+    pstrcpy(params.name, sizeof(params.name), slave->lmp_name ?: "");
     bt_hci_event(hci, EVT_REMOTE_NAME_REQ_COMPLETE,
                     &params, EVT_REMOTE_NAME_REQ_COMPLETE_SIZE);
 
@@ -1388,7 +1384,7 @@
     params.status = HCI_SUCCESS;
     memset(params.name, 0, sizeof(params.name));
     if (hci->device.lmp_name)
-        strncpy(params.name, hci->device.lmp_name, sizeof(params.name));
+        pstrcpy(params.name, sizeof(params.name), hci->device.lmp_name);
 
     bt_hci_event_complete(hci, &params, READ_LOCAL_NAME_RP_SIZE);
 }
diff --git a/hw/cadence_uart.c b/hw/cadence_uart.c
index d98e531..f8afc4e 100644
--- a/hw/cadence_uart.c
+++ b/hw/cadence_uart.c
@@ -404,7 +404,7 @@
     uint32_t c = 0;
 
     offset >>= 2;
-    if (offset > R_MAX) {
+    if (offset >= R_MAX) {
         return 0;
     } else if (offset == R_TX_RX) {
         uart_read_rx_fifo(s, &c);
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 035cc34..df05930 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -28,7 +28,6 @@
  */
 #include "hw.h"
 #include "pci.h"
-#include "vga-pci.h"
 #include "console.h"
 #include "vga_int.h"
 #include "loader.h"
@@ -2441,6 +2440,8 @@
     VGACommonState *s = &c->vga;
     int val, index;
 
+    qemu_flush_coalesced_mmio_buffer();
+
     if (vga_ioport_invalid(s, addr)) {
 	val = 0xff;
     } else {
@@ -2534,6 +2535,8 @@
     VGACommonState *s = &c->vga;
     int index;
 
+    qemu_flush_coalesced_mmio_buffer();
+
     /* check port range access depending on color/monochrome mode */
     if (vga_ioport_invalid(s, addr)) {
 	return;
@@ -2854,6 +2857,7 @@
     /* I/O handler for LFB */
     memory_region_init_io(&s->cirrus_linear_io, &cirrus_linear_io_ops, s,
                           "cirrus-linear-io", VGA_RAM_SIZE);
+    memory_region_set_flush_coalesced(&s->cirrus_linear_io);
 
     /* I/O handler for LFB */
     memory_region_init_io(&s->cirrus_linear_bitblt_io,
@@ -2861,10 +2865,12 @@
                           s,
                           "cirrus-bitblt-mmio",
                           0x400000);
+    memory_region_set_flush_coalesced(&s->cirrus_linear_bitblt_io);
 
     /* I/O handler for memory-mapped I/O */
     memory_region_init_io(&s->cirrus_mmio_io, &cirrus_mmio_io_ops, s,
                           "cirrus-mmio", CIRRUS_PNPMMIO_SIZE);
+    memory_region_set_flush_coalesced(&s->cirrus_mmio_io);
 
     s->real_vram_size =
         (s->device_id == CIRRUS_ID_CLGD5446) ? 4096 * 1024 : 2048 * 1024;
@@ -2963,11 +2969,6 @@
      return 0;
 }
 
-DeviceState *pci_cirrus_vga_init(PCIBus *bus)
-{
-    return &pci_create_simple(bus, -1, "cirrus-vga")->qdev;
-}
-
 static void cirrus_vga_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
diff --git a/hw/device-hotplug.c b/hw/device-hotplug.c
index 2bdc615..eec0fe3 100644
--- a/hw/device-hotplug.c
+++ b/hw/device-hotplug.c
@@ -89,5 +89,4 @@
     if (dinfo) {
         drive_put_ref(dinfo);
     }
-    return;
 }
diff --git a/hw/e1000.c b/hw/e1000.c
index ae8a6c5..ec3a7c4 100644
--- a/hw/e1000.c
+++ b/hw/e1000.c
@@ -295,6 +295,7 @@
     s->rxbuf_min_shift = ((val / E1000_RCTL_RDMTS_QUAT) & 3) + 1;
     DBGOUT(RX, "RCTL: %d, mac_reg[RCTL] = 0x%x\n", s->mac_reg[RDT],
            s->mac_reg[RCTL]);
+    qemu_flush_queued_packets(&s->nic->nc);
 }
 
 static void
@@ -926,6 +927,9 @@
 {
     s->check_rxov = 0;
     s->mac_reg[index] = val & 0xffff;
+    if (e1000_has_rxbufs(s, 1)) {
+        qemu_flush_queued_packets(&s->nic->nc);
+    }
 }
 
 static void
diff --git a/hw/eepro100.c b/hw/eepro100.c
index 50d117e..5b23116 100644
--- a/hw/eepro100.c
+++ b/hw/eepro100.c
@@ -1036,6 +1036,7 @@
         }
         set_ru_state(s, ru_ready);
         s->ru_offset = e100_read_reg4(s, SCBPointer);
+        qemu_flush_queued_packets(&s->nic->nc);
         TRACE(OTHER, logout("val=0x%02x (rx start)\n", val));
         break;
     case RX_RESUME:
@@ -1770,7 +1771,8 @@
     if (rfd_command & COMMAND_EL) {
         /* EL bit is set, so this was the last frame. */
         logout("receive: Running out of frames\n");
-        set_ru_state(s, ru_suspended);
+        set_ru_state(s, ru_no_resources);
+        eepro100_rnr_interrupt(s);
     }
     if (rfd_command & COMMAND_S) {
         /* S bit is set. */
diff --git a/hw/exynos4210_combiner.c b/hw/exynos4210_combiner.c
index 80af22c..60b33c7 100644
--- a/hw/exynos4210_combiner.c
+++ b/hw/exynos4210_combiner.c
@@ -347,8 +347,6 @@
                 TARGET_FMT_plx "\n", offset);
         break;
     }
-
-    return;
 }
 
 /* Get combiner group and bit from irq number */
@@ -380,8 +378,6 @@
     }
 
     exynos4210_combiner_update(s, group_n);
-
-    return;
 }
 
 static void exynos4210_combiner_reset(DeviceState *d)
diff --git a/hw/exynos4210_gic.c b/hw/exynos4210_gic.c
index 7d03dd9..4fea098 100644
--- a/hw/exynos4210_gic.c
+++ b/hw/exynos4210_gic.c
@@ -193,8 +193,6 @@
 
     /* Bypass */
     qemu_set_irq(s->board_irqs[irq], level);
-
-    return;
 }
 
 /*
@@ -410,8 +408,6 @@
     }
 
     qemu_irq_lower(s->out);
-
-    return;
 }
 
 static void exynos4210_irq_gate_reset(DeviceState *d)
diff --git a/hw/exynos4210_mct.c b/hw/exynos4210_mct.c
index 7a22b1f..6f94ce2 100644
--- a/hw/exynos4210_mct.c
+++ b/hw/exynos4210_mct.c
@@ -574,8 +574,6 @@
     exynos4210_gfrc_set_count(&s->g_timer, distance);
 
     exynos4210_gfrc_start(&s->g_timer);
-
-    return;
 }
 
 /*
diff --git a/hw/fdc.c b/hw/fdc.c
index 08830c1..25a49e3 100644
--- a/hw/fdc.c
+++ b/hw/fdc.c
@@ -1286,8 +1286,6 @@
         fdctrl->msr |= FD_MSR_DIO;
     /* IO based transfer: calculate len */
     fdctrl_raise_irq(fdctrl, FD_SR0_SEEK);
-
-    return;
 }
 
 /* Prepare a transfer of deleted data */
@@ -1994,11 +1992,11 @@
         drive->fdctrl = fdctrl;
 
         if (drive->bs) {
-            if (bdrv_get_on_error(drive->bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
+            if (bdrv_get_on_error(drive->bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
                 error_report("fdc doesn't support drive option werror");
                 return -1;
             }
-            if (bdrv_get_on_error(drive->bs, 1) != BLOCK_ERR_REPORT) {
+            if (bdrv_get_on_error(drive->bs, 1) != BLOCKDEV_ON_ERROR_REPORT) {
                 error_report("fdc doesn't support drive option rerror");
                 return -1;
             }
diff --git a/hw/fifo.c b/hw/fifo.c
new file mode 100644
index 0000000..68a955a
--- /dev/null
+++ b/hw/fifo.c
@@ -0,0 +1,78 @@
+/*
+ * Generic FIFO component, implemented as a circular buffer.
+ *
+ * Copyright (c) 2012 Peter A. G. Crosthwaite
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "fifo.h"
+
+void fifo8_create(Fifo8 *fifo, uint32_t capacity)
+{
+    fifo->data = g_new(uint8_t, capacity);
+    fifo->capacity = capacity;
+    fifo->head = 0;
+    fifo->num = 0;
+}
+
+void fifo8_destroy(Fifo8 *fifo)
+{
+    g_free(fifo->data);
+}
+
+void fifo8_push(Fifo8 *fifo, uint8_t data)
+{
+    if (fifo->num == fifo->capacity) {
+        abort();
+    }
+    fifo->data[(fifo->head + fifo->num) % fifo->capacity] = data;
+    fifo->num++;
+}
+
+uint8_t fifo8_pop(Fifo8 *fifo)
+{
+    uint8_t ret;
+
+    if (fifo->num == 0) {
+        abort();
+    }
+    ret = fifo->data[fifo->head++];
+    fifo->head %= fifo->capacity;
+    fifo->num--;
+    return ret;
+}
+
+void fifo8_reset(Fifo8 *fifo)
+{
+    fifo->num = 0;
+}
+
+bool fifo8_is_empty(Fifo8 *fifo)
+{
+    return (fifo->num == 0);
+}
+
+bool fifo8_is_full(Fifo8 *fifo)
+{
+    return (fifo->num == fifo->capacity);
+}
+
+const VMStateDescription vmstate_fifo8 = {
+    .name = "Fifo8",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, 0, capacity),
+        VMSTATE_UINT32(head, Fifo8),
+        VMSTATE_UINT32(num, Fifo8),
+        VMSTATE_END_OF_LIST()
+    }
+};
diff --git a/hw/fifo.h b/hw/fifo.h
new file mode 100644
index 0000000..f23890a
--- /dev/null
+++ b/hw/fifo.h
@@ -0,0 +1,99 @@
+#ifndef FIFO_H
+#define FIFO_H
+
+#include "hw.h"
+
+typedef struct {
+    /* All fields are private */
+    uint8_t *data;
+    uint32_t capacity;
+    uint32_t head;
+    uint32_t num;
+} Fifo8;
+
+/**
+ * fifo8_create:
+ * @fifo: struct Fifo8 to initialise with new FIFO
+ * @capacity: capacity of the newly created FIFO
+ *
+ * Create a FIFO of the specified size. Clients should call fifo8_destroy()
+ * when finished using the fifo. The FIFO is initially empty.
+ */
+
+void fifo8_create(Fifo8 *fifo, uint32_t capacity);
+
+/**
+ * fifo8_destroy:
+ * @fifo: FIFO to cleanup
+ *
+ * Cleanup a FIFO created with fifo8_create(). Frees memory created for FIFO
+  *storage. The FIFO is no longer usable after this has been called.
+ */
+
+void fifo8_destroy(Fifo8 *fifo);
+
+/**
+ * fifo8_push:
+ * @fifo: FIFO to push to
+ * @data: data byte to push
+ *
+ * Push a data byte to the FIFO. Behaviour is undefined if the FIFO is full.
+ * Clients are responsible for checking for fullness using fifo8_is_full().
+ */
+
+void fifo8_push(Fifo8 *fifo, uint8_t data);
+
+/**
+ * fifo8_pop:
+ * @fifo: fifo to pop from
+ *
+ * Pop a data byte from the FIFO. Behaviour is undefined if the FIFO is empty.
+ * Clients are responsible for checking for emptyness using fifo8_is_empty().
+ *
+ * Returns: The popped data byte.
+ */
+
+uint8_t fifo8_pop(Fifo8 *fifo);
+
+/**
+ * fifo8_reset:
+ * @fifo: FIFO to reset
+ *
+ * Reset a FIFO. All data is discarded and the FIFO is emptied.
+ */
+
+void fifo8_reset(Fifo8 *fifo);
+
+/**
+ * fifo8_is_empty:
+ * @fifo: FIFO to check
+ *
+ * Check if a FIFO is empty.
+ *
+ * Returns: True if the fifo is empty, false otherwise.
+ */
+
+bool fifo8_is_empty(Fifo8 *fifo);
+
+/**
+ * fifo8_is_full:
+ * @fifo: FIFO to check
+ *
+ * Check if a FIFO is full.
+ *
+ * Returns: True if the fifo is full, false otherwise.
+ */
+
+bool fifo8_is_full(Fifo8 *fifo);
+
+extern const VMStateDescription vmstate_fifo8;
+
+#define VMSTATE_FIFO8(_field, _state) {                              \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(Fifo8),                                     \
+    .vmsd       = &vmstate_fifo8,                                    \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, Fifo8),       \
+}
+
+#endif /* FIFO_H */
diff --git a/hw/framebuffer.c b/hw/framebuffer.c
index 85a00a5..27fa6f5 100644
--- a/hw/framebuffer.c
+++ b/hw/framebuffer.c
@@ -107,5 +107,4 @@
                               DIRTY_MEMORY_VGA);
     *first_row = first;
     *last_row = last;
-    return;
 }
diff --git a/hw/fw_cfg.c b/hw/fw_cfg.c
index 7b3b576..dcde1a9 100644
--- a/hw/fw_cfg.c
+++ b/hw/fw_cfg.c
@@ -183,6 +183,30 @@
     }
 }
 
+static void fw_cfg_reboot(FWCfgState *s)
+{
+    int reboot_timeout = -1;
+    char *p;
+    const char *temp;
+
+    /* get user configuration */
+    QemuOptsList *plist = qemu_find_opts("boot-opts");
+    QemuOpts *opts = QTAILQ_FIRST(&plist->head);
+    if (opts != NULL) {
+        temp = qemu_opt_get(opts, "reboot-timeout");
+        if (temp != NULL) {
+            p = (char *)temp;
+            reboot_timeout = strtol(p, (char **)&p, 10);
+        }
+    }
+    /* validate the input */
+    if (reboot_timeout > 0xffff) {
+        error_report("reboot timeout is larger than 65535, force it to 65535.");
+        reboot_timeout = 0xffff;
+    }
+    fw_cfg_add_file(s, "etc/boot-fail-wait", g_memdup(&reboot_timeout, 4), 4);
+}
+
 static void fw_cfg_write(FWCfgState *s, uint8_t value)
 {
     int arch = !!(s->cur_entry & FW_CFG_ARCH_LOCAL);
@@ -497,6 +521,7 @@
     fw_cfg_add_i16(s, FW_CFG_MAX_CPUS, (uint16_t)max_cpus);
     fw_cfg_add_i16(s, FW_CFG_BOOT_MENU, (uint16_t)boot_menu);
     fw_cfg_bootsplash(s);
+    fw_cfg_reboot(s);
 
     s->machine_ready.notify = fw_cfg_machine_ready;
     qemu_add_machine_init_done_notifier(&s->machine_ready);
diff --git a/hw/hw.h b/hw/hw.h
index e5cb9bf..16101de 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -4,7 +4,7 @@
 
 #include "qemu-common.h"
 
-#if defined(TARGET_PHYS_ADDR_BITS) && !defined(NEED_CPU_H)
+#if !defined(CONFIG_USER_ONLY) && !defined(NEED_CPU_H)
 #include "cpu-common.h"
 #endif
 
diff --git a/hw/ide/Makefile.objs b/hw/ide/Makefile.objs
index cf718dd..5c8c22a 100644
--- a/hw/ide/Makefile.objs
+++ b/hw/ide/Makefile.objs
@@ -1,10 +1,10 @@
-hw-obj-$(CONFIG_IDE_CORE) += core.o atapi.o
-hw-obj-$(CONFIG_IDE_QDEV) += qdev.o
-hw-obj-$(CONFIG_IDE_PCI) += pci.o
-hw-obj-$(CONFIG_IDE_ISA) += isa.o
-hw-obj-$(CONFIG_IDE_PIIX) += piix.o
-hw-obj-$(CONFIG_IDE_CMD646) += cmd646.o
-hw-obj-$(CONFIG_IDE_MACIO) += macio.o
-hw-obj-$(CONFIG_IDE_VIA) += via.o
-hw-obj-$(CONFIG_AHCI) += ahci.o
-hw-obj-$(CONFIG_AHCI) += ich.o
+common-obj-$(CONFIG_IDE_CORE) += core.o atapi.o
+common-obj-$(CONFIG_IDE_QDEV) += qdev.o
+common-obj-$(CONFIG_IDE_PCI) += pci.o
+common-obj-$(CONFIG_IDE_ISA) += isa.o
+common-obj-$(CONFIG_IDE_PIIX) += piix.o
+common-obj-$(CONFIG_IDE_CMD646) += cmd646.o
+common-obj-$(CONFIG_IDE_MACIO) += macio.o
+common-obj-$(CONFIG_IDE_VIA) += via.o
+common-obj-$(CONFIG_AHCI) += ahci.o
+common-obj-$(CONFIG_AHCI) += ich.o
diff --git a/hw/ide/ahci.c b/hw/ide/ahci.c
index 5ea3cad..68671bc 100644
--- a/hw/ide/ahci.c
+++ b/hw/ide/ahci.c
@@ -1175,7 +1175,6 @@
         ad->port_no = i;
         ad->port.dma = &ad->dma;
         ad->port.dma->ops = &ahci_dma_ops;
-        ad->port_regs.cmd = PORT_CMD_SPIN_UP | PORT_CMD_POWER_ON;
     }
 }
 
@@ -1199,6 +1198,7 @@
         pr->irq_stat = 0;
         pr->irq_mask = 0;
         pr->scr_ctl = 0;
+        pr->cmd = PORT_CMD_SPIN_UP | PORT_CMD_POWER_ON;
         ahci_reset_port(s, i);
     }
 }
diff --git a/hw/ide/atapi.c b/hw/ide/atapi.c
index f7f714c..685cbaa 100644
--- a/hw/ide/atapi.c
+++ b/hw/ide/atapi.c
@@ -875,6 +875,12 @@
     int sense;
     bool start = buf[4] & 1;
     bool loej = buf[4] & 2;     /* load on start, eject on !start */
+    int pwrcnd = buf[4] & 0xf0;
+
+    if (pwrcnd) {
+        /* eject/load only happens for power condition == 0 */
+        return;
+    }
 
     if (loej) {
         if (!start && !s->tray_open && s->tray_locked) {
diff --git a/hw/ide/core.c b/hw/ide/core.c
index d65ef3d..d683a8c 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -53,8 +53,6 @@
     { 0x0c, 0x03, 0x00, 0x64, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
     /* airflow-temperature-celsius */
     { 190,  0x03, 0x00, 0x45, 0x45, 0x1f, 0x00, 0x1f, 0x1f, 0x00, 0x00, 0x32},
-    /* end of list */
-    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
 };
 
 static int ide_handle_rw_error(IDEState *s, int error, int op);
@@ -558,32 +556,22 @@
 
 static int ide_handle_rw_error(IDEState *s, int error, int op)
 {
-    int is_read = (op & BM_STATUS_RETRY_READ);
-    BlockErrorAction action = bdrv_get_on_error(s->bs, is_read);
+    bool is_read = (op & BM_STATUS_RETRY_READ) != 0;
+    BlockErrorAction action = bdrv_get_error_action(s->bs, is_read, error);
 
-    if (action == BLOCK_ERR_IGNORE) {
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
-        return 0;
-    }
-
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
+    if (action == BDRV_ACTION_STOP) {
         s->bus->dma->ops->set_unit(s->bus->dma, s->unit);
         s->bus->error_status = op;
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
-        vm_stop(RUN_STATE_IO_ERROR);
-        bdrv_iostatus_set_err(s->bs, error);
-    } else {
+    } else if (action == BDRV_ACTION_REPORT) {
         if (op & BM_STATUS_DMA_RETRY) {
             dma_buf_commit(s);
             ide_dma_error(s);
         } else {
             ide_rw_error(s);
         }
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read);
     }
-
-    return 1;
+    bdrv_error_action(s->bs, action, is_read, error);
+    return action != BDRV_ACTION_IGNORE;
 }
 
 void ide_dma_cb(void *opaque, int ret)
@@ -1468,9 +1456,7 @@
 	case SMART_READ_THRESH:
 		memset(s->io_buffer, 0, 0x200);
 		s->io_buffer[0] = 0x01; /* smart struct version */
-		for (n=0; n<30; n++) {
-		if (smart_attributes[n][0] == 0)
-			break;
+		for (n = 0; n < ARRAY_SIZE(smart_attributes); n++) {
 		s->io_buffer[2+0+(n*12)] = smart_attributes[n][0];
 		s->io_buffer[2+1+(n*12)] = smart_attributes[n][11];
 		}
@@ -1484,10 +1470,7 @@
 	case SMART_READ_DATA:
 		memset(s->io_buffer, 0, 0x200);
 		s->io_buffer[0] = 0x01; /* smart struct version */
-		for (n=0; n<30; n++) {
-		    if (smart_attributes[n][0] == 0) {
-			break;
-		    }
+		for (n = 0; n < ARRAY_SIZE(smart_attributes); n++) {
 		    int i;
 		    for(i = 0; i < 11; i++) {
 			s->io_buffer[2+i+(n*12)] = smart_attributes[n][i];
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index 848cb31..f228725 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -89,7 +89,6 @@
 done:
     bdrv_acct_done(s->bs, &s->acct);
     io->dma_end(opaque);
-    return;
 }
 
 static void pmac_ide_transfer_cb(void *opaque, int ret)
diff --git a/hw/ide/pci.c b/hw/ide/pci.c
index 88c0942..644533f 100644
--- a/hw/ide/pci.c
+++ b/hw/ide/pci.c
@@ -188,7 +188,7 @@
 {
     BMDMAState *bm = opaque;
     IDEBus *bus = bm->bus;
-    int is_read;
+    bool is_read;
     int error_status;
 
     qemu_bh_delete(bm->bh);
@@ -198,7 +198,7 @@
         return;
     }
 
-    is_read = !!(bus->error_status & BM_STATUS_RETRY_READ);
+    is_read = (bus->error_status & BM_STATUS_RETRY_READ) != 0;
 
     /* The error status must be cleared before resubmitting the request: The
      * request may fail again, and this case can only be distinguished if the
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
index 5ea9b8f..f2e4ea4 100644
--- a/hw/ide/qdev.c
+++ b/hw/ide/qdev.c
@@ -60,7 +60,7 @@
     snprintf(path, sizeof(path), "%s@%d", qdev_fw_name(dev),
              ((IDEBus*)dev->parent_bus)->bus_id);
 
-    return strdup(path);
+    return g_strdup(path);
 }
 
 static int ide_qdev_init(DeviceState *qdev)
diff --git a/hw/imx_avic.c b/hw/imx_avic.c
index 4f010e8..b1a8fe6 100644
--- a/hw/imx_avic.c
+++ b/hw/imx_avic.c
@@ -6,9 +6,9 @@
  *
  * Copyright (c) 2008 OKL
  * Copyright (c) 2011 NICTA Pty Ltd
- * Originally Written by Hans Jiang
+ * Originally written by Hans Jiang
  *
- * This code is licenced under the GPL version 2 or later.  See
+ * This code is licensed under the GPL version 2 or later.  See
  * the COPYING file in the top-level directory.
  *
  * TODO: implement vectors.
diff --git a/hw/imx_timer.c b/hw/imx_timer.c
index 16215cc..c28c537 100644
--- a/hw/imx_timer.c
+++ b/hw/imx_timer.c
@@ -3,10 +3,10 @@
  *
  * Copyright (c) 2008 OK Labs
  * Copyright (c) 2011 NICTA Pty Ltd
- * Originally Written by Hans Jiang
+ * Originally written by Hans Jiang
  * Updated by Peter Chubb
  *
- * This code is licenced under GPL version 2 or later.  See
+ * This code is licensed under GPL version 2 or later.  See
  * the COPYING file in the top-level directory.
  *
  */
diff --git a/hw/intel-hda.c b/hw/intel-hda.c
index 127e818..d8e1b23 100644
--- a/hw/intel-hda.c
+++ b/hw/intel-hda.c
@@ -210,13 +210,7 @@
 {
     target_phys_addr_t addr;
 
-#if TARGET_PHYS_ADDR_BITS == 32
-    addr = lbase;
-#else
-    addr = ubase;
-    addr <<= 32;
-    addr |= lbase;
-#endif
+    addr = ((uint64_t)ubase << 32) | lbase;
     return addr;
 }
 
diff --git a/hw/ioh3420.c b/hw/ioh3420.c
index 94a537c..4d31473 100644
--- a/hw/ioh3420.c
+++ b/hw/ioh3420.c
@@ -125,7 +125,6 @@
     rc = pcie_chassis_add_slot(s);
     if (rc < 0) {
         goto err_pcie_cap;
-        return rc;
     }
     pcie_cap_root_init(d);
     rc = pcie_aer_init(d, IOH_EP_AER_OFFSET);
diff --git a/hw/irq.c b/hw/irq.c
index d413a0b..f4e2a78 100644
--- a/hw/irq.c
+++ b/hw/irq.c
@@ -38,24 +38,37 @@
     irq->handler(irq->opaque, irq->n, level);
 }
 
-qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque, int n)
+qemu_irq *qemu_extend_irqs(qemu_irq *old, int n_old, qemu_irq_handler handler,
+                           void *opaque, int n)
 {
     qemu_irq *s;
     struct IRQState *p;
     int i;
 
-    s = (qemu_irq *)g_malloc0(sizeof(qemu_irq) * n);
-    p = (struct IRQState *)g_malloc0(sizeof(struct IRQState) * n);
-    for (i = 0; i < n; i++) {
-        p->handler = handler;
-        p->opaque = opaque;
-        p->n = i;
+    if (!old) {
+        n_old = 0;
+    }
+    s = old ? g_renew(qemu_irq, old, n + n_old) : g_new(qemu_irq, n);
+    p = old ? g_renew(struct IRQState, s[0], n + n_old) :
+                g_new(struct IRQState, n);
+    for (i = 0; i < n + n_old; i++) {
+        if (i >= n_old) {
+            p->handler = handler;
+            p->opaque = opaque;
+            p->n = i;
+        }
         s[i] = p;
         p++;
     }
     return s;
 }
 
+qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque, int n)
+{
+    return qemu_extend_irqs(NULL, 0, handler, opaque, n);
+}
+
+
 void qemu_free_irqs(qemu_irq *s)
 {
     g_free(s[0]);
diff --git a/hw/irq.h b/hw/irq.h
index 56c55f0..e640c10 100644
--- a/hw/irq.h
+++ b/hw/irq.h
@@ -23,8 +23,17 @@
     qemu_set_irq(irq, 0);
 }
 
-/* Returns an array of N IRQs.  */
+/* Returns an array of N IRQs. Each IRQ is assigned the argument handler and
+ * opaque data.
+ */
 qemu_irq *qemu_allocate_irqs(qemu_irq_handler handler, void *opaque, int n);
+
+/* Extends an Array of IRQs. Old IRQs have their handlers and opaque data
+ * preserved. New IRQs are assigned the argument handler and opaque data.
+ */
+qemu_irq *qemu_extend_irqs(qemu_irq *old, int n_old, qemu_irq_handler handler,
+                                void *opaque, int n);
+
 void qemu_free_irqs(qemu_irq *s);
 
 /* Returns a new IRQ with opposite polarity.  */
diff --git a/hw/isa-bus.c b/hw/isa-bus.c
index f9b2373..214f194 100644
--- a/hw/isa-bus.c
+++ b/hw/isa-bus.c
@@ -19,6 +19,7 @@
 #include "hw.h"
 #include "monitor.h"
 #include "sysbus.h"
+#include "sysemu.h"
 #include "isa.h"
 #include "exec-memory.h"
 
@@ -166,6 +167,25 @@
     return dev;
 }
 
+ISADevice *isa_vga_init(ISABus *bus)
+{
+    switch (vga_interface_type) {
+    case VGA_CIRRUS:
+        return isa_create_simple(bus, "isa-cirrus-vga");
+    case VGA_QXL:
+        fprintf(stderr, "%s: qxl: no PCI bus\n", __func__);
+        return NULL;
+    case VGA_STD:
+        return isa_create_simple(bus, "isa-vga");
+    case VGA_VMWARE:
+        fprintf(stderr, "%s: vmware_vga: no PCI bus\n", __func__);
+        return NULL;
+    case VGA_NONE:
+    default:
+        return NULL;
+    }
+}
+
 static void isabus_dev_print(Monitor *mon, DeviceState *dev, int indent)
 {
     ISADevice *d = ISA_DEVICE(dev);
@@ -236,7 +256,7 @@
         snprintf(path + off, sizeof(path) - off, "@%04x", d->ioport_id);
     }
 
-    return strdup(path);
+    return g_strdup(path);
 }
 
 MemoryRegion *isa_address_space(ISADevice *dev)
diff --git a/hw/isa.h b/hw/isa.h
index dc97052..8fb498a 100644
--- a/hw/isa.h
+++ b/hw/isa.h
@@ -47,6 +47,8 @@
 ISADevice *isa_try_create(ISABus *bus, const char *name);
 ISADevice *isa_create_simple(ISABus *bus, const char *name);
 
+ISADevice *isa_vga_init(ISABus *bus);
+
 /**
  * isa_register_ioport: Install an I/O port region on the ISA bus.
  *
diff --git a/hw/ivshmem.c b/hw/ivshmem.c
index 62fe53a..5c4ccb8 100644
--- a/hw/ivshmem.c
+++ b/hw/ivshmem.c
@@ -71,6 +71,8 @@
     MemoryRegion bar;
     MemoryRegion ivshmem;
     uint64_t ivshmem_size; /* size of shared memory region */
+    uint32_t ivshmem_attr;
+    uint32_t ivshmem_64bit;
     int shm_fd; /* shared memory file descriptor */
 
     Peer *peers;
@@ -147,7 +149,6 @@
     s->intrstatus = val;
 
     ivshmem_update_irq(s, val);
-    return;
 }
 
 static uint32_t ivshmem_IntrStatus_read(IVShmemState *s)
@@ -339,7 +340,7 @@
     memory_region_add_subregion(&s->bar, 0, &s->ivshmem);
 
     /* region for shared memory */
-    pci_register_bar(&s->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar);
+    pci_register_bar(&s->dev, 2, s->ivshmem_attr, &s->bar);
 }
 
 static void ivshmem_add_eventfd(IVShmemState *s, int posn, int i)
@@ -508,8 +509,6 @@
     if (ivshmem_has_feature(s, IVSHMEM_IOEVENTFD)) {
         ivshmem_add_eventfd(s, incoming_posn, guest_max_eventfd);
     }
-
-    return;
 }
 
 /* Select the MSI-X vectors used by device.
@@ -534,7 +533,6 @@
 
     s->intrstatus = 0;
     ivshmem_use_msix(s);
-    return;
 }
 
 static uint64_t ivshmem_get_size(IVShmemState * s) {
@@ -690,6 +688,11 @@
                      &s->ivshmem_mmio);
 
     memory_region_init(&s->bar, "ivshmem-bar2-container", s->ivshmem_size);
+    s->ivshmem_attr = PCI_BASE_ADDRESS_SPACE_MEMORY |
+        PCI_BASE_ADDRESS_MEM_PREFETCH;
+    if (s->ivshmem_64bit) {
+        s->ivshmem_attr |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+    }
 
     if ((s->server_chr != NULL) &&
                         (strncmp(s->server_chr->filename, "unix:", 5) == 0)) {
@@ -715,8 +718,7 @@
         /* allocate/initialize space for interrupt handling */
         s->peers = g_malloc0(s->nb_peers * sizeof(Peer));
 
-        pci_register_bar(&s->dev, 2,
-                         PCI_BASE_ADDRESS_SPACE_MEMORY, &s->bar);
+        pci_register_bar(&s->dev, 2, s->ivshmem_attr, &s->bar);
 
         s->eventfd_chr = g_malloc0(s->vectors * sizeof(CharDriverState *));
 
@@ -786,6 +788,7 @@
     DEFINE_PROP_BIT("msi", IVShmemState, features, IVSHMEM_MSI, true),
     DEFINE_PROP_STRING("shm", IVShmemState, shmobj),
     DEFINE_PROP_STRING("role", IVShmemState, role),
+    DEFINE_PROP_UINT32("use64", IVShmemState, ivshmem_64bit, 1),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/kvm/pci-assign.c b/hw/kvm/pci-assign.c
index 05b93d9..7a0998c 100644
--- a/hw/kvm/pci-assign.c
+++ b/hw/kvm/pci-assign.c
@@ -579,15 +579,9 @@
     snprintf(name, sizeof(name), "%sconfig", dir);
 
     if (pci_dev->configfd_name && *pci_dev->configfd_name) {
-        if (qemu_isdigit(pci_dev->configfd_name[0])) {
-            dev->config_fd = strtol(pci_dev->configfd_name, NULL, 0);
-        } else {
-            dev->config_fd = monitor_get_fd(cur_mon, pci_dev->configfd_name);
-            if (dev->config_fd < 0) {
-                error_report("%s: (%s) unkown", __func__,
-                             pci_dev->configfd_name);
-                return 1;
-            }
+        dev->config_fd = monitor_handle_fd_param(cur_mon, pci_dev->configfd_name);
+        if (dev->config_fd < 0) {
+            return 1;
         }
     } else {
         dev->config_fd = open(name, O_RDWR);
diff --git a/hw/kzm.c b/hw/kzm.c
index 6a5e9df..68cd1b4 100644
--- a/hw/kzm.c
+++ b/hw/kzm.c
@@ -5,7 +5,7 @@
  * Written by Hans at OK-Labs
  * Updated by Peter Chubb.
  *
- * This code is licenced under the GPL, version 2 or later.
+ * This code is licensed under the GPL, version 2 or later.
  * See the file `COPYING' in the top level directory.
  *
  * It (partially) emulates a Kyoto Microcomputer
diff --git a/hw/leon3.c b/hw/leon3.c
index 878d3aa..7a9729d 100644
--- a/hw/leon3.c
+++ b/hw/leon3.c
@@ -210,7 +210,7 @@
     }
 }
 
-QEMUMachine leon3_generic_machine = {
+static QEMUMachine leon3_generic_machine = {
     .name     = "leon3_generic",
     .desc     = "Leon-3 generic",
     .init     = leon3_generic_hw_init,
diff --git a/hw/lm32_hwsetup.h b/hw/lm32_hwsetup.h
index 8fc285e..70dc61f 100644
--- a/hw/lm32_hwsetup.h
+++ b/hw/lm32_hwsetup.h
@@ -96,7 +96,7 @@
 
 static inline void hwsetup_add_str(HWSetup *hw, const char *str)
 {
-    strncpy(hw->ptr, str, 31); /* make sure last byte is zero */
+    pstrcpy(hw->ptr, 32, str);
     hw->ptr += 32;
 }
 
diff --git a/hw/lm4549.c b/hw/lm4549.c
index 80b3ec4..e0137d5 100644
--- a/hw/lm4549.c
+++ b/hw/lm4549.c
@@ -224,7 +224,7 @@
        This model supports 16-bit playback.
     */
 
-    if (s->buffer_level >= LM4549_BUFFER_SIZE) {
+    if (s->buffer_level > LM4549_BUFFER_SIZE - 2) {
         DPRINTF("write_sample Buffer full\n");
         return 0;
     }
diff --git a/hw/m25p80.c b/hw/m25p80.c
new file mode 100644
index 0000000..9a56de8
--- /dev/null
+++ b/hw/m25p80.c
@@ -0,0 +1,598 @@
+/*
+ * ST M25P80 emulator. Emulate all SPI flash devices based on the m25p80 command
+ * set. Known devices table current as of Jun/2012 and taken from linux.
+ * See drivers/mtd/devices/m25p80.c.
+ *
+ * Copyright (C) 2011 Edgar E. Iglesias <edgar.iglesias@gmail.com>
+ * Copyright (C) 2012 Peter A. G. Crosthwaite <peter.crosthwaite@petalogix.com>
+ * Copyright (C) 2012 PetaLogix
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) a later version of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hw.h"
+#include "blockdev.h"
+#include "ssi.h"
+#include "devices.h"
+
+#ifdef M25P80_ERR_DEBUG
+#define DB_PRINT(...) do { \
+    fprintf(stderr,  ": %s: ", __func__); \
+    fprintf(stderr, ## __VA_ARGS__); \
+    } while (0);
+#else
+    #define DB_PRINT(...)
+#endif
+
+/* Fields for FlashPartInfo->flags */
+
+/* erase capabilities */
+#define ER_4K 1
+#define ER_32K 2
+/* set to allow the page program command to write 0s back to 1. Useful for
+ * modelling EEPROM with SPI flash command set
+ */
+#define WR_1 0x100
+
+typedef struct FlashPartInfo {
+    const char *part_name;
+    /* jedec code. (jedec >> 16) & 0xff is the 1st byte, >> 8 the 2nd etc */
+    uint32_t jedec;
+    /* extended jedec code */
+    uint16_t ext_jedec;
+    /* there is confusion between manufacturers as to what a sector is. In this
+     * device model, a "sector" is the size that is erased by the ERASE_SECTOR
+     * command (opcode 0xd8).
+     */
+    uint32_t sector_size;
+    uint32_t n_sectors;
+    uint32_t page_size;
+    uint8_t flags;
+} FlashPartInfo;
+
+/* adapted from linux */
+
+#define INFO(_part_name, _jedec, _ext_jedec, _sector_size, _n_sectors, _flags)\
+    .part_name = (_part_name),\
+    .jedec = (_jedec),\
+    .ext_jedec = (_ext_jedec),\
+    .sector_size = (_sector_size),\
+    .n_sectors = (_n_sectors),\
+    .page_size = 256,\
+    .flags = (_flags),\
+
+static const FlashPartInfo known_devices[] = {
+    /* Atmel -- some are (confusingly) marketed as "DataFlash" */
+    { INFO("at25fs010",   0x1f6601,      0,  32 << 10,   4, ER_4K) },
+    { INFO("at25fs040",   0x1f6604,      0,  64 << 10,   8, ER_4K) },
+
+    { INFO("at25df041a",  0x1f4401,      0,  64 << 10,   8, ER_4K) },
+    { INFO("at25df321a",  0x1f4701,      0,  64 << 10,  64, ER_4K) },
+    { INFO("at25df641",   0x1f4800,      0,  64 << 10, 128, ER_4K) },
+
+    { INFO("at26f004",    0x1f0400,      0,  64 << 10,   8, ER_4K) },
+    { INFO("at26df081a",  0x1f4501,      0,  64 << 10,  16, ER_4K) },
+    { INFO("at26df161a",  0x1f4601,      0,  64 << 10,  32, ER_4K) },
+    { INFO("at26df321",   0x1f4700,      0,  64 << 10,  64, ER_4K) },
+
+    /* EON -- en25xxx */
+    { INFO("en25f32",     0x1c3116,      0,  64 << 10,  64, ER_4K) },
+    { INFO("en25p32",     0x1c2016,      0,  64 << 10,  64, 0) },
+    { INFO("en25q32b",    0x1c3016,      0,  64 << 10,  64, 0) },
+    { INFO("en25p64",     0x1c2017,      0,  64 << 10, 128, 0) },
+
+    /* Intel/Numonyx -- xxxs33b */
+    { INFO("160s33b",     0x898911,      0,  64 << 10,  32, 0) },
+    { INFO("320s33b",     0x898912,      0,  64 << 10,  64, 0) },
+    { INFO("640s33b",     0x898913,      0,  64 << 10, 128, 0) },
+
+    /* Macronix */
+    { INFO("mx25l4005a",  0xc22013,      0,  64 << 10,   8, ER_4K) },
+    { INFO("mx25l8005",   0xc22014,      0,  64 << 10,  16, 0) },
+    { INFO("mx25l1606e",  0xc22015,      0,  64 << 10,  32, ER_4K) },
+    { INFO("mx25l3205d",  0xc22016,      0,  64 << 10,  64, 0) },
+    { INFO("mx25l6405d",  0xc22017,      0,  64 << 10, 128, 0) },
+    { INFO("mx25l12805d", 0xc22018,      0,  64 << 10, 256, 0) },
+    { INFO("mx25l12855e", 0xc22618,      0,  64 << 10, 256, 0) },
+    { INFO("mx25l25635e", 0xc22019,      0,  64 << 10, 512, 0) },
+    { INFO("mx25l25655e", 0xc22619,      0,  64 << 10, 512, 0) },
+
+    /* Spansion -- single (large) sector size only, at least
+     * for the chips listed here (without boot sectors).
+     */
+    { INFO("s25sl004a",   0x010212,      0,  64 << 10,   8, 0) },
+    { INFO("s25sl008a",   0x010213,      0,  64 << 10,  16, 0) },
+    { INFO("s25sl016a",   0x010214,      0,  64 << 10,  32, 0) },
+    { INFO("s25sl032a",   0x010215,      0,  64 << 10,  64, 0) },
+    { INFO("s25sl032p",   0x010215, 0x4d00,  64 << 10,  64, ER_4K) },
+    { INFO("s25sl064a",   0x010216,      0,  64 << 10, 128, 0) },
+    { INFO("s25fl256s0",  0x010219, 0x4d00, 256 << 10, 128, 0) },
+    { INFO("s25fl256s1",  0x010219, 0x4d01,  64 << 10, 512, 0) },
+    { INFO("s25fl512s",   0x010220, 0x4d00, 256 << 10, 256, 0) },
+    { INFO("s70fl01gs",   0x010221, 0x4d00, 256 << 10, 256, 0) },
+    { INFO("s25sl12800",  0x012018, 0x0300, 256 << 10,  64, 0) },
+    { INFO("s25sl12801",  0x012018, 0x0301,  64 << 10, 256, 0) },
+    { INFO("s25fl129p0",  0x012018, 0x4d00, 256 << 10,  64, 0) },
+    { INFO("s25fl129p1",  0x012018, 0x4d01,  64 << 10, 256, 0) },
+    { INFO("s25fl016k",   0xef4015,      0,  64 << 10,  32, ER_4K | ER_32K) },
+    { INFO("s25fl064k",   0xef4017,      0,  64 << 10, 128, ER_4K | ER_32K) },
+
+    /* SST -- large erase sizes are "overlays", "sectors" are 4<< 10 */
+    { INFO("sst25vf040b", 0xbf258d,      0,  64 << 10,   8, ER_4K) },
+    { INFO("sst25vf080b", 0xbf258e,      0,  64 << 10,  16, ER_4K) },
+    { INFO("sst25vf016b", 0xbf2541,      0,  64 << 10,  32, ER_4K) },
+    { INFO("sst25vf032b", 0xbf254a,      0,  64 << 10,  64, ER_4K) },
+    { INFO("sst25wf512",  0xbf2501,      0,  64 << 10,   1, ER_4K) },
+    { INFO("sst25wf010",  0xbf2502,      0,  64 << 10,   2, ER_4K) },
+    { INFO("sst25wf020",  0xbf2503,      0,  64 << 10,   4, ER_4K) },
+    { INFO("sst25wf040",  0xbf2504,      0,  64 << 10,   8, ER_4K) },
+
+    /* ST Microelectronics -- newer production may have feature updates */
+    { INFO("m25p05",      0x202010,      0,  32 << 10,   2, 0) },
+    { INFO("m25p10",      0x202011,      0,  32 << 10,   4, 0) },
+    { INFO("m25p20",      0x202012,      0,  64 << 10,   4, 0) },
+    { INFO("m25p40",      0x202013,      0,  64 << 10,   8, 0) },
+    { INFO("m25p80",      0x202014,      0,  64 << 10,  16, 0) },
+    { INFO("m25p16",      0x202015,      0,  64 << 10,  32, 0) },
+    { INFO("m25p32",      0x202016,      0,  64 << 10,  64, 0) },
+    { INFO("m25p64",      0x202017,      0,  64 << 10, 128, 0) },
+    { INFO("m25p128",     0x202018,      0, 256 << 10,  64, 0) },
+
+    { INFO("m45pe10",     0x204011,      0,  64 << 10,   2, 0) },
+    { INFO("m45pe80",     0x204014,      0,  64 << 10,  16, 0) },
+    { INFO("m45pe16",     0x204015,      0,  64 << 10,  32, 0) },
+
+    { INFO("m25pe80",     0x208014,      0,  64 << 10,  16, 0) },
+    { INFO("m25pe16",     0x208015,      0,  64 << 10,  32, ER_4K) },
+
+    { INFO("m25px32",     0x207116,      0,  64 << 10,  64, ER_4K) },
+    { INFO("m25px32-s0",  0x207316,      0,  64 << 10,  64, ER_4K) },
+    { INFO("m25px32-s1",  0x206316,      0,  64 << 10,  64, ER_4K) },
+    { INFO("m25px64",     0x207117,      0,  64 << 10, 128, 0) },
+
+    /* Winbond -- w25x "blocks" are 64k, "sectors" are 4KiB */
+    { INFO("w25x10",      0xef3011,      0,  64 << 10,   2, ER_4K) },
+    { INFO("w25x20",      0xef3012,      0,  64 << 10,   4, ER_4K) },
+    { INFO("w25x40",      0xef3013,      0,  64 << 10,   8, ER_4K) },
+    { INFO("w25x80",      0xef3014,      0,  64 << 10,  16, ER_4K) },
+    { INFO("w25x16",      0xef3015,      0,  64 << 10,  32, ER_4K) },
+    { INFO("w25x32",      0xef3016,      0,  64 << 10,  64, ER_4K) },
+    { INFO("w25q32",      0xef4016,      0,  64 << 10,  64, ER_4K) },
+    { INFO("w25x64",      0xef3017,      0,  64 << 10, 128, ER_4K) },
+    { INFO("w25q64",      0xef4017,      0,  64 << 10, 128, ER_4K) },
+
+    /* Numonyx -- n25q128 */
+    { INFO("n25q128",      0x20ba18,      0,  64 << 10, 256, 0) },
+
+    { },
+};
+
+typedef enum {
+    NOP = 0,
+    PP = 0x2,
+    READ = 0x3,
+    WRDI = 0x4,
+    RDSR = 0x5,
+    WREN = 0x6,
+    FAST_READ = 0xb,
+    ERASE_4K = 0x20,
+    ERASE_32K = 0x52,
+    ERASE_SECTOR = 0xd8,
+    JEDEC_READ = 0x9f,
+    BULK_ERASE = 0xc7,
+} FlashCMD;
+
+typedef enum {
+    STATE_IDLE,
+    STATE_PAGE_PROGRAM,
+    STATE_READ,
+    STATE_COLLECTING_DATA,
+    STATE_READING_DATA,
+} CMDState;
+
+typedef struct Flash {
+    SSISlave ssidev;
+    uint32_t r;
+
+    BlockDriverState *bdrv;
+
+    uint8_t *storage;
+    uint32_t size;
+    int page_size;
+
+    uint8_t state;
+    uint8_t data[16];
+    uint32_t len;
+    uint32_t pos;
+    uint8_t needed_bytes;
+    uint8_t cmd_in_progress;
+    uint64_t cur_addr;
+    bool write_enable;
+
+    int64_t dirty_page;
+
+    char *part_name;
+    const FlashPartInfo *pi;
+
+} Flash;
+
+static void bdrv_sync_complete(void *opaque, int ret)
+{
+    /* do nothing. Masters do not directly interact with the backing store,
+     * only the working copy so no mutexing required.
+     */
+}
+
+static void flash_sync_page(Flash *s, int page)
+{
+    if (s->bdrv) {
+        int bdrv_sector, nb_sectors;
+        QEMUIOVector iov;
+
+        bdrv_sector = (page * s->pi->page_size) / BDRV_SECTOR_SIZE;
+        nb_sectors = DIV_ROUND_UP(s->pi->page_size, BDRV_SECTOR_SIZE);
+        qemu_iovec_init(&iov, 1);
+        qemu_iovec_add(&iov, s->storage + bdrv_sector * BDRV_SECTOR_SIZE,
+                                                nb_sectors * BDRV_SECTOR_SIZE);
+        bdrv_aio_writev(s->bdrv, bdrv_sector, &iov, nb_sectors,
+                                                bdrv_sync_complete, NULL);
+    }
+}
+
+static inline void flash_sync_area(Flash *s, int64_t off, int64_t len)
+{
+    int64_t start, end, nb_sectors;
+    QEMUIOVector iov;
+
+    if (!s->bdrv) {
+        return;
+    }
+
+    assert(!(len % BDRV_SECTOR_SIZE));
+    start = off / BDRV_SECTOR_SIZE;
+    end = (off + len) / BDRV_SECTOR_SIZE;
+    nb_sectors = end - start;
+    qemu_iovec_init(&iov, 1);
+    qemu_iovec_add(&iov, s->storage + (start * BDRV_SECTOR_SIZE),
+                                        nb_sectors * BDRV_SECTOR_SIZE);
+    bdrv_aio_writev(s->bdrv, start, &iov, nb_sectors, bdrv_sync_complete, NULL);
+}
+
+static void flash_erase(Flash *s, int offset, FlashCMD cmd)
+{
+    uint32_t len;
+    uint8_t capa_to_assert = 0;
+
+    switch (cmd) {
+    case ERASE_4K:
+        len = 4 << 10;
+        capa_to_assert = ER_4K;
+        break;
+    case ERASE_32K:
+        len = 32 << 10;
+        capa_to_assert = ER_32K;
+        break;
+    case ERASE_SECTOR:
+        len = s->pi->sector_size;
+        break;
+    case BULK_ERASE:
+        len = s->size;
+        break;
+    default:
+        abort();
+    }
+
+    DB_PRINT("offset = %#x, len = %d\n", offset, len);
+    if ((s->pi->flags & capa_to_assert) != capa_to_assert) {
+        hw_error("m25p80: %dk erase size not supported by device\n", len);
+    }
+
+    if (!s->write_enable) {
+        DB_PRINT("erase with write protect!\n");
+        return;
+    }
+    memset(s->storage + offset, 0xff, len);
+    flash_sync_area(s, offset, len);
+}
+
+static inline void flash_sync_dirty(Flash *s, int64_t newpage)
+{
+    if (s->dirty_page >= 0 && s->dirty_page != newpage) {
+        flash_sync_page(s, s->dirty_page);
+        s->dirty_page = newpage;
+    }
+}
+
+static inline
+void flash_write8(Flash *s, uint64_t addr, uint8_t data)
+{
+    int64_t page = addr / s->pi->page_size;
+    uint8_t prev = s->storage[s->cur_addr];
+
+    if (!s->write_enable) {
+        DB_PRINT("write with write protect!\n");
+    }
+
+    if ((prev ^ data) & data) {
+        DB_PRINT("programming zero to one! addr=%lx  %x -> %x\n",
+                  addr, prev, data);
+    }
+
+    if (s->pi->flags & WR_1) {
+        s->storage[s->cur_addr] = data;
+    } else {
+        s->storage[s->cur_addr] &= data;
+    }
+
+    flash_sync_dirty(s, page);
+    s->dirty_page = page;
+}
+
+static void complete_collecting_data(Flash *s)
+{
+    s->cur_addr = s->data[0] << 16;
+    s->cur_addr |= s->data[1] << 8;
+    s->cur_addr |= s->data[2];
+
+    switch (s->cmd_in_progress) {
+    case PP:
+        s->state = STATE_PAGE_PROGRAM;
+        break;
+    case READ:
+    case FAST_READ:
+        s->state = STATE_READ;
+        break;
+    case ERASE_4K:
+    case ERASE_32K:
+    case ERASE_SECTOR:
+        flash_erase(s, s->cur_addr, s->cmd_in_progress);
+        break;
+    default:
+        break;
+    }
+}
+
+static void decode_new_cmd(Flash *s, uint32_t value)
+{
+    s->cmd_in_progress = value;
+    DB_PRINT("decoded new command:%x\n", value);
+
+    switch (value) {
+
+    case ERASE_4K:
+    case ERASE_32K:
+    case ERASE_SECTOR:
+    case READ:
+    case PP:
+        s->needed_bytes = 3;
+        s->pos = 0;
+        s->len = 0;
+        s->state = STATE_COLLECTING_DATA;
+        break;
+
+    case FAST_READ:
+        s->needed_bytes = 4;
+        s->pos = 0;
+        s->len = 0;
+        s->state = STATE_COLLECTING_DATA;
+        break;
+
+    case WRDI:
+        s->write_enable = false;
+        break;
+    case WREN:
+        s->write_enable = true;
+        break;
+
+    case RDSR:
+        s->data[0] = (!!s->write_enable) << 1;
+        s->pos = 0;
+        s->len = 1;
+        s->state = STATE_READING_DATA;
+        break;
+
+    case JEDEC_READ:
+        DB_PRINT("populated jedec code\n");
+        s->data[0] = (s->pi->jedec >> 16) & 0xff;
+        s->data[1] = (s->pi->jedec >> 8) & 0xff;
+        s->data[2] = s->pi->jedec & 0xff;
+        if (s->pi->ext_jedec) {
+            s->data[3] = (s->pi->ext_jedec >> 8) & 0xff;
+            s->data[4] = s->pi->ext_jedec & 0xff;
+            s->len = 5;
+        } else {
+            s->len = 3;
+        }
+        s->pos = 0;
+        s->state = STATE_READING_DATA;
+        break;
+
+    case BULK_ERASE:
+        if (s->write_enable) {
+            DB_PRINT("chip erase\n");
+            flash_erase(s, 0, BULK_ERASE);
+        } else {
+            DB_PRINT("chip erase with write protect!\n");
+        }
+        break;
+    case NOP:
+        break;
+    default:
+        DB_PRINT("Unknown cmd %x\n", value);
+        break;
+    }
+}
+
+static int m25p80_cs(SSISlave *ss, bool select)
+{
+    Flash *s = FROM_SSI_SLAVE(Flash, ss);
+
+    if (select) {
+        s->len = 0;
+        s->pos = 0;
+        s->state = STATE_IDLE;
+        flash_sync_dirty(s, -1);
+    }
+
+    DB_PRINT("%sselect\n", select ? "de" : "");
+
+    return 0;
+}
+
+static uint32_t m25p80_transfer8(SSISlave *ss, uint32_t tx)
+{
+    Flash *s = FROM_SSI_SLAVE(Flash, ss);
+    uint32_t r = 0;
+
+    switch (s->state) {
+
+    case STATE_PAGE_PROGRAM:
+        DB_PRINT("page program cur_addr=%lx data=%x\n", s->cur_addr,
+                 (uint8_t)tx);
+        flash_write8(s, s->cur_addr, (uint8_t)tx);
+        s->cur_addr++;
+        break;
+
+    case STATE_READ:
+        r = s->storage[s->cur_addr];
+        DB_PRINT("READ 0x%lx=%x\n", s->cur_addr, r);
+        s->cur_addr = (s->cur_addr + 1) % s->size;
+        break;
+
+    case STATE_COLLECTING_DATA:
+        s->data[s->len] = (uint8_t)tx;
+        s->len++;
+
+        if (s->len == s->needed_bytes) {
+            complete_collecting_data(s);
+        }
+        break;
+
+    case STATE_READING_DATA:
+        r = s->data[s->pos];
+        s->pos++;
+        if (s->pos == s->len) {
+            s->pos = 0;
+            s->state = STATE_IDLE;
+        }
+        break;
+
+    default:
+    case STATE_IDLE:
+        decode_new_cmd(s, (uint8_t)tx);
+        break;
+    }
+
+    return r;
+}
+
+static int m25p80_init(SSISlave *ss)
+{
+    DriveInfo *dinfo;
+    Flash *s = FROM_SSI_SLAVE(Flash, ss);
+    const FlashPartInfo *i;
+
+    if (!s->part_name) { /* default to actual m25p80 if no partname given */
+        s->part_name = (char *)"m25p80";
+    }
+
+    i = known_devices;
+    for (i = known_devices;; i++) {
+        assert(i);
+        if (!i->part_name) {
+            fprintf(stderr, "Unknown SPI flash part: \"%s\"\n", s->part_name);
+            return 1;
+        } else if (!strcmp(i->part_name, s->part_name)) {
+            s->pi = i;
+            break;
+        }
+    }
+
+    s->size = s->pi->sector_size * s->pi->n_sectors;
+    s->dirty_page = -1;
+    s->storage = qemu_blockalign(s->bdrv, s->size);
+
+    dinfo = drive_get_next(IF_MTD);
+
+    if (dinfo && dinfo->bdrv) {
+        DB_PRINT("Binding to IF_MTD drive\n");
+        s->bdrv = dinfo->bdrv;
+        /* FIXME: Move to late init */
+        if (bdrv_read(s->bdrv, 0, s->storage, DIV_ROUND_UP(s->size,
+                                                    BDRV_SECTOR_SIZE))) {
+            fprintf(stderr, "Failed to initialize SPI flash!\n");
+            return 1;
+        }
+    } else {
+        memset(s->storage, 0xFF, s->size);
+    }
+
+    return 0;
+}
+
+static void m25p80_pre_save(void *opaque)
+{
+    flash_sync_dirty((Flash *)opaque, -1);
+}
+
+static const VMStateDescription vmstate_m25p80 = {
+    .name = "xilinx_spi",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = m25p80_pre_save,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(state, Flash),
+        VMSTATE_UINT8_ARRAY(data, Flash, 16),
+        VMSTATE_UINT32(len, Flash),
+        VMSTATE_UINT32(pos, Flash),
+        VMSTATE_UINT8(needed_bytes, Flash),
+        VMSTATE_UINT8(cmd_in_progress, Flash),
+        VMSTATE_UINT64(cur_addr, Flash),
+        VMSTATE_BOOL(write_enable, Flash),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property m25p80_properties[] = {
+    DEFINE_PROP_STRING("partname", Flash, part_name),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void m25p80_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SSISlaveClass *k = SSI_SLAVE_CLASS(klass);
+
+    k->init = m25p80_init;
+    k->transfer = m25p80_transfer8;
+    k->set_cs = m25p80_cs;
+    k->cs_polarity = SSI_CS_LOW;
+    dc->props = m25p80_properties;
+    dc->vmsd = &vmstate_m25p80;
+}
+
+static const TypeInfo m25p80_info = {
+    .name           = "m25p80",
+    .parent         = TYPE_SSI_SLAVE,
+    .instance_size  = sizeof(Flash),
+    .class_init     = m25p80_class_init,
+};
+
+static void m25p80_register_types(void)
+{
+    type_register_static(&m25p80_info);
+}
+
+type_init(m25p80_register_types)
diff --git a/hw/max111x.c b/hw/max111x.c
index 706d89f..67640f1 100644
--- a/hw/max111x.c
+++ b/hw/max111x.c
@@ -99,10 +99,11 @@
 
 static const VMStateDescription vmstate_max111x = {
     .name = "max111x",
-    .version_id = 0,
-    .minimum_version_id = 0,
-    .minimum_version_id_old = 0,
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
     .fields      = (VMStateField[]) {
+        VMSTATE_SSI_SLAVE(ssidev, MAX111xState),
         VMSTATE_UINT8(tb1, MAX111xState),
         VMSTATE_UINT8(rb2, MAX111xState),
         VMSTATE_UINT8(rb3, MAX111xState),
diff --git a/hw/mc146818rtc.c b/hw/mc146818rtc.c
index d63554f..332a77d 100644
--- a/hw/mc146818rtc.c
+++ b/hw/mc146818rtc.c
@@ -399,6 +399,10 @@
             s->cmos_data[s->cmos_index] = data;
             check_update_timer(s);
             break;
+	case RTC_IBM_PS2_CENTURY_BYTE:
+            s->cmos_index = RTC_CENTURY;
+            /* fall through */
+        case RTC_CENTURY:
         case RTC_SECONDS:
         case RTC_MINUTES:
         case RTC_HOURS:
@@ -515,7 +519,9 @@
     tm->tm_wday = rtc_from_bcd(s, s->cmos_data[RTC_DAY_OF_WEEK]) - 1;
     tm->tm_mday = rtc_from_bcd(s, s->cmos_data[RTC_DAY_OF_MONTH]);
     tm->tm_mon = rtc_from_bcd(s, s->cmos_data[RTC_MONTH]) - 1;
-    tm->tm_year = rtc_from_bcd(s, s->cmos_data[RTC_YEAR]) + s->base_year - 1900;
+    tm->tm_year =
+        rtc_from_bcd(s, s->cmos_data[RTC_YEAR]) + s->base_year +
+        rtc_from_bcd(s, s->cmos_data[RTC_CENTURY]) * 100 - 1900;
 }
 
 static void rtc_set_time(RTCState *s)
@@ -548,10 +554,9 @@
     s->cmos_data[RTC_DAY_OF_WEEK] = rtc_to_bcd(s, tm->tm_wday + 1);
     s->cmos_data[RTC_DAY_OF_MONTH] = rtc_to_bcd(s, tm->tm_mday);
     s->cmos_data[RTC_MONTH] = rtc_to_bcd(s, tm->tm_mon + 1);
-    year = (tm->tm_year - s->base_year) % 100;
-    if (year < 0)
-        year += 100;
-    s->cmos_data[RTC_YEAR] = rtc_to_bcd(s, year);
+    year = tm->tm_year + 1900 - s->base_year;
+    s->cmos_data[RTC_YEAR] = rtc_to_bcd(s, year % 100);
+    s->cmos_data[RTC_CENTURY] = rtc_to_bcd(s, year / 100);
 }
 
 static void rtc_update_time(RTCState *s)
@@ -598,6 +603,10 @@
         return 0xff;
     } else {
         switch(s->cmos_index) {
+	case RTC_IBM_PS2_CENTURY_BYTE:
+            s->cmos_index = RTC_CENTURY;
+            /* fall through */
+        case RTC_CENTURY:
         case RTC_SECONDS:
         case RTC_MINUTES:
         case RTC_HOURS:
@@ -661,15 +670,10 @@
         s->cmos_data[addr] = val;
 }
 
-/* PC cmos mappings */
-#define REG_IBM_CENTURY_BYTE        0x32
-#define REG_IBM_PS2_CENTURY_BYTE    0x37
-
 static void rtc_set_date_from_host(ISADevice *dev)
 {
     RTCState *s = DO_UPCAST(RTCState, dev, dev);
     struct tm tm;
-    int val;
 
     qemu_get_timedate(&tm, 0);
 
@@ -679,10 +683,6 @@
 
     /* set the CMOS date */
     rtc_set_cmos(s, &tm);
-
-    val = rtc_to_bcd(s, (tm.tm_year / 100) + 19);
-    rtc_set_memory(dev, REG_IBM_CENTURY_BYTE, val);
-    rtc_set_memory(dev, REG_IBM_PS2_CENTURY_BYTE, val);
 }
 
 static int rtc_post_load(void *opaque, int version_id)
@@ -807,6 +807,18 @@
     s->cmos_data[RTC_REG_C] = 0x00;
     s->cmos_data[RTC_REG_D] = 0x80;
 
+    /* This is for historical reasons.  The default base year qdev property
+     * was set to 2000 for most machine types before the century byte was
+     * implemented.
+     *
+     * This if statement means that the century byte will be always 0
+     * (at least until 2079...) for base_year = 1980, but will be set
+     * correctly for base_year = 2000.
+     */
+    if (s->base_year == 2000) {
+        s->base_year = 0;
+    }
+
     rtc_set_date_from_host(dev);
 
 #ifdef TARGET_I386
diff --git a/hw/mc146818rtc_regs.h b/hw/mc146818rtc_regs.h
index fc10076..ccdee42 100644
--- a/hw/mc146818rtc_regs.h
+++ b/hw/mc146818rtc_regs.h
@@ -44,6 +44,10 @@
 #define RTC_REG_C               12
 #define RTC_REG_D               13
 
+/* PC cmos mappings */
+#define RTC_CENTURY              0x32
+#define RTC_IBM_PS2_CENTURY_BYTE 0x37
+
 #define REG_A_UIP 0x80
 
 #define REG_B_SET  0x80
diff --git a/hw/megasas.c b/hw/megasas.c
index c728aea..0e57740 100644
--- a/hw/megasas.c
+++ b/hw/megasas.c
@@ -652,7 +652,6 @@
         }
     }
     cmd->iov_size = 0;
-    return;
 }
 
 static int megasas_ctrl_get_info(MegasasState *s, MegasasCmd *cmd)
diff --git a/hw/microblaze/Makefile.objs b/hw/microblaze/Makefile.objs
index 274d2c5..3028e65 100644
--- a/hw/microblaze/Makefile.objs
+++ b/hw/microblaze/Makefile.objs
@@ -1,6 +1,7 @@
 obj-y = petalogix_s3adsp1800_mmu.o
 obj-y += petalogix_ml605_mmu.o
 obj-y += microblaze_boot.o
+obj-y += xilinx_spi.o
 
 obj-y += microblaze_pic_cpu.o
 obj-y += xilinx_ethlite.o
diff --git a/hw/mips_fulong2e.c b/hw/mips_fulong2e.c
index 38e4b86..d4a8672 100644
--- a/hw/mips_fulong2e.c
+++ b/hw/mips_fulong2e.c
@@ -392,7 +392,7 @@
     network_init();
 }
 
-QEMUMachine mips_fulong2e_machine = {
+static QEMUMachine mips_fulong2e_machine = {
     .name = "fulong2e",
     .desc = "Fulong 2e mini pc",
     .init = mips_fulong2e_init,
diff --git a/hw/mips_malta.c b/hw/mips_malta.c
index ad23f26..632b466 100644
--- a/hw/mips_malta.c
+++ b/hw/mips_malta.c
@@ -33,7 +33,6 @@
 #include "mips.h"
 #include "mips_cpudevs.h"
 #include "pci.h"
-#include "vmware_vga.h"
 #include "qemu-char.h"
 #include "sysemu.h"
 #include "arch_init.h"
@@ -48,7 +47,6 @@
 #include "blockdev.h"
 #include "exec-memory.h"
 #include "sysbus.h"             /* SysBusDevice */
-#include "vga-pci.h"
 
 //#define DEBUG_BOARD_INIT
 
@@ -987,13 +985,7 @@
     network_init();
 
     /* Optional PCI video card */
-    if (cirrus_vga_enabled) {
-        pci_cirrus_vga_init(pci_bus);
-    } else if (vmsvga_enabled) {
-        pci_vmsvga_init(pci_bus);
-    } else if (std_vga_enabled) {
-        pci_vga_init(pci_bus);
-    }
+    pci_vga_init(pci_bus);
 }
 
 static int mips_malta_sysbus_device_init(SysBusDevice *sysbusdev)
diff --git a/hw/msix.c b/hw/msix.c
index d812094..b623cb5 100644
--- a/hw/msix.c
+++ b/hw/msix.c
@@ -366,7 +366,6 @@
     g_free(dev->msix_entry_used);
     dev->msix_entry_used = NULL;
     dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
-    return;
 }
 
 void msix_uninit_exclusive_bar(PCIDevice *dev)
diff --git a/hw/nand.c b/hw/nand.c
index e9501ae..01f3ada 100644
--- a/hw/nand.c
+++ b/hw/nand.c
@@ -654,7 +654,7 @@
         sector = SECTOR(s->addr);
         off = (s->addr & PAGE_MASK) + s->offset;
         soff = SECTOR_OFFSET(s->addr);
-        if (bdrv_read(s->bdrv, sector, iobuf, PAGE_SECTORS) == -1) {
+        if (bdrv_read(s->bdrv, sector, iobuf, PAGE_SECTORS) < 0) {
             printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
             return;
         }
@@ -666,21 +666,23 @@
                             MIN(OOB_SIZE, off + s->iolen - PAGE_SIZE));
         }
 
-        if (bdrv_write(s->bdrv, sector, iobuf, PAGE_SECTORS) == -1)
+        if (bdrv_write(s->bdrv, sector, iobuf, PAGE_SECTORS) < 0) {
             printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
+        }
     } else {
         off = PAGE_START(s->addr) + (s->addr & PAGE_MASK) + s->offset;
         sector = off >> 9;
         soff = off & 0x1ff;
-        if (bdrv_read(s->bdrv, sector, iobuf, PAGE_SECTORS + 2) == -1) {
+        if (bdrv_read(s->bdrv, sector, iobuf, PAGE_SECTORS + 2) < 0) {
             printf("%s: read error in sector %" PRIu64 "\n", __func__, sector);
             return;
         }
 
         mem_and(iobuf + soff, s->io, s->iolen);
 
-        if (bdrv_write(s->bdrv, sector, iobuf, PAGE_SECTORS + 2) == -1)
+        if (bdrv_write(s->bdrv, sector, iobuf, PAGE_SECTORS + 2) < 0) {
             printf("%s: write error in sector %" PRIu64 "\n", __func__, sector);
+        }
     }
     s->offset = 0;
 }
@@ -704,31 +706,37 @@
         i = SECTOR(addr);
         page = SECTOR(addr + (ADDR_SHIFT + s->erase_shift));
         for (; i < page; i ++)
-            if (bdrv_write(s->bdrv, i, iobuf, 1) == -1)
+            if (bdrv_write(s->bdrv, i, iobuf, 1) < 0) {
                 printf("%s: write error in sector %" PRIu64 "\n", __func__, i);
+            }
     } else {
         addr = PAGE_START(addr);
         page = addr >> 9;
-        if (bdrv_read(s->bdrv, page, iobuf, 1) == -1)
+        if (bdrv_read(s->bdrv, page, iobuf, 1) < 0) {
             printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
+        }
         memset(iobuf + (addr & 0x1ff), 0xff, (~addr & 0x1ff) + 1);
-        if (bdrv_write(s->bdrv, page, iobuf, 1) == -1)
+        if (bdrv_write(s->bdrv, page, iobuf, 1) < 0) {
             printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
+        }
 
         memset(iobuf, 0xff, 0x200);
         i = (addr & ~0x1ff) + 0x200;
         for (addr += ((PAGE_SIZE + OOB_SIZE) << s->erase_shift) - 0x200;
                         i < addr; i += 0x200)
-            if (bdrv_write(s->bdrv, i >> 9, iobuf, 1) == -1)
+            if (bdrv_write(s->bdrv, i >> 9, iobuf, 1) < 0) {
                 printf("%s: write error in sector %" PRIu64 "\n",
                        __func__, i >> 9);
+            }
 
         page = i >> 9;
-        if (bdrv_read(s->bdrv, page, iobuf, 1) == -1)
+        if (bdrv_read(s->bdrv, page, iobuf, 1) < 0) {
             printf("%s: read error in sector %" PRIu64 "\n", __func__, page);
+        }
         memset(iobuf, 0xff, ((addr - 1) & 0x1ff) + 1);
-        if (bdrv_write(s->bdrv, page, iobuf, 1) == -1)
+        if (bdrv_write(s->bdrv, page, iobuf, 1) < 0) {
             printf("%s: write error in sector %" PRIu64 "\n", __func__, page);
+        }
     }
 }
 
@@ -740,18 +748,20 @@
 
     if (s->bdrv) {
         if (s->mem_oob) {
-            if (bdrv_read(s->bdrv, SECTOR(addr), s->io, PAGE_SECTORS) == -1)
+            if (bdrv_read(s->bdrv, SECTOR(addr), s->io, PAGE_SECTORS) < 0) {
                 printf("%s: read error in sector %" PRIu64 "\n",
                                 __func__, SECTOR(addr));
+            }
             memcpy(s->io + SECTOR_OFFSET(s->addr) + PAGE_SIZE,
                             s->storage + (PAGE(s->addr) << OOB_SHIFT),
                             OOB_SIZE);
             s->ioaddr = s->io + SECTOR_OFFSET(s->addr) + offset;
         } else {
             if (bdrv_read(s->bdrv, PAGE_START(addr) >> 9,
-                                    s->io, (PAGE_SECTORS + 2)) == -1)
+                                    s->io, (PAGE_SECTORS + 2)) < 0) {
                 printf("%s: read error in sector %" PRIu64 "\n",
                                 __func__, PAGE_START(addr) >> 9);
+            }
             s->ioaddr = s->io + (PAGE_START(addr) & 0x1ff) + offset;
         }
     } else {
diff --git a/hw/nseries.c b/hw/nseries.c
index 4df2670..6df71eb 100644
--- a/hw/nseries.c
+++ b/hw/nseries.c
@@ -189,6 +189,17 @@
     /* XXX: in theory should also update the OOB for both pages */
 }
 
+static qemu_irq n8x0_system_powerdown;
+
+static void n8x0_powerdown_req(Notifier *n, void *opaque)
+{
+    qemu_irq_raise(n8x0_system_powerdown);
+}
+
+static Notifier n8x0_system_powerdown_notifier = {
+    .notify = n8x0_powerdown_req
+};
+
 static void n8x0_i2c_setup(struct n800_s *s)
 {
     DeviceState *dev;
@@ -201,7 +212,8 @@
                           qdev_get_gpio_in(s->mpu->ih[0],
                                            OMAP_INT_24XX_SYS_NIRQ));
 
-    qemu_system_powerdown = qdev_get_gpio_in(dev, 3);
+    n8x0_system_powerdown = qdev_get_gpio_in(dev, 3);
+    qemu_register_powerdown_notifier(&n8x0_system_powerdown_notifier);
 
     /* Attach a TMP105 PM chip (A0 wired to ground) */
     dev = i2c_create_slave(i2c, "tmp105", N8X0_TMP105_ADDR);
diff --git a/hw/onenand.c b/hw/onenand.c
index db6af68..0f7b755 100644
--- a/hw/onenand.c
+++ b/hw/onenand.c
@@ -351,7 +351,7 @@
     for (; num > 0; num--, sec++) {
         if (s->bdrv_cur) {
             int erasesec = s->secs_cur + (sec >> 5);
-            if (bdrv_write(s->bdrv_cur, sec, blankbuf, 1)) {
+            if (bdrv_write(s->bdrv_cur, sec, blankbuf, 1) < 0) {
                 goto fail;
             }
             if (bdrv_read(s->bdrv_cur, erasesec, tmpbuf, 1) < 0) {
diff --git a/hw/pc.c b/hw/pc.c
index c32ee8e..0e417c9 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -27,7 +27,6 @@
 #include "fdc.h"
 #include "ide.h"
 #include "pci.h"
-#include "vmware_vga.h"
 #include "monitor.h"
 #include "fw_cfg.h"
 #include "hpet_emul.h"
@@ -51,10 +50,6 @@
 #include "exec-memory.h"
 #include "arch_init.h"
 #include "bitmap.h"
-#include "vga-pci.h"
-
-/* output Bochs bios info messages */
-//#define DEBUG_BIOS
 
 /* debug PC/ISA interrupts */
 //#define DEBUG_IRQ
@@ -534,17 +529,6 @@
     static int shutdown_index = 0;
 
     switch(addr) {
-        /* Bochs BIOS messages */
-    case 0x400:
-    case 0x401:
-        /* used to be panic, now unused */
-        break;
-    case 0x402:
-    case 0x403:
-#ifdef DEBUG_BIOS
-        fprintf(stderr, "%c", val);
-#endif
-        break;
     case 0x8900:
         /* same as Bochs power off */
         if (val == shutdown_str[shutdown_index]) {
@@ -558,16 +542,9 @@
         }
         break;
 
-        /* LGPL'ed VGA BIOS messages */
     case 0x501:
     case 0x502:
         exit((val << 1) | 1);
-    case 0x500:
-    case 0x503:
-#ifdef DEBUG_BIOS
-        fprintf(stderr, "%c", val);
-#endif
-        break;
     }
 }
 
@@ -596,17 +573,11 @@
     uint64_t *numa_fw_cfg;
     int i, j;
 
-    register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
-    register_ioport_write(0x401, 1, 2, bochs_bios_write, NULL);
-    register_ioport_write(0x402, 1, 1, bochs_bios_write, NULL);
-    register_ioport_write(0x403, 1, 1, bochs_bios_write, NULL);
     register_ioport_write(0x8900, 1, 1, bochs_bios_write, NULL);
 
     register_ioport_write(0x501, 1, 1, bochs_bios_write, NULL);
     register_ioport_write(0x501, 1, 2, bochs_bios_write, NULL);
     register_ioport_write(0x502, 1, 2, bochs_bios_write, NULL);
-    register_ioport_write(0x500, 1, 1, bochs_bios_write, NULL);
-    register_ioport_write(0x503, 1, 1, bochs_bios_write, NULL);
 
     fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
 
@@ -1019,34 +990,13 @@
 {
     DeviceState *dev = NULL;
 
-    if (cirrus_vga_enabled) {
-        if (pci_bus) {
-            dev = pci_cirrus_vga_init(pci_bus);
-        } else {
-            dev = &isa_create_simple(isa_bus, "isa-cirrus-vga")->qdev;
-        }
-    } else if (vmsvga_enabled) {
-        if (pci_bus) {
-            dev = pci_vmsvga_init(pci_bus);
-        } else {
-            fprintf(stderr, "%s: vmware_vga: no PCI bus\n", __FUNCTION__);
-        }
-#ifdef CONFIG_SPICE
-    } else if (qxl_enabled) {
-        if (pci_bus) {
-            dev = &pci_create_simple(pci_bus, -1, "qxl-vga")->qdev;
-        } else {
-            fprintf(stderr, "%s: qxl: no PCI bus\n", __FUNCTION__);
-        }
-#endif
-    } else if (std_vga_enabled) {
-        if (pci_bus) {
-            dev = pci_vga_init(pci_bus);
-        } else {
-            dev = isa_vga_init(isa_bus);
-        }
+    if (pci_bus) {
+        PCIDevice *pcidev = pci_vga_init(pci_bus);
+        dev = pcidev ? &pcidev->qdev : NULL;
+    } else if (isa_bus) {
+        ISADevice *isadev = isa_vga_init(isa_bus);
+        dev = isadev ? &isadev->qdev : NULL;
     }
-
     return dev;
 }
 
diff --git a/hw/pc.h b/hw/pc.h
index e4db071..9923d96 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -176,19 +176,6 @@
 
 extern enum vga_retrace_method vga_retrace_method;
 
-static inline DeviceState *isa_vga_init(ISABus *bus)
-{
-    ISADevice *dev;
-
-    dev = isa_try_create(bus, "isa-vga");
-    if (!dev) {
-        fprintf(stderr, "Warning: isa-vga not available\n");
-        return NULL;
-    }
-    qdev_init_nofail(&dev->qdev);
-    return &dev->qdev;
-}
-
 int isa_vga_mm_init(target_phys_addr_t vram_base,
                     target_phys_addr_t ctrl_base, int it_shift,
                     MemoryRegion *address_space);
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index a771d79..1fc20cd 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -359,8 +359,8 @@
 #define KVM_MACHINE_OPTIONS ""
 #endif
 
-static QEMUMachine pc_machine_v1_2 = {
-    .name = "pc-1.2",
+static QEMUMachine pc_machine_v1_3 = {
+    .name = "pc-1.3",
     .alias = "pc",
     .desc = "Standard PC",
     .init = pc_init_pci,
@@ -369,7 +369,34 @@
     .default_machine_opts = KVM_MACHINE_OPTIONS,
 };
 
+#define PC_COMPAT_1_2 \
+        {\
+            .driver   = "nec-usb-xhci",\
+            .property = "msi",\
+            .value    = "off",\
+        },{\
+            .driver   = "nec-usb-xhci",\
+            .property = "msix",\
+            .value    = "off",\
+        },{\
+            .driver   = "ivshmem",\
+            .property = "use64",\
+            .value    = "0",\
+        }
+
+static QEMUMachine pc_machine_v1_2 = {
+    .name = "pc-1.2",
+    .desc = "Standard PC",
+    .init = pc_init_pci,
+    .max_cpus = 255,
+    .compat_props = (GlobalProperty[]) {
+        PC_COMPAT_1_2,
+        { /* end of list */ }
+    },
+};
+
 #define PC_COMPAT_1_1 \
+        PC_COMPAT_1_2,\
         {\
             .driver   = "virtio-scsi-pci",\
             .property = "hotplug",\
@@ -675,6 +702,7 @@
 
 static void pc_machine_init(void)
 {
+    qemu_register_machine(&pc_machine_v1_3);
     qemu_register_machine(&pc_machine_v1_2);
     qemu_register_machine(&pc_machine_v1_1);
     qemu_register_machine(&pc_machine_v1_0);
diff --git a/hw/pci.c b/hw/pci.c
index f855cf3..2ca6ff6 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -1474,6 +1474,24 @@
     return res;
 }
 
+PCIDevice *pci_vga_init(PCIBus *bus)
+{
+    switch (vga_interface_type) {
+    case VGA_CIRRUS:
+        return pci_create_simple(bus, -1, "cirrus-vga");
+    case VGA_QXL:
+        return pci_create_simple(bus, -1, "qxl-vga");
+    case VGA_STD:
+        return pci_create_simple(bus, -1, "VGA");
+    case VGA_VMWARE:
+        return pci_create_simple(bus, -1, "vmware-svga");
+    case VGA_NONE:
+    default: /* Other non-PCI types. Checking for unsupported types is already
+                done in vl.c. */
+        return NULL;
+    }
+}
+
 /* Whether a given bus number is in range of the secondary
  * bus of the given bridge device. */
 static bool pci_secondary_bus_in_range(PCIDevice *dev, int bus_num)
@@ -1962,7 +1980,7 @@
                    PCI_SLOT(d->devfn));
     if (PCI_FUNC(d->devfn))
         snprintf(path + off, sizeof(path) + off, ",%x", PCI_FUNC(d->devfn));
-    return strdup(path);
+    return g_strdup(path);
 }
 
 static char *pcibus_get_dev_path(DeviceState *dev)
diff --git a/hw/pci.h b/hw/pci.h
index 4b6ab3d..d50d26c 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -334,6 +334,9 @@
                         const char *default_devaddr);
 PCIDevice *pci_nic_init_nofail(NICInfo *nd, const char *default_model,
                                const char *default_devaddr);
+
+PCIDevice *pci_vga_init(PCIBus *bus);
+
 int pci_bus_num(PCIBus *s);
 void pci_for_each_device(PCIBus *bus, int bus_num,
                          void (*fn)(PCIBus *bus, PCIDevice *d, void *opaque),
diff --git a/hw/petalogix_ml605_mmu.c b/hw/petalogix_ml605_mmu.c
index dced648..b9bfbed 100644
--- a/hw/petalogix_ml605_mmu.c
+++ b/hw/petalogix_ml605_mmu.c
@@ -36,6 +36,7 @@
 #include "blockdev.h"
 #include "pc.h"
 #include "exec-memory.h"
+#include "ssi.h"
 
 #include "microblaze_boot.h"
 #include "microblaze_pic_cpu.h"
@@ -47,6 +48,8 @@
 
 #define BINARY_DEVICE_TREE_FILE "petalogix-ml605.dtb"
 
+#define NUM_SPI_FLASHES 4
+
 #define MEMORY_BASEADDR 0x50000000
 #define FLASH_BASEADDR 0x86000000
 #define INTC_BASEADDR 0x81800000
@@ -79,6 +82,7 @@
     MemoryRegion *address_space_mem = get_system_memory();
     DeviceState *dev, *dma, *eth0;
     MicroBlazeCPU *cpu;
+    SysBusDevice *busdev;
     CPUMBState *env;
     DriveInfo *dinfo;
     int i;
@@ -139,6 +143,29 @@
     xilinx_axiethernetdma_init(dma, STREAM_SLAVE(eth0),
                                0x84600000, irq[1], irq[0], 100 * 1000000);
 
+    {
+        SSIBus *spi;
+
+        dev = qdev_create(NULL, "xlnx.xps-spi");
+        qdev_prop_set_uint8(dev, "num-ss-bits", NUM_SPI_FLASHES);
+        qdev_init_nofail(dev);
+        busdev = sysbus_from_qdev(dev);
+        sysbus_mmio_map(busdev, 0, 0x40a00000);
+        sysbus_connect_irq(busdev, 0, irq[4]);
+
+        spi = (SSIBus *)qdev_get_child_bus(dev, "spi");
+
+        for (i = 0; i < NUM_SPI_FLASHES; i++) {
+            qemu_irq cs_line;
+
+            dev = ssi_create_slave_no_init(spi, "m25p80");
+            qdev_prop_set_string(dev, "partname", "n25q128");
+            qdev_init_nofail(dev);
+            cs_line = qdev_get_gpio_in(dev, 0);
+            sysbus_connect_irq(busdev, i+1, cs_line);
+        }
+    }
+
     microblaze_load_kernel(cpu, ddr_base, ram_size, BINARY_DEVICE_TREE_FILE,
                                                             machine_cpu_reset);
 
diff --git a/hw/pflash_cfi01.c b/hw/pflash_cfi01.c
index d1c7423..3b437da 100644
--- a/hw/pflash_cfi01.c
+++ b/hw/pflash_cfi01.c
@@ -41,6 +41,7 @@
 #include "block.h"
 #include "qemu-timer.h"
 #include "exec-memory.h"
+#include "host-utils.h"
 
 #define PFLASH_BUG(fmt, ...) \
 do { \
@@ -320,7 +321,7 @@
         }
         pfl->wcycle++;
         pfl->cmd = cmd;
-        return;
+        break;
     case 1:
         switch (pfl->cmd) {
         case 0x10: /* Single Byte Program */
@@ -375,7 +376,7 @@
         default:
             goto error_flash;
         }
-        return;
+        break;
     case 2:
         switch (pfl->cmd) {
         case 0xe8: /* Block write */
@@ -406,7 +407,7 @@
         default:
             goto error_flash;
         }
-        return;
+        break;
     case 3: /* Confirm mode */
         switch (pfl->cmd) {
         case 0xe8: /* Block write */
@@ -422,7 +423,7 @@
         default:
             goto error_flash;
         }
-        return;
+        break;
     default:
         /* Should never happen */
         DPRINTF("%s: invalid write state\n",  __func__);
@@ -441,7 +442,6 @@
     pfl->bypass = 0;
     pfl->wcycle = 0;
     pfl->cmd = 0;
-    return;
 }
 
 
@@ -543,42 +543,6 @@
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-/* Count trailing zeroes of a 32 bits quantity */
-static int ctz32 (uint32_t n)
-{
-    int ret;
-
-    ret = 0;
-    if (!(n & 0xFFFF)) {
-        ret += 16;
-        n = n >> 16;
-    }
-    if (!(n & 0xFF)) {
-        ret += 8;
-        n = n >> 8;
-    }
-    if (!(n & 0xF)) {
-        ret += 4;
-        n = n >> 4;
-    }
-    if (!(n & 0x3)) {
-        ret += 2;
-        n = n >> 2;
-    }
-    if (!(n & 0x1)) {
-        ret++;
-#if 0 /* This is not necessary as n is never 0 */
-        n = n >> 1;
-#endif
-    }
-#if 0 /* This is not necessary as n is never 0 */
-    if (!n)
-        ret++;
-#endif
-
-    return ret;
-}
-
 pflash_t *pflash_cfi01_register(target_phys_addr_t base,
                                 DeviceState *qdev, const char *name,
                                 target_phys_addr_t size,
@@ -711,7 +675,7 @@
     pfl->cfi_table[0x33] = 'I';
 
     pfl->cfi_table[0x34] = '1';
-    pfl->cfi_table[0x35] = '1';
+    pfl->cfi_table[0x35] = '0';
 
     pfl->cfi_table[0x36] = 0x00;
     pfl->cfi_table[0x37] = 0x00;
@@ -723,6 +687,8 @@
     pfl->cfi_table[0x3b] = 0x00;
     pfl->cfi_table[0x3c] = 0x00;
 
+    pfl->cfi_table[0x3f] = 0x01; /* Number of protection fields */
+
     return pfl;
 }
 
diff --git a/hw/pflash_cfi02.c b/hw/pflash_cfi02.c
index 3e2002e..39337ec 100644
--- a/hw/pflash_cfi02.c
+++ b/hw/pflash_cfi02.c
@@ -40,6 +40,7 @@
 #include "qemu-timer.h"
 #include "block.h"
 #include "exec-memory.h"
+#include "host-utils.h"
 
 //#define PFLASH_DEBUG
 #ifdef PFLASH_DEBUG
@@ -473,7 +474,6 @@
  do_bypass:
     pfl->wcycle = 2;
     pfl->cmd = 0;
-    return;
 }
 
 
@@ -575,42 +575,6 @@
     .endianness = DEVICE_NATIVE_ENDIAN,
 };
 
-/* Count trailing zeroes of a 32 bits quantity */
-static int ctz32 (uint32_t n)
-{
-    int ret;
-
-    ret = 0;
-    if (!(n & 0xFFFF)) {
-        ret += 16;
-        n = n >> 16;
-    }
-    if (!(n & 0xFF)) {
-        ret += 8;
-        n = n >> 8;
-    }
-    if (!(n & 0xF)) {
-        ret += 4;
-        n = n >> 4;
-    }
-    if (!(n & 0x3)) {
-        ret += 2;
-        n = n >> 2;
-    }
-    if (!(n & 0x1)) {
-        ret++;
-#if 0 /* This is not necessary as n is never 0 */
-        n = n >> 1;
-#endif
-    }
-#if 0 /* This is not necessary as n is never 0 */
-    if (!n)
-        ret++;
-#endif
-
-    return ret;
-}
-
 pflash_t *pflash_cfi02_register(target_phys_addr_t base,
                                 DeviceState *qdev, const char *name,
                                 target_phys_addr_t size,
diff --git a/hw/pl190.c b/hw/pl190.c
index cb50afb..7332f4d 100644
--- a/hw/pl190.c
+++ b/hw/pl190.c
@@ -117,12 +117,18 @@
         return s->protected;
     case 12: /* VECTADDR */
         /* Read vector address at the start of an ISR.  Increases the
-           current priority level to that of the current interrupt.  */
-        for (i = 0; i < s->priority; i++)
-          {
-            if ((s->level | s->soft_level) & s->prio_mask[i])
-              break;
-          }
+         * current priority level to that of the current interrupt.
+         *
+         * Since an enabled interrupt X at priority P causes prio_mask[Y]
+         * to have bit X set for all Y > P, this loop will stop with
+         * i == the priority of the highest priority set interrupt.
+         */
+        for (i = 0; i < s->priority; i++) {
+            if ((s->level | s->soft_level) & s->prio_mask[i + 1]) {
+                break;
+            }
+        }
+
         /* Reading this value with no pending interrupts is undefined.
            We return the default address.  */
         if (i == PL190_NUM_PRIO)
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index 6f0de6d..d23f9b2 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -36,7 +36,7 @@
 
 #define BINARY_DEVICE_TREE_FILE    "mpc8544ds.dtb"
 #define UIMAGE_LOAD_BASE           0
-#define DTC_LOAD_PAD               0x500000
+#define DTC_LOAD_PAD               0x1800000
 #define DTC_PAD_MASK               0xFFFFF
 #define INITRD_LOAD_PAD            0x2000000
 #define INITRD_PAD_MASK            0xFFFFFF
@@ -139,12 +139,10 @@
             0x0, 0x10000,
         };
     QemuOpts *machine_opts;
-    const char *dumpdtb = NULL;
     const char *dtb_file = NULL;
 
     machine_opts = qemu_opts_find(qemu_find_opts("machine"), 0);
     if (machine_opts) {
-        dumpdtb = qemu_opt_get(machine_opts, "dumpdtb");
         dtb_file = qemu_opt_get(machine_opts, "dtb");
         toplevel_compat = qemu_opt_get(machine_opts, "dt_compatible");
     }
@@ -334,18 +332,7 @@
     }
 
 done:
-    if (dumpdtb) {
-        /* Dump the dtb to a file and quit */
-        FILE *f = fopen(dumpdtb, "wb");
-        size_t len;
-        len = fwrite(fdt, fdt_size, 1, f);
-        fclose(f);
-        if (len != fdt_size) {
-            exit(1);
-        }
-        exit(0);
-    }
-
+    qemu_devtree_dumpdtb(fdt, fdt_size);
     ret = rom_add_blob_fixed(BINARY_DEVICE_TREE_FILE, fdt, fdt_size, addr);
     if (ret < 0) {
         goto out;
@@ -375,6 +362,10 @@
        the device tree top */
     dt_end = bi->dt_base + bi->dt_size;
     ps = booke206_page_size_to_tlb(dt_end) + 1;
+    if (ps & 1) {
+        /* e500v2 can only do even TLB size bits */
+        ps++;
+    }
     size = (ps << MAS1_TSIZE_SHIFT);
     tlb->mas1 = MAS1_VALID | size;
     tlb->mas2 = 0;
@@ -553,7 +544,8 @@
 
     /* Load initrd. */
     if (params->initrd_filename) {
-        initrd_base = (kernel_size + INITRD_LOAD_PAD) & ~INITRD_PAD_MASK;
+        initrd_base = (loadaddr + kernel_size + INITRD_LOAD_PAD) &
+            ~INITRD_PAD_MASK;
         initrd_size = load_image_targphys(params->initrd_filename, initrd_base,
                                           ram_size - initrd_base);
 
diff --git a/hw/ppc405_uc.c b/hw/ppc405_uc.c
index 89e5013..b52ab2f 100644
--- a/hw/ppc405_uc.c
+++ b/hw/ppc405_uc.c
@@ -191,7 +191,8 @@
 typedef struct ppc4xx_pob_t ppc4xx_pob_t;
 struct ppc4xx_pob_t {
     uint32_t bear;
-    uint32_t besr[2];
+    uint32_t besr0;
+    uint32_t besr1;
 };
 
 static uint32_t dcr_read_pob (void *opaque, int dcrn)
@@ -205,8 +206,10 @@
         ret = pob->bear;
         break;
     case POB0_BESR0:
+        ret = pob->besr0;
+        break;
     case POB0_BESR1:
-        ret = pob->besr[dcrn - POB0_BESR0];
+        ret = pob->besr1;
         break;
     default:
         /* Avoid gcc warning */
@@ -227,9 +230,12 @@
         /* Read only */
         break;
     case POB0_BESR0:
+        /* Write-clear */
+        pob->besr0 &= ~val;
+        break;
     case POB0_BESR1:
         /* Write-clear */
-        pob->besr[dcrn - POB0_BESR0] &= ~val;
+        pob->besr1 &= ~val;
         break;
     }
 }
@@ -241,8 +247,8 @@
     pob = opaque;
     /* No error */
     pob->bear = 0x00000000;
-    pob->besr[0] = 0x0000000;
-    pob->besr[1] = 0x0000000;
+    pob->besr0 = 0x0000000;
+    pob->besr1 = 0x0000000;
 }
 
 static void ppc4xx_pob_init(CPUPPCState *env)
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index e95cfe8..b8d3c9c 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -67,7 +67,6 @@
 #include "hw/usb.h"
 #include "blockdev.h"
 #include "exec-memory.h"
-#include "vga-pci.h"
 
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c
index 1dcd8a6..2c4a478 100644
--- a/hw/ppc_oldworld.c
+++ b/hw/ppc_oldworld.c
@@ -43,7 +43,6 @@
 #include "kvm_ppc.h"
 #include "blockdev.h"
 #include "exec-memory.h"
-#include "vga-pci.h"
 
 #define MAX_IDE_BUS 2
 #define CFG_ADDR 0xf0000510
diff --git a/hw/ppc_prep.c b/hw/ppc_prep.c
index 592b7b2..1544430 100644
--- a/hw/ppc_prep.c
+++ b/hw/ppc_prep.c
@@ -39,7 +39,6 @@
 #include "blockdev.h"
 #include "arch_init.h"
 #include "exec-memory.h"
-#include "vga-pci.h"
 
 //#define HARD_DEBUG_PPC_IO
 //#define DEBUG_PPC_IO
diff --git a/hw/pxa2xx_keypad.c b/hw/pxa2xx_keypad.c
index 59db025..1a997c9 100644
--- a/hw/pxa2xx_keypad.c
+++ b/hw/pxa2xx_keypad.c
@@ -172,7 +172,6 @@
         kp->kpc |= KPC_MI;
         qemu_irq_raise(kp->irq);
     }
-    return;
 }
 
 static uint64_t pxa2xx_keypad_read(void *opaque, target_phys_addr_t offset,
diff --git a/hw/qdev.c b/hw/qdev.c
index b5a52ac..9b9aba3 100644
--- a/hw/qdev.c
+++ b/hw/qdev.c
@@ -34,7 +34,6 @@
 static bool qdev_hot_added = false;
 static bool qdev_hot_removed = false;
 
-/* Register a new device type.  */
 const VMStateDescription *qdev_get_vmsd(DeviceState *dev)
 {
     DeviceClass *dc = DEVICE_GET_CLASS(dev);
@@ -52,11 +51,6 @@
     return object_get_typename(OBJECT(dev));
 }
 
-bool qdev_exists(const char *name)
-{
-    return !!object_class_by_name(name);
-}
-
 static void qdev_property_add_legacy(DeviceState *dev, Property *prop,
                                      Error **errp);
 
@@ -291,9 +285,9 @@
 
 void qdev_init_gpio_in(DeviceState *dev, qemu_irq_handler handler, int n)
 {
-    assert(dev->num_gpio_in == 0);
-    dev->num_gpio_in = n;
-    dev->gpio_in = qemu_allocate_irqs(handler, dev, n);
+    dev->gpio_in = qemu_extend_irqs(dev->gpio_in, dev->num_gpio_in, handler,
+                                        dev, n);
+    dev->num_gpio_in += n;
 }
 
 void qdev_init_gpio_out(DeviceState *dev, qemu_irq *pins, int n)
@@ -520,7 +514,7 @@
 
     path[l-1] = '\0';
 
-    return strdup(path);
+    return g_strdup(path);
 }
 
 char *qdev_get_dev_path(DeviceState *dev)
diff --git a/hw/qdev.h b/hw/qdev.h
index d699194..c6ac636 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -153,7 +153,6 @@
 
 DeviceState *qdev_create(BusState *bus, const char *name);
 DeviceState *qdev_try_create(BusState *bus, const char *name);
-bool qdev_exists(const char *name);
 int qdev_device_help(QemuOpts *opts);
 DeviceState *qdev_device_add(QemuOpts *opts);
 int qdev_init(DeviceState *dev) QEMU_WARN_UNUSED_RESULT;
diff --git a/hw/qxl.c b/hw/qxl.c
index 5b3f484..33169f3 100644
--- a/hw/qxl.c
+++ b/hw/qxl.c
@@ -18,6 +18,8 @@
  * along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <zlib.h>
+
 #include "qemu-common.h"
 #include "qemu-timer.h"
 #include "qemu-queue.h"
@@ -141,6 +143,7 @@
 
 void qxl_set_guest_bug(PCIQXLDevice *qxl, const char *msg, ...)
 {
+    trace_qxl_set_guest_bug(qxl->id);
     qxl_send_events(qxl, QXL_INTERRUPT_ERROR);
     qxl->guest_bug = 1;
     if (qxl->guestdebug) {
@@ -201,6 +204,7 @@
         spice_qxl_destroy_surface_async(&qxl->ssd.qxl, id, (uintptr_t)cookie);
     } else {
         qxl->ssd.worker->destroy_surface_wait(qxl->ssd.worker, id);
+        qxl_spice_destroy_surface_wait_complete(qxl, id);
     }
 }
 
@@ -597,9 +601,9 @@
     case QXL_MODE_VGA:
         ret = false;
         qemu_mutex_lock(&qxl->ssd.lock);
-        if (qxl->ssd.update != NULL) {
-            update = qxl->ssd.update;
-            qxl->ssd.update = NULL;
+        update = QTAILQ_FIRST(&qxl->ssd.updates);
+        if (update != NULL) {
+            QTAILQ_REMOVE(&qxl->ssd.updates, update, next);
             *ext = update->ext;
             ret = true;
         }
@@ -953,6 +957,11 @@
 {
     PCIQXLDevice *qxl = container_of(sin, PCIQXLDevice, ssd.qxl);
 
+    if (runstate_check(RUN_STATE_INMIGRATE) ||
+        runstate_check(RUN_STATE_POSTMIGRATE)) {
+        return;
+    }
+
     qxl->shadow_rom.client_present = client_present;
     memcpy(qxl->shadow_rom.client_capabilities, caps, sizeof(caps));
     qxl->rom->client_present = client_present;
@@ -964,6 +973,79 @@
 
 #endif
 
+#if defined(CONFIG_QXL_CLIENT_MONITORS_CONFIG) \
+    && SPICE_SERVER_VERSION >= 0x000b05
+
+static uint32_t qxl_crc32(const uint8_t *p, unsigned len)
+{
+    /*
+     * zlib xors the seed with 0xffffffff, and xors the result
+     * again with 0xffffffff; Both are not done with linux's crc32,
+     * which we want to be compatible with, so undo that.
+     */
+    return crc32(0xffffffff, p, len) ^ 0xffffffff;
+}
+
+/* called from main context only */
+static int interface_client_monitors_config(QXLInstance *sin,
+                                        VDAgentMonitorsConfig *monitors_config)
+{
+    PCIQXLDevice *qxl = container_of(sin, PCIQXLDevice, ssd.qxl);
+    QXLRom *rom = memory_region_get_ram_ptr(&qxl->rom_bar);
+    int i;
+
+    /*
+     * Older windows drivers set int_mask to 0 when their ISR is called,
+     * then later set it to ~0. So it doesn't relate to the actual interrupts
+     * handled. However, they are old, so clearly they don't support this
+     * interrupt
+     */
+    if (qxl->ram->int_mask == 0 || qxl->ram->int_mask == ~0 ||
+        !(qxl->ram->int_mask & QXL_INTERRUPT_CLIENT_MONITORS_CONFIG)) {
+        trace_qxl_client_monitors_config_unsupported_by_guest(qxl->id,
+                                                            qxl->ram->int_mask,
+                                                            monitors_config);
+        return 0;
+    }
+    if (!monitors_config) {
+        return 1;
+    }
+    memset(&rom->client_monitors_config, 0,
+           sizeof(rom->client_monitors_config));
+    rom->client_monitors_config.count = monitors_config->num_of_monitors;
+    /* monitors_config->flags ignored */
+    if (rom->client_monitors_config.count >=
+            ARRAY_SIZE(rom->client_monitors_config.heads)) {
+        trace_qxl_client_monitors_config_capped(qxl->id,
+                                monitors_config->num_of_monitors,
+                                ARRAY_SIZE(rom->client_monitors_config.heads));
+        rom->client_monitors_config.count =
+            ARRAY_SIZE(rom->client_monitors_config.heads);
+    }
+    for (i = 0 ; i < rom->client_monitors_config.count ; ++i) {
+        VDAgentMonConfig *monitor = &monitors_config->monitors[i];
+        QXLURect *rect = &rom->client_monitors_config.heads[i];
+        /* monitor->depth ignored */
+        rect->left = monitor->x;
+        rect->top = monitor->y;
+        rect->right = monitor->x + monitor->width;
+        rect->bottom = monitor->y + monitor->height;
+    }
+    rom->client_monitors_config_crc = qxl_crc32(
+            (const uint8_t *)&rom->client_monitors_config,
+            sizeof(rom->client_monitors_config));
+    trace_qxl_client_monitors_config_crc(qxl->id,
+            sizeof(rom->client_monitors_config),
+            rom->client_monitors_config_crc);
+
+    trace_qxl_interrupt_client_monitors_config(qxl->id,
+                        rom->client_monitors_config.count,
+                        rom->client_monitors_config.heads);
+    qxl_send_events(qxl, QXL_INTERRUPT_CLIENT_MONITORS_CONFIG);
+    return 1;
+}
+#endif
+
 static const QXLInterface qxl_interface = {
     .base.type               = SPICE_INTERFACE_QXL,
     .base.description        = "qxl gpu",
@@ -988,6 +1070,10 @@
 #if SPICE_SERVER_VERSION >= 0x000b04
     .set_client_capabilities = interface_set_client_capabilities,
 #endif
+#if SPICE_SERVER_VERSION >= 0x000b05 && \
+    defined(CONFIG_QXL_CLIENT_MONITORS_CONFIG)
+    .client_monitors_config = interface_client_monitors_config,
+#endif
 };
 
 static void qxl_enter_vga_mode(PCIQXLDevice *d)
@@ -1402,7 +1488,7 @@
             break;
         }
         trace_qxl_io_unexpected_vga_mode(d->id,
-            io_port, io_port_to_string(io_port));
+            addr, val, io_port_to_string(io_port));
         /* be nice to buggy guest drivers */
         if (io_port >= QXL_IO_UPDATE_AREA_ASYNC &&
             io_port < QXL_IO_RANGE_SIZE) {
@@ -1470,6 +1556,13 @@
             return;
         }
 
+        if (update.left < 0 || update.top < 0 || update.left >= update.right ||
+            update.top >= update.bottom) {
+            qxl_set_guest_bug(d, "QXL_IO_UPDATE_AREA: "
+                              "invalid area(%d,%d,%d,%d)\n", update.left,
+                              update.right, update.top, update.bottom);
+            break;
+        }
         if (async == QXL_ASYNC) {
             cookie = qxl_cookie_new(QXL_COOKIE_TYPE_IO,
                                     QXL_IO_UPDATE_AREA_ASYNC);
@@ -1501,6 +1594,7 @@
         qxl_set_mode(d, val, 0);
         break;
     case QXL_IO_LOG:
+        trace_qxl_io_log(d->id, d->ram->log_buf);
         if (d->guestdebug) {
             fprintf(stderr, "qxl/guest-%d: %" PRId64 ": %s", d->id,
                     qemu_get_clock_ns(vm_clock), d->ram->log_buf);
@@ -1594,9 +1688,9 @@
 static uint64_t ioport_read(void *opaque, target_phys_addr_t addr,
                             unsigned size)
 {
-    PCIQXLDevice *d = opaque;
+    PCIQXLDevice *qxl = opaque;
 
-    trace_qxl_io_read_unexpected(d->id);
+    trace_qxl_io_read_unexpected(qxl->id);
     return 0xff;
 }
 
@@ -1626,6 +1720,7 @@
     uint32_t old_pending;
     uint32_t le_events = cpu_to_le32(events);
 
+    trace_qxl_send_events(d->id, events);
     assert(qemu_spice_display_is_running(&d->ssd));
     old_pending = __sync_fetch_and_or(&d->ram->int_pending, le_events);
     if ((old_pending & le_events) == le_events) {
@@ -1910,6 +2005,7 @@
     if (qxl->id == 0) {
         vga_dirty_log_start(&qxl->vga);
     }
+    memory_region_set_flush_coalesced(&qxl->io_bar);
 
 
     pci_register_bar(&qxl->pci, QXL_IO_RANGE_INDEX,
diff --git a/hw/r2d.c b/hw/r2d.c
index 0f16e81..1bc191f 100644
--- a/hw/r2d.c
+++ b/hw/r2d.c
@@ -332,6 +332,8 @@
     }
 
     if (kernel_cmdline) {
+        /* I see no evidence that this .kernel_cmdline buffer requires
+           NUL-termination, so using strncpy should be ok. */
         strncpy(boot_params.kernel_cmdline, kernel_cmdline,
                 sizeof(boot_params.kernel_cmdline));
     }
diff --git a/hw/rtl8139.c b/hw/rtl8139.c
index 844f1b8..b7c82ee 100644
--- a/hw/rtl8139.c
+++ b/hw/rtl8139.c
@@ -774,11 +774,7 @@
 #define MIN_BUF_SIZE 60
 static inline dma_addr_t rtl8139_addr64(uint32_t low, uint32_t high)
 {
-#if TARGET_PHYS_ADDR_BITS > 32
-    return low | ((target_phys_addr_t)high << 32);
-#else
-    return low;
-#endif
+    return low | ((uint64_t)high << 32);
 }
 
 /* Workaround for buggy guest driver such as linux who allocates rx
diff --git a/hw/sb16.c b/hw/sb16.c
index c81455d..523ab0d 100644
--- a/hw/sb16.c
+++ b/hw/sb16.c
@@ -822,7 +822,6 @@
 
     ldebug ("\n");
     s->cmd = -1;
-    return;
 }
 
 static void legacy_reset (SB16State *s)
diff --git a/hw/scsi-bus.c b/hw/scsi-bus.c
index 4981a02..dfb2631 100644
--- a/hw/scsi-bus.c
+++ b/hw/scsi-bus.c
@@ -801,26 +801,39 @@
     return xfer * unit;
 }
 
-static int scsi_req_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
+uint32_t scsi_data_cdb_length(uint8_t *buf)
+{
+    if ((buf[0] >> 5) == 0 && buf[4] == 0) {
+        return 256;
+    } else {
+        return scsi_cdb_length(buf);
+    }
+}
+
+uint32_t scsi_cdb_length(uint8_t *buf)
 {
     switch (buf[0] >> 5) {
     case 0:
-        cmd->xfer = buf[4];
+        return buf[4];
         break;
     case 1:
     case 2:
-        cmd->xfer = lduw_be_p(&buf[7]);
+        return lduw_be_p(&buf[7]);
         break;
     case 4:
-        cmd->xfer = ldl_be_p(&buf[10]) & 0xffffffffULL;
+        return ldl_be_p(&buf[10]) & 0xffffffffULL;
         break;
     case 5:
-        cmd->xfer = ldl_be_p(&buf[6]) & 0xffffffffULL;
+        return ldl_be_p(&buf[6]) & 0xffffffffULL;
         break;
     default:
         return -1;
     }
+}
 
+static int scsi_req_length(SCSICommand *cmd, SCSIDevice *dev, uint8_t *buf)
+{
+    cmd->xfer = scsi_cdb_length(buf);
     switch (buf[0]) {
     case TEST_UNIT_READY:
     case REWIND:
@@ -1710,12 +1723,8 @@
 static char *scsibus_get_fw_dev_path(DeviceState *dev)
 {
     SCSIDevice *d = SCSI_DEVICE(dev);
-    char path[100];
-
-    snprintf(path, sizeof(path), "channel@%x/%s@%x,%x", d->channel,
-             qdev_fw_name(dev), d->id, d->lun);
-
-    return strdup(path);
+    return g_strdup_printf("channel@%x/%s@%x,%x", d->channel,
+                           qdev_fw_name(dev), d->id, d->lun);
 }
 
 SCSIDevice *scsi_device_find(SCSIBus *bus, int channel, int id, int lun)
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 1585683..1b0afa6 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -386,23 +386,11 @@
  */
 static int scsi_handle_rw_error(SCSIDiskReq *r, int error)
 {
-    int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
+    bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
-    BlockErrorAction action = bdrv_get_on_error(s->qdev.conf.bs, is_read);
+    BlockErrorAction action = bdrv_get_error_action(s->qdev.conf.bs, is_read, error);
 
-    if (action == BLOCK_ERR_IGNORE) {
-        bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read);
-        return 0;
-    }
-
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
-
-        bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read);
-        vm_stop(RUN_STATE_IO_ERROR);
-        bdrv_iostatus_set_err(s->qdev.conf.bs, error);
-        scsi_req_retry(&r->req);
-    } else {
+    if (action == BDRV_ACTION_REPORT) {
         switch (error) {
         case ENOMEDIUM:
             scsi_check_condition(r, SENSE_CODE(NO_MEDIUM));
@@ -417,9 +405,12 @@
             scsi_check_condition(r, SENSE_CODE(IO_ERROR));
             break;
         }
-        bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_REPORT, is_read);
     }
-    return 1;
+    bdrv_error_action(s->qdev.conf.bs, action, is_read, error);
+    if (action == BDRV_ACTION_STOP) {
+        scsi_req_retry(&r->req);
+    }
+    return action != BDRV_ACTION_IGNORE;
 }
 
 static void scsi_write_complete(void * opaque, int ret)
@@ -678,7 +669,7 @@
      * is actually implemented, but we're good enough.
      */
     outbuf[2] = 5;
-    outbuf[3] = 2; /* Format 2 */
+    outbuf[3] = 2 | 0x10; /* Format 2, HiSup */
 
     if (buflen > 36) {
         outbuf[4] = buflen - 5; /* Additional Length = (Len - 1) - 4 */
@@ -1446,7 +1437,22 @@
 
 invalid_field:
     scsi_check_condition(r, SENSE_CODE(INVALID_FIELD));
-    return;
+}
+
+static inline bool check_lba_range(SCSIDiskState *s,
+                                   uint64_t sector_num, uint32_t nb_sectors)
+{
+    /*
+     * The first line tests that no overflow happens when computing the last
+     * sector.  The second line tests that the last accessed sector is in
+     * range.
+     *
+     * Careful, the computations should not underflow for nb_sectors == 0,
+     * and a 0-block read to the first LBA beyond the end of device is
+     * valid.
+     */
+    return (sector_num <= sector_num + nb_sectors &&
+            sector_num + nb_sectors <= s->qdev.max_lba + 1);
 }
 
 typedef struct UnmapCBData {
@@ -1473,8 +1479,7 @@
     if (data->count > 0 && !r->req.io_canceled) {
         sector_num = ldq_be_p(&data->inbuf[0]);
         nb_sectors = ldl_be_p(&data->inbuf[8]) & 0xffffffffULL;
-        if (sector_num > sector_num + nb_sectors ||
-            sector_num + nb_sectors - 1 > s->qdev.max_lba) {
+        if (!check_lba_range(s, sector_num, nb_sectors)) {
             scsi_check_condition(r, SENSE_CODE(LBA_OUT_OF_RANGE));
             goto done;
         }
@@ -1529,7 +1534,6 @@
 
 invalid_param_len:
     scsi_check_condition(r, SENSE_CODE(INVALID_PARAM_LEN));
-    return;
 }
 
 static void scsi_disk_emulate_write_data(SCSIRequest *req)
@@ -1793,17 +1797,13 @@
         DPRINTF("Unmap (len %lu)\n", (long)r->req.cmd.xfer);
         break;
     case WRITE_SAME_10:
-        nb_sectors = lduw_be_p(&req->cmd.buf[7]);
-        goto write_same;
     case WRITE_SAME_16:
-        nb_sectors = ldl_be_p(&req->cmd.buf[10]) & 0xffffffffULL;
-    write_same:
+        nb_sectors = scsi_data_cdb_length(r->req.cmd.buf);
         if (bdrv_is_read_only(s->qdev.conf.bs)) {
             scsi_check_condition(r, SENSE_CODE(WRITE_PROTECTED));
             return 0;
         }
-        if (r->req.cmd.lba > r->req.cmd.lba + nb_sectors ||
-            r->req.cmd.lba + nb_sectors - 1 > s->qdev.max_lba) {
+        if (!check_lba_range(s, r->req.cmd.lba, nb_sectors)) {
             goto illegal_lba;
         }
 
@@ -1858,7 +1858,7 @@
 {
     SCSIDiskReq *r = DO_UPCAST(SCSIDiskReq, req, req);
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, req->dev);
-    int32_t len;
+    uint32_t len;
     uint8_t command;
 
     command = buf[0];
@@ -1868,18 +1868,17 @@
         return 0;
     }
 
+    len = scsi_data_cdb_length(r->req.cmd.buf);
     switch (command) {
     case READ_6:
     case READ_10:
     case READ_12:
     case READ_16:
-        len = r->req.cmd.xfer / s->qdev.blocksize;
-        DPRINTF("Read (sector %" PRId64 ", count %d)\n", r->req.cmd.lba, len);
+        DPRINTF("Read (sector %" PRId64 ", count %u)\n", r->req.cmd.lba, len);
         if (r->req.cmd.buf[1] & 0xe0) {
             goto illegal_request;
         }
-        if (r->req.cmd.lba > r->req.cmd.lba + len ||
-            r->req.cmd.lba + len - 1 > s->qdev.max_lba) {
+        if (!check_lba_range(s, r->req.cmd.lba, len)) {
             goto illegal_lba;
         }
         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
@@ -1900,15 +1899,13 @@
     case VERIFY_10:
     case VERIFY_12:
     case VERIFY_16:
-        len = r->req.cmd.xfer / s->qdev.blocksize;
-        DPRINTF("Write %s(sector %" PRId64 ", count %d)\n",
+        DPRINTF("Write %s(sector %" PRId64 ", count %u)\n",
                 (command & 0xe) == 0xe ? "And Verify " : "",
                 r->req.cmd.lba, len);
         if (r->req.cmd.buf[1] & 0xe0) {
             goto illegal_request;
         }
-        if (r->req.cmd.lba > r->req.cmd.lba + len ||
-            r->req.cmd.lba + len - 1 > s->qdev.max_lba) {
+        if (!check_lba_range(s, r->req.cmd.lba, len)) {
             goto illegal_lba;
         }
         r->sector = r->req.cmd.lba * (s->qdev.blocksize / 512);
diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c
index a5eb663..d904534 100644
--- a/hw/scsi-generic.c
+++ b/hw/scsi-generic.c
@@ -400,11 +400,11 @@
         return -1;
     }
 
-    if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) {
+    if (bdrv_get_on_error(s->conf.bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) {
         error_report("Device doesn't support drive option werror");
         return -1;
     }
-    if (bdrv_get_on_error(s->conf.bs, 1) != BLOCK_ERR_REPORT) {
+    if (bdrv_get_on_error(s->conf.bs, 1) != BLOCKDEV_ON_ERROR_REPORT) {
         error_report("Device doesn't support drive option rerror");
         return -1;
     }
diff --git a/hw/scsi.h b/hw/scsi.h
index 1aeee46..b8f7357 100644
--- a/hw/scsi.h
+++ b/hw/scsi.h
@@ -218,6 +218,8 @@
 
 #define SENSE_CODE(x) sense_code_ ## x
 
+uint32_t scsi_data_cdb_length(uint8_t *buf);
+uint32_t scsi_cdb_length(uint8_t *buf);
 int scsi_sense_valid(SCSISense sense);
 int scsi_build_sense(uint8_t *in_buf, int in_len,
                      uint8_t *buf, int len, bool fixed);
diff --git a/hw/sd.c b/hw/sd.c
index ec26407..297580a 100644
--- a/hw/sd.c
+++ b/hw/sd.c
@@ -1407,7 +1407,7 @@
 
     DPRINTF("sd_blk_read: addr = 0x%08llx, len = %d\n",
             (unsigned long long) addr, len);
-    if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) == -1) {
+    if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) < 0) {
         fprintf(stderr, "sd_blk_read: read error on host side\n");
         return;
     }
@@ -1415,7 +1415,7 @@
     if (end > (addr & ~511) + 512) {
         memcpy(sd->data, sd->buf + (addr & 511), 512 - (addr & 511));
 
-        if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) == -1) {
+        if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_read: read error on host side\n");
             return;
         }
@@ -1429,29 +1429,31 @@
     uint64_t end = addr + len;
 
     if ((addr & 511) || len < 512)
-        if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) == -1) {
+        if (!sd->bdrv || bdrv_read(sd->bdrv, addr >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: read error on host side\n");
             return;
         }
 
     if (end > (addr & ~511) + 512) {
         memcpy(sd->buf + (addr & 511), sd->data, 512 - (addr & 511));
-        if (bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) == -1) {
+        if (bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: write error on host side\n");
             return;
         }
 
-        if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) == -1) {
+        if (bdrv_read(sd->bdrv, end >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: read error on host side\n");
             return;
         }
         memcpy(sd->buf, sd->data + 512 - (addr & 511), end & 511);
-        if (bdrv_write(sd->bdrv, end >> 9, sd->buf, 1) == -1)
+        if (bdrv_write(sd->bdrv, end >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: write error on host side\n");
+        }
     } else {
         memcpy(sd->buf + (addr & 511), sd->data, len);
-        if (!sd->bdrv || bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) == -1)
+        if (!sd->bdrv || bdrv_write(sd->bdrv, addr >> 9, sd->buf, 1) < 0) {
             fprintf(stderr, "sd_blk_write: write error on host side\n");
+        }
     }
 }
 
diff --git a/hw/spapr.c b/hw/spapr.c
index c34b767..09b8e99 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -46,7 +46,6 @@
 #include "kvm.h"
 #include "kvm_ppc.h"
 #include "pci.h"
-#include "vga-pci.h"
 
 #include "exec-memory.h"
 #include "hw/usb.h"
@@ -85,9 +84,11 @@
 
 #define PHANDLE_XICP            0x00001111
 
+#define HTAB_SIZE(spapr)        (1ULL << ((spapr)->htab_shift))
+
 sPAPREnvironment *spapr;
 
-int spapr_allocate_irq(int hint, enum xics_irq_type type)
+int spapr_allocate_irq(int hint, bool lsi)
 {
     int irq;
 
@@ -103,13 +104,13 @@
         return 0;
     }
 
-    xics_set_irq_type(spapr->icp, irq, type);
+    xics_set_irq_type(spapr->icp, irq, lsi);
 
     return irq;
 }
 
 /* Allocate block of consequtive IRQs, returns a number of the first */
-int spapr_allocate_irq_block(int num, enum xics_irq_type type)
+int spapr_allocate_irq_block(int num, bool lsi)
 {
     int first = -1;
     int i;
@@ -117,7 +118,7 @@
     for (i = 0; i < num; ++i) {
         int irq;
 
-        irq = spapr_allocate_irq(0, type);
+        irq = spapr_allocate_irq(0, lsi);
         if (!irq) {
             return -1;
         }
@@ -134,12 +135,13 @@
     return first;
 }
 
-static int spapr_set_associativity(void *fdt, sPAPREnvironment *spapr)
+static int spapr_fixup_cpu_dt(void *fdt, sPAPREnvironment *spapr)
 {
     int ret = 0, offset;
     CPUPPCState *env;
     char cpu_model[32];
     int smt = kvmppc_smt_threads();
+    uint32_t pft_size_prop[] = {0, cpu_to_be32(spapr->htab_shift)};
 
     assert(spapr->cpu_model);
 
@@ -163,8 +165,16 @@
             return offset;
         }
 
-        ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
-                          sizeof(associativity));
+        if (nb_numa_nodes > 1) {
+            ret = fdt_setprop(fdt, offset, "ibm,associativity", associativity,
+                              sizeof(associativity));
+            if (ret < 0) {
+                return ret;
+            }
+        }
+
+        ret = fdt_setprop(fdt, offset, "ibm,pft-size",
+                          pft_size_prop, sizeof(pft_size_prop));
         if (ret < 0) {
             return ret;
         }
@@ -206,36 +216,6 @@
     return (p - prop) * sizeof(uint32_t);
 }
 
-static void *spapr_create_fdt_skel(const char *cpu_model,
-                                   target_phys_addr_t rma_size,
-                                   target_phys_addr_t initrd_base,
-                                   target_phys_addr_t initrd_size,
-                                   target_phys_addr_t kernel_size,
-                                   const char *boot_device,
-                                   const char *kernel_cmdline,
-                                   long hash_shift)
-{
-    void *fdt;
-    CPUPPCState *env;
-    uint64_t mem_reg_property[2];
-    uint32_t start_prop = cpu_to_be32(initrd_base);
-    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
-    uint32_t pft_size_prop[] = {0, cpu_to_be32(hash_shift)};
-    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
-        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
-    char qemu_hypertas_prop[] = "hcall-memop1";
-    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
-    int i;
-    char *modelname;
-    int smt = kvmppc_smt_threads();
-    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
-    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
-    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0), cpu_to_be32(0x0),
-                                cpu_to_be32(0x0)};
-    char mem_name[32];
-    target_phys_addr_t node0_size, mem_start;
-
 #define _FDT(exp) \
     do { \
         int ret = (exp);                                           \
@@ -246,6 +226,27 @@
         }                                                          \
     } while (0)
 
+
+static void *spapr_create_fdt_skel(const char *cpu_model,
+                                   target_phys_addr_t initrd_base,
+                                   target_phys_addr_t initrd_size,
+                                   target_phys_addr_t kernel_size,
+                                   const char *boot_device,
+                                   const char *kernel_cmdline)
+{
+    void *fdt;
+    CPUPPCState *env;
+    uint32_t start_prop = cpu_to_be32(initrd_base);
+    uint32_t end_prop = cpu_to_be32(initrd_base + initrd_size);
+    char hypertas_prop[] = "hcall-pft\0hcall-term\0hcall-dabr\0hcall-interrupt"
+        "\0hcall-tce\0hcall-vio\0hcall-splpar\0hcall-bulk";
+    char qemu_hypertas_prop[] = "hcall-memop1";
+    uint32_t refpoints[] = {cpu_to_be32(0x4), cpu_to_be32(0x4)};
+    uint32_t interrupt_server_ranges_prop[] = {0, cpu_to_be32(smp_cpus)};
+    char *modelname;
+    int i, smt = kvmppc_smt_threads();
+    unsigned char vec5[] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x80};
+
     fdt = g_malloc0(FDT_MAX_SIZE);
     _FDT((fdt_create(fdt, FDT_MAX_SIZE)));
 
@@ -289,55 +290,6 @@
 
     _FDT((fdt_end_node(fdt)));
 
-    /* memory node(s) */
-    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
-    if (rma_size > node0_size) {
-        rma_size = node0_size;
-    }
-
-    /* RMA */
-    mem_reg_property[0] = 0;
-    mem_reg_property[1] = cpu_to_be64(rma_size);
-    _FDT((fdt_begin_node(fdt, "memory@0")));
-    _FDT((fdt_property_string(fdt, "device_type", "memory")));
-    _FDT((fdt_property(fdt, "reg", mem_reg_property,
-        sizeof(mem_reg_property))));
-    _FDT((fdt_property(fdt, "ibm,associativity", associativity,
-        sizeof(associativity))));
-    _FDT((fdt_end_node(fdt)));
-
-    /* RAM: Node 0 */
-    if (node0_size > rma_size) {
-        mem_reg_property[0] = cpu_to_be64(rma_size);
-        mem_reg_property[1] = cpu_to_be64(node0_size - rma_size);
-
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, rma_size);
-        _FDT((fdt_begin_node(fdt, mem_name)));
-        _FDT((fdt_property_string(fdt, "device_type", "memory")));
-        _FDT((fdt_property(fdt, "reg", mem_reg_property,
-                           sizeof(mem_reg_property))));
-        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
-                           sizeof(associativity))));
-        _FDT((fdt_end_node(fdt)));
-    }
-
-    /* RAM: Node 1 and beyond */
-    mem_start = node0_size;
-    for (i = 1; i < nb_numa_nodes; i++) {
-        mem_reg_property[0] = cpu_to_be64(mem_start);
-        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
-        associativity[3] = associativity[4] = cpu_to_be32(i);
-        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
-        _FDT((fdt_begin_node(fdt, mem_name)));
-        _FDT((fdt_property_string(fdt, "device_type", "memory")));
-        _FDT((fdt_property(fdt, "reg", mem_reg_property,
-            sizeof(mem_reg_property))));
-        _FDT((fdt_property(fdt, "ibm,associativity", associativity,
-            sizeof(associativity))));
-        _FDT((fdt_end_node(fdt)));
-        mem_start += node_mem[i];
-    }
-
     /* cpus */
     _FDT((fdt_begin_node(fdt, "cpus")));
 
@@ -389,8 +341,6 @@
         _FDT((fdt_property_cell(fdt, "timebase-frequency", tbfreq)));
         _FDT((fdt_property_cell(fdt, "clock-frequency", cpufreq)));
         _FDT((fdt_property_cell(fdt, "ibm,slb-size", env->slb_nr)));
-        _FDT((fdt_property(fdt, "ibm,pft-size",
-                           pft_size_prop, sizeof(pft_size_prop))));
         _FDT((fdt_property_string(fdt, "status", "okay")));
         _FDT((fdt_property(fdt, "64-bit", NULL, 0)));
 
@@ -489,6 +439,68 @@
     return fdt;
 }
 
+static int spapr_populate_memory(sPAPREnvironment *spapr, void *fdt)
+{
+    uint32_t associativity[] = {cpu_to_be32(0x4), cpu_to_be32(0x0),
+                                cpu_to_be32(0x0), cpu_to_be32(0x0),
+                                cpu_to_be32(0x0)};
+    char mem_name[32];
+    target_phys_addr_t node0_size, mem_start;
+    uint64_t mem_reg_property[2];
+    int i, off;
+
+    /* memory node(s) */
+    node0_size = (nb_numa_nodes > 1) ? node_mem[0] : ram_size;
+    if (spapr->rma_size > node0_size) {
+        spapr->rma_size = node0_size;
+    }
+
+    /* RMA */
+    mem_reg_property[0] = 0;
+    mem_reg_property[1] = cpu_to_be64(spapr->rma_size);
+    off = fdt_add_subnode(fdt, 0, "memory@0");
+    _FDT(off);
+    _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+    _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                      sizeof(mem_reg_property))));
+    _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                      sizeof(associativity))));
+
+    /* RAM: Node 0 */
+    if (node0_size > spapr->rma_size) {
+        mem_reg_property[0] = cpu_to_be64(spapr->rma_size);
+        mem_reg_property[1] = cpu_to_be64(node0_size - spapr->rma_size);
+
+        sprintf(mem_name, "memory@" TARGET_FMT_lx, spapr->rma_size);
+        off = fdt_add_subnode(fdt, 0, mem_name);
+        _FDT(off);
+        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                          sizeof(mem_reg_property))));
+        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                          sizeof(associativity))));
+    }
+
+    /* RAM: Node 1 and beyond */
+    mem_start = node0_size;
+    for (i = 1; i < nb_numa_nodes; i++) {
+        mem_reg_property[0] = cpu_to_be64(mem_start);
+        mem_reg_property[1] = cpu_to_be64(node_mem[i]);
+        associativity[3] = associativity[4] = cpu_to_be32(i);
+        sprintf(mem_name, "memory@" TARGET_FMT_lx, mem_start);
+        off = fdt_add_subnode(fdt, 0, mem_name);
+        _FDT(off);
+        _FDT((fdt_setprop_string(fdt, off, "device_type", "memory")));
+        _FDT((fdt_setprop(fdt, off, "reg", mem_reg_property,
+                          sizeof(mem_reg_property))));
+        _FDT((fdt_setprop(fdt, off, "ibm,associativity", associativity,
+                          sizeof(associativity))));
+        mem_start += node_mem[i];
+    }
+
+    return 0;
+}
+
 static void spapr_finalize_fdt(sPAPREnvironment *spapr,
                                target_phys_addr_t fdt_addr,
                                target_phys_addr_t rtas_addr,
@@ -503,6 +515,12 @@
     /* open out the base tree into a temp buffer for the final tweaks */
     _FDT((fdt_open_into(spapr->fdt_skel, fdt, FDT_MAX_SIZE)));
 
+    ret = spapr_populate_memory(spapr, fdt);
+    if (ret < 0) {
+        fprintf(stderr, "couldn't setup memory nodes in fdt\n");
+        exit(1);
+    }
+
     ret = spapr_populate_vdevice(spapr->vio_bus, fdt);
     if (ret < 0) {
         fprintf(stderr, "couldn't setup vio devices in fdt\n");
@@ -525,11 +543,9 @@
     }
 
     /* Advertise NUMA via ibm,associativity */
-    if (nb_numa_nodes > 1) {
-        ret = spapr_set_associativity(fdt, spapr);
-        if (ret < 0) {
-            fprintf(stderr, "Couldn't set up NUMA device tree properties\n");
-        }
+    ret = spapr_fixup_cpu_dt(fdt, spapr);
+    if (ret < 0) {
+        fprintf(stderr, "Couldn't finalize CPU device tree properties\n");
     }
 
     if (!spapr->has_graphics) {
@@ -556,15 +572,49 @@
 
 static void emulate_spapr_hypercall(CPUPPCState *env)
 {
-    env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
+    if (msr_pr) {
+        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
+        env->gpr[3] = H_PRIVILEGE;
+    } else {
+        env->gpr[3] = spapr_hypercall(env, env->gpr[3], &env->gpr[4]);
+    }
 }
 
-static void spapr_reset(void *opaque)
+static void spapr_reset_htab(sPAPREnvironment *spapr)
 {
-    sPAPREnvironment *spapr = (sPAPREnvironment *)opaque;
+    long shift;
 
-    /* flush out the hash table */
-    memset(spapr->htab, 0, spapr->htab_size);
+    /* allocate hash page table.  For now we always make this 16mb,
+     * later we should probably make it scale to the size of guest
+     * RAM */
+
+    shift = kvmppc_reset_htab(spapr->htab_shift);
+
+    if (shift > 0) {
+        /* Kernel handles htab, we don't need to allocate one */
+        spapr->htab_shift = shift;
+    } else {
+        if (!spapr->htab) {
+            /* Allocate an htab if we don't yet have one */
+            spapr->htab = qemu_memalign(HTAB_SIZE(spapr), HTAB_SIZE(spapr));
+        }
+
+        /* And clear it */
+        memset(spapr->htab, 0, HTAB_SIZE(spapr));
+    }
+
+    /* Update the RMA size if necessary */
+    if (spapr->vrma_adjust) {
+        spapr->rma_size = kvmppc_rma_size(ram_size, spapr->htab_shift);
+    }
+}
+
+static void ppc_spapr_reset(void)
+{
+    /* Reset the hash table & recalc the RMA */
+    spapr_reset_htab(spapr);
+
+    qemu_devices_reset();
 
     /* Load the fdt */
     spapr_finalize_fdt(spapr, spapr->fdt_addr, spapr->rtas_addr,
@@ -581,19 +631,31 @@
 static void spapr_cpu_reset(void *opaque)
 {
     PowerPCCPU *cpu = opaque;
+    CPUPPCState *env = &cpu->env;
 
     cpu_reset(CPU(cpu));
+
+    /* All CPUs start halted.  CPU0 is unhalted from the machine level
+     * reset code and the rest are explicitly started up by the guest
+     * using an RTAS call */
+    env->halted = 1;
+
+    env->spr[SPR_HIOR] = 0;
+
+    env->external_htab = spapr->htab;
+    env->htab_base = -1;
+    env->htab_mask = HTAB_SIZE(spapr) - 1;
+    env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
+        (spapr->htab_shift - 18);
 }
 
 /* Returns whether we want to use VGA or not */
 static int spapr_vga_init(PCIBus *pci_bus)
 {
     switch (vga_interface_type) {
-    case VGA_STD:
-        pci_vga_init(pci_bus);
-        return 1;
     case VGA_NONE:
-        return 0;
+    case VGA_STD:
+        return pci_vga_init(pci_bus) != NULL;
     default:
         fprintf(stderr, "This vga model is not supported,"
                 "currently it only supports -vga std\n");
@@ -616,11 +678,10 @@
     int i;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
-    target_phys_addr_t rma_alloc_size, rma_size;
+    target_phys_addr_t rma_alloc_size;
     uint32_t initrd_base = 0;
     long kernel_size = 0, initrd_size = 0;
     long load_limit, rtas_limit, fw_size;
-    long pteg_shift = 17;
     char *filename;
 
     msi_supported = true;
@@ -637,20 +698,46 @@
         hw_error("qemu: Unable to create RMA\n");
         exit(1);
     }
+
     if (rma_alloc_size && (rma_alloc_size < ram_size)) {
-        rma_size = rma_alloc_size;
+        spapr->rma_size = rma_alloc_size;
     } else {
-        rma_size = ram_size;
+        spapr->rma_size = ram_size;
+
+        /* With KVM, we don't actually know whether KVM supports an
+         * unbounded RMA (PR KVM) or is limited by the hash table size
+         * (HV KVM using VRMA), so we always assume the latter
+         *
+         * In that case, we also limit the initial allocations for RTAS
+         * etc... to 256M since we have no way to know what the VRMA size
+         * is going to be as it depends on the size of the hash table
+         * isn't determined yet.
+         */
+        if (kvm_enabled()) {
+            spapr->vrma_adjust = 1;
+            spapr->rma_size = MIN(spapr->rma_size, 0x10000000);
+        }
     }
 
     /* We place the device tree and RTAS just below either the top of the RMA,
      * or just below 2GB, whichever is lowere, so that it can be
      * processed with 32-bit real mode code if necessary */
-    rtas_limit = MIN(rma_size, 0x80000000);
+    rtas_limit = MIN(spapr->rma_size, 0x80000000);
     spapr->rtas_addr = rtas_limit - RTAS_MAX_SIZE;
     spapr->fdt_addr = spapr->rtas_addr - FDT_MAX_SIZE;
     load_limit = spapr->fdt_addr - FW_OVERHEAD;
 
+    /* We aim for a hash table of size 1/128 the size of RAM.  The
+     * normal rule of thumb is 1/64 the size of RAM, but that's much
+     * more than needed for the Linux guests we support. */
+    spapr->htab_shift = 18; /* Minimum architected size */
+    while (spapr->htab_shift <= 46) {
+        if ((1ULL << (spapr->htab_shift + 7)) >= ram_size) {
+            break;
+        }
+        spapr->htab_shift++;
+    }
+
     /* init CPUs */
     if (cpu_model == NULL) {
         cpu_model = kvm_enabled() ? "host" : "POWER7";
@@ -665,11 +752,16 @@
 
         /* Set time-base frequency to 512 MHz */
         cpu_ppc_tb_init(env, TIMEBASE_FREQ);
-        qemu_register_reset(spapr_cpu_reset, cpu);
 
-        env->hreset_vector = 0x60;
+        /* PAPR always has exception vectors in RAM not ROM */
         env->hreset_excp_prefix = 0;
-        env->gpr[3] = env->cpu_index;
+
+        /* Tell KVM that we're in PAPR mode */
+        if (kvm_enabled()) {
+            kvmppc_set_papr(env);
+        }
+
+        qemu_register_reset(spapr_cpu_reset, cpu);
     }
 
     /* allocate RAM */
@@ -683,27 +775,6 @@
         memory_region_add_subregion(sysmem, nonrma_base, ram);
     }
 
-    /* allocate hash page table.  For now we always make this 16mb,
-     * later we should probably make it scale to the size of guest
-     * RAM */
-    spapr->htab_size = 1ULL << (pteg_shift + 7);
-    spapr->htab = qemu_memalign(spapr->htab_size, spapr->htab_size);
-
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        env->external_htab = spapr->htab;
-        env->htab_base = -1;
-        env->htab_mask = spapr->htab_size - 1;
-
-        /* Tell KVM that we're in PAPR mode */
-        env->spr[SPR_SDR1] = (unsigned long)spapr->htab |
-                             ((pteg_shift + 7) - 18);
-        env->spr[SPR_HIOR] = 0;
-
-        if (kvm_enabled()) {
-            kvmppc_set_papr(env);
-        }
-    }
-
     filename = qemu_find_file(QEMU_FILE_TYPE_BIOS, "spapr-rtas.bin");
     spapr->rtas_size = load_image_targphys(filename, spapr->rtas_addr,
                                            rtas_limit - spapr->rtas_addr);
@@ -776,7 +847,7 @@
         }
     }
 
-    if (rma_size < (MIN_RMA_SLOF << 20)) {
+    if (spapr->rma_size < (MIN_RMA_SLOF << 20)) {
         fprintf(stderr, "qemu: pSeries SLOF firmware requires >= "
                 "%ldM guest RMA (Real Mode Area memory)\n", MIN_RMA_SLOF);
         exit(1);
@@ -827,26 +898,19 @@
 
     spapr->entry_point = 0x100;
 
-    /* SLOF will startup the secondary CPUs using RTAS */
-    for (env = first_cpu; env != NULL; env = env->next_cpu) {
-        env->halted = 1;
-    }
-
     /* Prepare the device tree */
-    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model, rma_size,
+    spapr->fdt_skel = spapr_create_fdt_skel(cpu_model,
                                             initrd_base, initrd_size,
                                             kernel_size,
-                                            boot_device, kernel_cmdline,
-                                            pteg_shift + 7);
+                                            boot_device, kernel_cmdline);
     assert(spapr->fdt_skel != NULL);
-
-    qemu_register_reset(spapr_reset, spapr);
 }
 
 static QEMUMachine spapr_machine = {
     .name = "pseries",
     .desc = "pSeries Logical Partition (PAPR compliant)",
     .init = ppc_spapr_init,
+    .reset = ppc_spapr_reset,
     .max_cpus = MAX_CPUS,
     .no_parallel = 1,
     .use_scsi = 1,
diff --git a/hw/spapr.h b/hw/spapr.h
index ac34a17..e984e3f 100644
--- a/hw/spapr.h
+++ b/hw/spapr.h
@@ -15,7 +15,9 @@
 
     target_phys_addr_t ram_limit;
     void *htab;
-    long htab_size;
+    long htab_shift;
+    target_phys_addr_t rma_size;
+    int vrma_adjust;
     target_phys_addr_t fdt_addr, rtas_addr;
     long rtas_size;
     void *fdt_skel;
@@ -289,17 +291,17 @@
 target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode,
                              target_ulong *args);
 
-int spapr_allocate_irq(int hint, enum xics_irq_type type);
-int spapr_allocate_irq_block(int num, enum xics_irq_type type);
+int spapr_allocate_irq(int hint, bool lsi);
+int spapr_allocate_irq_block(int num, bool lsi);
 
 static inline int spapr_allocate_msi(int hint)
 {
-    return spapr_allocate_irq(hint, XICS_MSI);
+    return spapr_allocate_irq(hint, false);
 }
 
 static inline int spapr_allocate_lsi(int hint)
 {
-    return spapr_allocate_irq(hint, XICS_LSI);
+    return spapr_allocate_irq(hint, true);
 }
 
 static inline uint32_t rtas_ld(target_ulong phys, int n)
@@ -336,6 +338,8 @@
 void spapr_iommu_init(void);
 DMAContext *spapr_tce_new_dma_context(uint32_t liobn, size_t window_size);
 void spapr_tce_free(DMAContext *dma);
+void spapr_tce_reset(DMAContext *dma);
+void spapr_tce_set_bypass(DMAContext *dma, bool bypass);
 int spapr_dma_dt(void *fdt, int node_off, const char *propname,
                  uint32_t liobn, uint64_t window, uint32_t size);
 int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index a5990a9..194d9c2 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -1,6 +1,5 @@
 #include "sysemu.h"
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "qemu-char.h"
 #include "sysemu.h"
 #include "qemu-char.h"
@@ -40,22 +39,6 @@
 #define HPTE_V_1TB_SEG          0x4000000000000000ULL
 #define HPTE_V_VRMA_MASK        0x4001ffffff000000ULL
 
-#define HPTE_V_HVLOCK           0x40ULL
-
-static inline int lock_hpte(void *hpte, target_ulong bits)
-{
-    uint64_t pteh;
-
-    pteh = ldq_p(hpte);
-
-    /* We're protected by qemu's global lock here */
-    if (pteh & bits) {
-        return 0;
-    }
-    stq_p(hpte, pteh | HPTE_V_HVLOCK);
-    return 1;
-}
-
 static target_ulong compute_tlbie_rb(target_ulong v, target_ulong r,
                                      target_ulong pte_index)
 {
@@ -152,8 +135,7 @@
             if (i == 8) {
                 return H_PTEG_FULL;
             }
-            if (((ldq_p(hpte) & HPTE_V_VALID) == 0) &&
-                lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
+            if ((ldq_p(hpte) & HPTE_V_VALID) == 0) {
                 break;
             }
             hpte += HASH_PTE_SIZE_64;
@@ -161,7 +143,7 @@
     } else {
         i = 0;
         hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
-        if (!lock_hpte(hpte, HPTE_V_HVLOCK | HPTE_V_VALID)) {
+        if (ldq_p(hpte) & HPTE_V_VALID) {
             return H_PTEG_FULL;
         }
     }
@@ -169,7 +151,6 @@
     /* eieio();  FIXME: need some sort of barrier for smp? */
     stq_p(hpte, pteh);
 
-    assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
     args[0] = pte_index + i;
     return H_SUCCESS;
 }
@@ -194,11 +175,6 @@
     }
 
     hpte = env->external_htab + (ptex * HASH_PTE_SIZE_64);
-    while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
-        /* We have no real concurrency in qemu soft-emulation, so we
-         * will never actually have a contested lock */
-        assert(0);
-    }
 
     v = ldq_p(hpte);
     r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
@@ -206,16 +182,13 @@
     if ((v & HPTE_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn) ||
         ((flags & H_ANDCOND) && (v & avpn) != 0)) {
-        stq_p(hpte, v & ~HPTE_V_HVLOCK);
-        assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
         return REMOVE_NOT_FOUND;
     }
-    *vp = v & ~HPTE_V_HVLOCK;
+    *vp = v;
     *rp = r;
     stq_p(hpte, 0);
     rb = compute_tlbie_rb(v, r, ptex);
     ppc_tlb_invalidate_one(env, rb);
-    assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
     return REMOVE_SUCCESS;
 }
 
@@ -325,19 +298,12 @@
     }
 
     hpte = env->external_htab + (pte_index * HASH_PTE_SIZE_64);
-    while (!lock_hpte(hpte, HPTE_V_HVLOCK)) {
-        /* We have no real concurrency in qemu soft-emulation, so we
-         * will never actually have a contested lock */
-        assert(0);
-    }
 
     v = ldq_p(hpte);
     r = ldq_p(hpte + (HASH_PTE_SIZE_64/2));
 
     if ((v & HPTE_V_VALID) == 0 ||
         ((flags & H_AVPN) && (v & ~0x7fULL) != avpn)) {
-        stq_p(hpte, v & ~HPTE_V_HVLOCK);
-        assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
         return H_NOT_FOUND;
     }
 
@@ -351,8 +317,7 @@
     ppc_tlb_invalidate_one(env, rb);
     stq_p(hpte + (HASH_PTE_SIZE_64/2), r);
     /* Don't need a memory barrier, due to qemu's global lock */
-    stq_p(hpte, v & ~HPTE_V_HVLOCK);
-    assert(!(ldq_p(hpte) & HPTE_V_HVLOCK));
+    stq_p(hpte, v);
     return H_SUCCESS;
 }
 
@@ -545,6 +510,8 @@
     hreg_compute_hflags(env);
     if (!cpu_has_work(env)) {
         env->halted = 1;
+        env->exception_index = EXCP_HLT;
+        env->exit_request = 1;
     }
     return H_SUCCESS;
 }
@@ -714,11 +681,6 @@
 target_ulong spapr_hypercall(CPUPPCState *env, target_ulong opcode,
                              target_ulong *args)
 {
-    if (msr_pr) {
-        hcall_dprintf("Hypercall made with MSR[PR]=1\n");
-        return H_PRIVILEGE;
-    }
-
     if ((opcode <= MAX_HCALL_OPCODE)
         && ((opcode & 0x3) == 0)) {
         spapr_hcall_fn fn = papr_hypercall_table[opcode / 4];
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 53b7317..38034c0 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -42,6 +42,7 @@
     uint32_t liobn;
     uint32_t window_size;
     sPAPRTCE *table;
+    bool bypass;
     int fd;
     QLIST_ENTRY(sPAPRTCETable) list;
 };
@@ -78,6 +79,12 @@
             DMA_ADDR_FMT "\n", tcet->liobn, addr);
 #endif
 
+    if (tcet->bypass) {
+        *paddr = addr;
+        *len = (target_phys_addr_t)-1;
+        return 0;
+    }
+
     /* Check if we are in bound */
     if (addr >= tcet->window_size) {
 #ifdef DEBUG_TCE
@@ -162,6 +169,23 @@
     }
 }
 
+void spapr_tce_set_bypass(DMAContext *dma, bool bypass)
+{
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+
+    tcet->bypass = bypass;
+}
+
+void spapr_tce_reset(DMAContext *dma)
+{
+    sPAPRTCETable *tcet = DO_UPCAST(sPAPRTCETable, dma, dma);
+    size_t table_size = (tcet->window_size >> SPAPR_TCE_PAGE_SHIFT)
+        * sizeof(sPAPRTCE);
+
+    tcet->bypass = false;
+    memset(tcet->table, 0, table_size);
+}
+
 static target_ulong put_tce_emu(sPAPRTCETable *tcet, target_ulong ioba,
                                 target_ulong tce)
 {
diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
index 661c05b..b628f89 100644
--- a/hw/spapr_pci.c
+++ b/hw/spapr_pci.c
@@ -351,7 +351,7 @@
 
     /* There is no cached config, allocate MSIs */
     if (!phb->msi_table[ndev].nvec) {
-        irq = spapr_allocate_irq_block(req_num, XICS_MSI);
+        irq = spapr_allocate_irq_block(req_num, true);
         if (irq < 0) {
             fprintf(stderr, "Cannot allocate MSIs for device#%d", ndev);
             rtas_st(rets, 0, -1); /* Hardware error */
@@ -595,6 +595,15 @@
     return 0;
 }
 
+static void spapr_phb_reset(DeviceState *qdev)
+{
+    SysBusDevice *s = sysbus_from_qdev(qdev);
+    sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
+
+    /* Reset the IOMMU state */
+    spapr_tce_reset(sphb->dma);
+}
+
 static Property spapr_phb_properties[] = {
     DEFINE_PROP_HEX64("buid", sPAPRPHBState, buid, 0),
     DEFINE_PROP_STRING("busname", sPAPRPHBState, busname),
@@ -613,6 +622,7 @@
 
     sdc->init = spapr_phb_init;
     dc->props = spapr_phb_properties;
+    dc->reset = spapr_phb_reset;
 }
 
 static const TypeInfo spapr_phb_info = {
diff --git a/hw/spapr_rtas.c b/hw/spapr_rtas.c
index ae18595..b808f80 100644
--- a/hw/spapr_rtas.c
+++ b/hw/spapr_rtas.c
@@ -184,6 +184,11 @@
             return;
         }
 
+        /* This will make sure qemu state is up to date with kvm, and
+         * mark it dirty so our changes get flushed back before the
+         * new cpu enters */
+        kvm_cpu_synchronize_state(env);
+
         env->msr = (1ULL << MSR_SF) | (1ULL << MSR_ME);
         env->nip = start;
         env->gpr[3] = r3;
diff --git a/hw/spapr_vio.c b/hw/spapr_vio.c
index 7ca4452..848806d 100644
--- a/hw/spapr_vio.c
+++ b/hw/spapr_vio.c
@@ -316,17 +316,10 @@
 
 static void spapr_vio_quiesce_one(VIOsPAPRDevice *dev)
 {
-    VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-    uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-
     if (dev->dma) {
-        spapr_tce_free(dev->dma);
+        spapr_tce_reset(dev->dma);
     }
-    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
-
-    dev->crq.qladdr = 0;
-    dev->crq.qsize = 0;
-    dev->crq.qnext = 0;
+    free_crq(dev);
 }
 
 static void rtas_set_tce_bypass(sPAPREnvironment *spapr, uint32_t token,
@@ -348,16 +341,14 @@
         rtas_st(rets, 0, -3);
         return;
     }
-    if (enable) {
-        spapr_tce_free(dev->dma);
-        dev->dma = NULL;
-    } else {
-        VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
 
-        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    if (!dev->dma) {
+        rtas_st(rets, 0, -3);
+        return;
     }
 
+    spapr_tce_set_bypass(dev->dma, !!enable);
+
     rtas_st(rets, 0, 0);
 }
 
@@ -409,9 +400,10 @@
     VIOsPAPRDevice *dev = DO_UPCAST(VIOsPAPRDevice, qdev, qdev);
     VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
 
-    if (dev->crq.qsize) {
-        free_crq(dev);
-    }
+    /* Shut down the request queue and TCEs if necessary */
+    spapr_vio_quiesce_one(dev);
+
+    dev->signal_state = 0;
 
     if (pc->reset) {
         pc->reset(dev);
@@ -422,7 +414,6 @@
 {
     VIOsPAPRDevice *dev = (VIOsPAPRDevice *)qdev;
     VIOsPAPRDeviceClass *pc = VIO_SPAPR_DEVICE_GET_CLASS(dev);
-    uint32_t liobn;
     char *id;
 
     if (dev->reg != -1) {
@@ -464,8 +455,10 @@
         return -1;
     }
 
-    liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
-    dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    if (pc->rtce_window_size) {
+        uint32_t liobn = SPAPR_VIO_BASE_LIOBN | dev->reg;
+        dev->dma = spapr_tce_new_dma_context(liobn, pc->rtce_window_size);
+    }
 
     return pc->init(dev);
 }
diff --git a/hw/spapr_vio.h b/hw/spapr_vio.h
index ea6aa43..cc85d26 100644
--- a/hw/spapr_vio.h
+++ b/hw/spapr_vio.h
@@ -60,7 +60,6 @@
 struct VIOsPAPRDevice {
     DeviceState qdev;
     uint32_t reg;
-    uint32_t flags;
     uint32_t irq;
     target_ulong signal_state;
     VIOsPAPR_CRQ crq;
@@ -132,7 +131,6 @@
 
 VIOsPAPRDevice *spapr_vty_get_default(VIOsPAPRBus *bus);
 
-int spapr_tce_set_bypass(uint32_t unit, uint32_t enable);
 void spapr_vio_quiesce(void);
 
 #endif /* _HW_SPAPR_VIO_H */
diff --git a/hw/spapr_vscsi.c b/hw/spapr_vscsi.c
index 3cf5844..e3d4b23 100644
--- a/hw/spapr_vscsi.c
+++ b/hw/spapr_vscsi.c
@@ -737,7 +737,7 @@
 #endif
     memset(&info, 0, sizeof(info));
     strcpy(info.srp_version, SRP_VERSION);
-    strncpy(info.partition_name, "qemu", sizeof("qemu"));
+    memcpy(info.partition_name, "qemu", sizeof("qemu"));
     info.partition_number = cpu_to_be32(0);
     info.mad_version = cpu_to_be32(1);
     info.os_type = cpu_to_be32(2);
diff --git a/hw/spitz.c b/hw/spitz.c
index 20e7835..24346dc 100644
--- a/hw/spitz.c
+++ b/hw/spitz.c
@@ -1083,10 +1083,11 @@
 
 static const VMStateDescription vmstate_corgi_ssp_regs = {
     .name = "corgi-ssp",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .minimum_version_id_old = 2,
     .fields = (VMStateField []) {
+        VMSTATE_SSI_SLAVE(ssidev, CorgiSSPState),
         VMSTATE_UINT32_ARRAY(enable, CorgiSSPState, 3),
         VMSTATE_END_OF_LIST(),
     }
@@ -1115,6 +1116,7 @@
     .minimum_version_id = 1,
     .minimum_version_id_old = 1,
     .fields = (VMStateField []) {
+        VMSTATE_SSI_SLAVE(ssidev, SpitzLCDTG),
         VMSTATE_UINT32(bl_intensity, SpitzLCDTG),
         VMSTATE_UINT32(bl_power, SpitzLCDTG),
         VMSTATE_END_OF_LIST(),
diff --git a/hw/srp.h b/hw/srp.h
index 3009bd5..5e0cad5 100644
--- a/hw/srp.h
+++ b/hw/srp.h
@@ -177,13 +177,13 @@
     uint8_t    reserved1[6];
     uint64_t   tag;
     uint8_t    reserved2[4];
-    uint64_t   lun QEMU_PACKED;
+    uint64_t   lun;
     uint8_t    reserved3[2];
     uint8_t    tsk_mgmt_func;
     uint8_t    reserved4;
     uint64_t   task_tag;
     uint8_t    reserved5[8];
-};
+} QEMU_PACKED;
 
 /*
  * We need the packed attribute because the SRP spec only aligns the
@@ -198,14 +198,14 @@
     uint8_t    data_in_desc_cnt;
     uint64_t   tag;
     uint8_t    reserved2[4];
-    uint64_t   lun QEMU_PACKED;
+    uint64_t   lun;
     uint8_t    reserved3;
     uint8_t    task_attr;
     uint8_t    reserved4;
     uint8_t    add_cdb_len;
     uint8_t    cdb[16];
     uint8_t    add_data[0];
-};
+} QEMU_PACKED;
 
 enum {
     SRP_RSP_FLAG_RSPVALID = 1 << 0,
diff --git a/hw/ssd0323.c b/hw/ssd0323.c
index b101c51..9c42d64 100644
--- a/hw/ssd0323.c
+++ b/hw/ssd0323.c
@@ -279,6 +279,7 @@
 
 static void ssd0323_save(QEMUFile *f, void *opaque)
 {
+    SSISlave *ss = SSI_SLAVE(opaque);
     ssd0323_state *s = (ssd0323_state *)opaque;
     int i;
 
@@ -296,10 +297,13 @@
     qemu_put_be32(f, s->remap);
     qemu_put_be32(f, s->mode);
     qemu_put_buffer(f, s->framebuffer, sizeof(s->framebuffer));
+
+    qemu_put_be32(f, ss->cs);
 }
 
 static int ssd0323_load(QEMUFile *f, void *opaque, int version_id)
 {
+    SSISlave *ss = SSI_SLAVE(opaque);
     ssd0323_state *s = (ssd0323_state *)opaque;
     int i;
 
@@ -321,6 +325,8 @@
     s->mode = qemu_get_be32(f);
     qemu_get_buffer(f, s->framebuffer, sizeof(s->framebuffer));
 
+    ss->cs = qemu_get_be32(f);
+
     return 0;
 }
 
@@ -348,6 +354,7 @@
 
     k->init = ssd0323_init;
     k->transfer = ssd0323_transfer;
+    k->cs_polarity = SSI_CS_HIGH;
 }
 
 static TypeInfo ssd0323_info = {
diff --git a/hw/ssi-sd.c b/hw/ssi-sd.c
index b519bdb..c5505ee 100644
--- a/hw/ssi-sd.c
+++ b/hw/ssi-sd.c
@@ -197,6 +197,7 @@
 
 static void ssi_sd_save(QEMUFile *f, void *opaque)
 {
+    SSISlave *ss = SSI_SLAVE(opaque);
     ssi_sd_state *s = (ssi_sd_state *)opaque;
     int i;
 
@@ -209,10 +210,13 @@
     qemu_put_be32(f, s->arglen);
     qemu_put_be32(f, s->response_pos);
     qemu_put_be32(f, s->stopping);
+
+    qemu_put_be32(f, ss->cs);
 }
 
 static int ssi_sd_load(QEMUFile *f, void *opaque, int version_id)
 {
+    SSISlave *ss = SSI_SLAVE(opaque);
     ssi_sd_state *s = (ssi_sd_state *)opaque;
     int i;
 
@@ -229,6 +233,8 @@
     s->response_pos = qemu_get_be32(f);
     s->stopping = qemu_get_be32(f);
 
+    ss->cs = qemu_get_be32(f);
+
     return 0;
 }
 
@@ -250,6 +256,7 @@
 
     k->init = ssi_sd_init;
     k->transfer = ssi_sd_transfer;
+    k->cs_polarity = SSI_CS_LOW;
 }
 
 static TypeInfo ssi_sd_info = {
diff --git a/hw/ssi.c b/hw/ssi.c
index e5f14a0..2b56357 100644
--- a/hw/ssi.c
+++ b/hw/ssi.c
@@ -2,6 +2,8 @@
  * QEMU Synchronous Serial Interface support
  *
  * Copyright (c) 2009 CodeSourcery.
+ * Copyright (c) 2012 Peter A.G. Crosthwaite (peter.crosthwaite@petalogix.com)
+ * Copyright (c) 2012 PetaLogix Pty Ltd.
  * Written by Paul Brook
  *
  * This code is licensed under the GNU GPL v2.
@@ -25,17 +27,40 @@
     .instance_size = sizeof(SSIBus),
 };
 
+static void ssi_cs_default(void *opaque, int n, int level)
+{
+    SSISlave *s = SSI_SLAVE(opaque);
+    bool cs = !!level;
+    assert(n == 0);
+    if (s->cs != cs) {
+        SSISlaveClass *ssc = SSI_SLAVE_GET_CLASS(s);
+        if (ssc->set_cs) {
+            ssc->set_cs(s, cs);
+        }
+    }
+    s->cs = cs;
+}
+
+static uint32_t ssi_transfer_raw_default(SSISlave *dev, uint32_t val)
+{
+    SSISlaveClass *ssc = SSI_SLAVE_GET_CLASS(dev);
+
+    if ((dev->cs && ssc->cs_polarity == SSI_CS_HIGH) ||
+            (!dev->cs && ssc->cs_polarity == SSI_CS_LOW) ||
+            ssc->cs_polarity == SSI_CS_NONE) {
+        return ssc->transfer(dev, val);
+    }
+    return 0;
+}
+
 static int ssi_slave_init(DeviceState *dev)
 {
     SSISlave *s = SSI_SLAVE(dev);
     SSISlaveClass *ssc = SSI_SLAVE_GET_CLASS(s);
-    SSIBus *bus;
-    BusChild *kid;
 
-    bus = FROM_QBUS(SSIBus, qdev_get_parent_bus(dev));
-    kid = QTAILQ_FIRST(&bus->qbus.children);
-    if (kid->child != dev || QTAILQ_NEXT(kid, sibling) != NULL) {
-        hw_error("Too many devices on SSI bus");
+    if (ssc->transfer_raw == ssi_transfer_raw_default &&
+            ssc->cs_polarity != SSI_CS_NONE) {
+        qdev_init_gpio_in(&s->qdev, ssi_cs_default, 1);
     }
 
     return ssc->init(s);
@@ -43,9 +68,14 @@
 
 static void ssi_slave_class_init(ObjectClass *klass, void *data)
 {
+    SSISlaveClass *ssc = SSI_SLAVE_CLASS(klass);
     DeviceClass *dc = DEVICE_CLASS(klass);
+
     dc->init = ssi_slave_init;
     dc->bus_type = TYPE_SSI_BUS;
+    if (!ssc->transfer_raw) {
+        ssc->transfer_raw = ssi_transfer_raw_default;
+    }
 }
 
 static TypeInfo ssi_slave_info = {
@@ -56,10 +86,15 @@
     .abstract = true,
 };
 
+DeviceState *ssi_create_slave_no_init(SSIBus *bus, const char *name)
+{
+    return qdev_create(&bus->qbus, name);
+}
+
 DeviceState *ssi_create_slave(SSIBus *bus, const char *name)
 {
-    DeviceState *dev;
-    dev = qdev_create(&bus->qbus, name);
+    DeviceState *dev = ssi_create_slave_no_init(bus, name);
+
     qdev_init_nofail(dev);
     return dev;
 }
@@ -74,18 +109,29 @@
 uint32_t ssi_transfer(SSIBus *bus, uint32_t val)
 {
     BusChild *kid;
-    SSISlave *slave;
     SSISlaveClass *ssc;
+    uint32_t r = 0;
 
-    kid = QTAILQ_FIRST(&bus->qbus.children);
-    if (!kid) {
-        return 0;
+    QTAILQ_FOREACH(kid, &bus->qbus.children, sibling) {
+        SSISlave *slave = SSI_SLAVE(kid->child);
+        ssc = SSI_SLAVE_GET_CLASS(slave);
+        r |= ssc->transfer_raw(slave, val);
     }
-    slave = SSI_SLAVE(kid->child);
-    ssc = SSI_SLAVE_GET_CLASS(slave);
-    return ssc->transfer(slave, val);
+
+    return r;
 }
 
+const VMStateDescription vmstate_ssi_slave = {
+    .name = "SSISlave",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_BOOL(cs, SSISlave),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static void ssi_slave_register_types(void)
 {
     type_register_static(&ssi_bus_info);
@@ -93,3 +139,36 @@
 }
 
 type_init(ssi_slave_register_types)
+
+typedef struct SSIAutoConnectArg {
+    qemu_irq **cs_linep;
+    SSIBus *bus;
+} SSIAutoConnectArg;
+
+static int ssi_auto_connect_slave(Object *child, void *opaque)
+{
+    SSIAutoConnectArg *arg = opaque;
+    SSISlave *dev = (SSISlave *)object_dynamic_cast(child, TYPE_SSI_SLAVE);
+    qemu_irq cs_line;
+
+    if (!dev) {
+        return 0;
+    }
+
+    cs_line = qdev_get_gpio_in(DEVICE(dev), 0);
+    qdev_set_parent_bus(DEVICE(dev), &arg->bus->qbus);
+    **arg->cs_linep = cs_line;
+    (*arg->cs_linep)++;
+    return 0;
+}
+
+void ssi_auto_connect_slaves(DeviceState *parent, qemu_irq *cs_line,
+                             SSIBus *bus)
+{
+    SSIAutoConnectArg arg = {
+        .cs_linep = &cs_line,
+        .bus = bus
+    };
+
+    object_child_foreach(OBJECT(parent), ssi_auto_connect_slave, &arg);
+}
diff --git a/hw/ssi.h b/hw/ssi.h
index 06657d7..a05d60b 100644
--- a/hw/ssi.h
+++ b/hw/ssi.h
@@ -23,28 +23,70 @@
 #define SSI_SLAVE_GET_CLASS(obj) \
      OBJECT_GET_CLASS(SSISlaveClass, (obj), TYPE_SSI_SLAVE)
 
+typedef enum {
+    SSI_CS_NONE = 0,
+    SSI_CS_LOW,
+    SSI_CS_HIGH,
+} SSICSMode;
+
 /* Slave devices.  */
 typedef struct SSISlaveClass {
     DeviceClass parent_class;
 
     int (*init)(SSISlave *dev);
+
+    /* if you have standard or no CS behaviour, just override transfer.
+     * This is called when the device cs is active (true by default).
+     */
     uint32_t (*transfer)(SSISlave *dev, uint32_t val);
+    /* called when the CS line changes. Optional, devices only need to implement
+     * this if they have side effects associated with the cs line (beyond
+     * tristating the txrx lines).
+     */
+    int (*set_cs)(SSISlave *dev, bool select);
+    /* define whether or not CS exists and is active low/high */
+    SSICSMode cs_polarity;
+
+    /* if you have non-standard CS behaviour override this to take control
+     * of the CS behaviour at the device level. transfer, set_cs, and
+     * cs_polarity are unused if this is overwritten. Transfer_raw will
+     * always be called for the device for every txrx access to the parent bus
+     */
+    uint32_t (*transfer_raw)(SSISlave *dev, uint32_t val);
 } SSISlaveClass;
 
 struct SSISlave {
     DeviceState qdev;
+
+    /* Chip select state */
+    bool cs;
 };
 
 #define SSI_SLAVE_FROM_QDEV(dev) DO_UPCAST(SSISlave, qdev, dev)
 #define FROM_SSI_SLAVE(type, dev) DO_UPCAST(type, ssidev, dev)
 
+extern const VMStateDescription vmstate_ssi_slave;
+
+#define VMSTATE_SSI_SLAVE(_field, _state) {                          \
+    .name       = (stringify(_field)),                               \
+    .size       = sizeof(SSISlave),                                  \
+    .vmsd       = &vmstate_ssi_slave,                                \
+    .flags      = VMS_STRUCT,                                        \
+    .offset     = vmstate_offset_value(_state, _field, SSISlave),    \
+}
+
 DeviceState *ssi_create_slave(SSIBus *bus, const char *name);
+DeviceState *ssi_create_slave_no_init(SSIBus *bus, const char *name);
 
 /* Master interface.  */
 SSIBus *ssi_create_bus(DeviceState *parent, const char *name);
 
 uint32_t ssi_transfer(SSIBus *bus, uint32_t val);
 
+/* Automatically connect all children nodes a spi controller as slaves */
+void ssi_auto_connect_slaves(DeviceState *parent, qemu_irq *cs_lines,
+                             SSIBus *bus);
+
 /* max111x.c */
 void max111x_set_input(DeviceState *dev, int line, uint8_t value);
 
diff --git a/hw/stellaris.c b/hw/stellaris.c
index 562fbbf..353ca4c 100644
--- a/hw/stellaris.c
+++ b/hw/stellaris.c
@@ -1154,57 +1154,6 @@
     return 0;
 }
 
-/* Some boards have both an OLED controller and SD card connected to
-   the same SSI port, with the SD card chip select connected to a
-   GPIO pin.  Technically the OLED chip select is connected to the SSI
-   Fss pin.  We do not bother emulating that as both devices should
-   never be selected simultaneously, and our OLED controller ignores stray
-   0xff commands that occur when deselecting the SD card.  */
-
-typedef struct {
-    SSISlave ssidev;
-    qemu_irq irq;
-    int current_dev;
-    SSIBus *bus[2];
-} stellaris_ssi_bus_state;
-
-static void stellaris_ssi_bus_select(void *opaque, int irq, int level)
-{
-    stellaris_ssi_bus_state *s = (stellaris_ssi_bus_state *)opaque;
-
-    s->current_dev = level;
-}
-
-static uint32_t stellaris_ssi_bus_transfer(SSISlave *dev, uint32_t val)
-{
-    stellaris_ssi_bus_state *s = FROM_SSI_SLAVE(stellaris_ssi_bus_state, dev);
-
-    return ssi_transfer(s->bus[s->current_dev], val);
-}
-
-static const VMStateDescription vmstate_stellaris_ssi_bus = {
-    .name = "stellaris_ssi_bus",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
-    .fields      = (VMStateField[]) {
-        VMSTATE_INT32(current_dev, stellaris_ssi_bus_state),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
-static int stellaris_ssi_bus_init(SSISlave *dev)
-{
-    stellaris_ssi_bus_state *s = FROM_SSI_SLAVE(stellaris_ssi_bus_state, dev);
-
-    s->bus[0] = ssi_create_bus(&dev->qdev, "ssi0");
-    s->bus[1] = ssi_create_bus(&dev->qdev, "ssi1");
-    qdev_init_gpio_in(&dev->qdev, stellaris_ssi_bus_select, 1);
-
-    vmstate_register(&dev->qdev, -1, &vmstate_stellaris_ssi_bus, s);
-    return 0;
-}
-
 /* Board init.  */
 static stellaris_board_info stellaris_boards[] = {
   { "LM3S811EVB",
@@ -1305,19 +1254,25 @@
     if (board->dc2 & (1 << 4)) {
         dev = sysbus_create_simple("pl022", 0x40008000, pic[7]);
         if (board->peripherals & BP_OLED_SSI) {
-            DeviceState *mux;
             void *bus;
+            DeviceState *sddev;
+            DeviceState *ssddev;
 
+            /* Some boards have both an OLED controller and SD card connected to
+             * the same SSI port, with the SD card chip select connected to a
+             * GPIO pin.  Technically the OLED chip select is connected to the
+             * SSI Fss pin.  We do not bother emulating that as both devices
+             * should never be selected simultaneously, and our OLED controller
+             * ignores stray 0xff commands that occur when deselecting the SD
+             * card.
+             */
             bus = qdev_get_child_bus(dev, "ssi");
-            mux = ssi_create_slave(bus, "evb6965-ssi");
-            gpio_out[GPIO_D][0] = qdev_get_gpio_in(mux, 0);
 
-            bus = qdev_get_child_bus(mux, "ssi0");
-            ssi_create_slave(bus, "ssi-sd");
-
-            bus = qdev_get_child_bus(mux, "ssi1");
-            dev = ssi_create_slave(bus, "ssd0323");
-            gpio_out[GPIO_C][7] = qdev_get_gpio_in(dev, 0);
+            sddev = ssi_create_slave(bus, "ssi-sd");
+            ssddev = ssi_create_slave(bus, "ssd0323");
+            gpio_out[GPIO_D][0] = qemu_irq_split(qdev_get_gpio_in(sddev, 0),
+                                                 qdev_get_gpio_in(ssddev, 0));
+            gpio_out[GPIO_C][7] = qdev_get_gpio_in(ssddev, 1);
 
             /* Make sure the select pin is high.  */
             qemu_irq_raise(gpio_out[GPIO_D][0]);
@@ -1394,21 +1349,6 @@
 
 machine_init(stellaris_machine_init);
 
-static void stellaris_ssi_bus_class_init(ObjectClass *klass, void *data)
-{
-    SSISlaveClass *k = SSI_SLAVE_CLASS(klass);
-
-    k->init = stellaris_ssi_bus_init;
-    k->transfer = stellaris_ssi_bus_transfer;
-}
-
-static TypeInfo stellaris_ssi_bus_info = {
-    .name          = "evb6965-ssi",
-    .parent        = TYPE_SSI_SLAVE,
-    .instance_size = sizeof(stellaris_ssi_bus_state),
-    .class_init    = stellaris_ssi_bus_class_init,
-};
-
 static void stellaris_i2c_class_init(ObjectClass *klass, void *data)
 {
     SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
@@ -1456,7 +1396,6 @@
     type_register_static(&stellaris_i2c_info);
     type_register_static(&stellaris_gptm_info);
     type_register_static(&stellaris_adc_info);
-    type_register_static(&stellaris_ssi_bus_info);
 }
 
 type_init(stellaris_register_types)
diff --git a/hw/sun4m.c b/hw/sun4m.c
index 0f909b5..c98cd5e 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -472,6 +472,17 @@
     }
 }
 
+static qemu_irq  slavio_system_powerdown;
+
+static void slavio_powerdown_req(Notifier *n, void *opaque)
+{
+    qemu_irq_raise(slavio_system_powerdown);
+}
+
+static Notifier slavio_system_powerdown_notifier = {
+    .notify = slavio_powerdown_req
+};
+
 #define MISC_LEDS 0x01600000
 #define MISC_CFG  0x01800000
 #define MISC_DIAG 0x01a00000
@@ -514,7 +525,8 @@
     }
     sysbus_connect_irq(s, 0, irq);
     sysbus_connect_irq(s, 1, fdc_tc);
-    qemu_system_powerdown = qdev_get_gpio_in(dev, 0);
+    slavio_system_powerdown = qdev_get_gpio_in(dev, 0);
+    qemu_register_powerdown_notifier(&slavio_system_powerdown_notifier);
 }
 
 static void ecc_init(target_phys_addr_t base, qemu_irq irq, uint32_t version)
diff --git a/hw/sun4u.c b/hw/sun4u.c
index 07cd042..137a7c6 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -39,7 +39,6 @@
 #include "elf.h"
 #include "blockdev.h"
 #include "exec-memory.h"
-#include "vga-pci.h"
 
 //#define DEBUG_IRQ
 //#define DEBUG_EBUS
diff --git a/hw/sysbus.c b/hw/sysbus.c
index 9d8b1ea..c173840 100644
--- a/hw/sysbus.c
+++ b/hw/sysbus.c
@@ -211,7 +211,7 @@
         snprintf(path + off, sizeof(path) - off, "@i%04x", s->pio[0]);
     }
 
-    return strdup(path);
+    return g_strdup(path);
 }
 
 void sysbus_add_memory(SysBusDevice *dev, target_phys_addr_t addr,
diff --git a/hw/tcx.c b/hw/tcx.c
index 93994d6..2db2db1 100644
--- a/hw/tcx.c
+++ b/hw/tcx.c
@@ -472,7 +472,6 @@
     default:
         break;
     }
-    return;
 }
 
 static const MemoryRegionOps tcx_dac_ops = {
diff --git a/hw/usb.h b/hw/usb.h
index b8fceec..48c8926 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -135,8 +135,15 @@
 #define USB_DT_OTHER_SPEED_CONFIG       0x07
 #define USB_DT_DEBUG                    0x0A
 #define USB_DT_INTERFACE_ASSOC          0x0B
+#define USB_DT_BOS                      0x0F
+#define USB_DT_DEVICE_CAPABILITY        0x10
 #define USB_DT_CS_INTERFACE             0x24
 #define USB_DT_CS_ENDPOINT              0x25
+#define USB_DT_ENDPOINT_COMPANION       0x30
+
+#define USB_DEV_CAP_WIRELESS            0x01
+#define USB_DEV_CAP_USB2_EXT            0x02
+#define USB_DEV_CAP_SUPERSPEED          0x03
 
 #define USB_ENDPOINT_XFER_CONTROL	0
 #define USB_ENDPOINT_XFER_ISOC		1
@@ -377,6 +384,8 @@
                                 uint16_t raw);
 int usb_ep_get_max_packet_size(USBDevice *dev, int pid, int ep);
 void usb_ep_set_pipeline(USBDevice *dev, int pid, int ep, bool enabled);
+USBPacket *usb_ep_find_packet_by_id(USBDevice *dev, int pid, int ep,
+                                    uint64_t id);
 
 void usb_attach(USBPort *port);
 void usb_detach(USBPort *port);
diff --git a/hw/usb/Makefile.objs b/hw/usb/Makefile.objs
index 4225136..6425c1f 100644
--- a/hw/usb/Makefile.objs
+++ b/hw/usb/Makefile.objs
@@ -1,11 +1,11 @@
-hw-obj-$(CONFIG_USB_UHCI) += hcd-uhci.o
-hw-obj-$(CONFIG_USB_OHCI) += hcd-ohci.o
-hw-obj-$(CONFIG_USB_EHCI) += hcd-ehci.o
-hw-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o
-hw-obj-y += libhw.o
+common-obj-$(CONFIG_USB_UHCI) += hcd-uhci.o
+common-obj-$(CONFIG_USB_OHCI) += hcd-ohci.o
+common-obj-$(CONFIG_USB_EHCI) += hcd-ehci.o
+common-obj-$(CONFIG_USB_XHCI) += hcd-xhci.o
+common-obj-y += libhw.o
 
-hw-obj-$(CONFIG_SMARTCARD) += dev-smartcard-reader.o
-hw-obj-$(CONFIG_USB_REDIR) += redirect.o
+common-obj-$(CONFIG_SMARTCARD) += dev-smartcard-reader.o
+common-obj-$(CONFIG_USB_REDIR) += redirect.o
 
 common-obj-y += core.o bus.o desc.o dev-hub.o
 common-obj-y += host-$(HOST_USB).o dev-bluetooth.o
diff --git a/hw/usb/core.c b/hw/usb/core.c
index 2da38e7..b9f1f7a 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -398,9 +398,11 @@
              * When pipelining is enabled usb-devices must always return async,
              * otherwise packets can complete out of order!
              */
-            assert(!p->ep->pipeline);
-            p->result = ret;
-            usb_packet_set_state(p, USB_PACKET_COMPLETE);
+            assert(!p->ep->pipeline || QTAILQ_EMPTY(&p->ep->queue));
+            if (ret != USB_RET_NAK) {
+                p->result = ret;
+                usb_packet_set_state(p, USB_PACKET_COMPLETE);
+            }
         }
     } else {
         ret = USB_RET_ASYNC;
@@ -724,3 +726,18 @@
     struct USBEndpoint *uep = usb_ep_get(dev, pid, ep);
     uep->pipeline = enabled;
 }
+
+USBPacket *usb_ep_find_packet_by_id(USBDevice *dev, int pid, int ep,
+                                    uint64_t id)
+{
+    struct USBEndpoint *uep = usb_ep_get(dev, pid, ep);
+    USBPacket *p;
+
+    while ((p = QTAILQ_FIRST(&uep->queue)) != NULL) {
+        if (p->id == id) {
+            return p;
+        }
+    }
+
+    return NULL;
+}
diff --git a/hw/usb/desc.c b/hw/usb/desc.c
index 0a9d3c9..1f12eae 100644
--- a/hw/usb/desc.c
+++ b/hw/usb/desc.c
@@ -76,7 +76,8 @@
     return bLength;
 }
 
-int usb_desc_config(const USBDescConfig *conf, uint8_t *dest, size_t len)
+int usb_desc_config(const USBDescConfig *conf, int flags,
+                    uint8_t *dest, size_t len)
 {
     uint8_t  bLength = 0x09;
     uint16_t wTotalLength = 0;
@@ -99,7 +100,7 @@
 
     /* handle grouped interfaces if any */
     for (i = 0; i < conf->nif_groups; i++) {
-        rc = usb_desc_iface_group(&(conf->if_groups[i]),
+        rc = usb_desc_iface_group(&(conf->if_groups[i]), flags,
                                   dest + wTotalLength,
                                   len - wTotalLength);
         if (rc < 0) {
@@ -110,7 +111,8 @@
 
     /* handle normal (ungrouped / no IAD) interfaces if any */
     for (i = 0; i < conf->nif; i++) {
-        rc = usb_desc_iface(conf->ifs + i, dest + wTotalLength, len - wTotalLength);
+        rc = usb_desc_iface(conf->ifs + i, flags,
+                            dest + wTotalLength, len - wTotalLength);
         if (rc < 0) {
             return rc;
         }
@@ -122,8 +124,8 @@
     return wTotalLength;
 }
 
-int usb_desc_iface_group(const USBDescIfaceAssoc *iad, uint8_t *dest,
-                         size_t len)
+int usb_desc_iface_group(const USBDescIfaceAssoc *iad, int flags,
+                         uint8_t *dest, size_t len)
 {
     int pos = 0;
     int i = 0;
@@ -147,7 +149,7 @@
 
     /* handle associated interfaces in this group */
     for (i = 0; i < iad->nif; i++) {
-        int rc = usb_desc_iface(&(iad->ifs[i]), dest + pos, len - pos);
+        int rc = usb_desc_iface(&(iad->ifs[i]), flags, dest + pos, len - pos);
         if (rc < 0) {
             return rc;
         }
@@ -157,7 +159,8 @@
     return pos;
 }
 
-int usb_desc_iface(const USBDescIface *iface, uint8_t *dest, size_t len)
+int usb_desc_iface(const USBDescIface *iface, int flags,
+                   uint8_t *dest, size_t len)
 {
     uint8_t bLength = 0x09;
     int i, rc, pos = 0;
@@ -188,7 +191,7 @@
     }
 
     for (i = 0; i < iface->bNumEndpoints; i++) {
-        rc = usb_desc_endpoint(iface->eps + i, dest + pos, len - pos);
+        rc = usb_desc_endpoint(iface->eps + i, flags, dest + pos, len - pos);
         if (rc < 0) {
             return rc;
         }
@@ -198,13 +201,15 @@
     return pos;
 }
 
-int usb_desc_endpoint(const USBDescEndpoint *ep, uint8_t *dest, size_t len)
+int usb_desc_endpoint(const USBDescEndpoint *ep, int flags,
+                      uint8_t *dest, size_t len)
 {
     uint8_t bLength = ep->is_audio ? 0x09 : 0x07;
     uint8_t extralen = ep->extra ? ep->extra[0] : 0;
+    uint8_t superlen = (flags & USB_DESC_FLAG_SUPER) ? 0x06 : 0;
     USBDescriptor *d = (void *)dest;
 
-    if (len < bLength + extralen) {
+    if (len < bLength + extralen + superlen) {
         return -1;
     }
 
@@ -224,7 +229,21 @@
         memcpy(dest + bLength, ep->extra, extralen);
     }
 
-    return bLength + extralen;
+    if (superlen) {
+        USBDescriptor *d = (void *)(dest + bLength + extralen);
+
+        d->bLength                       = 0x06;
+        d->bDescriptorType               = USB_DT_ENDPOINT_COMPANION;
+
+        d->u.super_endpoint.bMaxBurst    = ep->bMaxBurst;
+        d->u.super_endpoint.bmAttributes = ep->bmAttributes_super;
+        d->u.super_endpoint.wBytesPerInterval_lo =
+            usb_lo(ep->wBytesPerInterval);
+        d->u.super_endpoint.wBytesPerInterval_hi =
+            usb_hi(ep->wBytesPerInterval);
+    }
+
+    return bLength + extralen + superlen;
 }
 
 int usb_desc_other(const USBDescOther *desc, uint8_t *dest, size_t len)
@@ -239,6 +258,111 @@
     return bLength;
 }
 
+static int usb_desc_cap_usb2_ext(const USBDesc *desc, uint8_t *dest, size_t len)
+{
+    uint8_t  bLength = 0x07;
+    USBDescriptor *d = (void *)dest;
+
+    if (len < bLength) {
+        return -1;
+    }
+
+    d->bLength                          = bLength;
+    d->bDescriptorType                  = USB_DT_DEVICE_CAPABILITY;
+    d->u.cap.bDevCapabilityType         = USB_DEV_CAP_USB2_EXT;
+
+    d->u.cap.u.usb2_ext.bmAttributes_1  = (1 << 1);  /* LPM */
+    d->u.cap.u.usb2_ext.bmAttributes_2  = 0;
+    d->u.cap.u.usb2_ext.bmAttributes_3  = 0;
+    d->u.cap.u.usb2_ext.bmAttributes_4  = 0;
+
+    return bLength;
+}
+
+static int usb_desc_cap_super(const USBDesc *desc, uint8_t *dest, size_t len)
+{
+    uint8_t  bLength = 0x0a;
+    USBDescriptor *d = (void *)dest;
+
+    if (len < bLength) {
+        return -1;
+    }
+
+    d->bLength                           = bLength;
+    d->bDescriptorType                   = USB_DT_DEVICE_CAPABILITY;
+    d->u.cap.bDevCapabilityType          = USB_DEV_CAP_SUPERSPEED;
+
+    d->u.cap.u.super.bmAttributes        = 0;
+    d->u.cap.u.super.wSpeedsSupported_lo = 0;
+    d->u.cap.u.super.wSpeedsSupported_hi = 0;
+    d->u.cap.u.super.bFunctionalitySupport = 0;
+    d->u.cap.u.super.bU1DevExitLat       = 0x0a;
+    d->u.cap.u.super.wU2DevExitLat_lo    = 0x20;
+    d->u.cap.u.super.wU2DevExitLat_hi    = 0;
+
+    if (desc->full) {
+        d->u.cap.u.super.wSpeedsSupported_lo |= (1 << 1);
+        d->u.cap.u.super.bFunctionalitySupport = 1;
+    }
+    if (desc->high) {
+        d->u.cap.u.super.wSpeedsSupported_lo |= (1 << 2);
+        if (!d->u.cap.u.super.bFunctionalitySupport) {
+            d->u.cap.u.super.bFunctionalitySupport = 2;
+        }
+    }
+    if (desc->super) {
+        d->u.cap.u.super.wSpeedsSupported_lo |= (1 << 3);
+        if (!d->u.cap.u.super.bFunctionalitySupport) {
+            d->u.cap.u.super.bFunctionalitySupport = 3;
+        }
+    }
+
+    return bLength;
+}
+
+static int usb_desc_bos(const USBDesc *desc, uint8_t *dest, size_t len)
+{
+    uint8_t  bLength = 0x05;
+    uint16_t wTotalLength = 0;
+    uint8_t  bNumDeviceCaps = 0;
+    USBDescriptor *d = (void *)dest;
+    int rc;
+
+    if (len < bLength) {
+        return -1;
+    }
+
+    d->bLength                      = bLength;
+    d->bDescriptorType              = USB_DT_BOS;
+
+    wTotalLength += bLength;
+
+    if (desc->high != NULL) {
+        rc = usb_desc_cap_usb2_ext(desc, dest + wTotalLength,
+                                   len - wTotalLength);
+        if (rc < 0) {
+            return rc;
+        }
+        wTotalLength += rc;
+        bNumDeviceCaps++;
+    }
+
+    if (desc->super != NULL) {
+        rc = usb_desc_cap_super(desc, dest + wTotalLength,
+                                len - wTotalLength);
+        if (rc < 0) {
+            return rc;
+        }
+        wTotalLength += rc;
+        bNumDeviceCaps++;
+    }
+
+    d->u.bos.wTotalLength_lo = usb_lo(wTotalLength);
+    d->u.bos.wTotalLength_hi = usb_hi(wTotalLength);
+    d->u.bos.bNumDeviceCaps  = bNumDeviceCaps;
+    return wTotalLength;
+}
+
 /* ------------------------------------------------------------------ */
 
 static void usb_desc_ep_init(USBDevice *dev)
@@ -359,6 +483,9 @@
     case USB_SPEED_HIGH:
         dev->device = desc->high;
         break;
+    case USB_SPEED_SUPER:
+        dev->device = desc->super;
+        break;
     }
     usb_desc_set_config(dev, 0);
 }
@@ -376,6 +503,9 @@
     if (desc->high) {
         dev->speedmask |= USB_SPEED_MASK_HIGH;
     }
+    if (desc->super) {
+        dev->speedmask |= USB_SPEED_MASK_SUPER;
+    }
     usb_desc_setdefaults(dev);
 }
 
@@ -384,7 +514,9 @@
     const USBDesc *desc = usb_device_get_usb_desc(dev);
 
     assert(desc != NULL);
-    if (desc->high && (dev->port->speedmask & USB_SPEED_MASK_HIGH)) {
+    if (desc->super && (dev->port->speedmask & USB_SPEED_MASK_SUPER)) {
+        dev->speed = USB_SPEED_SUPER;
+    } else if (desc->high && (dev->port->speedmask & USB_SPEED_MASK_HIGH)) {
         dev->speed = USB_SPEED_HIGH;
     } else if (desc->full && (dev->port->speedmask & USB_SPEED_MASK_FULL)) {
         dev->speed = USB_SPEED_FULL;
@@ -501,7 +633,7 @@
     uint8_t buf[256];
     uint8_t type = value >> 8;
     uint8_t index = value & 0xff;
-    int ret = -1;
+    int flags, ret = -1;
 
     if (dev->speed == USB_SPEED_HIGH) {
         other_dev = usb_device_get_usb_desc(dev)->full;
@@ -509,6 +641,11 @@
         other_dev = usb_device_get_usb_desc(dev)->high;
     }
 
+    flags = 0;
+    if (dev->device->bcdUSB >= 0x0300) {
+        flags |= USB_DESC_FLAG_SUPER;
+    }
+
     switch(type) {
     case USB_DT_DEVICE:
         ret = usb_desc_device(&desc->id, dev->device, buf, sizeof(buf));
@@ -516,7 +653,8 @@
         break;
     case USB_DT_CONFIG:
         if (index < dev->device->bNumConfigurations) {
-            ret = usb_desc_config(dev->device->confs + index, buf, sizeof(buf));
+            ret = usb_desc_config(dev->device->confs + index, flags,
+                                  buf, sizeof(buf));
         }
         trace_usb_desc_config(dev->addr, index, len, ret);
         break;
@@ -524,7 +662,6 @@
         ret = usb_desc_string(dev, index, buf, sizeof(buf));
         trace_usb_desc_string(dev->addr, index, len, ret);
         break;
-
     case USB_DT_DEVICE_QUALIFIER:
         if (other_dev != NULL) {
             ret = usb_desc_device_qualifier(other_dev, buf, sizeof(buf));
@@ -533,11 +670,16 @@
         break;
     case USB_DT_OTHER_SPEED_CONFIG:
         if (other_dev != NULL && index < other_dev->bNumConfigurations) {
-            ret = usb_desc_config(other_dev->confs + index, buf, sizeof(buf));
+            ret = usb_desc_config(other_dev->confs + index, flags,
+                                  buf, sizeof(buf));
             buf[0x01] = USB_DT_OTHER_SPEED_CONFIG;
         }
         trace_usb_desc_other_speed_config(dev->addr, index, len, ret);
         break;
+    case USB_DT_BOS:
+        ret = usb_desc_bos(desc, buf, sizeof(buf));
+        trace_usb_desc_bos(dev->addr, len, ret);
+        break;
 
     case USB_DT_DEBUG:
         /* ignore silently */
diff --git a/hw/usb/desc.h b/hw/usb/desc.h
index 7cf5442..68bb570 100644
--- a/hw/usb/desc.h
+++ b/hw/usb/desc.h
@@ -63,6 +63,37 @@
             uint8_t           bRefresh;        /* only audio ep */
             uint8_t           bSynchAddress;   /* only audio ep */
         } endpoint;
+        struct {
+            uint8_t           bMaxBurst;
+            uint8_t           bmAttributes;
+            uint8_t           wBytesPerInterval_lo;
+            uint8_t           wBytesPerInterval_hi;
+        } super_endpoint;
+        struct {
+            uint8_t           wTotalLength_lo;
+            uint8_t           wTotalLength_hi;
+            uint8_t           bNumDeviceCaps;
+        } bos;
+        struct {
+            uint8_t           bDevCapabilityType;
+            union {
+                struct {
+                    uint8_t   bmAttributes_1;
+                    uint8_t   bmAttributes_2;
+                    uint8_t   bmAttributes_3;
+                    uint8_t   bmAttributes_4;
+                } usb2_ext;
+                struct {
+                    uint8_t   bmAttributes;
+                    uint8_t   wSpeedsSupported_lo;
+                    uint8_t   wSpeedsSupported_hi;
+                    uint8_t   bFunctionalitySupport;
+                    uint8_t   bU1DevExitLat;
+                    uint8_t   wU2DevExitLat_lo;
+                    uint8_t   wU2DevExitLat_hi;
+                } super;
+            } u;
+        } cap;
     } u;
 } QEMU_PACKED USBDescriptor;
 
@@ -139,6 +170,11 @@
 
     uint8_t                   is_audio; /* has bRefresh + bSynchAddress */
     uint8_t                   *extra;
+
+    /* superspeed endpoint companion */
+    uint8_t                   bMaxBurst;
+    uint8_t                   bmAttributes_super;
+    uint16_t                  wBytesPerInterval;
 };
 
 struct USBDescOther {
@@ -152,19 +188,25 @@
     USBDescID                 id;
     const USBDescDevice       *full;
     const USBDescDevice       *high;
+    const USBDescDevice       *super;
     const char* const         *str;
 };
 
+#define USB_DESC_FLAG_SUPER (1 << 1)
+
 /* generate usb packages from structs */
 int usb_desc_device(const USBDescID *id, const USBDescDevice *dev,
                     uint8_t *dest, size_t len);
 int usb_desc_device_qualifier(const USBDescDevice *dev,
                               uint8_t *dest, size_t len);
-int usb_desc_config(const USBDescConfig *conf, uint8_t *dest, size_t len);
-int usb_desc_iface_group(const USBDescIfaceAssoc *iad, uint8_t *dest,
-                         size_t len);
-int usb_desc_iface(const USBDescIface *iface, uint8_t *dest, size_t len);
-int usb_desc_endpoint(const USBDescEndpoint *ep, uint8_t *dest, size_t len);
+int usb_desc_config(const USBDescConfig *conf, int flags,
+                    uint8_t *dest, size_t len);
+int usb_desc_iface_group(const USBDescIfaceAssoc *iad, int flags,
+                         uint8_t *dest, size_t len);
+int usb_desc_iface(const USBDescIface *iface, int flags,
+                   uint8_t *dest, size_t len);
+int usb_desc_endpoint(const USBDescEndpoint *ep, int flags,
+                      uint8_t *dest, size_t len);
 int usb_desc_other(const USBDescOther *desc, uint8_t *dest, size_t len);
 
 /* control message emulation helpers */
diff --git a/hw/usb/dev-audio.c b/hw/usb/dev-audio.c
index 79b75fb..2594c78 100644
--- a/hw/usb/dev-audio.c
+++ b/hw/usb/dev-audio.c
@@ -217,7 +217,7 @@
 };
 
 static const USBDescDevice desc_device = {
-    .bcdUSB                        = 0x0200,
+    .bcdUSB                        = 0x0100,
     .bMaxPacketSize0               = 64,
     .bNumConfigurations            = 1,
     .confs = (USBDescConfig[]) {
diff --git a/hw/usb/dev-network.c b/hw/usb/dev-network.c
index c84892c..e4a4359 100644
--- a/hw/usb/dev-network.c
+++ b/hw/usb/dev-network.c
@@ -1001,6 +1001,13 @@
     return 0;
 }
 
+/* Prepare to receive the next packet */
+static void usb_net_reset_in_buf(USBNetState *s)
+{
+    s->in_ptr = s->in_len = 0;
+    qemu_flush_queued_packets(&s->nic->nc);
+}
+
 static int rndis_parse(USBNetState *s, uint8_t *data, int length)
 {
     uint32_t msg_type;
@@ -1025,7 +1032,8 @@
 
     case RNDIS_RESET_MSG:
         rndis_clear_responsequeue(s);
-        s->out_ptr = s->in_ptr = s->in_len = 0;
+        s->out_ptr = 0;
+        usb_net_reset_in_buf(s);
         return rndis_reset_response(s, (rndis_reset_msg_type *) data);
 
     case RNDIS_KEEPALIVE_MSG:
@@ -1135,7 +1143,7 @@
     int ret = USB_RET_NAK;
 
     if (s->in_ptr > s->in_len) {
-        s->in_ptr = s->in_len = 0;
+        usb_net_reset_in_buf(s);
         ret = USB_RET_NAK;
         return ret;
     }
@@ -1152,7 +1160,7 @@
     if (s->in_ptr >= s->in_len &&
                     (is_rndis(s) || (s->in_len & (64 - 1)) || !ret)) {
         /* no short packet necessary */
-        s->in_ptr = s->in_len = 0;
+        usb_net_reset_in_buf(s);
     }
 
 #ifdef TRAFFIC_DEBUG
@@ -1250,20 +1258,32 @@
 static ssize_t usbnet_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     USBNetState *s = DO_UPCAST(NICState, nc, nc)->opaque;
-    struct rndis_packet_msg_type *msg;
+    uint8_t *in_buf = s->in_buf;
+    size_t total_size = size;
 
     if (is_rndis(s)) {
-        msg = (struct rndis_packet_msg_type *) s->in_buf;
         if (s->rndis_state != RNDIS_DATA_INITIALIZED) {
             return -1;
         }
-        if (size + sizeof(struct rndis_packet_msg_type) > sizeof(s->in_buf))
-            return -1;
+        total_size += sizeof(struct rndis_packet_msg_type);
+    }
+    if (total_size > sizeof(s->in_buf)) {
+        return -1;
+    }
 
+    /* Only accept packet if input buffer is empty */
+    if (s->in_len > 0) {
+        return 0;
+    }
+
+    if (is_rndis(s)) {
+        struct rndis_packet_msg_type *msg;
+
+        msg = (struct rndis_packet_msg_type *)in_buf;
         memset(msg, 0, sizeof(struct rndis_packet_msg_type));
         msg->MessageType = cpu_to_le32(RNDIS_PACKET_MSG);
-        msg->MessageLength = cpu_to_le32(size + sizeof(struct rndis_packet_msg_type));
-        msg->DataOffset = cpu_to_le32(sizeof(struct rndis_packet_msg_type) - 8);
+        msg->MessageLength = cpu_to_le32(size + sizeof(*msg));
+        msg->DataOffset = cpu_to_le32(sizeof(*msg) - 8);
         msg->DataLength = cpu_to_le32(size);
         /* msg->OOBDataOffset;
          * msg->OOBDataLength;
@@ -1273,14 +1293,11 @@
          * msg->VcHandle;
          * msg->Reserved;
          */
-        memcpy(msg + 1, buf, size);
-        s->in_len = size + sizeof(struct rndis_packet_msg_type);
-    } else {
-        if (size > sizeof(s->in_buf))
-            return -1;
-        memcpy(s->in_buf, buf, size);
-        s->in_len = size;
+        in_buf += sizeof(*msg);
     }
+
+    memcpy(in_buf, buf, size);
+    s->in_len = total_size;
     s->in_ptr = 0;
     return size;
 }
diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c
index 8aa6552..69b6e48 100644
--- a/hw/usb/dev-serial.c
+++ b/hw/usb/dev-serial.c
@@ -113,7 +113,7 @@
 static const USBDescStrings desc_strings = {
     [STR_MANUFACTURER]    = "QEMU",
     [STR_PRODUCT_SERIAL]  = "QEMU USB SERIAL",
-    [STR_PRODUCT_BRAILLE] = "QEMU USB BRAILLE",
+    [STR_PRODUCT_BRAILLE] = "QEMU USB BAUM BRAILLE",
     [STR_SERIALNUMBER]    = "1",
 };
 
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index ff48d91..e732191 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -78,6 +78,7 @@
     STR_SERIALNUMBER,
     STR_CONFIG_FULL,
     STR_CONFIG_HIGH,
+    STR_CONFIG_SUPER,
 };
 
 static const USBDescStrings desc_strings = {
@@ -86,6 +87,7 @@
     [STR_SERIALNUMBER] = "1",
     [STR_CONFIG_FULL]  = "Full speed config (usb 1.1)",
     [STR_CONFIG_HIGH]  = "High speed config (usb 2.0)",
+    [STR_CONFIG_SUPER] = "Super speed config (usb 3.0)",
 };
 
 static const USBDescIface desc_iface_full = {
@@ -158,6 +160,43 @@
     },
 };
 
+static const USBDescIface desc_iface_super = {
+    .bInterfaceNumber              = 0,
+    .bNumEndpoints                 = 2,
+    .bInterfaceClass               = USB_CLASS_MASS_STORAGE,
+    .bInterfaceSubClass            = 0x06, /* SCSI */
+    .bInterfaceProtocol            = 0x50, /* Bulk */
+    .eps = (USBDescEndpoint[]) {
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x01,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 1024,
+            .bMaxBurst             = 15,
+        },{
+            .bEndpointAddress      = USB_DIR_OUT | 0x02,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 1024,
+            .bMaxBurst             = 15,
+        },
+    }
+};
+
+static const USBDescDevice desc_device_super = {
+    .bcdUSB                        = 0x0300,
+    .bMaxPacketSize0               = 9,
+    .bNumConfigurations            = 1,
+    .confs = (USBDescConfig[]) {
+        {
+            .bNumInterfaces        = 1,
+            .bConfigurationValue   = 1,
+            .iConfiguration        = STR_CONFIG_SUPER,
+            .bmAttributes          = 0xc0,
+            .nif = 1,
+            .ifs = &desc_iface_super,
+        },
+    },
+};
+
 static const USBDesc desc = {
     .id = {
         .idVendor          = 0x46f4, /* CRC16() of "QEMU" */
@@ -167,9 +206,10 @@
         .iProduct          = STR_PRODUCT,
         .iSerialNumber     = STR_SERIALNUMBER,
     },
-    .full = &desc_device_full,
-    .high = &desc_device_high,
-    .str  = desc_strings,
+    .full  = &desc_device_full,
+    .high  = &desc_device_high,
+    .super = &desc_device_super,
+    .str   = desc_strings,
 };
 
 static void usb_msd_copy_data(MSDState *s, USBPacket *p)
diff --git a/hw/usb/dev-uas.c b/hw/usb/dev-uas.c
index 5a0057a..4389380 100644
--- a/hw/usb/dev-uas.c
+++ b/hw/usb/dev-uas.c
@@ -576,7 +576,6 @@
      */
     usb_uas_queue_response(uas, req->tag, UAS_RC_INVALID_INFO_UNIT, 0);
     g_free(req);
-    return;
 }
 
 static void usb_uas_task(UASDevice *uas, uas_ui *ui)
@@ -640,7 +639,6 @@
 
 incorrect_lun:
     usb_uas_queue_response(uas, tag, UAS_RC_INCORRECT_LUN, 0);
-    return;
 }
 
 static int usb_uas_handle_data(USBDevice *dev, USBPacket *p)
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 017342b..8bdb806 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2,6 +2,11 @@
  * QEMU USB EHCI Emulation
  *
  * Copyright(c) 2008  Emutex Ltd. (address@hidden)
+ * Copyright(c) 2011-2012 Red Hat, Inc.
+ *
+ * Red Hat Authors:
+ * Gerd Hoffmann <kraxel@redhat.com>
+ * Hans de Goede <hdegoede@redhat.com>
  *
  * EHCI project was started by Mark Burkley, with contributions by
  * Niels de Vos.  David S. Ahern continued working on it.  Kevin Wolf,
@@ -29,6 +34,7 @@
 #include "monitor.h"
 #include "trace.h"
 #include "dma.h"
+#include "sysemu.h"
 
 #define EHCI_DEBUG   0
 
@@ -134,6 +140,7 @@
 #define NB_PORTS         6        // Number of downstream ports
 #define BUFF_SIZE        5*4096   // Max bytes to transfer per transaction
 #define MAX_QH           100      // Max allowable queue heads in a chain
+#define MIN_FR_PER_TICK  3        // Min frames to process when catching up
 
 /*  Internal periodic / asynchronous schedule state machine states
  */
@@ -340,6 +347,7 @@
 
 enum async_state {
     EHCI_ASYNC_NONE = 0,
+    EHCI_ASYNC_INITIALIZED,
     EHCI_ASYNC_INFLIGHT,
     EHCI_ASYNC_FINISHED,
 };
@@ -365,7 +373,7 @@
     uint32_t seen;
     uint64_t ts;
     int async;
-    int revalidate;
+    int transact_ctr;
 
     /* cached data from guest - needs to be flushed
      * when guest removes an entry (doorbell, handshake sequence)
@@ -384,6 +392,9 @@
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
+    MemoryRegion mem_caps;
+    MemoryRegion mem_opreg;
+    MemoryRegion mem_ports;
     int companion_count;
 
     /* properties */
@@ -393,10 +404,10 @@
      *  EHCI spec version 1.0 Section 2.3
      *  Host Controller Operational Registers
      */
+    uint8_t caps[OPREGBASE];
     union {
-        uint8_t mmio[MMIO_SIZE];
+        uint32_t opreg[(PORTSC_BEGIN-OPREGBASE)/sizeof(uint32_t)];
         struct {
-            uint8_t cap[OPREGBASE];
             uint32_t usbcmd;
             uint32_t usbsts;
             uint32_t usbintr;
@@ -406,9 +417,9 @@
             uint32_t asynclistaddr;
             uint32_t notused[9];
             uint32_t configflag;
-            uint32_t portsc[NB_PORTS];
         };
     };
+    uint32_t portsc[NB_PORTS];
 
     /*
      *  Internal states, shadow registers, etc
@@ -466,25 +477,18 @@
 };
 
 static const char *ehci_mmio_names[] = {
-    [CAPLENGTH]         = "CAPLENGTH",
-    [HCIVERSION]        = "HCIVERSION",
-    [HCSPARAMS]         = "HCSPARAMS",
-    [HCCPARAMS]         = "HCCPARAMS",
     [USBCMD]            = "USBCMD",
     [USBSTS]            = "USBSTS",
     [USBINTR]           = "USBINTR",
     [FRINDEX]           = "FRINDEX",
     [PERIODICLISTBASE]  = "P-LIST BASE",
     [ASYNCLISTADDR]     = "A-LIST ADDR",
-    [PORTSC_BEGIN]      = "PORTSC #0",
-    [PORTSC_BEGIN + 4]  = "PORTSC #1",
-    [PORTSC_BEGIN + 8]  = "PORTSC #2",
-    [PORTSC_BEGIN + 12] = "PORTSC #3",
-    [PORTSC_BEGIN + 16] = "PORTSC #4",
-    [PORTSC_BEGIN + 20] = "PORTSC #5",
     [CONFIGFLAG]        = "CONFIGFLAG",
 };
 
+static int ehci_state_executing(EHCIQueue *q);
+static int ehci_state_writeback(EHCIQueue *q);
+
 static const char *nr2str(const char **n, size_t len, uint32_t nr)
 {
     if (nr < len && n[nr] != NULL) {
@@ -501,7 +505,8 @@
 
 static const char *addr2str(target_phys_addr_t addr)
 {
-    return nr2str(ehci_mmio_names, ARRAY_SIZE(ehci_mmio_names), addr);
+    return nr2str(ehci_mmio_names, ARRAY_SIZE(ehci_mmio_names),
+                  addr + OPREGBASE);
 }
 
 static void ehci_trace_usbsts(uint32_t mask, int state)
@@ -709,6 +714,12 @@
                         (bool)(sitd->results & SITD_RESULTS_ACTIVE));
 }
 
+static void ehci_trace_guest_bug(EHCIState *s, const char *message)
+{
+    trace_usb_ehci_guest_bug(message);
+    fprintf(stderr, "ehci warning: %s\n", message);
+}
+
 static inline bool ehci_enabled(EHCIState *s)
 {
     return s->usbcmd & USBCMD_RUNSTOP;
@@ -740,9 +751,25 @@
 
 static void ehci_free_packet(EHCIPacket *p)
 {
+    if (p->async == EHCI_ASYNC_FINISHED) {
+        int state = ehci_get_state(p->queue->ehci, p->queue->async);
+        /* This is a normal, but rare condition (cancel racing completion) */
+        fprintf(stderr, "EHCI: Warning packet completed but not processed\n");
+        ehci_state_executing(p->queue);
+        ehci_state_writeback(p->queue);
+        ehci_set_state(p->queue->ehci, p->queue->async, state);
+        /* state_writeback recurses into us with async == EHCI_ASYNC_NONE!! */
+        return;
+    }
     trace_usb_ehci_packet_action(p->queue, p, "free");
+    if (p->async == EHCI_ASYNC_INITIALIZED) {
+        usb_packet_unmap(&p->packet, &p->sgl);
+        qemu_sglist_destroy(&p->sgl);
+    }
     if (p->async == EHCI_ASYNC_INFLIGHT) {
         usb_cancel_packet(&p->packet);
+        usb_packet_unmap(&p->packet, &p->sgl);
+        qemu_sglist_destroy(&p->sgl);
     }
     QTAILQ_REMOVE(&p->queue->packets, p, next);
     usb_packet_cleanup(&p->packet);
@@ -766,27 +793,45 @@
     return q;
 }
 
-static void ehci_cancel_queue(EHCIQueue *q)
+static int ehci_cancel_queue(EHCIQueue *q)
 {
     EHCIPacket *p;
+    int packets = 0;
 
     p = QTAILQ_FIRST(&q->packets);
     if (p == NULL) {
-        return;
+        return 0;
     }
 
     trace_usb_ehci_queue_action(q, "cancel");
     do {
         ehci_free_packet(p);
+        packets++;
     } while ((p = QTAILQ_FIRST(&q->packets)) != NULL);
+    return packets;
 }
 
-static void ehci_free_queue(EHCIQueue *q)
+static int ehci_reset_queue(EHCIQueue *q)
+{
+    int packets;
+
+    trace_usb_ehci_queue_action(q, "reset");
+    packets = ehci_cancel_queue(q);
+    q->dev = NULL;
+    q->qtdaddr = 0;
+    return packets;
+}
+
+static void ehci_free_queue(EHCIQueue *q, const char *warn)
 {
     EHCIQueueHead *head = q->async ? &q->ehci->aqueues : &q->ehci->pqueues;
+    int cancelled;
 
     trace_usb_ehci_queue_action(q, "free");
-    ehci_cancel_queue(q);
+    cancelled = ehci_cancel_queue(q);
+    if (warn && cancelled > 0) {
+        ehci_trace_guest_bug(q->ehci, warn);
+    }
     QTAILQ_REMOVE(head, q, next);
     g_free(q);
 }
@@ -805,20 +850,10 @@
     return NULL;
 }
 
-static void ehci_queues_tag_unused_async(EHCIState *ehci)
-{
-    EHCIQueue *q;
-
-    QTAILQ_FOREACH(q, &ehci->aqueues, next) {
-        if (!q->seen) {
-            q->revalidate = 1;
-        }
-    }
-}
-
 static void ehci_queues_rip_unused(EHCIState *ehci, int async)
 {
     EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
+    const char *warn = async ? "guest unlinked busy QH" : NULL;
     uint64_t maxage = FRAME_TIMER_NS * ehci->maxframes * 4;
     EHCIQueue *q, *tmp;
 
@@ -831,7 +866,19 @@
         if (ehci->last_run_ns < q->ts + maxage) {
             continue;
         }
-        ehci_free_queue(q);
+        ehci_free_queue(q, warn);
+    }
+}
+
+static void ehci_queues_rip_unseen(EHCIState *ehci, int async)
+{
+    EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
+    EHCIQueue *q, *tmp;
+
+    QTAILQ_FOREACH_SAFE(q, head, next, tmp) {
+        if (!q->seen) {
+            ehci_free_queue(q, NULL);
+        }
     }
 }
 
@@ -844,17 +891,18 @@
         if (q->dev != dev) {
             continue;
         }
-        ehci_free_queue(q);
+        ehci_free_queue(q, NULL);
     }
 }
 
 static void ehci_queues_rip_all(EHCIState *ehci, int async)
 {
     EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
+    const char *warn = async ? "guest stopped busy async schedule" : NULL;
     EHCIQueue *q, *tmp;
 
     QTAILQ_FOREACH_SAFE(q, head, next, tmp) {
-        ehci_free_queue(q);
+        ehci_free_queue(q, warn);
     }
 }
 
@@ -979,7 +1027,7 @@
     }
 
     s->companion_count++;
-    s->mmio[0x05] = (s->companion_count << 4) | portcount;
+    s->caps[0x05] = (s->companion_count << 4) | portcount;
 
     return 0;
 }
@@ -1024,7 +1072,8 @@
         }
     }
 
-    memset(&s->mmio[OPREGBASE], 0x00, MMIO_SIZE - OPREGBASE);
+    memset(&s->opreg, 0x00, sizeof(s->opreg));
+    memset(&s->portsc, 0x00, sizeof(s->portsc));
 
     s->usbcmd = NB_MAXINTRATE << USBCMD_ITC_SH;
     s->usbsts = USBSTS_HALT;
@@ -1051,50 +1100,35 @@
     qemu_bh_cancel(s->async_bh);
 }
 
-static uint32_t ehci_mem_readb(void *ptr, target_phys_addr_t addr)
+static uint64_t ehci_caps_read(void *ptr, target_phys_addr_t addr,
+                               unsigned size)
+{
+    EHCIState *s = ptr;
+    return s->caps[addr];
+}
+
+static uint64_t ehci_opreg_read(void *ptr, target_phys_addr_t addr,
+                                unsigned size)
 {
     EHCIState *s = ptr;
     uint32_t val;
 
-    val = s->mmio[addr];
-
+    val = s->opreg[addr >> 2];
+    trace_usb_ehci_opreg_read(addr + OPREGBASE, addr2str(addr), val);
     return val;
 }
 
-static uint32_t ehci_mem_readw(void *ptr, target_phys_addr_t addr)
+static uint64_t ehci_port_read(void *ptr, target_phys_addr_t addr,
+                               unsigned size)
 {
     EHCIState *s = ptr;
     uint32_t val;
 
-    val = s->mmio[addr] | (s->mmio[addr+1] << 8);
-
+    val = s->portsc[addr >> 2];
+    trace_usb_ehci_portsc_read(addr + PORTSC_BEGIN, addr >> 2, val);
     return val;
 }
 
-static uint32_t ehci_mem_readl(void *ptr, target_phys_addr_t addr)
-{
-    EHCIState *s = ptr;
-    uint32_t val;
-
-    val = s->mmio[addr] | (s->mmio[addr+1] << 8) |
-          (s->mmio[addr+2] << 16) | (s->mmio[addr+3] << 24);
-
-    trace_usb_ehci_mmio_readl(addr, addr2str(addr), val);
-    return val;
-}
-
-static void ehci_mem_writeb(void *ptr, target_phys_addr_t addr, uint32_t val)
-{
-    fprintf(stderr, "EHCI doesn't handle byte writes to MMIO\n");
-    exit(1);
-}
-
-static void ehci_mem_writew(void *ptr, target_phys_addr_t addr, uint32_t val)
-{
-    fprintf(stderr, "EHCI doesn't handle 16-bit writes to MMIO\n");
-    exit(1);
-}
-
 static void handle_port_owner_write(EHCIState *s, int port, uint32_t owner)
 {
     USBDevice *dev = s->ports[port].dev;
@@ -1123,11 +1157,17 @@
     }
 }
 
-static void handle_port_status_write(EHCIState *s, int port, uint32_t val)
+static void ehci_port_write(void *ptr, target_phys_addr_t addr,
+                            uint64_t val, unsigned size)
 {
+    EHCIState *s = ptr;
+    int port = addr >> 2;
     uint32_t *portsc = &s->portsc[port];
+    uint32_t old = *portsc;
     USBDevice *dev = s->ports[port].dev;
 
+    trace_usb_ehci_portsc_write(addr + PORTSC_BEGIN, addr >> 2, val);
+
     /* Clear rwc bits */
     *portsc &= ~(val & PORTSC_RWC_MASK);
     /* The guest may clear, but not set the PED bit */
@@ -1159,39 +1199,20 @@
 
     *portsc &= ~PORTSC_RO_MASK;
     *portsc |= val;
+    trace_usb_ehci_portsc_change(addr + PORTSC_BEGIN, addr >> 2, *portsc, old);
 }
 
-static void ehci_mem_writel(void *ptr, target_phys_addr_t addr, uint32_t val)
+static void ehci_opreg_write(void *ptr, target_phys_addr_t addr,
+                             uint64_t val, unsigned size)
 {
     EHCIState *s = ptr;
-    uint32_t *mmio = (uint32_t *)(&s->mmio[addr]);
+    uint32_t *mmio = s->opreg + (addr >> 2);
     uint32_t old = *mmio;
     int i;
 
-    trace_usb_ehci_mmio_writel(addr, addr2str(addr), val);
+    trace_usb_ehci_opreg_write(addr + OPREGBASE, addr2str(addr), val);
 
-    /* Only aligned reads are allowed on OHCI */
-    if (addr & 3) {
-        fprintf(stderr, "usb-ehci: Mis-aligned write to addr 0x"
-                TARGET_FMT_plx "\n", addr);
-        return;
-    }
-
-    if (addr >= PORTSC && addr < PORTSC + 4 * NB_PORTS) {
-        handle_port_status_write(s, (addr-PORTSC)/4, val);
-        trace_usb_ehci_mmio_change(addr, addr2str(addr), *mmio, old);
-        return;
-    }
-
-    if (addr < OPREGBASE) {
-        fprintf(stderr, "usb-ehci: write attempt to read-only register"
-                TARGET_FMT_plx "\n", addr);
-        return;
-    }
-
-
-    /* Do any register specific pre-write processing here.  */
-    switch(addr) {
+    switch (addr + OPREGBASE) {
     case USBCMD:
         if (val & USBCMD_HCRESET) {
             ehci_reset(s);
@@ -1202,7 +1223,7 @@
         /* not supporting dynamic frame list size at the moment */
         if ((val & USBCMD_FLS) && !(s->usbcmd & USBCMD_FLS)) {
             fprintf(stderr, "attempt to set frame list size -- value %d\n",
-                    val & USBCMD_FLS);
+                    (int)val & USBCMD_FLS);
             val &= ~USBCMD_FLS;
         }
 
@@ -1213,6 +1234,7 @@
              */
             s->async_stepdown = 0;
             qemu_bh_schedule(s->async_bh);
+            trace_usb_ehci_doorbell_ring();
         }
 
         if (((USBCMD_RUNSTOP | USBCMD_PSE | USBCMD_ASE) & val) !=
@@ -1268,7 +1290,7 @@
     }
 
     *mmio = val;
-    trace_usb_ehci_mmio_change(addr, addr2str(addr), *mmio, old);
+    trace_usb_ehci_opreg_change(addr + OPREGBASE, addr2str(addr), *mmio, old);
 }
 
 
@@ -1450,8 +1472,8 @@
 
     assert(p != NULL);
     assert(p->qtdaddr == q->qtdaddr);
-    assert(p->async != EHCI_ASYNC_INFLIGHT);
-    p->async = EHCI_ASYNC_NONE;
+    assert(p->async == EHCI_ASYNC_INITIALIZED ||
+           p->async == EHCI_ASYNC_FINISHED);
 
     DPRINTF("execute_complete: qhaddr 0x%x, next %x, qtdaddr 0x%x, status %d\n",
             q->qhaddr, q->qh.next, q->qtdaddr, q->usb_status);
@@ -1481,10 +1503,6 @@
             assert(0);
             break;
         }
-    } else if ((p->usb_status > p->tbytes) && (p->pid == USB_TOKEN_IN)) {
-        p->usb_status = USB_RET_BABBLE;
-        q->qh.token |= (QTD_TOKEN_HALT | QTD_TOKEN_BABBLE);
-        ehci_raise_irq(q->ehci, USBSTS_ERRINT);
     } else {
         // TODO check 4.12 for splits
 
@@ -1500,6 +1518,7 @@
     ehci_finish_transfer(q, p->usb_status);
     usb_packet_unmap(&p->packet, &p->sgl);
     qemu_sglist_destroy(&p->sgl);
+    p->async = EHCI_ASYNC_NONE;
 
     q->qh.token ^= QTD_TOKEN_DTOGGLE;
     q->qh.token &= ~QTD_TOKEN_ACTIVE;
@@ -1517,6 +1536,9 @@
     int ret;
     int endp;
 
+    assert(p->async == EHCI_ASYNC_NONE ||
+           p->async == EHCI_ASYNC_INITIALIZED);
+
     if (!(p->qtd.token & QTD_TOKEN_ACTIVE)) {
         fprintf(stderr, "Attempting to execute inactive qtd\n");
         return USB_RET_PROCERR;
@@ -1524,7 +1546,8 @@
 
     p->tbytes = (p->qtd.token & QTD_TOKEN_TBYTES_MASK) >> QTD_TOKEN_TBYTES_SH;
     if (p->tbytes > BUFF_SIZE) {
-        fprintf(stderr, "Request for more bytes than allowed\n");
+        ehci_trace_guest_bug(p->queue->ehci,
+                             "guest requested more bytes than allowed");
         return USB_RET_PROCERR;
     }
 
@@ -1544,15 +1567,18 @@
         break;
     }
 
-    if (ehci_init_transfer(p) != 0) {
-        return USB_RET_PROCERR;
-    }
-
     endp = get_field(p->queue->qh.epchar, QH_EPCHAR_EP);
     ep = usb_ep_get(p->queue->dev, p->pid, endp);
 
-    usb_packet_setup(&p->packet, p->pid, ep, p->qtdaddr);
-    usb_packet_map(&p->packet, &p->sgl);
+    if (p->async == EHCI_ASYNC_NONE) {
+        if (ehci_init_transfer(p) != 0) {
+            return USB_RET_PROCERR;
+        }
+
+        usb_packet_setup(&p->packet, p->pid, ep, p->qtdaddr);
+        usb_packet_map(&p->packet, &p->sgl);
+        p->async = EHCI_ASYNC_INITIALIZED;
+    }
 
     trace_usb_ehci_packet_action(p->queue, p, action);
     ret = usb_handle_packet(p->queue->dev, &p->packet);
@@ -1771,7 +1797,7 @@
 static EHCIQueue *ehci_state_fetchqh(EHCIState *ehci, int async)
 {
     EHCIPacket *p;
-    uint32_t entry, devaddr;
+    uint32_t entry, devaddr, endp;
     EHCIQueue *q;
     EHCIqh qh;
 
@@ -1792,26 +1818,31 @@
 
     get_dwords(ehci, NLPTR_GET(q->qhaddr),
                (uint32_t *) &qh, sizeof(EHCIqh) >> 2);
-    if (q->revalidate && (q->qh.epchar      != qh.epchar ||
-                          q->qh.epcap       != qh.epcap  ||
-                          q->qh.current_qtd != qh.current_qtd)) {
-        ehci_free_queue(q);
-        q = ehci_alloc_queue(ehci, entry, async);
-        q->seen++;
+    ehci_trace_qh(q, NLPTR_GET(q->qhaddr), &qh);
+
+    /*
+     * The overlay area of the qh should never be changed by the guest,
+     * except when idle, in which case the reset is a nop.
+     */
+    devaddr = get_field(qh.epchar, QH_EPCHAR_DEVADDR);
+    endp    = get_field(qh.epchar, QH_EPCHAR_EP);
+    if ((devaddr != get_field(q->qh.epchar, QH_EPCHAR_DEVADDR)) ||
+        (endp    != get_field(q->qh.epchar, QH_EPCHAR_EP)) ||
+        (memcmp(&qh.current_qtd, &q->qh.current_qtd,
+                                 9 * sizeof(uint32_t)) != 0) ||
+        (q->dev != NULL && q->dev->addr != devaddr)) {
+        if (ehci_reset_queue(q) > 0) {
+            ehci_trace_guest_bug(ehci, "guest updated active QH");
+        }
         p = NULL;
     }
     q->qh = qh;
-    q->revalidate = 0;
-    ehci_trace_qh(q, NLPTR_GET(q->qhaddr), &q->qh);
 
-    devaddr = get_field(q->qh.epchar, QH_EPCHAR_DEVADDR);
-    if (q->dev != NULL && q->dev->addr != devaddr) {
-        if (!QTAILQ_EMPTY(&q->packets)) {
-            /* should not happen (guest bug) */
-            ehci_cancel_queue(q);
-        }
-        q->dev = NULL;
+    q->transact_ctr = get_field(q->qh.epcap, QH_EPCAP_MULT);
+    if (q->transact_ctr == 0) { /* Guest bug in some versions of windows */
+        q->transact_ctr = 4;
     }
+
     if (q->dev == NULL) {
         q->dev = ehci_find_device(q->ehci, devaddr);
     }
@@ -1969,8 +2000,8 @@
             (!NLPTR_TBIT(p->qtd.next) && (p->qtd.next != qtd.next)) ||
             (!NLPTR_TBIT(p->qtd.altnext) && (p->qtd.altnext != qtd.altnext)) ||
             p->qtd.bufptr[0] != qtd.bufptr[0]) {
-            /* guest bug: guest updated active QH or qTD underneath us */
             ehci_cancel_queue(q);
+            ehci_trace_guest_bug(q->ehci, "guest updated active QH or qTD");
             p = NULL;
         } else {
             p->qtd = qtd;
@@ -1989,15 +2020,19 @@
     } else if (p != NULL) {
         switch (p->async) {
         case EHCI_ASYNC_NONE:
-            /* Previously nacked packet (likely interrupt ep) */
-           ehci_set_state(q->ehci, q->async, EST_EXECUTE);
-           break;
+        case EHCI_ASYNC_INITIALIZED:
+            /* Not yet executed (MULT), or previously nacked (int) packet */
+            ehci_set_state(q->ehci, q->async, EST_EXECUTE);
+            break;
         case EHCI_ASYNC_INFLIGHT:
-            /* Unfinyshed async handled packet, go horizontal */
+            /* Unfinished async handled packet, go horizontal */
             ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
             break;
         case EHCI_ASYNC_FINISHED:
-            /* Should never happen, as this case is caught by fetchqh */
+            /*
+             * We get here when advqueue moves to a packet which is already
+             * finished, which can happen with packets queued up by fill_queue
+             */
             ehci_set_state(q->ehci, q->async, EST_EXECUTING);
             break;
         }
@@ -2028,7 +2063,7 @@
     return again;
 }
 
-static void ehci_fill_queue(EHCIPacket *p)
+static int ehci_fill_queue(EHCIPacket *p)
 {
     EHCIQueue *q = p->queue;
     EHCIqtd qtd = p->qtd;
@@ -2052,9 +2087,13 @@
         p->qtdaddr = qtdaddr;
         p->qtd = qtd;
         p->usb_status = ehci_execute(p, "queue");
+        if (p->usb_status == USB_RET_PROCERR) {
+            break;
+        }
         assert(p->usb_status == USB_RET_ASYNC);
         p->async = EHCI_ASYNC_INFLIGHT;
     }
+    return p->usb_status;
 }
 
 static int ehci_state_execute(EHCIQueue *q)
@@ -2071,15 +2110,12 @@
 
     // TODO verify enough time remains in the uframe as in 4.4.1.1
     // TODO write back ptr to async list when done or out of time
-    // TODO Windows does not seem to ever set the MULT field
 
-    if (!q->async) {
-        int transactCtr = get_field(q->qh.epcap, QH_EPCAP_MULT);
-        if (!transactCtr) {
-            ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
-            again = 1;
-            goto out;
-        }
+    /* 4.10.3, bottom of page 82, go horizontal on transaction counter == 0 */
+    if (!q->async && q->transact_ctr == 0) {
+        ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
+        again = 1;
+        goto out;
     }
 
     if (q->async) {
@@ -2096,8 +2132,11 @@
         trace_usb_ehci_packet_action(p->queue, p, "async");
         p->async = EHCI_ASYNC_INFLIGHT;
         ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
-        again = 1;
-        ehci_fill_queue(p);
+        if (q->async) {
+            again = (ehci_fill_queue(p) == USB_RET_PROCERR) ? -1 : 1;
+        } else {
+            again = 1;
+        }
         goto out;
     }
 
@@ -2117,13 +2156,9 @@
 
     ehci_execute_complete(q);
 
-    // 4.10.3
-    if (!q->async) {
-        int transactCtr = get_field(q->qh.epcap, QH_EPCAP_MULT);
-        transactCtr--;
-        set_field(&q->qh.epcap, transactCtr, QH_EPCAP_MULT);
-        // 4.10.3, bottom of page 82, should exit this state when transaction
-        // counter decrements to 0
+    /* 4.10.3 */
+    if (!q->async && q->transact_ctr > 0) {
+        q->transact_ctr--;
     }
 
     /* 4.10.5 */
@@ -2310,8 +2345,8 @@
          */
         if (ehci->usbcmd & USBCMD_IAAD) {
             /* Remove all unseen qhs from the async qhs queue */
-            ehci_queues_tag_unused_async(ehci);
-            DPRINTF("ASYNC: doorbell request acknowledged\n");
+            ehci_queues_rip_unseen(ehci, async);
+            trace_usb_ehci_doorbell_ack();
             ehci->usbcmd &= ~USBCMD_IAAD;
             ehci_raise_irq(ehci, USBSTS_IAA);
         }
@@ -2392,7 +2427,7 @@
         if (ehci->frindex == 0x00004000) {
             ehci_raise_irq(ehci, USBSTS_FLR);
             ehci->frindex = 0;
-            if (ehci->usbsts_frindex > 0x00004000) {
+            if (ehci->usbsts_frindex >= 0x00004000) {
                 ehci->usbsts_frindex -= 0x00004000;
             } else {
                 ehci->usbsts_frindex = 0;
@@ -2427,6 +2462,19 @@
         }
 
         for (i = 0; i < frames; i++) {
+            /*
+             * If we're running behind schedule, we should not catch up
+             * too fast, as that will make some guests unhappy:
+             * 1) We must process a minimum of MIN_FR_PER_TICK frames,
+             *    otherwise we will never catch up
+             * 2) Process frames until the guest has requested an irq (IOC)
+             */
+            if (i >= MIN_FR_PER_TICK) {
+                ehci_commit_irq(ehci);
+                if ((ehci->usbsts & USBINTR_MASK) & ehci->usbintr) {
+                    break;
+                }
+            }
             ehci_update_frindex(ehci, 1);
             ehci_advance_periodic_state(ehci);
             ehci->last_run_ns += FRAME_TIMER_NS;
@@ -2466,11 +2514,28 @@
     ehci_advance_async_state(ehci);
 }
 
-static const MemoryRegionOps ehci_mem_ops = {
-    .old_mmio = {
-        .read = { ehci_mem_readb, ehci_mem_readw, ehci_mem_readl },
-        .write = { ehci_mem_writeb, ehci_mem_writew, ehci_mem_writel },
-    },
+static const MemoryRegionOps ehci_mmio_caps_ops = {
+    .read = ehci_caps_read,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 1,
+    .impl.max_access_size = 1,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static const MemoryRegionOps ehci_mmio_opreg_ops = {
+    .read = ehci_opreg_read,
+    .write = ehci_opreg_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static const MemoryRegionOps ehci_mmio_port_ops = {
+    .read = ehci_port_read,
+    .write = ehci_port_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
     .endianness = DEVICE_LITTLE_ENDIAN,
 };
 
@@ -2508,6 +2573,32 @@
     return 0;
 }
 
+static void usb_ehci_vm_state_change(void *opaque, int running, RunState state)
+{
+    EHCIState *ehci = opaque;
+
+    /*
+     * We don't migrate the EHCIQueue-s, instead we rebuild them for the
+     * schedule in guest memory. We must do the rebuilt ASAP, so that
+     * USB-devices which have async handled packages have a packet in the
+     * ep queue to match the completion with.
+     */
+    if (state == RUN_STATE_RUNNING) {
+        ehci_advance_async_state(ehci);
+    }
+
+    /*
+     * The schedule rebuilt from guest memory could cause the migration dest
+     * to miss a QH unlink, and fail to cancel packets, since the unlinked QH
+     * will never have existed on the destination. Therefor we must flush the
+     * async schedule on savevm to catch any not yet noticed unlinks.
+     */
+    if (state == RUN_STATE_SAVE_VM) {
+        ehci_advance_async_state(ehci);
+        ehci_queues_rip_unseen(ehci, 1);
+    }
+}
+
 static const VMStateDescription vmstate_ehci = {
     .name        = "ehci",
     .version_id  = 2,
@@ -2627,19 +2718,19 @@
     pci_conf[0x6e] = 0x00;
     pci_conf[0x6f] = 0xc0;  // USBLEFCTLSTS
 
-    // 2.2 host controller interface version
-    s->mmio[0x00] = (uint8_t) OPREGBASE;
-    s->mmio[0x01] = 0x00;
-    s->mmio[0x02] = 0x00;
-    s->mmio[0x03] = 0x01;        // HC version
-    s->mmio[0x04] = NB_PORTS;    // Number of downstream ports
-    s->mmio[0x05] = 0x00;        // No companion ports at present
-    s->mmio[0x06] = 0x00;
-    s->mmio[0x07] = 0x00;
-    s->mmio[0x08] = 0x80;        // We can cache whole frame, not 64-bit capable
-    s->mmio[0x09] = 0x68;        // EECP
-    s->mmio[0x0a] = 0x00;
-    s->mmio[0x0b] = 0x00;
+    /* 2.2 host controller interface version */
+    s->caps[0x00] = (uint8_t) OPREGBASE;
+    s->caps[0x01] = 0x00;
+    s->caps[0x02] = 0x00;
+    s->caps[0x03] = 0x01;        /* HC version */
+    s->caps[0x04] = NB_PORTS;    /* Number of downstream ports */
+    s->caps[0x05] = 0x00;        /* No companion ports at present */
+    s->caps[0x06] = 0x00;
+    s->caps[0x07] = 0x00;
+    s->caps[0x08] = 0x80;        /* We can cache whole frame, no 64-bit */
+    s->caps[0x09] = 0x68;        /* EECP */
+    s->caps[0x0a] = 0x00;
+    s->caps[0x0b] = 0x00;
 
     s->irq = s->dev.irq[3];
 
@@ -2657,8 +2748,20 @@
     usb_packet_init(&s->ipacket);
 
     qemu_register_reset(ehci_reset, s);
+    qemu_add_vm_change_state_handler(usb_ehci_vm_state_change, s);
 
-    memory_region_init_io(&s->mem, &ehci_mem_ops, s, "ehci", MMIO_SIZE);
+    memory_region_init(&s->mem, "ehci", MMIO_SIZE);
+    memory_region_init_io(&s->mem_caps, &ehci_mmio_caps_ops, s,
+                          "capabilities", OPREGBASE);
+    memory_region_init_io(&s->mem_opreg, &ehci_mmio_opreg_ops, s,
+                          "operational", PORTSC_BEGIN - OPREGBASE);
+    memory_region_init_io(&s->mem_ports, &ehci_mmio_port_ops, s,
+                          "ports", PORTSC_END - PORTSC_BEGIN);
+
+    memory_region_add_subregion(&s->mem, 0,            &s->mem_caps);
+    memory_region_add_subregion(&s->mem, OPREGBASE,    &s->mem_opreg);
+    memory_region_add_subregion(&s->mem, PORTSC_BEGIN, &s->mem_ports);
+
     pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mem);
 
     return 0;
diff --git a/hw/usb/hcd-ohci.c b/hw/usb/hcd-ohci.c
index c36184a..59c7055 100644
--- a/hw/usb/hcd-ohci.c
+++ b/hw/usb/hcd-ohci.c
@@ -1470,8 +1470,6 @@
 
     if (old_state != port->ctrl)
         ohci_set_interrupt(ohci, OHCI_INTR_RHSC);
-
-    return;
 }
 
 static uint64_t ohci_mem_read(void *opaque,
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index b0db921..cdc8bc3 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -729,11 +729,6 @@
         *int_mask |= 0x01;
 
     if (pid == USB_TOKEN_IN) {
-        if (len > max_len) {
-            ret = USB_RET_BABBLE;
-            goto out;
-        }
-
         if ((td->ctrl & TD_CTRL_SPD) && len < max_len) {
             *int_mask |= 0x02;
             /* short packet: do not update QH */
@@ -1005,6 +1000,9 @@
         }
         assert(ret == TD_RESULT_ASYNC_START);
         assert(int_mask == 0);
+        if (ptd.ctrl & TD_CTRL_SPD) {
+            break;
+        }
         plink = ptd.link;
     }
 }
@@ -1102,7 +1100,7 @@
 
         case TD_RESULT_ASYNC_START:
             trace_usb_uhci_td_async(curr_qh & ~0xf, link & ~0xf);
-            if (is_valid(td.link)) {
+            if (is_valid(td.link) && !(td.ctrl & TD_CTRL_SPD)) {
                 uhci_fill_queue(s, &td);
             }
             link = curr_qh ? qh.link : td.link;
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 3eb27fa..3a41b06 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -23,6 +23,7 @@
 #include "hw/usb.h"
 #include "hw/pci.h"
 #include "hw/msi.h"
+#include "hw/msix.h"
 #include "trace.h"
 
 //#define DEBUG_XHCI
@@ -36,17 +37,14 @@
 #define FIXME() do { fprintf(stderr, "FIXME %s:%d\n", \
                              __func__, __LINE__); abort(); } while (0)
 
-#define MAXSLOTS 8
-#define MAXINTRS 1
+#define MAXPORTS_2 15
+#define MAXPORTS_3 15
 
-#define USB2_PORTS 4
-#define USB3_PORTS 4
-
-#define MAXPORTS (USB2_PORTS+USB3_PORTS)
+#define MAXPORTS (MAXPORTS_2+MAXPORTS_3)
+#define MAXSLOTS 64
+#define MAXINTRS 16
 
 #define TD_QUEUE 24
-#define BG_XFERS 8
-#define BG_PKTS 8
 
 /* Very pessimistic, let's hope it's enough for all cases */
 #define EV_QUEUE (((3*TD_QUEUE)+16)*MAXSLOTS)
@@ -55,24 +53,28 @@
 #define ER_FULL_HACK
 
 #define LEN_CAP         0x40
-#define OFF_OPER        LEN_CAP
 #define LEN_OPER        (0x400 + 0x10 * MAXPORTS)
-#define OFF_RUNTIME     ((OFF_OPER + LEN_OPER + 0x20) & ~0x1f)
-#define LEN_RUNTIME     (0x20 + MAXINTRS * 0x20)
-#define OFF_DOORBELL    (OFF_RUNTIME + LEN_RUNTIME)
+#define LEN_RUNTIME     ((MAXINTRS + 1) * 0x20)
 #define LEN_DOORBELL    ((MAXSLOTS + 1) * 0x20)
 
+#define OFF_OPER        LEN_CAP
+#define OFF_RUNTIME     0x1000
+#define OFF_DOORBELL    0x2000
+#define OFF_MSIX_TABLE  0x3000
+#define OFF_MSIX_PBA    0x3800
 /* must be power of 2 */
-#define LEN_REGS        0x2000
+#define LEN_REGS        0x4000
 
+#if (OFF_OPER + LEN_OPER) > OFF_RUNTIME
+#error Increase OFF_RUNTIME
+#endif
+#if (OFF_RUNTIME + LEN_RUNTIME) > OFF_DOORBELL
+#error Increase OFF_DOORBELL
+#endif
 #if (OFF_DOORBELL + LEN_DOORBELL) > LEN_REGS
 # error Increase LEN_REGS
 #endif
 
-#if MAXINTRS > 1
-# error TODO: only one interrupter supported
-#endif
-
 /* bit definitions */
 #define USBCMD_RS       (1<<0)
 #define USBCMD_HCRST    (1<<1)
@@ -258,6 +260,10 @@
 
 #define TRB_LK_TC           (1<<1)
 
+#define TRB_INTR_SHIFT          22
+#define TRB_INTR_MASK       0x3ff
+#define TRB_INTR(t)         (((t).status >> TRB_INTR_SHIFT) & TRB_INTR_MASK)
+
 #define EP_TYPE_MASK        0x7
 #define EP_TYPE_SHIFT           3
 
@@ -279,6 +285,8 @@
 #define SLOT_CONTEXT_ENTRIES_MASK 0x1f
 #define SLOT_CONTEXT_ENTRIES_SHIFT 27
 
+typedef struct XHCIState XHCIState;
+
 typedef enum EPType {
     ET_INVALID = 0,
     ET_ISO_OUT,
@@ -297,64 +305,67 @@
 } XHCIRing;
 
 typedef struct XHCIPort {
-    USBPort port;
+    XHCIState *xhci;
     uint32_t portsc;
+    uint32_t portnr;
+    USBPort  *uport;
+    uint32_t speedmask;
+    char name[16];
+    MemoryRegion mem;
 } XHCIPort;
 
-struct XHCIState;
-typedef struct XHCIState XHCIState;
-
 typedef struct XHCITransfer {
     XHCIState *xhci;
     USBPacket packet;
+    QEMUSGList sgl;
     bool running_async;
     bool running_retry;
     bool cancelled;
     bool complete;
-    bool backgrounded;
     unsigned int iso_pkts;
     unsigned int slotid;
     unsigned int epid;
     bool in_xfer;
     bool iso_xfer;
-    bool bg_xfer;
 
     unsigned int trb_count;
     unsigned int trb_alloced;
     XHCITRB *trbs;
 
-    unsigned int data_length;
-    unsigned int data_alloced;
-    uint8_t *data;
-
     TRBCCode status;
 
     unsigned int pkts;
     unsigned int pktsize;
     unsigned int cur_pkt;
+
+    uint64_t mfindex_kick;
 } XHCITransfer;
 
 typedef struct XHCIEPContext {
+    XHCIState *xhci;
+    unsigned int slotid;
+    unsigned int epid;
+
     XHCIRing ring;
     unsigned int next_xfer;
     unsigned int comp_xfer;
     XHCITransfer transfers[TD_QUEUE];
     XHCITransfer *retry;
-    bool bg_running;
-    bool bg_updating;
-    unsigned int next_bg;
-    XHCITransfer bg_transfers[BG_XFERS];
     EPType type;
     dma_addr_t pctx;
     unsigned int max_psize;
-    bool has_bg;
     uint32_t state;
+
+    /* iso xfer scheduling */
+    unsigned int interval;
+    int64_t mfindex_last;
+    QEMUTimer *kick_timer;
 } XHCIEPContext;
 
 typedef struct XHCISlot {
     bool enabled;
     dma_addr_t ctx;
-    unsigned int port;
+    USBPort *uport;
     unsigned int devaddr;
     XHCIEPContext * eps[31];
 } XHCISlot;
@@ -369,15 +380,44 @@
     uint8_t epid;
 } XHCIEvent;
 
+typedef struct XHCIInterrupter {
+    uint32_t iman;
+    uint32_t imod;
+    uint32_t erstsz;
+    uint32_t erstba_low;
+    uint32_t erstba_high;
+    uint32_t erdp_low;
+    uint32_t erdp_high;
+
+    bool msix_used, er_pcs, er_full;
+
+    dma_addr_t er_start;
+    uint32_t er_size;
+    unsigned int er_ep_idx;
+
+    XHCIEvent ev_buffer[EV_QUEUE];
+    unsigned int ev_buffer_put;
+    unsigned int ev_buffer_get;
+
+} XHCIInterrupter;
+
 struct XHCIState {
     PCIDevice pci_dev;
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
+    MemoryRegion mem_cap;
+    MemoryRegion mem_oper;
+    MemoryRegion mem_runtime;
+    MemoryRegion mem_doorbell;
     const char *name;
-    uint32_t msi;
     unsigned int devaddr;
 
+    /* properties */
+    uint32_t numports_2;
+    uint32_t numports_3;
+    uint32_t flags;
+
     /* Operational Registers */
     uint32_t usbcmd;
     uint32_t usbsts;
@@ -388,29 +428,15 @@
     uint32_t dcbaap_high;
     uint32_t config;
 
+    USBPort  uports[MAX(MAXPORTS_2, MAXPORTS_3)];
     XHCIPort ports[MAXPORTS];
     XHCISlot slots[MAXSLOTS];
+    uint32_t numports;
 
     /* Runtime Registers */
-    uint32_t mfindex;
-    /* note: we only support one interrupter */
-    uint32_t iman;
-    uint32_t imod;
-    uint32_t erstsz;
-    uint32_t erstba_low;
-    uint32_t erstba_high;
-    uint32_t erdp_low;
-    uint32_t erdp_high;
-
-    dma_addr_t er_start;
-    uint32_t er_size;
-    bool er_pcs;
-    unsigned int er_ep_idx;
-    bool er_full;
-
-    XHCIEvent ev_buffer[EV_QUEUE];
-    unsigned int ev_buffer_put;
-    unsigned int ev_buffer_get;
+    int64_t mfindex_start;
+    QEMUTimer *mfwrap_timer;
+    XHCIInterrupter intr[MAXINTRS];
 
     XHCIRing cmd_ring;
 };
@@ -422,6 +448,16 @@
     uint32_t rsvd;
 } XHCIEvRingSeg;
 
+enum xhci_flags {
+    XHCI_FLAG_USE_MSI = 1,
+    XHCI_FLAG_USE_MSI_X,
+};
+
+static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid,
+                         unsigned int epid);
+static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v);
+static void xhci_write_event(XHCIState *xhci, XHCIEvent *event, int v);
+
 static const char *TRBType_names[] = {
     [TRB_RESERVED]                     = "TRB_RESERVED",
     [TR_NORMAL]                        = "TR_NORMAL",
@@ -460,6 +496,45 @@
     [CR_VENDOR_NEC_CHALLENGE_RESPONSE] = "CR_VENDOR_NEC_CHALLENGE_RESPONSE",
 };
 
+static const char *TRBCCode_names[] = {
+    [CC_INVALID]                       = "CC_INVALID",
+    [CC_SUCCESS]                       = "CC_SUCCESS",
+    [CC_DATA_BUFFER_ERROR]             = "CC_DATA_BUFFER_ERROR",
+    [CC_BABBLE_DETECTED]               = "CC_BABBLE_DETECTED",
+    [CC_USB_TRANSACTION_ERROR]         = "CC_USB_TRANSACTION_ERROR",
+    [CC_TRB_ERROR]                     = "CC_TRB_ERROR",
+    [CC_STALL_ERROR]                   = "CC_STALL_ERROR",
+    [CC_RESOURCE_ERROR]                = "CC_RESOURCE_ERROR",
+    [CC_BANDWIDTH_ERROR]               = "CC_BANDWIDTH_ERROR",
+    [CC_NO_SLOTS_ERROR]                = "CC_NO_SLOTS_ERROR",
+    [CC_INVALID_STREAM_TYPE_ERROR]     = "CC_INVALID_STREAM_TYPE_ERROR",
+    [CC_SLOT_NOT_ENABLED_ERROR]        = "CC_SLOT_NOT_ENABLED_ERROR",
+    [CC_EP_NOT_ENABLED_ERROR]          = "CC_EP_NOT_ENABLED_ERROR",
+    [CC_SHORT_PACKET]                  = "CC_SHORT_PACKET",
+    [CC_RING_UNDERRUN]                 = "CC_RING_UNDERRUN",
+    [CC_RING_OVERRUN]                  = "CC_RING_OVERRUN",
+    [CC_VF_ER_FULL]                    = "CC_VF_ER_FULL",
+    [CC_PARAMETER_ERROR]               = "CC_PARAMETER_ERROR",
+    [CC_BANDWIDTH_OVERRUN]             = "CC_BANDWIDTH_OVERRUN",
+    [CC_CONTEXT_STATE_ERROR]           = "CC_CONTEXT_STATE_ERROR",
+    [CC_NO_PING_RESPONSE_ERROR]        = "CC_NO_PING_RESPONSE_ERROR",
+    [CC_EVENT_RING_FULL_ERROR]         = "CC_EVENT_RING_FULL_ERROR",
+    [CC_INCOMPATIBLE_DEVICE_ERROR]     = "CC_INCOMPATIBLE_DEVICE_ERROR",
+    [CC_MISSED_SERVICE_ERROR]          = "CC_MISSED_SERVICE_ERROR",
+    [CC_COMMAND_RING_STOPPED]          = "CC_COMMAND_RING_STOPPED",
+    [CC_COMMAND_ABORTED]               = "CC_COMMAND_ABORTED",
+    [CC_STOPPED]                       = "CC_STOPPED",
+    [CC_STOPPED_LENGTH_INVALID]        = "CC_STOPPED_LENGTH_INVALID",
+    [CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR]
+    = "CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR",
+    [CC_ISOCH_BUFFER_OVERRUN]          = "CC_ISOCH_BUFFER_OVERRUN",
+    [CC_EVENT_LOST_ERROR]              = "CC_EVENT_LOST_ERROR",
+    [CC_UNDEFINED_ERROR]               = "CC_UNDEFINED_ERROR",
+    [CC_INVALID_STREAM_ID_ERROR]       = "CC_INVALID_STREAM_ID_ERROR",
+    [CC_SECONDARY_BANDWIDTH_ERROR]     = "CC_SECONDARY_BANDWIDTH_ERROR",
+    [CC_SPLIT_TRANSACTION_ERROR]       = "CC_SPLIT_TRANSACTION_ERROR",
+};
+
 static const char *lookup_name(uint32_t index, const char **list, uint32_t llen)
 {
     if (index >= llen || list[index] == NULL) {
@@ -474,8 +549,42 @@
                        ARRAY_SIZE(TRBType_names));
 }
 
-static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid,
-                         unsigned int epid);
+static const char *event_name(XHCIEvent *event)
+{
+    return lookup_name(event->ccode, TRBCCode_names,
+                       ARRAY_SIZE(TRBCCode_names));
+}
+
+static uint64_t xhci_mfindex_get(XHCIState *xhci)
+{
+    int64_t now = qemu_get_clock_ns(vm_clock);
+    return (now - xhci->mfindex_start) / 125000;
+}
+
+static void xhci_mfwrap_update(XHCIState *xhci)
+{
+    const uint32_t bits = USBCMD_RS | USBCMD_EWE;
+    uint32_t mfindex, left;
+    int64_t now;
+
+    if ((xhci->usbcmd & bits) == bits) {
+        now = qemu_get_clock_ns(vm_clock);
+        mfindex = ((now - xhci->mfindex_start) / 125000) & 0x3fff;
+        left = 0x4000 - mfindex;
+        qemu_mod_timer(xhci->mfwrap_timer, now + left * 125000);
+    } else {
+        qemu_del_timer(xhci->mfwrap_timer);
+    }
+}
+
+static void xhci_mfwrap_timer(void *opaque)
+{
+    XHCIState *xhci = opaque;
+    XHCIEvent wrap = { ER_MFINDEX_WRAP, CC_SUCCESS };
+
+    xhci_event(xhci, &wrap, 0);
+    xhci_mfwrap_update(xhci);
+}
 
 static inline dma_addr_t xhci_addr64(uint32_t low, uint32_t high)
 {
@@ -495,29 +604,106 @@
     }
 }
 
-static void xhci_irq_update(XHCIState *xhci)
+static XHCIPort *xhci_lookup_port(XHCIState *xhci, struct USBPort *uport)
+{
+    int index;
+
+    if (!uport->dev) {
+        return NULL;
+    }
+    switch (uport->dev->speed) {
+    case USB_SPEED_LOW:
+    case USB_SPEED_FULL:
+    case USB_SPEED_HIGH:
+        index = uport->index;
+        break;
+    case USB_SPEED_SUPER:
+        index = uport->index + xhci->numports_2;
+        break;
+    default:
+        return NULL;
+    }
+    return &xhci->ports[index];
+}
+
+static void xhci_intx_update(XHCIState *xhci)
 {
     int level = 0;
 
-    if (xhci->iman & IMAN_IP && xhci->iman & IMAN_IE &&
+    if (msix_enabled(&xhci->pci_dev) ||
+        msi_enabled(&xhci->pci_dev)) {
+        return;
+    }
+
+    if (xhci->intr[0].iman & IMAN_IP &&
+        xhci->intr[0].iman & IMAN_IE &&
         xhci->usbcmd & USBCMD_INTE) {
         level = 1;
     }
 
-    if (xhci->msi && msi_enabled(&xhci->pci_dev)) {
-        if (level) {
-            trace_usb_xhci_irq_msi(0);
-            msi_notify(&xhci->pci_dev, 0);
-        }
+    trace_usb_xhci_irq_intx(level);
+    qemu_set_irq(xhci->irq, level);
+}
+
+static void xhci_msix_update(XHCIState *xhci, int v)
+{
+    bool enabled;
+
+    if (!msix_enabled(&xhci->pci_dev)) {
+        return;
+    }
+
+    enabled = xhci->intr[v].iman & IMAN_IE;
+    if (enabled == xhci->intr[v].msix_used) {
+        return;
+    }
+
+    if (enabled) {
+        trace_usb_xhci_irq_msix_use(v);
+        msix_vector_use(&xhci->pci_dev, v);
+        xhci->intr[v].msix_used = true;
     } else {
-        trace_usb_xhci_irq_intx(level);
-        qemu_set_irq(xhci->irq, level);
+        trace_usb_xhci_irq_msix_unuse(v);
+        msix_vector_unuse(&xhci->pci_dev, v);
+        xhci->intr[v].msix_used = false;
+    }
+}
+
+static void xhci_intr_raise(XHCIState *xhci, int v)
+{
+    xhci->intr[v].erdp_low |= ERDP_EHB;
+    xhci->intr[v].iman |= IMAN_IP;
+    xhci->usbsts |= USBSTS_EINT;
+
+    if (!(xhci->intr[v].iman & IMAN_IE)) {
+        return;
+    }
+
+    if (!(xhci->usbcmd & USBCMD_INTE)) {
+        return;
+    }
+
+    if (msix_enabled(&xhci->pci_dev)) {
+        trace_usb_xhci_irq_msix(v);
+        msix_notify(&xhci->pci_dev, v);
+        return;
+    }
+
+    if (msi_enabled(&xhci->pci_dev)) {
+        trace_usb_xhci_irq_msi(v);
+        msi_notify(&xhci->pci_dev, v);
+        return;
+    }
+
+    if (v == 0) {
+        trace_usb_xhci_irq_intx(1);
+        qemu_set_irq(xhci->irq, 1);
     }
 }
 
 static inline int xhci_running(XHCIState *xhci)
 {
-    return !(xhci->usbsts & USBSTS_HCH) && !xhci->er_full;
+    return !(xhci->usbsts & USBSTS_HCH) && !xhci->intr[0].er_full;
 }
 
 static void xhci_die(XHCIState *xhci)
@@ -526,8 +712,9 @@
     fprintf(stderr, "xhci: asserted controller error\n");
 }
 
-static void xhci_write_event(XHCIState *xhci, XHCIEvent *event)
+static void xhci_write_event(XHCIState *xhci, XHCIEvent *event, int v)
 {
+    XHCIInterrupter *intr = &xhci->intr[v];
     XHCITRB ev_trb;
     dma_addr_t addr;
 
@@ -535,26 +722,28 @@
     ev_trb.status = cpu_to_le32(event->length | (event->ccode << 24));
     ev_trb.control = (event->slotid << 24) | (event->epid << 16) |
                      event->flags | (event->type << TRB_TYPE_SHIFT);
-    if (xhci->er_pcs) {
+    if (intr->er_pcs) {
         ev_trb.control |= TRB_C;
     }
     ev_trb.control = cpu_to_le32(ev_trb.control);
 
-    trace_usb_xhci_queue_event(xhci->er_ep_idx, trb_name(&ev_trb),
-                               ev_trb.parameter, ev_trb.status, ev_trb.control);
+    trace_usb_xhci_queue_event(v, intr->er_ep_idx, trb_name(&ev_trb),
+                               event_name(event), ev_trb.parameter,
+                               ev_trb.status, ev_trb.control);
 
-    addr = xhci->er_start + TRB_SIZE*xhci->er_ep_idx;
+    addr = intr->er_start + TRB_SIZE*intr->er_ep_idx;
     pci_dma_write(&xhci->pci_dev, addr, &ev_trb, TRB_SIZE);
 
-    xhci->er_ep_idx++;
-    if (xhci->er_ep_idx >= xhci->er_size) {
-        xhci->er_ep_idx = 0;
-        xhci->er_pcs = !xhci->er_pcs;
+    intr->er_ep_idx++;
+    if (intr->er_ep_idx >= intr->er_size) {
+        intr->er_ep_idx = 0;
+        intr->er_pcs = !intr->er_pcs;
     }
 }
 
-static void xhci_events_update(XHCIState *xhci)
+static void xhci_events_update(XHCIState *xhci, int v)
 {
+    XHCIInterrupter *intr = &xhci->intr[v];
     dma_addr_t erdp;
     unsigned int dp_idx;
     bool do_irq = 0;
@@ -563,122 +752,121 @@
         return;
     }
 
-    erdp = xhci_addr64(xhci->erdp_low, xhci->erdp_high);
-    if (erdp < xhci->er_start ||
-        erdp >= (xhci->er_start + TRB_SIZE*xhci->er_size)) {
+    erdp = xhci_addr64(intr->erdp_low, intr->erdp_high);
+    if (erdp < intr->er_start ||
+        erdp >= (intr->er_start + TRB_SIZE*intr->er_size)) {
         fprintf(stderr, "xhci: ERDP out of bounds: "DMA_ADDR_FMT"\n", erdp);
-        fprintf(stderr, "xhci: ER at "DMA_ADDR_FMT" len %d\n",
-                xhci->er_start, xhci->er_size);
+        fprintf(stderr, "xhci: ER[%d] at "DMA_ADDR_FMT" len %d\n",
+                v, intr->er_start, intr->er_size);
         xhci_die(xhci);
         return;
     }
-    dp_idx = (erdp - xhci->er_start) / TRB_SIZE;
-    assert(dp_idx < xhci->er_size);
+    dp_idx = (erdp - intr->er_start) / TRB_SIZE;
+    assert(dp_idx < intr->er_size);
 
     /* NEC didn't read section 4.9.4 of the spec (v1.0 p139 top Note) and thus
      * deadlocks when the ER is full. Hack it by holding off events until
      * the driver decides to free at least half of the ring */
-    if (xhci->er_full) {
-        int er_free = dp_idx - xhci->er_ep_idx;
+    if (intr->er_full) {
+        int er_free = dp_idx - intr->er_ep_idx;
         if (er_free <= 0) {
-            er_free += xhci->er_size;
+            er_free += intr->er_size;
         }
-        if (er_free < (xhci->er_size/2)) {
+        if (er_free < (intr->er_size/2)) {
             DPRINTF("xhci_events_update(): event ring still "
                     "more than half full (hack)\n");
             return;
         }
     }
 
-    while (xhci->ev_buffer_put != xhci->ev_buffer_get) {
-        assert(xhci->er_full);
-        if (((xhci->er_ep_idx+1) % xhci->er_size) == dp_idx) {
+    while (intr->ev_buffer_put != intr->ev_buffer_get) {
+        assert(intr->er_full);
+        if (((intr->er_ep_idx+1) % intr->er_size) == dp_idx) {
             DPRINTF("xhci_events_update(): event ring full again\n");
 #ifndef ER_FULL_HACK
             XHCIEvent full = {ER_HOST_CONTROLLER, CC_EVENT_RING_FULL_ERROR};
-            xhci_write_event(xhci, &full);
+            xhci_write_event(xhci, &full, v);
 #endif
             do_irq = 1;
             break;
         }
-        XHCIEvent *event = &xhci->ev_buffer[xhci->ev_buffer_get];
-        xhci_write_event(xhci, event);
-        xhci->ev_buffer_get++;
+        XHCIEvent *event = &intr->ev_buffer[intr->ev_buffer_get];
+        xhci_write_event(xhci, event, v);
+        intr->ev_buffer_get++;
         do_irq = 1;
-        if (xhci->ev_buffer_get == EV_QUEUE) {
-            xhci->ev_buffer_get = 0;
+        if (intr->ev_buffer_get == EV_QUEUE) {
+            intr->ev_buffer_get = 0;
         }
     }
 
     if (do_irq) {
-        xhci->erdp_low |= ERDP_EHB;
-        xhci->iman |= IMAN_IP;
-        xhci->usbsts |= USBSTS_EINT;
-        xhci_irq_update(xhci);
+        xhci_intr_raise(xhci, v);
     }
 
-    if (xhci->er_full && xhci->ev_buffer_put == xhci->ev_buffer_get) {
+    if (intr->er_full && intr->ev_buffer_put == intr->ev_buffer_get) {
         DPRINTF("xhci_events_update(): event ring no longer full\n");
-        xhci->er_full = 0;
+        intr->er_full = 0;
     }
-    return;
 }
 
-static void xhci_event(XHCIState *xhci, XHCIEvent *event)
+static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v)
 {
+    XHCIInterrupter *intr;
     dma_addr_t erdp;
     unsigned int dp_idx;
 
-    if (xhci->er_full) {
+    if (v >= MAXINTRS) {
+        DPRINTF("intr nr out of range (%d >= %d)\n", v, MAXINTRS);
+        return;
+    }
+    intr = &xhci->intr[v];
+
+    if (intr->er_full) {
         DPRINTF("xhci_event(): ER full, queueing\n");
-        if (((xhci->ev_buffer_put+1) % EV_QUEUE) == xhci->ev_buffer_get) {
+        if (((intr->ev_buffer_put+1) % EV_QUEUE) == intr->ev_buffer_get) {
             fprintf(stderr, "xhci: event queue full, dropping event!\n");
             return;
         }
-        xhci->ev_buffer[xhci->ev_buffer_put++] = *event;
-        if (xhci->ev_buffer_put == EV_QUEUE) {
-            xhci->ev_buffer_put = 0;
+        intr->ev_buffer[intr->ev_buffer_put++] = *event;
+        if (intr->ev_buffer_put == EV_QUEUE) {
+            intr->ev_buffer_put = 0;
         }
         return;
     }
 
-    erdp = xhci_addr64(xhci->erdp_low, xhci->erdp_high);
-    if (erdp < xhci->er_start ||
-        erdp >= (xhci->er_start + TRB_SIZE*xhci->er_size)) {
+    erdp = xhci_addr64(intr->erdp_low, intr->erdp_high);
+    if (erdp < intr->er_start ||
+        erdp >= (intr->er_start + TRB_SIZE*intr->er_size)) {
         fprintf(stderr, "xhci: ERDP out of bounds: "DMA_ADDR_FMT"\n", erdp);
-        fprintf(stderr, "xhci: ER at "DMA_ADDR_FMT" len %d\n",
-                xhci->er_start, xhci->er_size);
+        fprintf(stderr, "xhci: ER[%d] at "DMA_ADDR_FMT" len %d\n",
+                v, intr->er_start, intr->er_size);
         xhci_die(xhci);
         return;
     }
 
-    dp_idx = (erdp - xhci->er_start) / TRB_SIZE;
-    assert(dp_idx < xhci->er_size);
+    dp_idx = (erdp - intr->er_start) / TRB_SIZE;
+    assert(dp_idx < intr->er_size);
 
-    if ((xhci->er_ep_idx+1) % xhci->er_size == dp_idx) {
+    if ((intr->er_ep_idx+1) % intr->er_size == dp_idx) {
         DPRINTF("xhci_event(): ER full, queueing\n");
 #ifndef ER_FULL_HACK
         XHCIEvent full = {ER_HOST_CONTROLLER, CC_EVENT_RING_FULL_ERROR};
         xhci_write_event(xhci, &full);
 #endif
-        xhci->er_full = 1;
-        if (((xhci->ev_buffer_put+1) % EV_QUEUE) == xhci->ev_buffer_get) {
+        intr->er_full = 1;
+        if (((intr->ev_buffer_put+1) % EV_QUEUE) == intr->ev_buffer_get) {
             fprintf(stderr, "xhci: event queue full, dropping event!\n");
             return;
         }
-        xhci->ev_buffer[xhci->ev_buffer_put++] = *event;
-        if (xhci->ev_buffer_put == EV_QUEUE) {
-            xhci->ev_buffer_put = 0;
+        intr->ev_buffer[intr->ev_buffer_put++] = *event;
+        if (intr->ev_buffer_put == EV_QUEUE) {
+            intr->ev_buffer_put = 0;
         }
     } else {
-        xhci_write_event(xhci, event);
+        xhci_write_event(xhci, event, v);
     }
 
-    xhci->erdp_low |= ERDP_EHB;
-    xhci->iman |= IMAN_IP;
-    xhci->usbsts |= USBSTS_EINT;
-
-    xhci_irq_update(xhci);
+    xhci_intr_raise(xhci, v);
 }
 
 static void xhci_ring_init(XHCIState *xhci, XHCIRing *ring,
@@ -770,17 +958,18 @@
     }
 }
 
-static void xhci_er_reset(XHCIState *xhci)
+static void xhci_er_reset(XHCIState *xhci, int v)
 {
+    XHCIInterrupter *intr = &xhci->intr[v];
     XHCIEvRingSeg seg;
 
     /* cache the (sole) event ring segment location */
-    if (xhci->erstsz != 1) {
-        fprintf(stderr, "xhci: invalid value for ERSTSZ: %d\n", xhci->erstsz);
+    if (intr->erstsz != 1) {
+        fprintf(stderr, "xhci: invalid value for ERSTSZ: %d\n", intr->erstsz);
         xhci_die(xhci);
         return;
     }
-    dma_addr_t erstba = xhci_addr64(xhci->erstba_low, xhci->erstba_high);
+    dma_addr_t erstba = xhci_addr64(intr->erstba_low, intr->erstba_high);
     pci_dma_read(&xhci->pci_dev, erstba, &seg, sizeof(seg));
     le32_to_cpus(&seg.addr_low);
     le32_to_cpus(&seg.addr_high);
@@ -790,21 +979,22 @@
         xhci_die(xhci);
         return;
     }
-    xhci->er_start = xhci_addr64(seg.addr_low, seg.addr_high);
-    xhci->er_size = seg.size;
+    intr->er_start = xhci_addr64(seg.addr_low, seg.addr_high);
+    intr->er_size = seg.size;
 
-    xhci->er_ep_idx = 0;
-    xhci->er_pcs = 1;
-    xhci->er_full = 0;
+    intr->er_ep_idx = 0;
+    intr->er_pcs = 1;
+    intr->er_full = 0;
 
-    DPRINTF("xhci: event ring:" DMA_ADDR_FMT " [%d]\n",
-            xhci->er_start, xhci->er_size);
+    DPRINTF("xhci: event ring[%d]:" DMA_ADDR_FMT " [%d]\n",
+            v, intr->er_start, intr->er_size);
 }
 
 static void xhci_run(XHCIState *xhci)
 {
     trace_usb_xhci_run();
     xhci->usbsts &= ~USBSTS_HCH;
+    xhci->mfindex_start = qemu_get_clock_ns(vm_clock);
 }
 
 static void xhci_stop(XHCIState *xhci)
@@ -833,6 +1023,12 @@
     epctx->state = state;
 }
 
+static void xhci_ep_kick_timer(void *opaque)
+{
+    XHCIEPContext *epctx = opaque;
+    xhci_kick_ep(epctx->xhci, epctx->slotid, epctx->epid);
+}
+
 static TRBCCode xhci_enable_ep(XHCIState *xhci, unsigned int slotid,
                                unsigned int epid, dma_addr_t pctx,
                                uint32_t *ctx)
@@ -854,6 +1050,9 @@
 
     epctx = g_malloc(sizeof(XHCIEPContext));
     memset(epctx, 0, sizeof(XHCIEPContext));
+    epctx->xhci = xhci;
+    epctx->slotid = slotid;
+    epctx->epid = epid;
 
     slot->eps[epid-1] = epctx;
 
@@ -866,16 +1065,16 @@
     epctx->pctx = pctx;
     epctx->max_psize = ctx[1]>>16;
     epctx->max_psize *= 1+((ctx[1]>>8)&0xff);
-    epctx->has_bg = false;
-    if (epctx->type == ET_ISO_IN) {
-        epctx->has_bg = true;
-    }
     DPRINTF("xhci: endpoint %d.%d max transaction (burst) size is %d\n",
             epid/2, epid%2, epctx->max_psize);
     for (i = 0; i < ARRAY_SIZE(epctx->transfers); i++) {
         usb_packet_init(&epctx->transfers[i].packet);
     }
 
+    epctx->interval = 1 << (ctx[0] >> 16) & 0xff;
+    epctx->mfindex_last = 0;
+    epctx->kick_timer = qemu_new_timer_ns(vm_clock, xhci_ep_kick_timer, epctx);
+
     epctx->state = EP_RUNNING;
     ctx[0] &= ~EP_STATE_MASK;
     ctx[0] |= EP_RUNNING;
@@ -915,42 +1114,16 @@
         if (t->running_retry) {
             t->running_retry = 0;
             epctx->retry = NULL;
-        }
-        if (t->backgrounded) {
-            t->backgrounded = 0;
+            qemu_del_timer(epctx->kick_timer);
         }
         if (t->trbs) {
             g_free(t->trbs);
         }
-        if (t->data) {
-            g_free(t->data);
-        }
 
         t->trbs = NULL;
-        t->data = NULL;
         t->trb_count = t->trb_alloced = 0;
-        t->data_length = t->data_alloced = 0;
         xferi = (xferi + 1) % TD_QUEUE;
     }
-    if (epctx->has_bg) {
-        xferi = epctx->next_bg;
-        for (i = 0; i < BG_XFERS; i++) {
-            XHCITransfer *t = &epctx->bg_transfers[xferi];
-            if (t->running_async) {
-                usb_cancel_packet(&t->packet);
-                t->running_async = 0;
-                t->cancelled = 1;
-                DPRINTF("xhci: cancelling bg transfer %d, waiting for it to complete...\n", i);
-                killed++;
-            }
-            if (t->data) {
-                g_free(t->data);
-            }
-
-            t->data = NULL;
-            xferi = (xferi + 1) % BG_XFERS;
-        }
-    }
     return killed;
 }
 
@@ -977,6 +1150,7 @@
 
     xhci_set_ep_state(xhci, epctx, EP_DISABLED);
 
+    qemu_free_timer(epctx->kick_timer);
     g_free(epctx);
     slot->eps[epid-1] = NULL;
 
@@ -1057,7 +1231,7 @@
         ep |= 0x80;
     }
 
-    dev = xhci->ports[xhci->slots[slotid-1].port-1].port.dev;
+    dev = xhci->slots[slotid-1].uport->dev;
     if (!dev) {
         return CC_USB_TRANSACTION_ERROR;
     }
@@ -1081,7 +1255,7 @@
         return CC_TRB_ERROR;
     }
 
-    DPRINTF("xhci_set_ep_dequeue(%d, %d, %016"PRIx64")\n", slotid, epid, pdequeue);
+    trace_usb_xhci_ep_set_dequeue(slotid, epid, pdequeue);
     dequeue = xhci_mask64(pdequeue);
 
     slot = &xhci->slots[slotid-1];
@@ -1107,24 +1281,13 @@
     return CC_SUCCESS;
 }
 
-static int xhci_xfer_data(XHCITransfer *xfer, uint8_t *data,
-                          unsigned int length, bool in_xfer, bool out_xfer,
-                          bool report)
+static int xhci_xfer_map(XHCITransfer *xfer)
 {
-    int i;
-    uint32_t edtla = 0;
-    unsigned int transferred = 0;
-    unsigned int left = length;
-    bool reported = 0;
-    bool shortpkt = 0;
-    XHCIEvent event = {ER_TRANSFER, CC_SUCCESS};
+    int in_xfer = (xfer->packet.pid == USB_TOKEN_IN);
     XHCIState *xhci = xfer->xhci;
+    int i;
 
-    DPRINTF("xhci_xfer_data(len=%d, in_xfer=%d, out_xfer=%d, report=%d)\n",
-            length, in_xfer, out_xfer, report);
-
-    assert(!(in_xfer && out_xfer));
-
+    pci_dma_sglist_init(&xfer->sgl, &xhci->pci_dev, xfer->trb_count);
     for (i = 0; i < xfer->trb_count; i++) {
         XHCITRB *trb = &xfer->trbs[i];
         dma_addr_t addr;
@@ -1134,54 +1297,70 @@
         case TR_DATA:
             if ((!(trb->control & TRB_TR_DIR)) != (!in_xfer)) {
                 fprintf(stderr, "xhci: data direction mismatch for TR_DATA\n");
-                xhci_die(xhci);
-                return transferred;
+                goto err;
             }
             /* fallthrough */
         case TR_NORMAL:
         case TR_ISOCH:
             addr = xhci_mask64(trb->parameter);
             chunk = trb->status & 0x1ffff;
+            if (trb->control & TRB_TR_IDT) {
+                if (chunk > 8 || in_xfer) {
+                    fprintf(stderr, "xhci: invalid immediate data TRB\n");
+                    goto err;
+                }
+                qemu_sglist_add(&xfer->sgl, trb->addr, chunk);
+            } else {
+                qemu_sglist_add(&xfer->sgl, addr, chunk);
+            }
+            break;
+        }
+    }
+
+    usb_packet_map(&xfer->packet, &xfer->sgl);
+    return 0;
+
+err:
+    qemu_sglist_destroy(&xfer->sgl);
+    xhci_die(xhci);
+    return -1;
+}
+
+static void xhci_xfer_unmap(XHCITransfer *xfer)
+{
+    usb_packet_unmap(&xfer->packet, &xfer->sgl);
+    qemu_sglist_destroy(&xfer->sgl);
+}
+
+static void xhci_xfer_report(XHCITransfer *xfer)
+{
+    uint32_t edtla = 0;
+    unsigned int left;
+    bool reported = 0;
+    bool shortpkt = 0;
+    XHCIEvent event = {ER_TRANSFER, CC_SUCCESS};
+    XHCIState *xhci = xfer->xhci;
+    int i;
+
+    left = xfer->packet.result < 0 ? 0 : xfer->packet.result;
+
+    for (i = 0; i < xfer->trb_count; i++) {
+        XHCITRB *trb = &xfer->trbs[i];
+        unsigned int chunk = 0;
+
+        switch (TRB_TYPE(*trb)) {
+        case TR_DATA:
+        case TR_NORMAL:
+        case TR_ISOCH:
+            chunk = trb->status & 0x1ffff;
             if (chunk > left) {
                 chunk = left;
-                shortpkt = 1;
-            }
-            if (in_xfer || out_xfer) {
-                if (trb->control & TRB_TR_IDT) {
-                    uint64_t idata;
-                    if (chunk > 8 || in_xfer) {
-                        fprintf(stderr, "xhci: invalid immediate data TRB\n");
-                        xhci_die(xhci);
-                        return transferred;
-                    }
-                    idata = le64_to_cpu(trb->parameter);
-                    memcpy(data, &idata, chunk);
-                } else {
-                    DPRINTF("xhci_xfer_data: r/w(%d) %d bytes at "
-                            DMA_ADDR_FMT "\n", in_xfer, chunk, addr);
-                    if (in_xfer) {
-                        pci_dma_write(&xhci->pci_dev, addr, data, chunk);
-                    } else {
-                        pci_dma_read(&xhci->pci_dev, addr, data, chunk);
-                    }
-#ifdef DEBUG_DATA
-                    unsigned int count = chunk;
-                    int i;
-                    if (count > 16) {
-                        count = 16;
-                    }
-                    DPRINTF(" ::");
-                    for (i = 0; i < count; i++) {
-                        DPRINTF(" %02x", data[i]);
-                    }
-                    DPRINTF("\n");
-#endif
+                if (xfer->status == CC_SUCCESS) {
+                    shortpkt = 1;
                 }
             }
             left -= chunk;
-            data += chunk;
             edtla += chunk;
-            transferred += chunk;
             break;
         case TR_STATUS:
             reported = 0;
@@ -1189,8 +1368,9 @@
             break;
         }
 
-        if (report && !reported && (trb->control & TRB_TR_IOC ||
-            (shortpkt && (trb->control & TRB_TR_ISP)))) {
+        if (!reported && ((trb->control & TRB_TR_IOC) ||
+                          (shortpkt && (trb->control & TRB_TR_ISP)) ||
+                          (xfer->status != CC_SUCCESS))) {
             event.slotid = xfer->slotid;
             event.epid = xfer->epid;
             event.length = (trb->status & 0x1ffff) - chunk;
@@ -1208,11 +1388,13 @@
                 DPRINTF("xhci_xfer_data: EDTLA=%d\n", event.length);
                 edtla = 0;
             }
-            xhci_event(xhci, &event);
+            xhci_event(xhci, &event, TRB_INTR(*trb));
             reported = 1;
+            if (xfer->status != CC_SUCCESS) {
+                return;
+            }
         }
     }
-    return transferred;
 }
 
 static void xhci_stall_ep(XHCITransfer *xfer)
@@ -1231,169 +1413,30 @@
 static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer,
                        XHCIEPContext *epctx);
 
-static void xhci_bg_update(XHCIState *xhci, XHCIEPContext *epctx)
+static int xhci_setup_packet(XHCITransfer *xfer)
 {
-    if (epctx->bg_updating) {
-        return;
-    }
-    DPRINTF("xhci_bg_update(%p, %p)\n", xhci, epctx);
-    assert(epctx->has_bg);
-    DPRINTF("xhci: fg=%d bg=%d\n", epctx->comp_xfer, epctx->next_bg);
-    epctx->bg_updating = 1;
-    while (epctx->transfers[epctx->comp_xfer].backgrounded &&
-           epctx->bg_transfers[epctx->next_bg].complete) {
-        XHCITransfer *fg = &epctx->transfers[epctx->comp_xfer];
-        XHCITransfer *bg = &epctx->bg_transfers[epctx->next_bg];
-#if 0
-        DPRINTF("xhci: completing fg %d from bg %d.%d (stat: %d)\n",
-                epctx->comp_xfer, epctx->next_bg, bg->cur_pkt,
-                bg->usbxfer->iso_packet_desc[bg->cur_pkt].status
-               );
-#endif
-        assert(epctx->type == ET_ISO_IN);
-        assert(bg->iso_xfer);
-        assert(bg->in_xfer);
-        uint8_t *p = bg->data + bg->cur_pkt * bg->pktsize;
-#if 0
-        int len = bg->usbxfer->iso_packet_desc[bg->cur_pkt].actual_length;
-        fg->status = libusb_to_ccode(bg->usbxfer->iso_packet_desc[bg->cur_pkt].status);
-#else
-        int len = 0;
-        FIXME();
-#endif
-        fg->complete = 1;
-        fg->backgrounded = 0;
-
-        if (fg->status == CC_STALL_ERROR) {
-            xhci_stall_ep(fg);
-        }
-
-        xhci_xfer_data(fg, p, len, 1, 0, 1);
-
-        epctx->comp_xfer++;
-        if (epctx->comp_xfer == TD_QUEUE) {
-            epctx->comp_xfer = 0;
-        }
-        DPRINTF("next fg xfer: %d\n", epctx->comp_xfer);
-        bg->cur_pkt++;
-        if (bg->cur_pkt == bg->pkts) {
-            bg->complete = 0;
-            if (xhci_submit(xhci, bg, epctx) < 0) {
-                fprintf(stderr, "xhci: bg resubmit failed\n");
-            }
-            epctx->next_bg++;
-            if (epctx->next_bg == BG_XFERS) {
-                epctx->next_bg = 0;
-            }
-            DPRINTF("next bg xfer: %d\n", epctx->next_bg);
-
-        xhci_kick_ep(xhci, fg->slotid, fg->epid);
-        }
-    }
-    epctx->bg_updating = 0;
-}
-
-#if 0
-static void xhci_xfer_cb(struct libusb_transfer *transfer)
-{
-    XHCIState *xhci;
-    XHCITransfer *xfer;
-
-    xfer = (XHCITransfer *)transfer->user_data;
-    xhci = xfer->xhci;
-
-    DPRINTF("xhci_xfer_cb(slot=%d, ep=%d, status=%d)\n", xfer->slotid,
-            xfer->epid, transfer->status);
-
-    assert(xfer->slotid >= 1 && xfer->slotid <= MAXSLOTS);
-    assert(xfer->epid >= 1 && xfer->epid <= 31);
-
-    if (xfer->cancelled) {
-        DPRINTF("xhci: transfer cancelled, not reporting anything\n");
-        xfer->running = 0;
-        return;
-    }
-
-    XHCIEPContext *epctx;
-    XHCISlot *slot;
-    slot = &xhci->slots[xfer->slotid-1];
-    assert(slot->eps[xfer->epid-1]);
-    epctx = slot->eps[xfer->epid-1];
-
-    if (xfer->bg_xfer) {
-        DPRINTF("xhci: background transfer, updating\n");
-        xfer->complete = 1;
-        xfer->running = 0;
-        xhci_bg_update(xhci, epctx);
-        return;
-    }
-
-    if (xfer->iso_xfer) {
-        transfer->status = transfer->iso_packet_desc[0].status;
-        transfer->actual_length = transfer->iso_packet_desc[0].actual_length;
-    }
-
-    xfer->status = libusb_to_ccode(transfer->status);
-
-    xfer->complete = 1;
-    xfer->running = 0;
-
-    if (transfer->status == LIBUSB_TRANSFER_STALL)
-        xhci_stall_ep(xhci, epctx, xfer);
-
-    DPRINTF("xhci: transfer actual length = %d\n", transfer->actual_length);
-
-    if (xfer->in_xfer) {
-        if (xfer->epid == 1) {
-            xhci_xfer_data(xhci, xfer, xfer->data + 8,
-                           transfer->actual_length, 1, 0, 1);
-        } else {
-            xhci_xfer_data(xhci, xfer, xfer->data,
-                           transfer->actual_length, 1, 0, 1);
-        }
-    } else {
-        xhci_xfer_data(xhci, xfer, NULL, transfer->actual_length, 0, 0, 1);
-    }
-
-    xhci_kick_ep(xhci, xfer->slotid, xfer->epid);
-}
-
-static int xhci_hle_control(XHCIState *xhci, XHCITransfer *xfer,
-                            uint8_t bmRequestType, uint8_t bRequest,
-                            uint16_t wValue, uint16_t wIndex, uint16_t wLength)
-{
-    uint16_t type_req = (bmRequestType << 8) | bRequest;
-
-    switch (type_req) {
-        case 0x0000 | USB_REQ_SET_CONFIGURATION:
-            DPRINTF("xhci: HLE switch configuration\n");
-            return xhci_switch_config(xhci, xfer->slotid, wValue) == 0;
-        case 0x0100 | USB_REQ_SET_INTERFACE:
-            DPRINTF("xhci: HLE set interface altsetting\n");
-            return xhci_set_iface_alt(xhci, xfer->slotid, wIndex, wValue) == 0;
-        case 0x0200 | USB_REQ_CLEAR_FEATURE:
-            if (wValue == 0) { // endpoint halt
-                DPRINTF("xhci: HLE clear halt\n");
-                return xhci_clear_halt(xhci, xfer->slotid, wIndex);
-            }
-        case 0x0000 | USB_REQ_SET_ADDRESS:
-            fprintf(stderr, "xhci: warn: illegal SET_ADDRESS request\n");
-            return 0;
-        default:
-            return 0;
-    }
-}
-#endif
-
-static int xhci_setup_packet(XHCITransfer *xfer, USBDevice *dev)
-{
+    XHCIState *xhci = xfer->xhci;
+    USBDevice *dev;
     USBEndpoint *ep;
     int dir;
 
     dir = xfer->in_xfer ? USB_TOKEN_IN : USB_TOKEN_OUT;
-    ep = usb_ep_get(dev, dir, xfer->epid >> 1);
+
+    if (xfer->packet.ep) {
+        ep = xfer->packet.ep;
+        dev = ep->dev;
+    } else {
+        if (!xhci->slots[xfer->slotid-1].uport) {
+            fprintf(stderr, "xhci: slot %d has no device\n",
+                    xfer->slotid);
+            return -1;
+        }
+        dev = xhci->slots[xfer->slotid-1].uport->dev;
+        ep = usb_ep_get(dev, dir, xfer->epid >> 1);
+    }
+
     usb_packet_setup(&xfer->packet, dir, ep, xfer->trbs[0].addr);
-    usb_packet_addbuf(&xfer->packet, xfer->data, xfer->data_length);
+    xhci_xfer_map(xfer);
     DPRINTF("xhci: setup packet pid 0x%x addr %d ep %d\n",
             xfer->packet.pid, dev->addr, ep->nr);
     return 0;
@@ -1419,12 +1462,13 @@
         xfer->running_async = 0;
         xfer->running_retry = 0;
         xfer->complete = 1;
+        xhci_xfer_unmap(xfer);
     }
 
     if (ret >= 0) {
-        xfer->status = CC_SUCCESS;
-        xhci_xfer_data(xfer, xfer->data, ret, xfer->in_xfer, 0, 1);
         trace_usb_xhci_xfer_success(xfer, ret);
+        xfer->status = CC_SUCCESS;
+        xhci_xfer_report(xfer);
         return 0;
     }
 
@@ -1433,12 +1477,12 @@
     switch (ret) {
     case USB_RET_NODEV:
         xfer->status = CC_USB_TRANSACTION_ERROR;
-        xhci_xfer_data(xfer, xfer->data, 0, xfer->in_xfer, 0, 1);
+        xhci_xfer_report(xfer);
         xhci_stall_ep(xfer);
         break;
     case USB_RET_STALL:
         xfer->status = CC_STALL_ERROR;
-        xhci_xfer_data(xfer, xfer->data, 0, xfer->in_xfer, 0, 1);
+        xhci_xfer_report(xfer);
         xhci_stall_ep(xfer);
         break;
     default:
@@ -1448,28 +1492,16 @@
     return 0;
 }
 
-static USBDevice *xhci_find_device(XHCIPort *port, uint8_t addr)
-{
-    if (!(port->portsc & PORTSC_PED)) {
-        return NULL;
-    }
-    return usb_find_device(&port->port, addr);
-}
-
 static int xhci_fire_ctl_transfer(XHCIState *xhci, XHCITransfer *xfer)
 {
     XHCITRB *trb_setup, *trb_status;
     uint8_t bmRequestType;
-    uint16_t wLength;
-    XHCIPort *port;
-    USBDevice *dev;
     int ret;
 
     trb_setup = &xfer->trbs[0];
     trb_status = &xfer->trbs[xfer->trb_count-1];
 
-    trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid,
-                              trb_setup->parameter >> 48);
+    trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid);
 
     /* at most one Event Data TRB allowed after STATUS */
     if (TRB_TYPE(*trb_status) == TR_EVDATA && xfer->trb_count > 2) {
@@ -1498,38 +1530,16 @@
     }
 
     bmRequestType = trb_setup->parameter;
-    wLength = trb_setup->parameter >> 48;
-
-    if (xfer->data && xfer->data_alloced < wLength) {
-        xfer->data_alloced = 0;
-        g_free(xfer->data);
-        xfer->data = NULL;
-    }
-    if (!xfer->data) {
-        DPRINTF("xhci: alloc %d bytes data\n", wLength);
-        xfer->data = g_malloc(wLength+1);
-        xfer->data_alloced = wLength;
-    }
-    xfer->data_length = wLength;
-
-    port = &xhci->ports[xhci->slots[xfer->slotid-1].port-1];
-    dev = xhci_find_device(port, xhci->slots[xfer->slotid-1].devaddr);
-    if (!dev) {
-        fprintf(stderr, "xhci: slot %d port %d has no device\n", xfer->slotid,
-                xhci->slots[xfer->slotid-1].port);
-        return -1;
-    }
 
     xfer->in_xfer = bmRequestType & USB_DIR_IN;
     xfer->iso_xfer = false;
 
-    xhci_setup_packet(xfer, dev);
-    xfer->packet.parameter = trb_setup->parameter;
-    if (!xfer->in_xfer) {
-        xhci_xfer_data(xfer, xfer->data, wLength, 0, 1, 0);
+    if (xhci_setup_packet(xfer) < 0) {
+        return -1;
     }
+    xfer->packet.parameter = trb_setup->parameter;
 
-    ret = usb_handle_packet(dev, &xfer->packet);
+    ret = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
 
     xhci_complete_packet(xfer, ret);
     if (!xfer->running_async && !xfer->running_retry) {
@@ -1538,53 +1548,70 @@
     return 0;
 }
 
+static void xhci_calc_iso_kick(XHCIState *xhci, XHCITransfer *xfer,
+                               XHCIEPContext *epctx, uint64_t mfindex)
+{
+    if (xfer->trbs[0].control & TRB_TR_SIA) {
+        uint64_t asap = ((mfindex + epctx->interval - 1) &
+                         ~(epctx->interval-1));
+        if (asap >= epctx->mfindex_last &&
+            asap <= epctx->mfindex_last + epctx->interval * 4) {
+            xfer->mfindex_kick = epctx->mfindex_last + epctx->interval;
+        } else {
+            xfer->mfindex_kick = asap;
+        }
+    } else {
+        xfer->mfindex_kick = (xfer->trbs[0].control >> TRB_TR_FRAMEID_SHIFT)
+            & TRB_TR_FRAMEID_MASK;
+        xfer->mfindex_kick |= mfindex & ~0x3fff;
+        if (xfer->mfindex_kick < mfindex) {
+            xfer->mfindex_kick += 0x4000;
+        }
+    }
+}
+
+static void xhci_check_iso_kick(XHCIState *xhci, XHCITransfer *xfer,
+                                XHCIEPContext *epctx, uint64_t mfindex)
+{
+    if (xfer->mfindex_kick > mfindex) {
+        qemu_mod_timer(epctx->kick_timer, qemu_get_clock_ns(vm_clock) +
+                       (xfer->mfindex_kick - mfindex) * 125000);
+        xfer->running_retry = 1;
+    } else {
+        epctx->mfindex_last = xfer->mfindex_kick;
+        qemu_del_timer(epctx->kick_timer);
+        xfer->running_retry = 0;
+    }
+}
+
+
 static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer, XHCIEPContext *epctx)
 {
-    XHCIPort *port;
-    USBDevice *dev;
+    uint64_t mfindex;
     int ret;
 
     DPRINTF("xhci_submit(slotid=%d,epid=%d)\n", xfer->slotid, xfer->epid);
 
     xfer->in_xfer = epctx->type>>2;
 
-    if (xfer->data && xfer->data_alloced < xfer->data_length) {
-        xfer->data_alloced = 0;
-        g_free(xfer->data);
-        xfer->data = NULL;
-    }
-    if (!xfer->data && xfer->data_length) {
-        DPRINTF("xhci: alloc %d bytes data\n", xfer->data_length);
-        xfer->data = g_malloc(xfer->data_length);
-        xfer->data_alloced = xfer->data_length;
-    }
-    if (epctx->type == ET_ISO_IN || epctx->type == ET_ISO_OUT) {
-        if (!xfer->bg_xfer) {
-            xfer->pkts = 1;
-        }
-    } else {
-        xfer->pkts = 0;
-    }
-
-    port = &xhci->ports[xhci->slots[xfer->slotid-1].port-1];
-    dev = xhci_find_device(port, xhci->slots[xfer->slotid-1].devaddr);
-    if (!dev) {
-        fprintf(stderr, "xhci: slot %d port %d has no device\n", xfer->slotid,
-                xhci->slots[xfer->slotid-1].port);
-        return -1;
-    }
-
-    xhci_setup_packet(xfer, dev);
-
     switch(epctx->type) {
     case ET_INTR_OUT:
     case ET_INTR_IN:
     case ET_BULK_OUT:
     case ET_BULK_IN:
+        xfer->pkts = 0;
+        xfer->iso_xfer = false;
         break;
     case ET_ISO_OUT:
     case ET_ISO_IN:
-        FIXME();
+        xfer->pkts = 1;
+        xfer->iso_xfer = true;
+        mfindex = xhci_mfindex_get(xhci);
+        xhci_calc_iso_kick(xhci, xfer, epctx, mfindex);
+        xhci_check_iso_kick(xhci, xfer, epctx, mfindex);
+        if (xfer->running_retry) {
+            return -1;
+        }
         break;
     default:
         fprintf(stderr, "xhci: unknown or unhandled EP "
@@ -1593,10 +1620,10 @@
         return -1;
     }
 
-    if (!xfer->in_xfer) {
-        xhci_xfer_data(xfer, xfer->data, xfer->data_length, 0, 1, 0);
+    if (xhci_setup_packet(xfer) < 0) {
+        return -1;
     }
-    ret = usb_handle_packet(dev, &xfer->packet);
+    ret = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
 
     xhci_complete_packet(xfer, ret);
     if (!xfer->running_async && !xfer->running_retry) {
@@ -1607,50 +1634,14 @@
 
 static int xhci_fire_transfer(XHCIState *xhci, XHCITransfer *xfer, XHCIEPContext *epctx)
 {
-    int i;
-    unsigned int length = 0;
-    XHCITRB *trb;
-
-    for (i = 0; i < xfer->trb_count; i++) {
-        trb = &xfer->trbs[i];
-        if (TRB_TYPE(*trb) == TR_NORMAL || TRB_TYPE(*trb) == TR_ISOCH) {
-            length += trb->status & 0x1ffff;
-        }
-    }
-
-    trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid, length);
-
-    if (!epctx->has_bg) {
-        xfer->data_length = length;
-        xfer->backgrounded = 0;
-        return xhci_submit(xhci, xfer, epctx);
-    } else {
-        if (!epctx->bg_running) {
-            for (i = 0; i < BG_XFERS; i++) {
-                XHCITransfer *t = &epctx->bg_transfers[i];
-                t->xhci = xhci;
-                t->epid = xfer->epid;
-                t->slotid = xfer->slotid;
-                t->pkts = BG_PKTS;
-                t->pktsize = epctx->max_psize;
-                t->data_length = t->pkts * t->pktsize;
-                t->bg_xfer = 1;
-                if (xhci_submit(xhci, t, epctx) < 0) {
-                    fprintf(stderr, "xhci: bg submit failed\n");
-                    return -1;
-                }
-            }
-            epctx->bg_running = 1;
-        }
-        xfer->backgrounded = 1;
-        xhci_bg_update(xhci, epctx);
-        return 0;
-    }
+    trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid);
+    return xhci_submit(xhci, xfer, epctx);
 }
 
 static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid)
 {
     XHCIEPContext *epctx;
+    uint64_t mfindex;
     int length;
     int i;
 
@@ -1670,18 +1661,35 @@
     }
 
     if (epctx->retry) {
-        /* retry nak'ed transfer */
         XHCITransfer *xfer = epctx->retry;
         int result;
 
         trace_usb_xhci_xfer_retry(xfer);
         assert(xfer->running_retry);
-        xhci_setup_packet(xfer, xfer->packet.ep->dev);
-        result = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
-        if (result == USB_RET_NAK) {
-            return;
+        if (xfer->iso_xfer) {
+            /* retry delayed iso transfer */
+            mfindex = xhci_mfindex_get(xhci);
+            xhci_check_iso_kick(xhci, xfer, epctx, mfindex);
+            if (xfer->running_retry) {
+                return;
+            }
+            if (xhci_setup_packet(xfer) < 0) {
+                return;
+            }
+            result = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
+            assert(result != USB_RET_NAK);
+            xhci_complete_packet(xfer, result);
+        } else {
+            /* retry nak'ed transfer */
+            if (xhci_setup_packet(xfer) < 0) {
+                return;
+            }
+            result = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
+            if (result == USB_RET_NAK) {
+                return;
+            }
+            xhci_complete_packet(xfer, result);
         }
-        xhci_complete_packet(xfer, result);
         assert(!xfer->running_retry);
         epctx->retry = NULL;
     }
@@ -1695,7 +1703,7 @@
 
     while (1) {
         XHCITransfer *xfer = &epctx->transfers[epctx->next_xfer];
-        if (xfer->running_async || xfer->running_retry || xfer->backgrounded) {
+        if (xfer->running_async || xfer->running_retry) {
             break;
         }
         length = xhci_ring_chain_length(xhci, &epctx->ring);
@@ -1733,7 +1741,9 @@
             if (xhci_fire_transfer(xhci, xfer, epctx) >= 0) {
                 epctx->next_xfer = (epctx->next_xfer + 1) % TD_QUEUE;
             } else {
-                fprintf(stderr, "xhci: error firing data transfer\n");
+                if (!xfer->iso_xfer) {
+                    fprintf(stderr, "xhci: error firing data transfer\n");
+                }
             }
         }
 
@@ -1753,7 +1763,7 @@
     trace_usb_xhci_slot_enable(slotid);
     assert(slotid >= 1 && slotid <= MAXSLOTS);
     xhci->slots[slotid-1].enabled = 1;
-    xhci->slots[slotid-1].port = 0;
+    xhci->slots[slotid-1].uport = NULL;
     memset(xhci->slots[slotid-1].eps, 0, sizeof(XHCIEPContext*)*31);
 
     return CC_SUCCESS;
@@ -1776,17 +1786,42 @@
     return CC_SUCCESS;
 }
 
+static USBPort *xhci_lookup_uport(XHCIState *xhci, uint32_t *slot_ctx)
+{
+    USBPort *uport;
+    char path[32];
+    int i, pos, port;
+
+    port = (slot_ctx[1]>>16) & 0xFF;
+    port = xhci->ports[port-1].uport->index+1;
+    pos = snprintf(path, sizeof(path), "%d", port);
+    for (i = 0; i < 5; i++) {
+        port = (slot_ctx[0] >> 4*i) & 0x0f;
+        if (!port) {
+            break;
+        }
+        pos += snprintf(path + pos, sizeof(path) - pos, ".%d", port);
+    }
+
+    QTAILQ_FOREACH(uport, &xhci->bus.used, next) {
+        if (strcmp(uport->path, path) == 0) {
+            return uport;
+        }
+    }
+    return NULL;
+}
+
 static TRBCCode xhci_address_slot(XHCIState *xhci, unsigned int slotid,
                                   uint64_t pictx, bool bsr)
 {
     XHCISlot *slot;
+    USBPort *uport;
     USBDevice *dev;
     dma_addr_t ictx, octx, dcbaap;
     uint64_t poctx;
     uint32_t ictl_ctx[2];
     uint32_t slot_ctx[4];
     uint32_t ep0_ctx[5];
-    unsigned int port;
     int i;
     TRBCCode res;
 
@@ -1818,27 +1853,28 @@
     DPRINTF("xhci: input ep0 context: %08x %08x %08x %08x %08x\n",
             ep0_ctx[0], ep0_ctx[1], ep0_ctx[2], ep0_ctx[3], ep0_ctx[4]);
 
-    port = (slot_ctx[1]>>16) & 0xFF;
-    dev = xhci->ports[port-1].port.dev;
-
-    if (port < 1 || port > MAXPORTS) {
-        fprintf(stderr, "xhci: bad port %d\n", port);
+    uport = xhci_lookup_uport(xhci, slot_ctx);
+    if (uport == NULL) {
+        fprintf(stderr, "xhci: port not found\n");
         return CC_TRB_ERROR;
-    } else if (!dev) {
-        fprintf(stderr, "xhci: port %d not connected\n", port);
+    }
+
+    dev = uport->dev;
+    if (!dev) {
+        fprintf(stderr, "xhci: port %s not connected\n", uport->path);
         return CC_USB_TRANSACTION_ERROR;
     }
 
     for (i = 0; i < MAXSLOTS; i++) {
-        if (xhci->slots[i].port == port) {
-            fprintf(stderr, "xhci: port %d already assigned to slot %d\n",
-                    port, i+1);
+        if (xhci->slots[i].uport == uport) {
+            fprintf(stderr, "xhci: port %s already assigned to slot %d\n",
+                    uport->path, i+1);
             return CC_TRB_ERROR;
         }
     }
 
     slot = &xhci->slots[slotid-1];
-    slot->port = port;
+    slot->uport = uport;
     slot->ctx = octx;
 
     if (bsr) {
@@ -2070,7 +2106,7 @@
 static TRBCCode xhci_get_port_bandwidth(XHCIState *xhci, uint64_t pctx)
 {
     dma_addr_t ctx;
-    uint8_t bw_ctx[MAXPORTS+1];
+    uint8_t bw_ctx[xhci->numports+1];
 
     DPRINTF("xhci_get_port_bandwidth()\n");
 
@@ -2080,7 +2116,7 @@
 
     /* TODO: actually implement real values here */
     bw_ctx[0] = 0;
-    memset(&bw_ctx[1], 80, MAXPORTS); /* 80% */
+    memset(&bw_ctx[1], 80, xhci->numports); /* 80% */
     pci_dma_write(&xhci->pci_dev, ctx, bw_ctx, sizeof(bw_ctx));
 
     return CC_SUCCESS;
@@ -2244,18 +2280,17 @@
             break;
         }
         event.slotid = slotid;
-        xhci_event(xhci, &event);
+        xhci_event(xhci, &event, 0);
     }
 }
 
 static void xhci_update_port(XHCIState *xhci, XHCIPort *port, int is_detach)
 {
-    int nr = port->port.index + 1;
-
     port->portsc = PORTSC_PP;
-    if (port->port.dev && port->port.dev->attached && !is_detach) {
+    if (port->uport->dev && port->uport->dev->attached && !is_detach &&
+        (1 << port->uport->dev->speed) & port->speedmask) {
         port->portsc |= PORTSC_CCS;
-        switch (port->port.dev->speed) {
+        switch (port->uport->dev->speed) {
         case USB_SPEED_LOW:
             port->portsc |= PORTSC_SPEED_LOW;
             break;
@@ -2265,14 +2300,18 @@
         case USB_SPEED_HIGH:
             port->portsc |= PORTSC_SPEED_HIGH;
             break;
+        case USB_SPEED_SUPER:
+            port->portsc |= PORTSC_SPEED_SUPER;
+            break;
         }
     }
 
     if (xhci_running(xhci)) {
         port->portsc |= PORTSC_CSC;
-        XHCIEvent ev = { ER_PORT_STATUS_CHANGE, CC_SUCCESS, nr << 24};
-        xhci_event(xhci, &ev);
-        DPRINTF("xhci: port change event for port %d\n", nr);
+        XHCIEvent ev = { ER_PORT_STATUS_CHANGE, CC_SUCCESS,
+                         port->portnr << 24};
+        xhci_event(xhci, &ev, 0);
+        DPRINTF("xhci: port change event for port %d\n", port->portnr);
     }
 }
 
@@ -2300,28 +2339,34 @@
         xhci_disable_slot(xhci, i+1);
     }
 
-    for (i = 0; i < MAXPORTS; i++) {
+    for (i = 0; i < xhci->numports; i++) {
         xhci_update_port(xhci, xhci->ports + i, 0);
     }
 
-    xhci->mfindex = 0;
-    xhci->iman = 0;
-    xhci->imod = 0;
-    xhci->erstsz = 0;
-    xhci->erstba_low = 0;
-    xhci->erstba_high = 0;
-    xhci->erdp_low = 0;
-    xhci->erdp_high = 0;
+    for (i = 0; i < MAXINTRS; i++) {
+        xhci->intr[i].iman = 0;
+        xhci->intr[i].imod = 0;
+        xhci->intr[i].erstsz = 0;
+        xhci->intr[i].erstba_low = 0;
+        xhci->intr[i].erstba_high = 0;
+        xhci->intr[i].erdp_low = 0;
+        xhci->intr[i].erdp_high = 0;
+        xhci->intr[i].msix_used = 0;
 
-    xhci->er_ep_idx = 0;
-    xhci->er_pcs = 1;
-    xhci->er_full = 0;
-    xhci->ev_buffer_put = 0;
-    xhci->ev_buffer_get = 0;
+        xhci->intr[i].er_ep_idx = 0;
+        xhci->intr[i].er_pcs = 1;
+        xhci->intr[i].er_full = 0;
+        xhci->intr[i].ev_buffer_put = 0;
+        xhci->intr[i].ev_buffer_get = 0;
+    }
+
+    xhci->mfindex_start = qemu_get_clock_ns(vm_clock);
+    xhci_mfwrap_update(xhci);
 }
 
-static uint32_t xhci_cap_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_cap_read(void *ptr, target_phys_addr_t reg, unsigned size)
 {
+    XHCIState *xhci = ptr;
     uint32_t ret;
 
     switch (reg) {
@@ -2329,7 +2374,8 @@
         ret = 0x01000000 | LEN_CAP;
         break;
     case 0x04: /* HCSPARAMS 1 */
-        ret = (MAXPORTS<<24) | (MAXINTRS<<8) | MAXSLOTS;
+        ret = ((xhci->numports_2+xhci->numports_3)<<24)
+            | (MAXINTRS<<8) | MAXSLOTS;
         break;
     case 0x08: /* HCSPARAMS 2 */
         ret = 0x0000000f;
@@ -2359,7 +2405,7 @@
         ret = 0x20425455; /* "USB " */
         break;
     case 0x28: /* Supported Protocol:08 */
-        ret = 0x00000001 | (USB2_PORTS<<8);
+        ret = 0x00000001 | (xhci->numports_2<<8);
         break;
     case 0x2c: /* Supported Protocol:0c */
         ret = 0x00000000; /* reserved */
@@ -2371,13 +2417,13 @@
         ret = 0x20425455; /* "USB " */
         break;
     case 0x38: /* Supported Protocol:08 */
-        ret = 0x00000000 | (USB2_PORTS+1) | (USB3_PORTS<<8);
+        ret = 0x00000000 | (xhci->numports_2+1) | (xhci->numports_3<<8);
         break;
     case 0x3c: /* Supported Protocol:0c */
         ret = 0x00000000; /* reserved */
         break;
     default:
-        fprintf(stderr, "xhci_cap_read: reg %d unimplemented\n", reg);
+        fprintf(stderr, "xhci_cap_read: reg %d unimplemented\n", (int)reg);
         ret = 0;
     }
 
@@ -2385,20 +2431,14 @@
     return ret;
 }
 
-static uint32_t xhci_port_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_port_read(void *ptr, target_phys_addr_t reg, unsigned size)
 {
-    uint32_t port = reg >> 4;
+    XHCIPort *port = ptr;
     uint32_t ret;
 
-    if (port >= MAXPORTS) {
-        fprintf(stderr, "xhci_port_read: port %d out of bounds\n", port);
-        ret = 0;
-        goto out;
-    }
-
-    switch (reg & 0xf) {
+    switch (reg) {
     case 0x00: /* PORTSC */
-        ret = xhci->ports[port].portsc;
+        ret = port->portsc;
         break;
     case 0x04: /* PORTPMSC */
     case 0x08: /* PORTLI */
@@ -2407,30 +2447,25 @@
     case 0x0c: /* reserved */
     default:
         fprintf(stderr, "xhci_port_read (port %d): reg 0x%x unimplemented\n",
-                port, reg);
+                port->portnr, (uint32_t)reg);
         ret = 0;
     }
 
-out:
-    trace_usb_xhci_port_read(port, reg & 0x0f, ret);
+    trace_usb_xhci_port_read(port->portnr, reg, ret);
     return ret;
 }
 
-static void xhci_port_write(XHCIState *xhci, uint32_t reg, uint32_t val)
+static void xhci_port_write(void *ptr, target_phys_addr_t reg,
+                            uint64_t val, unsigned size)
 {
-    uint32_t port = reg >> 4;
+    XHCIPort *port = ptr;
     uint32_t portsc;
 
-    trace_usb_xhci_port_write(port, reg & 0x0f, val);
+    trace_usb_xhci_port_write(port->portnr, reg, val);
 
-    if (port >= MAXPORTS) {
-        fprintf(stderr, "xhci_port_read: port %d out of bounds\n", port);
-        return;
-    }
-
-    switch (reg & 0xf) {
+    switch (reg) {
     case 0x00: /* PORTSC */
-        portsc = xhci->ports[port].portsc;
+        portsc = port->portsc;
         /* write-1-to-clear bits*/
         portsc &= ~(val & (PORTSC_CSC|PORTSC_PEC|PORTSC_WRC|PORTSC_OCC|
                            PORTSC_PRC|PORTSC_PLC|PORTSC_CEC));
@@ -2445,27 +2480,24 @@
         /* write-1-to-start bits */
         if (val & PORTSC_PR) {
             DPRINTF("xhci: port %d reset\n", port);
-            usb_device_reset(xhci->ports[port].port.dev);
+            usb_device_reset(port->uport->dev);
             portsc |= PORTSC_PRC | PORTSC_PED;
         }
-        xhci->ports[port].portsc = portsc;
+        port->portsc = portsc;
         break;
     case 0x04: /* PORTPMSC */
     case 0x08: /* PORTLI */
     default:
         fprintf(stderr, "xhci_port_write (port %d): reg 0x%x unimplemented\n",
-                port, reg);
+                port->portnr, (uint32_t)reg);
     }
 }
 
-static uint32_t xhci_oper_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_oper_read(void *ptr, target_phys_addr_t reg, unsigned size)
 {
+    XHCIState *xhci = ptr;
     uint32_t ret;
 
-    if (reg >= 0x400) {
-        return xhci_port_read(xhci, reg - 0x400);
-    }
-
     switch (reg) {
     case 0x00: /* USBCMD */
         ret = xhci->usbcmd;
@@ -2495,7 +2527,7 @@
         ret = xhci->config;
         break;
     default:
-        fprintf(stderr, "xhci_oper_read: reg 0x%x unimplemented\n", reg);
+        fprintf(stderr, "xhci_oper_read: reg 0x%x unimplemented\n", (int)reg);
         ret = 0;
     }
 
@@ -2503,12 +2535,10 @@
     return ret;
 }
 
-static void xhci_oper_write(XHCIState *xhci, uint32_t reg, uint32_t val)
+static void xhci_oper_write(void *ptr, target_phys_addr_t reg,
+                            uint64_t val, unsigned size)
 {
-    if (reg >= 0x400) {
-        xhci_port_write(xhci, reg - 0x400, val);
-        return;
-    }
+    XHCIState *xhci = ptr;
 
     trace_usb_xhci_oper_write(reg, val);
 
@@ -2520,16 +2550,17 @@
             xhci_stop(xhci);
         }
         xhci->usbcmd = val & 0xc0f;
+        xhci_mfwrap_update(xhci);
         if (val & USBCMD_HCRST) {
             xhci_reset(&xhci->pci_dev.qdev);
         }
-        xhci_irq_update(xhci);
+        xhci_intx_update(xhci);
         break;
 
     case 0x04: /* USBSTS */
         /* these bits are write-1-to-clear */
         xhci->usbsts &= ~(val & (USBSTS_HSE|USBSTS_EINT|USBSTS_PCD|USBSTS_SRE));
-        xhci_irq_update(xhci);
+        xhci_intx_update(xhci);
         break;
 
     case 0x14: /* DNCTRL */
@@ -2543,7 +2574,7 @@
         if (xhci->crcr_low & (CRCR_CA|CRCR_CS) && (xhci->crcr_low & CRCR_CRR)) {
             XHCIEvent event = {ER_COMMAND_COMPLETE, CC_COMMAND_RING_STOPPED};
             xhci->crcr_low &= ~CRCR_CRR;
-            xhci_event(xhci, &event);
+            xhci_event(xhci, &event, 0);
             DPRINTF("xhci: command ring stopped (CRCR=%08x)\n", xhci->crcr_low);
         } else {
             dma_addr_t base = xhci_addr64(xhci->crcr_low & ~0x3f, val);
@@ -2561,101 +2592,127 @@
         xhci->config = val & 0xff;
         break;
     default:
-        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n", reg);
+        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n", (int)reg);
     }
 }
 
-static uint32_t xhci_runtime_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_runtime_read(void *ptr, target_phys_addr_t reg,
+                                  unsigned size)
 {
-    uint32_t ret;
+    XHCIState *xhci = ptr;
+    uint32_t ret = 0;
 
-    switch (reg) {
-    case 0x00: /* MFINDEX */
-        fprintf(stderr, "xhci_runtime_read: MFINDEX not yet implemented\n");
-        ret = xhci->mfindex;
-        break;
-    case 0x20: /* IMAN */
-        ret = xhci->iman;
-        break;
-    case 0x24: /* IMOD */
-        ret = xhci->imod;
-        break;
-    case 0x28: /* ERSTSZ */
-        ret = xhci->erstsz;
-        break;
-    case 0x30: /* ERSTBA low */
-        ret = xhci->erstba_low;
-        break;
-    case 0x34: /* ERSTBA high */
-        ret = xhci->erstba_high;
-        break;
-    case 0x38: /* ERDP low */
-        ret = xhci->erdp_low;
-        break;
-    case 0x3c: /* ERDP high */
-        ret = xhci->erdp_high;
-        break;
-    default:
-        fprintf(stderr, "xhci_runtime_read: reg 0x%x unimplemented\n", reg);
-        ret = 0;
+    if (reg < 0x20) {
+        switch (reg) {
+        case 0x00: /* MFINDEX */
+            ret = xhci_mfindex_get(xhci) & 0x3fff;
+            break;
+        default:
+            fprintf(stderr, "xhci_runtime_read: reg 0x%x unimplemented\n",
+                    (int)reg);
+            break;
+        }
+    } else {
+        int v = (reg - 0x20) / 0x20;
+        XHCIInterrupter *intr = &xhci->intr[v];
+        switch (reg & 0x1f) {
+        case 0x00: /* IMAN */
+            ret = intr->iman;
+            break;
+        case 0x04: /* IMOD */
+            ret = intr->imod;
+            break;
+        case 0x08: /* ERSTSZ */
+            ret = intr->erstsz;
+            break;
+        case 0x10: /* ERSTBA low */
+            ret = intr->erstba_low;
+            break;
+        case 0x14: /* ERSTBA high */
+            ret = intr->erstba_high;
+            break;
+        case 0x18: /* ERDP low */
+            ret = intr->erdp_low;
+            break;
+        case 0x1c: /* ERDP high */
+            ret = intr->erdp_high;
+            break;
+        }
     }
 
     trace_usb_xhci_runtime_read(reg, ret);
     return ret;
 }
 
-static void xhci_runtime_write(XHCIState *xhci, uint32_t reg, uint32_t val)
+static void xhci_runtime_write(void *ptr, target_phys_addr_t reg,
+                               uint64_t val, unsigned size)
 {
-    trace_usb_xhci_runtime_read(reg, val);
+    XHCIState *xhci = ptr;
+    int v = (reg - 0x20) / 0x20;
+    XHCIInterrupter *intr = &xhci->intr[v];
+    trace_usb_xhci_runtime_write(reg, val);
 
-    switch (reg) {
-    case 0x20: /* IMAN */
+    if (reg < 0x20) {
+        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n", (int)reg);
+        return;
+    }
+
+    switch (reg & 0x1f) {
+    case 0x00: /* IMAN */
         if (val & IMAN_IP) {
-            xhci->iman &= ~IMAN_IP;
+            intr->iman &= ~IMAN_IP;
         }
-        xhci->iman &= ~IMAN_IE;
-        xhci->iman |= val & IMAN_IE;
-        xhci_irq_update(xhci);
+        intr->iman &= ~IMAN_IE;
+        intr->iman |= val & IMAN_IE;
+        if (v == 0) {
+            xhci_intx_update(xhci);
+        }
+        xhci_msix_update(xhci, v);
         break;
-    case 0x24: /* IMOD */
-        xhci->imod = val;
+    case 0x04: /* IMOD */
+        intr->imod = val;
         break;
-    case 0x28: /* ERSTSZ */
-        xhci->erstsz = val & 0xffff;
+    case 0x08: /* ERSTSZ */
+        intr->erstsz = val & 0xffff;
         break;
-    case 0x30: /* ERSTBA low */
+    case 0x10: /* ERSTBA low */
         /* XXX NEC driver bug: it doesn't align this to 64 bytes
-        xhci->erstba_low = val & 0xffffffc0; */
-        xhci->erstba_low = val & 0xfffffff0;
+        intr->erstba_low = val & 0xffffffc0; */
+        intr->erstba_low = val & 0xfffffff0;
         break;
-    case 0x34: /* ERSTBA high */
-        xhci->erstba_high = val;
-        xhci_er_reset(xhci);
+    case 0x14: /* ERSTBA high */
+        intr->erstba_high = val;
+        xhci_er_reset(xhci, v);
         break;
-    case 0x38: /* ERDP low */
+    case 0x18: /* ERDP low */
         if (val & ERDP_EHB) {
-            xhci->erdp_low &= ~ERDP_EHB;
+            intr->erdp_low &= ~ERDP_EHB;
         }
-        xhci->erdp_low = (val & ~ERDP_EHB) | (xhci->erdp_low & ERDP_EHB);
+        intr->erdp_low = (val & ~ERDP_EHB) | (intr->erdp_low & ERDP_EHB);
         break;
-    case 0x3c: /* ERDP high */
-        xhci->erdp_high = val;
-        xhci_events_update(xhci);
+    case 0x1c: /* ERDP high */
+        intr->erdp_high = val;
+        xhci_events_update(xhci, v);
         break;
     default:
-        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n", reg);
+        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n",
+                (int)reg);
     }
 }
 
-static uint32_t xhci_doorbell_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_doorbell_read(void *ptr, target_phys_addr_t reg,
+                                   unsigned size)
 {
     /* doorbells always read as 0 */
     trace_usb_xhci_doorbell_read(reg, 0);
     return 0;
 }
 
-static void xhci_doorbell_write(XHCIState *xhci, uint32_t reg, uint32_t val)
+static void xhci_doorbell_write(void *ptr, target_phys_addr_t reg,
+                                uint64_t val, unsigned size)
 {
+    XHCIState *xhci = ptr;
+
     trace_usb_xhci_doorbell_write(reg, val);
 
     if (!xhci_running(xhci)) {
@@ -2669,69 +2726,57 @@
         if (val == 0) {
             xhci_process_commands(xhci);
         } else {
-            fprintf(stderr, "xhci: bad doorbell 0 write: 0x%x\n", val);
+            fprintf(stderr, "xhci: bad doorbell 0 write: 0x%x\n",
+                    (uint32_t)val);
         }
     } else {
         if (reg > MAXSLOTS) {
-            fprintf(stderr, "xhci: bad doorbell %d\n", reg);
+            fprintf(stderr, "xhci: bad doorbell %d\n", (int)reg);
         } else if (val > 31) {
-            fprintf(stderr, "xhci: bad doorbell %d write: 0x%x\n", reg, val);
+            fprintf(stderr, "xhci: bad doorbell %d write: 0x%x\n",
+                    (int)reg, (uint32_t)val);
         } else {
             xhci_kick_ep(xhci, reg, val);
         }
     }
 }
 
-static uint64_t xhci_mem_read(void *ptr, target_phys_addr_t addr,
-                              unsigned size)
-{
-    XHCIState *xhci = ptr;
+static const MemoryRegionOps xhci_cap_ops = {
+    .read = xhci_cap_read,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
 
-    /* Only aligned reads are allowed on xHCI */
-    if (addr & 3) {
-        fprintf(stderr, "xhci_mem_read: Mis-aligned read\n");
-        return 0;
-    }
+static const MemoryRegionOps xhci_oper_ops = {
+    .read = xhci_oper_read,
+    .write = xhci_oper_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
 
-    if (addr < LEN_CAP) {
-        return xhci_cap_read(xhci, addr);
-    } else if (addr >= OFF_OPER && addr < (OFF_OPER + LEN_OPER)) {
-        return xhci_oper_read(xhci, addr - OFF_OPER);
-    } else if (addr >= OFF_RUNTIME && addr < (OFF_RUNTIME + LEN_RUNTIME)) {
-        return xhci_runtime_read(xhci, addr - OFF_RUNTIME);
-    } else if (addr >= OFF_DOORBELL && addr < (OFF_DOORBELL + LEN_DOORBELL)) {
-        return xhci_doorbell_read(xhci, addr - OFF_DOORBELL);
-    } else {
-        fprintf(stderr, "xhci_mem_read: Bad offset %x\n", (int)addr);
-        return 0;
-    }
-}
+static const MemoryRegionOps xhci_port_ops = {
+    .read = xhci_port_read,
+    .write = xhci_port_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
 
-static void xhci_mem_write(void *ptr, target_phys_addr_t addr,
-                           uint64_t val, unsigned size)
-{
-    XHCIState *xhci = ptr;
+static const MemoryRegionOps xhci_runtime_ops = {
+    .read = xhci_runtime_read,
+    .write = xhci_runtime_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
 
-    /* Only aligned writes are allowed on xHCI */
-    if (addr & 3) {
-        fprintf(stderr, "xhci_mem_write: Mis-aligned write\n");
-        return;
-    }
-
-    if (addr >= OFF_OPER && addr < (OFF_OPER + LEN_OPER)) {
-        xhci_oper_write(xhci, addr - OFF_OPER, val);
-    } else if (addr >= OFF_RUNTIME && addr < (OFF_RUNTIME + LEN_RUNTIME)) {
-        xhci_runtime_write(xhci, addr - OFF_RUNTIME, val);
-    } else if (addr >= OFF_DOORBELL && addr < (OFF_DOORBELL + LEN_DOORBELL)) {
-        xhci_doorbell_write(xhci, addr - OFF_DOORBELL, val);
-    } else {
-        fprintf(stderr, "xhci_mem_write: Bad offset %x\n", (int)addr);
-    }
-}
-
-static const MemoryRegionOps xhci_mem_ops = {
-    .read = xhci_mem_read,
-    .write = xhci_mem_write,
+static const MemoryRegionOps xhci_doorbell_ops = {
+    .read = xhci_doorbell_read,
+    .write = xhci_doorbell_write,
     .valid.min_access_size = 4,
     .valid.max_access_size = 4,
     .endianness = DEVICE_LITTLE_ENDIAN,
@@ -2740,7 +2785,7 @@
 static void xhci_attach(USBPort *usbport)
 {
     XHCIState *xhci = usbport->opaque;
-    XHCIPort *port = &xhci->ports[usbport->index];
+    XHCIPort *port = xhci_lookup_port(xhci, usbport);
 
     xhci_update_port(xhci, port, 0);
 }
@@ -2748,7 +2793,7 @@
 static void xhci_detach(USBPort *usbport)
 {
     XHCIState *xhci = usbport->opaque;
-    XHCIPort *port = &xhci->ports[usbport->index];
+    XHCIPort *port = xhci_lookup_port(xhci, usbport);
 
     xhci_update_port(xhci, port, 1);
 }
@@ -2756,9 +2801,9 @@
 static void xhci_wakeup(USBPort *usbport)
 {
     XHCIState *xhci = usbport->opaque;
-    XHCIPort *port = &xhci->ports[usbport->index];
-    int nr = port->port.index + 1;
-    XHCIEvent ev = { ER_PORT_STATUS_CHANGE, CC_SUCCESS, nr << 24};
+    XHCIPort *port = xhci_lookup_port(xhci, usbport);
+    XHCIEvent ev = { ER_PORT_STATUS_CHANGE, CC_SUCCESS,
+                     port->portnr << 24};
     uint32_t pls;
 
     pls = (port->portsc >> PORTSC_PLS_SHIFT) & PORTSC_PLS_MASK;
@@ -2770,7 +2815,7 @@
         return;
     }
     port->portsc |= PORTSC_PLC;
-    xhci_event(xhci, &ev);
+    xhci_event(xhci, &ev, 0);
 }
 
 static void xhci_complete(USBPort *port, USBPacket *packet)
@@ -2781,12 +2826,20 @@
     xhci_kick_ep(xfer->xhci, xfer->slotid, xfer->epid);
 }
 
-static void xhci_child_detach(USBPort *port, USBDevice *child)
+static void xhci_child_detach(USBPort *uport, USBDevice *child)
 {
-    FIXME();
+    USBBus *bus = usb_bus_from_device(child);
+    XHCIState *xhci = container_of(bus, XHCIState, bus);
+    int i;
+
+    for (i = 0; i < MAXSLOTS; i++) {
+        if (xhci->slots[i].uport == uport) {
+            xhci->slots[i].uport = NULL;
+        }
+    }
 }
 
-static USBPortOps xhci_port_ops = {
+static USBPortOps xhci_uport_ops = {
     .attach   = xhci_attach,
     .detach   = xhci_detach,
     .wakeup   = xhci_wakeup,
@@ -2840,28 +2893,51 @@
 
 static void usb_xhci_init(XHCIState *xhci, DeviceState *dev)
 {
-    int i;
+    XHCIPort *port;
+    int i, usbports, speedmask;
 
     xhci->usbsts = USBSTS_HCH;
 
+    if (xhci->numports_2 > MAXPORTS_2) {
+        xhci->numports_2 = MAXPORTS_2;
+    }
+    if (xhci->numports_3 > MAXPORTS_3) {
+        xhci->numports_3 = MAXPORTS_3;
+    }
+    usbports = MAX(xhci->numports_2, xhci->numports_3);
+    xhci->numports = xhci->numports_2 + xhci->numports_3;
+
     usb_bus_new(&xhci->bus, &xhci_bus_ops, &xhci->pci_dev.qdev);
 
-    for (i = 0; i < MAXPORTS; i++) {
-        memset(&xhci->ports[i], 0, sizeof(xhci->ports[i]));
-        usb_register_port(&xhci->bus, &xhci->ports[i].port, xhci, i,
-                          &xhci_port_ops,
-                          USB_SPEED_MASK_LOW  |
-                          USB_SPEED_MASK_FULL |
-                          USB_SPEED_MASK_HIGH);
-    }
-    for (i = 0; i < MAXSLOTS; i++) {
-        xhci->slots[i].enabled = 0;
+    for (i = 0; i < usbports; i++) {
+        speedmask = 0;
+        if (i < xhci->numports_2) {
+            port = &xhci->ports[i];
+            port->portnr = i + 1;
+            port->uport = &xhci->uports[i];
+            port->speedmask =
+                USB_SPEED_MASK_LOW  |
+                USB_SPEED_MASK_FULL |
+                USB_SPEED_MASK_HIGH;
+            snprintf(port->name, sizeof(port->name), "usb2 port #%d", i+1);
+            speedmask |= port->speedmask;
+        }
+        if (i < xhci->numports_3) {
+            port = &xhci->ports[i + xhci->numports_2];
+            port->portnr = i + 1 + xhci->numports_2;
+            port->uport = &xhci->uports[i];
+            port->speedmask = USB_SPEED_MASK_SUPER;
+            snprintf(port->name, sizeof(port->name), "usb3 port #%d", i+1);
+            speedmask |= port->speedmask;
+        }
+        usb_register_port(&xhci->bus, &xhci->uports[i], xhci, i,
+                          &xhci_uport_ops, speedmask);
     }
 }
 
 static int usb_xhci_initfn(struct PCIDevice *dev)
 {
-    int ret;
+    int i, ret;
 
     XHCIState *xhci = DO_UPCAST(XHCIState, pci_dev, dev);
 
@@ -2872,10 +2948,34 @@
 
     usb_xhci_init(xhci, &dev->qdev);
 
+    xhci->mfwrap_timer = qemu_new_timer_ns(vm_clock, xhci_mfwrap_timer, xhci);
+
     xhci->irq = xhci->pci_dev.irq[0];
 
-    memory_region_init_io(&xhci->mem, &xhci_mem_ops, xhci,
-                          "xhci", LEN_REGS);
+    memory_region_init(&xhci->mem, "xhci", LEN_REGS);
+    memory_region_init_io(&xhci->mem_cap, &xhci_cap_ops, xhci,
+                          "capabilities", LEN_CAP);
+    memory_region_init_io(&xhci->mem_oper, &xhci_oper_ops, xhci,
+                          "operational", 0x400);
+    memory_region_init_io(&xhci->mem_runtime, &xhci_runtime_ops, xhci,
+                          "runtime", LEN_RUNTIME);
+    memory_region_init_io(&xhci->mem_doorbell, &xhci_doorbell_ops, xhci,
+                          "doorbell", LEN_DOORBELL);
+
+    memory_region_add_subregion(&xhci->mem, 0,            &xhci->mem_cap);
+    memory_region_add_subregion(&xhci->mem, OFF_OPER,     &xhci->mem_oper);
+    memory_region_add_subregion(&xhci->mem, OFF_RUNTIME,  &xhci->mem_runtime);
+    memory_region_add_subregion(&xhci->mem, OFF_DOORBELL, &xhci->mem_doorbell);
+
+    for (i = 0; i < xhci->numports; i++) {
+        XHCIPort *port = &xhci->ports[i];
+        uint32_t offset = OFF_OPER + 0x400 + 0x10 * i;
+        port->xhci = xhci;
+        memory_region_init_io(&port->mem, &xhci_port_ops, port,
+                              port->name, 0x10);
+        memory_region_add_subregion(&xhci->mem, offset, &port->mem);
+    }
+
     pci_register_bar(&xhci->pci_dev, 0,
                      PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64,
                      &xhci->mem);
@@ -2883,32 +2983,29 @@
     ret = pcie_cap_init(&xhci->pci_dev, 0xa0, PCI_EXP_TYPE_ENDPOINT, 0);
     assert(ret >= 0);
 
-    if (xhci->msi) {
-        ret = msi_init(&xhci->pci_dev, 0x70, 1, true, false);
-        assert(ret >= 0);
+    if (xhci->flags & (1 << XHCI_FLAG_USE_MSI)) {
+        msi_init(&xhci->pci_dev, 0x70, MAXINTRS, true, false);
+    }
+    if (xhci->flags & (1 << XHCI_FLAG_USE_MSI_X)) {
+        msix_init(&xhci->pci_dev, MAXINTRS,
+                  &xhci->mem, 0, OFF_MSIX_TABLE,
+                  &xhci->mem, 0, OFF_MSIX_PBA,
+                  0x90);
     }
 
     return 0;
 }
 
-static void xhci_write_config(PCIDevice *dev, uint32_t addr, uint32_t val,
-                              int len)
-{
-    XHCIState *xhci = DO_UPCAST(XHCIState, pci_dev, dev);
-
-    pci_default_write_config(dev, addr, val, len);
-    if (xhci->msi) {
-        msi_write_config(dev, addr, val, len);
-    }
-}
-
 static const VMStateDescription vmstate_xhci = {
     .name = "xhci",
     .unmigratable = 1,
 };
 
 static Property xhci_properties[] = {
-    DEFINE_PROP_UINT32("msi", XHCIState, msi, 0),
+    DEFINE_PROP_BIT("msi",    XHCIState, flags, XHCI_FLAG_USE_MSI, true),
+    DEFINE_PROP_BIT("msix",   XHCIState, flags, XHCI_FLAG_USE_MSI_X, true),
+    DEFINE_PROP_UINT32("p2",  XHCIState, numports_2, 4),
+    DEFINE_PROP_UINT32("p3",  XHCIState, numports_3, 4),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2926,7 +3023,6 @@
     k->class_id     = PCI_CLASS_SERIAL_USB;
     k->revision     = 0x03;
     k->is_express   = 1;
-    k->config_write = xhci_write_config;
 }
 
 static TypeInfo xhci_info = {
diff --git a/hw/usb/host-linux.c b/hw/usb/host-linux.c
index 8df9207..44f1a64 100644
--- a/hw/usb/host-linux.c
+++ b/hw/usb/host-linux.c
@@ -1045,7 +1045,6 @@
 
     /* Note request is (bRequestType << 8) | bRequest */
     trace_usb_host_req_control(s->bus_num, s->addr, p, request, value, index);
-    assert(p->result == 0);
 
     switch (request) {
     case DeviceOutRequest | USB_REQ_SET_ADDRESS:
@@ -1074,6 +1073,7 @@
     }
 
     /* The rest are asynchronous */
+    assert(p && p->result == 0);
 
     if (length > sizeof(dev->data_buf)) {
         fprintf(stderr, "husb: ctrl buffer too small (%d > %zu)\n",
diff --git a/hw/usb/libhw.c b/hw/usb/libhw.c
index c0de30e..703e2d2 100644
--- a/hw/usb/libhw.c
+++ b/hw/usb/libhw.c
@@ -28,19 +28,25 @@
 {
     DMADirection dir = (p->pid == USB_TOKEN_IN) ?
         DMA_DIRECTION_FROM_DEVICE : DMA_DIRECTION_TO_DEVICE;
-    dma_addr_t len;
     void *mem;
     int i;
 
     for (i = 0; i < sgl->nsg; i++) {
-        len = sgl->sg[i].len;
-        mem = dma_memory_map(sgl->dma, sgl->sg[i].base, &len, dir);
-        if (!mem) {
-            goto err;
-        }
-        qemu_iovec_add(&p->iov, mem, len);
-        if (len != sgl->sg[i].len) {
-            goto err;
+        dma_addr_t base = sgl->sg[i].base;
+        dma_addr_t len = sgl->sg[i].len;
+
+        while (len) {
+            dma_addr_t xlen = len;
+            mem = dma_memory_map(sgl->dma, sgl->sg[i].base, &xlen, dir);
+            if (!mem) {
+                goto err;
+            }
+            if (xlen > len) {
+                xlen = len;
+            }
+            qemu_iovec_add(&p->iov, mem, xlen);
+            len -= xlen;
+            base += xlen;
         }
     }
     return 0;
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 10b4fbb..b10241a 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1,7 +1,7 @@
 /*
  * USB redirector usb-guest
  *
- * Copyright (c) 2011 Red Hat, Inc.
+ * Copyright (c) 2011-2012 Red Hat, Inc.
  *
  * Red Hat Authors:
  * Hans de Goede <hdegoede@redhat.com>
@@ -43,7 +43,6 @@
 #define EP2I(ep_address) (((ep_address & 0x80) >> 3) | (ep_address & 0x0f))
 #define I2EP(i) (((i & 0x10) << 3) | (i & 0x0f))
 
-typedef struct AsyncURB AsyncURB;
 typedef struct USBRedirDevice USBRedirDevice;
 
 /* Struct to hold buffered packets (iso or int input packets) */
@@ -58,6 +57,7 @@
     uint8_t type;
     uint8_t interval;
     uint8_t interface; /* bInterfaceNumber this ep belongs to */
+    uint16_t max_packet_size; /* In bytes, not wMaxPacketSize format !! */
     uint8_t iso_started;
     uint8_t iso_error; /* For reporting iso errors to the HC */
     uint8_t interrupt_started;
@@ -65,8 +65,20 @@
     uint8_t bufpq_prefilled;
     uint8_t bufpq_dropping_packets;
     QTAILQ_HEAD(, buf_packet) bufpq;
-    int bufpq_size;
-    int bufpq_target_size;
+    int32_t bufpq_size;
+    int32_t bufpq_target_size;
+};
+
+struct PacketIdQueueEntry {
+    uint64_t id;
+    QTAILQ_ENTRY(PacketIdQueueEntry)next;
+};
+
+struct PacketIdQueue {
+    USBRedirDevice *dev;
+    const char *name;
+    QTAILQ_HEAD(, PacketIdQueueEntry) head;
+    int size;
 };
 
 struct USBRedirDevice {
@@ -79,15 +91,15 @@
     /* Data passed from chardev the fd_read cb to the usbredirparser read cb */
     const uint8_t *read_buf;
     int read_buf_size;
-    /* For async handling of open/close */
-    QEMUBH *open_close_bh;
+    /* For async handling of close */
+    QEMUBH *chardev_close_bh;
     /* To delay the usb attach in case of quick chardev close + open */
     QEMUTimer *attach_timer;
     int64_t next_attach_time;
     struct usbredirparser *parser;
     struct endp_data endpoint[MAX_ENDPOINTS];
-    uint32_t packet_id;
-    QTAILQ_HEAD(, AsyncURB) asyncq;
+    struct PacketIdQueue cancelled;
+    struct PacketIdQueue already_in_flight;
     /* Data for device filtering */
     struct usb_redir_device_connect_header device_info;
     struct usb_redir_interface_info_header interface_info;
@@ -95,19 +107,6 @@
     int filter_rules_count;
 };
 
-struct AsyncURB {
-    USBRedirDevice *dev;
-    USBPacket *packet;
-    uint32_t packet_id;
-    int get;
-    union {
-        struct usb_redir_control_packet_header control_packet;
-        struct usb_redir_bulk_packet_header bulk_packet;
-        struct usb_redir_interrupt_packet_header interrupt_packet;
-    };
-    QTAILQ_ENTRY(AsyncURB)next;
-};
-
 static void usbredir_hello(void *priv, struct usb_redir_hello_header *h);
 static void usbredir_device_connect(void *priv,
     struct usb_redir_device_connect_header *device_connect);
@@ -116,33 +115,35 @@
     struct usb_redir_interface_info_header *interface_info);
 static void usbredir_ep_info(void *priv,
     struct usb_redir_ep_info_header *ep_info);
-static void usbredir_configuration_status(void *priv, uint32_t id,
+static void usbredir_configuration_status(void *priv, uint64_t id,
     struct usb_redir_configuration_status_header *configuration_status);
-static void usbredir_alt_setting_status(void *priv, uint32_t id,
+static void usbredir_alt_setting_status(void *priv, uint64_t id,
     struct usb_redir_alt_setting_status_header *alt_setting_status);
-static void usbredir_iso_stream_status(void *priv, uint32_t id,
+static void usbredir_iso_stream_status(void *priv, uint64_t id,
     struct usb_redir_iso_stream_status_header *iso_stream_status);
-static void usbredir_interrupt_receiving_status(void *priv, uint32_t id,
+static void usbredir_interrupt_receiving_status(void *priv, uint64_t id,
     struct usb_redir_interrupt_receiving_status_header
     *interrupt_receiving_status);
-static void usbredir_bulk_streams_status(void *priv, uint32_t id,
+static void usbredir_bulk_streams_status(void *priv, uint64_t id,
     struct usb_redir_bulk_streams_status_header *bulk_streams_status);
-static void usbredir_control_packet(void *priv, uint32_t id,
+static void usbredir_control_packet(void *priv, uint64_t id,
     struct usb_redir_control_packet_header *control_packet,
     uint8_t *data, int data_len);
-static void usbredir_bulk_packet(void *priv, uint32_t id,
+static void usbredir_bulk_packet(void *priv, uint64_t id,
     struct usb_redir_bulk_packet_header *bulk_packet,
     uint8_t *data, int data_len);
-static void usbredir_iso_packet(void *priv, uint32_t id,
+static void usbredir_iso_packet(void *priv, uint64_t id,
     struct usb_redir_iso_packet_header *iso_packet,
     uint8_t *data, int data_len);
-static void usbredir_interrupt_packet(void *priv, uint32_t id,
+static void usbredir_interrupt_packet(void *priv, uint64_t id,
     struct usb_redir_interrupt_packet_header *interrupt_header,
     uint8_t *data, int data_len);
 
 static int usbredir_handle_status(USBRedirDevice *dev,
                                        int status, int actual_len);
 
+#define VERSION "qemu usb-redir guest " QEMU_VERSION
+
 /*
  * Logging stuff
  */
@@ -241,62 +242,133 @@
         return 0;
     }
 
+    /* Don't send new data to the chardev until our state is fully synced */
+    if (!runstate_check(RUN_STATE_RUNNING)) {
+        return 0;
+    }
+
     return qemu_chr_fe_write(dev->cs, data, count);
 }
 
 /*
- * Async and buffered packets helpers
+ * Cancelled and buffered packets helpers
  */
 
-static AsyncURB *async_alloc(USBRedirDevice *dev, USBPacket *p)
+static void packet_id_queue_init(struct PacketIdQueue *q,
+    USBRedirDevice *dev, const char *name)
 {
-    AsyncURB *aurb = (AsyncURB *) g_malloc0(sizeof(AsyncURB));
-    aurb->dev = dev;
-    aurb->packet = p;
-    aurb->packet_id = dev->packet_id;
-    QTAILQ_INSERT_TAIL(&dev->asyncq, aurb, next);
-    dev->packet_id++;
-
-    return aurb;
+    q->dev = dev;
+    q->name = name;
+    QTAILQ_INIT(&q->head);
+    q->size = 0;
 }
 
-static void async_free(USBRedirDevice *dev, AsyncURB *aurb)
+static void packet_id_queue_add(struct PacketIdQueue *q, uint64_t id)
 {
-    QTAILQ_REMOVE(&dev->asyncq, aurb, next);
-    g_free(aurb);
+    USBRedirDevice *dev = q->dev;
+    struct PacketIdQueueEntry *e;
+
+    DPRINTF("adding packet id %"PRIu64" to %s queue\n", id, q->name);
+
+    e = g_malloc0(sizeof(struct PacketIdQueueEntry));
+    e->id = id;
+    QTAILQ_INSERT_TAIL(&q->head, e, next);
+    q->size++;
 }
 
-static AsyncURB *async_find(USBRedirDevice *dev, uint32_t packet_id)
+static int packet_id_queue_remove(struct PacketIdQueue *q, uint64_t id)
 {
-    AsyncURB *aurb;
+    USBRedirDevice *dev = q->dev;
+    struct PacketIdQueueEntry *e;
 
-    QTAILQ_FOREACH(aurb, &dev->asyncq, next) {
-        if (aurb->packet_id == packet_id) {
-            return aurb;
+    QTAILQ_FOREACH(e, &q->head, next) {
+        if (e->id == id) {
+            DPRINTF("removing packet id %"PRIu64" from %s queue\n",
+                    id, q->name);
+            QTAILQ_REMOVE(&q->head, e, next);
+            q->size--;
+            g_free(e);
+            return 1;
         }
     }
-    DPRINTF("could not find async urb for packet_id %u\n", packet_id);
-    return NULL;
+    return 0;
+}
+
+static void packet_id_queue_empty(struct PacketIdQueue *q)
+{
+    USBRedirDevice *dev = q->dev;
+    struct PacketIdQueueEntry *e, *next_e;
+
+    DPRINTF("removing %d packet-ids from %s queue\n", q->size, q->name);
+
+    QTAILQ_FOREACH_SAFE(e, &q->head, next, next_e) {
+        QTAILQ_REMOVE(&q->head, e, next);
+        g_free(e);
+    }
+    q->size = 0;
 }
 
 static void usbredir_cancel_packet(USBDevice *udev, USBPacket *p)
 {
     USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev);
-    AsyncURB *aurb;
 
-    QTAILQ_FOREACH(aurb, &dev->asyncq, next) {
-        if (p != aurb->packet) {
-            continue;
-        }
+    packet_id_queue_add(&dev->cancelled, p->id);
+    usbredirparser_send_cancel_data_packet(dev->parser, p->id);
+    usbredirparser_do_write(dev->parser);
+}
 
-        DPRINTF("async cancel id %u\n", aurb->packet_id);
-        usbredirparser_send_cancel_data_packet(dev->parser, aurb->packet_id);
-        usbredirparser_do_write(dev->parser);
-
-        /* Mark it as dead */
-        aurb->packet = NULL;
-        break;
+static int usbredir_is_cancelled(USBRedirDevice *dev, uint64_t id)
+{
+    if (!dev->dev.attached) {
+        return 1; /* Treat everything as cancelled after a disconnect */
     }
+    return packet_id_queue_remove(&dev->cancelled, id);
+}
+
+static void usbredir_fill_already_in_flight_from_ep(USBRedirDevice *dev,
+    struct USBEndpoint *ep)
+{
+    static USBPacket *p;
+
+    QTAILQ_FOREACH(p, &ep->queue, queue) {
+        packet_id_queue_add(&dev->already_in_flight, p->id);
+    }
+}
+
+static void usbredir_fill_already_in_flight(USBRedirDevice *dev)
+{
+    int ep;
+    struct USBDevice *udev = &dev->dev;
+
+    usbredir_fill_already_in_flight_from_ep(dev, &udev->ep_ctl);
+
+    for (ep = 0; ep < USB_MAX_ENDPOINTS; ep++) {
+        usbredir_fill_already_in_flight_from_ep(dev, &udev->ep_in[ep]);
+        usbredir_fill_already_in_flight_from_ep(dev, &udev->ep_out[ep]);
+    }
+}
+
+static int usbredir_already_in_flight(USBRedirDevice *dev, uint64_t id)
+{
+    return packet_id_queue_remove(&dev->already_in_flight, id);
+}
+
+static USBPacket *usbredir_find_packet_by_id(USBRedirDevice *dev,
+    uint8_t ep, uint64_t id)
+{
+    USBPacket *p;
+
+    if (usbredir_is_cancelled(dev, id)) {
+        return NULL;
+    }
+
+    p = usb_ep_find_packet_by_id(&dev->dev,
+                            (ep & USB_DIR_IN) ? USB_TOKEN_IN : USB_TOKEN_OUT,
+                            ep & 0x0f, id);
+    if (p == NULL) {
+        ERROR("could not find packet with id %"PRIu64"\n", id);
+    }
+    return p;
 }
 
 static void bufp_alloc(USBRedirDevice *dev,
@@ -492,25 +564,26 @@
 static int usbredir_handle_bulk_data(USBRedirDevice *dev, USBPacket *p,
                                       uint8_t ep)
 {
-    AsyncURB *aurb = async_alloc(dev, p);
     struct usb_redir_bulk_packet_header bulk_packet;
 
-    DPRINTF("bulk-out ep %02X len %zd id %u\n", ep,
-            p->iov.size, aurb->packet_id);
+    DPRINTF("bulk-out ep %02X len %zd id %"PRIu64"\n", ep, p->iov.size, p->id);
+
+    if (usbredir_already_in_flight(dev, p->id)) {
+        return USB_RET_ASYNC;
+    }
 
     bulk_packet.endpoint  = ep;
     bulk_packet.length    = p->iov.size;
     bulk_packet.stream_id = 0;
-    aurb->bulk_packet = bulk_packet;
 
     if (ep & USB_DIR_IN) {
-        usbredirparser_send_bulk_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_bulk_packet(dev->parser, p->id,
                                         &bulk_packet, NULL, 0);
     } else {
         uint8_t buf[p->iov.size];
         usb_packet_copy(p, buf, p->iov.size);
         usbredir_log_data(dev, "bulk data out:", buf, p->iov.size);
-        usbredirparser_send_bulk_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_bulk_packet(dev->parser, p->id,
                                         &bulk_packet, buf, p->iov.size);
     }
     usbredirparser_do_write(dev->parser);
@@ -573,20 +646,22 @@
         return len;
     } else {
         /* Output interrupt endpoint, normal async operation */
-        AsyncURB *aurb = async_alloc(dev, p);
         struct usb_redir_interrupt_packet_header interrupt_packet;
         uint8_t buf[p->iov.size];
 
-        DPRINTF("interrupt-out ep %02X len %zd id %u\n", ep, p->iov.size,
-                aurb->packet_id);
+        DPRINTF("interrupt-out ep %02X len %zd id %"PRIu64"\n", ep,
+                p->iov.size, p->id);
+
+        if (usbredir_already_in_flight(dev, p->id)) {
+            return USB_RET_ASYNC;
+        }
 
         interrupt_packet.endpoint  = ep;
         interrupt_packet.length    = p->iov.size;
-        aurb->interrupt_packet     = interrupt_packet;
 
         usb_packet_copy(p, buf, p->iov.size);
         usbredir_log_data(dev, "interrupt data out:", buf, p->iov.size);
-        usbredirparser_send_interrupt_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_interrupt_packet(dev->parser, p->id,
                                         &interrupt_packet, buf, p->iov.size);
         usbredirparser_do_write(dev->parser);
         return USB_RET_ASYNC;
@@ -640,10 +715,9 @@
                                 int config)
 {
     struct usb_redir_set_configuration_header set_config;
-    AsyncURB *aurb = async_alloc(dev, p);
     int i;
 
-    DPRINTF("set config %d id %u\n", config, aurb->packet_id);
+    DPRINTF("set config %d id %"PRIu64"\n", config, p->id);
 
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         switch (dev->endpoint[i].type) {
@@ -660,20 +734,16 @@
     }
 
     set_config.configuration = config;
-    usbredirparser_send_set_configuration(dev->parser, aurb->packet_id,
-                                          &set_config);
+    usbredirparser_send_set_configuration(dev->parser, p->id, &set_config);
     usbredirparser_do_write(dev->parser);
     return USB_RET_ASYNC;
 }
 
 static int usbredir_get_config(USBRedirDevice *dev, USBPacket *p)
 {
-    AsyncURB *aurb = async_alloc(dev, p);
+    DPRINTF("get config id %"PRIu64"\n", p->id);
 
-    DPRINTF("get config id %u\n", aurb->packet_id);
-
-    aurb->get = 1;
-    usbredirparser_send_get_configuration(dev->parser, aurb->packet_id);
+    usbredirparser_send_get_configuration(dev->parser, p->id);
     usbredirparser_do_write(dev->parser);
     return USB_RET_ASYNC;
 }
@@ -682,11 +752,9 @@
                                    int interface, int alt)
 {
     struct usb_redir_set_alt_setting_header set_alt;
-    AsyncURB *aurb = async_alloc(dev, p);
     int i;
 
-    DPRINTF("set interface %d alt %d id %u\n", interface, alt,
-            aurb->packet_id);
+    DPRINTF("set interface %d alt %d id %"PRIu64"\n", interface, alt, p->id);
 
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         if (dev->endpoint[i].interface == interface) {
@@ -706,8 +774,7 @@
 
     set_alt.interface = interface;
     set_alt.alt = alt;
-    usbredirparser_send_set_alt_setting(dev->parser, aurb->packet_id,
-                                        &set_alt);
+    usbredirparser_send_set_alt_setting(dev->parser, p->id, &set_alt);
     usbredirparser_do_write(dev->parser);
     return USB_RET_ASYNC;
 }
@@ -716,14 +783,11 @@
                                    int interface)
 {
     struct usb_redir_get_alt_setting_header get_alt;
-    AsyncURB *aurb = async_alloc(dev, p);
 
-    DPRINTF("get interface %d id %u\n", interface, aurb->packet_id);
+    DPRINTF("get interface %d id %"PRIu64"\n", interface, p->id);
 
     get_alt.interface = interface;
-    aurb->get = 1;
-    usbredirparser_send_get_alt_setting(dev->parser, aurb->packet_id,
-                                        &get_alt);
+    usbredirparser_send_get_alt_setting(dev->parser, p->id, &get_alt);
     usbredirparser_do_write(dev->parser);
     return USB_RET_ASYNC;
 }
@@ -733,7 +797,10 @@
 {
     USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev);
     struct usb_redir_control_packet_header control_packet;
-    AsyncURB *aurb;
+
+    if (usbredir_already_in_flight(dev, p->id)) {
+        return USB_RET_ASYNC;
+    }
 
     /* Special cases for certain standard device requests */
     switch (request) {
@@ -751,13 +818,10 @@
         return usbredir_get_interface(dev, p, index);
     }
 
-    /* "Normal" ctrl requests */
-    aurb = async_alloc(dev, p);
-
-    /* Note request is (bRequestType << 8) | bRequest */
-    DPRINTF("ctrl-out type 0x%x req 0x%x val 0x%x index %d len %d id %u\n",
-            request >> 8, request & 0xff, value, index, length,
-            aurb->packet_id);
+    /* Normal ctrl requests, note request is (bRequestType << 8) | bRequest */
+    DPRINTF(
+        "ctrl-out type 0x%x req 0x%x val 0x%x index %d len %d id %"PRIu64"\n",
+        request >> 8, request & 0xff, value, index, length, p->id);
 
     control_packet.request     = request & 0xFF;
     control_packet.requesttype = request >> 8;
@@ -765,14 +829,13 @@
     control_packet.value       = value;
     control_packet.index       = index;
     control_packet.length      = length;
-    aurb->control_packet       = control_packet;
 
     if (control_packet.requesttype & USB_DIR_IN) {
-        usbredirparser_send_control_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_control_packet(dev->parser, p->id,
                                            &control_packet, NULL, 0);
     } else {
         usbredir_log_data(dev, "ctrl data out:", data, length);
-        usbredirparser_send_control_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_control_packet(dev->parser, p->id,
                                            &control_packet, data, length);
     }
     usbredirparser_do_write(dev->parser);
@@ -784,53 +847,73 @@
  * from within the USBDevice data / control packet callbacks and doing a
  * usb_detach from within these callbacks is not a good idea.
  *
- * So we use a bh handler to take care of close events. We also handle
- * open events from this callback to make sure that a close directly followed
- * by an open gets handled in the right order.
+ * So we use a bh handler to take care of close events.
  */
-static void usbredir_open_close_bh(void *opaque)
+static void usbredir_chardev_close_bh(void *opaque)
 {
     USBRedirDevice *dev = opaque;
-    uint32_t caps[USB_REDIR_CAPS_SIZE] = { 0, };
-    char version[32];
-
-    strcpy(version, "qemu usb-redir guest ");
-    pstrcat(version, sizeof(version), qemu_get_version());
 
     usbredir_device_disconnect(dev);
 
     if (dev->parser) {
+        DPRINTF("destroying usbredirparser\n");
         usbredirparser_destroy(dev->parser);
         dev->parser = NULL;
     }
+}
 
-    if (dev->cs->opened) {
-        dev->parser = qemu_oom_check(usbredirparser_create());
-        dev->parser->priv = dev;
-        dev->parser->log_func = usbredir_log;
-        dev->parser->read_func = usbredir_read;
-        dev->parser->write_func = usbredir_write;
-        dev->parser->hello_func = usbredir_hello;
-        dev->parser->device_connect_func = usbredir_device_connect;
-        dev->parser->device_disconnect_func = usbredir_device_disconnect;
-        dev->parser->interface_info_func = usbredir_interface_info;
-        dev->parser->ep_info_func = usbredir_ep_info;
-        dev->parser->configuration_status_func = usbredir_configuration_status;
-        dev->parser->alt_setting_status_func = usbredir_alt_setting_status;
-        dev->parser->iso_stream_status_func = usbredir_iso_stream_status;
-        dev->parser->interrupt_receiving_status_func =
-            usbredir_interrupt_receiving_status;
-        dev->parser->bulk_streams_status_func = usbredir_bulk_streams_status;
-        dev->parser->control_packet_func = usbredir_control_packet;
-        dev->parser->bulk_packet_func = usbredir_bulk_packet;
-        dev->parser->iso_packet_func = usbredir_iso_packet;
-        dev->parser->interrupt_packet_func = usbredir_interrupt_packet;
-        dev->read_buf = NULL;
-        dev->read_buf_size = 0;
+static void usbredir_chardev_open(USBRedirDevice *dev)
+{
+    uint32_t caps[USB_REDIR_CAPS_SIZE] = { 0, };
+    int flags = 0;
 
-        usbredirparser_caps_set_cap(caps, usb_redir_cap_connect_device_version);
-        usbredirparser_caps_set_cap(caps, usb_redir_cap_filter);
-        usbredirparser_init(dev->parser, version, caps, USB_REDIR_CAPS_SIZE, 0);
+    /* Make sure any pending closes are handled (no-op if none pending) */
+    usbredir_chardev_close_bh(dev);
+    qemu_bh_cancel(dev->chardev_close_bh);
+
+    DPRINTF("creating usbredirparser\n");
+
+    dev->parser = qemu_oom_check(usbredirparser_create());
+    dev->parser->priv = dev;
+    dev->parser->log_func = usbredir_log;
+    dev->parser->read_func = usbredir_read;
+    dev->parser->write_func = usbredir_write;
+    dev->parser->hello_func = usbredir_hello;
+    dev->parser->device_connect_func = usbredir_device_connect;
+    dev->parser->device_disconnect_func = usbredir_device_disconnect;
+    dev->parser->interface_info_func = usbredir_interface_info;
+    dev->parser->ep_info_func = usbredir_ep_info;
+    dev->parser->configuration_status_func = usbredir_configuration_status;
+    dev->parser->alt_setting_status_func = usbredir_alt_setting_status;
+    dev->parser->iso_stream_status_func = usbredir_iso_stream_status;
+    dev->parser->interrupt_receiving_status_func =
+        usbredir_interrupt_receiving_status;
+    dev->parser->bulk_streams_status_func = usbredir_bulk_streams_status;
+    dev->parser->control_packet_func = usbredir_control_packet;
+    dev->parser->bulk_packet_func = usbredir_bulk_packet;
+    dev->parser->iso_packet_func = usbredir_iso_packet;
+    dev->parser->interrupt_packet_func = usbredir_interrupt_packet;
+    dev->read_buf = NULL;
+    dev->read_buf_size = 0;
+
+    usbredirparser_caps_set_cap(caps, usb_redir_cap_connect_device_version);
+    usbredirparser_caps_set_cap(caps, usb_redir_cap_filter);
+    usbredirparser_caps_set_cap(caps, usb_redir_cap_ep_info_max_packet_size);
+    usbredirparser_caps_set_cap(caps, usb_redir_cap_64bits_ids);
+
+    if (runstate_check(RUN_STATE_INMIGRATE)) {
+        flags |= usbredirparser_fl_no_hello;
+    }
+    usbredirparser_init(dev->parser, VERSION, caps, USB_REDIR_CAPS_SIZE,
+                        flags);
+    usbredirparser_do_write(dev->parser);
+}
+
+static void usbredir_reject_device(USBRedirDevice *dev)
+{
+    usbredir_device_disconnect(dev);
+    if (usbredirparser_peer_has_cap(dev->parser, usb_redir_cap_filter)) {
+        usbredirparser_send_filter_reject(dev->parser);
         usbredirparser_do_write(dev->parser);
     }
 }
@@ -839,12 +922,19 @@
 {
     USBRedirDevice *dev = opaque;
 
+    /* In order to work properly with XHCI controllers we need these caps */
+    if ((dev->dev.port->speedmask & USB_SPEED_MASK_SUPER) && !(
+        usbredirparser_peer_has_cap(dev->parser,
+                                    usb_redir_cap_ep_info_max_packet_size) &&
+        usbredirparser_peer_has_cap(dev->parser,
+                                    usb_redir_cap_64bits_ids))) {
+        ERROR("usb-redir-host lacks capabilities needed for use with XHCI\n");
+        usbredir_reject_device(dev);
+        return;
+    }
+
     if (usb_device_attach(&dev->dev) != 0) {
-        usbredir_device_disconnect(dev);
-        if (usbredirparser_peer_has_cap(dev->parser, usb_redir_cap_filter)) {
-            usbredirparser_send_filter_reject(dev->parser);
-            usbredirparser_do_write(dev->parser);
-        }
+        usbredir_reject_device(dev);
     }
 }
 
@@ -856,13 +946,18 @@
 {
     USBRedirDevice *dev = opaque;
 
-    if (dev->parser) {
-        /* usbredir_parser_do_read will consume *all* data we give it */
-        return 1024 * 1024;
-    } else {
-        /* usbredir_open_close_bh hasn't handled the open event yet */
+    if (!dev->parser) {
+        WARNING("chardev_can_read called on non open chardev!\n");
         return 0;
     }
+
+    /* Don't read new data from the chardev until our state is fully synced */
+    if (!runstate_check(RUN_STATE_RUNNING)) {
+        return 0;
+    }
+
+    /* usbredir_parser_do_read will consume *all* data we give it */
+    return 1024 * 1024;
 }
 
 static void usbredir_chardev_read(void *opaque, const uint8_t *buf, int size)
@@ -886,8 +981,12 @@
 
     switch (event) {
     case CHR_EVENT_OPENED:
+        DPRINTF("chardev open\n");
+        usbredir_chardev_open(dev);
+        break;
     case CHR_EVENT_CLOSED:
-        qemu_bh_schedule(dev->open_close_bh);
+        DPRINTF("chardev close\n");
+        qemu_bh_schedule(dev->chardev_close_bh);
         break;
     }
 }
@@ -896,6 +995,15 @@
  * init + destroy
  */
 
+static void usbredir_vm_state_change(void *priv, int running, RunState state)
+{
+    USBRedirDevice *dev = priv;
+
+    if (state == RUN_STATE_RUNNING && dev->parser != NULL) {
+        usbredirparser_do_write(dev->parser); /* Flush any pending writes */
+    }
+}
+
 static int usbredir_initfn(USBDevice *udev)
 {
     USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev);
@@ -917,10 +1025,11 @@
         }
     }
 
-    dev->open_close_bh = qemu_bh_new(usbredir_open_close_bh, dev);
+    dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
     dev->attach_timer = qemu_new_timer_ms(vm_clock, usbredir_do_attach, dev);
 
-    QTAILQ_INIT(&dev->asyncq);
+    packet_id_queue_init(&dev->cancelled, dev, "cancelled");
+    packet_id_queue_init(&dev->already_in_flight, dev, "already-in-flight");
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         QTAILQ_INIT(&dev->endpoint[i].bufpq);
     }
@@ -933,18 +1042,17 @@
     qemu_chr_add_handlers(dev->cs, usbredir_chardev_can_read,
                           usbredir_chardev_read, usbredir_chardev_event, dev);
 
+    qemu_add_vm_change_state_handler(usbredir_vm_state_change, dev);
     add_boot_device_path(dev->bootindex, &udev->qdev, NULL);
     return 0;
 }
 
 static void usbredir_cleanup_device_queues(USBRedirDevice *dev)
 {
-    AsyncURB *aurb, *next_aurb;
     int i;
 
-    QTAILQ_FOREACH_SAFE(aurb, &dev->asyncq, next, next_aurb) {
-        async_free(dev, aurb);
-    }
+    packet_id_queue_empty(&dev->cancelled);
+    packet_id_queue_empty(&dev->already_in_flight);
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         usbredir_free_bufpq(dev, I2EP(i));
     }
@@ -957,7 +1065,7 @@
     qemu_chr_fe_close(dev->cs);
     qemu_chr_delete(dev->cs);
     /* Note must be done after qemu_chr_close, as that causes a close event */
-    qemu_bh_delete(dev->open_close_bh);
+    qemu_bh_delete(dev->chardev_close_bh);
 
     qemu_del_timer(dev->attach_timer);
     qemu_free_timer(dev->attach_timer);
@@ -1007,11 +1115,7 @@
     return 0;
 
 error:
-    usbredir_device_disconnect(dev);
-    if (usbredirparser_peer_has_cap(dev->parser, usb_redir_cap_filter)) {
-        usbredirparser_send_filter_reject(dev->parser);
-        usbredirparser_do_write(dev->parser);
-    }
+    usbredir_reject_device(dev);
     return -1;
 }
 
@@ -1028,11 +1132,14 @@
     case usb_redir_stall:
         return USB_RET_STALL;
     case usb_redir_cancelled:
-        WARNING("returning cancelled packet to HC?\n");
-        return USB_RET_NAK;
+        /*
+         * When the usbredir-host unredirects a device, it will report a status
+         * of cancelled for all pending packets, followed by a disconnect msg.
+         */
+        return USB_RET_IOERROR;
     case usb_redir_inval:
         WARNING("got invalid param error from usb-host?\n");
-        return USB_RET_NAK;
+        return USB_RET_IOERROR;
     case usb_redir_babble:
         return USB_RET_BABBLE;
     case usb_redir_ioerror:
@@ -1124,6 +1231,7 @@
     qemu_del_timer(dev->attach_timer);
 
     if (dev->dev.attached) {
+        DPRINTF("detaching device\n");
         usb_device_detach(&dev->dev);
         /*
          * Delay next usb device attach to give the guest a chance to see
@@ -1199,70 +1307,68 @@
                             i & 0x0f);
         usb_ep->type = dev->endpoint[i].type;
         usb_ep->ifnum = dev->endpoint[i].interface;
+        if (usbredirparser_peer_has_cap(dev->parser,
+                                     usb_redir_cap_ep_info_max_packet_size)) {
+            dev->endpoint[i].max_packet_size =
+                usb_ep->max_packet_size = ep_info->max_packet_size[i];
+        }
+        if (ep_info->type[i] == usb_redir_type_bulk) {
+            usb_ep->pipeline = true;
+        }
     }
 }
 
-static void usbredir_configuration_status(void *priv, uint32_t id,
+static void usbredir_configuration_status(void *priv, uint64_t id,
     struct usb_redir_configuration_status_header *config_status)
 {
     USBRedirDevice *dev = priv;
-    AsyncURB *aurb;
+    USBPacket *p;
     int len = 0;
 
-    DPRINTF("set config status %d config %d id %u\n", config_status->status,
-            config_status->configuration, id);
+    DPRINTF("set config status %d config %d id %"PRIu64"\n",
+            config_status->status, config_status->configuration, id);
 
-    aurb = async_find(dev, id);
-    if (!aurb) {
-        return;
-    }
-    if (aurb->packet) {
-        if (aurb->get) {
+    p = usbredir_find_packet_by_id(dev, 0, id);
+    if (p) {
+        if (dev->dev.setup_buf[0] & USB_DIR_IN) {
             dev->dev.data_buf[0] = config_status->configuration;
             len = 1;
         }
-        aurb->packet->result =
-            usbredir_handle_status(dev, config_status->status, len);
-        usb_generic_async_ctrl_complete(&dev->dev, aurb->packet);
+        p->result = usbredir_handle_status(dev, config_status->status, len);
+        usb_generic_async_ctrl_complete(&dev->dev, p);
     }
-    async_free(dev, aurb);
 }
 
-static void usbredir_alt_setting_status(void *priv, uint32_t id,
+static void usbredir_alt_setting_status(void *priv, uint64_t id,
     struct usb_redir_alt_setting_status_header *alt_setting_status)
 {
     USBRedirDevice *dev = priv;
-    AsyncURB *aurb;
+    USBPacket *p;
     int len = 0;
 
-    DPRINTF("alt status %d intf %d alt %d id: %u\n",
-            alt_setting_status->status,
-            alt_setting_status->interface,
+    DPRINTF("alt status %d intf %d alt %d id: %"PRIu64"\n",
+            alt_setting_status->status, alt_setting_status->interface,
             alt_setting_status->alt, id);
 
-    aurb = async_find(dev, id);
-    if (!aurb) {
-        return;
-    }
-    if (aurb->packet) {
-        if (aurb->get) {
+    p = usbredir_find_packet_by_id(dev, 0, id);
+    if (p) {
+        if (dev->dev.setup_buf[0] & USB_DIR_IN) {
             dev->dev.data_buf[0] = alt_setting_status->alt;
             len = 1;
         }
-        aurb->packet->result =
+        p->result =
             usbredir_handle_status(dev, alt_setting_status->status, len);
-        usb_generic_async_ctrl_complete(&dev->dev, aurb->packet);
+        usb_generic_async_ctrl_complete(&dev->dev, p);
     }
-    async_free(dev, aurb);
 }
 
-static void usbredir_iso_stream_status(void *priv, uint32_t id,
+static void usbredir_iso_stream_status(void *priv, uint64_t id,
     struct usb_redir_iso_stream_status_header *iso_stream_status)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = iso_stream_status->endpoint;
 
-    DPRINTF("iso status %d ep %02X id %u\n", iso_stream_status->status,
+    DPRINTF("iso status %d ep %02X id %"PRIu64"\n", iso_stream_status->status,
             ep, id);
 
     if (!dev->dev.attached || !dev->endpoint[EP2I(ep)].iso_started) {
@@ -1276,14 +1382,14 @@
     }
 }
 
-static void usbredir_interrupt_receiving_status(void *priv, uint32_t id,
+static void usbredir_interrupt_receiving_status(void *priv, uint64_t id,
     struct usb_redir_interrupt_receiving_status_header
     *interrupt_receiving_status)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = interrupt_receiving_status->endpoint;
 
-    DPRINTF("interrupt recv status %d ep %02X id %u\n",
+    DPRINTF("interrupt recv status %d ep %02X id %"PRIu64"\n",
             interrupt_receiving_status->status, ep, id);
 
     if (!dev->dev.attached || !dev->endpoint[EP2I(ep)].interrupt_started) {
@@ -1298,37 +1404,24 @@
     }
 }
 
-static void usbredir_bulk_streams_status(void *priv, uint32_t id,
+static void usbredir_bulk_streams_status(void *priv, uint64_t id,
     struct usb_redir_bulk_streams_status_header *bulk_streams_status)
 {
 }
 
-static void usbredir_control_packet(void *priv, uint32_t id,
+static void usbredir_control_packet(void *priv, uint64_t id,
     struct usb_redir_control_packet_header *control_packet,
     uint8_t *data, int data_len)
 {
     USBRedirDevice *dev = priv;
+    USBPacket *p;
     int len = control_packet->length;
-    AsyncURB *aurb;
 
-    DPRINTF("ctrl-in status %d len %d id %u\n", control_packet->status,
+    DPRINTF("ctrl-in status %d len %d id %"PRIu64"\n", control_packet->status,
             len, id);
 
-    aurb = async_find(dev, id);
-    if (!aurb) {
-        free(data);
-        return;
-    }
-
-    aurb->control_packet.status = control_packet->status;
-    aurb->control_packet.length = control_packet->length;
-    if (memcmp(&aurb->control_packet, control_packet,
-               sizeof(*control_packet))) {
-        ERROR("return control packet mismatch, please report this!\n");
-        len = USB_RET_NAK;
-    }
-
-    if (aurb->packet) {
+    p = usbredir_find_packet_by_id(dev, 0, id);
+    if (p) {
         len = usbredir_handle_status(dev, control_packet->status, len);
         if (len > 0) {
             usbredir_log_data(dev, "ctrl data in:", data, data_len);
@@ -1340,65 +1433,52 @@
                 len = USB_RET_STALL;
             }
         }
-        aurb->packet->result = len;
-        usb_generic_async_ctrl_complete(&dev->dev, aurb->packet);
+        p->result = len;
+        usb_generic_async_ctrl_complete(&dev->dev, p);
     }
-    async_free(dev, aurb);
     free(data);
 }
 
-static void usbredir_bulk_packet(void *priv, uint32_t id,
+static void usbredir_bulk_packet(void *priv, uint64_t id,
     struct usb_redir_bulk_packet_header *bulk_packet,
     uint8_t *data, int data_len)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = bulk_packet->endpoint;
     int len = bulk_packet->length;
-    AsyncURB *aurb;
+    USBPacket *p;
 
-    DPRINTF("bulk-in status %d ep %02X len %d id %u\n", bulk_packet->status,
-            ep, len, id);
+    DPRINTF("bulk-in status %d ep %02X len %d id %"PRIu64"\n",
+            bulk_packet->status, ep, len, id);
 
-    aurb = async_find(dev, id);
-    if (!aurb) {
-        free(data);
-        return;
-    }
-
-    if (aurb->bulk_packet.endpoint != bulk_packet->endpoint ||
-            aurb->bulk_packet.stream_id != bulk_packet->stream_id) {
-        ERROR("return bulk packet mismatch, please report this!\n");
-        len = USB_RET_NAK;
-    }
-
-    if (aurb->packet) {
+    p = usbredir_find_packet_by_id(dev, ep, id);
+    if (p) {
         len = usbredir_handle_status(dev, bulk_packet->status, len);
         if (len > 0) {
             usbredir_log_data(dev, "bulk data in:", data, data_len);
-            if (data_len <= aurb->packet->iov.size) {
-                usb_packet_copy(aurb->packet, data, data_len);
+            if (data_len <= p->iov.size) {
+                usb_packet_copy(p, data, data_len);
             } else {
-                ERROR("bulk buffer too small (%d > %zd)\n", data_len,
-                      aurb->packet->iov.size);
-                len = USB_RET_STALL;
+                ERROR("bulk got more data then requested (%d > %zd)\n",
+                      data_len, p->iov.size);
+                len = USB_RET_BABBLE;
             }
         }
-        aurb->packet->result = len;
-        usb_packet_complete(&dev->dev, aurb->packet);
+        p->result = len;
+        usb_packet_complete(&dev->dev, p);
     }
-    async_free(dev, aurb);
     free(data);
 }
 
-static void usbredir_iso_packet(void *priv, uint32_t id,
+static void usbredir_iso_packet(void *priv, uint64_t id,
     struct usb_redir_iso_packet_header *iso_packet,
     uint8_t *data, int data_len)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = iso_packet->endpoint;
 
-    DPRINTF2("iso-in status %d ep %02X len %d id %u\n", iso_packet->status, ep,
-             data_len, id);
+    DPRINTF2("iso-in status %d ep %02X len %d id %"PRIu64"\n",
+             iso_packet->status, ep, data_len, id);
 
     if (dev->endpoint[EP2I(ep)].type != USB_ENDPOINT_XFER_ISOC) {
         ERROR("received iso packet for non iso endpoint %02X\n", ep);
@@ -1416,14 +1496,14 @@
     bufp_alloc(dev, data, data_len, iso_packet->status, ep);
 }
 
-static void usbredir_interrupt_packet(void *priv, uint32_t id,
+static void usbredir_interrupt_packet(void *priv, uint64_t id,
     struct usb_redir_interrupt_packet_header *interrupt_packet,
     uint8_t *data, int data_len)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = interrupt_packet->endpoint;
 
-    DPRINTF("interrupt-in status %d ep %02X len %d id %u\n",
+    DPRINTF("interrupt-in status %d ep %02X len %d id %"PRIu64"\n",
             interrupt_packet->status, ep, data_len, id);
 
     if (dev->endpoint[EP2I(ep)].type != USB_ENDPOINT_XFER_INT) {
@@ -1444,25 +1524,331 @@
     } else {
         int len = interrupt_packet->length;
 
-        AsyncURB *aurb = async_find(dev, id);
-        if (!aurb) {
-            return;
-        }
-
-        if (aurb->interrupt_packet.endpoint != interrupt_packet->endpoint) {
-            ERROR("return int packet mismatch, please report this!\n");
-            len = USB_RET_NAK;
-        }
-
-        if (aurb->packet) {
-            aurb->packet->result = usbredir_handle_status(dev,
+        USBPacket *p = usbredir_find_packet_by_id(dev, ep, id);
+        if (p) {
+            p->result = usbredir_handle_status(dev,
                                                interrupt_packet->status, len);
-            usb_packet_complete(&dev->dev, aurb->packet);
+            usb_packet_complete(&dev->dev, p);
         }
-        async_free(dev, aurb);
     }
 }
 
+/*
+ * Migration code
+ */
+
+static void usbredir_pre_save(void *priv)
+{
+    USBRedirDevice *dev = priv;
+
+    usbredir_fill_already_in_flight(dev);
+}
+
+static int usbredir_post_load(void *priv, int version_id)
+{
+    USBRedirDevice *dev = priv;
+    struct USBEndpoint *usb_ep;
+    int i;
+
+    switch (dev->device_info.speed) {
+    case usb_redir_speed_low:
+        dev->dev.speed = USB_SPEED_LOW;
+        break;
+    case usb_redir_speed_full:
+        dev->dev.speed = USB_SPEED_FULL;
+        break;
+    case usb_redir_speed_high:
+        dev->dev.speed = USB_SPEED_HIGH;
+        break;
+    case usb_redir_speed_super:
+        dev->dev.speed = USB_SPEED_SUPER;
+        break;
+    default:
+        dev->dev.speed = USB_SPEED_FULL;
+    }
+    dev->dev.speedmask = (1 << dev->dev.speed);
+
+    for (i = 0; i < MAX_ENDPOINTS; i++) {
+        usb_ep = usb_ep_get(&dev->dev,
+                            (i & 0x10) ? USB_TOKEN_IN : USB_TOKEN_OUT,
+                            i & 0x0f);
+        usb_ep->type = dev->endpoint[i].type;
+        usb_ep->ifnum = dev->endpoint[i].interface;
+        usb_ep->max_packet_size = dev->endpoint[i].max_packet_size;
+        if (dev->endpoint[i].type == usb_redir_type_bulk) {
+            usb_ep->pipeline = true;
+        }
+    }
+    return 0;
+}
+
+/* For usbredirparser migration */
+static void usbredir_put_parser(QEMUFile *f, void *priv, size_t unused)
+{
+    USBRedirDevice *dev = priv;
+    uint8_t *data;
+    int len;
+
+    if (dev->parser == NULL) {
+        qemu_put_be32(f, 0);
+        return;
+    }
+
+    usbredirparser_serialize(dev->parser, &data, &len);
+    qemu_oom_check(data);
+
+    qemu_put_be32(f, len);
+    qemu_put_buffer(f, data, len);
+
+    free(data);
+}
+
+static int usbredir_get_parser(QEMUFile *f, void *priv, size_t unused)
+{
+    USBRedirDevice *dev = priv;
+    uint8_t *data;
+    int len, ret;
+
+    len = qemu_get_be32(f);
+    if (len == 0) {
+        return 0;
+    }
+
+    /*
+     * Our chardev should be open already at this point, otherwise
+     * the usbredir channel will be broken (ie spice without seamless)
+     */
+    if (dev->parser == NULL) {
+        ERROR("get_parser called with closed chardev, failing migration\n");
+        return -1;
+    }
+
+    data = g_malloc(len);
+    qemu_get_buffer(f, data, len);
+
+    ret = usbredirparser_unserialize(dev->parser, data, len);
+
+    g_free(data);
+
+    return ret;
+}
+
+static const VMStateInfo usbredir_parser_vmstate_info = {
+    .name = "usb-redir-parser",
+    .put  = usbredir_put_parser,
+    .get  = usbredir_get_parser,
+};
+
+
+/* For buffered packets (iso/irq) queue migration */
+static void usbredir_put_bufpq(QEMUFile *f, void *priv, size_t unused)
+{
+    struct endp_data *endp = priv;
+    struct buf_packet *bufp;
+    int remain = endp->bufpq_size;
+
+    qemu_put_be32(f, endp->bufpq_size);
+    QTAILQ_FOREACH(bufp, &endp->bufpq, next) {
+        qemu_put_be32(f, bufp->len);
+        qemu_put_be32(f, bufp->status);
+        qemu_put_buffer(f, bufp->data, bufp->len);
+        remain--;
+    }
+    assert(remain == 0);
+}
+
+static int usbredir_get_bufpq(QEMUFile *f, void *priv, size_t unused)
+{
+    struct endp_data *endp = priv;
+    struct buf_packet *bufp;
+    int i;
+
+    endp->bufpq_size = qemu_get_be32(f);
+    for (i = 0; i < endp->bufpq_size; i++) {
+        bufp = g_malloc(sizeof(struct buf_packet));
+        bufp->len = qemu_get_be32(f);
+        bufp->status = qemu_get_be32(f);
+        bufp->data = qemu_oom_check(malloc(bufp->len)); /* regular malloc! */
+        qemu_get_buffer(f, bufp->data, bufp->len);
+        QTAILQ_INSERT_TAIL(&endp->bufpq, bufp, next);
+    }
+    return 0;
+}
+
+static const VMStateInfo usbredir_ep_bufpq_vmstate_info = {
+    .name = "usb-redir-bufpq",
+    .put  = usbredir_put_bufpq,
+    .get  = usbredir_get_bufpq,
+};
+
+
+/* For endp_data migration */
+static const VMStateDescription usbredir_ep_vmstate = {
+    .name = "usb-redir-ep",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(type, struct endp_data),
+        VMSTATE_UINT8(interval, struct endp_data),
+        VMSTATE_UINT8(interface, struct endp_data),
+        VMSTATE_UINT16(max_packet_size, struct endp_data),
+        VMSTATE_UINT8(iso_started, struct endp_data),
+        VMSTATE_UINT8(iso_error, struct endp_data),
+        VMSTATE_UINT8(interrupt_started, struct endp_data),
+        VMSTATE_UINT8(interrupt_error, struct endp_data),
+        VMSTATE_UINT8(bufpq_prefilled, struct endp_data),
+        VMSTATE_UINT8(bufpq_dropping_packets, struct endp_data),
+        {
+            .name         = "bufpq",
+            .version_id   = 0,
+            .field_exists = NULL,
+            .size         = 0,
+            .info         = &usbredir_ep_bufpq_vmstate_info,
+            .flags        = VMS_SINGLE,
+            .offset       = 0,
+        },
+        VMSTATE_INT32(bufpq_target_size, struct endp_data),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+
+/* For PacketIdQueue migration */
+static void usbredir_put_packet_id_q(QEMUFile *f, void *priv, size_t unused)
+{
+    struct PacketIdQueue *q = priv;
+    USBRedirDevice *dev = q->dev;
+    struct PacketIdQueueEntry *e;
+    int remain = q->size;
+
+    DPRINTF("put_packet_id_q %s size %d\n", q->name, q->size);
+    qemu_put_be32(f, q->size);
+    QTAILQ_FOREACH(e, &q->head, next) {
+        qemu_put_be64(f, e->id);
+        remain--;
+    }
+    assert(remain == 0);
+}
+
+static int usbredir_get_packet_id_q(QEMUFile *f, void *priv, size_t unused)
+{
+    struct PacketIdQueue *q = priv;
+    USBRedirDevice *dev = q->dev;
+    int i, size;
+    uint64_t id;
+
+    size = qemu_get_be32(f);
+    DPRINTF("get_packet_id_q %s size %d\n", q->name, size);
+    for (i = 0; i < size; i++) {
+        id = qemu_get_be64(f);
+        packet_id_queue_add(q, id);
+    }
+    assert(q->size == size);
+    return 0;
+}
+
+static const VMStateInfo usbredir_ep_packet_id_q_vmstate_info = {
+    .name = "usb-redir-packet-id-q",
+    .put  = usbredir_put_packet_id_q,
+    .get  = usbredir_get_packet_id_q,
+};
+
+static const VMStateDescription usbredir_ep_packet_id_queue_vmstate = {
+    .name = "usb-redir-packet-id-queue",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        {
+            .name         = "queue",
+            .version_id   = 0,
+            .field_exists = NULL,
+            .size         = 0,
+            .info         = &usbredir_ep_packet_id_q_vmstate_info,
+            .flags        = VMS_SINGLE,
+            .offset       = 0,
+        },
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+
+/* For usb_redir_device_connect_header migration */
+static const VMStateDescription usbredir_device_info_vmstate = {
+    .name = "usb-redir-device-info",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT8(speed, struct usb_redir_device_connect_header),
+        VMSTATE_UINT8(device_class, struct usb_redir_device_connect_header),
+        VMSTATE_UINT8(device_subclass, struct usb_redir_device_connect_header),
+        VMSTATE_UINT8(device_protocol, struct usb_redir_device_connect_header),
+        VMSTATE_UINT16(vendor_id, struct usb_redir_device_connect_header),
+        VMSTATE_UINT16(product_id, struct usb_redir_device_connect_header),
+        VMSTATE_UINT16(device_version_bcd,
+                       struct usb_redir_device_connect_header),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+
+/* For usb_redir_interface_info_header migration */
+static const VMStateDescription usbredir_interface_info_vmstate = {
+    .name = "usb-redir-interface-info",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT32(interface_count,
+                       struct usb_redir_interface_info_header),
+        VMSTATE_UINT8_ARRAY(interface,
+                            struct usb_redir_interface_info_header, 32),
+        VMSTATE_UINT8_ARRAY(interface_class,
+                            struct usb_redir_interface_info_header, 32),
+        VMSTATE_UINT8_ARRAY(interface_subclass,
+                            struct usb_redir_interface_info_header, 32),
+        VMSTATE_UINT8_ARRAY(interface_protocol,
+                            struct usb_redir_interface_info_header, 32),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+
+/* And finally the USBRedirDevice vmstate itself */
+static const VMStateDescription usbredir_vmstate = {
+    .name = "usb-redir",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .pre_save = usbredir_pre_save,
+    .post_load = usbredir_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_USB_DEVICE(dev, USBRedirDevice),
+        VMSTATE_TIMER(attach_timer, USBRedirDevice),
+        {
+            .name         = "parser",
+            .version_id   = 0,
+            .field_exists = NULL,
+            .size         = 0,
+            .info         = &usbredir_parser_vmstate_info,
+            .flags        = VMS_SINGLE,
+            .offset       = 0,
+        },
+        VMSTATE_STRUCT_ARRAY(endpoint, USBRedirDevice, MAX_ENDPOINTS, 1,
+                             usbredir_ep_vmstate, struct endp_data),
+        VMSTATE_STRUCT(cancelled, USBRedirDevice, 1,
+                       usbredir_ep_packet_id_queue_vmstate,
+                       struct PacketIdQueue),
+        VMSTATE_STRUCT(already_in_flight, USBRedirDevice, 1,
+                       usbredir_ep_packet_id_queue_vmstate,
+                       struct PacketIdQueue),
+        VMSTATE_STRUCT(device_info, USBRedirDevice, 1,
+                       usbredir_device_info_vmstate,
+                       struct usb_redir_device_connect_header),
+        VMSTATE_STRUCT(interface_info, USBRedirDevice, 1,
+                       usbredir_interface_info_vmstate,
+                       struct usb_redir_interface_info_header),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
 static Property usbredir_properties[] = {
     DEFINE_PROP_CHR("chardev", USBRedirDevice, cs),
     DEFINE_PROP_UINT8("debug", USBRedirDevice, debug, 0),
@@ -1483,6 +1869,7 @@
     uc->handle_reset   = usbredir_handle_reset;
     uc->handle_data    = usbredir_handle_data;
     uc->handle_control = usbredir_handle_control;
+    dc->vmsd           = &usbredir_vmstate;
     dc->props          = usbredir_properties;
 }
 
diff --git a/hw/versatilepb.c b/hw/versatilepb.c
index 7a92034..b3f8077 100644
--- a/hw/versatilepb.c
+++ b/hw/versatilepb.c
@@ -211,7 +211,8 @@
 
     cpu_pic = arm_pic_init_cpu(cpu);
     dev = sysbus_create_varargs("pl190", 0x10140000,
-                                cpu_pic[0], cpu_pic[1], NULL);
+                                cpu_pic[ARM_PIC_CPU_IRQ],
+                                cpu_pic[ARM_PIC_CPU_FIQ], NULL);
     for (n = 0; n < 32; n++) {
         pic[n] = qdev_get_gpio_in(dev, n);
     }
diff --git a/hw/vexpress.c b/hw/vexpress.c
index b615844..3596d1e 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -29,8 +29,12 @@
 #include "sysemu.h"
 #include "boards.h"
 #include "exec-memory.h"
+#include "blockdev.h"
+#include "flash.h"
 
 #define VEXPRESS_BOARD_ID 0x8e0
+#define VEXPRESS_FLASH_SIZE (64 * 1024 * 1024)
+#define VEXPRESS_FLASH_SECT_SIZE (256 * 1024)
 
 static struct arm_boot_info vexpress_binfo;
 
@@ -62,7 +66,6 @@
     VE_COMPACTFLASH,
     VE_CLCD,
     VE_NORFLASH0,
-    VE_NORFLASH0ALIAS,
     VE_NORFLASH1,
     VE_SRAM,
     VE_VIDEORAM,
@@ -104,9 +107,8 @@
 };
 
 static target_phys_addr_t motherboard_aseries_map[] = {
-    /* CS0: 0x00000000 .. 0x0c000000 */
-    [VE_NORFLASH0] = 0x00000000,
-    [VE_NORFLASH0ALIAS] = 0x08000000,
+    /* CS0: 0x08000000 .. 0x0c000000 */
+    [VE_NORFLASH0] = 0x08000000,
     /* CS4: 0x0c000000 .. 0x10000000 */
     [VE_NORFLASH1] = 0x0c000000,
     /* CS5: 0x10000000 .. 0x14000000 */
@@ -357,6 +359,7 @@
     qemu_irq pic[64];
     uint32_t proc_id;
     uint32_t sys_id;
+    DriveInfo *dinfo;
     ram_addr_t vram_size, sram_size;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *vram = g_new(MemoryRegion, 1);
@@ -412,9 +415,25 @@
 
     sysbus_create_simple("pl111", map[VE_CLCD], pic[14]);
 
-    /* VE_NORFLASH0: not modelled */
-    /* VE_NORFLASH0ALIAS: not modelled */
-    /* VE_NORFLASH1: not modelled */
+    dinfo = drive_get_next(IF_PFLASH);
+    if (!pflash_cfi01_register(map[VE_NORFLASH0], NULL, "vexpress.flash0",
+            VEXPRESS_FLASH_SIZE, dinfo ? dinfo->bdrv : NULL,
+            VEXPRESS_FLASH_SECT_SIZE,
+            VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4,
+            0x00, 0x89, 0x00, 0x18, 0)) {
+        fprintf(stderr, "vexpress: error registering flash 0.\n");
+        exit(1);
+    }
+
+    dinfo = drive_get_next(IF_PFLASH);
+    if (!pflash_cfi01_register(map[VE_NORFLASH1], NULL, "vexpress.flash1",
+            VEXPRESS_FLASH_SIZE, dinfo ? dinfo->bdrv : NULL,
+            VEXPRESS_FLASH_SECT_SIZE,
+            VEXPRESS_FLASH_SIZE / VEXPRESS_FLASH_SECT_SIZE, 4,
+            0x00, 0x89, 0x00, 0x18, 0)) {
+        fprintf(stderr, "vexpress: error registering flash 1.\n");
+        exit(1);
+    }
 
     sram_size = 0x2000000;
     memory_region_init_ram(sram, "vexpress.sram", sram_size);
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
new file mode 100644
index 0000000..a1eeced
--- /dev/null
+++ b/hw/vfio_pci.c
@@ -0,0 +1,1864 @@
+/*
+ * vfio based device assignment support
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Based on qemu-kvm device-assignment:
+ *  Adapted for KVM by Qumranet.
+ *  Copyright (c) 2007, Neocleus, Alex Novik (alex@neocleus.com)
+ *  Copyright (c) 2007, Neocleus, Guy Zana (guy@neocleus.com)
+ *  Copyright (C) 2008, Qumranet, Amit Shah (amit.shah@qumranet.com)
+ *  Copyright (C) 2008, Red Hat, Amit Shah (amit.shah@redhat.com)
+ *  Copyright (C) 2008, IBM, Muli Ben-Yehuda (muli@il.ibm.com)
+ */
+
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <linux/vfio.h>
+
+#include "config.h"
+#include "event_notifier.h"
+#include "exec-memory.h"
+#include "kvm.h"
+#include "memory.h"
+#include "msi.h"
+#include "msix.h"
+#include "qemu-error.h"
+#include "range.h"
+#include "vfio_pci_int.h"
+
+/* #define DEBUG_VFIO */
+#ifdef DEBUG_VFIO
+#define DPRINTF(fmt, ...) \
+    do { fprintf(stderr, "vfio: " fmt, ## __VA_ARGS__); } while (0)
+#else
+#define DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+#define MSIX_CAP_LENGTH 12
+
+static QLIST_HEAD(, VFIOContainer)
+    container_list = QLIST_HEAD_INITIALIZER(container_list);
+
+static QLIST_HEAD(, VFIOGroup)
+    group_list = QLIST_HEAD_INITIALIZER(group_list);
+
+static void vfio_disable_interrupts(VFIODevice *vdev);
+static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
+static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled);
+
+/*
+ * Common VFIO interrupt disable
+ */
+static void vfio_disable_irqindex(VFIODevice *vdev, int index)
+{
+    struct vfio_irq_set irq_set = {
+        .argsz = sizeof(irq_set),
+        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER,
+        .index = index,
+        .start = 0,
+        .count = 0,
+    };
+
+    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
+
+    vdev->interrupt = VFIO_INT_NONE;
+}
+
+/*
+ * INTx
+ */
+static void vfio_unmask_intx(VFIODevice *vdev)
+{
+    struct vfio_irq_set irq_set = {
+        .argsz = sizeof(irq_set),
+        .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
+        .index = VFIO_PCI_INTX_IRQ_INDEX,
+        .start = 0,
+        .count = 1,
+    };
+
+    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set);
+}
+
+static void vfio_intx_interrupt(void *opaque)
+{
+    VFIODevice *vdev = opaque;
+
+    if (!event_notifier_test_and_clear(&vdev->intx.interrupt)) {
+        return;
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) Pin %c\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function,
+            'A' + vdev->intx.pin);
+
+    vdev->intx.pending = true;
+    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 1);
+}
+
+static void vfio_eoi(VFIODevice *vdev)
+{
+    if (!vdev->intx.pending) {
+        return;
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) EOI\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    vdev->intx.pending = false;
+    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
+    vfio_unmask_intx(vdev);
+}
+
+typedef struct QEMU_PACKED VFIOIRQSetFD {
+    struct vfio_irq_set irq_set;
+    int32_t fd;
+} VFIOIRQSetFD;
+
+static int vfio_enable_intx(VFIODevice *vdev)
+{
+    VFIOIRQSetFD irq_set_fd = {
+        .irq_set = {
+            .argsz = sizeof(irq_set_fd),
+            .flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER,
+            .index = VFIO_PCI_INTX_IRQ_INDEX,
+            .start = 0,
+            .count = 1,
+        },
+    };
+    uint8_t pin = vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1);
+    int ret;
+
+    if (vdev->intx.disabled || !pin) {
+        return 0;
+    }
+
+    vfio_disable_interrupts(vdev);
+
+    vdev->intx.pin = pin - 1; /* Pin A (1) -> irq[0] */
+    ret = event_notifier_init(&vdev->intx.interrupt, 0);
+    if (ret) {
+        error_report("vfio: Error: event_notifier_init failed\n");
+        return ret;
+    }
+
+    irq_set_fd.fd = event_notifier_get_fd(&vdev->intx.interrupt);
+    qemu_set_fd_handler(irq_set_fd.fd, vfio_intx_interrupt, NULL, vdev);
+
+    if (ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd)) {
+        error_report("vfio: Error: Failed to setup INTx fd: %m\n");
+        return -errno;
+    }
+
+    /*
+     * Disable mmaps so we can trap on BAR accesses.  We interpret any
+     * access as a response to an interrupt and unmask the physical
+     * device.  The device will re-assert if the interrupt is still
+     * pending.  We'll likely retrigger on the host multiple times per
+     * guest interrupt, but without EOI notification it's better than
+     * nothing.  Acceleration paths through KVM will avoid this.
+     */
+    vfio_mmap_set_enabled(vdev, false);
+
+    vdev->interrupt = VFIO_INT_INTx;
+
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    return 0;
+}
+
+static void vfio_disable_intx(VFIODevice *vdev)
+{
+    int fd;
+
+    vfio_disable_irqindex(vdev, VFIO_PCI_INTX_IRQ_INDEX);
+    vdev->intx.pending = false;
+    qemu_set_irq(vdev->pdev.irq[vdev->intx.pin], 0);
+    vfio_mmap_set_enabled(vdev, true);
+
+    fd = event_notifier_get_fd(&vdev->intx.interrupt);
+    qemu_set_fd_handler(fd, NULL, NULL, vdev);
+    event_notifier_cleanup(&vdev->intx.interrupt);
+
+    vdev->interrupt = VFIO_INT_NONE;
+
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+}
+
+/*
+ * MSI/X
+ */
+static void vfio_msi_interrupt(void *opaque)
+{
+    VFIOMSIVector *vector = opaque;
+    VFIODevice *vdev = vector->vdev;
+    int nr = vector - vdev->msi_vectors;
+
+    if (!event_notifier_test_and_clear(&vector->interrupt)) {
+        return;
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, nr);
+
+    if (vdev->interrupt == VFIO_INT_MSIX) {
+        msix_notify(&vdev->pdev, nr);
+    } else if (vdev->interrupt == VFIO_INT_MSI) {
+        msi_notify(&vdev->pdev, nr);
+    } else {
+        error_report("vfio: MSI interrupt receieved, but not enabled?\n");
+    }
+}
+
+static int vfio_enable_vectors(VFIODevice *vdev, bool msix)
+{
+    struct vfio_irq_set *irq_set;
+    int ret = 0, i, argsz;
+    int32_t *fds;
+
+    argsz = sizeof(*irq_set) + (vdev->nr_vectors * sizeof(*fds));
+
+    irq_set = g_malloc0(argsz);
+    irq_set->argsz = argsz;
+    irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
+    irq_set->index = msix ? VFIO_PCI_MSIX_IRQ_INDEX : VFIO_PCI_MSI_IRQ_INDEX;
+    irq_set->start = 0;
+    irq_set->count = vdev->nr_vectors;
+    fds = (int32_t *)&irq_set->data;
+
+    for (i = 0; i < vdev->nr_vectors; i++) {
+        if (!vdev->msi_vectors[i].use) {
+            fds[i] = -1;
+            continue;
+        }
+
+        fds[i] = event_notifier_get_fd(&vdev->msi_vectors[i].interrupt);
+    }
+
+    ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+
+    g_free(irq_set);
+
+    if (!ret) {
+        vdev->interrupt = msix ? VFIO_INT_MSIX : VFIO_INT_MSI;
+    }
+
+    return ret;
+}
+
+static int vfio_msix_vector_use(PCIDevice *pdev,
+                                unsigned int nr, MSIMessage msg)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    VFIOMSIVector *vector;
+    int ret;
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d used\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, nr);
+
+    if (vdev->interrupt != VFIO_INT_MSIX) {
+        vfio_disable_interrupts(vdev);
+    }
+
+    if (!vdev->msi_vectors) {
+        vdev->msi_vectors = g_malloc0(vdev->msix->entries *
+                                      sizeof(VFIOMSIVector));
+    }
+
+    vector = &vdev->msi_vectors[nr];
+    vector->vdev = vdev;
+    vector->use = true;
+
+    msix_vector_use(pdev, nr);
+
+    if (event_notifier_init(&vector->interrupt, 0)) {
+        error_report("vfio: Error: event_notifier_init failed\n");
+    }
+
+    /*
+     * Attempt to enable route through KVM irqchip,
+     * default to userspace handling if unavailable.
+     */
+    vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
+    if (vector->virq < 0 ||
+        kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
+                                       vector->virq) < 0) {
+        if (vector->virq >= 0) {
+            kvm_irqchip_release_virq(kvm_state, vector->virq);
+            vector->virq = -1;
+        }
+        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                            vfio_msi_interrupt, NULL, vector);
+    }
+
+    /*
+     * We don't want to have the host allocate all possible MSI vectors
+     * for a device if they're not in use, so we shutdown and incrementally
+     * increase them as needed.
+     */
+    if (vdev->nr_vectors < nr + 1) {
+        int i;
+
+        vfio_disable_irqindex(vdev, VFIO_PCI_MSIX_IRQ_INDEX);
+        vdev->nr_vectors = nr + 1;
+        ret = vfio_enable_vectors(vdev, true);
+        if (ret) {
+            error_report("vfio: failed to enable vectors, %d\n", ret);
+        }
+
+        /* We don't know if we've missed interrupts in the interim... */
+        for (i = 0; i < vdev->msix->entries; i++) {
+            if (vdev->msi_vectors[i].use) {
+                msix_notify(&vdev->pdev, i);
+            }
+        }
+    } else {
+        VFIOIRQSetFD irq_set_fd = {
+            .irq_set = {
+                .argsz = sizeof(irq_set_fd),
+                .flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                         VFIO_IRQ_SET_ACTION_TRIGGER,
+                .index = VFIO_PCI_MSIX_IRQ_INDEX,
+                .start = nr,
+                .count = 1,
+            },
+            .fd = event_notifier_get_fd(&vector->interrupt),
+        };
+        ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd);
+        if (ret) {
+            error_report("vfio: failed to modify vector, %d\n", ret);
+        }
+
+        /*
+         * If we were connected to the hardware PBA we could skip this,
+         * until then, a spurious interrupt is better than starvation.
+         */
+        msix_notify(&vdev->pdev, nr);
+    }
+
+    return 0;
+}
+
+static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    VFIOMSIVector *vector = &vdev->msi_vectors[nr];
+    VFIOIRQSetFD irq_set_fd = {
+        .irq_set = {
+            .argsz = sizeof(irq_set_fd),
+            .flags = VFIO_IRQ_SET_DATA_EVENTFD |
+                     VFIO_IRQ_SET_ACTION_TRIGGER,
+            .index = VFIO_PCI_MSIX_IRQ_INDEX,
+            .start = nr,
+            .count = 1,
+        },
+        .fd = -1,
+    };
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) vector %d released\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, nr);
+
+    /*
+     * XXX What's the right thing to do here?  This turns off the interrupt
+     * completely, but do we really just want to switch the interrupt to
+     * bouncing through userspace and let msix.c drop it?  Not sure.
+     */
+    msix_vector_unuse(pdev, nr);
+    ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, &irq_set_fd);
+
+    if (vector->virq < 0) {
+        qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                            NULL, NULL, NULL);
+    } else {
+        kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
+                                          vector->virq);
+        kvm_irqchip_release_virq(kvm_state, vector->virq);
+        vector->virq = -1;
+    }
+
+    event_notifier_cleanup(&vector->interrupt);
+    vector->use = false;
+}
+
+/* TODO This should move to msi.c */
+static MSIMessage msi_get_msg(PCIDevice *pdev, unsigned int vector)
+{
+    uint16_t flags = pci_get_word(pdev->config + pdev->msi_cap + PCI_MSI_FLAGS);
+    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
+    MSIMessage msg;
+
+    if (msi64bit) {
+        msg.address = pci_get_quad(pdev->config +
+                                   pdev->msi_cap + PCI_MSI_ADDRESS_LO);
+    } else {
+        msg.address = pci_get_long(pdev->config +
+                                   pdev->msi_cap + PCI_MSI_ADDRESS_LO);
+    }
+
+    msg.data = pci_get_word(pdev->config + pdev->msi_cap +
+                            (msi64bit ? PCI_MSI_DATA_64 : PCI_MSI_DATA_32));
+    msg.data += vector;
+
+    return msg;
+}
+
+/* So should this */
+static void msi_set_qsize(PCIDevice *pdev, uint8_t size)
+{
+    uint8_t *config = pdev->config + pdev->msi_cap;
+    uint16_t flags;
+
+    flags = pci_get_word(config + PCI_MSI_FLAGS);
+    flags = le16_to_cpu(flags);
+    flags &= ~PCI_MSI_FLAGS_QSIZE;
+    flags |= (size & 0x7) << 4;
+    flags = cpu_to_le16(flags);
+    pci_set_word(config + PCI_MSI_FLAGS, flags);
+}
+
+static void vfio_enable_msi(VFIODevice *vdev)
+{
+    int ret, i;
+
+    vfio_disable_interrupts(vdev);
+
+    vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
+retry:
+    vdev->msi_vectors = g_malloc0(vdev->nr_vectors * sizeof(VFIOMSIVector));
+
+    for (i = 0; i < vdev->nr_vectors; i++) {
+        MSIMessage msg;
+        VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+        vector->vdev = vdev;
+        vector->use = true;
+
+        if (event_notifier_init(&vector->interrupt, 0)) {
+            error_report("vfio: Error: event_notifier_init failed\n");
+        }
+
+        msg = msi_get_msg(&vdev->pdev, i);
+
+        /*
+         * Attempt to enable route through KVM irqchip,
+         * default to userspace handling if unavailable.
+         */
+        vector->virq = kvm_irqchip_add_msi_route(kvm_state, msg);
+        if (vector->virq < 0 ||
+            kvm_irqchip_add_irqfd_notifier(kvm_state, &vector->interrupt,
+                                           vector->virq) < 0) {
+            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                                vfio_msi_interrupt, NULL, vector);
+        }
+    }
+
+    ret = vfio_enable_vectors(vdev, false);
+    if (ret) {
+        if (ret < 0) {
+            error_report("vfio: Error: Failed to setup MSI fds: %m\n");
+        } else if (ret != vdev->nr_vectors) {
+            error_report("vfio: Error: Failed to enable %d "
+                         "MSI vectors, retry with %d\n", vdev->nr_vectors, ret);
+        }
+
+        for (i = 0; i < vdev->nr_vectors; i++) {
+            VFIOMSIVector *vector = &vdev->msi_vectors[i];
+            if (vector->virq >= 0) {
+                kvm_irqchip_remove_irqfd_notifier(kvm_state, &vector->interrupt,
+                                                  vector->virq);
+                kvm_irqchip_release_virq(kvm_state, vector->virq);
+                vector->virq = -1;
+            } else {
+                qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                                    NULL, NULL, NULL);
+            }
+            event_notifier_cleanup(&vector->interrupt);
+        }
+
+        g_free(vdev->msi_vectors);
+
+        if (ret > 0 && ret != vdev->nr_vectors) {
+            vdev->nr_vectors = ret;
+            goto retry;
+        }
+        vdev->nr_vectors = 0;
+
+        return;
+    }
+
+    msi_set_qsize(&vdev->pdev, vdev->nr_vectors);
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) Enabled %d MSI vectors\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, vdev->nr_vectors);
+}
+
+static void vfio_disable_msi_x(VFIODevice *vdev, bool msix)
+{
+    int i;
+
+    vfio_disable_irqindex(vdev, msix ? VFIO_PCI_MSIX_IRQ_INDEX :
+                                       VFIO_PCI_MSI_IRQ_INDEX);
+
+    for (i = 0; i < vdev->nr_vectors; i++) {
+        VFIOMSIVector *vector = &vdev->msi_vectors[i];
+
+        if (!vector->use) {
+            continue;
+        }
+
+        if (vector->virq >= 0) {
+            kvm_irqchip_remove_irqfd_notifier(kvm_state,
+                                              &vector->interrupt, vector->virq);
+            kvm_irqchip_release_virq(kvm_state, vector->virq);
+            vector->virq = -1;
+        } else {
+            qemu_set_fd_handler(event_notifier_get_fd(&vector->interrupt),
+                                NULL, NULL, NULL);
+        }
+
+        if (msix) {
+            msix_vector_unuse(&vdev->pdev, i);
+        }
+
+        event_notifier_cleanup(&vector->interrupt);
+    }
+
+    g_free(vdev->msi_vectors);
+    vdev->msi_vectors = NULL;
+    vdev->nr_vectors = 0;
+
+    if (!msix) {
+        msi_set_qsize(&vdev->pdev, 0); /* Actually still means 1 vector */
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x, msi%s)\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, msix ? "x" : "");
+
+    vfio_enable_intx(vdev);
+}
+
+/*
+ * IO Port/MMIO - Beware of the endians, VFIO is always little endian
+ */
+static void vfio_bar_write(void *opaque, target_phys_addr_t addr,
+                           uint64_t data, unsigned size)
+{
+    VFIOBAR *bar = opaque;
+    union {
+        uint8_t byte;
+        uint16_t word;
+        uint32_t dword;
+        uint64_t qword;
+    } buf;
+
+    switch (size) {
+    case 1:
+        buf.byte = data;
+        break;
+    case 2:
+        buf.word = cpu_to_le16(data);
+        break;
+    case 4:
+        buf.dword = cpu_to_le32(data);
+        break;
+    default:
+        hw_error("vfio: unsupported write size, %d bytes\n", size);
+        break;
+    }
+
+    if (pwrite(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
+        error_report("%s(,0x%"TARGET_PRIxPHYS", 0x%"PRIx64", %d) failed: %m\n",
+                     __func__, addr, data, size);
+    }
+
+    DPRINTF("%s(BAR%d+0x%"TARGET_PRIxPHYS", 0x%"PRIx64", %d)\n",
+            __func__, bar->nr, addr, data, size);
+
+    /*
+     * A read or write to a BAR always signals an INTx EOI.  This will
+     * do nothing if not pending (including not in INTx mode).  We assume
+     * that a BAR access is in response to an interrupt and that BAR
+     * accesses will service the interrupt.  Unfortunately, we don't know
+     * which access will service the interrupt, so we're potentially
+     * getting quite a few host interrupts per guest interrupt.
+     */
+    vfio_eoi(DO_UPCAST(VFIODevice, bars[bar->nr], bar));
+}
+
+static uint64_t vfio_bar_read(void *opaque,
+                              target_phys_addr_t addr, unsigned size)
+{
+    VFIOBAR *bar = opaque;
+    union {
+        uint8_t byte;
+        uint16_t word;
+        uint32_t dword;
+        uint64_t qword;
+    } buf;
+    uint64_t data = 0;
+
+    if (pread(bar->fd, &buf, size, bar->fd_offset + addr) != size) {
+        error_report("%s(,0x%"TARGET_PRIxPHYS", %d) failed: %m\n",
+                     __func__, addr, size);
+        return (uint64_t)-1;
+    }
+
+    switch (size) {
+    case 1:
+        data = buf.byte;
+        break;
+    case 2:
+        data = le16_to_cpu(buf.word);
+        break;
+    case 4:
+        data = le32_to_cpu(buf.dword);
+        break;
+    default:
+        hw_error("vfio: unsupported read size, %d bytes\n", size);
+        break;
+    }
+
+    DPRINTF("%s(BAR%d+0x%"TARGET_PRIxPHYS", %d) = 0x%"PRIx64"\n",
+            __func__, bar->nr, addr, size, data);
+
+    /* Same as write above */
+    vfio_eoi(DO_UPCAST(VFIODevice, bars[bar->nr], bar));
+
+    return data;
+}
+
+static const MemoryRegionOps vfio_bar_ops = {
+    .read = vfio_bar_read,
+    .write = vfio_bar_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+/*
+ * PCI config space
+ */
+static uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    uint32_t val = 0;
+
+    /*
+     * We only need QEMU PCI config support for the ROM BAR, the MSI and MSIX
+     * capabilities, and the multifunction bit below.  We let VFIO handle
+     * virtualizing everything else.  Performance is not a concern here.
+     */
+    if (ranges_overlap(addr, len, PCI_ROM_ADDRESS, 4) ||
+        (pdev->cap_present & QEMU_PCI_CAP_MSIX &&
+         ranges_overlap(addr, len, pdev->msix_cap, MSIX_CAP_LENGTH)) ||
+        (pdev->cap_present & QEMU_PCI_CAP_MSI &&
+         ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size))) {
+
+        val = pci_default_read_config(pdev, addr, len);
+    } else {
+        if (pread(vdev->fd, &val, len, vdev->config_offset + addr) != len) {
+            error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m\n",
+                         __func__, vdev->host.domain, vdev->host.bus,
+                         vdev->host.slot, vdev->host.function, addr, len);
+            return -errno;
+        }
+        val = le32_to_cpu(val);
+    }
+
+    /* Multifunction bit is virualized in QEMU */
+    if (unlikely(ranges_overlap(addr, len, PCI_HEADER_TYPE, 1))) {
+        uint32_t mask = PCI_HEADER_TYPE_MULTI_FUNCTION;
+
+        if (len == 4) {
+            mask <<= 16;
+        }
+
+        if (pdev->cap_present & QEMU_PCI_CAP_MULTIFUNCTION) {
+            val |= mask;
+        } else {
+            val &= ~mask;
+        }
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, addr, len, val);
+
+    return val;
+}
+
+static void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
+                                  uint32_t val, int len)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    uint32_t val_le = cpu_to_le32(val);
+
+    DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, 0x%x, len=0x%x)\n", __func__,
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, addr, val, len);
+
+    /* Write everything to VFIO, let it filter out what we can't write */
+    if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) != len) {
+        error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m\n",
+                     __func__, vdev->host.domain, vdev->host.bus,
+                     vdev->host.slot, vdev->host.function, addr, val, len);
+    }
+
+    /* Write standard header bits to emulation */
+    if (addr < PCI_CONFIG_HEADER_SIZE) {
+        pci_default_write_config(pdev, addr, val, len);
+        return;
+    }
+
+    /* MSI/MSI-X Enabling/Disabling */
+    if (pdev->cap_present & QEMU_PCI_CAP_MSI &&
+        ranges_overlap(addr, len, pdev->msi_cap, vdev->msi_cap_size)) {
+        int is_enabled, was_enabled = msi_enabled(pdev);
+
+        pci_default_write_config(pdev, addr, val, len);
+
+        is_enabled = msi_enabled(pdev);
+
+        if (!was_enabled && is_enabled) {
+            vfio_enable_msi(vdev);
+        } else if (was_enabled && !is_enabled) {
+            vfio_disable_msi_x(vdev, false);
+        }
+    }
+
+    if (pdev->cap_present & QEMU_PCI_CAP_MSIX &&
+        ranges_overlap(addr, len, pdev->msix_cap, MSIX_CAP_LENGTH)) {
+        int is_enabled, was_enabled = msix_enabled(pdev);
+
+        pci_default_write_config(pdev, addr, val, len);
+
+        is_enabled = msix_enabled(pdev);
+
+        if (!was_enabled && is_enabled) {
+            /* vfio_msix_vector_use handles this automatically */
+        } else if (was_enabled && !is_enabled) {
+            vfio_disable_msi_x(vdev, true);
+        }
+    }
+}
+
+/*
+ * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
+ */
+static int vfio_dma_map(VFIOContainer *container, target_phys_addr_t iova,
+                        ram_addr_t size, void *vaddr, bool readonly)
+{
+    struct vfio_iommu_type1_dma_map map = {
+        .argsz = sizeof(map),
+        .flags = VFIO_DMA_MAP_FLAG_READ,
+        .vaddr = (__u64)(intptr_t)vaddr,
+        .iova = iova,
+        .size = size,
+    };
+
+    if (!readonly) {
+        map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
+    }
+
+    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
+        DPRINTF("VFIO_MAP_DMA: %d\n", -errno);
+        return -errno;
+    }
+
+    return 0;
+}
+
+static int vfio_dma_unmap(VFIOContainer *container,
+                          target_phys_addr_t iova, ram_addr_t size)
+{
+    struct vfio_iommu_type1_dma_unmap unmap = {
+        .argsz = sizeof(unmap),
+        .flags = 0,
+        .iova = iova,
+        .size = size,
+    };
+
+    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
+        DPRINTF("VFIO_UNMAP_DMA: %d\n", -errno);
+        return -errno;
+    }
+
+    return 0;
+}
+
+static void vfio_listener_dummy1(MemoryListener *listener)
+{
+    /* We don't do batching (begin/commit) or care about logging */
+}
+
+static void vfio_listener_dummy2(MemoryListener *listener,
+                                 MemoryRegionSection *section)
+{
+    /* We don't do logging or care about nops */
+}
+
+static void vfio_listener_dummy3(MemoryListener *listener,
+                                 MemoryRegionSection *section,
+                                 bool match_data, uint64_t data,
+                                 EventNotifier *e)
+{
+    /* We don't care about eventfds */
+}
+
+static bool vfio_listener_skipped_section(MemoryRegionSection *section)
+{
+    return !memory_region_is_ram(section->mr);
+}
+
+static void vfio_listener_region_add(MemoryListener *listener,
+                                     MemoryRegionSection *section)
+{
+    VFIOContainer *container = container_of(listener, VFIOContainer,
+                                            iommu_data.listener);
+    target_phys_addr_t iova, end;
+    void *vaddr;
+    int ret;
+
+    if (vfio_listener_skipped_section(section)) {
+        DPRINTF("vfio: SKIPPING region_add %"TARGET_PRIxPHYS" - %"PRIx64"\n",
+                section->offset_within_address_space,
+                section->offset_within_address_space + section->size - 1);
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region\n", __func__);
+        return;
+    }
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    end = (section->offset_within_address_space + section->size) &
+          TARGET_PAGE_MASK;
+
+    if (iova >= end) {
+        return;
+    }
+
+    vaddr = memory_region_get_ram_ptr(section->mr) +
+            section->offset_within_region +
+            (iova - section->offset_within_address_space);
+
+    DPRINTF("vfio: region_add %"TARGET_PRIxPHYS" - %"TARGET_PRIxPHYS" [%p]\n",
+            iova, end - 1, vaddr);
+
+    ret = vfio_dma_map(container, iova, end - iova, vaddr, section->readonly);
+    if (ret) {
+        error_report("vfio_dma_map(%p, 0x%"TARGET_PRIxPHYS", "
+                     "0x%"TARGET_PRIxPHYS", %p) = %d (%m)\n",
+                     container, iova, end - iova, vaddr, ret);
+    }
+}
+
+static void vfio_listener_region_del(MemoryListener *listener,
+                                     MemoryRegionSection *section)
+{
+    VFIOContainer *container = container_of(listener, VFIOContainer,
+                                            iommu_data.listener);
+    target_phys_addr_t iova, end;
+    int ret;
+
+    if (vfio_listener_skipped_section(section)) {
+        DPRINTF("vfio: SKIPPING region_del %"TARGET_PRIxPHYS" - %"PRIx64"\n",
+                section->offset_within_address_space,
+                section->offset_within_address_space + section->size - 1);
+        return;
+    }
+
+    if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) !=
+                 (section->offset_within_region & ~TARGET_PAGE_MASK))) {
+        error_report("%s received unaligned region\n", __func__);
+        return;
+    }
+
+    iova = TARGET_PAGE_ALIGN(section->offset_within_address_space);
+    end = (section->offset_within_address_space + section->size) &
+          TARGET_PAGE_MASK;
+
+    if (iova >= end) {
+        return;
+    }
+
+    DPRINTF("vfio: region_del %"TARGET_PRIxPHYS" - %"TARGET_PRIxPHYS"\n",
+            iova, end - 1);
+
+    ret = vfio_dma_unmap(container, iova, end - iova);
+    if (ret) {
+        error_report("vfio_dma_unmap(%p, 0x%"TARGET_PRIxPHYS", "
+                     "0x%"TARGET_PRIxPHYS") = %d (%m)\n",
+                     container, iova, end - iova, ret);
+    }
+}
+
+static MemoryListener vfio_memory_listener = {
+    .begin = vfio_listener_dummy1,
+    .commit = vfio_listener_dummy1,
+    .region_add = vfio_listener_region_add,
+    .region_del = vfio_listener_region_del,
+    .region_nop = vfio_listener_dummy2,
+    .log_start = vfio_listener_dummy2,
+    .log_stop = vfio_listener_dummy2,
+    .log_sync = vfio_listener_dummy2,
+    .log_global_start = vfio_listener_dummy1,
+    .log_global_stop = vfio_listener_dummy1,
+    .eventfd_add = vfio_listener_dummy3,
+    .eventfd_del = vfio_listener_dummy3,
+};
+
+static void vfio_listener_release(VFIOContainer *container)
+{
+    memory_listener_unregister(&container->iommu_data.listener);
+}
+
+/*
+ * Interrupt setup
+ */
+static void vfio_disable_interrupts(VFIODevice *vdev)
+{
+    switch (vdev->interrupt) {
+    case VFIO_INT_INTx:
+        vfio_disable_intx(vdev);
+        break;
+    case VFIO_INT_MSI:
+        vfio_disable_msi_x(vdev, false);
+        break;
+    case VFIO_INT_MSIX:
+        vfio_disable_msi_x(vdev, true);
+        break;
+    }
+}
+
+static int vfio_setup_msi(VFIODevice *vdev, int pos)
+{
+    uint16_t ctrl;
+    bool msi_64bit, msi_maskbit;
+    int ret, entries;
+
+    /*
+     * TODO: don't peek into msi_supported, let msi_init fail and
+     * check for ENOTSUP
+     */
+    if (!msi_supported) {
+        return 0;
+    }
+
+    if (pread(vdev->fd, &ctrl, sizeof(ctrl),
+              vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
+        return -errno;
+    }
+    ctrl = le16_to_cpu(ctrl);
+
+    msi_64bit = !!(ctrl & PCI_MSI_FLAGS_64BIT);
+    msi_maskbit = !!(ctrl & PCI_MSI_FLAGS_MASKBIT);
+    entries = 1 << ((ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
+
+    DPRINTF("%04x:%02x:%02x.%x PCI MSI CAP @0x%x\n", vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function, pos);
+
+    ret = msi_init(&vdev->pdev, pos, entries, msi_64bit, msi_maskbit);
+    if (ret < 0) {
+        error_report("vfio: msi_init failed\n");
+        return ret;
+    }
+    vdev->msi_cap_size = 0xa + (msi_maskbit ? 0xa : 0) + (msi_64bit ? 0x4 : 0);
+
+    return 0;
+}
+
+/*
+ * We don't have any control over how pci_add_capability() inserts
+ * capabilities into the chain.  In order to setup MSI-X we need a
+ * MemoryRegion for the BAR.  In order to setup the BAR and not
+ * attempt to mmap the MSI-X table area, which VFIO won't allow, we
+ * need to first look for where the MSI-X table lives.  So we
+ * unfortunately split MSI-X setup across two functions.
+ */
+static int vfio_early_setup_msix(VFIODevice *vdev)
+{
+    uint8_t pos;
+    uint16_t ctrl;
+    uint32_t table, pba;
+
+    pos = pci_find_capability(&vdev->pdev, PCI_CAP_ID_MSIX);
+    if (!pos) {
+        return 0;
+    }
+
+    if (pread(vdev->fd, &ctrl, sizeof(ctrl),
+              vdev->config_offset + pos + PCI_CAP_FLAGS) != sizeof(ctrl)) {
+        return -errno;
+    }
+
+    if (pread(vdev->fd, &table, sizeof(table),
+              vdev->config_offset + pos + PCI_MSIX_TABLE) != sizeof(table)) {
+        return -errno;
+    }
+
+    if (pread(vdev->fd, &pba, sizeof(pba),
+              vdev->config_offset + pos + PCI_MSIX_PBA) != sizeof(pba)) {
+        return -errno;
+    }
+
+    ctrl = le16_to_cpu(ctrl);
+    table = le32_to_cpu(table);
+    pba = le32_to_cpu(pba);
+
+    vdev->msix = g_malloc0(sizeof(*(vdev->msix)));
+    vdev->msix->table_bar = table & PCI_MSIX_FLAGS_BIRMASK;
+    vdev->msix->table_offset = table & ~PCI_MSIX_FLAGS_BIRMASK;
+    vdev->msix->pba_bar = pba & PCI_MSIX_FLAGS_BIRMASK;
+    vdev->msix->pba_offset = pba & ~PCI_MSIX_FLAGS_BIRMASK;
+    vdev->msix->entries = (ctrl & PCI_MSIX_FLAGS_QSIZE) + 1;
+
+    DPRINTF("%04x:%02x:%02x.%x "
+            "PCI MSI-X CAP @0x%x, BAR %d, offset 0x%x, entries %d\n",
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function, pos, vdev->msix->table_bar,
+            vdev->msix->table_offset, vdev->msix->entries);
+
+    return 0;
+}
+
+static int vfio_setup_msix(VFIODevice *vdev, int pos)
+{
+    int ret;
+
+    /*
+     * TODO: don't peek into msi_supported, let msix_init fail and
+     * check for ENOTSUP
+     */
+    if (!msi_supported) {
+        return 0;
+    }
+
+    ret = msix_init(&vdev->pdev, vdev->msix->entries,
+                    &vdev->bars[vdev->msix->table_bar].mem,
+                    vdev->msix->table_bar, vdev->msix->table_offset,
+                    &vdev->bars[vdev->msix->pba_bar].mem,
+                    vdev->msix->pba_bar, vdev->msix->pba_offset, pos);
+    if (ret < 0) {
+        error_report("vfio: msix_init failed\n");
+        return ret;
+    }
+
+    ret = msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
+                                    vfio_msix_vector_release);
+    if (ret) {
+        error_report("vfio: msix_set_vector_notifiers failed %d\n", ret);
+        msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem,
+                    &vdev->bars[vdev->msix->pba_bar].mem);
+        return ret;
+    }
+
+    return 0;
+}
+
+static void vfio_teardown_msi(VFIODevice *vdev)
+{
+    msi_uninit(&vdev->pdev);
+
+    if (vdev->msix) {
+        /* FIXME: Why can't unset just silently do nothing?? */
+        if (vdev->pdev.msix_vector_use_notifier &&
+            vdev->pdev.msix_vector_release_notifier) {
+            msix_unset_vector_notifiers(&vdev->pdev);
+        }
+
+        msix_uninit(&vdev->pdev, &vdev->bars[vdev->msix->table_bar].mem,
+                    &vdev->bars[vdev->msix->pba_bar].mem);
+    }
+}
+
+/*
+ * Resource setup
+ */
+static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled)
+{
+    int i;
+
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        VFIOBAR *bar = &vdev->bars[i];
+
+        if (!bar->size) {
+            continue;
+        }
+
+        memory_region_set_enabled(&bar->mmap_mem, enabled);
+        if (vdev->msix && vdev->msix->table_bar == i) {
+            memory_region_set_enabled(&vdev->msix->mmap_mem, enabled);
+        }
+    }
+}
+
+static void vfio_unmap_bar(VFIODevice *vdev, int nr)
+{
+    VFIOBAR *bar = &vdev->bars[nr];
+
+    if (!bar->size) {
+        return;
+    }
+
+    memory_region_del_subregion(&bar->mem, &bar->mmap_mem);
+    munmap(bar->mmap, memory_region_size(&bar->mmap_mem));
+
+    if (vdev->msix && vdev->msix->table_bar == nr) {
+        memory_region_del_subregion(&bar->mem, &vdev->msix->mmap_mem);
+        munmap(vdev->msix->mmap, memory_region_size(&vdev->msix->mmap_mem));
+    }
+
+    memory_region_destroy(&bar->mem);
+}
+
+static int vfio_mmap_bar(VFIOBAR *bar, MemoryRegion *mem, MemoryRegion *submem,
+                         void **map, size_t size, off_t offset,
+                         const char *name)
+{
+    int ret = 0;
+
+    if (size && bar->flags & VFIO_REGION_INFO_FLAG_MMAP) {
+        int prot = 0;
+
+        if (bar->flags & VFIO_REGION_INFO_FLAG_READ) {
+            prot |= PROT_READ;
+        }
+
+        if (bar->flags & VFIO_REGION_INFO_FLAG_WRITE) {
+            prot |= PROT_WRITE;
+        }
+
+        *map = mmap(NULL, size, prot, MAP_SHARED,
+                    bar->fd, bar->fd_offset + offset);
+        if (*map == MAP_FAILED) {
+            *map = NULL;
+            ret = -errno;
+            goto empty_region;
+        }
+
+        memory_region_init_ram_ptr(submem, name, size, *map);
+    } else {
+empty_region:
+        /* Create a zero sized sub-region to make cleanup easy. */
+        memory_region_init(submem, name, 0);
+    }
+
+    memory_region_add_subregion(mem, offset, submem);
+
+    return ret;
+}
+
+static void vfio_map_bar(VFIODevice *vdev, int nr)
+{
+    VFIOBAR *bar = &vdev->bars[nr];
+    unsigned size = bar->size;
+    char name[64];
+    uint32_t pci_bar;
+    uint8_t type;
+    int ret;
+
+    /* Skip both unimplemented BARs and the upper half of 64bit BARS. */
+    if (!size) {
+        return;
+    }
+
+    snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x BAR %d",
+             vdev->host.domain, vdev->host.bus, vdev->host.slot,
+             vdev->host.function, nr);
+
+    /* Determine what type of BAR this is for registration */
+    ret = pread(vdev->fd, &pci_bar, sizeof(pci_bar),
+                vdev->config_offset + PCI_BASE_ADDRESS_0 + (4 * nr));
+    if (ret != sizeof(pci_bar)) {
+        error_report("vfio: Failed to read BAR %d (%m)\n", nr);
+        return;
+    }
+
+    pci_bar = le32_to_cpu(pci_bar);
+    type = pci_bar & (pci_bar & PCI_BASE_ADDRESS_SPACE_IO ?
+           ~PCI_BASE_ADDRESS_IO_MASK : ~PCI_BASE_ADDRESS_MEM_MASK);
+
+    /* A "slow" read/write mapping underlies all BARs */
+    memory_region_init_io(&bar->mem, &vfio_bar_ops, bar, name, size);
+    pci_register_bar(&vdev->pdev, nr, type, &bar->mem);
+
+    /*
+     * We can't mmap areas overlapping the MSIX vector table, so we
+     * potentially insert a direct-mapped subregion before and after it.
+     */
+    if (vdev->msix && vdev->msix->table_bar == nr) {
+        size = vdev->msix->table_offset & TARGET_PAGE_MASK;
+    }
+
+    strncat(name, " mmap", sizeof(name) - strlen(name) - 1);
+    if (vfio_mmap_bar(bar, &bar->mem,
+                      &bar->mmap_mem, &bar->mmap, size, 0, name)) {
+        error_report("%s unsupported. Performance may be slow\n", name);
+    }
+
+    if (vdev->msix && vdev->msix->table_bar == nr) {
+        unsigned start;
+
+        start = TARGET_PAGE_ALIGN(vdev->msix->table_offset +
+                                  (vdev->msix->entries * PCI_MSIX_ENTRY_SIZE));
+
+        size = start < bar->size ? bar->size - start : 0;
+        strncat(name, " msix-hi", sizeof(name) - strlen(name) - 1);
+        /* VFIOMSIXInfo contains another MemoryRegion for this mapping */
+        if (vfio_mmap_bar(bar, &bar->mem, &vdev->msix->mmap_mem,
+                          &vdev->msix->mmap, size, start, name)) {
+            error_report("%s unsupported. Performance may be slow\n", name);
+        }
+    }
+}
+
+static void vfio_map_bars(VFIODevice *vdev)
+{
+    int i;
+
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        vfio_map_bar(vdev, i);
+    }
+}
+
+static void vfio_unmap_bars(VFIODevice *vdev)
+{
+    int i;
+
+    for (i = 0; i < PCI_ROM_SLOT; i++) {
+        vfio_unmap_bar(vdev, i);
+    }
+}
+
+/*
+ * General setup
+ */
+static uint8_t vfio_std_cap_max_size(PCIDevice *pdev, uint8_t pos)
+{
+    uint8_t tmp, next = 0xff;
+
+    for (tmp = pdev->config[PCI_CAPABILITY_LIST]; tmp;
+         tmp = pdev->config[tmp + 1]) {
+        if (tmp > pos && tmp < next) {
+            next = tmp;
+        }
+    }
+
+    return next - pos;
+}
+
+static int vfio_add_std_cap(VFIODevice *vdev, uint8_t pos)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    uint8_t cap_id, next, size;
+    int ret;
+
+    cap_id = pdev->config[pos];
+    next = pdev->config[pos + 1];
+
+    /*
+     * If it becomes important to configure capabilities to their actual
+     * size, use this as the default when it's something we don't recognize.
+     * Since QEMU doesn't actually handle many of the config accesses,
+     * exact size doesn't seem worthwhile.
+     */
+    size = vfio_std_cap_max_size(pdev, pos);
+
+    /*
+     * pci_add_capability always inserts the new capability at the head
+     * of the chain.  Therefore to end up with a chain that matches the
+     * physical device, we insert from the end by making this recursive.
+     * This is also why we pre-caclulate size above as cached config space
+     * will be changed as we unwind the stack.
+     */
+    if (next) {
+        ret = vfio_add_std_cap(vdev, next);
+        if (ret) {
+            return ret;
+        }
+    } else {
+        pdev->config[PCI_CAPABILITY_LIST] = 0; /* Begin the rebuild */
+    }
+
+    switch (cap_id) {
+    case PCI_CAP_ID_MSI:
+        ret = vfio_setup_msi(vdev, pos);
+        break;
+    case PCI_CAP_ID_MSIX:
+        ret = vfio_setup_msix(vdev, pos);
+        break;
+    default:
+        ret = pci_add_capability(pdev, cap_id, pos, size);
+        break;
+    }
+
+    if (ret < 0) {
+        error_report("vfio: %04x:%02x:%02x.%x Error adding PCI capability "
+                     "0x%x[0x%x]@0x%x: %d\n", vdev->host.domain,
+                     vdev->host.bus, vdev->host.slot, vdev->host.function,
+                     cap_id, size, pos, ret);
+        return ret;
+    }
+
+    return 0;
+}
+
+static int vfio_add_capabilities(VFIODevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+
+    if (!(pdev->config[PCI_STATUS] & PCI_STATUS_CAP_LIST) ||
+        !pdev->config[PCI_CAPABILITY_LIST]) {
+        return 0; /* Nothing to add */
+    }
+
+    return vfio_add_std_cap(vdev, pdev->config[PCI_CAPABILITY_LIST]);
+}
+
+static int vfio_load_rom(VFIODevice *vdev)
+{
+    uint64_t size = vdev->rom_size;
+    char name[32];
+    off_t off = 0, voff = vdev->rom_offset;
+    ssize_t bytes;
+    void *ptr;
+
+    /* If loading ROM from file, pci handles it */
+    if (vdev->pdev.romfile || !vdev->pdev.rom_bar || !size) {
+        return 0;
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
+
+    snprintf(name, sizeof(name), "vfio[%04x:%02x:%02x.%x].rom",
+             vdev->host.domain, vdev->host.bus, vdev->host.slot,
+             vdev->host.function);
+    memory_region_init_ram(&vdev->pdev.rom, name, size);
+    ptr = memory_region_get_ram_ptr(&vdev->pdev.rom);
+    memset(ptr, 0xff, size);
+
+    while (size) {
+        bytes = pread(vdev->fd, ptr + off, size, voff + off);
+        if (bytes == 0) {
+            break; /* expect that we could get back less than the ROM BAR */
+        } else if (bytes > 0) {
+            off += bytes;
+            size -= bytes;
+        } else {
+            if (errno == EINTR || errno == EAGAIN) {
+                continue;
+            }
+            error_report("vfio: Error reading device ROM: %m\n");
+            memory_region_destroy(&vdev->pdev.rom);
+            return -errno;
+        }
+    }
+
+    pci_register_bar(&vdev->pdev, PCI_ROM_SLOT, 0, &vdev->pdev.rom);
+    vdev->pdev.has_rom = true;
+    return 0;
+}
+
+static int vfio_connect_container(VFIOGroup *group)
+{
+    VFIOContainer *container;
+    int ret, fd;
+
+    if (group->container) {
+        return 0;
+    }
+
+    QLIST_FOREACH(container, &container_list, next) {
+        if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
+            group->container = container;
+            QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+            return 0;
+        }
+    }
+
+    fd = qemu_open("/dev/vfio/vfio", O_RDWR);
+    if (fd < 0) {
+        error_report("vfio: failed to open /dev/vfio/vfio: %m\n");
+        return -errno;
+    }
+
+    ret = ioctl(fd, VFIO_GET_API_VERSION);
+    if (ret != VFIO_API_VERSION) {
+        error_report("vfio: supported vfio version: %d, "
+                     "reported version: %d\n", VFIO_API_VERSION, ret);
+        close(fd);
+        return -EINVAL;
+    }
+
+    container = g_malloc0(sizeof(*container));
+    container->fd = fd;
+
+    if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU)) {
+        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
+        if (ret) {
+            error_report("vfio: failed to set group container: %m\n");
+            g_free(container);
+            close(fd);
+            return -errno;
+        }
+
+        ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU);
+        if (ret) {
+            error_report("vfio: failed to set iommu for container: %m\n");
+            g_free(container);
+            close(fd);
+            return -errno;
+        }
+
+        container->iommu_data.listener = vfio_memory_listener;
+        container->iommu_data.release = vfio_listener_release;
+
+        memory_listener_register(&container->iommu_data.listener,
+                                 get_system_memory());
+    } else {
+        error_report("vfio: No available IOMMU models\n");
+        g_free(container);
+        close(fd);
+        return -EINVAL;
+    }
+
+    QLIST_INIT(&container->group_list);
+    QLIST_INSERT_HEAD(&container_list, container, next);
+
+    group->container = container;
+    QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+
+    return 0;
+}
+
+static void vfio_disconnect_container(VFIOGroup *group)
+{
+    VFIOContainer *container = group->container;
+
+    if (ioctl(group->fd, VFIO_GROUP_UNSET_CONTAINER, &container->fd)) {
+        error_report("vfio: error disconnecting group %d from container\n",
+                     group->groupid);
+    }
+
+    QLIST_REMOVE(group, container_next);
+    group->container = NULL;
+
+    if (QLIST_EMPTY(&container->group_list)) {
+        if (container->iommu_data.release) {
+            container->iommu_data.release(container);
+        }
+        QLIST_REMOVE(container, next);
+        DPRINTF("vfio_disconnect_container: close container->fd\n");
+        close(container->fd);
+        g_free(container);
+    }
+}
+
+static VFIOGroup *vfio_get_group(int groupid)
+{
+    VFIOGroup *group;
+    char path[32];
+    struct vfio_group_status status = { .argsz = sizeof(status) };
+
+    QLIST_FOREACH(group, &group_list, next) {
+        if (group->groupid == groupid) {
+            return group;
+        }
+    }
+
+    group = g_malloc0(sizeof(*group));
+
+    snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
+    group->fd = qemu_open(path, O_RDWR);
+    if (group->fd < 0) {
+        error_report("vfio: error opening %s: %m\n", path);
+        g_free(group);
+        return NULL;
+    }
+
+    if (ioctl(group->fd, VFIO_GROUP_GET_STATUS, &status)) {
+        error_report("vfio: error getting group status: %m\n");
+        close(group->fd);
+        g_free(group);
+        return NULL;
+    }
+
+    if (!(status.flags & VFIO_GROUP_FLAGS_VIABLE)) {
+        error_report("vfio: error, group %d is not viable, please ensure "
+                     "all devices within the iommu_group are bound to their "
+                     "vfio bus driver.\n", groupid);
+        close(group->fd);
+        g_free(group);
+        return NULL;
+    }
+
+    group->groupid = groupid;
+    QLIST_INIT(&group->device_list);
+
+    if (vfio_connect_container(group)) {
+        error_report("vfio: failed to setup container for group %d\n", groupid);
+        close(group->fd);
+        g_free(group);
+        return NULL;
+    }
+
+    QLIST_INSERT_HEAD(&group_list, group, next);
+
+    return group;
+}
+
+static void vfio_put_group(VFIOGroup *group)
+{
+    if (!QLIST_EMPTY(&group->device_list)) {
+        return;
+    }
+
+    vfio_disconnect_container(group);
+    QLIST_REMOVE(group, next);
+    DPRINTF("vfio_put_group: close group->fd\n");
+    close(group->fd);
+    g_free(group);
+}
+
+static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
+{
+    struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
+    struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+    int ret, i;
+
+    ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
+    if (ret < 0) {
+        error_report("vfio: error getting device %s from group %d: %m\n",
+                     name, group->groupid);
+        error_report("Verify all devices in group %d are bound to vfio-pci "
+                     "or pci-stub and not already in use\n", group->groupid);
+        return ret;
+    }
+
+    vdev->fd = ret;
+    vdev->group = group;
+    QLIST_INSERT_HEAD(&group->device_list, vdev, next);
+
+    /* Sanity check device */
+    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_INFO, &dev_info);
+    if (ret) {
+        error_report("vfio: error getting device info: %m\n");
+        goto error;
+    }
+
+    DPRINTF("Device %s flags: %u, regions: %u, irgs: %u\n", name,
+            dev_info.flags, dev_info.num_regions, dev_info.num_irqs);
+
+    if (!(dev_info.flags & VFIO_DEVICE_FLAGS_PCI)) {
+        error_report("vfio: Um, this isn't a PCI device\n");
+        goto error;
+    }
+
+    vdev->reset_works = !!(dev_info.flags & VFIO_DEVICE_FLAGS_RESET);
+    if (!vdev->reset_works) {
+        error_report("Warning, device %s does not support reset\n", name);
+    }
+
+    if (dev_info.num_regions != VFIO_PCI_NUM_REGIONS) {
+        error_report("vfio: unexpected number of io regions %u\n",
+                     dev_info.num_regions);
+        goto error;
+    }
+
+    if (dev_info.num_irqs != VFIO_PCI_NUM_IRQS) {
+        error_report("vfio: unexpected number of irqs %u\n", dev_info.num_irqs);
+        goto error;
+    }
+
+    for (i = VFIO_PCI_BAR0_REGION_INDEX; i < VFIO_PCI_ROM_REGION_INDEX; i++) {
+        reg_info.index = i;
+
+        ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+        if (ret) {
+            error_report("vfio: Error getting region %d info: %m\n", i);
+            goto error;
+        }
+
+        DPRINTF("Device %s region %d:\n", name, i);
+        DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
+                (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
+                (unsigned long)reg_info.flags);
+
+        vdev->bars[i].flags = reg_info.flags;
+        vdev->bars[i].size = reg_info.size;
+        vdev->bars[i].fd_offset = reg_info.offset;
+        vdev->bars[i].fd = vdev->fd;
+        vdev->bars[i].nr = i;
+    }
+
+    reg_info.index = VFIO_PCI_ROM_REGION_INDEX;
+
+    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+    if (ret) {
+        error_report("vfio: Error getting ROM info: %m\n");
+        goto error;
+    }
+
+    DPRINTF("Device %s ROM:\n", name);
+    DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
+            (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
+            (unsigned long)reg_info.flags);
+
+    vdev->rom_size = reg_info.size;
+    vdev->rom_offset = reg_info.offset;
+
+    reg_info.index = VFIO_PCI_CONFIG_REGION_INDEX;
+
+    ret = ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, &reg_info);
+    if (ret) {
+        error_report("vfio: Error getting config info: %m\n");
+        goto error;
+    }
+
+    DPRINTF("Device %s config:\n", name);
+    DPRINTF("  size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n",
+            (unsigned long)reg_info.size, (unsigned long)reg_info.offset,
+            (unsigned long)reg_info.flags);
+
+    vdev->config_size = reg_info.size;
+    vdev->config_offset = reg_info.offset;
+
+error:
+    if (ret) {
+        QLIST_REMOVE(vdev, next);
+        vdev->group = NULL;
+        close(vdev->fd);
+    }
+    return ret;
+}
+
+static void vfio_put_device(VFIODevice *vdev)
+{
+    QLIST_REMOVE(vdev, next);
+    vdev->group = NULL;
+    DPRINTF("vfio_put_device: close vdev->fd\n");
+    close(vdev->fd);
+    if (vdev->msix) {
+        g_free(vdev->msix);
+        vdev->msix = NULL;
+    }
+}
+
+static int vfio_initfn(PCIDevice *pdev)
+{
+    VFIODevice *pvdev, *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    VFIOGroup *group;
+    char path[PATH_MAX], iommu_group_path[PATH_MAX], *group_name;
+    ssize_t len;
+    struct stat st;
+    int groupid;
+    int ret;
+
+    /* Check that the host device exists */
+    snprintf(path, sizeof(path),
+             "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/",
+             vdev->host.domain, vdev->host.bus, vdev->host.slot,
+             vdev->host.function);
+    if (stat(path, &st) < 0) {
+        error_report("vfio: error: no such host device: %s\n", path);
+        return -errno;
+    }
+
+    strncat(path, "iommu_group", sizeof(path) - strlen(path) - 1);
+
+    len = readlink(path, iommu_group_path, PATH_MAX);
+    if (len <= 0) {
+        error_report("vfio: error no iommu_group for device\n");
+        return -errno;
+    }
+
+    iommu_group_path[len] = 0;
+    group_name = basename(iommu_group_path);
+
+    if (sscanf(group_name, "%d", &groupid) != 1) {
+        error_report("vfio: error reading %s: %m\n", path);
+        return -errno;
+    }
+
+    DPRINTF("%s(%04x:%02x:%02x.%x) group %d\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function, groupid);
+
+    group = vfio_get_group(groupid);
+    if (!group) {
+        error_report("vfio: failed to get group %d\n", groupid);
+        return -ENOENT;
+    }
+
+    snprintf(path, sizeof(path), "%04x:%02x:%02x.%01x",
+            vdev->host.domain, vdev->host.bus, vdev->host.slot,
+            vdev->host.function);
+
+    QLIST_FOREACH(pvdev, &group->device_list, next) {
+        if (pvdev->host.domain == vdev->host.domain &&
+            pvdev->host.bus == vdev->host.bus &&
+            pvdev->host.slot == vdev->host.slot &&
+            pvdev->host.function == vdev->host.function) {
+
+            error_report("vfio: error: device %s is already attached\n", path);
+            vfio_put_group(group);
+            return -EBUSY;
+        }
+    }
+
+    ret = vfio_get_device(group, path, vdev);
+    if (ret) {
+        error_report("vfio: failed to get device %s\n", path);
+        vfio_put_group(group);
+        return ret;
+    }
+
+    /* Get a copy of config space */
+    ret = pread(vdev->fd, vdev->pdev.config,
+                MIN(pci_config_size(&vdev->pdev), vdev->config_size),
+                vdev->config_offset);
+    if (ret < (int)MIN(pci_config_size(&vdev->pdev), vdev->config_size)) {
+        ret = ret < 0 ? -errno : -EFAULT;
+        error_report("vfio: Failed to read device config space\n");
+        goto out_put;
+    }
+
+    /*
+     * Clear host resource mapping info.  If we choose not to register a
+     * BAR, such as might be the case with the option ROM, we can get
+     * confusing, unwritable, residual addresses from the host here.
+     */
+    memset(&vdev->pdev.config[PCI_BASE_ADDRESS_0], 0, 24);
+    memset(&vdev->pdev.config[PCI_ROM_ADDRESS], 0, 4);
+
+    vfio_load_rom(vdev);
+
+    ret = vfio_early_setup_msix(vdev);
+    if (ret) {
+        goto out_put;
+    }
+
+    vfio_map_bars(vdev);
+
+    ret = vfio_add_capabilities(vdev);
+    if (ret) {
+        goto out_teardown;
+    }
+
+    if (vfio_pci_read_config(&vdev->pdev, PCI_INTERRUPT_PIN, 1)) {
+        if (vdev->intx.intx && strcmp(vdev->intx.intx, "off")) {
+            error_report("vfio: Unknown option x-intx=%s, "
+                         "valid options: \"off\".\n", vdev->intx.intx);
+            ret = -EINVAL;
+            goto out_teardown;
+        }
+
+        if (vdev->intx.intx && !strcmp(vdev->intx.intx, "off")) {
+            vdev->intx.disabled = true;
+        }
+
+        ret = vfio_enable_intx(vdev);
+        if (ret) {
+            goto out_teardown;
+        }
+    }
+
+    return 0;
+
+out_teardown:
+    pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+    vfio_teardown_msi(vdev);
+    vfio_unmap_bars(vdev);
+out_put:
+    vfio_put_device(vdev);
+    vfio_put_group(group);
+    return ret;
+}
+
+static void vfio_exitfn(PCIDevice *pdev)
+{
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+    VFIOGroup *group = vdev->group;
+
+    pci_device_set_intx_routing_notifier(&vdev->pdev, NULL);
+    vfio_disable_interrupts(vdev);
+    vfio_teardown_msi(vdev);
+    vfio_unmap_bars(vdev);
+    vfio_put_device(vdev);
+    vfio_put_group(group);
+}
+
+static void vfio_pci_reset(DeviceState *dev)
+{
+    PCIDevice *pdev = DO_UPCAST(PCIDevice, qdev, dev);
+    VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev);
+
+    if (!vdev->reset_works) {
+        return;
+    }
+
+    if (ioctl(vdev->fd, VFIO_DEVICE_RESET)) {
+        error_report("vfio: Error unable to reset physical device "
+                     "(%04x:%02x:%02x.%x): %m\n", vdev->host.domain,
+                     vdev->host.bus, vdev->host.slot, vdev->host.function);
+    }
+}
+
+static Property vfio_pci_dev_properties[] = {
+    DEFINE_PROP_PCI_HOST_DEVADDR("host", VFIODevice, host),
+    DEFINE_PROP_STRING("x-intx", VFIODevice, intx.intx),
+    /*
+     * TODO - support passed fds... is this necessary?
+     * DEFINE_PROP_STRING("vfiofd", VFIODevice, vfiofd_name),
+     * DEFINE_PROP_STRING("vfiogroupfd, VFIODevice, vfiogroupfd_name),
+     */
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+
+static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+
+    dc->reset = vfio_pci_reset;
+    dc->props = vfio_pci_dev_properties;
+    pdc->init = vfio_initfn;
+    pdc->exit = vfio_exitfn;
+    pdc->config_read = vfio_pci_read_config;
+    pdc->config_write = vfio_pci_write_config;
+}
+
+static const TypeInfo vfio_pci_dev_info = {
+    .name = "vfio-pci",
+    .parent = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(VFIODevice),
+    .class_init = vfio_pci_dev_class_init,
+};
+
+static void register_vfio_pci_dev_type(void)
+{
+    type_register_static(&vfio_pci_dev_info);
+}
+
+type_init(register_vfio_pci_dev_type)
diff --git a/hw/vfio_pci_int.h b/hw/vfio_pci_int.h
new file mode 100644
index 0000000..3812d8d
--- /dev/null
+++ b/hw/vfio_pci_int.h
@@ -0,0 +1,114 @@
+/*
+ * vfio based device assignment support
+ *
+ * Copyright Red Hat, Inc. 2012
+ *
+ * Authors:
+ *  Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ */
+
+#ifndef HW_VFIO_PCI_INT_H
+#define HW_VFIO_PCI_INT_H
+
+#include "qemu-common.h"
+#include "qemu-queue.h"
+#include "pci.h"
+#include "event_notifier.h"
+
+typedef struct VFIOBAR {
+    off_t fd_offset; /* offset of BAR within device fd */
+    int fd; /* device fd, allows us to pass VFIOBAR as opaque data */
+    MemoryRegion mem; /* slow, read/write access */
+    MemoryRegion mmap_mem; /* direct mapped access */
+    void *mmap;
+    size_t size;
+    uint32_t flags; /* VFIO region flags (rd/wr/mmap) */
+    uint8_t nr; /* cache the BAR number for debug */
+} VFIOBAR;
+
+typedef struct VFIOINTx {
+    bool pending; /* interrupt pending */
+    bool kvm_accel; /* set when QEMU bypass through KVM enabled */
+    uint8_t pin; /* which pin to pull for qemu_set_irq */
+    EventNotifier interrupt; /* eventfd triggered on interrupt */
+    EventNotifier unmask; /* eventfd for unmask on QEMU bypass */
+    PCIINTxRoute route; /* routing info for QEMU bypass */
+    bool disabled;
+    char *intx;
+} VFIOINTx;
+
+struct VFIODevice;
+
+typedef struct VFIOMSIVector {
+    EventNotifier interrupt; /* eventfd triggered on interrupt */
+    struct VFIODevice *vdev; /* back pointer to device */
+    int virq; /* KVM irqchip route for QEMU bypass */
+    bool use;
+} VFIOMSIVector;
+
+enum {
+    VFIO_INT_NONE = 0,
+    VFIO_INT_INTx = 1,
+    VFIO_INT_MSI  = 2,
+    VFIO_INT_MSIX = 3,
+};
+
+struct VFIOGroup;
+
+typedef struct VFIOContainer {
+    int fd; /* /dev/vfio/vfio, empowered by the attached groups */
+    struct {
+        /* enable abstraction to support various iommu backends */
+        union {
+            MemoryListener listener; /* Used by type1 iommu */
+        };
+        void (*release)(struct VFIOContainer *);
+    } iommu_data;
+    QLIST_HEAD(, VFIOGroup) group_list;
+    QLIST_ENTRY(VFIOContainer) next;
+} VFIOContainer;
+
+/* Cache of MSI-X setup plus extra mmap and memory region for split BAR map */
+typedef struct VFIOMSIXInfo {
+    uint8_t table_bar;
+    uint8_t pba_bar;
+    uint16_t entries;
+    uint32_t table_offset;
+    uint32_t pba_offset;
+    MemoryRegion mmap_mem;
+    void *mmap;
+} VFIOMSIXInfo;
+
+typedef struct VFIODevice {
+    PCIDevice pdev;
+    int fd;
+    VFIOINTx intx;
+    unsigned int config_size;
+    off_t config_offset; /* Offset of config space region within device fd */
+    unsigned int rom_size;
+    off_t rom_offset; /* Offset of ROM region within device fd */
+    int msi_cap_size;
+    VFIOMSIVector *msi_vectors;
+    VFIOMSIXInfo *msix;
+    int nr_vectors; /* Number of MSI/MSIX vectors currently in use */
+    int interrupt; /* Current interrupt type */
+    VFIOBAR bars[PCI_NUM_REGIONS - 1]; /* No ROM */
+    PCIHostDeviceAddress host;
+    QLIST_ENTRY(VFIODevice) next;
+    struct VFIOGroup *group;
+    bool reset_works;
+} VFIODevice;
+
+typedef struct VFIOGroup {
+    int fd;
+    int groupid;
+    VFIOContainer *container;
+    QLIST_HEAD(, VFIODevice) device_list;
+    QLIST_ENTRY(VFIOGroup) next;
+    QLIST_ENTRY(VFIOGroup) container_next;
+} VFIOGroup;
+
+#endif /* HW_VFIO_PCI_INT_H */
diff --git a/hw/vga-isa-mm.c b/hw/vga-isa-mm.c
index 44ae7d9..306e6ba 100644
--- a/hw/vga-isa-mm.c
+++ b/hw/vga-isa-mm.c
@@ -107,6 +107,7 @@
     s_ioport_ctrl = g_malloc(sizeof(*s_ioport_ctrl));
     memory_region_init_io(s_ioport_ctrl, &vga_mm_ctrl_ops, s,
                           "vga-mm-ctrl", 0x100000);
+    memory_region_set_flush_coalesced(s_ioport_ctrl);
 
     vga_io_memory = g_malloc(sizeof(*vga_io_memory));
     /* XXX: endianness? */
diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index 9abbada..996d47f 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -24,7 +24,6 @@
 #include "hw.h"
 #include "console.h"
 #include "pci.h"
-#include "vga-pci.h"
 #include "vga_int.h"
 #include "pixel_ops.h"
 #include "qemu-timer.h"
@@ -47,7 +46,7 @@
     }
 };
 
-static int pci_vga_initfn(PCIDevice *dev)
+static int pci_std_vga_initfn(PCIDevice *dev)
 {
      PCIVGAState *d = DO_UPCAST(PCIVGAState, dev, dev);
      VGACommonState *s = &d->vga;
@@ -70,11 +69,6 @@
      return 0;
 }
 
-DeviceState *pci_vga_init(PCIBus *bus)
-{
-    return &pci_create_simple(bus, -1, "VGA")->qdev;
-}
-
 static Property vga_pci_properties[] = {
     DEFINE_PROP_UINT32("vgamem_mb", PCIVGAState, vga.vram_size_mb, 16),
     DEFINE_PROP_END_OF_LIST(),
@@ -86,7 +80,7 @@
     PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
 
     k->no_hotplug = 1;
-    k->init = pci_vga_initfn;
+    k->init = pci_std_vga_initfn;
     k->romfile = "vgabios-stdvga.bin";
     k->vendor_id = PCI_VENDOR_ID_QEMU;
     k->device_id = PCI_DEVICE_ID_QEMU_VGA;
diff --git a/hw/vga-pci.h b/hw/vga-pci.h
deleted file mode 100644
index 49abf13..0000000
--- a/hw/vga-pci.h
+++ /dev/null
@@ -1,12 +0,0 @@
-#ifndef VGA_PCI_H
-#define VGA_PCI_H
-
-#include "qemu-common.h"
-
-/* vga-pci.c */
-DeviceState *pci_vga_init(PCIBus *bus);
-
-/* cirrus_vga.c */
-DeviceState *pci_cirrus_vga_init(PCIBus *bus);
-
-#endif
diff --git a/hw/vga.c b/hw/vga.c
index 80299ea..afaef0d 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -361,6 +361,8 @@
     VGACommonState *s = opaque;
     int val, index;
 
+    qemu_flush_coalesced_mmio_buffer();
+
     if (vga_ioport_invalid(s, addr)) {
         val = 0xff;
     } else {
@@ -453,6 +455,8 @@
     VGACommonState *s = opaque;
     int index;
 
+    qemu_flush_coalesced_mmio_buffer();
+
     /* check port range access depending on color/monochrome mode */
     if (vga_ioport_invalid(s, addr)) {
         return;
@@ -2338,6 +2342,7 @@
     vga_mem = g_malloc(sizeof(*vga_mem));
     memory_region_init_io(vga_mem, &vga_mem_ops, s,
                           "vga-lowmem", 0x20000);
+    memory_region_set_flush_coalesced(vga_mem);
 
     return vga_mem;
 }
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index 6f6d172..e25cc96 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -64,31 +64,22 @@
 }
 
 static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
-    int is_read)
+    bool is_read)
 {
-    BlockErrorAction action = bdrv_get_on_error(req->dev->bs, is_read);
+    BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error);
     VirtIOBlock *s = req->dev;
 
-    if (action == BLOCK_ERR_IGNORE) {
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read);
-        return 0;
-    }
-
-    if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC)
-            || action == BLOCK_ERR_STOP_ANY) {
+    if (action == BDRV_ACTION_STOP) {
         req->next = s->rq;
         s->rq = req;
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read);
-        vm_stop(RUN_STATE_IO_ERROR);
-        bdrv_iostatus_set_err(s->bs, error);
-    } else {
+    } else if (action == BDRV_ACTION_REPORT) {
         virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
         bdrv_acct_done(s->bs, &req->acct);
         g_free(req);
-        bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read);
     }
 
-    return 1;
+    bdrv_error_action(s->bs, action, is_read, error);
+    return action != BDRV_ACTION_IGNORE;
 }
 
 static void virtio_blk_rw_complete(void *opaque, int ret)
@@ -98,7 +89,7 @@
     trace_virtio_blk_rw_complete(req, ret);
 
     if (ret) {
-        int is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
+        bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT);
         if (virtio_blk_handle_rw_error(req, -ret, is_read))
             return;
     }
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index b1998b2..247d7be 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -447,10 +447,6 @@
     VirtIONet *n = to_virtio_net(vdev);
 
     qemu_flush_queued_packets(&n->nic->nc);
-
-    /* We now have RX buffers, signal to the IO thread to break out of the
-     * select to re-poll the tap file descriptor */
-    qemu_notify_event();
 }
 
 static int virtio_net_can_receive(NetClientState *nc)
@@ -694,7 +690,7 @@
 {
     VirtIONet *n = DO_UPCAST(NICState, nc, nc)->opaque;
 
-    virtqueue_push(n->tx_vq, &n->async_tx.elem, n->async_tx.len);
+    virtqueue_push(n->tx_vq, &n->async_tx.elem, 0);
     virtio_notify(&n->vdev, n->tx_vq);
 
     n->async_tx.elem.out_num = n->async_tx.len = 0;
@@ -758,7 +754,7 @@
 
         len += ret;
 
-        virtqueue_push(vq, &elem, len);
+        virtqueue_push(vq, &elem, 0);
         virtio_notify(&n->vdev, vq);
 
         if (++num_packets >= n->tx_burst) {
diff --git a/hw/virtio-serial-bus.c b/hw/virtio-serial-bus.c
index 82073f5..d20bd8b 100644
--- a/hw/virtio-serial-bus.c
+++ b/hw/virtio-serial-bus.c
@@ -287,6 +287,7 @@
 size_t virtio_serial_guest_ready(VirtIOSerialPort *port)
 {
     VirtQueue *vq = port->ivq;
+    unsigned int bytes;
 
     if (!virtio_queue_ready(vq) ||
         !(port->vser->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK) ||
@@ -296,14 +297,8 @@
     if (use_multiport(port->vser) && !port->guest_connected) {
         return 0;
     }
-
-    if (virtqueue_avail_bytes(vq, 4096, 0)) {
-        return 4096;
-    }
-    if (virtqueue_avail_bytes(vq, 1, 0)) {
-        return 1;
-    }
-    return 0;
+    virtqueue_get_avail_bytes(vq, &bytes, NULL);
+    return bytes;
 }
 
 static void flush_queued_data_bh(void *opaque)
diff --git a/hw/virtio.c b/hw/virtio.c
index 209c763..6821092 100644
--- a/hw/virtio.c
+++ b/hw/virtio.c
@@ -241,7 +241,7 @@
                                   elem->in_sg[i].iov_len,
                                   1, size);
 
-        offset += elem->in_sg[i].iov_len;
+        offset += size;
     }
 
     for (i = 0; i < elem->out_num; i++)
@@ -335,10 +335,11 @@
     return next;
 }
 
-int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes)
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                               unsigned int *out_bytes)
 {
     unsigned int idx;
-    int total_bufs, in_total, out_total;
+    unsigned int total_bufs, in_total, out_total;
 
     idx = vq->last_avail_idx;
 
@@ -380,13 +381,9 @@
             }
 
             if (vring_desc_flags(desc_pa, i) & VRING_DESC_F_WRITE) {
-                if (in_bytes > 0 &&
-                    (in_total += vring_desc_len(desc_pa, i)) >= in_bytes)
-                    return 1;
+                in_total += vring_desc_len(desc_pa, i);
             } else {
-                if (out_bytes > 0 &&
-                    (out_total += vring_desc_len(desc_pa, i)) >= out_bytes)
-                    return 1;
+                out_total += vring_desc_len(desc_pa, i);
             }
         } while ((i = virtqueue_next_desc(desc_pa, i, max)) != max);
 
@@ -395,7 +392,24 @@
         else
             total_bufs++;
     }
+    if (in_bytes) {
+        *in_bytes = in_total;
+    }
+    if (out_bytes) {
+        *out_bytes = out_total;
+    }
+}
 
+int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
+                          unsigned int out_bytes)
+{
+    unsigned int in_total, out_total;
+
+    virtqueue_get_avail_bytes(vq, &in_total, &out_total);
+    if ((in_bytes && in_bytes < in_total)
+        || (out_bytes && out_bytes < out_total)) {
+        return 1;
+    }
     return 0;
 }
 
diff --git a/hw/virtio.h b/hw/virtio.h
index 7a4f564..80de375 100644
--- a/hw/virtio.h
+++ b/hw/virtio.h
@@ -147,7 +147,10 @@
 void virtqueue_map_sg(struct iovec *sg, target_phys_addr_t *addr,
     size_t num_sg, int is_write);
 int virtqueue_pop(VirtQueue *vq, VirtQueueElement *elem);
-int virtqueue_avail_bytes(VirtQueue *vq, int in_bytes, int out_bytes);
+int virtqueue_avail_bytes(VirtQueue *vq, unsigned int in_bytes,
+                          unsigned int out_bytes);
+void virtqueue_get_avail_bytes(VirtQueue *vq, unsigned int *in_bytes,
+                               unsigned int *out_bytes);
 
 void virtio_notify(VirtIODevice *vdev, VirtQueue *vq);
 
diff --git a/hw/vmware_vga.c b/hw/vmware_vga.c
index b68e883..6f7074e 100644
--- a/hw/vmware_vga.c
+++ b/hw/vmware_vga.c
@@ -25,7 +25,6 @@
 #include "loader.h"
 #include "console.h"
 #include "pci.h"
-#include "vmware_vga.h"
 
 #undef VERBOSE
 #define HW_RECT_ACCEL
@@ -1186,6 +1185,7 @@
 
     memory_region_init_io(&s->io_bar, &vmsvga_io_ops, &s->chip,
                           "vmsvga-io", 0x10);
+    memory_region_set_flush_coalesced(&s->io_bar);
     pci_register_bar(&s->card, 0, PCI_BASE_ADDRESS_SPACE_IO, &s->io_bar);
 
     vmsvga_init(&s->chip, pci_address_space(dev),
diff --git a/hw/vmware_vga.h b/hw/vmware_vga.h
deleted file mode 100644
index 000fbdd..0000000
--- a/hw/vmware_vga.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef QEMU_VMWARE_VGA_H
-#define QEMU_VMWARE_VGA_H
-
-#include "qemu-common.h"
-
-/* vmware_vga.c */
-static inline DeviceState *pci_vmsvga_init(PCIBus *bus)
-{
-    PCIDevice *dev;
-
-    dev = pci_create_simple(bus, -1, "vmware-svga");
-    return &dev->qdev;
-}
-
-#endif
diff --git a/hw/xen-host-pci-device.c b/hw/xen-host-pci-device.c
index e7ff680..743b37b 100644
--- a/hw/xen-host-pci-device.c
+++ b/hw/xen-host-pci-device.c
@@ -47,13 +47,13 @@
 }
 
 
-/* This size should be enough to read the first 7 lines of a ressource file */
-#define XEN_HOST_PCI_RESSOURCE_BUFFER_SIZE 400
+/* This size should be enough to read the first 7 lines of a resource file */
+#define XEN_HOST_PCI_RESOURCE_BUFFER_SIZE 400
 static int xen_host_pci_get_resource(XenHostPCIDevice *d)
 {
     int i, rc, fd;
     char path[PATH_MAX];
-    char buf[XEN_HOST_PCI_RESSOURCE_BUFFER_SIZE];
+    char buf[XEN_HOST_PCI_RESOURCE_BUFFER_SIZE];
     unsigned long long start, end, flags, size;
     char *endptr, *s;
     uint8_t type;
diff --git a/hw/xen.h b/hw/xen.h
index e5926b7..d14e92d 100644
--- a/hw/xen.h
+++ b/hw/xen.h
@@ -48,6 +48,7 @@
 struct MemoryRegion;
 void xen_ram_alloc(ram_addr_t ram_addr, ram_addr_t size,
                    struct MemoryRegion *mr);
+void xen_modified_memory(ram_addr_t start, ram_addr_t length);
 #endif
 
 struct MemoryRegion;
diff --git a/hw/xen_domainbuild.c b/hw/xen_domainbuild.c
index a6a12e5..db14974 100644
--- a/hw/xen_domainbuild.c
+++ b/hw/xen_domainbuild.c
@@ -153,7 +153,6 @@
 
 quit:
     qemu_system_shutdown_request();
-    return;
 }
 
 static int xen_domain_watcher(void)
diff --git a/hw/xen_nic.c b/hw/xen_nic.c
index 8b79bfb..cf7d559 100644
--- a/hw/xen_nic.c
+++ b/hw/xen_nic.c
@@ -415,6 +415,7 @@
 {
     struct XenNetDev *netdev = container_of(xendev, struct XenNetDev, xendev);
     net_tx_packets(netdev);
+    qemu_flush_queued_packets(&netdev->nic->nc);
 }
 
 static int net_free(struct XenDevice *xendev)
diff --git a/hw/xen_platform.c b/hw/xen_platform.c
index 0d6c2ff..956dbfe 100644
--- a/hw/xen_platform.c
+++ b/hw/xen_platform.c
@@ -85,8 +85,10 @@
 
 static void unplug_nic(PCIBus *b, PCIDevice *d, void *o)
 {
+    /* We have to ignore passthrough devices */
     if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
-            PCI_CLASS_NETWORK_ETHERNET) {
+            PCI_CLASS_NETWORK_ETHERNET
+            && strcmp(d->name, "xen-pci-passthrough") != 0) {
         qdev_free(&d->qdev);
     }
 }
@@ -98,8 +100,10 @@
 
 static void unplug_disks(PCIBus *b, PCIDevice *d, void *o)
 {
+    /* We have to ignore passthrough devices */
     if (pci_get_word(d->config + PCI_CLASS_DEVICE) ==
-            PCI_CLASS_STORAGE_IDE) {
+            PCI_CLASS_STORAGE_IDE
+            && strcmp(d->name, "xen-pci-passthrough") != 0) {
         qdev_unplug(&(d->qdev), NULL);
     }
 }
diff --git a/hw/xen_pt.c b/hw/xen_pt.c
index 307119a..838bcea 100644
--- a/hw/xen_pt.c
+++ b/hw/xen_pt.c
@@ -410,14 +410,17 @@
             if (r->type & XEN_HOST_PCI_REGION_TYPE_PREFETCH) {
                 type |= PCI_BASE_ADDRESS_MEM_PREFETCH;
             }
+            if (r->type & XEN_HOST_PCI_REGION_TYPE_MEM_64) {
+                type |= PCI_BASE_ADDRESS_MEM_TYPE_64;
+            }
         }
 
         memory_region_init_io(&s->bar[i], &ops, &s->dev,
                               "xen-pci-pt-bar", r->size);
         pci_register_bar(&s->dev, i, type, &s->bar[i]);
 
-        XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%08"PRIx64
-                   " base_addr=0x%08"PRIx64" type: %#x)\n",
+        XEN_PT_LOG(&s->dev, "IO region %i registered (size=0x%lx"PRIx64
+                   " base_addr=0x%lx"PRIx64" type: %#x)\n",
                    i, r->size, r->base_addr, type);
     }
 
diff --git a/hw/xen_pt.h b/hw/xen_pt.h
index 41904ec..112477a 100644
--- a/hw/xen_pt.h
+++ b/hw/xen_pt.h
@@ -96,7 +96,7 @@
  * - do NOT use ALL F for init_val, otherwise the tbl will not be registered.
  */
 
-/* emulated register infomation */
+/* emulated register information */
 struct XenPTRegInfo {
     uint32_t offset;
     uint32_t size;
@@ -140,7 +140,7 @@
     (XenPCIPassthroughState *, const XenPTRegGroupInfo *,
      uint32_t base_offset, uint8_t *size);
 
-/* emulated register group infomation */
+/* emulated register group information */
 struct XenPTRegGroupInfo {
     uint8_t grp_id;
     XenPTRegisterGroupType grp_type;
diff --git a/hw/xen_pt_config_init.c b/hw/xen_pt_config_init.c
index 00eb3d9..0a5f82c 100644
--- a/hw/xen_pt_config_init.c
+++ b/hw/xen_pt_config_init.c
@@ -342,6 +342,23 @@
 #define XEN_PT_BAR_IO_RO_MASK     0x00000003  /* BAR ReadOnly mask(I/O) */
 #define XEN_PT_BAR_IO_EMU_MASK    0xFFFFFFFC  /* BAR emul mask(I/O) */
 
+static bool is_64bit_bar(PCIIORegion *r)
+{
+    return !!(r->type & PCI_BASE_ADDRESS_MEM_TYPE_64);
+}
+
+static uint64_t xen_pt_get_bar_size(PCIIORegion *r)
+{
+    if (is_64bit_bar(r)) {
+        uint64_t size64;
+        size64 = (r + 1)->size;
+        size64 <<= 32;
+        size64 += r->size;
+        return size64;
+    }
+    return r->size;
+}
+
 static XenPTBarFlag xen_pt_bar_reg_parse(XenPCIPassthroughState *s,
                                          XenPTRegInfo *reg)
 {
@@ -366,7 +383,7 @@
 
     /* check unused BAR */
     r = &d->io_regions[index];
-    if (r->size == 0) {
+    if (!xen_pt_get_bar_size(r)) {
         return XEN_PT_BAR_FLAG_UNUSED;
     }
 
@@ -481,7 +498,12 @@
     switch (s->bases[index].bar_flag) {
     case XEN_PT_BAR_FLAG_MEM:
         bar_emu_mask = XEN_PT_BAR_MEM_EMU_MASK;
-        bar_ro_mask = XEN_PT_BAR_MEM_RO_MASK | (r_size - 1);
+        if (!r_size) {
+            /* low 32 bits mask for 64 bit bars */
+            bar_ro_mask = XEN_PT_BAR_ALLF;
+        } else {
+            bar_ro_mask = XEN_PT_BAR_MEM_RO_MASK | (r_size - 1);
+        }
         break;
     case XEN_PT_BAR_FLAG_IO:
         bar_emu_mask = XEN_PT_BAR_IO_EMU_MASK;
@@ -489,7 +511,7 @@
         break;
     case XEN_PT_BAR_FLAG_UPPER:
         bar_emu_mask = XEN_PT_BAR_ALLF;
-        bar_ro_mask = 0;    /* all upper 32bit are R/W */
+        bar_ro_mask = r_size ? r_size - 1 : 0;
         break;
     default:
         break;
@@ -501,22 +523,13 @@
 
     /* check whether we need to update the virtual region address or not */
     switch (s->bases[index].bar_flag) {
+    case XEN_PT_BAR_FLAG_UPPER:
     case XEN_PT_BAR_FLAG_MEM:
         /* nothing to do */
         break;
     case XEN_PT_BAR_FLAG_IO:
         /* nothing to do */
         break;
-    case XEN_PT_BAR_FLAG_UPPER:
-        if (cfg_entry->data) {
-            if (cfg_entry->data != (XEN_PT_BAR_ALLF & ~bar_ro_mask)) {
-                XEN_PT_WARN(d, "Guest attempt to set high MMIO Base Address. "
-                            "Ignore mapping. "
-                            "(offset: 0x%02x, high address: 0x%08x)\n",
-                            reg->offset, cfg_entry->data);
-            }
-        }
-        break;
     default:
         break;
     }
@@ -562,7 +575,7 @@
     return 0;
 }
 
-/* Header Type0 reg static infomation table */
+/* Header Type0 reg static information table */
 static XenPTRegInfo xen_pt_emu_reg_header0[] = {
     /* Vendor ID reg */
     {
@@ -753,7 +766,7 @@
  * Vital Product Data Capability
  */
 
-/* Vital Product Data Capability Structure reg static infomation table */
+/* Vital Product Data Capability Structure reg static information table */
 static XenPTRegInfo xen_pt_emu_reg_vpd[] = {
     {
         .offset     = PCI_CAP_LIST_NEXT,
@@ -775,7 +788,7 @@
  * Vendor Specific Capability
  */
 
-/* Vendor Specific Capability Structure reg static infomation table */
+/* Vendor Specific Capability Structure reg static information table */
 static XenPTRegInfo xen_pt_emu_reg_vendor[] = {
     {
         .offset     = PCI_CAP_LIST_NEXT,
@@ -866,7 +879,7 @@
     return 0;
 }
 
-/* PCI Express Capability Structure reg static infomation table */
+/* PCI Express Capability Structure reg static information table */
 static XenPTRegInfo xen_pt_emu_reg_pcie[] = {
     /* Next Pointer reg */
     {
@@ -981,7 +994,7 @@
     return 0;
 }
 
-/* Power Management Capability reg static infomation table */
+/* Power Management Capability reg static information table */
 static XenPTRegInfo xen_pt_emu_reg_pm[] = {
     /* Next Pointer reg */
     {
@@ -1259,7 +1272,7 @@
     return 0;
 }
 
-/* MSI Capability Structure reg static infomation table */
+/* MSI Capability Structure reg static information table */
 static XenPTRegInfo xen_pt_emu_reg_msi[] = {
     /* Next Pointer reg */
     {
@@ -1396,7 +1409,7 @@
     return 0;
 }
 
-/* MSI-X Capability Structure reg static infomation table */
+/* MSI-X Capability Structure reg static information table */
 static XenPTRegInfo xen_pt_emu_reg_msix[] = {
     /* Next Pointer reg */
     {
diff --git a/hw/xics.c b/hw/xics.c
index b674771..ce88aa7 100644
--- a/hw/xics.c
+++ b/hw/xics.c
@@ -165,11 +165,12 @@
     int server;
     uint8_t priority;
     uint8_t saved_priority;
-    enum xics_irq_type type;
-    int asserted:1;
-    int sent:1;
-    int rejected:1;
-    int masked_pending:1;
+#define XICS_STATUS_ASSERTED           0x1
+#define XICS_STATUS_SENT               0x2
+#define XICS_STATUS_REJECTED           0x4
+#define XICS_STATUS_MASKED_PENDING     0x8
+    uint8_t status;
+    bool lsi;
 };
 
 struct ics_state {
@@ -191,8 +192,8 @@
     struct ics_irq_state *irq = ics->irqs + srcno;
 
     /* FIXME: filter by server#? */
-    if (irq->rejected) {
-        irq->rejected = 0;
+    if (irq->status & XICS_STATUS_REJECTED) {
+        irq->status &= ~XICS_STATUS_REJECTED;
         if (irq->priority != 0xff) {
             icp_irq(ics->icp, irq->server, srcno + ics->offset,
                     irq->priority);
@@ -204,8 +205,10 @@
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if ((irq->priority != 0xff) && irq->asserted && !irq->sent) {
-        irq->sent = 1;
+    if ((irq->priority != 0xff)
+        && (irq->status & XICS_STATUS_ASSERTED)
+        && !(irq->status & XICS_STATUS_SENT)) {
+        irq->status |= XICS_STATUS_SENT;
         icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
     }
 }
@@ -216,7 +219,7 @@
 
     if (val) {
         if (irq->priority == 0xff) {
-            irq->masked_pending = 1;
+            irq->status |= XICS_STATUS_MASKED_PENDING;
             /* masked pending */ ;
         } else  {
             icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
@@ -228,7 +231,11 @@
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    irq->asserted = val;
+    if (val) {
+        irq->status |= XICS_STATUS_ASSERTED;
+    } else {
+        irq->status &= ~XICS_STATUS_ASSERTED;
+    }
     resend_lsi(ics, srcno);
 }
 
@@ -237,7 +244,7 @@
     struct ics_state *ics = (struct ics_state *)opaque;
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if (irq->type == XICS_LSI) {
+    if (irq->lsi) {
         set_irq_lsi(ics, srcno, val);
     } else {
         set_irq_msi(ics, srcno, val);
@@ -248,11 +255,12 @@
 {
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if (!irq->masked_pending || (irq->priority == 0xff)) {
+    if (!(irq->status & XICS_STATUS_MASKED_PENDING)
+        || (irq->priority == 0xff)) {
         return;
     }
 
-    irq->masked_pending = 0;
+    irq->status &= ~XICS_STATUS_MASKED_PENDING;
     icp_irq(ics->icp, irq->server, srcno + ics->offset, irq->priority);
 }
 
@@ -262,15 +270,16 @@
 }
 
 static void ics_write_xive(struct ics_state *ics, int nr, int server,
-                           uint8_t priority)
+                           uint8_t priority, uint8_t saved_priority)
 {
     int srcno = nr - ics->offset;
     struct ics_irq_state *irq = ics->irqs + srcno;
 
     irq->server = server;
     irq->priority = priority;
+    irq->saved_priority = saved_priority;
 
-    if (irq->type == XICS_LSI) {
+    if (irq->lsi) {
         write_xive_lsi(ics, srcno);
     } else {
         write_xive_msi(ics, srcno);
@@ -281,8 +290,8 @@
 {
     struct ics_irq_state *irq = ics->irqs + nr - ics->offset;
 
-    irq->rejected = 1; /* Irrelevant but harmless for LSI */
-    irq->sent = 0; /* Irrelevant but harmless for MSI */
+    irq->status |= XICS_STATUS_REJECTED; /* Irrelevant but harmless for LSI */
+    irq->status &= ~XICS_STATUS_SENT; /* Irrelevant but harmless for MSI */
 }
 
 static void ics_resend(struct ics_state *ics)
@@ -293,7 +302,7 @@
         struct ics_irq_state *irq = ics->irqs + i;
 
         /* FIXME: filter by server#? */
-        if (irq->type == XICS_LSI) {
+        if (irq->lsi) {
             resend_lsi(ics, i);
         } else {
             resend_msi(ics, i);
@@ -306,8 +315,8 @@
     int srcno = nr - ics->offset;
     struct ics_irq_state *irq = ics->irqs + srcno;
 
-    if (irq->type == XICS_LSI) {
-        irq->sent = 0;
+    if (irq->lsi) {
+        irq->status &= ~XICS_STATUS_SENT;
     }
 }
 
@@ -325,14 +334,12 @@
     return icp->ics->qirqs[irq - icp->ics->offset];
 }
 
-void xics_set_irq_type(struct icp_state *icp, int irq,
-                       enum xics_irq_type type)
+void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi)
 {
     assert((irq >= icp->ics->offset)
            && (irq < (icp->ics->offset + icp->ics->nr_irqs)));
-    assert((type == XICS_MSI) || (type == XICS_LSI));
 
-    icp->ics->irqs[irq - icp->ics->offset].type = type;
+    icp->ics->irqs[irq - icp->ics->offset].lsi = lsi;
 }
 
 static target_ulong h_cppr(CPUPPCState *env, sPAPREnvironment *spapr,
@@ -399,7 +406,7 @@
         return;
     }
 
-    ics_write_xive(ics, nr, server, priority);
+    ics_write_xive(ics, nr, server, priority, priority);
 
     rtas_st(rets, 0, 0); /* Success */
 }
@@ -447,14 +454,8 @@
         return;
     }
 
-    /* This is a NOP for now, since the described PAPR semantics don't
-     * seem to gel with what Linux does */
-#if 0
-    struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
-
-    irq->saved_priority = irq->priority;
-    ics_write_xive_msi(xics, nr, irq->server, 0xff);
-#endif
+    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server, 0xff,
+                   ics->irqs[nr - ics->offset].priority);
 
     rtas_st(rets, 0, 0); /* Success */
 }
@@ -478,22 +479,40 @@
         return;
     }
 
-    /* This is a NOP for now, since the described PAPR semantics don't
-     * seem to gel with what Linux does */
-#if 0
-    struct ics_irq_state *irq = xics->irqs + (nr - xics->offset);
-
-    ics_write_xive_msi(xics, nr, irq->server, irq->saved_priority);
-#endif
+    ics_write_xive(ics, nr, ics->irqs[nr - ics->offset].server,
+                   ics->irqs[nr - ics->offset].saved_priority,
+                   ics->irqs[nr - ics->offset].saved_priority);
 
     rtas_st(rets, 0, 0); /* Success */
 }
 
+static void xics_reset(void *opaque)
+{
+    struct icp_state *icp = (struct icp_state *)opaque;
+    struct ics_state *ics = icp->ics;
+    int i;
+
+    for (i = 0; i < icp->nr_servers; i++) {
+        icp->ss[i].xirr = 0;
+        icp->ss[i].pending_priority = 0;
+        icp->ss[i].mfrr = 0xff;
+        /* Make all outputs are deasserted */
+        qemu_set_irq(icp->ss[i].output, 0);
+    }
+
+    for (i = 0; i < ics->nr_irqs; i++) {
+        /* Reset everything *except* the type */
+        ics->irqs[i].server = 0;
+        ics->irqs[i].status = 0;
+        ics->irqs[i].priority = 0xff;
+        ics->irqs[i].saved_priority = 0xff;
+    }
+}
+
 struct icp_state *xics_system_init(int nr_irqs)
 {
     CPUPPCState *env;
     int max_server_num;
-    int i;
     struct icp_state *icp;
     struct ics_state *ics;
 
@@ -508,10 +527,6 @@
     icp->nr_servers = max_server_num + 1;
     icp->ss = g_malloc0(icp->nr_servers*sizeof(struct icp_server_state));
 
-    for (i = 0; i < icp->nr_servers; i++) {
-        icp->ss[i].mfrr = 0xff;
-    }
-
     for (env = first_cpu; env != NULL; env = env->next_cpu) {
         struct icp_server_state *ss = &icp->ss[env->cpu_index];
 
@@ -539,11 +554,6 @@
     icp->ics = ics;
     ics->icp = icp;
 
-    for (i = 0; i < nr_irqs; i++) {
-        ics->irqs[i].priority = 0xff;
-        ics->irqs[i].saved_priority = 0xff;
-    }
-
     ics->qirqs = qemu_allocate_irqs(ics_set_irq, ics, nr_irqs);
 
     spapr_register_hypercall(H_CPPR, h_cppr);
@@ -556,5 +566,7 @@
     spapr_rtas_register("ibm,int-off", rtas_int_off);
     spapr_rtas_register("ibm,int-on", rtas_int_on);
 
+    qemu_register_reset(xics_reset, icp);
+
     return icp;
 }
diff --git a/hw/xics.h b/hw/xics.h
index 99b96ac..6817268 100644
--- a/hw/xics.h
+++ b/hw/xics.h
@@ -31,14 +31,8 @@
 
 struct icp_state;
 
-enum xics_irq_type {
-    XICS_MSI,        /* Message-signalled (edge) interrupt */
-    XICS_LSI,        /* Level-signalled interrupt */
-};
-
 qemu_irq xics_get_qirq(struct icp_state *icp, int irq);
-void xics_set_irq_type(struct icp_state *icp, int irq,
-                       enum xics_irq_type type);
+void xics_set_irq_type(struct icp_state *icp, int irq, bool lsi);
 
 struct icp_state *xics_system_init(int nr_irqs);
 
diff --git a/hw/xilinx.h b/hw/xilinx.h
index 556c5aa..9830047 100644
--- a/hw/xilinx.h
+++ b/hw/xilinx.h
@@ -21,9 +21,9 @@
 {
     DeviceState *dev;
 
-    dev = qdev_create(NULL, "xlnx,xps-timer");
+    dev = qdev_create(NULL, "xlnx.xps-timer");
     qdev_prop_set_uint32(dev, "one-timer-only", oto);
-    qdev_prop_set_uint32(dev, "frequency", freq);
+    qdev_prop_set_uint32(dev, "clock-frequency", freq);
     qdev_init_nofail(dev);
     sysbus_mmio_map(sysbus_from_qdev(dev), 0, base);
     sysbus_connect_irq(sysbus_from_qdev(dev), 0, irq);
@@ -55,13 +55,17 @@
                           int txmem, int rxmem)
 {
     DeviceState *dev;
+    Error *errp = NULL;
+
     qemu_check_nic_model(nd, "xlnx.axi-ethernet");
 
     dev = qdev_create(NULL, "xlnx.axi-ethernet");
     qdev_set_nic_properties(dev, nd);
     qdev_prop_set_uint32(dev, "rxmem", rxmem);
     qdev_prop_set_uint32(dev, "txmem", txmem);
-    object_property_set_link(OBJECT(dev), OBJECT(peer), "tx_dev", NULL);
+    object_property_set_link(OBJECT(dev), OBJECT(peer), "axistream-connected",
+                             &errp);
+    assert_no_error(errp);
     qdev_init_nofail(dev);
     sysbus_mmio_map(sysbus_from_qdev(dev), 0, base);
     sysbus_connect_irq(sysbus_from_qdev(dev), 0, irq);
@@ -74,8 +78,12 @@
                            target_phys_addr_t base, qemu_irq irq,
                            qemu_irq irq2, int freqhz)
 {
+    Error *errp = NULL;
+
     qdev_prop_set_uint32(dev, "freqhz", freqhz);
-    object_property_set_link(OBJECT(dev), OBJECT(peer), "tx_dev", NULL);
+    object_property_set_link(OBJECT(dev), OBJECT(peer), "axistream-connected",
+                             &errp);
+    assert_no_error(errp);
     qdev_init_nofail(dev);
 
     sysbus_mmio_map(sysbus_from_qdev(dev), 0, base);
diff --git a/hw/xilinx_spi.c b/hw/xilinx_spi.c
new file mode 100644
index 0000000..5cdf967
--- /dev/null
+++ b/hw/xilinx_spi.c
@@ -0,0 +1,385 @@
+/*
+ * QEMU model of the Xilinx SPI Controller
+ *
+ * Copyright (C) 2010 Edgar E. Iglesias.
+ * Copyright (C) 2012 Peter A. G. Crosthwaite <peter.crosthwaite@petalogix.com>
+ * Copyright (C) 2012 PetaLogix
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "sysbus.h"
+#include "sysemu.h"
+#include "qemu-log.h"
+#include "fifo.h"
+
+#include "ssi.h"
+
+#ifdef XILINX_SPI_ERR_DEBUG
+#define DB_PRINT(...) do { \
+    fprintf(stderr,  ": %s: ", __func__); \
+    fprintf(stderr, ## __VA_ARGS__); \
+    } while (0);
+#else
+    #define DB_PRINT(...)
+#endif
+
+#define R_DGIER     (0x1c / 4)
+#define R_DGIER_IE  (1 << 31)
+
+#define R_IPISR     (0x20 / 4)
+#define IRQ_DRR_NOT_EMPTY    (1 << (31 - 23))
+#define IRQ_DRR_OVERRUN      (1 << (31 - 26))
+#define IRQ_DRR_FULL         (1 << (31 - 27))
+#define IRQ_TX_FF_HALF_EMPTY (1 << 6)
+#define IRQ_DTR_UNDERRUN     (1 << 3)
+#define IRQ_DTR_EMPTY        (1 << (31 - 29))
+
+#define R_IPIER     (0x28 / 4)
+#define R_SRR       (0x40 / 4)
+#define R_SPICR     (0x60 / 4)
+#define R_SPICR_TXFF_RST     (1 << 5)
+#define R_SPICR_RXFF_RST     (1 << 6)
+#define R_SPICR_MTI          (1 << 8)
+
+#define R_SPISR     (0x64 / 4)
+#define SR_TX_FULL    (1 << 3)
+#define SR_TX_EMPTY   (1 << 2)
+#define SR_RX_FULL    (1 << 1)
+#define SR_RX_EMPTY   (1 << 0)
+
+#define R_SPIDTR    (0x68 / 4)
+#define R_SPIDRR    (0x6C / 4)
+#define R_SPISSR    (0x70 / 4)
+#define R_TX_FF_OCY (0x74 / 4)
+#define R_RX_FF_OCY (0x78 / 4)
+#define R_MAX       (0x7C / 4)
+
+#define FIFO_CAPACITY 256
+
+typedef struct XilinxSPI {
+    SysBusDevice busdev;
+    MemoryRegion mmio;
+
+    qemu_irq irq;
+    int irqline;
+
+    uint8_t num_cs;
+    qemu_irq *cs_lines;
+
+    SSIBus *spi;
+
+    Fifo8 rx_fifo;
+    Fifo8 tx_fifo;
+
+    uint32_t regs[R_MAX];
+} XilinxSPI;
+
+static void txfifo_reset(XilinxSPI *s)
+{
+    fifo8_reset(&s->tx_fifo);
+
+    s->regs[R_SPISR] &= ~SR_TX_FULL;
+    s->regs[R_SPISR] |= SR_TX_EMPTY;
+}
+
+static void rxfifo_reset(XilinxSPI *s)
+{
+    fifo8_reset(&s->rx_fifo);
+
+    s->regs[R_SPISR] |= SR_RX_EMPTY;
+    s->regs[R_SPISR] &= ~SR_RX_FULL;
+}
+
+static void xlx_spi_update_cs(XilinxSPI *s)
+{
+   int i;
+
+    for (i = 0; i < s->num_cs; ++i) {
+        qemu_set_irq(s->cs_lines[i], !(~s->regs[R_SPISSR] & 1 << i));
+    }
+}
+
+static void xlx_spi_update_irq(XilinxSPI *s)
+{
+    uint32_t pending;
+
+    s->regs[R_IPISR] |=
+            (!fifo8_is_empty(&s->rx_fifo) ? IRQ_DRR_NOT_EMPTY : 0) |
+            (fifo8_is_full(&s->rx_fifo) ? IRQ_DRR_FULL : 0);
+
+    pending = s->regs[R_IPISR] & s->regs[R_IPIER];
+
+    pending = pending && (s->regs[R_DGIER] & R_DGIER_IE);
+    pending = !!pending;
+
+    /* This call lies right in the data paths so don't call the
+       irq chain unless things really changed.  */
+    if (pending != s->irqline) {
+        s->irqline = pending;
+        DB_PRINT("irq_change of state %d ISR:%x IER:%X\n",
+                    pending, s->regs[R_IPISR], s->regs[R_IPIER]);
+        qemu_set_irq(s->irq, pending);
+    }
+
+}
+
+static void xlx_spi_do_reset(XilinxSPI *s)
+{
+    memset(s->regs, 0, sizeof s->regs);
+
+    rxfifo_reset(s);
+    txfifo_reset(s);
+
+    s->regs[R_SPISSR] = ~0;
+    xlx_spi_update_irq(s);
+    xlx_spi_update_cs(s);
+}
+
+static void xlx_spi_reset(DeviceState *d)
+{
+    xlx_spi_do_reset(DO_UPCAST(XilinxSPI, busdev.qdev, d));
+}
+
+static inline int spi_master_enabled(XilinxSPI *s)
+{
+    return !(s->regs[R_SPICR] & R_SPICR_MTI);
+}
+
+static void spi_flush_txfifo(XilinxSPI *s)
+{
+    uint32_t tx;
+    uint32_t rx;
+
+    while (!fifo8_is_empty(&s->tx_fifo)) {
+        tx = (uint32_t)fifo8_pop(&s->tx_fifo);
+        DB_PRINT("data tx:%x\n", tx);
+        rx = ssi_transfer(s->spi, tx);
+        DB_PRINT("data rx:%x\n", rx);
+        if (fifo8_is_full(&s->rx_fifo)) {
+            s->regs[R_IPISR] |= IRQ_DRR_OVERRUN;
+        } else {
+            fifo8_push(&s->rx_fifo, (uint8_t)rx);
+            if (fifo8_is_full(&s->rx_fifo)) {
+                s->regs[R_SPISR] |= SR_RX_FULL;
+                s->regs[R_IPISR] |= IRQ_DRR_FULL;
+            }
+        }
+
+        s->regs[R_SPISR] &= ~SR_RX_EMPTY;
+        s->regs[R_SPISR] &= ~SR_TX_FULL;
+        s->regs[R_SPISR] |= SR_TX_EMPTY;
+
+        s->regs[R_IPISR] |= IRQ_DTR_EMPTY;
+        s->regs[R_IPISR] |= IRQ_DRR_NOT_EMPTY;
+    }
+
+}
+
+static uint64_t
+spi_read(void *opaque, target_phys_addr_t addr, unsigned int size)
+{
+    XilinxSPI *s = opaque;
+    uint32_t r = 0;
+
+    addr >>= 2;
+    switch (addr) {
+    case R_SPIDRR:
+        if (fifo8_is_empty(&s->rx_fifo)) {
+            DB_PRINT("Read from empty FIFO!\n");
+            return 0xdeadbeef;
+        }
+
+        s->regs[R_SPISR] &= ~SR_RX_FULL;
+        r = fifo8_pop(&s->rx_fifo);
+        if (fifo8_is_empty(&s->rx_fifo)) {
+            s->regs[R_SPISR] |= SR_RX_EMPTY;
+        }
+        break;
+
+    case R_SPISR:
+        r = s->regs[addr];
+        break;
+
+    default:
+        if (addr < ARRAY_SIZE(s->regs)) {
+            r = s->regs[addr];
+        }
+        break;
+
+    }
+    DB_PRINT("addr=" TARGET_FMT_plx " = %x\n", addr * 4, r);
+    xlx_spi_update_irq(s);
+    return r;
+}
+
+static void
+spi_write(void *opaque, target_phys_addr_t addr,
+            uint64_t val64, unsigned int size)
+{
+    XilinxSPI *s = opaque;
+    uint32_t value = val64;
+
+    DB_PRINT("addr=" TARGET_FMT_plx " = %x\n", addr, value);
+    addr >>= 2;
+    switch (addr) {
+    case R_SRR:
+        if (value != 0xa) {
+            DB_PRINT("Invalid write to SRR %x\n", value);
+        } else {
+            xlx_spi_do_reset(s);
+        }
+        break;
+
+    case R_SPIDTR:
+        s->regs[R_SPISR] &= ~SR_TX_EMPTY;
+        fifo8_push(&s->tx_fifo, (uint8_t)value);
+        if (fifo8_is_full(&s->tx_fifo)) {
+            s->regs[R_SPISR] |= SR_TX_FULL;
+        }
+        if (!spi_master_enabled(s)) {
+            goto done;
+        } else {
+            DB_PRINT("DTR and master enabled\n");
+        }
+        spi_flush_txfifo(s);
+        break;
+
+    case R_SPISR:
+        DB_PRINT("Invalid write to SPISR %x\n", value);
+        break;
+
+    case R_IPISR:
+        /* Toggle the bits.  */
+        s->regs[addr] ^= value;
+        break;
+
+    /* Slave Select Register.  */
+    case R_SPISSR:
+        s->regs[addr] = value;
+        xlx_spi_update_cs(s);
+        break;
+
+    case R_SPICR:
+        /* FIXME: reset irq and sr state to empty queues.  */
+        if (value & R_SPICR_RXFF_RST) {
+            rxfifo_reset(s);
+        }
+
+        if (value & R_SPICR_TXFF_RST) {
+            txfifo_reset(s);
+        }
+        value &= ~(R_SPICR_RXFF_RST | R_SPICR_TXFF_RST);
+        s->regs[addr] = value;
+
+        if (!(value & R_SPICR_MTI)) {
+            spi_flush_txfifo(s);
+        }
+        break;
+
+    default:
+        if (addr < ARRAY_SIZE(s->regs)) {
+            s->regs[addr] = value;
+        }
+        break;
+    }
+
+done:
+    xlx_spi_update_irq(s);
+}
+
+static const MemoryRegionOps spi_ops = {
+    .read = spi_read,
+    .write = spi_write,
+    .endianness = DEVICE_NATIVE_ENDIAN,
+    .valid = {
+        .min_access_size = 4,
+        .max_access_size = 4
+    }
+};
+
+static int xilinx_spi_init(SysBusDevice *dev)
+{
+    int i;
+    XilinxSPI *s = FROM_SYSBUS(typeof(*s), dev);
+
+    DB_PRINT("\n");
+
+    s->spi = ssi_create_bus(&dev->qdev, "spi");
+
+    sysbus_init_irq(dev, &s->irq);
+    s->cs_lines = g_new(qemu_irq, s->num_cs);
+    ssi_auto_connect_slaves(DEVICE(s), s->cs_lines, s->spi);
+    for (i = 0; i < s->num_cs; ++i) {
+        sysbus_init_irq(dev, &s->cs_lines[i]);
+    }
+
+    memory_region_init_io(&s->mmio, &spi_ops, s, "xilinx-spi", R_MAX * 4);
+    sysbus_init_mmio(dev, &s->mmio);
+
+    s->irqline = -1;
+
+    fifo8_create(&s->tx_fifo, FIFO_CAPACITY);
+    fifo8_create(&s->rx_fifo, FIFO_CAPACITY);
+
+    return 0;
+}
+
+static const VMStateDescription vmstate_xilinx_spi = {
+    .name = "xilinx_spi",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_FIFO8(tx_fifo, XilinxSPI),
+        VMSTATE_FIFO8(rx_fifo, XilinxSPI),
+        VMSTATE_UINT32_ARRAY(regs, XilinxSPI, R_MAX),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property xilinx_spi_properties[] = {
+    DEFINE_PROP_UINT8("num-ss-bits", XilinxSPI, num_cs, 1),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void xilinx_spi_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *k = SYS_BUS_DEVICE_CLASS(klass);
+
+    k->init = xilinx_spi_init;
+    dc->reset = xlx_spi_reset;
+    dc->props = xilinx_spi_properties;
+    dc->vmsd = &vmstate_xilinx_spi;
+}
+
+static TypeInfo xilinx_spi_info = {
+    .name           = "xlnx.xps-spi",
+    .parent         = TYPE_SYS_BUS_DEVICE,
+    .instance_size  = sizeof(XilinxSPI),
+    .class_init     = xilinx_spi_class_init,
+};
+
+static void xilinx_spi_register_types(void)
+{
+    type_register_static(&xilinx_spi_info);
+}
+
+type_init(xilinx_spi_register_types)
diff --git a/hw/xilinx_spips.c b/hw/xilinx_spips.c
new file mode 100644
index 0000000..f64a782
--- /dev/null
+++ b/hw/xilinx_spips.c
@@ -0,0 +1,354 @@
+/*
+ * QEMU model of the Xilinx Zynq SPI controller
+ *
+ * Copyright (c) 2012 Peter A. G. Crosthwaite
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "sysbus.h"
+#include "sysemu.h"
+#include "ptimer.h"
+#include "qemu-log.h"
+#include "fifo.h"
+#include "ssi.h"
+
+#ifdef XILINX_SPIPS_ERR_DEBUG
+#define DB_PRINT(...) do { \
+    fprintf(stderr,  ": %s: ", __func__); \
+    fprintf(stderr, ## __VA_ARGS__); \
+    } while (0);
+#else
+    #define DB_PRINT(...)
+#endif
+
+/* config register */
+#define R_CONFIG            (0x00 / 4)
+#define MODEFAIL_GEN_EN     (1 << 17)
+#define MAN_START_COM       (1 << 16)
+#define MAN_START_EN        (1 << 15)
+#define MANUAL_CS           (1 << 14)
+#define CS                  (0xF << 10)
+#define CS_SHIFT            (10)
+#define PERI_SEL            (1 << 9)
+#define REF_CLK             (1 << 8)
+#define FIFO_WIDTH          (3 << 6)
+#define BAUD_RATE_DIV       (7 << 3)
+#define CLK_PH              (1 << 2)
+#define CLK_POL             (1 << 1)
+#define MODE_SEL            (1 << 0)
+
+/* interrupt mechanism */
+#define R_INTR_STATUS       (0x04 / 4)
+#define R_INTR_EN           (0x08 / 4)
+#define R_INTR_DIS          (0x0C / 4)
+#define R_INTR_MASK         (0x10 / 4)
+#define IXR_TX_FIFO_UNDERFLOW   (1 << 6)
+#define IXR_RX_FIFO_FULL        (1 << 5)
+#define IXR_RX_FIFO_NOT_EMPTY   (1 << 4)
+#define IXR_TX_FIFO_FULL        (1 << 3)
+#define IXR_TX_FIFO_NOT_FULL    (1 << 2)
+#define IXR_TX_FIFO_MODE_FAIL   (1 << 1)
+#define IXR_RX_FIFO_OVERFLOW    (1 << 0)
+#define IXR_ALL                 ((IXR_TX_FIFO_UNDERFLOW<<1)-1)
+
+#define R_EN                (0x14 / 4)
+#define R_DELAY             (0x18 / 4)
+#define R_TX_DATA           (0x1C / 4)
+#define R_RX_DATA           (0x20 / 4)
+#define R_SLAVE_IDLE_COUNT  (0x24 / 4)
+#define R_TX_THRES          (0x28 / 4)
+#define R_RX_THRES          (0x2C / 4)
+#define R_MOD_ID            (0xFC / 4)
+
+#define R_MAX (R_MOD_ID+1)
+
+/* size of TXRX FIFOs */
+#define NUM_CS_LINES    4
+#define RXFF_A          32
+#define TXFF_A          32
+
+typedef struct {
+    SysBusDevice busdev;
+    MemoryRegion iomem;
+    qemu_irq irq;
+    int irqline;
+
+    qemu_irq cs_lines[NUM_CS_LINES];
+    SSIBus *spi;
+
+    Fifo8 rx_fifo;
+    Fifo8 tx_fifo;
+
+    uint32_t regs[R_MAX];
+} XilinxSPIPS;
+
+static void xilinx_spips_update_cs_lines(XilinxSPIPS *s)
+{
+    int i;
+    bool found = false;
+    int field = s->regs[R_CONFIG] >> CS_SHIFT;
+
+    for (i = 0; i < NUM_CS_LINES; i++) {
+        if (~field & (1 << i) && !found) {
+            found = true;
+            DB_PRINT("selecting slave %d\n", i);
+            qemu_set_irq(s->cs_lines[i], 0);
+        } else {
+            qemu_set_irq(s->cs_lines[i], 1);
+        }
+     }
+}
+
+static void xilinx_spips_update_ixr(XilinxSPIPS *s)
+{
+    /* These are set/cleared as they occur */
+    s->regs[R_INTR_STATUS] &= (IXR_TX_FIFO_UNDERFLOW | IXR_RX_FIFO_OVERFLOW |
+                                IXR_TX_FIFO_MODE_FAIL);
+    /* these are pure functions of fifo state, set them here */
+    s->regs[R_INTR_STATUS] |=
+        (fifo8_is_full(&s->rx_fifo) ? IXR_RX_FIFO_FULL : 0) |
+        (s->rx_fifo.num >= s->regs[R_RX_THRES] ? IXR_RX_FIFO_NOT_EMPTY : 0) |
+        (fifo8_is_full(&s->tx_fifo) ? IXR_TX_FIFO_FULL : 0) |
+        (s->tx_fifo.num < s->regs[R_TX_THRES] ? IXR_TX_FIFO_NOT_FULL : 0);
+    /* drive external interrupt pin */
+    int new_irqline = !!(s->regs[R_INTR_MASK] & s->regs[R_INTR_STATUS] &
+                                                                IXR_ALL);
+    if (new_irqline != s->irqline) {
+        s->irqline = new_irqline;
+        qemu_set_irq(s->irq, s->irqline);
+    }
+}
+
+static void xilinx_spips_reset(DeviceState *d)
+{
+    XilinxSPIPS *s = DO_UPCAST(XilinxSPIPS, busdev.qdev, d);
+
+    int i;
+    for (i = 0; i < R_MAX; i++) {
+        s->regs[i] = 0;
+    }
+
+    fifo8_reset(&s->rx_fifo);
+    fifo8_reset(&s->rx_fifo);
+    /* non zero resets */
+    s->regs[R_CONFIG] |= MODEFAIL_GEN_EN;
+    s->regs[R_SLAVE_IDLE_COUNT] = 0xFF;
+    s->regs[R_TX_THRES] = 1;
+    s->regs[R_RX_THRES] = 1;
+    /* FIXME: move magic number definition somewhere sensible */
+    s->regs[R_MOD_ID] = 0x01090106;
+    xilinx_spips_update_ixr(s);
+    xilinx_spips_update_cs_lines(s);
+}
+
+static void xilinx_spips_flush_txfifo(XilinxSPIPS *s)
+{
+    for (;;) {
+        uint32_t r;
+        uint8_t value;
+
+        if (fifo8_is_empty(&s->tx_fifo)) {
+            s->regs[R_INTR_STATUS] |= IXR_TX_FIFO_UNDERFLOW;
+            break;
+        } else {
+            value = fifo8_pop(&s->tx_fifo);
+        }
+
+        r = ssi_transfer(s->spi, (uint32_t)value);
+        DB_PRINT("tx = %02x rx = %02x\n", value, r);
+        if (fifo8_is_full(&s->rx_fifo)) {
+            s->regs[R_INTR_STATUS] |= IXR_RX_FIFO_OVERFLOW;
+            DB_PRINT("rx FIFO overflow");
+        } else {
+            fifo8_push(&s->rx_fifo, (uint8_t)r);
+        }
+    }
+    xilinx_spips_update_ixr(s);
+}
+
+static uint64_t xilinx_spips_read(void *opaque, target_phys_addr_t addr,
+                                                        unsigned size)
+{
+    XilinxSPIPS *s = opaque;
+    uint32_t mask = ~0;
+    uint32_t ret;
+
+    addr >>= 2;
+    switch (addr) {
+    case R_CONFIG:
+        mask = 0x0002FFFF;
+        break;
+    case R_INTR_STATUS:
+    case R_INTR_MASK:
+        mask = IXR_ALL;
+        break;
+    case  R_EN:
+        mask = 0x1;
+        break;
+    case R_SLAVE_IDLE_COUNT:
+        mask = 0xFF;
+        break;
+    case R_MOD_ID:
+        mask = 0x01FFFFFF;
+        break;
+    case R_INTR_EN:
+    case R_INTR_DIS:
+    case R_TX_DATA:
+        mask = 0;
+        break;
+    case R_RX_DATA:
+        ret = (uint32_t)fifo8_pop(&s->rx_fifo);
+        DB_PRINT("addr=" TARGET_FMT_plx " = %x\n", addr * 4, ret);
+        xilinx_spips_update_ixr(s);
+        return ret;
+    }
+    DB_PRINT("addr=" TARGET_FMT_plx " = %x\n", addr * 4, s->regs[addr] & mask);
+    return s->regs[addr] & mask;
+
+}
+
+static void xilinx_spips_write(void *opaque, target_phys_addr_t addr,
+                                        uint64_t value, unsigned size)
+{
+    int mask = ~0;
+    int man_start_com = 0;
+    XilinxSPIPS *s = opaque;
+
+    DB_PRINT("addr=" TARGET_FMT_plx " = %x\n", addr, (unsigned)value);
+    addr >>= 2;
+    switch (addr) {
+    case R_CONFIG:
+        mask = 0x0002FFFF;
+        if (value & MAN_START_COM) {
+            man_start_com = 1;
+        }
+        break;
+    case R_INTR_STATUS:
+        mask = IXR_ALL;
+        s->regs[R_INTR_STATUS] &= ~(mask & value);
+        goto no_reg_update;
+    case R_INTR_DIS:
+        mask = IXR_ALL;
+        s->regs[R_INTR_MASK] &= ~(mask & value);
+        goto no_reg_update;
+    case R_INTR_EN:
+        mask = IXR_ALL;
+        s->regs[R_INTR_MASK] |= mask & value;
+        goto no_reg_update;
+    case R_EN:
+        mask = 0x1;
+        break;
+    case R_SLAVE_IDLE_COUNT:
+        mask = 0xFF;
+        break;
+    case R_RX_DATA:
+    case R_INTR_MASK:
+    case R_MOD_ID:
+        mask = 0;
+        break;
+    case R_TX_DATA:
+        fifo8_push(&s->tx_fifo, (uint8_t)value);
+        goto no_reg_update;
+    }
+    s->regs[addr] = (s->regs[addr] & ~mask) | (value & mask);
+no_reg_update:
+    if (man_start_com) {
+        xilinx_spips_flush_txfifo(s);
+    }
+    xilinx_spips_update_ixr(s);
+    xilinx_spips_update_cs_lines(s);
+}
+
+static const MemoryRegionOps spips_ops = {
+    .read = xilinx_spips_read,
+    .write = xilinx_spips_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static int xilinx_spips_init(SysBusDevice *dev)
+{
+    XilinxSPIPS *s = FROM_SYSBUS(typeof(*s), dev);
+    int i;
+
+    DB_PRINT("inited device model\n");
+
+    s->spi = ssi_create_bus(&dev->qdev, "spi");
+
+    ssi_auto_connect_slaves(DEVICE(s), s->cs_lines, s->spi);
+    sysbus_init_irq(dev, &s->irq);
+    for (i = 0; i < NUM_CS_LINES; ++i) {
+        sysbus_init_irq(dev, &s->cs_lines[i]);
+    }
+
+    memory_region_init_io(&s->iomem, &spips_ops, s, "spi", R_MAX*4);
+    sysbus_init_mmio(dev, &s->iomem);
+
+    s->irqline = -1;
+
+    fifo8_create(&s->rx_fifo, RXFF_A);
+    fifo8_create(&s->tx_fifo, TXFF_A);
+
+    return 0;
+}
+
+static int xilinx_spips_post_load(void *opaque, int version_id)
+{
+    xilinx_spips_update_ixr((XilinxSPIPS *)opaque);
+    xilinx_spips_update_cs_lines((XilinxSPIPS *)opaque);
+    return 0;
+}
+
+static const VMStateDescription vmstate_xilinx_spips = {
+    .name = "xilinx_spips",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .post_load = xilinx_spips_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_FIFO8(tx_fifo, XilinxSPIPS),
+        VMSTATE_FIFO8(rx_fifo, XilinxSPIPS),
+        VMSTATE_UINT32_ARRAY(regs, XilinxSPIPS, R_MAX),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static void xilinx_spips_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    SysBusDeviceClass *sdc = SYS_BUS_DEVICE_CLASS(klass);
+
+    sdc->init = xilinx_spips_init;
+    dc->reset = xilinx_spips_reset;
+    dc->vmsd = &vmstate_xilinx_spips;
+}
+
+static const TypeInfo xilinx_spips_info = {
+    .name  = "xilinx,spips",
+    .parent = TYPE_SYS_BUS_DEVICE,
+    .instance_size  = sizeof(XilinxSPIPS),
+    .class_init = xilinx_spips_class_init,
+};
+
+static void xilinx_spips_register_types(void)
+{
+    type_register_static(&xilinx_spips_info);
+}
+
+type_init(xilinx_spips_register_types)
diff --git a/hw/xilinx_timer.c b/hw/xilinx_timer.c
index b562bd0..2e48ca2 100644
--- a/hw/xilinx_timer.c
+++ b/hw/xilinx_timer.c
@@ -24,6 +24,7 @@
 
 #include "sysbus.h"
 #include "ptimer.h"
+#include "qemu-log.h"
 
 #define D(x)
 
@@ -119,7 +120,7 @@
             break;
 
     }
-    D(printf("%s timer=%d %x=%x\n", __func__, timer, addr * 4, r));
+    D(fprintf(stderr, "%s timer=%d %x=%x\n", __func__, timer, addr * 4, r));
     return r;
 }
 
@@ -127,7 +128,7 @@
 {
     uint64_t count;
 
-    D(printf("%s timer=%d down=%d\n", __func__,
+    D(fprintf(stderr, "%s timer=%d down=%d\n", __func__,
               xt->nr, xt->regs[R_TCSR] & TCSR_UDT));
 
     ptimer_stop(xt->ptimer);
@@ -152,7 +153,7 @@
     addr >>= 2;
     timer = timer_from_addr(addr);
     xt = &t->timers[timer];
-    D(printf("%s addr=%x val=%x (timer=%d off=%d)\n",
+    D(fprintf(stderr, "%s addr=%x val=%x (timer=%d off=%d)\n",
              __func__, addr * 4, value, timer, addr & 3));
     /* Further decoding to address a specific timers reg.  */
     addr &= 3;
@@ -189,7 +190,7 @@
 {
     struct xlx_timer *xt = opaque;
     struct timerblock *t = xt->parent;
-    D(printf("%s %d\n", __func__, timer));
+    D(fprintf(stderr, "%s %d\n", __func__, xt->nr));
     xt->regs[R_TCSR] |= TCSR_TINT;
 
     if (xt->regs[R_TCSR] & TCSR_ARHT)
@@ -217,14 +218,15 @@
         ptimer_set_freq(xt->ptimer, t->freq_hz);
     }
 
-    memory_region_init_io(&t->mmio, &timer_ops, t, "xlnx,xps-timer",
+    memory_region_init_io(&t->mmio, &timer_ops, t, "xlnx.xps-timer",
                           R_MAX * 4 * num_timers(t));
     sysbus_init_mmio(dev, &t->mmio);
     return 0;
 }
 
 static Property xilinx_timer_properties[] = {
-    DEFINE_PROP_UINT32("frequency", struct timerblock, freq_hz,   62 * 1000000),
+    DEFINE_PROP_UINT32("clock-frequency", struct timerblock, freq_hz,
+                                                                62 * 1000000),
     DEFINE_PROP_UINT8("one-timer-only", struct timerblock, one_timer_only, 0),
     DEFINE_PROP_END_OF_LIST(),
 };
@@ -239,7 +241,7 @@
 }
 
 static TypeInfo xilinx_timer_info = {
-    .name          = "xlnx,xps-timer",
+    .name          = "xlnx.xps-timer",
     .parent        = TYPE_SYS_BUS_DEVICE,
     .instance_size = sizeof(struct timerblock),
     .class_init    = xilinx_timer_class_init,
diff --git a/hw/xilinx_zynq.c b/hw/xilinx_zynq.c
index 7e6c273..fd46ba2 100644
--- a/hw/xilinx_zynq.c
+++ b/hw/xilinx_zynq.c
@@ -24,6 +24,9 @@
 #include "flash.h"
 #include "blockdev.h"
 #include "loader.h"
+#include "ssi.h"
+
+#define NUM_SPI_FLASHES 4
 
 #define FLASH_SIZE (64 * 1024 * 1024)
 #define FLASH_SECTOR_SIZE (128 * 1024)
@@ -46,6 +49,34 @@
     sysbus_connect_irq(s, 0, irq);
 }
 
+static inline void zynq_init_spi_flashes(uint32_t base_addr, qemu_irq irq)
+{
+    DeviceState *dev;
+    SysBusDevice *busdev;
+    SSIBus *spi;
+    int i;
+
+    dev = qdev_create(NULL, "xilinx,spips");
+    qdev_init_nofail(dev);
+    busdev = sysbus_from_qdev(dev);
+    sysbus_mmio_map(busdev, 0, base_addr);
+    sysbus_connect_irq(busdev, 0, irq);
+
+    spi = (SSIBus *)qdev_get_child_bus(dev, "spi");
+
+    for (i = 0; i < NUM_SPI_FLASHES; ++i) {
+        qemu_irq cs_line;
+
+        dev = ssi_create_slave_no_init(spi, "m25p80");
+        qdev_prop_set_string(dev, "partname", "n25q128");
+        qdev_init_nofail(dev);
+
+        cs_line = qdev_get_gpio_in(dev, 0);
+        sysbus_connect_irq(busdev, i+1, cs_line);
+    }
+
+}
+
 static void zynq_init(ram_addr_t ram_size, const char *boot_device,
                         const char *kernel_filename, const char *kernel_cmdline,
                         const char *initrd_filename, const char *cpu_model)
@@ -113,6 +144,9 @@
         pic[n] = qdev_get_gpio_in(dev, n);
     }
 
+    zynq_init_spi_flashes(0xE0006000, pic[58-IRQ_OFFSET]);
+    zynq_init_spi_flashes(0xE0007000, pic[81-IRQ_OFFSET]);
+
     sysbus_create_simple("cadence_uart", 0xE0000000, pic[59-IRQ_OFFSET]);
     sysbus_create_simple("cadence_uart", 0xE0001000, pic[82-IRQ_OFFSET]);
 
diff --git a/hw/z2.c b/hw/z2.c
index 289cee9..076fad2 100644
--- a/hw/z2.c
+++ b/hw/z2.c
@@ -161,10 +161,11 @@
 
 static VMStateDescription vmstate_zipit_lcd_state = {
     .name = "zipit-lcd",
-    .version_id = 1,
-    .minimum_version_id = 1,
-    .minimum_version_id_old = 1,
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .minimum_version_id_old = 2,
     .fields = (VMStateField[]) {
+        VMSTATE_SSI_SLAVE(ssidev, ZipitLCD),
         VMSTATE_INT32(selected, ZipitLCD),
         VMSTATE_INT32(enabled, ZipitLCD),
         VMSTATE_BUFFER(buf, ZipitLCD),
diff --git a/input.c b/input.c
index c4b0619..25d3973 100644
--- a/input.c
+++ b/input.c
@@ -186,8 +186,7 @@
 
 int index_from_key(const char *key)
 {
-    int i, keycode;
-    char *endp;
+    int i;
 
     for (i = 0; QKeyCode_lookup[i] != NULL; i++) {
         if (!strcmp(key, QKeyCode_lookup[i])) {
@@ -195,17 +194,6 @@
         }
     }
 
-    if (strstart(key, "0x", NULL)) {
-        keycode = strtoul(key, &endp, 0);
-        if (*endp == '\0' && keycode >= 0x01 && keycode <= 0xff) {
-            for (i = 0; i < Q_KEY_CODE_MAX; i++) {
-                if (keycode == key_defs[i]) {
-                    break;
-                }
-            }
-        }
-    }
-
     /* Return Q_KEY_CODE_MAX if the key is invalid */
     return i;
 }
@@ -224,30 +212,46 @@
     return i;
 }
 
-static QKeyCodeList *keycodes;
+static int *keycodes;
+static int keycodes_size;
 static QEMUTimer *key_timer;
 
+static int keycode_from_keyvalue(const KeyValue *value)
+{
+    if (value->kind == KEY_VALUE_KIND_QCODE) {
+        return key_defs[value->qcode];
+    } else {
+        assert(value->kind == KEY_VALUE_KIND_NUMBER);
+        return value->number;
+    }
+}
+
+static void free_keycodes(void)
+{
+    g_free(keycodes);
+    keycodes = NULL;
+    keycodes_size = 0;
+}
+
 static void release_keys(void *opaque)
 {
-    int keycode;
-    QKeyCodeList *p;
+    int i;
 
-    for (p = keycodes; p != NULL; p = p->next) {
-        keycode = key_defs[p->value];
-        if (keycode & 0x80) {
+    for (i = 0; i < keycodes_size; i++) {
+        if (keycodes[i] & 0x80) {
             kbd_put_keycode(0xe0);
         }
-        kbd_put_keycode(keycode | 0x80);
+        kbd_put_keycode(keycodes[i]| 0x80);
     }
-    qapi_free_QKeyCodeList(keycodes);
-    keycodes = NULL;
+
+    free_keycodes();
 }
 
-void qmp_send_key(QKeyCodeList *keys, bool has_hold_time, int64_t hold_time,
+void qmp_send_key(KeyValueList *keys, bool has_hold_time, int64_t hold_time,
                   Error **errp)
 {
     int keycode;
-    QKeyCodeList *p, *keylist, *head = NULL, *tmp = NULL;
+    KeyValueList *p;
 
     if (!key_timer) {
         key_timer = qemu_new_timer_ns(vm_clock, release_keys, NULL);
@@ -257,31 +261,28 @@
         qemu_del_timer(key_timer);
         release_keys(NULL);
     }
+
     if (!has_hold_time) {
         hold_time = 100;
     }
 
     for (p = keys; p != NULL; p = p->next) {
-        keylist = g_malloc0(sizeof(*keylist));
-        keylist->value = p->value;
-        keylist->next = NULL;
-
-        if (!head) {
-            head = keylist;
-        }
-        if (tmp) {
-            tmp->next = keylist;
-        }
-        tmp = keylist;
-
         /* key down events */
-        keycode = key_defs[p->value];
+        keycode = keycode_from_keyvalue(p->value);
+        if (keycode < 0x01 || keycode > 0xff) {
+            error_setg(errp, "invalid hex keycode 0x%x\n", keycode);
+            free_keycodes();
+            return;
+        }
+
         if (keycode & 0x80) {
             kbd_put_keycode(0xe0);
         }
         kbd_put_keycode(keycode & 0x7f);
+
+        keycodes = g_realloc(keycodes, sizeof(int) * (keycodes_size + 1));
+        keycodes[keycodes_size++] = keycode;
     }
-    keycodes = head;
 
     /* delayed key up events */
     qemu_mod_timer(key_timer, qemu_get_clock_ns(vm_clock) +
diff --git a/iohandler.c b/iohandler.c
index dea4355..a2d871b 100644
--- a/iohandler.c
+++ b/iohandler.c
@@ -56,6 +56,8 @@
 {
     IOHandlerRecord *ioh;
 
+    assert(fd >= 0);
+
     if (!fd_read && !fd_write) {
         QLIST_FOREACH(ioh, &io_handlers, next) {
             if (ioh->fd == fd) {
diff --git a/iov.c b/iov.c
index 60705c7..c6a66f0 100644
--- a/iov.c
+++ b/iov.c
@@ -26,7 +26,7 @@
 # include <sys/socket.h>
 #endif
 
-size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt,
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
                     size_t offset, const void *buf, size_t bytes)
 {
     size_t done;
diff --git a/iov.h b/iov.h
index 381f37a..a73569f 100644
--- a/iov.h
+++ b/iov.h
@@ -36,7 +36,7 @@
  * such "large" value is -1 (sinice size_t is unsigned),
  * so specifying `-1' as `bytes' means 'up to the end of iovec'.
  */
-size_t iov_from_buf(struct iovec *iov, unsigned int iov_cnt,
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
                     size_t offset, const void *buf, size_t bytes);
 size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
                   size_t offset, void *buf, size_t bytes);
diff --git a/kvm-all.c b/kvm-all.c
index 39cff55..92a7137 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -92,7 +92,7 @@
     /* The man page (and posix) say ioctl numbers are signed int, but
      * they're not.  Linux, glibc and *BSD all treat ioctl numbers as
      * unsigned, and treating them as signed here can break things */
-    unsigned irqchip_inject_ioctl;
+    unsigned irq_set_ioctl;
 #ifdef KVM_CAP_IRQ_ROUTING
     struct kvm_irq_routing *irq_routes;
     int nr_allocated_irq_routes;
@@ -870,13 +870,13 @@
 
     event.level = level;
     event.irq = irq;
-    ret = kvm_vm_ioctl(s, s->irqchip_inject_ioctl, &event);
+    ret = kvm_vm_ioctl(s, s->irq_set_ioctl, &event);
     if (ret < 0) {
         perror("kvm_set_irq");
         abort();
     }
 
-    return (s->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
+    return (s->irq_set_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
 }
 
 #ifdef KVM_CAP_IRQ_ROUTING
@@ -1237,10 +1237,6 @@
         return ret;
     }
 
-    s->irqchip_inject_ioctl = KVM_IRQ_LINE;
-    if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
-        s->irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS;
-    }
     kvm_kernel_irqchip = true;
     /* If we have an in-kernel IRQ chip then we must have asynchronous
      * interrupt delivery (though the reverse is not necessarily true)
@@ -1389,6 +1385,11 @@
 
     s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);
 
+    s->irq_set_ioctl = KVM_IRQ_LINE;
+    if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
+        s->irq_set_ioctl = KVM_IRQ_LINE_STATUS;
+    }
+
     ret = kvm_arch_init(s);
     if (ret < 0) {
         goto err;
@@ -1409,13 +1410,11 @@
     return 0;
 
 err:
-    if (s) {
-        if (s->vmfd >= 0) {
-            close(s->vmfd);
-        }
-        if (s->fd != -1) {
-            close(s->fd);
-        }
+    if (s->vmfd >= 0) {
+        close(s->vmfd);
+    }
+    if (s->fd != -1) {
+        close(s->fd);
     }
     g_free(s);
 
@@ -1576,8 +1575,6 @@
         qemu_mutex_lock_iothread();
         kvm_arch_post_run(env, run);
 
-        kvm_flush_coalesced_mmio_buffer();
-
         if (run_ret < 0) {
             if (run_ret == -EINTR || run_ret == -EAGAIN) {
                 DPRINTF("io window exit\n");
diff --git a/libcacard/Makefile b/libcacard/Makefile
index 63990b7..487f434 100644
--- a/libcacard/Makefile
+++ b/libcacard/Makefile
@@ -14,6 +14,9 @@
 
 libcacard.lib-y=$(patsubst %.o,%.lo,$(libcacard-y))
 
+vscclient: $(libcacard-y) $(QEMU_OBJS) vscclient.o cutils.o
+	$(call quiet-command,$(CC) -o $@ $^ $(libcacard_libs) $(LIBS),"  LINK  $@")
+
 clean:
 	rm -f *.o */*.o *.d */*.d *.a */*.a *~ */*~ vscclient *.lo */*.lo .libs/* */.libs/* *.la */*.la *.pc
 	rm -Rf .libs */.libs
diff --git a/libcacard/vcard.c b/libcacard/vcard.c
index b02556e..539177b 100644
--- a/libcacard/vcard.c
+++ b/libcacard/vcard.c
@@ -200,7 +200,6 @@
     }
     vcard_buffer_response_delete(vcard->vcard_buffer_response);
     g_free(vcard);
-    return;
 }
 
 void
diff --git a/libcacard/vcard_emul_nss.c b/libcacard/vcard_emul_nss.c
index 802cae3..5f565e0 100644
--- a/libcacard/vcard_emul_nss.c
+++ b/libcacard/vcard_emul_nss.c
@@ -168,7 +168,6 @@
     if (key->slot) {
         PK11_FreeSlot(key->slot);
     }
-    return;
 }
 
 /*
@@ -418,7 +417,6 @@
     /* TODO: we may also need to send insertion/removal events? */
     slot = vcard_emul_card_get_slot(card);
     PK11_Logout(slot); /* NOTE: ignoring SECStatus return value */
-    return;
 }
 
 
@@ -535,7 +533,6 @@
 
     memcpy(atr, nss_atr, len);
     *atr_len = len;
-    return;
 }
 
 /*
@@ -1169,8 +1166,7 @@
             NEXT_TOKEN(vname)
             NEXT_TOKEN(type_params)
             type_params_length = MIN(type_params_length, sizeof(type_str)-1);
-            strncpy(type_str, type_params, type_params_length);
-            type_str[type_params_length] = 0;
+            pstrcpy(type_str, type_params_length, type_params);
             type = vcard_emul_type_from_string(type_str);
 
             NEXT_TOKEN(type_params)
diff --git a/libcacard/vreader.c b/libcacard/vreader.c
index ec126df..96d2407 100644
--- a/libcacard/vreader.c
+++ b/libcacard/vreader.c
@@ -93,7 +93,6 @@
         reader->reader_private_free(reader->reader_private);
     }
     g_free(reader);
-    return;
 }
 
 static VCard *
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
new file mode 100644
index 0000000..f787b72
--- /dev/null
+++ b/linux-headers/linux/vfio.h
@@ -0,0 +1,368 @@
+/*
+ * VFIO API definition
+ *
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef VFIO_H
+#define VFIO_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define VFIO_API_VERSION	0
+
+
+/* Kernel & User level defines for VFIO IOCTLs. */
+
+/* Extensions */
+
+#define VFIO_TYPE1_IOMMU		1
+
+/*
+ * The IOCTL interface is designed for extensibility by embedding the
+ * structure length (argsz) and flags into structures passed between
+ * kernel and userspace.  We therefore use the _IO() macro for these
+ * defines to avoid implicitly embedding a size into the ioctl request.
+ * As structure fields are added, argsz will increase to match and flag
+ * bits will be defined to indicate additional fields with valid data.
+ * It's *always* the caller's responsibility to indicate the size of
+ * the structure passed by setting argsz appropriately.
+ */
+
+#define VFIO_TYPE	(';')
+#define VFIO_BASE	100
+
+/* -------- IOCTLs for VFIO file descriptor (/dev/vfio/vfio) -------- */
+
+/**
+ * VFIO_GET_API_VERSION - _IO(VFIO_TYPE, VFIO_BASE + 0)
+ *
+ * Report the version of the VFIO API.  This allows us to bump the entire
+ * API version should we later need to add or change features in incompatible
+ * ways.
+ * Return: VFIO_API_VERSION
+ * Availability: Always
+ */
+#define VFIO_GET_API_VERSION		_IO(VFIO_TYPE, VFIO_BASE + 0)
+
+/**
+ * VFIO_CHECK_EXTENSION - _IOW(VFIO_TYPE, VFIO_BASE + 1, __u32)
+ *
+ * Check whether an extension is supported.
+ * Return: 0 if not supported, 1 (or some other positive integer) if supported.
+ * Availability: Always
+ */
+#define VFIO_CHECK_EXTENSION		_IO(VFIO_TYPE, VFIO_BASE + 1)
+
+/**
+ * VFIO_SET_IOMMU - _IOW(VFIO_TYPE, VFIO_BASE + 2, __s32)
+ *
+ * Set the iommu to the given type.  The type must be supported by an
+ * iommu driver as verified by calling CHECK_EXTENSION using the same
+ * type.  A group must be set to this file descriptor before this
+ * ioctl is available.  The IOMMU interfaces enabled by this call are
+ * specific to the value set.
+ * Return: 0 on success, -errno on failure
+ * Availability: When VFIO group attached
+ */
+#define VFIO_SET_IOMMU			_IO(VFIO_TYPE, VFIO_BASE + 2)
+
+/* -------- IOCTLs for GROUP file descriptors (/dev/vfio/$GROUP) -------- */
+
+/**
+ * VFIO_GROUP_GET_STATUS - _IOR(VFIO_TYPE, VFIO_BASE + 3,
+ *						struct vfio_group_status)
+ *
+ * Retrieve information about the group.  Fills in provided
+ * struct vfio_group_info.  Caller sets argsz.
+ * Return: 0 on succes, -errno on failure.
+ * Availability: Always
+ */
+struct vfio_group_status {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_GROUP_FLAGS_VIABLE		(1 << 0)
+#define VFIO_GROUP_FLAGS_CONTAINER_SET	(1 << 1)
+};
+#define VFIO_GROUP_GET_STATUS		_IO(VFIO_TYPE, VFIO_BASE + 3)
+
+/**
+ * VFIO_GROUP_SET_CONTAINER - _IOW(VFIO_TYPE, VFIO_BASE + 4, __s32)
+ *
+ * Set the container for the VFIO group to the open VFIO file
+ * descriptor provided.  Groups may only belong to a single
+ * container.  Containers may, at their discretion, support multiple
+ * groups.  Only when a container is set are all of the interfaces
+ * of the VFIO file descriptor and the VFIO group file descriptor
+ * available to the user.
+ * Return: 0 on success, -errno on failure.
+ * Availability: Always
+ */
+#define VFIO_GROUP_SET_CONTAINER	_IO(VFIO_TYPE, VFIO_BASE + 4)
+
+/**
+ * VFIO_GROUP_UNSET_CONTAINER - _IO(VFIO_TYPE, VFIO_BASE + 5)
+ *
+ * Remove the group from the attached container.  This is the
+ * opposite of the SET_CONTAINER call and returns the group to
+ * an initial state.  All device file descriptors must be released
+ * prior to calling this interface.  When removing the last group
+ * from a container, the IOMMU will be disabled and all state lost,
+ * effectively also returning the VFIO file descriptor to an initial
+ * state.
+ * Return: 0 on success, -errno on failure.
+ * Availability: When attached to container
+ */
+#define VFIO_GROUP_UNSET_CONTAINER	_IO(VFIO_TYPE, VFIO_BASE + 5)
+
+/**
+ * VFIO_GROUP_GET_DEVICE_FD - _IOW(VFIO_TYPE, VFIO_BASE + 6, char)
+ *
+ * Return a new file descriptor for the device object described by
+ * the provided string.  The string should match a device listed in
+ * the devices subdirectory of the IOMMU group sysfs entry.  The
+ * group containing the device must already be added to this context.
+ * Return: new file descriptor on success, -errno on failure.
+ * Availability: When attached to container
+ */
+#define VFIO_GROUP_GET_DEVICE_FD	_IO(VFIO_TYPE, VFIO_BASE + 6)
+
+/* --------------- IOCTLs for DEVICE file descriptors --------------- */
+
+/**
+ * VFIO_DEVICE_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 7,
+ *						struct vfio_device_info)
+ *
+ * Retrieve information about the device.  Fills in provided
+ * struct vfio_device_info.  Caller sets argsz.
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_device_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */
+#define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
+	__u32	num_regions;	/* Max region index + 1 */
+	__u32	num_irqs;	/* Max IRQ index + 1 */
+};
+#define VFIO_DEVICE_GET_INFO		_IO(VFIO_TYPE, VFIO_BASE + 7)
+
+/**
+ * VFIO_DEVICE_GET_REGION_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 8,
+ *				       struct vfio_region_info)
+ *
+ * Retrieve information about a device region.  Caller provides
+ * struct vfio_region_info with index value set.  Caller sets argsz.
+ * Implementation of region mapping is bus driver specific.  This is
+ * intended to describe MMIO, I/O port, as well as bus specific
+ * regions (ex. PCI config space).  Zero sized regions may be used
+ * to describe unimplemented regions (ex. unimplemented PCI BARs).
+ * Return: 0 on success, -errno on failure.
+ */
+struct vfio_region_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_REGION_INFO_FLAG_READ	(1 << 0) /* Region supports read */
+#define VFIO_REGION_INFO_FLAG_WRITE	(1 << 1) /* Region supports write */
+#define VFIO_REGION_INFO_FLAG_MMAP	(1 << 2) /* Region supports mmap */
+	__u32	index;		/* Region index */
+	__u32	resv;		/* Reserved for alignment */
+	__u64	size;		/* Region size (bytes) */
+	__u64	offset;		/* Region offset from start of device fd */
+};
+#define VFIO_DEVICE_GET_REGION_INFO	_IO(VFIO_TYPE, VFIO_BASE + 8)
+
+/**
+ * VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
+ *				    struct vfio_irq_info)
+ *
+ * Retrieve information about a device IRQ.  Caller provides
+ * struct vfio_irq_info with index value set.  Caller sets argsz.
+ * Implementation of IRQ mapping is bus driver specific.  Indexes
+ * using multiple IRQs are primarily intended to support MSI-like
+ * interrupt blocks.  Zero count irq blocks may be used to describe
+ * unimplemented interrupt types.
+ *
+ * The EVENTFD flag indicates the interrupt index supports eventfd based
+ * signaling.
+ *
+ * The MASKABLE flags indicates the index supports MASK and UNMASK
+ * actions described below.
+ *
+ * AUTOMASKED indicates that after signaling, the interrupt line is
+ * automatically masked by VFIO and the user needs to unmask the line
+ * to receive new interrupts.  This is primarily intended to distinguish
+ * level triggered interrupts.
+ *
+ * The NORESIZE flag indicates that the interrupt lines within the index
+ * are setup as a set and new subindexes cannot be enabled without first
+ * disabling the entire index.  This is used for interrupts like PCI MSI
+ * and MSI-X where the driver may only use a subset of the available
+ * indexes, but VFIO needs to enable a specific number of vectors
+ * upfront.  In the case of MSI-X, where the user can enable MSI-X and
+ * then add and unmask vectors, it's up to userspace to make the decision
+ * whether to allocate the maximum supported number of vectors or tear
+ * down setup and incrementally increase the vectors as each is enabled.
+ */
+struct vfio_irq_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IRQ_INFO_EVENTFD		(1 << 0)
+#define VFIO_IRQ_INFO_MASKABLE		(1 << 1)
+#define VFIO_IRQ_INFO_AUTOMASKED	(1 << 2)
+#define VFIO_IRQ_INFO_NORESIZE		(1 << 3)
+	__u32	index;		/* IRQ index */
+	__u32	count;		/* Number of IRQs within this index */
+};
+#define VFIO_DEVICE_GET_IRQ_INFO	_IO(VFIO_TYPE, VFIO_BASE + 9)
+
+/**
+ * VFIO_DEVICE_SET_IRQS - _IOW(VFIO_TYPE, VFIO_BASE + 10, struct vfio_irq_set)
+ *
+ * Set signaling, masking, and unmasking of interrupts.  Caller provides
+ * struct vfio_irq_set with all fields set.  'start' and 'count' indicate
+ * the range of subindexes being specified.
+ *
+ * The DATA flags specify the type of data provided.  If DATA_NONE, the
+ * operation performs the specified action immediately on the specified
+ * interrupt(s).  For example, to unmask AUTOMASKED interrupt [0,0]:
+ * flags = (DATA_NONE|ACTION_UNMASK), index = 0, start = 0, count = 1.
+ *
+ * DATA_BOOL allows sparse support for the same on arrays of interrupts.
+ * For example, to mask interrupts [0,1] and [0,3] (but not [0,2]):
+ * flags = (DATA_BOOL|ACTION_MASK), index = 0, start = 1, count = 3,
+ * data = {1,0,1}
+ *
+ * DATA_EVENTFD binds the specified ACTION to the provided __s32 eventfd.
+ * A value of -1 can be used to either de-assign interrupts if already
+ * assigned or skip un-assigned interrupts.  For example, to set an eventfd
+ * to be trigger for interrupts [0,0] and [0,2]:
+ * flags = (DATA_EVENTFD|ACTION_TRIGGER), index = 0, start = 0, count = 3,
+ * data = {fd1, -1, fd2}
+ * If index [0,1] is previously set, two count = 1 ioctls calls would be
+ * required to set [0,0] and [0,2] without changing [0,1].
+ *
+ * Once a signaling mechanism is set, DATA_BOOL or DATA_NONE can be used
+ * with ACTION_TRIGGER to perform kernel level interrupt loopback testing
+ * from userspace (ie. simulate hardware triggering).
+ *
+ * Setting of an event triggering mechanism to userspace for ACTION_TRIGGER
+ * enables the interrupt index for the device.  Individual subindex interrupts
+ * can be disabled using the -1 value for DATA_EVENTFD or the index can be
+ * disabled as a whole with: flags = (DATA_NONE|ACTION_TRIGGER), count = 0.
+ *
+ * Note that ACTION_[UN]MASK specify user->kernel signaling (irqfds) while
+ * ACTION_TRIGGER specifies kernel->user signaling.
+ */
+struct vfio_irq_set {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IRQ_SET_DATA_NONE		(1 << 0) /* Data not present */
+#define VFIO_IRQ_SET_DATA_BOOL		(1 << 1) /* Data is bool (u8) */
+#define VFIO_IRQ_SET_DATA_EVENTFD	(1 << 2) /* Data is eventfd (s32) */
+#define VFIO_IRQ_SET_ACTION_MASK	(1 << 3) /* Mask interrupt */
+#define VFIO_IRQ_SET_ACTION_UNMASK	(1 << 4) /* Unmask interrupt */
+#define VFIO_IRQ_SET_ACTION_TRIGGER	(1 << 5) /* Trigger interrupt */
+	__u32	index;
+	__u32	start;
+	__u32	count;
+	__u8	data[];
+};
+#define VFIO_DEVICE_SET_IRQS		_IO(VFIO_TYPE, VFIO_BASE + 10)
+
+#define VFIO_IRQ_SET_DATA_TYPE_MASK	(VFIO_IRQ_SET_DATA_NONE | \
+					 VFIO_IRQ_SET_DATA_BOOL | \
+					 VFIO_IRQ_SET_DATA_EVENTFD)
+#define VFIO_IRQ_SET_ACTION_TYPE_MASK	(VFIO_IRQ_SET_ACTION_MASK | \
+					 VFIO_IRQ_SET_ACTION_UNMASK | \
+					 VFIO_IRQ_SET_ACTION_TRIGGER)
+/**
+ * VFIO_DEVICE_RESET - _IO(VFIO_TYPE, VFIO_BASE + 11)
+ *
+ * Reset a device.
+ */
+#define VFIO_DEVICE_RESET		_IO(VFIO_TYPE, VFIO_BASE + 11)
+
+/*
+ * The VFIO-PCI bus driver makes use of the following fixed region and
+ * IRQ index mapping.  Unimplemented regions return a size of zero.
+ * Unimplemented IRQ types return a count of zero.
+ */
+
+enum {
+	VFIO_PCI_BAR0_REGION_INDEX,
+	VFIO_PCI_BAR1_REGION_INDEX,
+	VFIO_PCI_BAR2_REGION_INDEX,
+	VFIO_PCI_BAR3_REGION_INDEX,
+	VFIO_PCI_BAR4_REGION_INDEX,
+	VFIO_PCI_BAR5_REGION_INDEX,
+	VFIO_PCI_ROM_REGION_INDEX,
+	VFIO_PCI_CONFIG_REGION_INDEX,
+	VFIO_PCI_NUM_REGIONS
+};
+
+enum {
+	VFIO_PCI_INTX_IRQ_INDEX,
+	VFIO_PCI_MSI_IRQ_INDEX,
+	VFIO_PCI_MSIX_IRQ_INDEX,
+	VFIO_PCI_NUM_IRQS
+};
+
+/* -------- API for Type1 VFIO IOMMU -------- */
+
+/**
+ * VFIO_IOMMU_GET_INFO - _IOR(VFIO_TYPE, VFIO_BASE + 12, struct vfio_iommu_info)
+ *
+ * Retrieve information about the IOMMU object. Fills in provided
+ * struct vfio_iommu_info. Caller sets argsz.
+ *
+ * XXX Should we do these by CHECK_EXTENSION too?
+ */
+struct vfio_iommu_type1_info {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_IOMMU_INFO_PGSIZES (1 << 0)	/* supported page sizes info */
+	__u64	iova_pgsizes;		/* Bitmap of supported page sizes */
+};
+
+#define VFIO_IOMMU_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12)
+
+/**
+ * VFIO_IOMMU_MAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 13, struct vfio_dma_map)
+ *
+ * Map process virtual addresses to IO virtual addresses using the
+ * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
+ */
+struct vfio_iommu_type1_dma_map {
+	__u32	argsz;
+	__u32	flags;
+#define VFIO_DMA_MAP_FLAG_READ (1 << 0)		/* readable from device */
+#define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)	/* writable from device */
+	__u64	vaddr;				/* Process virtual address */
+	__u64	iova;				/* IO virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+
+#define VFIO_IOMMU_MAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 13)
+
+/**
+ * VFIO_IOMMU_UNMAP_DMA - _IOW(VFIO_TYPE, VFIO_BASE + 14, struct vfio_dma_unmap)
+ *
+ * Unmap IO virtual addresses using the provided struct vfio_dma_unmap.
+ * Caller sets argsz.
+ */
+struct vfio_iommu_type1_dma_unmap {
+	__u32	argsz;
+	__u32	flags;
+	__u64	iova;				/* IO virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+
+#define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
+
+#endif /* VFIO_H */
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 819fdd5..1d8bcb4 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2442,7 +2442,7 @@
 
 static int fill_psinfo(struct target_elf_prpsinfo *psinfo, const TaskState *ts)
 {
-    char *filename, *base_filename;
+    char *base_filename;
     unsigned int i, len;
 
     (void) memset(psinfo, 0, sizeof (*psinfo));
@@ -2464,13 +2464,15 @@
     psinfo->pr_uid = getuid();
     psinfo->pr_gid = getgid();
 
-    filename = strdup(ts->bprm->filename);
-    base_filename = strdup(basename(filename));
+    base_filename = g_path_get_basename(ts->bprm->filename);
+    /*
+     * Using strncpy here is fine: at max-length,
+     * this field is not NUL-terminated.
+     */
     (void) strncpy(psinfo->pr_fname, base_filename,
                    sizeof(psinfo->pr_fname));
-    free(base_filename);
-    free(filename);
 
+    g_free(base_filename);
     bswap_psinfo(psinfo);
     return (0);
 }
diff --git a/linux-user/main.c b/linux-user/main.c
index 1a1c661..9f3476b 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -89,19 +89,6 @@
 }
 #endif
 
-/* timers for rdtsc */
-
-#if 0
-
-static uint64_t emu_time;
-
-int64_t cpu_get_real_ticks(void)
-{
-    return emu_time++;
-}
-
-#endif
-
 #if defined(CONFIG_USE_NPTL)
 /***********************************************************/
 /* Helper routines for implementing atomic operations.  */
@@ -3143,10 +3130,8 @@
     cpu_model = strdup(arg);
     if (cpu_model == NULL || is_help_option(cpu_model)) {
         /* XXX: implement xxx_cpu_list for targets that still miss it */
-#if defined(cpu_list_id)
-        cpu_list_id(stdout, &fprintf, "");
-#elif defined(cpu_list)
-        cpu_list(stdout, &fprintf); /* deprecated */
+#if defined(cpu_list)
+        cpu_list(stdout, &fprintf);
 #endif
         exit(1);
     }
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index 69b27d7..fc4cc00 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -289,46 +289,29 @@
  * struct has been locked - usually with lock_user_struct().
  */
 #define __put_user(x, hptr)\
-({\
+({ __typeof(*hptr) pu_ = (x);\
     switch(sizeof(*hptr)) {\
-    case 1:\
-        *(uint8_t *)(hptr) = (uint8_t)(typeof(*hptr))(x);\
-        break;\
-    case 2:\
-        *(uint16_t *)(hptr) = tswap16((uint16_t)(typeof(*hptr))(x));\
-        break;\
-    case 4:\
-        *(uint32_t *)(hptr) = tswap32((uint32_t)(typeof(*hptr))(x));\
-        break;\
-    case 8:\
-        *(uint64_t *)(hptr) = tswap64((typeof(*hptr))(x));\
-        break;\
-    default:\
-        abort();\
+    case 1: break;\
+    case 2: pu_ = tswap16(pu_); break; \
+    case 4: pu_ = tswap32(pu_); break; \
+    case 8: pu_ = tswap64(pu_); break; \
+    default: abort();\
     }\
+    memcpy(hptr, &pu_, sizeof(pu_)); \
     0;\
 })
 
 #define __get_user(x, hptr) \
-({\
+({ __typeof(*hptr) gu_; \
+    memcpy(&gu_, hptr, sizeof(gu_)); \
     switch(sizeof(*hptr)) {\
-    case 1:\
-        x = (typeof(*hptr))*(uint8_t *)(hptr);\
-        break;\
-    case 2:\
-        x = (typeof(*hptr))tswap16(*(uint16_t *)(hptr));\
-        break;\
-    case 4:\
-        x = (typeof(*hptr))tswap32(*(uint32_t *)(hptr));\
-        break;\
-    case 8:\
-        x = (typeof(*hptr))tswap64(*(uint64_t *)(hptr));\
-        break;\
-    default:\
-        /* avoid warning */\
-        x = 0;\
-        abort();\
+    case 1: break; \
+    case 2: gu_ = tswap16(gu_); break; \
+    case 4: gu_ = tswap32(gu_); break; \
+    case 8: gu_ = tswap64(gu_); break; \
+    default: abort();\
     }\
+    (x) = gu_; \
     0;\
 })
 
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 7869147..15bc4e8 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -2762,7 +2762,6 @@
 give_sigsegv:
     unlock_user_struct(frame, frame_addr, 1);
     force_sig(TARGET_SIGSEGV/*, current*/);
-    return;
 }
 
 long do_sigreturn(CPUMIPSState *regs)
@@ -2871,7 +2870,6 @@
 give_sigsegv:
     unlock_user_struct(frame, frame_addr, 1);
     force_sig(TARGET_SIGSEGV/*, current*/);
-    return;
 }
 
 long do_rt_sigreturn(CPUMIPSState *env)
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 6257a04..471d060 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -3628,9 +3628,7 @@
         unlock_user(argptr, arg, target_size);
     }
 out:
-    if (big_buf) {
-        free(big_buf);
-    }
+    g_free(big_buf);
     return ret;
 }
 
diff --git a/memory.c b/memory.c
index d528d1f..4f3ade0 100644
--- a/memory.c
+++ b/memory.c
@@ -24,7 +24,6 @@
 #include "exec-obsolete.h"
 
 unsigned memory_region_transaction_depth = 0;
-static bool memory_region_update_pending = false;
 static bool global_dirty_log = false;
 
 static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners
@@ -311,6 +310,9 @@
     MemoryRegion *mr = opaque;
     uint64_t tmp;
 
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
     tmp = mr->ops->read(mr->opaque, addr, size);
     *value |= (tmp & mask) << shift;
 }
@@ -325,6 +327,9 @@
     MemoryRegion *mr = opaque;
     uint64_t tmp;
 
+    if (mr->flush_coalesced_mmio) {
+        qemu_flush_coalesced_mmio_buffer();
+    }
     tmp = (*value >> shift) & mask;
     mr->ops->write(mr->opaque, addr, tmp, size);
 }
@@ -726,33 +731,9 @@
     address_space_update_ioeventfds(as);
 }
 
-static void memory_region_update_topology(MemoryRegion *mr)
-{
-    if (memory_region_transaction_depth) {
-        memory_region_update_pending |= !mr || mr->enabled;
-        return;
-    }
-
-    if (mr && !mr->enabled) {
-        return;
-    }
-
-    MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
-
-    if (address_space_memory.root) {
-        address_space_update_topology(&address_space_memory);
-    }
-    if (address_space_io.root) {
-        address_space_update_topology(&address_space_io);
-    }
-
-    MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
-
-    memory_region_update_pending = false;
-}
-
 void memory_region_transaction_begin(void)
 {
+    qemu_flush_coalesced_mmio_buffer();
     ++memory_region_transaction_depth;
 }
 
@@ -760,8 +741,17 @@
 {
     assert(memory_region_transaction_depth);
     --memory_region_transaction_depth;
-    if (!memory_region_transaction_depth && memory_region_update_pending) {
-        memory_region_update_topology(NULL);
+    if (!memory_region_transaction_depth) {
+        MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
+
+        if (address_space_memory.root) {
+            address_space_update_topology(&address_space_memory);
+        }
+        if (address_space_io.root) {
+            address_space_update_topology(&address_space_io);
+        }
+
+        MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
     }
 }
 
@@ -826,6 +816,7 @@
     mr->dirty_log_mask = 0;
     mr->ioeventfd_nb = 0;
     mr->ioeventfds = NULL;
+    mr->flush_coalesced_mmio = false;
 }
 
 static bool memory_region_access_valid(MemoryRegion *mr,
@@ -1069,8 +1060,9 @@
 {
     uint8_t mask = 1 << client;
 
+    memory_region_transaction_begin();
     mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask);
-    memory_region_update_topology(mr);
+    memory_region_transaction_commit();
 }
 
 bool memory_region_get_dirty(MemoryRegion *mr, target_phys_addr_t addr,
@@ -1103,16 +1095,18 @@
 void memory_region_set_readonly(MemoryRegion *mr, bool readonly)
 {
     if (mr->readonly != readonly) {
+        memory_region_transaction_begin();
         mr->readonly = readonly;
-        memory_region_update_topology(mr);
+        memory_region_transaction_commit();
     }
 }
 
 void memory_region_rom_device_set_readable(MemoryRegion *mr, bool readable)
 {
     if (mr->readable != readable) {
+        memory_region_transaction_begin();
         mr->readable = readable;
-        memory_region_update_topology(mr);
+        memory_region_transaction_commit();
     }
 }
 
@@ -1176,12 +1170,16 @@
     cmr->addr = addrrange_make(int128_make64(offset), int128_make64(size));
     QTAILQ_INSERT_TAIL(&mr->coalesced, cmr, link);
     memory_region_update_coalesced_range(mr);
+    memory_region_set_flush_coalesced(mr);
 }
 
 void memory_region_clear_coalescing(MemoryRegion *mr)
 {
     CoalescedMemoryRange *cmr;
 
+    qemu_flush_coalesced_mmio_buffer();
+    mr->flush_coalesced_mmio = false;
+
     while (!QTAILQ_EMPTY(&mr->coalesced)) {
         cmr = QTAILQ_FIRST(&mr->coalesced);
         QTAILQ_REMOVE(&mr->coalesced, cmr, link);
@@ -1190,6 +1188,19 @@
     memory_region_update_coalesced_range(mr);
 }
 
+void memory_region_set_flush_coalesced(MemoryRegion *mr)
+{
+    mr->flush_coalesced_mmio = true;
+}
+
+void memory_region_clear_flush_coalesced(MemoryRegion *mr)
+{
+    qemu_flush_coalesced_mmio_buffer();
+    if (QTAILQ_EMPTY(&mr->coalesced)) {
+        mr->flush_coalesced_mmio = false;
+    }
+}
+
 void memory_region_add_eventfd(MemoryRegion *mr,
                                target_phys_addr_t addr,
                                unsigned size,
@@ -1206,6 +1217,7 @@
     };
     unsigned i;
 
+    memory_region_transaction_begin();
     for (i = 0; i < mr->ioeventfd_nb; ++i) {
         if (memory_region_ioeventfd_before(mrfd, mr->ioeventfds[i])) {
             break;
@@ -1217,7 +1229,7 @@
     memmove(&mr->ioeventfds[i+1], &mr->ioeventfds[i],
             sizeof(*mr->ioeventfds) * (mr->ioeventfd_nb-1 - i));
     mr->ioeventfds[i] = mrfd;
-    memory_region_update_topology(mr);
+    memory_region_transaction_commit();
 }
 
 void memory_region_del_eventfd(MemoryRegion *mr,
@@ -1236,6 +1248,7 @@
     };
     unsigned i;
 
+    memory_region_transaction_begin();
     for (i = 0; i < mr->ioeventfd_nb; ++i) {
         if (memory_region_ioeventfd_equal(mrfd, mr->ioeventfds[i])) {
             break;
@@ -1247,7 +1260,7 @@
     --mr->ioeventfd_nb;
     mr->ioeventfds = g_realloc(mr->ioeventfds,
                                   sizeof(*mr->ioeventfds)*mr->ioeventfd_nb + 1);
-    memory_region_update_topology(mr);
+    memory_region_transaction_commit();
 }
 
 static void memory_region_add_subregion_common(MemoryRegion *mr,
@@ -1256,6 +1269,8 @@
 {
     MemoryRegion *other;
 
+    memory_region_transaction_begin();
+
     assert(!subregion->parent);
     subregion->parent = mr;
     subregion->addr = offset;
@@ -1288,7 +1303,7 @@
     }
     QTAILQ_INSERT_TAIL(&mr->subregions, subregion, subregions_link);
 done:
-    memory_region_update_topology(mr);
+    memory_region_transaction_commit();
 }
 
 
@@ -1314,10 +1329,11 @@
 void memory_region_del_subregion(MemoryRegion *mr,
                                  MemoryRegion *subregion)
 {
+    memory_region_transaction_begin();
     assert(subregion->parent == mr);
     subregion->parent = NULL;
     QTAILQ_REMOVE(&mr->subregions, subregion, subregions_link);
-    memory_region_update_topology(mr);
+    memory_region_transaction_commit();
 }
 
 void memory_region_set_enabled(MemoryRegion *mr, bool enabled)
@@ -1325,8 +1341,9 @@
     if (enabled == mr->enabled) {
         return;
     }
+    memory_region_transaction_begin();
     mr->enabled = enabled;
-    memory_region_update_topology(NULL);
+    memory_region_transaction_commit();
 }
 
 void memory_region_set_address(MemoryRegion *mr, target_phys_addr_t addr)
@@ -1352,16 +1369,15 @@
 
 void memory_region_set_alias_offset(MemoryRegion *mr, target_phys_addr_t offset)
 {
-    target_phys_addr_t old_offset = mr->alias_offset;
-
     assert(mr->alias);
-    mr->alias_offset = offset;
 
-    if (offset == old_offset || !mr->parent) {
+    if (offset == mr->alias_offset) {
         return;
     }
 
-    memory_region_update_topology(mr);
+    memory_region_transaction_begin();
+    mr->alias_offset = offset;
+    memory_region_transaction_commit();
 }
 
 ram_addr_t memory_region_get_ram_addr(MemoryRegion *mr)
@@ -1493,14 +1509,16 @@
 
 void set_system_memory_map(MemoryRegion *mr)
 {
+    memory_region_transaction_begin();
     address_space_memory.root = mr;
-    memory_region_update_topology(NULL);
+    memory_region_transaction_commit();
 }
 
 void set_system_io_map(MemoryRegion *mr)
 {
+    memory_region_transaction_begin();
     address_space_io.root = mr;
-    memory_region_update_topology(NULL);
+    memory_region_transaction_commit();
 }
 
 uint64_t io_mem_read(MemoryRegion *mr, target_phys_addr_t addr, unsigned size)
diff --git a/memory.h b/memory.h
index bd1bbae..37ce151 100644
--- a/memory.h
+++ b/memory.h
@@ -133,6 +133,7 @@
     bool enabled;
     bool rom_device;
     bool warning_printed; /* For reservations */
+    bool flush_coalesced_mmio;
     MemoryRegion *alias;
     target_phys_addr_t alias_offset;
     unsigned priority;
@@ -252,9 +253,9 @@
                             uint64_t size);
 
 /**
- * memory_region_init_ram:  Initialize RAM memory region from a user-provided.
- *                          pointer.  Accesses into the region will modify
- *                          memory directly.
+ * memory_region_init_ram_ptr:  Initialize RAM memory region from a
+ *                              user-provided pointer.  Accesses into the
+ *                              region will modify memory directly.
  *
  * @mr: the #MemoryRegion to be initialized.
  * @name: the name of the region.
@@ -521,6 +522,31 @@
 void memory_region_clear_coalescing(MemoryRegion *mr);
 
 /**
+ * memory_region_set_flush_coalesced: Enforce memory coalescing flush before
+ *                                    accesses.
+ *
+ * Ensure that pending coalesced MMIO request are flushed before the memory
+ * region is accessed. This property is automatically enabled for all regions
+ * passed to memory_region_set_coalescing() and memory_region_add_coalescing().
+ *
+ * @mr: the memory region to be updated.
+ */
+void memory_region_set_flush_coalesced(MemoryRegion *mr);
+
+/**
+ * memory_region_clear_flush_coalesced: Disable memory coalescing flush before
+ *                                      accesses.
+ *
+ * Clear the automatic coalesced MMIO flushing enabled via
+ * memory_region_set_flush_coalesced. Note that this service has no effect on
+ * memory regions that have MMIO coalescing enabled for themselves. For them,
+ * automatic flushing will stop once coalescing is disabled.
+ *
+ * @mr: the memory region to be updated.
+ */
+void memory_region_clear_flush_coalesced(MemoryRegion *mr);
+
+/**
  * memory_region_add_eventfd: Request an eventfd to be triggered when a word
  *                            is written to a location.
  *
@@ -581,7 +607,8 @@
                                  target_phys_addr_t offset,
                                  MemoryRegion *subregion);
 /**
- * memory_region_add_subregion: Add a subregion to a container, with overlap.
+ * memory_region_add_subregion_overlap: Add a subregion to a container
+ *                                      with overlap.
  *
  * Adds a subregion at @offset.  The subregion may overlap with other
  * subregions.  Conflicts are resolved by having a higher @priority hide a
@@ -743,7 +770,7 @@
 void memory_global_dirty_log_start(void);
 
 /**
- * memory_global_dirty_log_stop: begin dirty logging for all regions
+ * memory_global_dirty_log_stop: end dirty logging for all regions
  */
 void memory_global_dirty_log_stop(void);
 
diff --git a/migration-fd.c b/migration-fd.c
index 50138ed..7335167 100644
--- a/migration-fd.c
+++ b/migration-fd.c
@@ -75,7 +75,7 @@
 
 int fd_start_outgoing_migration(MigrationState *s, const char *fdname)
 {
-    s->fd = monitor_get_fd(cur_mon, fdname);
+    s->fd = monitor_get_fd(cur_mon, fdname, NULL);
     if (s->fd == -1) {
         DPRINTF("fd_migration: invalid file descriptor identifier\n");
         goto err_after_get_fd;
diff --git a/migration-tcp.c b/migration-tcp.c
index ac891c3..a15c2b8 100644
--- a/migration-tcp.c
+++ b/migration-tcp.c
@@ -53,54 +53,35 @@
     return r;
 }
 
-static void tcp_wait_for_connect(void *opaque)
+static void tcp_wait_for_connect(int fd, void *opaque)
 {
     MigrationState *s = opaque;
-    int val, ret;
-    socklen_t valsize = sizeof(val);
 
-    DPRINTF("connect completed\n");
-    do {
-        ret = getsockopt(s->fd, SOL_SOCKET, SO_ERROR, (void *) &val, &valsize);
-    } while (ret == -1 && (socket_error()) == EINTR);
-
-    if (ret < 0) {
+    if (fd < 0) {
+        DPRINTF("migrate connect error\n");
+        s->fd = -1;
         migrate_fd_error(s);
-        return;
-    }
-
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-
-    if (val == 0)
+    } else {
+        DPRINTF("migrate connect success\n");
+        s->fd = fd;
         migrate_fd_connect(s);
-    else {
-        DPRINTF("error connecting %d\n", val);
-        migrate_fd_error(s);
     }
 }
 
 int tcp_start_outgoing_migration(MigrationState *s, const char *host_port,
                                  Error **errp)
 {
-    bool in_progress;
-
     s->get_error = socket_errno;
     s->write = socket_write;
     s->close = tcp_close;
 
-    s->fd = inet_connect(host_port, false, &in_progress, errp);
+    s->fd = inet_nonblocking_connect(host_port, tcp_wait_for_connect, s,
+                                     errp);
     if (error_is_set(errp)) {
         migrate_fd_error(s);
         return -1;
     }
 
-    if (in_progress) {
-        DPRINTF("connect in progress\n");
-        qemu_set_fd_handler2(s->fd, NULL, NULL, tcp_wait_for_connect, s);
-    } else {
-        migrate_fd_connect(s);
-    }
-
     return 0;
 }
 
diff --git a/migration.c b/migration.c
index 1edeec5..22a05c4 100644
--- a/migration.c
+++ b/migration.c
@@ -240,7 +240,9 @@
 {
     int ret = 0;
 
-    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
+    if (s->fd != -1) {
+        qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
+    }
 
     if (s->file) {
         DPRINTF("closing file\n");
diff --git a/monitor.c b/monitor.c
index 4ca30e1..6e828f9 100644
--- a/monitor.c
+++ b/monitor.c
@@ -450,6 +450,7 @@
     [QEVENT_SPICE_DISCONNECTED] = "SPICE_DISCONNECTED",
     [QEVENT_BLOCK_JOB_COMPLETED] = "BLOCK_JOB_COMPLETED",
     [QEVENT_BLOCK_JOB_CANCELLED] = "BLOCK_JOB_CANCELLED",
+    [QEVENT_BLOCK_JOB_ERROR] = "BLOCK_JOB_ERROR",
     [QEVENT_DEVICE_TRAY_MOVED] = "DEVICE_TRAY_MOVED",
     [QEVENT_SUSPEND] = "SUSPEND",
     [QEVENT_SUSPEND_DISK] = "SUSPEND_DISK",
@@ -897,13 +898,7 @@
 {
     CPUArchState *env;
     env = mon_get_cpu();
-#ifdef TARGET_I386
-    cpu_dump_state(env, (FILE *)mon, monitor_fprintf,
-                   X86_DUMP_FPU);
-#else
-    cpu_dump_state(env, (FILE *)mon, monitor_fprintf,
-                   0);
-#endif
+    cpu_dump_state(env, (FILE *)mon, monitor_fprintf, CPU_DUMP_FPU);
 }
 
 static void do_cpu_set_nr(Monitor *mon, const QDict *qdict)
@@ -965,45 +960,6 @@
     trace_print_events((FILE *)mon, &monitor_fprintf);
 }
 
-static int add_graphics_client(Monitor *mon, const QDict *qdict, QObject **ret_data)
-{
-    const char *protocol  = qdict_get_str(qdict, "protocol");
-    const char *fdname = qdict_get_str(qdict, "fdname");
-    CharDriverState *s;
-
-    if (strcmp(protocol, "spice") == 0) {
-        int fd = monitor_get_fd(mon, fdname);
-        int skipauth = qdict_get_try_bool(qdict, "skipauth", 0);
-        int tls = qdict_get_try_bool(qdict, "tls", 0);
-        if (!using_spice) {
-            /* correct one? spice isn't a device ,,, */
-            qerror_report(QERR_DEVICE_NOT_ACTIVE, "spice");
-            return -1;
-        }
-        if (qemu_spice_display_add_client(fd, skipauth, tls) < 0) {
-            close(fd);
-        }
-        return 0;
-#ifdef CONFIG_VNC
-    } else if (strcmp(protocol, "vnc") == 0) {
-	int fd = monitor_get_fd(mon, fdname);
-        int skipauth = qdict_get_try_bool(qdict, "skipauth", 0);
-	vnc_display_add_client(NULL, fd, skipauth);
-	return 0;
-#endif
-    } else if ((s = qemu_chr_find(protocol)) != NULL) {
-	int fd = monitor_get_fd(mon, fdname);
-	if (qemu_chr_add_client(s, fd) < 0) {
-	    qerror_report(QERR_ADD_CLIENT_FAILED);
-	    return -1;
-	}
-	return 0;
-    }
-
-    qerror_report(QERR_INVALID_PARAMETER, "protocol");
-    return -1;
-}
-
 static int client_migrate_info(Monitor *mon, const QDict *qdict,
                                MonitorCompletion cb, void *opaque)
 {
@@ -2139,7 +2095,7 @@
     }
 }
 
-int monitor_get_fd(Monitor *mon, const char *fdname)
+int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp)
 {
     mon_fd_t *monfd;
 
@@ -2160,6 +2116,7 @@
         return fd;
     }
 
+    error_setg(errp, "File descriptor named '%s' has not been found", fdname);
     return -1;
 }
 
@@ -2431,12 +2388,14 @@
 int monitor_handle_fd_param(Monitor *mon, const char *fdname)
 {
     int fd;
+    Error *local_err = NULL;
 
     if (!qemu_isdigit(fdname[0]) && mon) {
 
-        fd = monitor_get_fd(mon, fdname);
+        fd = monitor_get_fd(mon, fdname, &local_err);
         if (fd == -1) {
-            error_report("No file descriptor named %s found", fdname);
+            qerror_report_err(local_err);
+            error_free(local_err);
             return -1;
         }
     } else {
@@ -3280,11 +3239,7 @@
         break;
     default:
         errno = 0;
-#if TARGET_PHYS_ADDR_BITS > 32
         n = strtoull(pch, &p, 0);
-#else
-        n = strtoul(pch, &p, 0);
-#endif
         if (errno == ERANGE) {
             expr_error(mon, "number too large");
         }
diff --git a/monitor.h b/monitor.h
index 64c1561..b6e7d95 100644
--- a/monitor.h
+++ b/monitor.h
@@ -38,6 +38,7 @@
     QEVENT_SPICE_DISCONNECTED,
     QEVENT_BLOCK_JOB_COMPLETED,
     QEVENT_BLOCK_JOB_CANCELLED,
+    QEVENT_BLOCK_JOB_ERROR,
     QEVENT_DEVICE_TRAY_MOVED,
     QEVENT_SUSPEND,
     QEVENT_SUSPEND_DISK,
@@ -66,7 +67,7 @@
                                   BlockDriverCompletionFunc *completion_cb,
                                   void *opaque);
 
-int monitor_get_fd(Monitor *mon, const char *fdname);
+int monitor_get_fd(Monitor *mon, const char *fdname, Error **errp);
 int monitor_handle_fd_param(Monitor *mon, const char *fdname);
 
 void monitor_vprintf(Monitor *mon, const char *fmt, va_list ap)
diff --git a/nbd.c b/nbd.c
index 0dd60c5..6f0db62 100644
--- a/nbd.c
+++ b/nbd.c
@@ -57,9 +57,12 @@
 
 /* This is all part of the "official" NBD API */
 
+#define NBD_REQUEST_SIZE        (4 + 4 + 8 + 8 + 4)
 #define NBD_REPLY_SIZE          (4 + 4 + 8)
 #define NBD_REQUEST_MAGIC       0x25609513
 #define NBD_REPLY_MAGIC         0x67446698
+#define NBD_OPTS_MAGIC          0x49484156454F5054LL
+#define NBD_CLIENT_MAGIC        0x0000420281861253LL
 
 #define NBD_SET_SOCK            _IO(0xab, 0)
 #define NBD_SET_BLKSIZE         _IO(0xab, 1)
@@ -75,6 +78,49 @@
 
 #define NBD_OPT_EXPORT_NAME     (1 << 0)
 
+/* Definitions for opaque data types */
+
+typedef struct NBDRequest NBDRequest;
+
+struct NBDRequest {
+    QSIMPLEQ_ENTRY(NBDRequest) entry;
+    NBDClient *client;
+    uint8_t *data;
+};
+
+struct NBDExport {
+    int refcount;
+    void (*close)(NBDExport *exp);
+
+    BlockDriverState *bs;
+    char *name;
+    off_t dev_offset;
+    off_t size;
+    uint32_t nbdflags;
+    QTAILQ_HEAD(, NBDClient) clients;
+    QSIMPLEQ_HEAD(, NBDRequest) requests;
+    QTAILQ_ENTRY(NBDExport) next;
+};
+
+static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);
+
+struct NBDClient {
+    int refcount;
+    void (*close)(NBDClient *client);
+
+    NBDExport *exp;
+    int sock;
+
+    Coroutine *recv_coroutine;
+
+    CoMutex send_lock;
+    Coroutine *send_coroutine;
+
+    QTAILQ_ENTRY(NBDClient) next;
+    int nb_requests;
+    bool closing;
+};
+
 /* That's all folks */
 
 ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read)
@@ -162,7 +208,7 @@
 
 int tcp_socket_outgoing_spec(const char *address_and_port)
 {
-    return inet_connect(address_and_port, true, NULL, NULL);
+    return inet_connect(address_and_port, NULL);
 }
 
 int tcp_socket_incoming(const char *address, uint16_t port)
@@ -192,11 +238,23 @@
     return unix_connect(path);
 }
 
-/* Basic flow
+/* Basic flow for negotiation
 
    Server         Client
-
    Negotiate
+
+   or
+
+   Server         Client
+   Negotiate #1
+                  Option
+   Negotiate #2
+
+   ----
+
+   followed by
+
+   Server         Client
                   Request
    Response
                   Request
@@ -204,19 +262,112 @@
                   ...
    ...
                   Request (type == 2)
+
 */
 
-static int nbd_send_negotiate(int csock, off_t size, uint32_t flags)
+static int nbd_receive_options(NBDClient *client)
 {
-    char buf[8 + 8 + 8 + 128];
+    int csock = client->sock;
+    char name[256];
+    uint32_t tmp, length;
+    uint64_t magic;
     int rc;
 
-    /* Negotiate
-        [ 0 ..   7]   passwd   ("NBDMAGIC")
-        [ 8 ..  15]   magic    (0x00420281861253)
+    /* Client sends:
+        [ 0 ..   3]   reserved (0)
+        [ 4 ..  11]   NBD_OPTS_MAGIC
+        [12 ..  15]   NBD_OPT_EXPORT_NAME
+        [16 ..  19]   length
+        [20 ..  xx]   export name (length bytes)
+     */
+
+    rc = -EINVAL;
+    if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
+        LOG("read failed");
+        goto fail;
+    }
+    TRACE("Checking reserved");
+    if (tmp != 0) {
+        LOG("Bad reserved received");
+        goto fail;
+    }
+
+    if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
+        LOG("read failed");
+        goto fail;
+    }
+    TRACE("Checking reserved");
+    if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
+        LOG("Bad magic received");
+        goto fail;
+    }
+
+    if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
+        LOG("read failed");
+        goto fail;
+    }
+    TRACE("Checking option");
+    if (tmp != be32_to_cpu(NBD_OPT_EXPORT_NAME)) {
+        LOG("Bad option received");
+        goto fail;
+    }
+
+    if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) {
+        LOG("read failed");
+        goto fail;
+    }
+    TRACE("Checking length");
+    length = be32_to_cpu(length);
+    if (length > 255) {
+        LOG("Bad length received");
+        goto fail;
+    }
+    if (read_sync(csock, name, length) != length) {
+        LOG("read failed");
+        goto fail;
+    }
+    name[length] = '\0';
+
+    client->exp = nbd_export_find(name);
+    if (!client->exp) {
+        LOG("export not found");
+        goto fail;
+    }
+
+    QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
+    nbd_export_get(client->exp);
+
+    TRACE("Option negotiation succeeded.");
+    rc = 0;
+fail:
+    return rc;
+}
+
+static int nbd_send_negotiate(NBDClient *client)
+{
+    int csock = client->sock;
+    char buf[8 + 8 + 8 + 128];
+    int rc;
+    const int myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
+                         NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
+
+    /* Negotiation header without options:
+        [ 0 ..   7]   passwd       ("NBDMAGIC")
+        [ 8 ..  15]   magic        (NBD_CLIENT_MAGIC)
         [16 ..  23]   size
-        [24 ..  27]   flags
-        [28 .. 151]   reserved (0)
+        [24 ..  25]   server flags (0)
+        [24 ..  27]   export flags
+        [28 .. 151]   reserved     (0)
+
+       Negotiation header with options, part 1:
+        [ 0 ..   7]   passwd       ("NBDMAGIC")
+        [ 8 ..  15]   magic        (NBD_OPTS_MAGIC)
+        [16 ..  17]   server flags (0)
+
+       part 2 (after options are sent):
+        [18 ..  25]   size
+        [26 ..  27]   export flags
+        [28 .. 151]   reserved     (0)
      */
 
     socket_set_block(csock);
@@ -224,16 +375,39 @@
 
     TRACE("Beginning negotiation.");
     memcpy(buf, "NBDMAGIC", 8);
-    cpu_to_be64w((uint64_t*)(buf + 8), 0x00420281861253LL);
-    cpu_to_be64w((uint64_t*)(buf + 16), size);
-    cpu_to_be32w((uint32_t*)(buf + 24),
-                 flags | NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
-                 NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
+    if (client->exp) {
+        assert ((client->exp->nbdflags & ~65535) == 0);
+        cpu_to_be64w((uint64_t*)(buf + 8), NBD_CLIENT_MAGIC);
+        cpu_to_be64w((uint64_t*)(buf + 16), client->exp->size);
+        cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
+    } else {
+        cpu_to_be64w((uint64_t*)(buf + 8), NBD_OPTS_MAGIC);
+    }
     memset(buf + 28, 0, 124);
 
-    if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
-        LOG("write failed");
-        goto fail;
+    if (client->exp) {
+        if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
+            LOG("write failed");
+            goto fail;
+        }
+    } else {
+        if (write_sync(csock, buf, 18) != 18) {
+            LOG("write failed");
+            goto fail;
+        }
+        rc = nbd_receive_options(client);
+        if (rc < 0) {
+            LOG("option negotiation failed");
+            goto fail;
+        }
+
+        assert ((client->exp->nbdflags & ~65535) == 0);
+        cpu_to_be64w((uint64_t*)(buf + 18), client->exp->size);
+        cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
+        if (write_sync(csock, buf + 18, sizeof(buf) - 18) != sizeof(buf) - 18) {
+            LOG("write failed");
+            goto fail;
+        }
     }
 
     TRACE("Negotiation succeeded.");
@@ -295,7 +469,7 @@
         uint32_t namesize;
 
         TRACE("Checking magic (opts_magic)");
-        if (magic != 0x49484156454F5054LL) {
+        if (magic != NBD_OPTS_MAGIC) {
             LOG("Bad magic received");
             goto fail;
         }
@@ -334,7 +508,7 @@
     } else {
         TRACE("Checking magic (cli_magic)");
 
-        if (magic != 0x00420281861253LL) {
+        if (magic != NBD_CLIENT_MAGIC) {
             LOG("Bad magic received");
             goto fail;
         }
@@ -477,7 +651,7 @@
 
 ssize_t nbd_send_request(int csock, struct nbd_request *request)
 {
-    uint8_t buf[4 + 4 + 8 + 8 + 4];
+    uint8_t buf[NBD_REQUEST_SIZE];
     ssize_t ret;
 
     cpu_to_be32w((uint32_t*)buf, NBD_REQUEST_MAGIC);
@@ -504,7 +678,7 @@
 
 static ssize_t nbd_receive_request(int csock, struct nbd_request *request)
 {
-    uint8_t buf[4 + 4 + 8 + 8 + 4];
+    uint8_t buf[NBD_REQUEST_SIZE];
     uint32_t magic;
     ssize_t ret;
 
@@ -582,7 +756,7 @@
 
 static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
 {
-    uint8_t buf[4 + 4 + 8];
+    uint8_t buf[NBD_REPLY_SIZE];
     ssize_t ret;
 
     /* Reply
@@ -610,58 +784,47 @@
 
 #define MAX_NBD_REQUESTS 16
 
-typedef struct NBDRequest NBDRequest;
-
-struct NBDRequest {
-    QSIMPLEQ_ENTRY(NBDRequest) entry;
-    NBDClient *client;
-    uint8_t *data;
-};
-
-struct NBDExport {
-    BlockDriverState *bs;
-    off_t dev_offset;
-    off_t size;
-    uint32_t nbdflags;
-    QSIMPLEQ_HEAD(, NBDRequest) requests;
-};
-
-struct NBDClient {
-    int refcount;
-    void (*close)(NBDClient *client);
-
-    NBDExport *exp;
-    int sock;
-
-    Coroutine *recv_coroutine;
-
-    CoMutex send_lock;
-    Coroutine *send_coroutine;
-
-    int nb_requests;
-};
-
-static void nbd_client_get(NBDClient *client)
+void nbd_client_get(NBDClient *client)
 {
     client->refcount++;
 }
 
-static void nbd_client_put(NBDClient *client)
+void nbd_client_put(NBDClient *client)
 {
     if (--client->refcount == 0) {
+        /* The last reference should be dropped by client->close,
+         * which is called by nbd_client_close.
+         */
+        assert(client->closing);
+
+        qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
+        close(client->sock);
+        client->sock = -1;
+        if (client->exp) {
+            QTAILQ_REMOVE(&client->exp->clients, client, next);
+            nbd_export_put(client->exp);
+        }
         g_free(client);
     }
 }
 
-static void nbd_client_close(NBDClient *client)
+void nbd_client_close(NBDClient *client)
 {
-    qemu_set_fd_handler2(client->sock, NULL, NULL, NULL, NULL);
-    close(client->sock);
-    client->sock = -1;
+    if (client->closing) {
+        return;
+    }
+
+    client->closing = true;
+
+    /* Force requests to finish.  They will drop their own references,
+     * then we'll close the socket and free the NBDClient.
+     */
+    shutdown(client->sock, 2);
+
+    /* Also tell the client, so that they release their reference.  */
     if (client->close) {
         client->close(client);
     }
-    nbd_client_put(client);
 }
 
 static NBDRequest *nbd_request_get(NBDClient *client)
@@ -695,28 +858,109 @@
 }
 
 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
-                          off_t size, uint32_t nbdflags)
+                          off_t size, uint32_t nbdflags,
+                          void (*close)(NBDExport *))
 {
     NBDExport *exp = g_malloc0(sizeof(NBDExport));
     QSIMPLEQ_INIT(&exp->requests);
+    exp->refcount = 1;
+    QTAILQ_INIT(&exp->clients);
     exp->bs = bs;
     exp->dev_offset = dev_offset;
     exp->nbdflags = nbdflags;
     exp->size = size == -1 ? bdrv_getlength(bs) : size;
+    exp->close = close;
     return exp;
 }
 
+NBDExport *nbd_export_find(const char *name)
+{
+    NBDExport *exp;
+    QTAILQ_FOREACH(exp, &exports, next) {
+        if (strcmp(name, exp->name) == 0) {
+            return exp;
+        }
+    }
+
+    return NULL;
+}
+
+void nbd_export_set_name(NBDExport *exp, const char *name)
+{
+    if (exp->name == name) {
+        return;
+    }
+
+    nbd_export_get(exp);
+    if (exp->name != NULL) {
+        g_free(exp->name);
+        exp->name = NULL;
+        QTAILQ_REMOVE(&exports, exp, next);
+        nbd_export_put(exp);
+    }
+    if (name != NULL) {
+        nbd_export_get(exp);
+        exp->name = g_strdup(name);
+        QTAILQ_INSERT_TAIL(&exports, exp, next);
+    }
+    nbd_export_put(exp);
+}
+
 void nbd_export_close(NBDExport *exp)
 {
-    while (!QSIMPLEQ_EMPTY(&exp->requests)) {
-        NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
-        QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
-        qemu_vfree(first->data);
-        g_free(first);
+    NBDClient *client, *next;
+
+    nbd_export_get(exp);
+    QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
+        nbd_client_close(client);
+    }
+    nbd_export_set_name(exp, NULL);
+    nbd_export_put(exp);
+}
+
+void nbd_export_get(NBDExport *exp)
+{
+    assert(exp->refcount > 0);
+    exp->refcount++;
+}
+
+void nbd_export_put(NBDExport *exp)
+{
+    assert(exp->refcount > 0);
+    if (exp->refcount == 1) {
+        nbd_export_close(exp);
     }
 
-    bdrv_close(exp->bs);
-    g_free(exp);
+    if (--exp->refcount == 0) {
+        assert(exp->name == NULL);
+
+        if (exp->close) {
+            exp->close(exp);
+        }
+
+        while (!QSIMPLEQ_EMPTY(&exp->requests)) {
+            NBDRequest *first = QSIMPLEQ_FIRST(&exp->requests);
+            QSIMPLEQ_REMOVE_HEAD(&exp->requests, entry);
+            qemu_vfree(first->data);
+            g_free(first);
+        }
+
+        g_free(exp);
+    }
+}
+
+BlockDriverState *nbd_export_get_blockdev(NBDExport *exp)
+{
+    return exp->bs;
+}
+
+void nbd_export_close_all(void)
+{
+    NBDExport *exp, *next;
+
+    QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
+        nbd_export_close(exp);
+    }
 }
 
 static int nbd_can_read(void *opaque);
@@ -805,14 +1049,18 @@
 static void nbd_trip(void *opaque)
 {
     NBDClient *client = opaque;
-    NBDRequest *req = nbd_request_get(client);
     NBDExport *exp = client->exp;
+    NBDRequest *req;
     struct nbd_request request;
     struct nbd_reply reply;
     ssize_t ret;
 
     TRACE("Reading request.");
+    if (client->closing) {
+        return;
+    }
 
+    req = nbd_request_get(client);
     ret = nbd_co_receive_request(req, &request);
     if (ret == -EAGAIN) {
         goto done;
@@ -974,15 +1222,21 @@
                           void (*close)(NBDClient *))
 {
     NBDClient *client;
-    if (nbd_send_negotiate(csock, exp->size, exp->nbdflags) < 0) {
-        return NULL;
-    }
     client = g_malloc0(sizeof(NBDClient));
     client->refcount = 1;
     client->exp = exp;
     client->sock = csock;
+    if (nbd_send_negotiate(client) < 0) {
+        g_free(client);
+        return NULL;
+    }
     client->close = close;
     qemu_co_mutex_init(&client->send_lock);
     qemu_set_fd_handler2(csock, nbd_can_read, nbd_read, NULL, client);
+
+    if (exp) {
+        QTAILQ_INSERT_TAIL(&exp->clients, client, next);
+        nbd_export_get(exp);
+    }
     return client;
 }
diff --git a/nbd.h b/nbd.h
index 40d58d3..344f05b 100644
--- a/nbd.h
+++ b/nbd.h
@@ -79,9 +79,22 @@
 typedef struct NBDClient NBDClient;
 
 NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset,
-                          off_t size, uint32_t nbdflags);
+                          off_t size, uint32_t nbdflags,
+                          void (*close)(NBDExport *));
 void nbd_export_close(NBDExport *exp);
+void nbd_export_get(NBDExport *exp);
+void nbd_export_put(NBDExport *exp);
+
+BlockDriverState *nbd_export_get_blockdev(NBDExport *exp);
+
+NBDExport *nbd_export_find(const char *name);
+void nbd_export_set_name(NBDExport *exp, const char *name);
+void nbd_export_close_all(void);
+
 NBDClient *nbd_client_new(NBDExport *exp, int csock,
                           void (*close)(NBDClient *));
+void nbd_client_close(NBDClient *client);
+void nbd_client_get(NBDClient *client);
+void nbd_client_put(NBDClient *client);
 
 #endif
diff --git a/net.c b/net.c
index e5d25d4..a187a7b 100644
--- a/net.c
+++ b/net.c
@@ -357,7 +357,12 @@
 {
     nc->receive_disabled = 0;
 
-    qemu_net_queue_flush(nc->send_queue);
+    if (qemu_net_queue_flush(nc->send_queue)) {
+        /* We emptied the queue successfully, signal to the IO thread to repoll
+         * the file descriptor (for tap, for example).
+         */
+        qemu_notify_event();
+    }
 }
 
 static ssize_t qemu_send_packet_async_with_flags(NetClientState *sender,
@@ -418,16 +423,27 @@
                                 void *opaque)
 {
     NetClientState *nc = opaque;
+    int ret;
 
     if (nc->link_down) {
         return iov_size(iov, iovcnt);
     }
 
-    if (nc->info->receive_iov) {
-        return nc->info->receive_iov(nc, iov, iovcnt);
-    } else {
-        return nc_sendv_compat(nc, iov, iovcnt);
+    if (nc->receive_disabled) {
+        return 0;
     }
+
+    if (nc->info->receive_iov) {
+        ret = nc->info->receive_iov(nc, iov, iovcnt);
+    } else {
+        ret = nc_sendv_compat(nc, iov, iovcnt);
+    }
+
+    if (ret == 0) {
+        nc->receive_disabled = 1;
+    }
+
+    return ret;
 }
 
 ssize_t qemu_sendv_packet_async(NetClientState *sender,
diff --git a/net/hub.c b/net/hub.c
index ac157e3..650a8b4 100644
--- a/net/hub.c
+++ b/net/hub.c
@@ -97,12 +97,12 @@
             continue;
         }
 
-        if (!qemu_can_send_packet(&port->nc)) {
-            return 0;
+        if (qemu_can_send_packet(&port->nc)) {
+            return 1;
         }
     }
 
-    return 1;
+    return 0;
 }
 
 static ssize_t net_hub_port_receive(NetClientState *nc,
diff --git a/net/queue.c b/net/queue.c
index e8030aa..254f280 100644
--- a/net/queue.c
+++ b/net/queue.c
@@ -83,12 +83,12 @@
     g_free(queue);
 }
 
-static ssize_t qemu_net_queue_append(NetQueue *queue,
-                                     NetClientState *sender,
-                                     unsigned flags,
-                                     const uint8_t *buf,
-                                     size_t size,
-                                     NetPacketSent *sent_cb)
+static void qemu_net_queue_append(NetQueue *queue,
+                                  NetClientState *sender,
+                                  unsigned flags,
+                                  const uint8_t *buf,
+                                  size_t size,
+                                  NetPacketSent *sent_cb)
 {
     NetPacket *packet;
 
@@ -100,16 +100,14 @@
     memcpy(packet->data, buf, size);
 
     QTAILQ_INSERT_TAIL(&queue->packets, packet, entry);
-
-    return size;
 }
 
-static ssize_t qemu_net_queue_append_iov(NetQueue *queue,
-                                         NetClientState *sender,
-                                         unsigned flags,
-                                         const struct iovec *iov,
-                                         int iovcnt,
-                                         NetPacketSent *sent_cb)
+static void qemu_net_queue_append_iov(NetQueue *queue,
+                                      NetClientState *sender,
+                                      unsigned flags,
+                                      const struct iovec *iov,
+                                      int iovcnt,
+                                      NetPacketSent *sent_cb)
 {
     NetPacket *packet;
     size_t max_len = 0;
@@ -133,8 +131,6 @@
     }
 
     QTAILQ_INSERT_TAIL(&queue->packets, packet, entry);
-
-    return packet->size;
 }
 
 static ssize_t qemu_net_queue_deliver(NetQueue *queue,
@@ -177,7 +173,8 @@
     ssize_t ret;
 
     if (queue->delivering || !qemu_can_send_packet(sender)) {
-        return qemu_net_queue_append(queue, sender, flags, data, size, sent_cb);
+        qemu_net_queue_append(queue, sender, flags, data, size, sent_cb);
+        return 0;
     }
 
     ret = qemu_net_queue_deliver(queue, sender, flags, data, size);
@@ -201,8 +198,8 @@
     ssize_t ret;
 
     if (queue->delivering || !qemu_can_send_packet(sender)) {
-        return qemu_net_queue_append_iov(queue, sender, flags,
-                                         iov, iovcnt, sent_cb);
+        qemu_net_queue_append_iov(queue, sender, flags, iov, iovcnt, sent_cb);
+        return 0;
     }
 
     ret = qemu_net_queue_deliver_iov(queue, sender, flags, iov, iovcnt);
@@ -228,7 +225,7 @@
     }
 }
 
-void qemu_net_queue_flush(NetQueue *queue)
+bool qemu_net_queue_flush(NetQueue *queue)
 {
     while (!QTAILQ_EMPTY(&queue->packets)) {
         NetPacket *packet;
@@ -244,7 +241,7 @@
                                      packet->size);
         if (ret == 0) {
             QTAILQ_INSERT_HEAD(&queue->packets, packet, entry);
-            break;
+            return false;
         }
 
         if (packet->sent_cb) {
@@ -253,4 +250,5 @@
 
         g_free(packet);
     }
+    return true;
 }
diff --git a/net/queue.h b/net/queue.h
index 9d44a9b..fc02b33 100644
--- a/net/queue.h
+++ b/net/queue.h
@@ -53,6 +53,6 @@
                                 NetPacketSent *sent_cb);
 
 void qemu_net_queue_purge(NetQueue *queue, NetClientState *from);
-void qemu_net_queue_flush(NetQueue *queue);
+bool qemu_net_queue_flush(NetQueue *queue);
 
 #endif /* QEMU_NET_QUEUE_H */
diff --git a/net/socket.c b/net/socket.c
index 7c602e4..f3d7878 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -32,6 +32,7 @@
 #include "qemu-error.h"
 #include "qemu-option.h"
 #include "qemu_socket.h"
+#include "iov.h"
 
 typedef struct NetSocketState {
     NetClientState nc;
@@ -40,29 +41,106 @@
     int state; /* 0 = getting length, 1 = getting data */
     unsigned int index;
     unsigned int packet_len;
+    unsigned int send_index;      /* number of bytes sent (only SOCK_STREAM) */
     uint8_t buf[4096];
     struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */
+    IOHandler *send_fn;           /* differs between SOCK_STREAM/SOCK_DGRAM */
+    bool read_poll;               /* waiting to receive data? */
+    bool write_poll;              /* waiting to transmit data? */
 } NetSocketState;
 
 static void net_socket_accept(void *opaque);
+static void net_socket_writable(void *opaque);
 
-/* XXX: we consider we can send the whole packet without blocking */
+/* Only read packets from socket when peer can receive them */
+static int net_socket_can_send(void *opaque)
+{
+    NetSocketState *s = opaque;
+
+    return qemu_can_send_packet(&s->nc);
+}
+
+static void net_socket_update_fd_handler(NetSocketState *s)
+{
+    qemu_set_fd_handler2(s->fd,
+                         s->read_poll  ? net_socket_can_send : NULL,
+                         s->read_poll  ? s->send_fn : NULL,
+                         s->write_poll ? net_socket_writable : NULL,
+                         s);
+}
+
+static void net_socket_read_poll(NetSocketState *s, bool enable)
+{
+    s->read_poll = enable;
+    net_socket_update_fd_handler(s);
+}
+
+static void net_socket_write_poll(NetSocketState *s, bool enable)
+{
+    s->write_poll = enable;
+    net_socket_update_fd_handler(s);
+}
+
+static void net_socket_writable(void *opaque)
+{
+    NetSocketState *s = opaque;
+
+    net_socket_write_poll(s, false);
+
+    qemu_flush_queued_packets(&s->nc);
+}
+
 static ssize_t net_socket_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc);
-    uint32_t len;
-    len = htonl(size);
+    uint32_t len = htonl(size);
+    struct iovec iov[] = {
+        {
+            .iov_base = &len,
+            .iov_len  = sizeof(len),
+        }, {
+            .iov_base = (void *)buf,
+            .iov_len  = size,
+        },
+    };
+    size_t remaining;
+    ssize_t ret;
 
-    send_all(s->fd, (const uint8_t *)&len, sizeof(len));
-    return send_all(s->fd, buf, size);
+    remaining = iov_size(iov, 2) - s->send_index;
+    ret = iov_send(s->fd, iov, 2, s->send_index, remaining);
+
+    if (ret == -1 && errno == EAGAIN) {
+        ret = 0; /* handled further down */
+    }
+    if (ret == -1) {
+        s->send_index = 0;
+        return -errno;
+    }
+    if (ret < (ssize_t)remaining) {
+        s->send_index += ret;
+        net_socket_write_poll(s, true);
+        return 0;
+    }
+    s->send_index = 0;
+    return size;
 }
 
 static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc);
+    ssize_t ret;
 
-    return sendto(s->fd, (const void *)buf, size, 0,
-                  (struct sockaddr *)&s->dgram_dst, sizeof(s->dgram_dst));
+    do {
+        ret = qemu_sendto(s->fd, buf, size, 0,
+                          (struct sockaddr *)&s->dgram_dst,
+                          sizeof(s->dgram_dst));
+    } while (ret == -1 && errno == EINTR);
+
+    if (ret == -1 && errno == EAGAIN) {
+        net_socket_write_poll(s, true);
+        return 0;
+    }
+    return ret;
 }
 
 static void net_socket_send(void *opaque)
@@ -81,7 +159,8 @@
     } else if (size == 0) {
         /* end of connection */
     eoc:
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+        net_socket_read_poll(s, false);
+        net_socket_write_poll(s, false);
         if (s->listen_fd != -1) {
             qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s);
         }
@@ -152,7 +231,8 @@
         return;
     if (size == 0) {
         /* end of connection */
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+        net_socket_read_poll(s, false);
+        net_socket_write_poll(s, false);
         return;
     }
     qemu_send_packet(&s->nc, s->buf, size);
@@ -243,7 +323,8 @@
 {
     NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc);
     if (s->fd != -1) {
-        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+        net_socket_read_poll(s, false);
+        net_socket_write_poll(s, false);
         close(s->fd);
         s->fd = -1;
     }
@@ -314,8 +395,8 @@
 
     s->fd = fd;
     s->listen_fd = -1;
-
-    qemu_set_fd_handler(s->fd, net_socket_send_dgram, NULL, s);
+    s->send_fn = net_socket_send_dgram;
+    net_socket_read_poll(s, true);
 
     /* mcast: save bound address as dst */
     if (is_connected) {
@@ -332,7 +413,8 @@
 static void net_socket_connect(void *opaque)
 {
     NetSocketState *s = opaque;
-    qemu_set_fd_handler(s->fd, net_socket_send, NULL, s);
+    s->send_fn = net_socket_send;
+    net_socket_read_poll(s, true);
 }
 
 static NetClientInfo net_socket_info = {
diff --git a/os-posix.c b/os-posix.c
index 79fa228..488e480 100644
--- a/os-posix.c
+++ b/os-posix.c
@@ -148,8 +148,7 @@
     char name[16];
     if (!s)
         return;
-    name[sizeof(name) - 1] = 0;
-    strncpy(name, s, sizeof(name));
+    pstrcpy(name, sizeof(name), s);
     /* Could rewrite argv[0] too, but that's a bit more complicated.
        This simple way is enough for `top'. */
     if (prctl(PR_SET_NAME, name)) {
@@ -194,7 +193,6 @@
         break;
 #endif
     }
-    return;
 }
 
 static void change_process_uid(void)
@@ -360,3 +358,8 @@
     /* keep pidfile open & locked forever */
     return 0;
 }
+
+bool is_daemonized(void)
+{
+    return daemonize;
+}
diff --git a/oslib-win32.c b/oslib-win32.c
index ffbc6d0..51b33e8 100644
--- a/oslib-win32.c
+++ b/oslib-win32.c
@@ -74,6 +74,30 @@
     VirtualFree(ptr, 0, MEM_RELEASE);
 }
 
+/* FIXME: add proper locking */
+struct tm *gmtime_r(const time_t *timep, struct tm *result)
+{
+    struct tm *p = gmtime(timep);
+    memset(result, 0, sizeof(*result));
+    if (p) {
+        *result = *p;
+        p = result;
+    }
+    return p;
+}
+
+/* FIXME: add proper locking */
+struct tm *localtime_r(const time_t *timep, struct tm *result)
+{
+    struct tm *p = localtime(timep);
+    memset(result, 0, sizeof(*result));
+    if (p) {
+        *result = *p;
+        p = result;
+    }
+    return p;
+}
+
 void socket_set_block(int fd)
 {
     unsigned long opt = 0;
diff --git a/qapi-schema-guest.json b/qapi-schema-guest.json
index d955cf1..ed0eb69 100644
--- a/qapi-schema-guest.json
+++ b/qapi-schema-guest.json
@@ -293,7 +293,7 @@
 ##
 # @GuestFsFreezeStatus
 #
-# An enumation of filesystem freeze states
+# An enumeration of filesystem freeze states
 #
 # @thawed: filesystems thawed/unfrozen
 #
diff --git a/qapi-schema.json b/qapi-schema.json
index a9f465a..f9dbdae 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -33,6 +33,31 @@
             'MigrationExpected' ] }
 
 ##
+# @add_client
+#
+# Allow client connections for VNC, Spice and socket based
+# character devices to be passed in to QEMU via SCM_RIGHTS.
+#
+# @protocol: protocol name. Valid names are "vnc", "spice" or the
+#            name of a character device (eg. from -chardev id=XXXX)
+#
+# @fdname: file descriptor name previously passed via 'getfd' command
+#
+# @skipauth: #optional whether to skip authentication. Only applies
+#            to "vnc" and "spice" protocols
+#
+# @tls: #optional whether to perform TLS. Only applies to the "spice"
+#       protocol
+#
+# Returns: nothing on success.
+#
+# Since: 0.14.0
+##
+{ 'command': 'add_client',
+  'data': { 'protocol': 'str', 'fdname': 'str', '*skipauth': 'bool',
+            '*tls': 'bool' } }
+
+##
 # @NameInfo:
 #
 # Guest name information.
@@ -118,7 +143,7 @@
 ##
 # @RunState
 #
-# An enumation of VM run states.
+# An enumeration of VM run states.
 #
 # @debug: QEMU is running on a debugger
 #
@@ -156,6 +181,70 @@
             'running', 'save-vm', 'shutdown', 'suspended', 'watchdog' ] }
 
 ##
+# @SnapshotInfo
+#
+# @id: unique snapshot id
+#
+# @name: user chosen name
+#
+# @vm-state-size: size of the VM state
+#
+# @date-sec: UTC date of the snapshot in seconds
+#
+# @date-nsec: fractional part in nano seconds to be used with date-sec
+#
+# @vm-clock-sec: VM clock relative to boot in seconds
+#
+# @vm-clock-nsec: fractional part in nano seconds to be used with vm-clock-sec
+#
+# Since: 1.3
+#
+##
+
+{ 'type': 'SnapshotInfo',
+  'data': { 'id': 'str', 'name': 'str', 'vm-state-size': 'int',
+            'date-sec': 'int', 'date-nsec': 'int',
+            'vm-clock-sec': 'int', 'vm-clock-nsec': 'int' } }
+
+##
+# @ImageInfo:
+#
+# Information about a QEMU image file
+#
+# @filename: name of the image file
+#
+# @format: format of the image file
+#
+# @virtual-size: maximum capacity in bytes of the image
+#
+# @actual-size: #optional actual size on disk in bytes of the image
+#
+# @dirty-flag: #optional true if image is not cleanly closed
+#
+# @cluster-size: #optional size of a cluster in bytes
+#
+# @encrypted: #optional true if the image is encrypted
+#
+# @backing-filename: #optional name of the backing file
+#
+# @full-backing-filename: #optional full path of the backing file
+#
+# @backing-filename-format: #optional the format of the backing file
+#
+# @snapshots: #optional list of VM snapshots
+#
+# Since: 1.3
+#
+##
+
+{ 'type': 'ImageInfo',
+  'data': {'filename': 'str', 'format': 'str', '*dirty-flag': 'bool',
+           '*actual-size': 'int', 'virtual-size': 'int',
+           '*cluster-size': 'int', '*encrypted': 'bool',
+           '*backing-filename': 'str', '*full-backing-filename': 'str',
+           '*backing-filename-format': 'str', '*snapshots': ['SnapshotInfo'] } }
+
+##
 # @StatusInfo:
 #
 # Information about VCPU run state
@@ -785,7 +874,7 @@
 ##
 # @SpiceQueryMouseMode
 #
-# An enumation of Spice mouse states.
+# An enumeration of Spice mouse states.
 #
 # @client: Mouse cursor position is determined by the client.
 #
@@ -1024,6 +1113,29 @@
 { 'command': 'query-pci', 'returns': ['PciInfo'] }
 
 ##
+# @BlockdevOnError:
+#
+# An enumeration of possible behaviors for errors on I/O operations.
+# The exact meaning depends on whether the I/O was initiated by a guest
+# or by a block job
+#
+# @report: for guest operations, report the error to the guest;
+#          for jobs, cancel the job
+#
+# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR
+#          or BLOCK_JOB_ERROR)
+#
+# @enospc: same as @stop on ENOSPC, same as @report otherwise.
+#
+# @stop: for guest operations, stop the virtual machine;
+#        for jobs, pause the job
+#
+# Since: 1.3
+##
+{ 'enum': 'BlockdevOnError',
+  'data': ['report', 'ignore', 'enospc', 'stop'] }
+
+##
 # @BlockJobInfo:
 #
 # Information about a long-running block device operation.
@@ -1034,15 +1146,24 @@
 #
 # @len: the maximum progress value
 #
+# @busy: false if the job is known to be in a quiescent state, with
+#        no pending I/O.  Since 1.3.
+#
+# @paused: whether the job is paused or, if @busy is true, will
+#          pause itself as soon as possible.  Since 1.3.
+#
 # @offset: the current progress value
 #
 # @speed: the rate limit, bytes per second
 #
+# @io-status: the status of the job (since 1.3)
+#
 # Since: 1.1
 ##
 { 'type': 'BlockJobInfo',
   'data': {'type': 'str', 'device': 'str', 'len': 'int',
-           'offset': 'int', 'speed': 'int'} }
+           'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int',
+           'io-status': 'BlockDeviceIoStatus'} }
 
 ##
 # @query-block-jobs:
@@ -1310,7 +1431,7 @@
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 #
 # @mode: #optional whether and how QEMU should create a new image, default is
-# 'absolute-paths'.
+#        'absolute-paths'.
 ##
 { 'type': 'BlockdevSnapshot',
   'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str',
@@ -1364,7 +1485,7 @@
 # @format: #optional the format of the snapshot image, default is 'qcow2'.
 #
 # @mode: #optional whether and how QEMU should create a new image, default is
-# 'absolute-paths'.
+#        'absolute-paths'.
 #
 # Returns: nothing on success
 #          If @device is not a valid block device, DeviceNotFound
@@ -1404,6 +1525,40 @@
   'returns': 'str' }
 
 ##
+# @block-commit
+#
+# Live commit of data from overlay image nodes into backing nodes - i.e.,
+# writes data between 'top' and 'base' into 'base'.
+#
+# @device:  the name of the device
+#
+# @base:   #optional The file name of the backing image to write data into.
+#                    If not specified, this is the deepest backing image
+#
+# @top:              The file name of the backing image within the image chain,
+#                    which contains the topmost data to be committed down.
+#                    Note, the active layer as 'top' is currently unsupported.
+#
+#                    If top == base, that is an error.
+#
+#
+# @speed:  #optional the maximum speed, in bytes per second
+#
+# Returns: Nothing on success
+#          If commit or stream is already active on this device, DeviceInUse
+#          If @device does not exist, DeviceNotFound
+#          If image commit is not supported by this device, NotSupported
+#          If @base or @top is invalid, a generic error is returned
+#          If @top is the active layer, or omitted, a generic error is returned
+#          If @speed is invalid, InvalidParameter
+#
+# Since: 1.3
+#
+##
+{ 'command': 'block-commit',
+  'data': { 'device': 'str', '*base': 'str', 'top': 'str',
+            '*speed': 'int' } }
+
 # @migrate_cancel
 #
 # Cancel the current executing migration process.
@@ -1739,13 +1894,18 @@
 #
 # @speed:  #optional the maximum speed, in bytes per second
 #
+# @on-error: #optional the action to take on an error (default report).
+#            'stop' and 'enospc' can only be used if the block device
+#            supports io-status (see BlockInfo).  Since 1.3.
+#
 # Returns: Nothing on success
 #          If @device does not exist, DeviceNotFound
 #
 # Since: 1.1
 ##
-{ 'command': 'block-stream', 'data': { 'device': 'str', '*base': 'str',
-                                       '*speed': 'int' } }
+{ 'command': 'block-stream',
+  'data': { 'device': 'str', '*base': 'str', '*speed': 'int',
+            '*on-error': 'BlockdevOnError' } }
 
 ##
 # @block-job-set-speed:
@@ -1789,12 +1949,58 @@
 #
 # @device: the device name
 #
+# @force: #optional whether to allow cancellation of a paused job (default
+#         false).  Since 1.3.
+#
 # Returns: Nothing on success
 #          If no background operation is active on this device, DeviceNotActive
 #
 # Since: 1.1
 ##
-{ 'command': 'block-job-cancel', 'data': { 'device': 'str' } }
+{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } }
+
+##
+# @block-job-pause:
+#
+# Pause an active background block operation.
+#
+# This command returns immediately after marking the active background block
+# operation for pausing.  It is an error to call this command if no
+# operation is in progress.  Pausing an already paused job has no cumulative
+# effect; a single block-job-resume command will resume the job.
+#
+# The operation will pause as soon as possible.  No event is emitted when
+# the operation is actually paused.  Cancelling a paused job automatically
+# resumes it.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-pause', 'data': { 'device': 'str' } }
+
+##
+# @block-job-resume:
+#
+# Resume an active background block operation.
+#
+# This command returns immediately after resuming a paused background block
+# operation.  It is an error to call this command if no operation is in
+# progress.  Resuming an already running job is not an error.
+#
+# This command also clears the error status of the job.
+#
+# @device: the device name
+#
+# Returns: Nothing on success
+#          If no background operation is active on this device, DeviceNotActive
+#
+# Since: 1.3
+##
+{ 'command': 'block-job-resume', 'data': { 'device': 'str' } }
 
 ##
 # @ObjectTypeInfo:
@@ -1892,6 +2098,19 @@
 { 'command': 'xen-save-devices-state', 'data': {'filename': 'str'} }
 
 ##
+# @xen-set-global-dirty-log
+#
+# Enable or disable the global dirty log mode.
+#
+# @enable: true to enable, false to disable.
+#
+# Returns: nothing
+#
+# Since: 1.3
+##
+{ 'command': 'xen-set-global-dirty-log', 'data': { 'enable': 'bool' } }
+
+##
 # @device_del:
 #
 # Remove a device from a guest
@@ -1918,26 +2137,33 @@
 # supported on i386 and x86_64.
 #
 # @paging: if true, do paging to get guest's memory mapping. This allows
-# using gdb to process the core file. However, setting @paging to false
-# may be desirable because of two reasons:
+#          using gdb to process the core file.
 #
-#   1. The guest may be in a catastrophic state or can have corrupted
-#      memory, which cannot be trusted
-#   2. The guest can be in real-mode even if paging is enabled. For example,
-#      the guest uses ACPI to sleep, and ACPI sleep state goes in real-mode
+#          IMPORTANT: this option can make QEMU allocate several gigabytes
+#                     of RAM. This can happen for a large guest, or a
+#                     malicious guest pretending to be large.
+#
+#          Also, paging=true has the following limitations:
+#
+#             1. The guest may be in a catastrophic state or can have corrupted
+#                memory, which cannot be trusted
+#             2. The guest can be in real-mode even if paging is enabled. For
+#                example, the guest uses ACPI to sleep, and ACPI sleep state
+#                goes in real-mode
 #
 # @protocol: the filename or file descriptor of the vmcore. The supported
-# protocols are:
+#            protocols are:
 #
-#   1. file: the protocol starts with "file:", and the following string is
-#      the file's path.
-#   2. fd: the protocol starts with "fd:", and the following string is the
-#      fd's name.
+#            1. file: the protocol starts with "file:", and the following
+#               string is the file's path.
+#            2. fd: the protocol starts with "fd:", and the following string
+#               is the fd's name.
 #
 # @begin: #optional if specified, the starting physical address.
 #
 # @length: #optional if specified, the memory size, in bytes. If you don't
-# want to dump all guest's memory, please specify the start @begin and @length
+#          want to dump all guest's memory, please specify the start @begin
+#          and @length
 #
 # Returns: nothing on success
 #
@@ -1946,6 +2172,7 @@
 { 'command': 'dump-guest-memory',
   'data': { 'paging': 'bool', 'protocol': 'str', '*begin': 'int',
             '*length': 'int' } }
+
 ##
 # @netdev_add:
 #
@@ -2524,12 +2751,26 @@
              'lf', 'help', 'meta_l', 'meta_r', 'compose' ] }
 
 ##
+# @KeyValue
+#
+# Represents a keyboard key.
+#
+# Since: 1.3.0
+##
+{ 'union': 'KeyValue',
+  'data': {
+    'number': 'int',
+    'qcode': 'QKeyCode' } }
+
+##
 # @send-key:
 #
 # Send keys to guest.
 #
-# @keys: key sequence. 'keys' is the name of the key. Use a JSON array to
-#        press several keys simultaneously.
+# @keys: An array of @KeyValue elements. All @KeyValues in this array are
+#        simultaneously sent to the guest. A @KeyValue.number value is sent
+#        directly to the guest, while @KeyValue.qcode must be a valid
+#        @QKeyCode value
 #
 # @hold-time: #optional time to delay key up events, milliseconds. Defaults
 #             to 100
@@ -2541,7 +2782,7 @@
 #
 ##
 { 'command': 'send-key',
-  'data': { 'keys': ['QKeyCode'], '*hold-time': 'int' } }
+  'data': { 'keys': ['KeyValue'], '*hold-time': 'int' } }
 
 ##
 # @screendump:
diff --git a/qemu-barrier.h b/qemu-barrier.h
index 7e11197..faa83d2 100644
--- a/qemu-barrier.h
+++ b/qemu-barrier.h
@@ -6,6 +6,8 @@
 
 #if defined(__i386__)
 
+#include "compiler.h"        /* QEMU_GNUC_PREREQ */
+
 /*
  * Because of the strongly ordered x86 storage model, wmb() and rmb() are nops
  * on x86(well, a compiler barrier only).  Well, at least as long as
@@ -19,7 +21,7 @@
  * mfence on 32 bit as well, e.g. if built with -march=pentium-m.
  * However, on i386, there seem to be known bugs as recently as 4.3.
  * */
-#if defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 4
+#if QEMU_GNUC_PREREQ(4, 4)
 #define smp_mb() __sync_synchronize()
 #else
 #define smp_mb() asm volatile("lock; addl $0,0(%%esp) " ::: "memory")
diff --git a/qemu-char.c b/qemu-char.c
index 767da93..b082bae 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2141,18 +2141,13 @@
 
 static void tcp_chr_accept(void *opaque);
 
-static void tcp_chr_connect(void *opaque);
-
 static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
     TCPCharDriver *s = chr->opaque;
     if (s->connected) {
         return send_all(s->fd, buf, len);
-    } else if (s->listen_fd == -1) {
-        /* (Re-)connect for unconnected writing */
-        tcp_chr_connect(chr);
-        return 0;
     } else {
+        /* XXX: indicate an error ? */
         return len;
     }
 }
@@ -2334,8 +2329,10 @@
     TCPCharDriver *s = chr->opaque;
 
     s->connected = 1;
-    qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
-                         tcp_chr_read, NULL, chr);
+    if (s->fd >= 0) {
+        qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
+                             tcp_chr_read, NULL, chr);
+    }
     qemu_chr_generic_open(chr);
 }
 
@@ -2459,7 +2456,7 @@
         if (is_listen) {
             fd = inet_listen_opts(opts, 0, NULL);
         } else {
-            fd = inet_connect_opts(opts, NULL, NULL);
+            fd = inet_connect_opts(opts, NULL, NULL, NULL);
         }
     }
     if (fd < 0) {
diff --git a/qemu-common.h b/qemu-common.h
index e5c2bcd..b54612b 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -223,9 +223,22 @@
 #endif
 
 #ifdef _WIN32
+/* MinGW needs type casts for the 'buf' and 'optval' arguments. */
+#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
+    getsockopt(sockfd, level, optname, (void *)optval, optlen)
+#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
+    setsockopt(sockfd, level, optname, (const void *)optval, optlen)
 #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, (void *)buf, len, flags)
+#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
+    sendto(sockfd, (const void *)buf, len, flags, destaddr, addrlen)
 #else
+#define qemu_getsockopt(sockfd, level, optname, optval, optlen) \
+    getsockopt(sockfd, level, optname, optval, optlen)
+#define qemu_setsockopt(sockfd, level, optname, optval, optlen) \
+    setsockopt(sockfd, level, optname, optval, optlen)
 #define qemu_recv(sockfd, buf, len, flags) recv(sockfd, buf, len, flags)
+#define qemu_sendto(sockfd, buf, len, flags, destaddr, addrlen) \
+    sendto(sockfd, buf, len, flags, destaddr, addrlen)
 #endif
 
 /* Error handling.  */
diff --git a/qemu-config.c b/qemu-config.c
index 3eaee48..a5b33cc 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -619,6 +619,10 @@
             .name = "dump-guest-core",
             .type = QEMU_OPT_BOOL,
             .help = "Include guest memory in  a core dump",
+        }, {
+            .name = "mem-merge",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable memory merge support",
         },
         { /* End of list */ }
     },
@@ -645,6 +649,9 @@
         }, {
             .name = "splash-time",
             .type = QEMU_OPT_STRING,
+        }, {
+            .name = "reboot-timeout",
+            .type = QEMU_OPT_STRING,
         },
         { /*End of list */ }
     },
diff --git a/qemu-ga.c b/qemu-ga.c
index 7623079..b747470 100644
--- a/qemu-ga.c
+++ b/qemu-ga.c
@@ -114,12 +114,10 @@
     ret = sigaction(SIGINT, &sigact, NULL);
     if (ret == -1) {
         g_error("error configuring signal handler: %s", strerror(errno));
-        return false;
     }
     ret = sigaction(SIGTERM, &sigact, NULL);
     if (ret == -1) {
         g_error("error configuring signal handler: %s", strerror(errno));
-        return false;
     }
 
     return true;
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index 39419a0..0ef82e9 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -34,9 +34,9 @@
 ETEXI
 
 DEF("info", img_info,
-    "info [-f fmt] filename")
+    "info [-f fmt] [--output=ofmt] filename")
 STEXI
-@item info [-f @var{fmt}] @var{filename}
+@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
 ETEXI
 
 DEF("snapshot", img_snapshot,
diff --git a/qemu-img.c b/qemu-img.c
index b41e670..f17f187 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -21,12 +21,16 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
+#include "qapi-visit.h"
+#include "qapi/qmp-output-visitor.h"
+#include "qjson.h"
 #include "qemu-common.h"
 #include "qemu-option.h"
 #include "qemu-error.h"
 #include "osdep.h"
 #include "sysemu.h"
 #include "block_int.h"
+#include <getopt.h>
 #include <stdio.h>
 
 #ifdef _WIN32
@@ -84,12 +88,13 @@
            "  '-p' show progress of command (only certain commands)\n"
            "  '-S' indicates the consecutive number of bytes that must contain only zeros\n"
            "       for qemu-img to create a sparse image during conversion\n"
+           "  '--output' takes the format in which the output must be done (human or json)\n"
            "\n"
            "Parameters to check subcommand:\n"
            "  '-r' tries to repair any inconsistencies that are found during the check.\n"
            "       '-r leaks' repairs only cluster leaks, whereas '-r all' fixes all\n"
            "       kinds of errors, with a higher risk of choosing the wrong fix or\n"
-           "       hiding corruption that has already occured.\n"
+           "       hiding corruption that has already occurred.\n"
            "\n"
            "Parameters to snapshot subcommand:\n"
            "  'snapshot' is the name of the snapshot to create, apply or delete\n"
@@ -221,7 +226,8 @@
 
 static BlockDriverState *bdrv_new_open(const char *filename,
                                        const char *fmt,
-                                       int flags)
+                                       int flags,
+                                       bool require_io)
 {
     BlockDriverState *bs;
     BlockDriver *drv;
@@ -246,7 +252,7 @@
         goto fail;
     }
 
-    if (bdrv_is_encrypted(bs)) {
+    if (bdrv_is_encrypted(bs) && require_io) {
         printf("Disk image '%s' is encrypted.\n", filename);
         if (read_password(password, sizeof(password)) < 0) {
             error_report("No password given");
@@ -413,7 +419,7 @@
     }
     filename = argv[optind++];
 
-    bs = bdrv_new_open(filename, fmt, flags);
+    bs = bdrv_new_open(filename, fmt, flags, true);
     if (!bs) {
         return 1;
     }
@@ -520,7 +526,7 @@
         return -1;
     }
 
-    bs = bdrv_new_open(filename, fmt, flags);
+    bs = bdrv_new_open(filename, fmt, flags, true);
     if (!bs) {
         return 1;
     }
@@ -762,7 +768,7 @@
 
     total_sectors = 0;
     for (bs_i = 0; bs_i < bs_n; bs_i++) {
-        bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt, BDRV_O_FLAGS);
+        bs[bs_i] = bdrv_new_open(argv[optind + bs_i], fmt, BDRV_O_FLAGS, true);
         if (!bs[bs_i]) {
             error_report("Could not open '%s'", argv[optind + bs_i]);
             ret = -1;
@@ -881,7 +887,7 @@
         return -1;
     }
 
-    out_bs = bdrv_new_open(out_filename, out_fmt, flags);
+    out_bs = bdrv_new_open(out_filename, out_fmt, flags, true);
     if (!out_bs) {
         ret = -1;
         goto out;
@@ -1102,21 +1108,174 @@
     g_free(sn_tab);
 }
 
-static int img_info(int argc, char **argv)
+static void collect_snapshots(BlockDriverState *bs , ImageInfo *info)
 {
-    int c;
-    const char *filename, *fmt;
-    BlockDriverState *bs;
-    char size_buf[128], dsize_buf[128];
+    int i, sn_count;
+    QEMUSnapshotInfo *sn_tab = NULL;
+    SnapshotInfoList *info_list, *cur_item = NULL;
+    sn_count = bdrv_snapshot_list(bs, &sn_tab);
+
+    for (i = 0; i < sn_count; i++) {
+        info->has_snapshots = true;
+        info_list = g_new0(SnapshotInfoList, 1);
+
+        info_list->value                = g_new0(SnapshotInfo, 1);
+        info_list->value->id            = g_strdup(sn_tab[i].id_str);
+        info_list->value->name          = g_strdup(sn_tab[i].name);
+        info_list->value->vm_state_size = sn_tab[i].vm_state_size;
+        info_list->value->date_sec      = sn_tab[i].date_sec;
+        info_list->value->date_nsec     = sn_tab[i].date_nsec;
+        info_list->value->vm_clock_sec  = sn_tab[i].vm_clock_nsec / 1000000000;
+        info_list->value->vm_clock_nsec = sn_tab[i].vm_clock_nsec % 1000000000;
+
+        /* XXX: waiting for the qapi to support qemu-queue.h types */
+        if (!cur_item) {
+            info->snapshots = cur_item = info_list;
+        } else {
+            cur_item->next = info_list;
+            cur_item = info_list;
+        }
+
+    }
+
+    g_free(sn_tab);
+}
+
+static void dump_json_image_info(ImageInfo *info)
+{
+    Error *errp = NULL;
+    QString *str;
+    QmpOutputVisitor *ov = qmp_output_visitor_new();
+    QObject *obj;
+    visit_type_ImageInfo(qmp_output_get_visitor(ov),
+                         &info, NULL, &errp);
+    obj = qmp_output_get_qobject(ov);
+    str = qobject_to_json_pretty(obj);
+    assert(str != NULL);
+    printf("%s\n", qstring_get_str(str));
+    qobject_decref(obj);
+    qmp_output_visitor_cleanup(ov);
+    QDECREF(str);
+}
+
+static void collect_image_info(BlockDriverState *bs,
+                   ImageInfo *info,
+                   const char *filename,
+                   const char *fmt)
+{
     uint64_t total_sectors;
-    int64_t allocated_size;
     char backing_filename[1024];
     char backing_filename2[1024];
     BlockDriverInfo bdi;
 
+    bdrv_get_geometry(bs, &total_sectors);
+
+    info->filename        = g_strdup(filename);
+    info->format          = g_strdup(bdrv_get_format_name(bs));
+    info->virtual_size    = total_sectors * 512;
+    info->actual_size     = bdrv_get_allocated_file_size(bs);
+    info->has_actual_size = info->actual_size >= 0;
+    if (bdrv_is_encrypted(bs)) {
+        info->encrypted = true;
+        info->has_encrypted = true;
+    }
+    if (bdrv_get_info(bs, &bdi) >= 0) {
+        if (bdi.cluster_size != 0) {
+            info->cluster_size = bdi.cluster_size;
+            info->has_cluster_size = true;
+        }
+        info->dirty_flag = bdi.is_dirty;
+        info->has_dirty_flag = true;
+    }
+    bdrv_get_backing_filename(bs, backing_filename, sizeof(backing_filename));
+    if (backing_filename[0] != '\0') {
+        info->backing_filename = g_strdup(backing_filename);
+        info->has_backing_filename = true;
+        bdrv_get_full_backing_filename(bs, backing_filename2,
+                                       sizeof(backing_filename2));
+
+        if (strcmp(backing_filename, backing_filename2) != 0) {
+            info->full_backing_filename =
+                        g_strdup(backing_filename2);
+            info->has_full_backing_filename = true;
+        }
+
+        if (bs->backing_format[0]) {
+            info->backing_filename_format = g_strdup(bs->backing_format);
+            info->has_backing_filename_format = true;
+        }
+    }
+}
+
+static void dump_human_image_info(ImageInfo *info)
+{
+    char size_buf[128], dsize_buf[128];
+    if (!info->has_actual_size) {
+        snprintf(dsize_buf, sizeof(dsize_buf), "unavailable");
+    } else {
+        get_human_readable_size(dsize_buf, sizeof(dsize_buf),
+                                info->actual_size);
+    }
+    get_human_readable_size(size_buf, sizeof(size_buf), info->virtual_size);
+    printf("image: %s\n"
+           "file format: %s\n"
+           "virtual size: %s (%" PRId64 " bytes)\n"
+           "disk size: %s\n",
+           info->filename, info->format, size_buf,
+           info->virtual_size,
+           dsize_buf);
+
+    if (info->has_encrypted && info->encrypted) {
+        printf("encrypted: yes\n");
+    }
+
+    if (info->has_cluster_size) {
+        printf("cluster_size: %" PRId64 "\n", info->cluster_size);
+    }
+
+    if (info->has_dirty_flag && info->dirty_flag) {
+        printf("cleanly shut down: no\n");
+    }
+
+    if (info->has_backing_filename) {
+        printf("backing file: %s", info->backing_filename);
+        if (info->has_full_backing_filename) {
+            printf(" (actual path: %s)", info->full_backing_filename);
+        }
+        putchar('\n');
+        if (info->has_backing_filename_format) {
+            printf("backing file format: %s\n", info->backing_filename_format);
+        }
+    }
+}
+
+enum {OPTION_OUTPUT = 256};
+
+typedef enum OutputFormat {
+    OFORMAT_JSON,
+    OFORMAT_HUMAN,
+} OutputFormat;
+
+static int img_info(int argc, char **argv)
+{
+    int c;
+    OutputFormat output_format = OFORMAT_HUMAN;
+    const char *filename, *fmt, *output;
+    BlockDriverState *bs;
+    ImageInfo *info;
+
     fmt = NULL;
+    output = NULL;
     for(;;) {
-        c = getopt(argc, argv, "f:h");
+        int option_index = 0;
+        static const struct option long_options[] = {
+            {"help", no_argument, 0, 'h'},
+            {"format", required_argument, 0, 'f'},
+            {"output", required_argument, 0, OPTION_OUTPUT},
+            {0, 0, 0, 0}
+        };
+        c = getopt_long(argc, argv, "f:h",
+                        long_options, &option_index);
         if (c == -1) {
             break;
         }
@@ -1128,6 +1287,9 @@
         case 'f':
             fmt = optarg;
             break;
+        case OPTION_OUTPUT:
+            output = optarg;
+            break;
         }
     }
     if (optind >= argc) {
@@ -1135,48 +1297,35 @@
     }
     filename = argv[optind++];
 
-    bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING);
+    if (output && !strcmp(output, "json")) {
+        output_format = OFORMAT_JSON;
+    } else if (output && !strcmp(output, "human")) {
+        output_format = OFORMAT_HUMAN;
+    } else if (output) {
+        error_report("--output must be used with human or json as argument.");
+        return 1;
+    }
+
+    bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_NO_BACKING, false);
     if (!bs) {
         return 1;
     }
-    bdrv_get_geometry(bs, &total_sectors);
-    get_human_readable_size(size_buf, sizeof(size_buf), total_sectors * 512);
-    allocated_size = bdrv_get_allocated_file_size(bs);
-    if (allocated_size < 0) {
-        snprintf(dsize_buf, sizeof(dsize_buf), "unavailable");
-    } else {
-        get_human_readable_size(dsize_buf, sizeof(dsize_buf),
-                                allocated_size);
+
+    info = g_new0(ImageInfo, 1);
+    collect_image_info(bs, info, filename, fmt);
+
+    switch (output_format) {
+    case OFORMAT_HUMAN:
+        dump_human_image_info(info);
+        dump_snapshots(bs);
+        break;
+    case OFORMAT_JSON:
+        collect_snapshots(bs, info);
+        dump_json_image_info(info);
+        break;
     }
-    printf("image: %s\n"
-           "file format: %s\n"
-           "virtual size: %s (%" PRId64 " bytes)\n"
-           "disk size: %s\n",
-           filename, bdrv_get_format_name(bs), size_buf,
-           (total_sectors * 512),
-           dsize_buf);
-    if (bdrv_is_encrypted(bs)) {
-        printf("encrypted: yes\n");
-    }
-    if (bdrv_get_info(bs, &bdi) >= 0) {
-        if (bdi.cluster_size != 0) {
-            printf("cluster_size: %d\n", bdi.cluster_size);
-        }
-        if (bdi.is_dirty) {
-            printf("cleanly shut down: no\n");
-        }
-    }
-    bdrv_get_backing_filename(bs, backing_filename, sizeof(backing_filename));
-    if (backing_filename[0] != '\0') {
-        bdrv_get_full_backing_filename(bs, backing_filename2,
-                                       sizeof(backing_filename2));
-        printf("backing file: %s", backing_filename);
-        if (strcmp(backing_filename, backing_filename2) != 0) {
-            printf(" (actual path: %s)", backing_filename2);
-        }
-        putchar('\n');
-    }
-    dump_snapshots(bs);
+
+    qapi_free_ImageInfo(info);
     bdrv_delete(bs);
     return 0;
 }
@@ -1248,7 +1397,7 @@
     filename = argv[optind++];
 
     /* Open the image */
-    bs = bdrv_new_open(filename, NULL, bdrv_oflags);
+    bs = bdrv_new_open(filename, NULL, bdrv_oflags, true);
     if (!bs) {
         return 1;
     }
@@ -1366,7 +1515,7 @@
      * Ignore the old backing file for unsafe rebase in case we want to correct
      * the reference to a renamed or moved backing file.
      */
-    bs = bdrv_new_open(filename, fmt, flags);
+    bs = bdrv_new_open(filename, fmt, flags, true);
     if (!bs) {
         return 1;
     }
@@ -1639,7 +1788,7 @@
     n = qemu_opt_get_size(param, BLOCK_OPT_SIZE, 0);
     qemu_opts_del(param);
 
-    bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR);
+    bs = bdrv_new_open(filename, fmt, BDRV_O_FLAGS | BDRV_O_RDWR, true);
     if (!bs) {
         ret = -1;
         goto out;
diff --git a/qemu-img.texi b/qemu-img.texi
index 6b42e35..8b05f2c 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -87,7 +87,7 @@
 If @code{-r} is specified, qemu-img tries to repair any inconsistencies found
 during the check. @code{-r leaks} repairs only cluster leaks, whereas
 @code{-r all} fixes all kinds of errors, with a higher risk of choosing the
-wrong fix or hiding corruption that has already occured.
+wrong fix or hiding corruption that has already occurred.
 
 Only the formats @code{qcow2}, @code{qed} and @code{vdi} support
 consistency checks.
@@ -129,12 +129,13 @@
 @var{backing_file} should have the same content as the input's base image,
 however the path, image format, etc may differ.
 
-@item info [-f @var{fmt}] @var{filename}
+@item info [-f @var{fmt}] [--output=@var{ofmt}] @var{filename}
 
 Give information about the disk image @var{filename}. Use it in
 particular to know the size reserved on disk which can be different
 from the displayed size. If VM snapshots are stored in the disk image,
-they are displayed too.
+they are displayed too. The command can output in the format @var{ofmt}
+which is either @code{human} or @code{json}.
 
 @item snapshot [-l | -a @var{snapshot} | -c @var{snapshot} | -d @var{snapshot} ] @var{filename}
 
diff --git a/qemu-nbd.c b/qemu-nbd.c
index 1c1cf6a..15bcd08 100644
--- a/qemu-nbd.c
+++ b/qemu-nbd.c
@@ -41,8 +41,8 @@
 static int verbose;
 static char *srcpath;
 static char *sockpath;
-static bool sigterm_reported;
-static bool nbd_started;
+static int persistent = 0;
+static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state;
 static int shared = 1;
 static int nb_fds;
 
@@ -186,7 +186,7 @@
 
 static void termsig_handler(int signum)
 {
-    sigterm_reported = true;
+    state = TERMINATE;
     qemu_notify_event();
 }
 
@@ -269,10 +269,20 @@
     return nb_fds < shared;
 }
 
+static void nbd_export_closed(NBDExport *exp)
+{
+    assert(state == TERMINATING);
+    state = TERMINATED;
+}
+
 static void nbd_client_closed(NBDClient *client)
 {
     nb_fds--;
+    if (nb_fds == 0 && !persistent && state == RUNNING) {
+        state = TERMINATE;
+    }
     qemu_notify_event();
+    nbd_client_put(client);
 }
 
 static void nbd_accept(void *opaque)
@@ -282,7 +292,11 @@
     socklen_t addr_len = sizeof(addr);
 
     int fd = accept(server_fd, (struct sockaddr *)&addr, &addr_len);
-    nbd_started = true;
+    if (state >= TERMINATE) {
+        close(fd);
+        return;
+    }
+
     if (fd >= 0 && nbd_client_new(exp, fd, nbd_client_closed)) {
         nb_fds++;
     }
@@ -329,7 +343,6 @@
     int partition = -1;
     int ret;
     int fd;
-    int persistent = 0;
     bool seen_cache = false;
 #ifdef CONFIG_LINUX_AIO
     bool seen_aio = false;
@@ -546,7 +559,7 @@
         }
     }
 
-    exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags);
+    exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed);
 
     if (sockpath) {
         fd = unix_socket_incoming(sockpath);
@@ -581,11 +594,18 @@
         err(EXIT_FAILURE, "Could not chdir to root directory");
     }
 
+    state = RUNNING;
     do {
         main_loop_wait(false);
-    } while (!sigterm_reported && (persistent || !nbd_started || nb_fds > 0));
+        if (state == TERMINATE) {
+            state = TERMINATING;
+            nbd_export_close(exp);
+            nbd_export_put(exp);
+            exp = NULL;
+        }
+    } while (state != TERMINATED);
 
-    nbd_export_close(exp);
+    bdrv_close(bs);
     if (sockpath) {
         unlink(sockpath);
     }
diff --git a/qemu-options.hx b/qemu-options.hx
index 1af4fec..3cd9243 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -38,7 +38,8 @@
     "                supported accelerators are kvm, xen, tcg (default: tcg)\n"
     "                kernel_irqchip=on|off controls accelerated irqchip support\n"
     "                kvm_shadow_mem=size of KVM shadow MMU\n"
-    "                dump-guest-core=on|off include guest memory in a core dump (default=on)\n",
+    "                dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
+    "                mem-merge=on|off controls memory merge support (default: on)\n",
     QEMU_ARCH_ALL)
 STEXI
 @item -machine [type=]@var{name}[,prop=@var{value}[,...]]
@@ -57,6 +58,10 @@
 Defines the size of the KVM shadow MMU.
 @item dump-guest-core=on|off
 Include guest memory in a core dump. The default is on.
+@item mem-merge=on|off
+Enables or disables memory merge support. This feature, when supported by
+the host, de-duplicates identical memory pages among VMs instances
+(enabled by default).
 @end table
 ETEXI
 
@@ -339,13 +344,14 @@
 
 DEF("boot", HAS_ARG, QEMU_OPTION_boot,
     "-boot [order=drives][,once=drives][,menu=on|off]\n"
-    "      [,splash=sp_name][,splash-time=sp_time]\n"
+    "      [,splash=sp_name][,splash-time=sp_time][,reboot-timeout=rb_time]\n"
     "                'drives': floppy (a), hard disk (c), CD-ROM (d), network (n)\n"
     "                'sp_name': the file's name that would be passed to bios as logo picture, if menu=on\n"
-    "                'sp_time': the period that splash picture last if menu=on, unit is ms\n",
+    "                'sp_time': the period that splash picture last if menu=on, unit is ms\n"
+    "                'rb_timeout': the timeout before guest reboot when boot failed, unit is ms\n",
     QEMU_ARCH_ALL)
 STEXI
-@item -boot [order=@var{drives}][,once=@var{drives}][,menu=on|off][,splash=@var{sp_name}][,splash-time=@var{sp_time}]
+@item -boot [order=@var{drives}][,once=@var{drives}][,menu=on|off][,splash=@var{sp_name}][,splash-time=@var{sp_time}][,reboot-timeout=@var{rb_timeout}]
 @findex -boot
 Specify boot order @var{drives} as a string of drive letters. Valid
 drive letters depend on the target achitecture. The x86 PC uses: a, b
@@ -364,6 +370,11 @@
 format(true color). The resolution should be supported by the SVGA mode, so
 the recommended is 320x240, 640x480, 800x640.
 
+A timeout could be passed to bios, guest will pause for @var{rb_timeout} ms
+when boot failed, then reboot. If @var{rb_timeout} is '-1', guest will not
+reboot, qemu passes '-1' to bios by default. Currently Seabios for X86
+system support it.
+
 @example
 # try to boot from network first, then from hard disk
 qemu-system-i386 -boot order=nc
@@ -1357,6 +1368,7 @@
 Not all devices are supported on all targets.  Use -net nic,model=?
 for a list of available devices for your target.
 
+@item -netdev user,id=@var{id}[,@var{option}][,@var{option}][,...]
 @item -net user[,@var{option}][,@var{option}][,...]
 Use the user mode network stack which requires no administrator
 privilege to run. Valid options are:
@@ -1365,6 +1377,7 @@
 @item vlan=@var{n}
 Connect user mode stack to VLAN @var{n} (@var{n} = 0 is the default).
 
+@item id=@var{id}
 @item name=@var{name}
 Assign symbolic name for use in monitor commands.
 
@@ -1490,6 +1503,7 @@
 syntax gives undefined results. Their use for new applications is discouraged
 as they will be removed from future versions.
 
+@item -netdev tap,id=@var{id}[,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}]
 @item -net tap[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,ifname=@var{name}][,script=@var{file}][,downscript=@var{dfile}][,helper=@var{helper}]
 Connect the host TAP network interface @var{name} to VLAN @var{n}.
 
@@ -1529,6 +1543,7 @@
                  -net nic -net tap,"helper=/usr/local/libexec/qemu-bridge-helper"
 @end example
 
+@item -netdev bridge,id=@var{id}[,br=@var{bridge}][,helper=@var{helper}]
 @item -net bridge[,vlan=@var{n}][,name=@var{name}][,br=@var{bridge}][,helper=@var{helper}]
 Connect a host TAP network interface to a host bridge device.
 
@@ -1551,6 +1566,7 @@
 qemu-system-i386 linux.img -net bridge,br=qemubr0 -net nic,model=virtio
 @end example
 
+@item -netdev socket,id=@var{id}[,fd=@var{h}][,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}]
 @item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}] [,listen=[@var{host}]:@var{port}][,connect=@var{host}:@var{port}]
 
 Connect the VLAN @var{n} to a remote VLAN in another QEMU virtual
@@ -1573,6 +1589,7 @@
                  -net socket,connect=127.0.0.1:1234
 @end example
 
+@item -netdev socket,id=@var{id}[,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]]
 @item -net socket[,vlan=@var{n}][,name=@var{name}][,fd=@var{h}][,mcast=@var{maddr}:@var{port}[,localaddr=@var{addr}]]
 
 Create a VLAN @var{n} shared with another QEMU virtual
@@ -1624,6 +1641,7 @@
                  -net socket,mcast=239.192.168.1:1102,localaddr=1.2.3.4
 @end example
 
+@item -netdev vde,id=@var{id}[,sock=@var{socketpath}][,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
 @item -net vde[,vlan=@var{n}][,name=@var{name}][,sock=@var{socketpath}] [,port=@var{n}][,group=@var{groupname}][,mode=@var{octalmode}]
 Connect VLAN @var{n} to PORT @var{n} of a vde switch running on host and
 listening for incoming connections on @var{socketpath}. Use GROUP @var{groupname}
diff --git a/qemu-os-posix.h b/qemu-os-posix.h
index 8e1149d..7f198e4 100644
--- a/qemu-os-posix.h
+++ b/qemu-os-posix.h
@@ -46,4 +46,6 @@
 typedef struct timespec qemu_timespec;
 int qemu_utimens(const char *path, const qemu_timespec *times);
 
+bool is_daemonized(void);
+
 #endif
diff --git a/qemu-os-win32.h b/qemu-os-win32.h
index 753679b..8ba466d 100644
--- a/qemu-os-win32.h
+++ b/qemu-os-win32.h
@@ -68,6 +68,12 @@
 /* Declaration of ffs() is missing in MinGW's strings.h. */
 int ffs(int i);
 
+/* Missing POSIX functions. Don't use MinGW-w64 macros. */
+#undef gmtime_r
+struct tm *gmtime_r(const time_t *timep, struct tm *result);
+#undef localtime_r
+struct tm *localtime_r(const time_t *timep, struct tm *result);
+
 static inline void os_setup_signal_handling(void) {}
 static inline void os_daemonize(void) {}
 static inline void os_setup_post(void) {}
@@ -86,4 +92,9 @@
 } qemu_timeval;
 int qemu_gettimeofday(qemu_timeval *tp);
 
+static inline bool is_daemonized(void)
+{
+    return false;
+}
+
 #endif
diff --git a/qemu-sockets.c b/qemu-sockets.c
index 361d890..2b1ed2f 100644
--- a/qemu-sockets.c
+++ b/qemu-sockets.c
@@ -24,6 +24,7 @@
 
 #include "qemu_socket.h"
 #include "qemu-common.h" /* for qemu_isdigit */
+#include "main-loop.h"
 
 #ifndef AI_ADDRCONFIG
 # define AI_ADDRCONFIG 0
@@ -54,9 +55,6 @@
         },{
             .name = "ipv6",
             .type = QEMU_OPT_BOOL,
-        },{
-            .name = "block",
-            .type = QEMU_OPT_BOOL,
         },
         { /* end if list */ }
     },
@@ -209,95 +207,200 @@
     return slisten;
 }
 
-int inet_connect_opts(QemuOpts *opts, bool *in_progress, Error **errp)
+#ifdef _WIN32
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+    ((rc) == -EINPROGRESS || (rc) == -EWOULDBLOCK || (rc) == -WSAEALREADY)
+#else
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+    ((rc) == -EINPROGRESS)
+#endif
+
+/* Struct to store connect state for non blocking connect */
+typedef struct ConnectState {
+    int fd;
+    struct addrinfo *addr_list;
+    struct addrinfo *current_addr;
+    NonBlockingConnectHandler *callback;
+    void *opaque;
+} ConnectState;
+
+static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
+                             ConnectState *connect_state);
+
+static void wait_for_connect(void *opaque)
 {
-    struct addrinfo ai,*res,*e;
+    ConnectState *s = opaque;
+    int val = 0, rc = 0;
+    socklen_t valsize = sizeof(val);
+    bool in_progress;
+
+    qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
+
+    do {
+        rc = getsockopt(s->fd, SOL_SOCKET, SO_ERROR, (void *) &val, &valsize);
+    } while (rc == -1 && socket_error() == EINTR);
+
+    /* update rc to contain error */
+    if (!rc && val) {
+        rc = -1;
+    }
+
+    /* connect error */
+    if (rc < 0) {
+        closesocket(s->fd);
+        s->fd = rc;
+    }
+
+    /* try to connect to the next address on the list */
+    while (s->current_addr->ai_next != NULL && s->fd < 0) {
+        s->current_addr = s->current_addr->ai_next;
+        s->fd = inet_connect_addr(s->current_addr, &in_progress, s);
+        /* connect in progress */
+        if (in_progress) {
+            return;
+        }
+    }
+
+    freeaddrinfo(s->addr_list);
+    if (s->callback) {
+        s->callback(s->fd, s->opaque);
+    }
+    g_free(s);
+}
+
+static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
+                             ConnectState *connect_state)
+{
+    int sock, rc;
+
+    *in_progress = false;
+
+    sock = qemu_socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
+    if (sock < 0) {
+        fprintf(stderr, "%s: socket(%s): %s\n", __func__,
+                inet_strfamily(addr->ai_family), strerror(errno));
+        return -1;
+    }
+    qemu_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+    if (connect_state != NULL) {
+        socket_set_nonblock(sock);
+    }
+    /* connect to peer */
+    do {
+        rc = 0;
+        if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) {
+            rc = -socket_error();
+        }
+    } while (rc == -EINTR);
+
+    if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) {
+        connect_state->fd = sock;
+        qemu_set_fd_handler2(sock, NULL, NULL, wait_for_connect,
+                             connect_state);
+        *in_progress = true;
+    } else if (rc < 0) {
+        closesocket(sock);
+        return -1;
+    }
+    return sock;
+}
+
+static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp)
+{
+    struct addrinfo ai, *res;
+    int rc;
     const char *addr;
     const char *port;
-    char uaddr[INET6_ADDRSTRLEN+1];
-    char uport[33];
-    int sock,rc;
-    bool block;
 
-    memset(&ai,0, sizeof(ai));
+    memset(&ai, 0, sizeof(ai));
+
     ai.ai_flags = AI_CANONNAME | AI_ADDRCONFIG;
     ai.ai_family = PF_UNSPEC;
     ai.ai_socktype = SOCK_STREAM;
 
-    if (in_progress) {
-        *in_progress = false;
-    }
-
     addr = qemu_opt_get(opts, "host");
     port = qemu_opt_get(opts, "port");
-    block = qemu_opt_get_bool(opts, "block", 0);
     if (addr == NULL || port == NULL) {
-        fprintf(stderr, "inet_connect: host and/or port not specified\n");
+        fprintf(stderr,
+                "inet_parse_connect_opts: host and/or port not specified\n");
         error_set(errp, QERR_SOCKET_CREATE_FAILED);
+        return NULL;
+    }
+
+    if (qemu_opt_get_bool(opts, "ipv4", 0)) {
+        ai.ai_family = PF_INET;
+    }
+    if (qemu_opt_get_bool(opts, "ipv6", 0)) {
+        ai.ai_family = PF_INET6;
+    }
+
+    /* lookup */
+    rc = getaddrinfo(addr, port, &ai, &res);
+    if (rc != 0) {
+        fprintf(stderr, "getaddrinfo(%s,%s): %s\n", addr, port,
+                gai_strerror(rc));
+        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+        return NULL;
+    }
+    return res;
+}
+
+/**
+ * Create a socket and connect it to an address.
+ *
+ * @opts: QEMU options, recognized parameters strings "host" and "port",
+ *        bools "ipv4" and "ipv6".
+ * @errp: set on error
+ * @callback: callback function for non-blocking connect
+ * @opaque: opaque for callback function
+ *
+ * Returns: -1 on error, file descriptor on success.
+ *
+ * If @callback is non-null, the connect is non-blocking.  If this
+ * function succeeds, callback will be called when the connection
+ * completes, with the file descriptor on success, or -1 on error.
+ */
+int inet_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque)
+{
+    struct addrinfo *res, *e;
+    int sock = -1;
+    bool in_progress;
+    ConnectState *connect_state = NULL;
+
+    res = inet_parse_connect_opts(opts, errp);
+    if (!res) {
         return -1;
     }
 
-    if (qemu_opt_get_bool(opts, "ipv4", 0))
-        ai.ai_family = PF_INET;
-    if (qemu_opt_get_bool(opts, "ipv6", 0))
-        ai.ai_family = PF_INET6;
-
-    /* lookup */
-    if (0 != (rc = getaddrinfo(addr, port, &ai, &res))) {
-        fprintf(stderr,"getaddrinfo(%s,%s): %s\n", addr, port,
-                gai_strerror(rc));
-        error_set(errp, QERR_SOCKET_CREATE_FAILED);
-	return -1;
+    if (callback != NULL) {
+        connect_state = g_malloc0(sizeof(*connect_state));
+        connect_state->addr_list = res;
+        connect_state->callback = callback;
+        connect_state->opaque = opaque;
     }
 
     for (e = res; e != NULL; e = e->ai_next) {
-        if (getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
-                            uaddr,INET6_ADDRSTRLEN,uport,32,
-                            NI_NUMERICHOST | NI_NUMERICSERV) != 0) {
-            fprintf(stderr,"%s: getnameinfo: oops\n", __FUNCTION__);
-            continue;
+        if (connect_state != NULL) {
+            connect_state->current_addr = e;
         }
-        sock = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
-        if (sock < 0) {
-            fprintf(stderr,"%s: socket(%s): %s\n", __FUNCTION__,
-            inet_strfamily(e->ai_family), strerror(errno));
-            continue;
-        }
-        setsockopt(sock,SOL_SOCKET,SO_REUSEADDR,(void*)&on,sizeof(on));
-        if (!block) {
-            socket_set_nonblock(sock);
-        }
-        /* connect to peer */
-        do {
-            rc = 0;
-            if (connect(sock, e->ai_addr, e->ai_addrlen) < 0) {
-                rc = -socket_error();
+        sock = inet_connect_addr(e, &in_progress, connect_state);
+        if (in_progress) {
+            return sock;
+        } else if (sock >= 0) {
+            /* non blocking socket immediate success, call callback */
+            if (callback != NULL) {
+                callback(sock, opaque);
             }
-        } while (rc == -EINTR);
-
-  #ifdef _WIN32
-        if (!block && (rc == -EINPROGRESS || rc == -EWOULDBLOCK
-                       || rc == -WSAEALREADY)) {
-  #else
-        if (!block && (rc == -EINPROGRESS)) {
-  #endif
-            if (in_progress) {
-                *in_progress = true;
-            }
-        } else if (rc < 0) {
-            if (NULL == e->ai_next)
-                fprintf(stderr, "%s: connect(%s,%s,%s,%s): %s\n", __FUNCTION__,
-                        inet_strfamily(e->ai_family),
-                        e->ai_canonname, uaddr, uport, strerror(errno));
-            closesocket(sock);
-            continue;
+            break;
         }
-        freeaddrinfo(res);
-        return sock;
     }
-    error_set(errp, QERR_SOCKET_CONNECT_FAILED);
+    if (sock < 0) {
+        error_set(errp, QERR_SOCKET_CONNECT_FAILED);
+    }
+    g_free(connect_state);
     freeaddrinfo(res);
-    return -1;
+    return sock;
 }
 
 int inet_dgram_opts(QemuOpts *opts)
@@ -353,7 +456,7 @@
     if (0 != (rc = getaddrinfo(addr, port, &ai, &local))) {
         fprintf(stderr,"getaddrinfo(%s,%s): %s\n", addr, port,
                 gai_strerror(rc));
-        return -1;
+        goto err;
     }
 
     /* create socket */
@@ -493,17 +596,54 @@
     return sock;
 }
 
-int inet_connect(const char *str, bool block, bool *in_progress, Error **errp)
+/**
+ * Create a blocking socket and connect it to an address.
+ *
+ * @str: address string
+ * @errp: set in case of an error
+ *
+ * Returns -1 in case of error, file descriptor on success
+ **/
+int inet_connect(const char *str, Error **errp)
 {
     QemuOpts *opts;
     int sock = -1;
 
     opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
     if (inet_parse(opts, str) == 0) {
-        if (block) {
-            qemu_opt_set(opts, "block", "on");
-        }
-        sock = inet_connect_opts(opts, in_progress, errp);
+        sock = inet_connect_opts(opts, errp, NULL, NULL);
+    } else {
+        error_set(errp, QERR_SOCKET_CREATE_FAILED);
+    }
+    qemu_opts_del(opts);
+    return sock;
+}
+
+/**
+ * Create a non-blocking socket and connect it to an address.
+ * Calls the callback function with fd in case of success or -1 in case of
+ * error.
+ *
+ * @str: address string
+ * @callback: callback function that is called when connect completes,
+ *            cannot be NULL.
+ * @opaque: opaque for callback function
+ * @errp: set in case of an error
+ *
+ * Returns: -1 on immediate error, file descriptor on success.
+ **/
+int inet_nonblocking_connect(const char *str,
+                             NonBlockingConnectHandler *callback,
+                             void *opaque, Error **errp)
+{
+    QemuOpts *opts;
+    int sock = -1;
+
+    g_assert(callback != NULL);
+
+    opts = qemu_opts_create(&dummy_opts, NULL, 0, NULL);
+    if (inet_parse(opts, str) == 0) {
+        sock = inet_connect_opts(opts, errp, callback, opaque);
     } else {
         error_set(errp, QERR_SOCKET_CREATE_FAILED);
     }
diff --git a/qemu-timer.c b/qemu-timer.c
index c7a1551..908a103 100644
--- a/qemu-timer.c
+++ b/qemu-timer.c
@@ -372,21 +372,20 @@
 
 void qemu_run_timers(QEMUClock *clock)
 {
-    QEMUTimer **ptimer_head, *ts;
+    QEMUTimer *ts;
     int64_t current_time;
    
     if (!clock->enabled)
         return;
 
     current_time = qemu_get_clock_ns(clock);
-    ptimer_head = &clock->active_timers;
     for(;;) {
-        ts = *ptimer_head;
+        ts = clock->active_timers;
         if (!qemu_timer_expired_ns(ts, current_time)) {
             break;
         }
         /* remove timer from the list before calling the callback */
-        *ptimer_head = ts->next;
+        clock->active_timers = ts->next;
         ts->next = NULL;
 
         /* run the callback (the timer list can be modified) */
diff --git a/qemu-timer.h b/qemu-timer.h
index f8af595..da7e97c 100644
--- a/qemu-timer.h
+++ b/qemu-timer.h
@@ -218,7 +218,7 @@
     return val;
 }
 
-#elif defined(__sparc_v8plus__) || defined(__sparc_v8plusa__) || defined(__sparc_v9__)
+#elif defined(__sparc__)
 
 static inline int64_t cpu_get_real_ticks (void)
 {
@@ -227,6 +227,8 @@
     asm volatile("rd %%tick,%0" : "=r"(rval));
     return rval;
 #else
+    /* We need an %o or %g register for this.  For recent enough gcc
+       there is an "h" constraint for that.  Don't bother with that.  */
     union {
         uint64_t i64;
         struct {
@@ -234,8 +236,8 @@
             uint32_t low;
         }       i32;
     } rval;
-    asm volatile("rd %%tick,%1; srlx %1,32,%0"
-                 : "=r"(rval.i32.high), "=r"(rval.i32.low));
+    asm volatile("rd %%tick,%%g1; srlx %%g1,32,%0; mov %%g1,%1"
+                 : "=r"(rval.i32.high), "=r"(rval.i32.low) : : "g1");
     return rval.i64;
 #endif
 }
diff --git a/qemu-tool.c b/qemu-tool.c
index 18205ba..f2f9813 100644
--- a/qemu-tool.c
+++ b/qemu-tool.c
@@ -19,6 +19,7 @@
 #include "qemu-log.h"
 #include "migration.h"
 #include "main-loop.h"
+#include "sysemu.h"
 #include "qemu_socket.h"
 #include "slirp/libslirp.h"
 
@@ -37,6 +38,11 @@
 
 Monitor *cur_mon;
 
+void vm_stop(RunState state)
+{
+    abort();
+}
+
 int monitor_cur_is_qmp(void)
 {
     return 0;
diff --git a/qemu_socket.h b/qemu_socket.h
index 30ae6af..3e8aee9 100644
--- a/qemu_socket.h
+++ b/qemu_socket.h
@@ -38,12 +38,21 @@
 void socket_set_nonblock(int fd);
 int send_all(int fd, const void *buf, int len1);
 
-/* New, ipv6-ready socket helper functions, see qemu-sockets.c */
+/* callback function for nonblocking connect
+ * valid fd on success, negative error code on failure
+ */
+typedef void NonBlockingConnectHandler(int fd, void *opaque);
+
 int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp);
 int inet_listen(const char *str, char *ostr, int olen,
                 int socktype, int port_offset, Error **errp);
-int inet_connect_opts(QemuOpts *opts, bool *in_progress, Error **errp);
-int inet_connect(const char *str, bool block, bool *in_progress, Error **errp);
+int inet_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque);
+int inet_connect(const char *str, Error **errp);
+int inet_nonblocking_connect(const char *str,
+                             NonBlockingConnectHandler *callback,
+                             void *opaque, Error **errp);
+
 int inet_dgram_opts(QemuOpts *opts);
 const char *inet_strfamily(int family);
 
diff --git a/qerror.h b/qerror.h
index d0a76a4..c91708c 100644
--- a/qerror.h
+++ b/qerror.h
@@ -48,6 +48,12 @@
 #define QERR_BASE_NOT_FOUND \
     ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found"
 
+#define QERR_BLOCK_JOB_NOT_ACTIVE \
+    ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'"
+
+#define QERR_BLOCK_JOB_PAUSED \
+    ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused"
+
 #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \
     ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'"
 
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index ce90421..726930a 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -828,7 +828,7 @@
             }
 
             memset(&ifr, 0, sizeof(ifr));
-            strncpy(ifr.ifr_name,  info->value->name, IF_NAMESIZE);
+            pstrcpy(ifr.ifr_name, IF_NAMESIZE, info->value->name);
             if (ioctl(sock, SIOCGIFHWADDR, &ifr) == -1) {
                 snprintf(err_msg, sizeof(err_msg),
                          "failed to get MAC address of %s: %s",
@@ -988,8 +988,6 @@
 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
 {
     error_set(err, QERR_UNSUPPORTED);
-
-    return;
 }
 #endif
 
diff --git a/qga/commands-win32.c b/qga/commands-win32.c
index 54bc546..5bd8fb2 100644
--- a/qga/commands-win32.c
+++ b/qga/commands-win32.c
@@ -180,8 +180,6 @@
 void qmp_guest_fstrim(bool has_minimum, int64_t minimum, Error **err)
 {
     error_set(err, QERR_UNSUPPORTED);
-
-    return;
 }
 
 typedef enum {
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 6e21ddb..2f8477e 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -493,6 +493,30 @@
 EQMP
 
     {
+        .name       = "xen-set-global-dirty-log",
+        .args_type  = "enable:b",
+        .mhandler.cmd_new = qmp_marshal_input_xen_set_global_dirty_log,
+    },
+
+SQMP
+xen-set-global-dirty-log
+-------
+
+Enable or disable the global dirty log mode.
+
+Arguments:
+
+- "enable": Enable it or disable it.
+
+Example:
+
+-> { "execute": "xen-set-global-dirty-log",
+     "arguments": { "enable": true } }
+<- { "return": {} }
+
+EQMP
+
+    {
         .name       = "migrate",
         .args_type  = "detach:-d,blk:-b,inc:-i,uri:s",
         .mhandler.cmd_new = qmp_marshal_input_migrate,
@@ -787,11 +811,17 @@
 
     {
         .name       = "block-stream",
-        .args_type  = "device:B,base:s?,speed:o?",
+        .args_type  = "device:B,base:s?,speed:o?,on-error:s?",
         .mhandler.cmd_new = qmp_marshal_input_block_stream,
     },
 
     {
+        .name       = "block-commit",
+        .args_type  = "device:B,base:s?,top:s,speed:o?",
+        .mhandler.cmd_new = qmp_marshal_input_block_commit,
+    },
+
+    {
         .name       = "block-job-set-speed",
         .args_type  = "device:B,speed:o",
         .mhandler.cmd_new = qmp_marshal_input_block_job_set_speed,
@@ -799,10 +829,20 @@
 
     {
         .name       = "block-job-cancel",
-        .args_type  = "device:B",
+        .args_type  = "device:B,force:b?",
         .mhandler.cmd_new = qmp_marshal_input_block_job_cancel,
     },
     {
+        .name       = "block-job-pause",
+        .args_type  = "device:B",
+        .mhandler.cmd_new = qmp_marshal_input_block_job_pause,
+    },
+    {
+        .name       = "block-job-resume",
+        .args_type  = "device:B",
+        .mhandler.cmd_new = qmp_marshal_input_block_job_resume,
+    },
+    {
         .name       = "transaction",
         .args_type  = "actions:q",
         .mhandler.cmd_new = qmp_marshal_input_transaction,
@@ -1231,10 +1271,7 @@
     {
         .name       = "add_client",
         .args_type  = "protocol:s,fdname:s,skipauth:b?,tls:b?",
-        .params     = "protocol fdname skipauth tls",
-        .help       = "add a graphics client",
-        .user_print = monitor_user_noop,
-        .mhandler.cmd_new = add_graphics_client,
+        .mhandler.cmd_new = qmp_marshal_input_add_client,
     },
 
 SQMP
diff --git a/qmp.c b/qmp.c
index 8463922..36c54c5 100644
--- a/qmp.c
+++ b/qmp.c
@@ -479,3 +479,46 @@
     return arch_query_cpu_definitions(errp);
 }
 
+void qmp_add_client(const char *protocol, const char *fdname,
+                    bool has_skipauth, bool skipauth, bool has_tls, bool tls,
+                    Error **errp)
+{
+    CharDriverState *s;
+    int fd;
+
+    fd = monitor_get_fd(cur_mon, fdname, errp);
+    if (fd < 0) {
+        return;
+    }
+
+    if (strcmp(protocol, "spice") == 0) {
+        if (!using_spice) {
+            error_set(errp, QERR_DEVICE_NOT_ACTIVE, "spice");
+            close(fd);
+            return;
+        }
+        skipauth = has_skipauth ? skipauth : false;
+        tls = has_tls ? tls : false;
+        if (qemu_spice_display_add_client(fd, skipauth, tls) < 0) {
+            error_setg(errp, "spice failed to add client");
+            close(fd);
+        }
+        return;
+#ifdef CONFIG_VNC
+    } else if (strcmp(protocol, "vnc") == 0) {
+        skipauth = has_skipauth ? skipauth : false;
+        vnc_display_add_client(NULL, fd, skipauth);
+        return;
+#endif
+    } else if ((s = qemu_chr_find(protocol)) != NULL) {
+        if (qemu_chr_add_client(s, fd) < 0) {
+            error_setg(errp, "failed to add client");
+            close(fd);
+            return;
+        }
+        return;
+    }
+
+    error_setg(errp, "protocol '%s' is invalid", protocol);
+    close(fd);
+}
diff --git a/savevm.c b/savevm.c
index c7fe283..31fd2e0 100644
--- a/savevm.c
+++ b/savevm.c
@@ -2201,7 +2201,6 @@
  the_end:
     if (saved_vm_running)
         vm_start();
-    return;
 }
 
 int load_vmstate(const char *name)
diff --git a/scripts/qapi-types.py b/scripts/qapi-types.py
index 49ef569..1b84834 100644
--- a/scripts/qapi-types.py
+++ b/scripts/qapi-types.py
@@ -91,9 +91,9 @@
 
 def generate_enum_name(name):
     if name.isupper():
-        return c_fun(name)
+        return c_fun(name, False)
     new_name = ''
-    for c in c_fun(name):
+    for c in c_fun(name, False):
         if c.isupper():
             new_name += '_'
         new_name += c
diff --git a/scripts/qapi-visit.py b/scripts/qapi-visit.py
index e2093e8..a360de7 100644
--- a/scripts/qapi-visit.py
+++ b/scripts/qapi-visit.py
@@ -173,7 +173,7 @@
                 break;
 ''',
                 abbrev = de_camel_case(name).upper(),
-                enum = c_fun(de_camel_case(key)).upper(),
+                enum = c_fun(de_camel_case(key),False).upper(),
                 c_type=members[key],
                 c_name=c_fun(key))
 
diff --git a/scripts/qapi.py b/scripts/qapi.py
index 122b4cb..afc5f32 100644
--- a/scripts/qapi.py
+++ b/scripts/qapi.py
@@ -141,7 +141,7 @@
             new_name += ch.lower()
     return new_name
 
-def c_var(name):
+def c_var(name, protect=True):
     # ANSI X3J11/88-090, 3.1.1
     c89_words = set(['auto', 'break', 'case', 'char', 'const', 'continue',
                      'default', 'do', 'double', 'else', 'enum', 'extern', 'float',
@@ -156,12 +156,14 @@
     # GCC http://gcc.gnu.org/onlinedocs/gcc-4.7.1/gcc/C-Extensions.html
     # excluding _.*
     gcc_words = set(['asm', 'typeof'])
-    if name in c89_words | c99_words | c11_words | gcc_words:
+    # namespace pollution:
+    polluted_words = set(['unix'])
+    if protect and (name in c89_words | c99_words | c11_words | gcc_words | polluted_words):
         return "q_" + name
     return name.replace('-', '_').lstrip("*")
 
-def c_fun(name):
-    return c_var(name).replace('.', '_')
+def c_fun(name, protect=True):
+    return c_var(name, protect).replace('.', '_')
 
 def c_list_type(name):
     return '%sList' % name
diff --git a/scripts/tracetool/backend/dtrace.py b/scripts/tracetool/backend/dtrace.py
index 9cab75c..6be7047 100644
--- a/scripts/tracetool/backend/dtrace.py
+++ b/scripts/tracetool/backend/dtrace.py
@@ -87,7 +87,7 @@
         if len(e.args) > 0:
             for name in e.args.names():
                 # Append underscore to reserved keywords
-                if name in ('limit', 'in', 'next', 'self'):
+                if name in ('limit', 'in', 'next', 'self', 'function'):
                     name += '_'
                 out('  %s = $arg%d;' % (name, i))
                 i += 1
diff --git a/scripts/update-linux-headers.sh b/scripts/update-linux-headers.sh
index a639c5b..67be2ef 100755
--- a/scripts/update-linux-headers.sh
+++ b/scripts/update-linux-headers.sh
@@ -28,7 +28,21 @@
     output="$PWD"
 fi
 
-for arch in x86 powerpc s390; do
+# This will pick up non-directories too (eg "Kconfig") but we will
+# ignore them in the next loop.
+ARCHLIST=$(cd "$linux/arch" && echo *)
+
+for arch in $ARCHLIST; do
+    # Discard anything which isn't a KVM-supporting architecture
+    if ! [ -e "$linux/arch/$arch/include/asm/kvm.h" ]; then
+        continue
+    fi
+
+    # Blacklist architectures which have KVM headers but are actually dead
+    if [ "$arch" = "ia64" ]; then
+        continue
+    fi
+
     make -C "$linux" INSTALL_HDR_PATH="$tmpdir" SRCARCH=$arch headers_install
 
     rm -rf "$output/linux-headers/asm-$arch"
@@ -43,7 +57,7 @@
 
 rm -rf "$output/linux-headers/linux"
 mkdir -p "$output/linux-headers/linux"
-for header in kvm.h kvm_para.h vhost.h virtio_config.h virtio_ring.h; do
+for header in kvm.h kvm_para.h vfio.h vhost.h virtio_config.h virtio_ring.h; do
     cp "$tmpdir/include/linux/$header" "$output/linux-headers/linux"
 done
 rm -rf "$output/linux-headers/asm-generic"
diff --git a/slirp/ip_icmp.h b/slirp/ip_icmp.h
index 1a1af91..be4426b 100644
--- a/slirp/ip_icmp.h
+++ b/slirp/ip_icmp.h
@@ -92,8 +92,8 @@
 
 /*
  * Lower bounds on packet lengths for various types.
- * For the error advice packets must first insure that the
- * packet is large enought to contain the returned ip header.
+ * For the error advice packets must first ensure that the
+ * packet is large enough to contain the returned ip header.
  * Only then can we do the check to see if 64 bits of packet
  * data have been returned, since we need to check the returned
  * ip header length.
diff --git a/slirp/ip_input.c b/slirp/ip_input.c
index ce24faf..6f4cff8 100644
--- a/slirp/ip_input.c
+++ b/slirp/ip_input.c
@@ -213,7 +213,6 @@
 	return;
 bad:
 	m_free(m);
-	return;
 }
 
 #define iptofrag(P) ((struct ipasfrag *)(((char*)(P)) - sizeof(struct qlink)))
diff --git a/slirp/tcp_input.c b/slirp/tcp_input.c
index 942aaf4..6440eae 100644
--- a/slirp/tcp_input.c
+++ b/slirp/tcp_input.c
@@ -1281,8 +1281,6 @@
 	 * Drop space held by incoming segment and return.
 	 */
 	m_free(m);
-
-	return;
 }
 
 static void
diff --git a/slirp/tcp_subr.c b/slirp/tcp_subr.c
index 025b374..1542e43 100644
--- a/slirp/tcp_subr.c
+++ b/slirp/tcp_subr.c
@@ -114,9 +114,9 @@
 	int win = 0;
 
 	DEBUG_CALL("tcp_respond");
-	DEBUG_ARG("tp = %lx", (long)tp);
-	DEBUG_ARG("ti = %lx", (long)ti);
-	DEBUG_ARG("m = %lx", (long)m);
+	DEBUG_ARG("tp = %p", tp);
+	DEBUG_ARG("ti = %p", ti);
+	DEBUG_ARG("m = %p", m);
 	DEBUG_ARG("ack = %u", ack);
 	DEBUG_ARG("seq = %u", seq);
 	DEBUG_ARG("flags = %x", flags);
@@ -124,7 +124,7 @@
 	if (tp)
 		win = sbspace(&tp->t_socket->so_rcv);
         if (m == NULL) {
-		if ((m = m_get(tp->t_socket->slirp)) == NULL)
+		if (!tp || (m = m_get(tp->t_socket->slirp)) == NULL)
 			return;
 		tlen = 0;
 		m->m_data += IF_MAXLINKHDR;
diff --git a/slirp/tftp.c b/slirp/tftp.c
index b78765f..1a79c45 100644
--- a/slirp/tftp.c
+++ b/slirp/tftp.c
@@ -37,6 +37,10 @@
 
 static void tftp_session_terminate(struct tftp_session *spt)
 {
+    if (spt->fd >= 0) {
+        close(spt->fd);
+        spt->fd = -1;
+    }
     g_free(spt->filename);
     spt->slirp = NULL;
 }
@@ -54,7 +58,7 @@
 
     /* sessions time out after 5 inactive seconds */
     if ((int)(curtime - spt->timestamp) > 5000) {
-        g_free(spt->filename);
+        tftp_session_terminate(spt);
         goto found;
     }
   }
@@ -64,6 +68,7 @@
  found:
   memset(spt, 0, sizeof(*spt));
   memcpy(&spt->client_ip, &tp->ip.ip_src, sizeof(spt->client_ip));
+  spt->fd = -1;
   spt->client_port = tp->udp.uh_sport;
   spt->slirp = slirp;
 
@@ -92,37 +97,36 @@
   return -1;
 }
 
-static int tftp_read_data(struct tftp_session *spt, uint16_t block_nr,
+static int tftp_read_data(struct tftp_session *spt, uint32_t block_nr,
                           uint8_t *buf, int len)
 {
-  int fd;
-  int bytes_read = 0;
+    int bytes_read = 0;
 
-  fd = open(spt->filename, O_RDONLY | O_BINARY);
+    if (spt->fd < 0) {
+        spt->fd = open(spt->filename, O_RDONLY | O_BINARY);
+    }
 
-  if (fd < 0) {
-    return -1;
-  }
+    if (spt->fd < 0) {
+        return -1;
+    }
 
-  if (len) {
-    lseek(fd, block_nr * 512, SEEK_SET);
+    if (len) {
+        lseek(spt->fd, block_nr * 512, SEEK_SET);
 
-    bytes_read = read(fd, buf, len);
-  }
+        bytes_read = read(spt->fd, buf, len);
+    }
 
-  close(fd);
-
-  return bytes_read;
+    return bytes_read;
 }
 
 static int tftp_send_oack(struct tftp_session *spt,
-                          const char *key, uint32_t value,
+                          const char *keys[], uint32_t values[], int nb,
                           struct tftp_t *recv_tp)
 {
     struct sockaddr_in saddr, daddr;
     struct mbuf *m;
     struct tftp_t *tp;
-    int n = 0;
+    int i, n = 0;
 
     m = m_get(spt->slirp);
 
@@ -136,10 +140,12 @@
     m->m_data += sizeof(struct udpiphdr);
 
     tp->tp_op = htons(TFTP_OACK);
-    n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s",
-                  key) + 1;
-    n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u",
-                  value) + 1;
+    for (i = 0; i < nb; i++) {
+        n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%s",
+                      keys[i]) + 1;
+        n += snprintf(tp->x.tp_buf + n, sizeof(tp->x.tp_buf) - n, "%u",
+                      values[i]) + 1;
+    }
 
     saddr.sin_addr = recv_tp->ip.ip_dst;
     saddr.sin_port = recv_tp->udp.uh_dport;
@@ -193,23 +199,18 @@
   tftp_session_terminate(spt);
 }
 
-static int tftp_send_data(struct tftp_session *spt,
-                          uint16_t block_nr,
-			  struct tftp_t *recv_tp)
+static void tftp_send_next_block(struct tftp_session *spt,
+                                 struct tftp_t *recv_tp)
 {
   struct sockaddr_in saddr, daddr;
   struct mbuf *m;
   struct tftp_t *tp;
   int nobytes;
 
-  if (block_nr < 1) {
-    return -1;
-  }
-
   m = m_get(spt->slirp);
 
   if (!m) {
-    return -1;
+    return;
   }
 
   memset(m->m_data, 0, m->m_size);
@@ -219,7 +220,7 @@
   m->m_data += sizeof(struct udpiphdr);
 
   tp->tp_op = htons(TFTP_DATA);
-  tp->x.tp_data.tp_block_nr = htons(block_nr);
+  tp->x.tp_data.tp_block_nr = htons((spt->block_nr + 1) & 0xffff);
 
   saddr.sin_addr = recv_tp->ip.ip_dst;
   saddr.sin_port = recv_tp->udp.uh_dport;
@@ -227,7 +228,7 @@
   daddr.sin_addr = spt->client_ip;
   daddr.sin_port = spt->client_port;
 
-  nobytes = tftp_read_data(spt, block_nr - 1, tp->x.tp_data.tp_buf, 512);
+  nobytes = tftp_read_data(spt, spt->block_nr, tp->x.tp_data.tp_buf, 512);
 
   if (nobytes < 0) {
     m_free(m);
@@ -236,7 +237,7 @@
 
     tftp_send_error(spt, 1, "File not found", tp);
 
-    return -1;
+    return;
   }
 
   m->m_len = sizeof(struct tftp_t) - (512 - nobytes) -
@@ -251,7 +252,7 @@
     tftp_session_terminate(spt);
   }
 
-  return 0;
+  spt->block_nr++;
 }
 
 static void tftp_handle_rrq(Slirp *slirp, struct tftp_t *tp, int pktlen)
@@ -260,6 +261,9 @@
   int s, k;
   size_t prefix_len;
   char *req_fname;
+  const char *option_name[2];
+  uint32_t option_value[2];
+  int nb_options = 0;
 
   /* check if a session already exists and if so terminate it */
   s = tftp_session_find(slirp, tp);
@@ -337,7 +341,7 @@
       return;
   }
 
-  while (k < pktlen) {
+  while (k < pktlen && nb_options < ARRAY_SIZE(option_name)) {
       const char *key, *value;
 
       key = &tp->x.tp_buf[k];
@@ -364,12 +368,32 @@
 	      }
 	  }
 
-	  tftp_send_oack(spt, "tsize", tsize, tp);
-	  return;
+          option_name[nb_options] = "tsize";
+          option_value[nb_options] = tsize;
+          nb_options++;
+      } else if (strcasecmp(key, "blksize") == 0) {
+          int blksize = atoi(value);
+
+          /* If blksize option is bigger than what we will
+           * emit, accept the option with our packet size.
+           * Otherwise, simply do as we didn't see the option.
+           */
+          if (blksize >= 512) {
+              option_name[nb_options] = "blksize";
+              option_value[nb_options] = 512;
+              nb_options++;
+          }
       }
   }
 
-  tftp_send_data(spt, 1, tp);
+  if (nb_options > 0) {
+      assert(nb_options <= ARRAY_SIZE(option_name));
+      tftp_send_oack(spt, option_name, option_value, nb_options, tp);
+      return;
+  }
+
+  spt->block_nr = 0;
+  tftp_send_next_block(spt, tp);
 }
 
 static void tftp_handle_ack(Slirp *slirp, struct tftp_t *tp, int pktlen)
@@ -382,11 +406,7 @@
     return;
   }
 
-  if (tftp_send_data(&slirp->tftp_sessions[s],
-		     ntohs(tp->x.tp_data.tp_block_nr) + 1,
-		     tp) < 0) {
-    return;
-  }
+  tftp_send_next_block(&slirp->tftp_sessions[s], tp);
 }
 
 static void tftp_handle_error(Slirp *slirp, struct tftp_t *tp, int pktlen)
diff --git a/slirp/tftp.h b/slirp/tftp.h
index 72e5e91..51704e4 100644
--- a/slirp/tftp.h
+++ b/slirp/tftp.h
@@ -33,9 +33,11 @@
 struct tftp_session {
     Slirp *slirp;
     char *filename;
+    int fd;
 
     struct in_addr client_ip;
     uint16_t client_port;
+    uint32_t block_nr;
 
     int timestamp;
 };
diff --git a/slirp/udp.c b/slirp/udp.c
index ced5096..9286cb7 100644
--- a/slirp/udp.c
+++ b/slirp/udp.c
@@ -231,7 +231,6 @@
 	return;
 bad:
 	m_free(m);
-	return;
 }
 
 int udp_output2(struct socket *so, struct mbuf *m,
diff --git a/softmmu_defs.h b/softmmu_defs.h
index 8d59f9d..1f25e33 100644
--- a/softmmu_defs.h
+++ b/softmmu_defs.h
@@ -9,25 +9,6 @@
 #ifndef SOFTMMU_DEFS_H
 #define SOFTMMU_DEFS_H
 
-#ifndef CONFIG_TCG_PASS_AREG0
-uint8_t __ldb_mmu(target_ulong addr, int mmu_idx);
-void __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
-uint16_t __ldw_mmu(target_ulong addr, int mmu_idx);
-void __stw_mmu(target_ulong addr, uint16_t val, int mmu_idx);
-uint32_t __ldl_mmu(target_ulong addr, int mmu_idx);
-void __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);
-uint64_t __ldq_mmu(target_ulong addr, int mmu_idx);
-void __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx);
-
-uint8_t __ldb_cmmu(target_ulong addr, int mmu_idx);
-void __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx);
-uint16_t __ldw_cmmu(target_ulong addr, int mmu_idx);
-void __stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx);
-uint32_t __ldl_cmmu(target_ulong addr, int mmu_idx);
-void __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
-uint64_t __ldq_cmmu(target_ulong addr, int mmu_idx);
-void __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
-#else
 uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                     int mmu_idx);
@@ -54,5 +35,3 @@
 void helper_stq_cmmu(CPUArchState *env, target_ulong addr, uint64_t val,
                      int mmu_idx);
 #endif
-
-#endif
diff --git a/softmmu_header.h b/softmmu_header.h
index cf1aa38..d8d9c81 100644
--- a/softmmu_header.h
+++ b/softmmu_header.h
@@ -78,23 +78,10 @@
 #define ADDR_READ addr_read
 #endif
 
-#ifndef CONFIG_TCG_PASS_AREG0
-#define ENV_PARAM
-#define ENV_VAR
-#define CPU_PREFIX
-#define HELPER_PREFIX __
-#else
-#define ENV_PARAM CPUArchState *env,
-#define ENV_VAR env,
-#define CPU_PREFIX cpu_
-#define HELPER_PREFIX helper_
-#endif
-
 /* generic load/store macros */
 
 static inline RES_TYPE
-glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
-                                                     target_ulong ptr)
+glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
 {
     int page_index;
     RES_TYPE res;
@@ -106,9 +93,7 @@
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
-                                                                     addr,
-                                                                     mmu_idx);
+        res = glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(env, addr, mmu_idx);
     } else {
         uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(ld, USUFFIX), _raw)(hostaddr);
@@ -118,8 +103,7 @@
 
 #if DATA_SIZE <= 2
 static inline int
-glue(glue(glue(CPU_PREFIX, lds), SUFFIX), MEMSUFFIX)(ENV_PARAM
-                                                     target_ulong ptr)
+glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
 {
     int res, page_index;
     target_ulong addr;
@@ -130,8 +114,8 @@
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
-                               MMUSUFFIX)(ENV_VAR addr, mmu_idx);
+        res = (DATA_STYPE)glue(glue(helper_ld, SUFFIX),
+                               MMUSUFFIX)(env, addr, mmu_idx);
     } else {
         uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(lds, SUFFIX), _raw)(hostaddr);
@@ -145,8 +129,8 @@
 /* generic store macro */
 
 static inline void
-glue(glue(glue(CPU_PREFIX, st), SUFFIX), MEMSUFFIX)(ENV_PARAM target_ulong ptr,
-                                                    RES_TYPE v)
+glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr,
+                                      RES_TYPE v)
 {
     int page_index;
     target_ulong addr;
@@ -157,8 +141,7 @@
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
-                                                               mmu_idx);
+        glue(glue(helper_st, SUFFIX), MMUSUFFIX)(env, addr, v, mmu_idx);
     } else {
         uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         glue(glue(st, SUFFIX), _raw)(hostaddr, v);
@@ -170,52 +153,50 @@
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
 #if DATA_SIZE == 8
-static inline float64 glue(glue(CPU_PREFIX, ldfq), MEMSUFFIX)(ENV_PARAM
-                                                              target_ulong ptr)
+static inline float64 glue(cpu_ldfq, MEMSUFFIX)(CPUArchState *env,
+                                                target_ulong ptr)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
-    u.i = glue(glue(CPU_PREFIX, ldq), MEMSUFFIX)(ENV_VAR ptr);
+    u.i = glue(cpu_ldq, MEMSUFFIX)(env, ptr);
     return u.d;
 }
 
-static inline void glue(glue(CPU_PREFIX, stfq), MEMSUFFIX)(ENV_PARAM
-                                                           target_ulong ptr,
-                                                           float64 v)
+static inline void glue(cpu_stfq, MEMSUFFIX)(CPUArchState *env,
+                                             target_ulong ptr, float64 v)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
     u.d = v;
-    glue(glue(CPU_PREFIX, stq), MEMSUFFIX)(ENV_VAR ptr, u.i);
+    glue(cpu_stq, MEMSUFFIX)(env, ptr, u.i);
 }
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
-static inline float32 glue(glue(CPU_PREFIX, ldfl), MEMSUFFIX)(ENV_PARAM
-                                                              target_ulong ptr)
+static inline float32 glue(cpu_ldfl, MEMSUFFIX)(CPUArchState *env,
+                                                target_ulong ptr)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(glue(CPU_PREFIX, ldl), MEMSUFFIX)(ENV_VAR ptr);
+    u.i = glue(cpu_ldl, MEMSUFFIX)(env, ptr);
     return u.f;
 }
 
-static inline void glue(glue(CPU_PREFIX, stfl), MEMSUFFIX)(ENV_PARAM
-                                                           target_ulong ptr,
-                                                           float32 v)
+static inline void glue(cpu_stfl, MEMSUFFIX)(CPUArchState *env,
+                                             target_ulong ptr, float32 v)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
     u.f = v;
-    glue(glue(CPU_PREFIX, stl), MEMSUFFIX)(ENV_VAR ptr, u.i);
+    glue(cpu_stl, MEMSUFFIX)(env, ptr, u.i);
 }
 #endif /* DATA_SIZE == 4 */
 
@@ -230,7 +211,3 @@
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
 #undef ADDR_READ
-#undef ENV_PARAM
-#undef ENV_VAR
-#undef CPU_PREFIX
-#undef HELPER_PREFIX
diff --git a/softmmu_template.h b/softmmu_template.h
index b8bd700..e2490f0 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -54,23 +54,11 @@
 #define ADDR_READ addr_read
 #endif
 
-#ifndef CONFIG_TCG_PASS_AREG0
-#define ENV_PARAM
-#define ENV_VAR
-#define CPU_PREFIX
-#define HELPER_PREFIX __
-#else
-#define ENV_PARAM CPUArchState *env,
-#define ENV_VAR env,
-#define CPU_PREFIX cpu_
-#define HELPER_PREFIX helper_
-#endif
-
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
                                                         target_ulong addr,
                                                         int mmu_idx,
                                                         uintptr_t retaddr);
-static inline DATA_TYPE glue(io_read, SUFFIX)(ENV_PARAM
+static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               target_phys_addr_t physaddr,
                                               target_ulong addr,
                                               uintptr_t retaddr)
@@ -104,9 +92,8 @@
 
 /* handle all cases except unaligned access which span two pages */
 DATA_TYPE
-glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
-                                                       target_ulong addr,
-                                                       int mmu_idx)
+glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
+                                         int mmu_idx)
 {
     DATA_TYPE res;
     int index;
@@ -126,15 +113,15 @@
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
-            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
+            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr,
                                                          mmu_idx, retaddr);
         } else {
             /* unaligned/aligned access in the same page */
@@ -142,7 +129,7 @@
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+                do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -154,7 +141,7 @@
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
         goto redo;
@@ -164,7 +151,7 @@
 
 /* handle all unaligned cases */
 static DATA_TYPE
-glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
                                        target_ulong addr,
                                        int mmu_idx,
                                        uintptr_t retaddr)
@@ -183,15 +170,15 @@
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
             addr1 = addr & ~(DATA_SIZE - 1);
             addr2 = addr1 + DATA_SIZE;
-            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr1,
+            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr1,
                                                           mmu_idx, retaddr);
-            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr2,
+            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -216,13 +203,13 @@
 
 #ifndef SOFTMMU_CODE_ACCESS
 
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
                                                    target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    uintptr_t retaddr);
 
-static inline void glue(io_write, SUFFIX)(ENV_PARAM
+static inline void glue(io_write, SUFFIX)(CPUArchState *env,
                                           target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong addr,
@@ -253,10 +240,9 @@
 #endif /* SHIFT > 2 */
 }
 
-void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
-                                                            target_ulong addr,
-                                                            DATA_TYPE val,
-                                                            int mmu_idx)
+void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
+                                              target_ulong addr, DATA_TYPE val,
+                                              int mmu_idx)
 {
     target_phys_addr_t ioaddr;
     target_ulong tlb_addr;
@@ -273,14 +259,14 @@
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
 #endif
-            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_VAR addr, val,
+            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(env, addr, val,
                                                    mmu_idx, retaddr);
         } else {
             /* aligned/unaligned access in the same page */
@@ -288,7 +274,7 @@
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
+                do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -300,7 +286,7 @@
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, 1, mmu_idx, retaddr);
         goto redo;
@@ -308,7 +294,7 @@
 }
 
 /* handles all unaligned cases */
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
                                                    target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
@@ -327,7 +313,7 @@
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
@@ -335,11 +321,11 @@
              * previous page from the TLB cache.  */
             for(i = DATA_SIZE - 1; i >= 0; i--) {
 #ifdef TARGET_WORDS_BIGENDIAN
-                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                glue(slow_stb, MMUSUFFIX)(env, addr + i,
                                           val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
-                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                glue(slow_stb, MMUSUFFIX)(env, addr + i,
                                           val >> (i * 8),
                                           mmu_idx, retaddr);
 #endif
@@ -366,7 +352,3 @@
 #undef USUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
-#undef ENV_PARAM
-#undef ENV_VAR
-#undef CPU_PREFIX
-#undef HELPER_PREFIX
diff --git a/sysconfigs/target/cpus-x86_64.conf b/sysconfigs/target/cpus-x86_64.conf
deleted file mode 100644
index cee0ea9..0000000
--- a/sysconfigs/target/cpus-x86_64.conf
+++ /dev/null
@@ -1,128 +0,0 @@
-# x86 CPU MODELS
-
-[cpudef]
-   name = "Conroe"
-   level = "2"
-   vendor = "GenuineIntel"
-   family = "6"
-   model = "2"
-   stepping = "3"
-   feature_edx = "sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu"
-   feature_ecx = "ssse3 sse3"
-   extfeature_edx = "i64 xd syscall"
-   extfeature_ecx = "lahf_lm"
-   xlevel = "0x8000000A"
-   model_id = "Intel Celeron_4x0 (Conroe/Merom Class Core 2)"
-
-[cpudef]
-   name = "Penryn"
-   level = "2"
-   vendor = "GenuineIntel"
-   family = "6"
-   model = "2"
-   stepping = "3"
-   feature_edx = "sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu"
-   feature_ecx = "sse4.1 cx16 ssse3 sse3"
-   extfeature_edx = "i64 xd syscall"
-   extfeature_ecx = "lahf_lm"
-   xlevel = "0x8000000A"
-   model_id = "Intel Core 2 Duo P9xxx (Penryn Class Core 2)"
-
-[cpudef]
-   name = "Nehalem"
-   level = "2"
-   vendor = "GenuineIntel"
-   family = "6"
-   model = "2"
-   stepping = "3"
-   feature_edx = "sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu"
-   feature_ecx = "popcnt sse4.2 sse4.1 cx16 ssse3 sse3"
-   extfeature_edx = "i64 syscall xd"
-   extfeature_ecx = "lahf_lm"
-   xlevel = "0x8000000A"
-   model_id = "Intel Core i7 9xx (Nehalem Class Core i7)"
-
-[cpudef]
-   name = "Westmere"
-   level = "11"
-   vendor = "GenuineIntel"
-   family = "6"
-   model = "44"
-   stepping = "1"
-   feature_edx = "sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu"
-   feature_ecx = "aes popcnt sse4.2 sse4.1 cx16 ssse3 sse3"
-   extfeature_edx = "i64 syscall xd"
-   extfeature_ecx = "lahf_lm"
-   xlevel = "0x8000000A"
-   model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)"
-
-[cpudef]
-   name = "SandyBridge"
-   level = "0xd"
-   vendor = "GenuineIntel"
-   family = "6"
-   model = "42"
-   stepping = "1"
-   feature_edx = " sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu"
-   feature_ecx = "avx xsave aes tsc-deadline popcnt x2apic sse4.2 sse4.1 cx16 ssse3 pclmulqdq sse3"
-   extfeature_edx = "i64 rdtscp nx syscall "
-   extfeature_ecx = "lahf_lm"
-   xlevel = "0x8000000A"
-   model_id = "Intel Xeon E312xx (Sandy Bridge)"
-
-[cpudef]
-   name = "Opteron_G1"
-   level = "5"
-   vendor = "AuthenticAMD"
-   family = "15"
-   model = "6"
-   stepping = "1"
-   feature_edx = "sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu"
-   feature_ecx = "sse3"
-   extfeature_edx = "lm fxsr mmx nx pse36 pat cmov mca pge mtrr syscall apic cx8 mce pae msr tsc pse de fpu"
-   extfeature_ecx = " "
-   xlevel = "0x80000008"
-   model_id = "AMD Opteron 240 (Gen 1 Class Opteron)"
-
-[cpudef]
-   name = "Opteron_G2"
-   level = "5"
-   vendor = "AuthenticAMD"
-   family = "15"
-   model = "6"
-   stepping = "1"
-   feature_edx = "sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu"
-   feature_ecx = "cx16 sse3"
-   extfeature_edx = "lm rdtscp fxsr mmx nx pse36 pat cmov mca pge mtrr syscall apic cx8 mce pae msr tsc pse de fpu"
-   extfeature_ecx = "svm lahf_lm"
-   xlevel = "0x80000008"
-   model_id = "AMD Opteron 22xx (Gen 2 Class Opteron)"
-
-[cpudef]
-   name = "Opteron_G3"
-   level = "5"
-   vendor = "AuthenticAMD"
-   family = "15"
-   model = "6"
-   stepping = "1"
-   feature_edx = "sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu"
-   feature_ecx = "popcnt cx16 monitor sse3"
-   extfeature_edx = "lm rdtscp fxsr mmx nx pse36 pat cmov mca pge mtrr syscall apic cx8 mce pae msr tsc pse de fpu"
-   extfeature_ecx = "misalignsse sse4a abm svm lahf_lm"
-   xlevel = "0x80000008"
-   model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)"
-
-[cpudef]
-   name = "Opteron_G4"
-   level = "0xd"
-   vendor = "AuthenticAMD"
-   family = "21"
-   model = "1"
-   stepping = "2"
-   feature_edx = "sse2 sse fxsr mmx clflush pse36 pat cmov mca pge mtrr sep apic cx8 mce pae msr tsc pse de fpu"
-   feature_ecx = "avx xsave aes popcnt sse4.2 sse4.1 cx16 ssse3 pclmulqdq sse3"
-   extfeature_edx = "lm rdtscp pdpe1gb fxsr mmx nx pse36 pat cmov mca pge mtrr syscall apic cx8 mce pae msr tsc pse de fpu"
-   extfeature_ecx = " fma4 xop 3dnowprefetch misalignsse sse4a abm svm lahf_lm"
-   xlevel = "0x8000001A"
-   model_id = "AMD Opteron 62xx class CPU"
-
diff --git a/sysemu.h b/sysemu.h
index 09590e0..b38b1a3 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -52,6 +52,7 @@
 void qemu_register_wakeup_notifier(Notifier *notifier);
 void qemu_system_shutdown_request(void);
 void qemu_system_powerdown_request(void);
+void qemu_register_powerdown_notifier(Notifier *notifier);
 void qemu_system_debug_request(void);
 void qemu_system_vmstop_request(RunState reason);
 int qemu_shutdown_requested_get(void);
@@ -61,7 +62,6 @@
 int qemu_powerdown_requested(void);
 void qemu_system_killed(int signal, pid_t pid);
 void qemu_kill_report(void);
-extern qemu_irq qemu_system_powerdown;
 void qemu_devices_reset(void);
 void qemu_system_reset(bool report);
 
@@ -105,10 +105,7 @@
 } VGAInterfaceType;
 
 extern int vga_interface_type;
-#define cirrus_vga_enabled (vga_interface_type == VGA_CIRRUS)
-#define std_vga_enabled (vga_interface_type == VGA_STD)
 #define xenfb_enabled (vga_interface_type == VGA_XENFB)
-#define vmsvga_enabled (vga_interface_type == VGA_VMWARE)
 #define qxl_enabled (vga_interface_type == VGA_QXL)
 
 extern int graphic_width;
diff --git a/target-alpha/translate.c b/target-alpha/translate.c
index 12de6a3..f707d8d 100644
--- a/target-alpha/translate.c
+++ b/target-alpha/translate.c
@@ -426,27 +426,15 @@
 
         return EXIT_GOTO_TB;
     } else {
-        int lab_over = gen_new_label();
+        TCGv_i64 z = tcg_const_i64(0);
+        TCGv_i64 d = tcg_const_i64(dest);
+        TCGv_i64 p = tcg_const_i64(ctx->pc);
 
-        /* ??? Consider using either
-             movi pc, next
-             addi tmp, pc, disp
-             movcond pc, cond, 0, tmp, pc
-           or
-             setcond tmp, cond, 0
-             movi pc, next
-             neg tmp, tmp
-             andi tmp, tmp, disp
-             add pc, pc, tmp
-           The current diamond subgraph surely isn't efficient.  */
+        tcg_gen_movcond_i64(cond, cpu_pc, cmp, z, d, p);
 
-        tcg_gen_brcondi_i64(cond, cmp, 0, lab_true);
-        tcg_gen_movi_i64(cpu_pc, ctx->pc);
-        tcg_gen_br(lab_over);
-        gen_set_label(lab_true);
-        tcg_gen_movi_i64(cpu_pc, dest);
-        gen_set_label(lab_over);
-
+        tcg_temp_free_i64(z);
+        tcg_temp_free_i64(d);
+        tcg_temp_free_i64(p);
         return EXIT_PC_UPDATED;
     }
 }
@@ -521,61 +509,67 @@
 static void gen_cmov(TCGCond cond, int ra, int rb, int rc,
                      int islit, uint8_t lit, int mask)
 {
-    TCGCond inv_cond = tcg_invert_cond(cond);
-    int l1;
-
-    if (unlikely(rc == 31))
-        return;
-
-    l1 = gen_new_label();
-
-    if (ra != 31) {
-        if (mask) {
-            TCGv tmp = tcg_temp_new();
-            tcg_gen_andi_i64(tmp, cpu_ir[ra], 1);
-            tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1);
-            tcg_temp_free(tmp);
-        } else
-            tcg_gen_brcondi_i64(inv_cond, cpu_ir[ra], 0, l1);
-    } else {
-        /* Very uncommon case - Do not bother to optimize.  */
-        TCGv tmp = tcg_const_i64(0);
-        tcg_gen_brcondi_i64(inv_cond, tmp, 0, l1);
-        tcg_temp_free(tmp);
-    }
-
-    if (islit)
-        tcg_gen_movi_i64(cpu_ir[rc], lit);
-    else
-        tcg_gen_mov_i64(cpu_ir[rc], cpu_ir[rb]);
-    gen_set_label(l1);
-}
-
-static void gen_fcmov(TCGCond cond, int ra, int rb, int rc)
-{
-    TCGv cmp_tmp;
-    int l1;
+    TCGv_i64 c1, z, v1;
 
     if (unlikely(rc == 31)) {
         return;
     }
 
-    cmp_tmp = tcg_temp_new();
-    if (unlikely(ra == 31)) {
-        tcg_gen_movi_i64(cmp_tmp, 0);
+    if (ra == 31) {
+        /* Very uncommon case - Do not bother to optimize.  */
+        c1 = tcg_const_i64(0);
+    } else if (mask) {
+        c1 = tcg_const_i64(1);
+        tcg_gen_and_i64(c1, c1, cpu_ir[ra]);
     } else {
-        gen_fold_mzero(cond, cmp_tmp, cpu_fir[ra]);
+        c1 = cpu_ir[ra];
+    }
+    if (islit) {
+        v1 = tcg_const_i64(lit);
+    } else {
+        v1 = cpu_ir[rb];
+    }
+    z = tcg_const_i64(0);
+
+    tcg_gen_movcond_i64(cond, cpu_ir[rc], c1, z, v1, cpu_ir[rc]);
+
+    tcg_temp_free_i64(z);
+    if (ra == 31 || mask) {
+        tcg_temp_free_i64(c1);
+    }
+    if (islit) {
+        tcg_temp_free_i64(v1);
+    }
+}
+
+static void gen_fcmov(TCGCond cond, int ra, int rb, int rc)
+{
+    TCGv_i64 c1, z, v1;
+
+    if (unlikely(rc == 31)) {
+        return;
     }
 
-    l1 = gen_new_label();
-    tcg_gen_brcondi_i64(tcg_invert_cond(cond), cmp_tmp, 0, l1);
-    tcg_temp_free(cmp_tmp);
+    c1 = tcg_temp_new_i64();
+    if (unlikely(ra == 31)) {
+        tcg_gen_movi_i64(c1, 0);
+    } else {
+        gen_fold_mzero(cond, c1, cpu_fir[ra]);
+    }
+    if (rb == 31) {
+        v1 = tcg_const_i64(0);
+    } else {
+        v1 = cpu_fir[rb];
+    }
+    z = tcg_const_i64(0);
 
-    if (rb != 31)
-        tcg_gen_mov_i64(cpu_fir[rc], cpu_fir[rb]);
-    else
-        tcg_gen_movi_i64(cpu_fir[rc], 0);
-    gen_set_label(l1);
+    tcg_gen_movcond_i64(cond, cpu_fir[rc], c1, z, v1, cpu_fir[rc]);
+
+    tcg_temp_free_i64(z);
+    tcg_temp_free_i64(c1);
+    if (rb == 31) {
+        tcg_temp_free_i64(v1);
+    }
 }
 
 #define QUAL_RM_N       0x080   /* Round mode nearest even */
@@ -3427,7 +3421,7 @@
         insn = cpu_ldl_code(env, ctx.pc);
         num_insns++;
 
-	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(ctx.pc);
         }
 
@@ -3549,6 +3543,7 @@
     }
     env->implver = implver;
     env->amask = amask;
+    env->cpu_model_str = cpu_model;
 
     qemu_init_vcpu(env);
     return env;
diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
index f447c4f..b6f1a9e 100644
--- a/target-arm/Makefile.objs
+++ b/target-arm/Makefile.objs
@@ -2,5 +2,3 @@
 obj-$(CONFIG_SOFTMMU) += machine.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-y += neon_helper.o iwmmxt_helper.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index d7f93d9..ff4de10 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -423,8 +423,6 @@
     (((cp) << 16) | ((is64) << 15) | ((crn) << 11) |    \
      ((crm) << 7) | ((opc1) << 3) | (opc2))
 
-#define DECODE_CPREG_CRN(enc) (((enc) >> 7) & 0xf)
-
 /* ARMCPRegInfo type field bits. If the SPECIAL bit is set this is a
  * special-behaviour cp reg and bits [15..8] indicate what behaviour
  * it has. Otherwise it is a simple cp reg, where CONST indicates that
@@ -734,9 +732,10 @@
 }
 
 /* Load an instruction and return it in the standard little-endian order */
-static inline uint32_t arm_ldl_code(uint32_t addr, bool do_swap)
+static inline uint32_t arm_ldl_code(CPUARMState *env, uint32_t addr,
+                                    bool do_swap)
 {
-    uint32_t insn = ldl_code(addr);
+    uint32_t insn = cpu_ldl_code(env, addr);
     if (do_swap) {
         return bswap32(insn);
     }
@@ -744,9 +743,10 @@
 }
 
 /* Ditto, for a halfword (Thumb) instruction */
-static inline uint16_t arm_lduw_code(uint32_t addr, bool do_swap)
+static inline uint16_t arm_lduw_code(CPUARMState *env, uint32_t addr,
+                                     bool do_swap)
 {
-    uint16_t insn = lduw_code(addr);
+    uint16_t insn = cpu_lduw_code(env, addr);
     if (do_swap) {
         return bswap16(insn);
     }
diff --git a/target-arm/helper.c b/target-arm/helper.c
index e27df96..58340bd 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1756,7 +1756,7 @@
     case EXCP_BKPT:
         if (semihosting_enabled) {
             int nr;
-            nr = arm_lduw_code(env->regs[15], env->bswap_code) & 0xff;
+            nr = arm_lduw_code(env, env->regs[15], env->bswap_code) & 0xff;
             if (nr == 0xab) {
                 env->regs[15] += 2;
                 env->regs[0] = do_arm_semihosting(env);
@@ -1828,9 +1828,10 @@
         if (semihosting_enabled) {
             /* Check for semihosting interrupt.  */
             if (env->thumb) {
-                mask = arm_lduw_code(env->regs[15] - 2, env->bswap_code) & 0xff;
+                mask = arm_lduw_code(env, env->regs[15] - 2, env->bswap_code)
+                    & 0xff;
             } else {
-                mask = arm_ldl_code(env->regs[15] - 4, env->bswap_code)
+                mask = arm_ldl_code(env, env->regs[15] - 4, env->bswap_code)
                     & 0xffffff;
             }
             /* Only intercept calls from privileged modes, to provide some
@@ -1851,7 +1852,7 @@
     case EXCP_BKPT:
         /* See if this is a semihosting syscall.  */
         if (env->thumb && semihosting_enabled) {
-            mask = arm_lduw_code(env->regs[15], env->bswap_code) & 0xff;
+            mask = arm_lduw_code(env, env->regs[15], env->bswap_code) & 0xff;
             if (mask == 0xab
                   && (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR) {
                 env->regs[15] += 2;
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 21e9cfe..8b9adf1 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -1,19 +1,19 @@
 #include "def-helper.h"
 
-DEF_HELPER_1(clz, i32, i32)
-DEF_HELPER_1(sxtb16, i32, i32)
-DEF_HELPER_1(uxtb16, i32, i32)
+DEF_HELPER_FLAGS_1(clz, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(sxtb16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(uxtb16, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
 
-DEF_HELPER_2(add_setq, i32, i32, i32)
-DEF_HELPER_2(add_saturate, i32, i32, i32)
-DEF_HELPER_2(sub_saturate, i32, i32, i32)
-DEF_HELPER_2(add_usaturate, i32, i32, i32)
-DEF_HELPER_2(sub_usaturate, i32, i32, i32)
-DEF_HELPER_1(double_saturate, i32, s32)
-DEF_HELPER_2(sdiv, s32, s32, s32)
-DEF_HELPER_2(udiv, i32, i32, i32)
-DEF_HELPER_1(rbit, i32, i32)
-DEF_HELPER_1(abs, i32, i32)
+DEF_HELPER_3(add_setq, i32, env, i32, i32)
+DEF_HELPER_3(add_saturate, i32, env, i32, i32)
+DEF_HELPER_3(sub_saturate, i32, env, i32, i32)
+DEF_HELPER_3(add_usaturate, i32, env, i32, i32)
+DEF_HELPER_3(sub_usaturate, i32, env, i32, i32)
+DEF_HELPER_2(double_saturate, i32, env, s32)
+DEF_HELPER_FLAGS_2(sdiv, TCG_CALL_CONST | TCG_CALL_PURE, s32, s32, s32)
+DEF_HELPER_FLAGS_2(udiv, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32, i32)
+DEF_HELPER_FLAGS_1(rbit, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
+DEF_HELPER_FLAGS_1(abs, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32)
 
 #define PAS_OP(pfx)  \
     DEF_HELPER_3(pfx ## add8, i32, i32, i32, ptr) \
@@ -40,21 +40,22 @@
 PAS_OP(uh)
 #undef PAS_OP
 
-DEF_HELPER_2(ssat, i32, i32, i32)
-DEF_HELPER_2(usat, i32, i32, i32)
-DEF_HELPER_2(ssat16, i32, i32, i32)
-DEF_HELPER_2(usat16, i32, i32, i32)
+DEF_HELPER_3(ssat, i32, env, i32, i32)
+DEF_HELPER_3(usat, i32, env, i32, i32)
+DEF_HELPER_3(ssat16, i32, env, i32, i32)
+DEF_HELPER_3(usat16, i32, env, i32, i32)
 
-DEF_HELPER_2(usad8, i32, i32, i32)
+DEF_HELPER_FLAGS_2(usad8, TCG_CALL_CONST | TCG_CALL_PURE, i32, i32, i32)
 
 DEF_HELPER_1(logicq_cc, i32, i64)
 
-DEF_HELPER_3(sel_flags, i32, i32, i32, i32)
-DEF_HELPER_1(exception, void, i32)
-DEF_HELPER_0(wfi, void)
+DEF_HELPER_FLAGS_3(sel_flags, TCG_CALL_CONST | TCG_CALL_PURE,
+                   i32, i32, i32, i32)
+DEF_HELPER_2(exception, void, env, i32)
+DEF_HELPER_1(wfi, void, env)
 
-DEF_HELPER_2(cpsr_write, void, i32, i32)
-DEF_HELPER_0(cpsr_read, i32)
+DEF_HELPER_3(cpsr_write, void, env, i32, i32)
+DEF_HELPER_1(cpsr_read, i32, env)
 
 DEF_HELPER_3(v7m_msr, void, env, i32, i32)
 DEF_HELPER_2(v7m_mrs, i32, env, i32)
@@ -67,8 +68,8 @@
 DEF_HELPER_2(get_r13_banked, i32, env, i32)
 DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
 
-DEF_HELPER_1(get_user_reg, i32, i32)
-DEF_HELPER_2(set_user_reg, void, i32, i32)
+DEF_HELPER_2(get_user_reg, i32, env, i32)
+DEF_HELPER_3(set_user_reg, void, env, i32, i32)
 
 DEF_HELPER_1(vfp_get_fpscr, i32, env)
 DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
@@ -140,20 +141,15 @@
 DEF_HELPER_2(rsqrte_f32, f32, f32, env)
 DEF_HELPER_2(recpe_u32, i32, i32, env)
 DEF_HELPER_2(rsqrte_u32, i32, i32, env)
-DEF_HELPER_4(neon_tbl, i32, i32, i32, i32, i32)
+DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 
-DEF_HELPER_2(add_cc, i32, i32, i32)
-DEF_HELPER_2(adc_cc, i32, i32, i32)
-DEF_HELPER_2(sub_cc, i32, i32, i32)
-DEF_HELPER_2(sbc_cc, i32, i32, i32)
+DEF_HELPER_3(adc_cc, i32, env, i32, i32)
+DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
-DEF_HELPER_2(shl, i32, i32, i32)
-DEF_HELPER_2(shr, i32, i32, i32)
-DEF_HELPER_2(sar, i32, i32, i32)
-DEF_HELPER_2(shl_cc, i32, i32, i32)
-DEF_HELPER_2(shr_cc, i32, i32, i32)
-DEF_HELPER_2(sar_cc, i32, i32, i32)
-DEF_HELPER_2(ror_cc, i32, i32, i32)
+DEF_HELPER_3(shl_cc, i32, env, i32, i32)
+DEF_HELPER_3(shr_cc, i32, env, i32, i32)
+DEF_HELPER_3(sar_cc, i32, env, i32, i32)
+DEF_HELPER_3(ror_cc, i32, env, i32, i32)
 
 /* neon_helper.c */
 DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index d77bfab..aef592a 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -17,19 +17,18 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
-static void raise_exception(int tt)
+static void raise_exception(CPUARMState *env, int tt)
 {
     env->exception_index = tt;
     cpu_loop_exit(env);
 }
 
-uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
+uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
                           uint32_t rn, uint32_t maxindex)
 {
     uint32_t val;
@@ -72,16 +71,12 @@
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPUARMState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUARMState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUARMState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -93,15 +88,14 @@
                 cpu_restore_state(tb, env, retaddr);
             }
         }
-        raise_exception(env->exception_index);
+        raise_exception(env, env->exception_index);
     }
-    env = saved_env;
 }
 #endif
 
 /* FIXME: Pass an explicit pointer to QF to CPUARMState, and move saturating
    instructions into helper.c  */
-uint32_t HELPER(add_setq)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT))
@@ -109,7 +103,7 @@
     return res;
 }
 
-uint32_t HELPER(add_saturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
@@ -119,7 +113,7 @@
     return res;
 }
 
-uint32_t HELPER(sub_saturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a - b;
     if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
@@ -129,7 +123,7 @@
     return res;
 }
 
-uint32_t HELPER(double_saturate)(int32_t val)
+uint32_t HELPER(double_saturate)(CPUARMState *env, int32_t val)
 {
     uint32_t res;
     if (val >= 0x40000000) {
@@ -144,7 +138,7 @@
     return res;
 }
 
-uint32_t HELPER(add_usaturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (res < a) {
@@ -154,7 +148,7 @@
     return res;
 }
 
-uint32_t HELPER(sub_usaturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a - b;
     if (res > a) {
@@ -165,7 +159,7 @@
 }
 
 /* Signed saturation.  */
-static inline uint32_t do_ssat(int32_t val, int shift)
+static inline uint32_t do_ssat(CPUARMState *env, int32_t val, int shift)
 {
     int32_t top;
     uint32_t mask;
@@ -183,7 +177,7 @@
 }
 
 /* Unsigned saturation.  */
-static inline uint32_t do_usat(int32_t val, int shift)
+static inline uint32_t do_usat(CPUARMState *env, int32_t val, int shift)
 {
     uint32_t max;
 
@@ -199,62 +193,62 @@
 }
 
 /* Signed saturate.  */
-uint32_t HELPER(ssat)(uint32_t x, uint32_t shift)
+uint32_t HELPER(ssat)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
-    return do_ssat(x, shift);
+    return do_ssat(env, x, shift);
 }
 
 /* Dual halfword signed saturate.  */
-uint32_t HELPER(ssat16)(uint32_t x, uint32_t shift)
+uint32_t HELPER(ssat16)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
     uint32_t res;
 
-    res = (uint16_t)do_ssat((int16_t)x, shift);
-    res |= do_ssat(((int32_t)x) >> 16, shift) << 16;
+    res = (uint16_t)do_ssat(env, (int16_t)x, shift);
+    res |= do_ssat(env, ((int32_t)x) >> 16, shift) << 16;
     return res;
 }
 
 /* Unsigned saturate.  */
-uint32_t HELPER(usat)(uint32_t x, uint32_t shift)
+uint32_t HELPER(usat)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
-    return do_usat(x, shift);
+    return do_usat(env, x, shift);
 }
 
 /* Dual halfword unsigned saturate.  */
-uint32_t HELPER(usat16)(uint32_t x, uint32_t shift)
+uint32_t HELPER(usat16)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
     uint32_t res;
 
-    res = (uint16_t)do_usat((int16_t)x, shift);
-    res |= do_usat(((int32_t)x) >> 16, shift) << 16;
+    res = (uint16_t)do_usat(env, (int16_t)x, shift);
+    res |= do_usat(env, ((int32_t)x) >> 16, shift) << 16;
     return res;
 }
 
-void HELPER(wfi)(void)
+void HELPER(wfi)(CPUARMState *env)
 {
     env->exception_index = EXCP_HLT;
     env->halted = 1;
     cpu_loop_exit(env);
 }
 
-void HELPER(exception)(uint32_t excp)
+void HELPER(exception)(CPUARMState *env, uint32_t excp)
 {
     env->exception_index = excp;
     cpu_loop_exit(env);
 }
 
-uint32_t HELPER(cpsr_read)(void)
+uint32_t HELPER(cpsr_read)(CPUARMState *env)
 {
     return cpsr_read(env) & ~CPSR_EXEC;
 }
 
-void HELPER(cpsr_write)(uint32_t val, uint32_t mask)
+void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask)
 {
     cpsr_write(env, val, mask);
 }
 
 /* Access to user mode registers from privileged modes.  */
-uint32_t HELPER(get_user_reg)(uint32_t regno)
+uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno)
 {
     uint32_t val;
 
@@ -271,7 +265,7 @@
     return val;
 }
 
-void HELPER(set_user_reg)(uint32_t regno, uint32_t val)
+void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val)
 {
     if (regno == 13) {
         env->banked_r13[0] = val;
@@ -290,7 +284,7 @@
     const ARMCPRegInfo *ri = rip;
     int excp = ri->writefn(env, ri, value);
     if (excp) {
-        raise_exception(excp);
+        raise_exception(env, excp);
     }
 }
 
@@ -300,7 +294,7 @@
     uint64_t value;
     int excp = ri->readfn(env, ri, &value);
     if (excp) {
-        raise_exception(excp);
+        raise_exception(env, excp);
     }
     return value;
 }
@@ -310,7 +304,7 @@
     const ARMCPRegInfo *ri = rip;
     int excp = ri->writefn(env, ri, value);
     if (excp) {
-        raise_exception(excp);
+        raise_exception(env, excp);
     }
 }
 
@@ -320,7 +314,7 @@
     uint64_t value;
     int excp = ri->readfn(env, ri, &value);
     if (excp) {
-        raise_exception(excp);
+        raise_exception(env, excp);
     }
     return value;
 }
@@ -329,17 +323,7 @@
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
 
-uint32_t HELPER (add_cc)(uint32_t a, uint32_t b)
-{
-    uint32_t result;
-    result = a + b;
-    env->NF = env->ZF = result;
-    env->CF = result < a;
-    env->VF = (a ^ b ^ -1) & (a ^ result);
-    return result;
-}
-
-uint32_t HELPER(adc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(adc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -354,17 +338,7 @@
     return result;
 }
 
-uint32_t HELPER(sub_cc)(uint32_t a, uint32_t b)
-{
-    uint32_t result;
-    result = a - b;
-    env->NF = env->ZF = result;
-    env->CF = a >= b;
-    env->VF = (a ^ b) & (a ^ result);
-    return result;
-}
-
-uint32_t HELPER(sbc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -381,31 +355,7 @@
 
 /* Similarly for variable shift instructions.  */
 
-uint32_t HELPER(shl)(uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        return 0;
-    return x << shift;
-}
-
-uint32_t HELPER(shr)(uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        return 0;
-    return (uint32_t)x >> shift;
-}
-
-uint32_t HELPER(sar)(uint32_t x, uint32_t i)
-{
-    int shift = i & 0xff;
-    if (shift >= 32)
-        shift = 31;
-    return (int32_t)x >> shift;
-}
-
-uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -421,7 +371,7 @@
     return x;
 }
 
-uint32_t HELPER(shr_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shr_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -437,7 +387,7 @@
     return x;
 }
 
-uint32_t HELPER(sar_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(sar_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -450,7 +400,7 @@
     return x;
 }
 
-uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift1, shift;
     shift1 = i & 0xff;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index edef79a..c6840b7 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -85,6 +85,7 @@
 /* We reuse the same 64-bit temporaries for efficiency.  */
 static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
 static TCGv_i32 cpu_R[16];
+static TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
 static TCGv_i32 cpu_exclusive_addr;
 static TCGv_i32 cpu_exclusive_val;
 static TCGv_i32 cpu_exclusive_high;
@@ -115,6 +116,11 @@
                                           offsetof(CPUARMState, regs[i]),
                                           regnames[i]);
     }
+    cpu_CF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, CF), "CF");
+    cpu_NF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, NF), "NF");
+    cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
+    cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
+
     cpu_exclusive_addr = tcg_global_mem_new_i32(TCG_AREG0,
         offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
     cpu_exclusive_val = tcg_global_mem_new_i32(TCG_AREG0,
@@ -199,7 +205,7 @@
 static inline void gen_set_cpsr(TCGv var, uint32_t mask)
 {
     TCGv tmp_mask = tcg_const_i32(mask);
-    gen_helper_cpsr_write(var, tmp_mask);
+    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
     tcg_temp_free_i32(tmp_mask);
 }
 /* Set NZCV flags from the high 4 bits of var.  */
@@ -209,7 +215,7 @@
 {
     TCGv tmp = tcg_temp_new_i32();
     tcg_gen_movi_i32(tmp, excp);
-    gen_helper_exception(tmp);
+    gen_helper_exception(cpu_env, tmp);
     tcg_temp_free_i32(tmp);
 }
 
@@ -271,15 +277,6 @@
     }
 }
 
-/* Bitfield insertion.  Insert val into base.  Clobbers base and val.  */
-static void gen_bfi(TCGv dest, TCGv base, TCGv val, int shift, uint32_t mask)
-{
-    tcg_gen_andi_i32(val, val, mask);
-    tcg_gen_shli_i32(val, val, shift);
-    tcg_gen_andi_i32(base, base, ~(mask << shift));
-    tcg_gen_or_i32(dest, base, val);
-}
-
 /* Return (b << 32) + a. Mark inputs as dead */
 static TCGv_i64 gen_addq_msw(TCGv_i64 a, TCGv b)
 {
@@ -369,53 +366,100 @@
     tcg_temp_free_i32(t1);
 }
 
-#define gen_set_CF(var) tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, CF))
-
 /* Set CF to the top bit of var.  */
 static void gen_set_CF_bit31(TCGv var)
 {
-    TCGv tmp = tcg_temp_new_i32();
-    tcg_gen_shri_i32(tmp, var, 31);
-    gen_set_CF(tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_shri_i32(cpu_CF, var, 31);
 }
 
 /* Set N and Z flags from var.  */
 static inline void gen_logic_CC(TCGv var)
 {
-    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, NF));
-    tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, ZF));
+    tcg_gen_mov_i32(cpu_NF, var);
+    tcg_gen_mov_i32(cpu_ZF, var);
 }
 
 /* T0 += T1 + CF.  */
 static void gen_adc(TCGv t0, TCGv t1)
 {
-    TCGv tmp;
     tcg_gen_add_i32(t0, t0, t1);
-    tmp = load_cpu_field(CF);
-    tcg_gen_add_i32(t0, t0, tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_add_i32(t0, t0, cpu_CF);
 }
 
 /* dest = T0 + T1 + CF. */
 static void gen_add_carry(TCGv dest, TCGv t0, TCGv t1)
 {
-    TCGv tmp;
     tcg_gen_add_i32(dest, t0, t1);
-    tmp = load_cpu_field(CF);
-    tcg_gen_add_i32(dest, dest, tmp);
-    tcg_temp_free_i32(tmp);
+    tcg_gen_add_i32(dest, dest, cpu_CF);
 }
 
 /* dest = T0 - T1 + CF - 1.  */
 static void gen_sub_carry(TCGv dest, TCGv t0, TCGv t1)
 {
-    TCGv tmp;
     tcg_gen_sub_i32(dest, t0, t1);
-    tmp = load_cpu_field(CF);
-    tcg_gen_add_i32(dest, dest, tmp);
+    tcg_gen_add_i32(dest, dest, cpu_CF);
     tcg_gen_subi_i32(dest, dest, 1);
+}
+
+/* dest = T0 + T1. Compute C, N, V and Z flags */
+static void gen_add_CC(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp;
+    tcg_gen_add_i32(cpu_NF, t0, t1);
+    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+    tcg_gen_setcond_i32(TCG_COND_LTU, cpu_CF, cpu_NF, t0);
+    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+    tmp = tcg_temp_new_i32();
+    tcg_gen_xor_i32(tmp, t0, t1);
+    tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
     tcg_temp_free_i32(tmp);
+    tcg_gen_mov_i32(dest, cpu_NF);
+}
+
+/* dest = T0 - T1. Compute C, N, V and Z flags */
+static void gen_sub_CC(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp;
+    tcg_gen_sub_i32(cpu_NF, t0, t1);
+    tcg_gen_mov_i32(cpu_ZF, cpu_NF);
+    tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
+    tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
+    tmp = tcg_temp_new_i32();
+    tcg_gen_xor_i32(tmp, t0, t1);
+    tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
+    tcg_temp_free_i32(tmp);
+    tcg_gen_mov_i32(dest, cpu_NF);
+}
+
+#define GEN_SHIFT(name)                                               \
+static void gen_##name(TCGv dest, TCGv t0, TCGv t1)                   \
+{                                                                     \
+    TCGv tmp1, tmp2, tmp3;                                            \
+    tmp1 = tcg_temp_new_i32();                                        \
+    tcg_gen_andi_i32(tmp1, t1, 0xff);                                 \
+    tmp2 = tcg_const_i32(0);                                          \
+    tmp3 = tcg_const_i32(0x1f);                                       \
+    tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0);    \
+    tcg_temp_free_i32(tmp3);                                          \
+    tcg_gen_andi_i32(tmp1, tmp1, 0x1f);                               \
+    tcg_gen_##name##_i32(dest, tmp2, tmp1);                           \
+    tcg_temp_free_i32(tmp2);                                          \
+    tcg_temp_free_i32(tmp1);                                          \
+}
+GEN_SHIFT(shl)
+GEN_SHIFT(shr)
+#undef GEN_SHIFT
+
+static void gen_sar(TCGv dest, TCGv t0, TCGv t1)
+{
+    TCGv tmp1, tmp2;
+    tmp1 = tcg_temp_new_i32();
+    tcg_gen_andi_i32(tmp1, t1, 0xff);
+    tmp2 = tcg_const_i32(0x1f);
+    tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
+    tcg_temp_free_i32(tmp2);
+    tcg_gen_sar_i32(dest, t0, tmp1);
+    tcg_temp_free_i32(tmp1);
 }
 
 /* FIXME:  Implement this natively.  */
@@ -423,16 +467,14 @@
 
 static void shifter_out_im(TCGv var, int shift)
 {
-    TCGv tmp = tcg_temp_new_i32();
     if (shift == 0) {
-        tcg_gen_andi_i32(tmp, var, 1);
+        tcg_gen_andi_i32(cpu_CF, var, 1);
     } else {
-        tcg_gen_shri_i32(tmp, var, shift);
-        if (shift != 31)
-            tcg_gen_andi_i32(tmp, tmp, 1);
+        tcg_gen_shri_i32(cpu_CF, var, shift);
+        if (shift != 31) {
+            tcg_gen_andi_i32(cpu_CF, cpu_CF, 1);
+        }
     }
-    gen_set_CF(tmp);
-    tcg_temp_free_i32(tmp);
 }
 
 /* Shift by immediate.  Includes special handling for shift == 0.  */
@@ -449,8 +491,7 @@
     case 1: /* LSR */
         if (shift == 0) {
             if (flags) {
-                tcg_gen_shri_i32(var, var, 31);
-                gen_set_CF(var);
+                tcg_gen_shri_i32(cpu_CF, var, 31);
             }
             tcg_gen_movi_i32(var, 0);
         } else {
@@ -474,11 +515,11 @@
                 shifter_out_im(var, shift - 1);
             tcg_gen_rotri_i32(var, var, shift); break;
         } else {
-            TCGv tmp = load_cpu_field(CF);
+            TCGv tmp = tcg_temp_new_i32();
             if (flags)
                 shifter_out_im(var, 0);
             tcg_gen_shri_i32(var, var, 1);
-            tcg_gen_shli_i32(tmp, tmp, 31);
+            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             tcg_gen_or_i32(var, var, tmp);
             tcg_temp_free_i32(tmp);
         }
@@ -490,16 +531,22 @@
 {
     if (flags) {
         switch (shiftop) {
-        case 0: gen_helper_shl_cc(var, var, shift); break;
-        case 1: gen_helper_shr_cc(var, var, shift); break;
-        case 2: gen_helper_sar_cc(var, var, shift); break;
-        case 3: gen_helper_ror_cc(var, var, shift); break;
+        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
+        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
+        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
+        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
         }
     } else {
         switch (shiftop) {
-        case 0: gen_helper_shl(var, var, shift); break;
-        case 1: gen_helper_shr(var, var, shift); break;
-        case 2: gen_helper_sar(var, var, shift); break;
+        case 0:
+            gen_shl(var, var, shift);
+            break;
+        case 1:
+            gen_shr(var, var, shift);
+            break;
+        case 2:
+            gen_sar(var, var, shift);
+            break;
         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
                 tcg_gen_rotr_i32(var, var, shift); break;
         }
@@ -603,99 +650,75 @@
 static void gen_test_cc(int cc, int label)
 {
     TCGv tmp;
-    TCGv tmp2;
     int inv;
 
     switch (cc) {
     case 0: /* eq: Z */
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
         break;
     case 1: /* ne: !Z */
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
         break;
     case 2: /* cs: C */
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_CF, 0, label);
         break;
     case 3: /* cc: !C */
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
         break;
     case 4: /* mi: N */
-        tmp = load_cpu_field(NF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_NF, 0, label);
         break;
     case 5: /* pl: !N */
-        tmp = load_cpu_field(NF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_NF, 0, label);
         break;
     case 6: /* vs: V */
-        tmp = load_cpu_field(VF);
-        tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_LT, cpu_VF, 0, label);
         break;
     case 7: /* vc: !V */
-        tmp = load_cpu_field(VF);
-        tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_GE, cpu_VF, 0, label);
         break;
     case 8: /* hi: C && !Z */
         inv = gen_new_label();
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_NE, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, inv);
+        tcg_gen_brcondi_i32(TCG_COND_NE, cpu_ZF, 0, label);
         gen_set_label(inv);
         break;
     case 9: /* ls: !C || Z */
-        tmp = load_cpu_field(CF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_CF, 0, label);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
         break;
     case 10: /* ge: N == V -> N ^ V == 0 */
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         break;
     case 11: /* lt: N != V -> N ^ V != 0 */
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         break;
     case 12: /* gt: !Z && N == V */
         inv = gen_new_label();
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, inv);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, inv);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_GE, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         gen_set_label(inv);
         break;
     case 13: /* le: Z || N != V */
-        tmp = load_cpu_field(ZF);
-        tcg_gen_brcondi_i32(TCG_COND_EQ, tmp, 0, label);
-        tcg_temp_free_i32(tmp);
-        tmp = load_cpu_field(VF);
-        tmp2 = load_cpu_field(NF);
-        tcg_gen_xor_i32(tmp, tmp, tmp2);
-        tcg_temp_free_i32(tmp2);
+        tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ZF, 0, label);
+        tmp = tcg_temp_new_i32();
+        tcg_gen_xor_i32(tmp, cpu_VF, cpu_NF);
         tcg_gen_brcondi_i32(TCG_COND_LT, tmp, 0, label);
+        tcg_temp_free_i32(tmp);
         break;
     default:
         fprintf(stderr, "Bad condition code 0x%x\n", cc);
         abort();
     }
-    tcg_temp_free_i32(tmp);
 }
 
 static const uint8_t table_logic_cc[16] = {
@@ -2628,12 +2651,12 @@
                         switch (size) {
                         case 0:
                             tmp2 = neon_load_reg(rn, pass);
-                            gen_bfi(tmp, tmp2, tmp, offset, 0xff);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 8);
                             tcg_temp_free_i32(tmp2);
                             break;
                         case 1:
                             tmp2 = neon_load_reg(rn, pass);
-                            gen_bfi(tmp, tmp2, tmp, offset, 0xffff);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, offset, 16);
                             tcg_temp_free_i32(tmp2);
                             break;
                         case 2:
@@ -3989,7 +4012,8 @@
                     }
                     if (size != 2) {
                         tmp2 = neon_load_reg(rd, pass);
-                        gen_bfi(tmp, tmp2, tmp, shift, size ? 0xffff : 0xff);
+                        tcg_gen_deposit_i32(tmp, tmp2, tmp,
+                                            shift, size ? 16 : 8);
                         tcg_temp_free_i32(tmp2);
                     }
                     neon_store_reg(rd, pass, tmp);
@@ -6121,7 +6145,7 @@
                 tmp2 = neon_load_reg(rm, 0);
                 tmp4 = tcg_const_i32(rn);
                 tmp5 = tcg_const_i32(n);
-                gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5);
+                gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5);
                 tcg_temp_free_i32(tmp);
                 if (insn & (1 << 6)) {
                     tmp = neon_load_reg(rd, 1);
@@ -6130,7 +6154,7 @@
                     tcg_gen_movi_i32(tmp, 0);
                 }
                 tmp3 = neon_load_reg(rm, 1);
-                gen_helper_neon_tbl(tmp3, tmp3, tmp, tmp4, tmp5);
+                gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5);
                 tcg_temp_free_i32(tmp5);
                 tcg_temp_free_i32(tmp4);
                 neon_store_reg(rd, 0, tmp2);
@@ -6534,7 +6558,7 @@
     TCGv addr;
     TCGv_i64 tmp64;
 
-    insn = arm_ldl_code(s->pc, s->bswap_code);
+    insn = arm_ldl_code(env, s->pc, s->bswap_code);
     s->pc += 4;
 
     /* M variants do not implement ARM mode.  */
@@ -6818,7 +6842,7 @@
                     tmp = load_cpu_field(spsr);
                 } else {
                     tmp = tcg_temp_new_i32();
-                    gen_helper_cpsr_read(tmp);
+                    gen_helper_cpsr_read(tmp, cpu_env);
                 }
                 store_reg(s, rd, tmp);
             }
@@ -6869,11 +6893,11 @@
             tmp = load_reg(s, rm);
             tmp2 = load_reg(s, rn);
             if (op1 & 2)
-                gen_helper_double_saturate(tmp2, tmp2);
+                gen_helper_double_saturate(tmp2, cpu_env, tmp2);
             if (op1 & 1)
-                gen_helper_sub_saturate(tmp, tmp, tmp2);
+                gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
             else
-                gen_helper_add_saturate(tmp, tmp, tmp2);
+                gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
             tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
             break;
@@ -6911,7 +6935,7 @@
                 tcg_temp_free_i64(tmp64);
                 if ((sh & 2) == 0) {
                     tmp2 = load_reg(s, rn);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                 }
                 store_reg(s, rd, tmp);
@@ -6931,7 +6955,7 @@
                 } else {
                     if (op1 == 0) {
                         tmp2 = load_reg(s, rn);
-                        gen_helper_add_setq(tmp, tmp, tmp2);
+                        gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                         tcg_temp_free_i32(tmp2);
                     }
                     store_reg(s, rd, tmp);
@@ -7005,11 +7029,11 @@
                 if (IS_USER(s)) {
                     goto illegal_op;
                 }
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
                 gen_exception_return(s, tmp);
             } else {
                 if (set_cc) {
-                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                    gen_sub_CC(tmp, tmp, tmp2);
                 } else {
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 }
@@ -7018,7 +7042,7 @@
             break;
         case 0x03:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, tmp2, tmp);
+                gen_sub_CC(tmp, tmp2, tmp);
             } else {
                 tcg_gen_sub_i32(tmp, tmp2, tmp);
             }
@@ -7026,7 +7050,7 @@
             break;
         case 0x04:
             if (set_cc) {
-                gen_helper_add_cc(tmp, tmp, tmp2);
+                gen_add_CC(tmp, tmp, tmp2);
             } else {
                 tcg_gen_add_i32(tmp, tmp, tmp2);
             }
@@ -7034,7 +7058,7 @@
             break;
         case 0x05:
             if (set_cc) {
-                gen_helper_adc_cc(tmp, tmp, tmp2);
+                gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 gen_add_carry(tmp, tmp, tmp2);
             }
@@ -7042,7 +7066,7 @@
             break;
         case 0x06:
             if (set_cc) {
-                gen_helper_sbc_cc(tmp, tmp, tmp2);
+                gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 gen_sub_carry(tmp, tmp, tmp2);
             }
@@ -7050,7 +7074,7 @@
             break;
         case 0x07:
             if (set_cc) {
-                gen_helper_sbc_cc(tmp, tmp2, tmp);
+                gen_helper_sbc_cc(tmp, cpu_env, tmp2, tmp);
             } else {
                 gen_sub_carry(tmp, tmp2, tmp);
             }
@@ -7072,13 +7096,13 @@
             break;
         case 0x0a:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
         case 0x0b:
             if (set_cc) {
-                gen_helper_add_cc(tmp, tmp, tmp2);
+                gen_add_CC(tmp, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
@@ -7395,9 +7419,9 @@
                         sh = (insn >> 16) & 0x1f;
                         tmp2 = tcg_const_i32(sh);
                         if (insn & (1 << 22))
-                          gen_helper_usat(tmp, tmp, tmp2);
+                          gen_helper_usat(tmp, cpu_env, tmp, tmp2);
                         else
-                          gen_helper_ssat(tmp, tmp, tmp2);
+                          gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
                         tcg_temp_free_i32(tmp2);
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
@@ -7406,9 +7430,9 @@
                         sh = (insn >> 16) & 0x1f;
                         tmp2 = tcg_const_i32(sh);
                         if (insn & (1 << 22))
-                          gen_helper_usat16(tmp, tmp, tmp2);
+                          gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
                         else
-                          gen_helper_ssat16(tmp, tmp, tmp2);
+                          gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
                         tcg_temp_free_i32(tmp2);
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
@@ -7518,7 +7542,7 @@
                              * however it may overflow considered as a signed
                              * operation, in which case we must set the Q flag.
                              */
-                            gen_helper_add_setq(tmp, tmp, tmp2);
+                            gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                         }
                         tcg_temp_free_i32(tmp2);
                         if (insn & (1 << 22)) {
@@ -7534,7 +7558,7 @@
                             if (rd != 15)
                               {
                                 tmp2 = load_reg(s, rd);
-                                gen_helper_add_setq(tmp, tmp, tmp2);
+                                gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                                 tcg_temp_free_i32(tmp2);
                               }
                             store_reg(s, rn, tmp);
@@ -7593,7 +7617,7 @@
                         }
                         if (i != 32) {
                             tmp2 = load_reg(s, rd);
-                            gen_bfi(tmp, tmp2, tmp, shift, (1u << i) - 1);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, i);
                             tcg_temp_free_i32(tmp2);
                         }
                         store_reg(s, rd, tmp);
@@ -7719,7 +7743,7 @@
                             tmp = gen_ld32(addr, IS_USER(s));
                             if (user) {
                                 tmp2 = tcg_const_i32(i);
-                                gen_helper_set_user_reg(tmp2, tmp);
+                                gen_helper_set_user_reg(cpu_env, tmp2, tmp);
                                 tcg_temp_free_i32(tmp2);
                                 tcg_temp_free_i32(tmp);
                             } else if (i == rn) {
@@ -7738,7 +7762,7 @@
                             } else if (user) {
                                 tmp = tcg_temp_new_i32();
                                 tmp2 = tcg_const_i32(i);
-                                gen_helper_get_user_reg(tmp, tmp2);
+                                gen_helper_get_user_reg(tmp, cpu_env, tmp2);
                                 tcg_temp_free_i32(tmp2);
                             } else {
                                 tmp = load_reg(s, i);
@@ -7865,31 +7889,31 @@
         break;
     case 8: /* add */
         if (conds)
-            gen_helper_add_cc(t0, t0, t1);
+            gen_add_CC(t0, t0, t1);
         else
             tcg_gen_add_i32(t0, t0, t1);
         break;
     case 10: /* adc */
         if (conds)
-            gen_helper_adc_cc(t0, t0, t1);
+            gen_helper_adc_cc(t0, cpu_env, t0, t1);
         else
             gen_adc(t0, t1);
         break;
     case 11: /* sbc */
         if (conds)
-            gen_helper_sbc_cc(t0, t0, t1);
+            gen_helper_sbc_cc(t0, cpu_env, t0, t1);
         else
             gen_sub_carry(t0, t0, t1);
         break;
     case 13: /* sub */
         if (conds)
-            gen_helper_sub_cc(t0, t0, t1);
+            gen_sub_CC(t0, t0, t1);
         else
             tcg_gen_sub_i32(t0, t0, t1);
         break;
     case 14: /* rsb */
         if (conds)
-            gen_helper_sub_cc(t0, t1, t0);
+            gen_sub_CC(t0, t1, t0);
         else
             tcg_gen_sub_i32(t0, t1, t0);
         break;
@@ -7962,7 +7986,7 @@
         /* Fall through to 32-bit decode.  */
     }
 
-    insn = arm_lduw_code(s->pc, s->bswap_code);
+    insn = arm_lduw_code(env, s->pc, s->bswap_code);
     s->pc += 2;
     insn |= (uint32_t)insn_hw1 << 16;
 
@@ -8111,7 +8135,7 @@
                     gen_st32(tmp, addr, 0);
                     tcg_gen_addi_i32(addr, addr, 4);
                     tmp = tcg_temp_new_i32();
-                    gen_helper_cpsr_read(tmp);
+                    gen_helper_cpsr_read(tmp, cpu_env);
                     gen_st32(tmp, addr, 0);
                     if (insn & (1 << 21)) {
                         if ((insn & (1 << 24)) == 0) {
@@ -8293,11 +8317,11 @@
                 tmp = load_reg(s, rn);
                 tmp2 = load_reg(s, rm);
                 if (op & 1)
-                    gen_helper_double_saturate(tmp, tmp);
+                    gen_helper_double_saturate(tmp, cpu_env, tmp);
                 if (op & 2)
-                    gen_helper_sub_saturate(tmp, tmp2, tmp);
+                    gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
                 else
-                    gen_helper_add_saturate(tmp, tmp, tmp2);
+                    gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
             } else {
                 tmp = load_reg(s, rn);
@@ -8353,7 +8377,7 @@
                 tcg_temp_free_i32(tmp2);
                 if (rs != 15) {
                     tmp2 = load_reg(s, rs);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                 }
                 break;
@@ -8370,13 +8394,13 @@
                      * however it may overflow considered as a signed
                      * operation, in which case we must set the Q flag.
                      */
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                 }
                 tcg_temp_free_i32(tmp2);
                 if (rs != 15)
                   {
                     tmp2 = load_reg(s, rs);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                   }
                 break;
@@ -8393,7 +8417,7 @@
                 if (rs != 15)
                   {
                     tmp2 = load_reg(s, rs);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                   }
                 break;
@@ -8632,7 +8656,7 @@
                             gen_helper_v7m_mrs(tmp, cpu_env, addr);
                             tcg_temp_free_i32(addr);
                         } else {
-                            gen_helper_cpsr_read(tmp);
+                            gen_helper_cpsr_read(tmp, cpu_env);
                         }
                         store_reg(s, rd, tmp);
                         break;
@@ -8704,7 +8728,7 @@
                         imm = imm + 1 - shift;
                         if (imm != 32) {
                             tmp2 = load_reg(s, rd);
-                            gen_bfi(tmp, tmp2, tmp, shift, (1u << imm) - 1);
+                            tcg_gen_deposit_i32(tmp, tmp2, tmp, shift, imm);
                             tcg_temp_free_i32(tmp2);
                         }
                         break;
@@ -8721,15 +8745,15 @@
                         if (op & 4) {
                             /* Unsigned.  */
                             if ((op & 1) && shift == 0)
-                                gen_helper_usat16(tmp, tmp, tmp2);
+                                gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
                             else
-                                gen_helper_usat(tmp, tmp, tmp2);
+                                gen_helper_usat(tmp, cpu_env, tmp, tmp2);
                         } else {
                             /* Signed.  */
                             if ((op & 1) && shift == 0)
-                                gen_helper_ssat16(tmp, tmp, tmp2);
+                                gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
                             else
-                                gen_helper_ssat(tmp, tmp, tmp2);
+                                gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
                         }
                         tcg_temp_free_i32(tmp2);
                         break;
@@ -8992,7 +9016,7 @@
         }
     }
 
-    insn = arm_lduw_code(s->pc, s->bswap_code);
+    insn = arm_lduw_code(env, s->pc, s->bswap_code);
     s->pc += 2;
 
     switch (insn >> 12) {
@@ -9017,12 +9041,12 @@
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                    gen_sub_CC(tmp, tmp, tmp2);
             } else {
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, tmp, tmp2);
+                    gen_add_CC(tmp, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
@@ -9053,7 +9077,7 @@
             tcg_gen_movi_i32(tmp2, insn & 0xff);
             switch (op) {
             case 1: /* cmp */
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp);
                 tcg_temp_free_i32(tmp2);
                 break;
@@ -9061,7 +9085,7 @@
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, tmp, tmp2);
+                    gen_add_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -9069,7 +9093,7 @@
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                    gen_sub_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -9105,7 +9129,7 @@
             case 1: /* cmp */
                 tmp = load_reg(s, rd);
                 tmp2 = load_reg(s, rm);
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 tcg_temp_free_i32(tmp);
                 break;
@@ -9166,25 +9190,25 @@
             break;
         case 0x2: /* lsl */
             if (s->condexec_mask) {
-                gen_helper_shl(tmp2, tmp2, tmp);
+                gen_shl(tmp2, tmp2, tmp);
             } else {
-                gen_helper_shl_cc(tmp2, tmp2, tmp);
+                gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
         case 0x3: /* lsr */
             if (s->condexec_mask) {
-                gen_helper_shr(tmp2, tmp2, tmp);
+                gen_shr(tmp2, tmp2, tmp);
             } else {
-                gen_helper_shr_cc(tmp2, tmp2, tmp);
+                gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
         case 0x4: /* asr */
             if (s->condexec_mask) {
-                gen_helper_sar(tmp2, tmp2, tmp);
+                gen_sar(tmp2, tmp2, tmp);
             } else {
-                gen_helper_sar_cc(tmp2, tmp2, tmp);
+                gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
@@ -9192,20 +9216,20 @@
             if (s->condexec_mask)
                 gen_adc(tmp, tmp2);
             else
-                gen_helper_adc_cc(tmp, tmp, tmp2);
+                gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
             break;
         case 0x6: /* sbc */
             if (s->condexec_mask)
                 gen_sub_carry(tmp, tmp, tmp2);
             else
-                gen_helper_sbc_cc(tmp, tmp, tmp2);
+                gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
             break;
         case 0x7: /* ror */
             if (s->condexec_mask) {
                 tcg_gen_andi_i32(tmp, tmp, 0x1f);
                 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
             } else {
-                gen_helper_ror_cc(tmp2, tmp2, tmp);
+                gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
@@ -9218,14 +9242,14 @@
             if (s->condexec_mask)
                 tcg_gen_neg_i32(tmp, tmp2);
             else
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_sub_CC(tmp, tmp, tmp2);
             break;
         case 0xa: /* cmp */
-            gen_helper_sub_cc(tmp, tmp, tmp2);
+            gen_sub_CC(tmp, tmp, tmp2);
             rd = 16;
             break;
         case 0xb: /* cmn */
-            gen_helper_add_cc(tmp, tmp, tmp2);
+            gen_add_CC(tmp, tmp, tmp2);
             rd = 16;
             break;
         case 0xc: /* orr */
@@ -9816,7 +9840,7 @@
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO))
             gen_io_start();
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(dc->pc);
         }
 
@@ -9913,7 +9937,7 @@
             /* nothing more to generate */
             break;
         case DISAS_WFI:
-            gen_helper_wfi();
+            gen_helper_wfi(cpu_env);
             break;
         case DISAS_SWI:
             gen_exception(EXCP_SWI);
@@ -9970,19 +9994,6 @@
                     int flags)
 {
     int i;
-#if 0
-    union {
-        uint32_t i;
-        float s;
-    } s0, s1;
-    CPU_DoubleU d;
-    /* ??? This assumes float64 and double have the same layout.
-       Oh well, it's only debug dumps.  */
-    union {
-        float64 f64;
-        double d;
-    } d0;
-#endif
     uint32_t psr;
 
     for(i=0;i<16;i++) {
@@ -10002,20 +10013,23 @@
                 psr & CPSR_T ? 'T' : 'A',
                 cpu_mode_names[psr & 0xf], (psr & 0x10) ? 32 : 26);
 
-#if 0
-    for (i = 0; i < 16; i++) {
-        d.d = env->vfp.regs[i];
-        s0.i = d.l.lower;
-        s1.i = d.l.upper;
-        d0.f64 = d.d;
-        cpu_fprintf(f, "s%02d=%08x(%8g) s%02d=%08x(%8g) d%02d=%08x%08x(%8g)\n",
-                    i * 2, (int)s0.i, s0.s,
-                    i * 2 + 1, (int)s1.i, s1.s,
-                    i, (int)(uint32_t)d.l.upper, (int)(uint32_t)d.l.lower,
-                    d0.d);
+    if (flags & CPU_DUMP_FPU) {
+        int numvfpregs = 0;
+        if (arm_feature(env, ARM_FEATURE_VFP)) {
+            numvfpregs += 16;
+        }
+        if (arm_feature(env, ARM_FEATURE_VFP3)) {
+            numvfpregs += 16;
+        }
+        for (i = 0; i < numvfpregs; i++) {
+            uint64_t v = float64_val(env->vfp.regs[i]);
+            cpu_fprintf(f, "s%02d=%08x s%02d=%08x d%02d=%016" PRIx64 "\n",
+                        i * 2, (uint32_t)v,
+                        i * 2 + 1, (uint32_t)(v >> 32),
+                        i, v);
+        }
+        cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
     }
-    cpu_fprintf(f, "FPSCR: %08x\n", (int)env->vfp.xregs[ARM_VFP_FPSCR]);
-#endif
 }
 
 void restore_state_to_opc(CPUARMState *env, TranslationBlock *tb, int pc_pos)
diff --git a/target-cris/Makefile.objs b/target-cris/Makefile.objs
index 4b09e8c..afb87bc 100644
--- a/target-cris/Makefile.objs
+++ b/target-cris/Makefile.objs
@@ -1,4 +1,2 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += mmu.o machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-cris/helper.c b/target-cris/helper.c
index bfbc29e..1bdb7e2 100644
--- a/target-cris/helper.c
+++ b/target-cris/helper.c
@@ -151,7 +151,7 @@
 	}
 
 	/* Now that we are in kernel mode, load the handlers address.  */
-	env->pc = ldl_code(env->pregs[PR_EBP] + ex_vec * 4);
+        env->pc = cpu_ldl_code(env, env->pregs[PR_EBP] + ex_vec * 4);
 	env->locked_irq = 1;
 	env->pregs[PR_CCS] |= F_FLAG_V10; /* set F.  */
 
@@ -233,7 +233,7 @@
 	/* Now that we are in kernel mode, load the handlers address.
 	   This load may not fault, real hw leaves that behaviour as
 	   undefined.  */
-	env->pc = ldl_code(env->pregs[PR_EBP] + ex_vec * 4);
+        env->pc = cpu_ldl_code(env, env->pregs[PR_EBP] + ex_vec * 4);
 
 	/* Clear the excption_index to avoid spurios hw_aborts for recursive
 	   bus faults.  */
diff --git a/target-cris/helper.h b/target-cris/helper.h
index 093063a..99fb326 100644
--- a/target-cris/helper.h
+++ b/target-cris/helper.h
@@ -1,26 +1,29 @@
 #include "def-helper.h"
 
-DEF_HELPER_1(raise_exception, void, i32)
-DEF_HELPER_1(tlb_flush_pid, void, i32)
-DEF_HELPER_1(spc_write, void, i32)
+DEF_HELPER_2(raise_exception, void, env, i32)
+DEF_HELPER_2(tlb_flush_pid, void, env, i32)
+DEF_HELPER_2(spc_write, void, env, i32)
 DEF_HELPER_3(dump, void, i32, i32, i32)
-DEF_HELPER_0(rfe, void);
-DEF_HELPER_0(rfn, void);
+DEF_HELPER_1(rfe, void, env);
+DEF_HELPER_1(rfn, void, env);
 
-DEF_HELPER_2(movl_sreg_reg, void, i32, i32)
-DEF_HELPER_2(movl_reg_sreg, void, i32, i32)
+DEF_HELPER_3(movl_sreg_reg, void, env, i32, i32)
+DEF_HELPER_3(movl_reg_sreg, void, env, i32, i32)
 
 DEF_HELPER_FLAGS_1(lz, TCG_CALL_PURE, i32, i32);
-DEF_HELPER_FLAGS_3(btst, TCG_CALL_PURE, i32, i32, i32, i32);
+DEF_HELPER_FLAGS_4(btst, TCG_CALL_PURE, i32, env, i32, i32, i32);
 
-DEF_HELPER_FLAGS_3(evaluate_flags_muls, TCG_CALL_PURE, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_3(evaluate_flags_mulu, TCG_CALL_PURE, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_4(evaluate_flags_mcp, TCG_CALL_PURE, i32, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_4(evaluate_flags_alu_4, TCG_CALL_PURE, i32, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_4(evaluate_flags_sub_4, TCG_CALL_PURE, i32, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_2(evaluate_flags_move_4, TCG_CALL_PURE, i32, i32, i32)
-DEF_HELPER_FLAGS_2(evaluate_flags_move_2, TCG_CALL_PURE, i32, i32, i32)
-DEF_HELPER_0(evaluate_flags, void)
-DEF_HELPER_0(top_evaluate_flags, void)
+DEF_HELPER_FLAGS_4(evaluate_flags_muls, TCG_CALL_PURE, i32, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(evaluate_flags_mulu, TCG_CALL_PURE, i32, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(evaluate_flags_mcp, TCG_CALL_PURE, i32, env,
+                                                      i32, i32, i32, i32)
+DEF_HELPER_FLAGS_5(evaluate_flags_alu_4, TCG_CALL_PURE, i32, env,
+                                                        i32, i32, i32, i32)
+DEF_HELPER_FLAGS_5(evaluate_flags_sub_4, TCG_CALL_PURE, i32, env,
+                                                        i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(evaluate_flags_move_4, TCG_CALL_PURE, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(evaluate_flags_move_2, TCG_CALL_PURE, i32, env, i32, i32)
+DEF_HELPER_1(evaluate_flags, void, env)
+DEF_HELPER_1(top_evaluate_flags, void, env)
 
 #include "def-helper.h"
diff --git a/target-cris/op_helper.c b/target-cris/op_helper.c
index ac7c98c..a7468d4 100644
--- a/target-cris/op_helper.c
+++ b/target-cris/op_helper.c
@@ -19,7 +19,6 @@
  */
 
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "mmu.h"
 #include "helper.h"
 #include "host-utils.h"
@@ -55,17 +54,12 @@
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPUCRISState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUCRISState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUCRISState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
-
     D_LOG("%s pc=%x tpc=%x ra=%p\n", __func__,
           env->pc, env->debug1, (void *)retaddr);
     ret = cpu_cris_handle_mmu_fault(env, addr, is_write, mmu_idx);
@@ -79,23 +73,22 @@
                 cpu_restore_state(tb, env, retaddr);
 
 		/* Evaluate flags after retranslation.  */
-                helper_top_evaluate_flags();
+                helper_top_evaluate_flags(env);
             }
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 
 #endif
 
-void helper_raise_exception(uint32_t index)
+void helper_raise_exception(CPUCRISState *env, uint32_t index)
 {
 	env->exception_index = index;
         cpu_loop_exit(env);
 }
 
-void helper_tlb_flush_pid(uint32_t pid)
+void helper_tlb_flush_pid(CPUCRISState *env, uint32_t pid)
 {
 #if !defined(CONFIG_USER_ONLY)
 	pid &= 0xff;
@@ -104,7 +97,7 @@
 #endif
 }
 
-void helper_spc_write(uint32_t new_spc)
+void helper_spc_write(CPUCRISState *env, uint32_t new_spc)
 {
 #if !defined(CONFIG_USER_ONLY)
 	tlb_flush_page(env, env->pregs[PR_SPC]);
@@ -121,7 +114,7 @@
 #define EXTRACT_FIELD(src, start, end) \
 	    (((src) >> start) & ((1 << (end - start + 1)) - 1))
 
-void helper_movl_sreg_reg (uint32_t sreg, uint32_t reg)
+void helper_movl_sreg_reg(CPUCRISState *env, uint32_t sreg, uint32_t reg)
 {
 	uint32_t srs;
 	srs = env->pregs[PR_SRS];
@@ -171,7 +164,7 @@
 #endif
 }
 
-void helper_movl_reg_sreg (uint32_t reg, uint32_t sreg)
+void helper_movl_reg_sreg(CPUCRISState *env, uint32_t reg, uint32_t sreg)
 {
 	uint32_t srs;
 	env->pregs[PR_SRS] &= 3;
@@ -216,7 +209,7 @@
 	env->pregs[PR_CCS] = ccs;
 }
 
-void helper_rfe(void)
+void helper_rfe(CPUCRISState *env)
 {
 	int rflag = env->pregs[PR_CCS] & R_FLAG;
 
@@ -232,7 +225,7 @@
 		env->pregs[PR_CCS] |= P_FLAG;
 }
 
-void helper_rfn(void)
+void helper_rfn(CPUCRISState *env)
 {
 	int rflag = env->pregs[PR_CCS] & R_FLAG;
 
@@ -256,7 +249,7 @@
 	return clz32(t0);
 }
 
-uint32_t helper_btst(uint32_t t0, uint32_t t1, uint32_t ccs)
+uint32_t helper_btst(CPUCRISState *env, uint32_t t0, uint32_t t1, uint32_t ccs)
 {
 	/* FIXME: clean this up.  */
 
@@ -284,7 +277,8 @@
 	return ccs;
 }
 
-static inline uint32_t evaluate_flags_writeback(uint32_t flags, uint32_t ccs)
+static inline uint32_t evaluate_flags_writeback(CPUCRISState *env,
+                                                uint32_t flags, uint32_t ccs)
 {
 	unsigned int x, z, mask;
 
@@ -303,7 +297,8 @@
 	return ccs;
 }
 
-uint32_t helper_evaluate_flags_muls(uint32_t ccs, uint32_t res, uint32_t mof)
+uint32_t helper_evaluate_flags_muls(CPUCRISState *env,
+                                    uint32_t ccs, uint32_t res, uint32_t mof)
 {
 	uint32_t flags = 0;
 	int64_t tmp;
@@ -321,10 +316,11 @@
 	if ((dneg && mof != -1)
 	    || (!dneg && mof != 0))
 		flags |= V_FLAG;
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_mulu(uint32_t ccs, uint32_t res, uint32_t mof)
+uint32_t helper_evaluate_flags_mulu(CPUCRISState *env,
+                                    uint32_t ccs, uint32_t res, uint32_t mof)
 {
 	uint32_t flags = 0;
 	uint64_t tmp;
@@ -339,10 +335,10 @@
 	if (mof)
 		flags |= V_FLAG;
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_mcp(uint32_t ccs,
+uint32_t helper_evaluate_flags_mcp(CPUCRISState *env, uint32_t ccs,
 				   uint32_t src, uint32_t dst, uint32_t res)
 {
 	uint32_t flags = 0;
@@ -368,10 +364,10 @@
 			flags |= R_FLAG;
 	}
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_alu_4(uint32_t ccs,
+uint32_t helper_evaluate_flags_alu_4(CPUCRISState *env, uint32_t ccs,
 				     uint32_t src, uint32_t dst, uint32_t res)
 {
 	uint32_t flags = 0;
@@ -397,10 +393,10 @@
 			flags |= C_FLAG;
 	}
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_sub_4(uint32_t ccs,
+uint32_t helper_evaluate_flags_sub_4(CPUCRISState *env, uint32_t ccs,
 				     uint32_t src, uint32_t dst, uint32_t res)
 {
 	uint32_t flags = 0;
@@ -427,10 +423,11 @@
 	}
 
 	flags ^= C_FLAG;
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_move_4(uint32_t ccs, uint32_t res)
+uint32_t helper_evaluate_flags_move_4(CPUCRISState *env,
+                                      uint32_t ccs, uint32_t res)
 {
 	uint32_t flags = 0;
 
@@ -439,9 +436,10 @@
 	else if (res == 0L)
 		flags |= Z_FLAG;
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
-uint32_t helper_evaluate_flags_move_2(uint32_t ccs, uint32_t res)
+uint32_t helper_evaluate_flags_move_2(CPUCRISState *env,
+                                      uint32_t ccs, uint32_t res)
 {
 	uint32_t flags = 0;
 
@@ -450,12 +448,12 @@
 	else if (res == 0)
 		flags |= Z_FLAG;
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
 /* TODO: This is expensive. We could split things up and only evaluate part of
    CCR on a need to know basis. For now, we simply re-evaluate everything.  */
-void  helper_evaluate_flags(void)
+void helper_evaluate_flags(CPUCRISState *env)
 {
 	uint32_t src, dst, res;
 	uint32_t flags = 0;
@@ -571,25 +569,26 @@
 	if (env->cc_op == CC_OP_SUB || env->cc_op == CC_OP_CMP)
 		flags ^= C_FLAG;
 
-	env->pregs[PR_CCS] = evaluate_flags_writeback(flags, env->pregs[PR_CCS]);
+        env->pregs[PR_CCS] = evaluate_flags_writeback(env, flags,
+                                                      env->pregs[PR_CCS]);
 }
 
-void helper_top_evaluate_flags(void)
+void helper_top_evaluate_flags(CPUCRISState *env)
 {
 	switch (env->cc_op)
 	{
 		case CC_OP_MCP:
-			env->pregs[PR_CCS] = helper_evaluate_flags_mcp(
+                        env->pregs[PR_CCS] = helper_evaluate_flags_mcp(env,
 					env->pregs[PR_CCS], env->cc_src,
 					env->cc_dest, env->cc_result);
 			break;
 		case CC_OP_MULS:
-			env->pregs[PR_CCS] = helper_evaluate_flags_muls(
+                        env->pregs[PR_CCS] = helper_evaluate_flags_muls(env,
 					env->pregs[PR_CCS], env->cc_result,
 					env->pregs[PR_MOF]);
 			break;
 		case CC_OP_MULU:
-			env->pregs[PR_CCS] = helper_evaluate_flags_mulu(
+                        env->pregs[PR_CCS] = helper_evaluate_flags_mulu(env,
 					env->pregs[PR_CCS], env->cc_result,
 					env->pregs[PR_MOF]);
 			break;
@@ -604,18 +603,18 @@
 		{
 			case 4:
 				env->pregs[PR_CCS] =
-					helper_evaluate_flags_move_4(
+                                        helper_evaluate_flags_move_4(env,
 							env->pregs[PR_CCS],
 							env->cc_result);
 				break;
 			case 2:
 				env->pregs[PR_CCS] =
-					helper_evaluate_flags_move_2(
+                                        helper_evaluate_flags_move_2(env,
 							env->pregs[PR_CCS],
 							env->cc_result);
 				break;
 			default:
-				helper_evaluate_flags();
+                                helper_evaluate_flags(env);
 				break;
 		}
 		break;
@@ -626,12 +625,12 @@
 		case CC_OP_CMP:
 			if (env->cc_size == 4)
 				env->pregs[PR_CCS] =
-					helper_evaluate_flags_sub_4(
+                                        helper_evaluate_flags_sub_4(env,
 						env->pregs[PR_CCS],
 						env->cc_src, env->cc_dest,
 						env->cc_result);
 			else
-				helper_evaluate_flags();
+                                helper_evaluate_flags(env);
 			break;
 		default:
 		{
@@ -639,13 +638,13 @@
 			{
 			case 4:
 				env->pregs[PR_CCS] =
-					helper_evaluate_flags_alu_4(
+                                        helper_evaluate_flags_alu_4(env,
 						env->pregs[PR_CCS],
 						env->cc_src, env->cc_dest,
 						env->cc_result);
 				break;
 			default:
-				helper_evaluate_flags();
+                                helper_evaluate_flags(env);
 				break;
 			}
 		}
diff --git a/target-cris/translate.c b/target-cris/translate.c
index ad31877..755de65 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -78,7 +78,7 @@
 	target_ulong pc, ppc;
 
 	/* Decoder.  */
-	unsigned int (*decoder)(struct DisasContext *dc);
+        unsigned int (*decoder)(CPUCRISState *env, struct DisasContext *dc);
 	uint32_t ir;
 	uint32_t opcode;
 	unsigned int op1;
@@ -211,9 +211,9 @@
 		tcg_gen_andi_tl(cpu_PR[r], tn, 3);
 	else {
 		if (r == PR_PID) 
-			gen_helper_tlb_flush_pid(tn);
+                        gen_helper_tlb_flush_pid(cpu_env, tn);
 		if (dc->tb_flags & S_FLAG && r == PR_SPC) 
-			gen_helper_spc_write(tn);
+                        gen_helper_spc_write(cpu_env, tn);
 		else if (r == PR_CCS)
 			dc->cpustate_changed = 1;
 		tcg_gen_mov_tl(cpu_PR[r], tn);
@@ -233,7 +233,7 @@
 	return sval;
 }
 
-static int cris_fetch(DisasContext *dc, uint32_t addr,
+static int cris_fetch(CPUCRISState *env, DisasContext *dc, uint32_t addr,
 		      unsigned int size, unsigned int sign)
 {
 	int r;
@@ -241,24 +241,24 @@
 	switch (size) {
 		case 4:
 		{
-			r = ldl_code(addr);
+                        r = cpu_ldl_code(env, addr);
 			break;
 		}
 		case 2:
 		{
 			if (sign) {
-				r = ldsw_code(addr);
+                                r = cpu_ldsw_code(env, addr);
 			} else {
-				r = lduw_code(addr);
+                                r = cpu_lduw_code(env, addr);
 			}
 			break;
 		}
 		case 1:
 		{
 			if (sign) {
-				r = ldsb_code(addr);
+                                r = cpu_ldsb_code(env, addr);
 			} else {
-				r = ldub_code(addr);
+                                r = cpu_ldub_code(env, addr);
 			}
 			break;
 		}
@@ -278,7 +278,7 @@
 static inline void t_gen_raise_exception(uint32_t index)
 {
         TCGv_i32 tmp = tcg_const_i32(index);
-	gen_helper_raise_exception(tmp);
+        gen_helper_raise_exception(cpu_env, tmp);
         tcg_temp_free_i32(tmp);
 }
 
@@ -624,17 +624,17 @@
 	switch (dc->cc_op)
 	{
 	case CC_OP_MCP:
-		gen_helper_evaluate_flags_mcp(cpu_PR[PR_CCS],
+                gen_helper_evaluate_flags_mcp(cpu_PR[PR_CCS], cpu_env,
 					cpu_PR[PR_CCS], cc_src,
 					cc_dest, cc_result);
 		break;
 	case CC_OP_MULS:
-		gen_helper_evaluate_flags_muls(cpu_PR[PR_CCS],
+                gen_helper_evaluate_flags_muls(cpu_PR[PR_CCS], cpu_env,
 					cpu_PR[PR_CCS], cc_result,
 					cpu_PR[PR_MOF]);
 		break;
 	case CC_OP_MULU:
-		gen_helper_evaluate_flags_mulu(cpu_PR[PR_CCS],
+                gen_helper_evaluate_flags_mulu(cpu_PR[PR_CCS], cpu_env,
 					cpu_PR[PR_CCS], cc_result,
 					cpu_PR[PR_MOF]);
 		break;
@@ -648,15 +648,15 @@
 		switch (dc->cc_size)
 		{
 		case 4:
-			gen_helper_evaluate_flags_move_4(cpu_PR[PR_CCS],
-						cpu_PR[PR_CCS], cc_result);
+                        gen_helper_evaluate_flags_move_4(cpu_PR[PR_CCS],
+                                           cpu_env, cpu_PR[PR_CCS], cc_result);
 			break;
 		case 2:
-			gen_helper_evaluate_flags_move_2(cpu_PR[PR_CCS],
-						cpu_PR[PR_CCS], cc_result);
+                        gen_helper_evaluate_flags_move_2(cpu_PR[PR_CCS],
+                                           cpu_env, cpu_PR[PR_CCS], cc_result);
 			break;
 		default:
-			gen_helper_evaluate_flags();
+                        gen_helper_evaluate_flags(cpu_env);
 			break;
 		}
 		break;
@@ -666,21 +666,21 @@
 	case CC_OP_SUB:
 	case CC_OP_CMP:
 		if (dc->cc_size == 4)
-			gen_helper_evaluate_flags_sub_4(cpu_PR[PR_CCS],
+                        gen_helper_evaluate_flags_sub_4(cpu_PR[PR_CCS], cpu_env,
 				cpu_PR[PR_CCS], cc_src, cc_dest, cc_result);
 		else
-			gen_helper_evaluate_flags();
+                        gen_helper_evaluate_flags(cpu_env);
 
 		break;
 	default:
 		switch (dc->cc_size)
 		{
 			case 4:
-			gen_helper_evaluate_flags_alu_4(cpu_PR[PR_CCS],
+                        gen_helper_evaluate_flags_alu_4(cpu_PR[PR_CCS], cpu_env,
 				cpu_PR[PR_CCS], cc_src, cc_dest, cc_result);
 				break;
 			default:
-				gen_helper_evaluate_flags();
+                                gen_helper_evaluate_flags(cpu_env);
 				break;
 		}
 		break;
@@ -1304,8 +1304,8 @@
 		t_gen_zext(dst, cpu_R[rd], size);
 }
 
-static int dec_prep_move_m(DisasContext *dc, int s_ext, int memsize,
-			   TCGv dst)
+static int dec_prep_move_m(CPUCRISState *env, DisasContext *dc,
+                           int s_ext, int memsize, TCGv dst)
 {
 	unsigned int rs;
 	uint32_t imm;
@@ -1321,7 +1321,7 @@
 		if (memsize == 1)
 			insn_len++;
 
-		imm = cris_fetch(dc, dc->pc + 2, memsize, s_ext);
+                imm = cris_fetch(env, dc, dc->pc + 2, memsize, s_ext);
 		tcg_gen_movi_tl(dst, imm);
 		dc->postinc = 0;
 	} else {
@@ -1338,12 +1338,12 @@
 /* Prepare T0 and T1 for a memory + alu operation.
    s_ext decides if the operand1 should be sign-extended or zero-extended when
    needed.  */
-static int dec_prep_alu_m(DisasContext *dc, int s_ext, int memsize,
-			  TCGv dst, TCGv src)
+static int dec_prep_alu_m(CPUCRISState *env, DisasContext *dc,
+                          int s_ext, int memsize, TCGv dst, TCGv src)
 {
 	int insn_len;
 
-	insn_len = dec_prep_move_m(dc, s_ext, memsize, src);
+        insn_len = dec_prep_move_m(env, dc, s_ext, memsize, src);
 	tcg_gen_mov_tl(dst, cpu_R[dc->op2]);
 	return insn_len;
 }
@@ -1362,7 +1362,7 @@
 
 /* Start of insn decoders.  */
 
-static int dec_bccq(DisasContext *dc)
+static int dec_bccq(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t offset;
 	int sign;
@@ -1382,7 +1382,7 @@
 	cris_prepare_cc_branch (dc, offset, cond);
 	return 2;
 }
-static int dec_addoq(DisasContext *dc)
+static int dec_addoq(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t imm;
 
@@ -1396,7 +1396,7 @@
 
 	return 2;
 }
-static int dec_addq(DisasContext *dc)
+static int dec_addq(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("addq %u, $r%u\n", dc->op1, dc->op2);
 
@@ -1408,7 +1408,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(dc->op1), 4);
 	return 2;
 }
-static int dec_moveq(DisasContext *dc)
+static int dec_moveq(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 
@@ -1419,7 +1419,7 @@
 	tcg_gen_movi_tl(cpu_R[dc->op2], imm);
 	return 2;
 }
-static int dec_subq(DisasContext *dc)
+static int dec_subq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 5);
 
@@ -1430,7 +1430,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(dc->op1), 4);
 	return 2;
 }
-static int dec_cmpq(DisasContext *dc)
+static int dec_cmpq(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 5);
@@ -1443,7 +1443,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(imm), 4);
 	return 2;
 }
-static int dec_andq(DisasContext *dc)
+static int dec_andq(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 5);
@@ -1456,7 +1456,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(imm), 4);
 	return 2;
 }
-static int dec_orq(DisasContext *dc)
+static int dec_orq(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 5);
@@ -1468,14 +1468,14 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(imm), 4);
 	return 2;
 }
-static int dec_btstq(DisasContext *dc)
+static int dec_btstq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 4);
 	LOG_DIS("btstq %u, $r%d\n", dc->op1, dc->op2);
 
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_evaluate_flags(dc);
-	gen_helper_btst(cpu_PR[PR_CCS], cpu_R[dc->op2],
+        gen_helper_btst(cpu_PR[PR_CCS], cpu_env, cpu_R[dc->op2],
 			tcg_const_tl(dc->op1), cpu_PR[PR_CCS]);
 	cris_alu(dc, CC_OP_MOVE,
 		 cpu_R[dc->op2], cpu_R[dc->op2], cpu_R[dc->op2], 4);
@@ -1483,7 +1483,7 @@
 	dc->flags_uptodate = 1;
 	return 2;
 }
-static int dec_asrq(DisasContext *dc)
+static int dec_asrq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 4);
 	LOG_DIS("asrq %u, $r%d\n", dc->op1, dc->op2);
@@ -1495,7 +1495,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], 4);
 	return 2;
 }
-static int dec_lslq(DisasContext *dc)
+static int dec_lslq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 4);
 	LOG_DIS("lslq %u, $r%d\n", dc->op1, dc->op2);
@@ -1509,7 +1509,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], 4);
 	return 2;
 }
-static int dec_lsrq(DisasContext *dc)
+static int dec_lsrq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 4);
 	LOG_DIS("lsrq %u, $r%d\n", dc->op1, dc->op2);
@@ -1523,7 +1523,7 @@
 	return 2;
 }
 
-static int dec_move_r(DisasContext *dc)
+static int dec_move_r(CPUCRISState *env, DisasContext *dc)
 {
 	int size = memsize_zz(dc);
 
@@ -1551,7 +1551,7 @@
 	return 2;
 }
 
-static int dec_scc_r(DisasContext *dc)
+static int dec_scc_r(CPUCRISState *env, DisasContext *dc)
 {
 	int cond = dc->op2;
 
@@ -1594,7 +1594,7 @@
 	}
 }
 
-static int dec_and_r(DisasContext *dc)
+static int dec_and_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1611,7 +1611,7 @@
 	return 2;
 }
 
-static int dec_lz_r(DisasContext *dc)
+static int dec_lz_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	LOG_DIS("lz $r%u, $r%u\n",
@@ -1624,7 +1624,7 @@
 	return 2;
 }
 
-static int dec_lsl_r(DisasContext *dc)
+static int dec_lsl_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1641,7 +1641,7 @@
 	return 2;
 }
 
-static int dec_lsr_r(DisasContext *dc)
+static int dec_lsr_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1658,7 +1658,7 @@
 	return 2;
 }
 
-static int dec_asr_r(DisasContext *dc)
+static int dec_asr_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1675,7 +1675,7 @@
 	return 2;
 }
 
-static int dec_muls_r(DisasContext *dc)
+static int dec_muls_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1691,7 +1691,7 @@
 	return 2;
 }
 
-static int dec_mulu_r(DisasContext *dc)
+static int dec_mulu_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1708,7 +1708,7 @@
 }
 
 
-static int dec_dstep_r(DisasContext *dc)
+static int dec_dstep_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("dstep $r%u, $r%u\n", dc->op1, dc->op2);
 	cris_cc_mask(dc, CC_MASK_NZ);
@@ -1717,7 +1717,7 @@
 	return 2;
 }
 
-static int dec_xor_r(DisasContext *dc)
+static int dec_xor_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1733,7 +1733,7 @@
 	return 2;
 }
 
-static int dec_bound_r(DisasContext *dc)
+static int dec_bound_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv l0;
 	int size = memsize_zz(dc);
@@ -1747,7 +1747,7 @@
 	return 2;
 }
 
-static int dec_cmp_r(DisasContext *dc)
+static int dec_cmp_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1762,7 +1762,7 @@
 	return 2;
 }
 
-static int dec_abs_r(DisasContext *dc)
+static int dec_abs_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 
@@ -1781,7 +1781,7 @@
 	return 2;
 }
 
-static int dec_add_r(DisasContext *dc)
+static int dec_add_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1796,7 +1796,7 @@
 	return 2;
 }
 
-static int dec_addc_r(DisasContext *dc)
+static int dec_addc_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("addc $r%u, $r%u\n",
 		    dc->op1, dc->op2);
@@ -1811,7 +1811,7 @@
 	return 2;
 }
 
-static int dec_mcp_r(DisasContext *dc)
+static int dec_mcp_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("mcp $p%u, $r%u\n",
 		     dc->op2, dc->op1);
@@ -1838,7 +1838,7 @@
 }
 #endif
 
-static int dec_swap_r(DisasContext *dc)
+static int dec_swap_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 #if DISAS_CRIS
@@ -1864,7 +1864,7 @@
 	return 2;
 }
 
-static int dec_or_r(DisasContext *dc)
+static int dec_or_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1878,7 +1878,7 @@
 	return 2;
 }
 
-static int dec_addi_r(DisasContext *dc)
+static int dec_addi_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	LOG_DIS("addi.%c $r%u, $r%u\n",
@@ -1891,7 +1891,7 @@
 	return 2;
 }
 
-static int dec_addi_acr(DisasContext *dc)
+static int dec_addi_acr(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	LOG_DIS("addi.%c $r%u, $r%u, $acr\n",
@@ -1904,7 +1904,7 @@
 	return 2;
 }
 
-static int dec_neg_r(DisasContext *dc)
+static int dec_neg_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1919,13 +1919,13 @@
 	return 2;
 }
 
-static int dec_btst_r(DisasContext *dc)
+static int dec_btst_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("btst $r%u, $r%u\n",
 		    dc->op1, dc->op2);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_evaluate_flags(dc);
-	gen_helper_btst(cpu_PR[PR_CCS], cpu_R[dc->op2],
+        gen_helper_btst(cpu_PR[PR_CCS], cpu_env, cpu_R[dc->op2],
 			cpu_R[dc->op1], cpu_PR[PR_CCS]);
 	cris_alu(dc, CC_OP_MOVE, cpu_R[dc->op2],
 		 cpu_R[dc->op2], cpu_R[dc->op2], 4);
@@ -1934,7 +1934,7 @@
 	return 2;
 }
 
-static int dec_sub_r(DisasContext *dc)
+static int dec_sub_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1949,7 +1949,7 @@
 }
 
 /* Zero extension. From size to dword.  */
-static int dec_movu_r(DisasContext *dc)
+static int dec_movu_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -1966,7 +1966,7 @@
 }
 
 /* Sign extension. From size to dword.  */
-static int dec_movs_r(DisasContext *dc)
+static int dec_movs_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -1985,7 +1985,7 @@
 }
 
 /* zero extension. From size to dword.  */
-static int dec_addu_r(DisasContext *dc)
+static int dec_addu_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -2004,7 +2004,7 @@
 }
 
 /* Sign extension. From size to dword.  */
-static int dec_adds_r(DisasContext *dc)
+static int dec_adds_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -2023,7 +2023,7 @@
 }
 
 /* Zero extension. From size to dword.  */
-static int dec_subu_r(DisasContext *dc)
+static int dec_subu_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -2042,7 +2042,7 @@
 }
 
 /* Sign extension. From size to dword.  */
-static int dec_subs_r(DisasContext *dc)
+static int dec_subs_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -2060,7 +2060,7 @@
 	return 2;
 }
 
-static int dec_setclrf(DisasContext *dc)
+static int dec_setclrf(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t flags;
 	int set = (~dc->opcode >> 2) & 1;
@@ -2131,22 +2131,24 @@
 	return 2;
 }
 
-static int dec_move_rs(DisasContext *dc)
+static int dec_move_rs(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("move $r%u, $s%u\n", dc->op1, dc->op2);
 	cris_cc_mask(dc, 0);
-	gen_helper_movl_sreg_reg(tcg_const_tl(dc->op2), tcg_const_tl(dc->op1));
+        gen_helper_movl_sreg_reg(cpu_env, tcg_const_tl(dc->op2),
+                                 tcg_const_tl(dc->op1));
 	return 2;
 }
-static int dec_move_sr(DisasContext *dc)
+static int dec_move_sr(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("move $s%u, $r%u\n", dc->op2, dc->op1);
 	cris_cc_mask(dc, 0);
-	gen_helper_movl_reg_sreg(tcg_const_tl(dc->op1), tcg_const_tl(dc->op2));
+        gen_helper_movl_reg_sreg(cpu_env, tcg_const_tl(dc->op1),
+                                 tcg_const_tl(dc->op2));
 	return 2;
 }
 
-static int dec_move_rp(DisasContext *dc)
+static int dec_move_rp(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	LOG_DIS("move $r%u, $p%u\n", dc->op1, dc->op2);
@@ -2176,7 +2178,7 @@
 	tcg_temp_free(t[0]);
 	return 2;
 }
-static int dec_move_pr(DisasContext *dc)
+static int dec_move_pr(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	LOG_DIS("move $p%u, $r%u\n", dc->op2, dc->op1);
@@ -2198,7 +2200,7 @@
 	return 2;
 }
 
-static int dec_move_mr(DisasContext *dc)
+static int dec_move_mr(CPUCRISState *env, DisasContext *dc)
 {
 	int memsize = memsize_zz(dc);
 	int insn_len;
@@ -2208,7 +2210,7 @@
 		    dc->op2);
 
 	if (memsize == 4) {
-		insn_len = dec_prep_move_m(dc, 0, 4, cpu_R[dc->op2]);
+                insn_len = dec_prep_move_m(env, dc, 0, 4, cpu_R[dc->op2]);
 		cris_cc_mask(dc, CC_MASK_NZ);
 		cris_update_cc_op(dc, CC_OP_MOVE, 4);
 		cris_update_cc_x(dc);
@@ -2218,7 +2220,7 @@
 		TCGv t0;
 
 		t0 = tcg_temp_new();
-		insn_len = dec_prep_move_m(dc, 0, memsize, t0);
+                insn_len = dec_prep_move_m(env, dc, 0, memsize, t0);
 		cris_cc_mask(dc, CC_MASK_NZ);
 		cris_alu(dc, CC_OP_MOVE,
 			    cpu_R[dc->op2], cpu_R[dc->op2], t0, memsize);
@@ -2240,7 +2242,7 @@
 	tcg_temp_free(t[1]);
 }
 
-static int dec_movs_m(DisasContext *dc)
+static int dec_movs_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2252,7 +2254,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_MOVE,
 		    cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
@@ -2261,7 +2263,7 @@
 	return insn_len;
 }
 
-static int dec_addu_m(DisasContext *dc)
+static int dec_addu_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2273,7 +2275,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_ADD,
 		    cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
@@ -2282,7 +2284,7 @@
 	return insn_len;
 }
 
-static int dec_adds_m(DisasContext *dc)
+static int dec_adds_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2294,7 +2296,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_ADD, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2302,7 +2304,7 @@
 	return insn_len;
 }
 
-static int dec_subu_m(DisasContext *dc)
+static int dec_subu_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2314,7 +2316,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_SUB, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2322,7 +2324,7 @@
 	return insn_len;
 }
 
-static int dec_subs_m(DisasContext *dc)
+static int dec_subs_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2334,7 +2336,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_SUB, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2342,7 +2344,7 @@
 	return insn_len;
 }
 
-static int dec_movu_m(DisasContext *dc)
+static int dec_movu_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2354,7 +2356,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_MOVE, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2362,7 +2364,7 @@
 	return insn_len;
 }
 
-static int dec_cmpu_m(DisasContext *dc)
+static int dec_cmpu_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2373,7 +2375,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_CMP, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2381,7 +2383,7 @@
 	return insn_len;
 }
 
-static int dec_cmps_m(DisasContext *dc)
+static int dec_cmps_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2392,7 +2394,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_CMP,
 		    cpu_R[dc->op2], cpu_R[dc->op2], t[1],
@@ -2402,7 +2404,7 @@
 	return insn_len;
 }
 
-static int dec_cmp_m(DisasContext *dc)
+static int dec_cmp_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2413,7 +2415,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_CMP,
 		    cpu_R[dc->op2], cpu_R[dc->op2], t[1],
@@ -2423,7 +2425,7 @@
 	return insn_len;
 }
 
-static int dec_test_m(DisasContext *dc)
+static int dec_test_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2436,7 +2438,7 @@
 	cris_evaluate_flags(dc);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~3);
 
@@ -2447,7 +2449,7 @@
 	return insn_len;
 }
 
-static int dec_and_m(DisasContext *dc)
+static int dec_and_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2458,7 +2460,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_AND, cpu_R[dc->op2], t[0], t[1], memsize_zz(dc));
 	do_postinc(dc, memsize);
@@ -2466,7 +2468,7 @@
 	return insn_len;
 }
 
-static int dec_add_m(DisasContext *dc)
+static int dec_add_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2477,7 +2479,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_ADD,
 		 cpu_R[dc->op2], t[0], t[1], memsize_zz(dc));
@@ -2486,7 +2488,7 @@
 	return insn_len;
 }
 
-static int dec_addo_m(DisasContext *dc)
+static int dec_addo_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2497,7 +2499,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, 0);
 	cris_alu(dc, CC_OP_ADD, cpu_R[R_ACR], t[0], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2505,7 +2507,7 @@
 	return insn_len;
 }
 
-static int dec_bound_m(DisasContext *dc)
+static int dec_bound_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv l[2];
 	int memsize = memsize_zz(dc);
@@ -2517,7 +2519,7 @@
 
 	l[0] = tcg_temp_local_new();
 	l[1] = tcg_temp_local_new();
-	insn_len = dec_prep_alu_m(dc, 0, memsize, l[0], l[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, l[0], l[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_BOUND, cpu_R[dc->op2], l[0], l[1], 4);
 	do_postinc(dc, memsize);
@@ -2526,7 +2528,7 @@
 	return insn_len;
 }
 
-static int dec_addc_mr(DisasContext *dc)
+static int dec_addc_mr(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int insn_len = 2;
@@ -2541,7 +2543,7 @@
 	dc->flags_x = X_FLAG;
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, 4, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, 4, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_ADDC, cpu_R[dc->op2], t[0], t[1], 4);
 	do_postinc(dc, 4);
@@ -2549,7 +2551,7 @@
 	return insn_len;
 }
 
-static int dec_sub_m(DisasContext *dc)
+static int dec_sub_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2560,7 +2562,7 @@
 		    dc->op2, dc->ir, dc->zzsize);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_SUB, cpu_R[dc->op2], t[0], t[1], memsize);
 	do_postinc(dc, memsize);
@@ -2568,7 +2570,7 @@
 	return insn_len;
 }
 
-static int dec_or_m(DisasContext *dc)
+static int dec_or_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2579,7 +2581,7 @@
 		    dc->op2, dc->pc);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_OR,
 		    cpu_R[dc->op2], t[0], t[1], memsize_zz(dc));
@@ -2588,7 +2590,7 @@
 	return insn_len;
 }
 
-static int dec_move_mp(DisasContext *dc)
+static int dec_move_mp(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2601,7 +2603,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, 0);
 	if (dc->op2 == PR_CCS) {
 		cris_evaluate_flags(dc);
@@ -2620,7 +2622,7 @@
 	return insn_len;
 }
 
-static int dec_move_pm(DisasContext *dc)
+static int dec_move_pm(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int memsize;
@@ -2646,7 +2648,7 @@
 	return 2;
 }
 
-static int dec_movem_mr(DisasContext *dc)
+static int dec_movem_mr(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv_i64 tmp[16];
         TCGv tmp32;
@@ -2693,7 +2695,7 @@
 	return 2;
 }
 
-static int dec_movem_rm(DisasContext *dc)
+static int dec_movem_rm(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv tmp;
 	TCGv addr;
@@ -2722,7 +2724,7 @@
 	return 2;
 }
 
-static int dec_move_rm(DisasContext *dc)
+static int dec_move_rm(CPUCRISState *env, DisasContext *dc)
 {
 	int memsize;
 
@@ -2741,7 +2743,7 @@
 	return 2;
 }
 
-static int dec_lapcq(DisasContext *dc)
+static int dec_lapcq(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("lapcq %x, $r%u\n",
 		    dc->pc + dc->op1*2, dc->op2);
@@ -2750,7 +2752,7 @@
 	return 2;
 }
 
-static int dec_lapc_im(DisasContext *dc)
+static int dec_lapc_im(CPUCRISState *env, DisasContext *dc)
 {
 	unsigned int rd;
 	int32_t imm;
@@ -2759,7 +2761,7 @@
 	rd = dc->op2;
 
 	cris_cc_mask(dc, 0);
-	imm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        imm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 	LOG_DIS("lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2);
 
 	pc = dc->pc;
@@ -2769,7 +2771,7 @@
 }
 
 /* Jump to special reg.  */
-static int dec_jump_p(DisasContext *dc)
+static int dec_jump_p(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("jump $p%u\n", dc->op2);
 
@@ -2784,7 +2786,7 @@
 }
 
 /* Jump and save.  */
-static int dec_jas_r(DisasContext *dc)
+static int dec_jas_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("jas $r%u, $p%u\n", dc->op1, dc->op2);
 	cris_cc_mask(dc, 0);
@@ -2798,11 +2800,11 @@
 	return 2;
 }
 
-static int dec_jas_im(DisasContext *dc)
+static int dec_jas_im(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 
-	imm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        imm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 
 	LOG_DIS("jas 0x%x\n", imm);
 	cris_cc_mask(dc, 0);
@@ -2814,11 +2816,11 @@
 	return 6;
 }
 
-static int dec_jasc_im(DisasContext *dc)
+static int dec_jasc_im(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 
-	imm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        imm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 
 	LOG_DIS("jasc 0x%x\n", imm);
 	cris_cc_mask(dc, 0);
@@ -2830,7 +2832,7 @@
 	return 6;
 }
 
-static int dec_jasc_r(DisasContext *dc)
+static int dec_jasc_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("jasc_r $r%u, $p%u\n", dc->op1, dc->op2);
 	cris_cc_mask(dc, 0);
@@ -2841,12 +2843,12 @@
 	return 2;
 }
 
-static int dec_bcc_im(DisasContext *dc)
+static int dec_bcc_im(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t offset;
 	uint32_t cond = dc->op2;
 
-	offset = cris_fetch(dc, dc->pc + 2, 2, 1);
+        offset = cris_fetch(env, dc, dc->pc + 2, 2, 1);
 
 	LOG_DIS("b%s %d pc=%x dst=%x\n",
 		    cc_name(cond), offset,
@@ -2858,12 +2860,12 @@
 	return 4;
 }
 
-static int dec_bas_im(DisasContext *dc)
+static int dec_bas_im(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t simm;
 
 
-	simm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        simm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 
 	LOG_DIS("bas 0x%x, $p%u\n", dc->pc + simm, dc->op2);
 	cris_cc_mask(dc, 0);
@@ -2875,10 +2877,10 @@
 	return 6;
 }
 
-static int dec_basc_im(DisasContext *dc)
+static int dec_basc_im(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t simm;
-	simm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        simm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 
 	LOG_DIS("basc 0x%x, $p%u\n", dc->pc + simm, dc->op2);
 	cris_cc_mask(dc, 0);
@@ -2890,7 +2892,7 @@
 	return 6;
 }
 
-static int dec_rfe_etc(DisasContext *dc)
+static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc)
 {
 	cris_cc_mask(dc, 0);
 
@@ -2906,14 +2908,14 @@
 			/* rfe.  */
 			LOG_DIS("rfe\n");
 			cris_evaluate_flags(dc);
-			gen_helper_rfe();
+                        gen_helper_rfe(cpu_env);
 			dc->is_jmp = DISAS_UPDATE;
 			break;
 		case 5:
 			/* rfn.  */
 			LOG_DIS("rfn\n");
 			cris_evaluate_flags(dc);
-			gen_helper_rfn();
+                        gen_helper_rfn(cpu_env);
 			dc->is_jmp = DISAS_UPDATE;
 			break;
 		case 6:
@@ -2937,17 +2939,17 @@
 	return 2;
 }
 
-static int dec_ftag_fidx_d_m(DisasContext *dc)
+static int dec_ftag_fidx_d_m(CPUCRISState *env, DisasContext *dc)
 {
 	return 2;
 }
 
-static int dec_ftag_fidx_i_m(DisasContext *dc)
+static int dec_ftag_fidx_i_m(CPUCRISState *env, DisasContext *dc)
 {
 	return 2;
 }
 
-static int dec_null(DisasContext *dc)
+static int dec_null(CPUCRISState *env, DisasContext *dc)
 {
 	printf ("unknown insn pc=%x opc=%x op1=%x op2=%x\n",
 		dc->pc, dc->opcode, dc->op1, dc->op2);
@@ -2961,7 +2963,7 @@
 		uint32_t bits;
 		uint32_t mask;
 	};
-	int (*dec)(DisasContext *dc);
+        int (*dec)(CPUCRISState *env, DisasContext *dc);
 } decinfo[] = {
 	/* Order matters here.  */
 	{DEC_MOVEQ, dec_moveq},
@@ -3067,16 +3069,17 @@
 	{{0, 0}, dec_null}
 };
 
-static unsigned int crisv32_decoder(DisasContext *dc)
+static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc)
 {
 	int insn_len = 2;
 	int i;
 
-	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+	if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
 		tcg_gen_debug_insn_start(dc->pc);
+        }
 
 	/* Load a halfword onto the instruction register.  */
-	dc->ir = cris_fetch(dc, dc->pc, 2, 0);
+        dc->ir = cris_fetch(env, dc, dc->pc, 2, 0);
 
 	/* Now decode it.  */
 	dc->opcode   = EXTRACT_FIELD(dc->ir, 4, 11);
@@ -3090,7 +3093,7 @@
 	for (i = 0; i < ARRAY_SIZE(decinfo); i++) {
 		if ((dc->opcode & decinfo[i].mask) == decinfo[i].bits)
 		{
-			insn_len = decinfo[i].dec(dc);
+                        insn_len = decinfo[i].dec(env, dc);
 			break;
 		}
 	}
@@ -3284,7 +3287,7 @@
                     gen_io_start();
 		dc->clear_x = 1;
 
-		insn_len = dc->decoder(dc);
+                insn_len = dc->decoder(env, dc);
 		dc->ppc = dc->pc;
 		dc->pc += insn_len;
 		if (dc->clear_x)
diff --git a/target-cris/translate_v10.c b/target-cris/translate_v10.c
index 3629629..d2cca89 100644
--- a/target-cris/translate_v10.c
+++ b/target-cris/translate_v10.c
@@ -164,8 +164,8 @@
     return insn_len;
 }
 
-static int dec10_prep_move_m(DisasContext *dc, int s_ext, int memsize,
-                           TCGv dst)
+static int dec10_prep_move_m(CPUCRISState *env, DisasContext *dc,
+                             int s_ext, int memsize, TCGv dst)
 {
     unsigned int rs;
     uint32_t imm;
@@ -182,17 +182,17 @@
         if (memsize != 4) {
             if (s_ext) {
                 if (memsize == 1)
-                    imm = ldsb_code(dc->pc + 2);
+                    imm = cpu_ldsb_code(env, dc->pc + 2);
                 else
-                    imm = ldsw_code(dc->pc + 2);
+                    imm = cpu_ldsw_code(env, dc->pc + 2);
             } else {
                 if (memsize == 1)
-                    imm = ldub_code(dc->pc + 2);
+                    imm = cpu_ldub_code(env, dc->pc + 2);
                 else
-                    imm = lduw_code(dc->pc + 2);
+                    imm = cpu_lduw_code(env, dc->pc + 2);
             }
         } else
-            imm = ldl_code(dc->pc + 2);
+            imm = cpu_ldl_code(env, dc->pc + 2);
 
         tcg_gen_movi_tl(dst, imm);
 
@@ -289,7 +289,7 @@
             } else {
                 /* BTST */
                 cris_update_cc_op(dc, CC_OP_FLAGS, 4);
-                gen_helper_btst(cpu_PR[PR_CCS], cpu_R[dc->dst],
+                gen_helper_btst(cpu_PR[PR_CCS], cpu_env, cpu_R[dc->dst],
                            tcg_const_tl(imm), cpu_PR[PR_CCS]);
             }
             break;
@@ -723,7 +723,7 @@
                 LOG_DIS("btst $r%d, $r%d sz=%d\n", dc->src, dc->dst, size);
                 cris_cc_mask(dc, CC_MASK_NZVC);
                 cris_update_cc_op(dc, CC_OP_FLAGS, 4);
-                gen_helper_btst(cpu_PR[PR_CCS], cpu_R[dc->dst],
+                gen_helper_btst(cpu_PR[PR_CCS], cpu_env, cpu_R[dc->dst],
                            cpu_R[dc->src], cpu_PR[PR_CCS]);
                 break;
             case CRISV10_REG_DSTEP:
@@ -752,7 +752,8 @@
     return insn_len;
 }
 
-static unsigned int dec10_ind_move_m_r(DisasContext *dc, unsigned int size)
+static unsigned int dec10_ind_move_m_r(CPUCRISState *env, DisasContext *dc,
+                                       unsigned int size)
 {
     unsigned int insn_len = 2;
     TCGv t;
@@ -762,7 +763,7 @@
 
     cris_cc_mask(dc, CC_MASK_NZVC);
     t = tcg_temp_new();
-    insn_len += dec10_prep_move_m(dc, 0, size, t);
+    insn_len += dec10_prep_move_m(env, dc, 0, size, t);
     cris_alu(dc, CC_OP_MOVE, cpu_R[dc->dst], cpu_R[dc->dst], t, size);
     if (dc->dst == 15) {
         tcg_gen_mov_tl(env_btarget, cpu_R[dc->dst]);
@@ -789,7 +790,7 @@
     return insn_len;
 }
 
-static unsigned int dec10_ind_move_m_pr(DisasContext *dc)
+static unsigned int dec10_ind_move_m_pr(CPUCRISState *env, DisasContext *dc)
 {
     unsigned int insn_len = 2, rd = dc->dst;
     TCGv t, addr;
@@ -799,7 +800,7 @@
 
     addr = tcg_temp_new();
     t = tcg_temp_new();
-    insn_len += dec10_prep_move_m(dc, 0, 4, t);
+    insn_len += dec10_prep_move_m(env, dc, 0, 4, t);
     if (rd == 15) {
         tcg_gen_mov_tl(env_btarget, t);
         cris_prepare_jmp(dc, JMP_INDIRECT);
@@ -899,14 +900,15 @@
     tcg_temp_free(t0);
 }
 
-static int dec10_ind_alu(DisasContext *dc, int op, unsigned int size)
+static int dec10_ind_alu(CPUCRISState *env, DisasContext *dc,
+                         int op, unsigned int size)
 {
     int insn_len = 0;
     int rd = dc->dst;
     TCGv t[2];
 
     cris_alu_m_alloc_temps(t);
-    insn_len += dec10_prep_move_m(dc, 0, size, t[0]);
+    insn_len += dec10_prep_move_m(env, dc, 0, size, t[0]);
     cris_alu(dc, op, cpu_R[dc->dst], cpu_R[rd], t[0], size);
     if (dc->dst == 15) {
         tcg_gen_mov_tl(env_btarget, cpu_R[dc->dst]);
@@ -920,14 +922,15 @@
     return insn_len;
 }
 
-static int dec10_ind_bound(DisasContext *dc, unsigned int size)
+static int dec10_ind_bound(CPUCRISState *env, DisasContext *dc,
+                           unsigned int size)
 {
     int insn_len = 0;
     int rd = dc->dst;
     TCGv t;
 
     t = tcg_temp_local_new();
-    insn_len += dec10_prep_move_m(dc, 0, size, t);
+    insn_len += dec10_prep_move_m(env, dc, 0, size, t);
     cris_alu(dc, CC_OP_BOUND, cpu_R[dc->dst], cpu_R[rd], t, 4);
     if (dc->dst == 15) {
         tcg_gen_mov_tl(env_btarget, cpu_R[dc->dst]);
@@ -940,7 +943,7 @@
     return insn_len;
 }
 
-static int dec10_alux_m(DisasContext *dc, int op)
+static int dec10_alux_m(CPUCRISState *env, DisasContext *dc, int op)
 {
     unsigned int size = (dc->size & 1) ? 2 : 1;
     unsigned int sx = !!(dc->size & 2);
@@ -953,7 +956,7 @@
     t = tcg_temp_new();
 
     cris_cc_mask(dc, CC_MASK_NZVC);
-    insn_len += dec10_prep_move_m(dc, sx, size, t);
+    insn_len += dec10_prep_move_m(env, dc, sx, size, t);
     cris_alu(dc, op, cpu_R[dc->dst], cpu_R[rd], t, 4);
     if (dc->dst == 15) {
         tcg_gen_mov_tl(env_btarget, cpu_R[dc->dst]);
@@ -966,7 +969,7 @@
     return insn_len;
 }
 
-static int dec10_dip(DisasContext *dc)
+static int dec10_dip(CPUCRISState *env, DisasContext *dc)
 {
     int insn_len = 2;
     uint32_t imm;
@@ -974,7 +977,7 @@
     LOG_DIS("dip pc=%x opcode=%d r%d r%d\n",
               dc->pc, dc->opcode, dc->src, dc->dst);
     if (dc->src == 15) {
-        imm = ldl_code(dc->pc + 2);
+        imm = cpu_ldl_code(env, dc->pc + 2);
         tcg_gen_movi_tl(cpu_PR[PR_PREFIX], imm);
         if (dc->postinc)
             insn_len += 4;
@@ -989,7 +992,7 @@
     return insn_len;
 }
 
-static int dec10_bdap_m(DisasContext *dc, int size)
+static int dec10_bdap_m(CPUCRISState *env, DisasContext *dc, int size)
 {
     int insn_len = 2;
     int rd = dc->dst;
@@ -1014,13 +1017,13 @@
     }
 #endif
     /* Now the rest of the modes are truly indirect.  */
-    insn_len += dec10_prep_move_m(dc, 1, size, cpu_PR[PR_PREFIX]);
+    insn_len += dec10_prep_move_m(env, dc, 1, size, cpu_PR[PR_PREFIX]);
     tcg_gen_add_tl(cpu_PR[PR_PREFIX], cpu_PR[PR_PREFIX], cpu_R[rd]);
     cris_set_prefix(dc);
     return insn_len;
 }
 
-static unsigned int dec10_ind(DisasContext *dc)
+static unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc)
 {
     unsigned int insn_len = 2;
     unsigned int size = dec10_size(dc->size);
@@ -1031,7 +1034,7 @@
     if (dc->size != 3) {
         switch (dc->opcode) {
             case CRISV10_IND_MOVE_M_R:
-                return dec10_ind_move_m_r(dc, size);
+                return dec10_ind_move_m_r(env, dc, size);
                 break;
             case CRISV10_IND_MOVE_R_M:
                 return dec10_ind_move_r_m(dc, size);
@@ -1039,7 +1042,7 @@
             case CRISV10_IND_CMP:
                 LOG_DIS("cmp size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_CMP, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_CMP, size);
                 break;
             case CRISV10_IND_TEST:
                 LOG_DIS("test size=%d op=%d %d\n",  size, dc->src, dc->dst);
@@ -1047,7 +1050,7 @@
                 cris_evaluate_flags(dc);
                 cris_cc_mask(dc, CC_MASK_NZVC);
                 cris_alu_m_alloc_temps(t);
-                insn_len += dec10_prep_move_m(dc, 0, size, t[0]);
+                insn_len += dec10_prep_move_m(env, dc, 0, size, t[0]);
                 tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~3);
                 cris_alu(dc, CC_OP_CMP, cpu_R[dc->dst],
                          t[0], tcg_const_tl(0), size);
@@ -1056,39 +1059,39 @@
             case CRISV10_IND_ADD:
                 LOG_DIS("add size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_ADD, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_ADD, size);
                 break;
             case CRISV10_IND_SUB:
                 LOG_DIS("sub size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_SUB, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_SUB, size);
                 break;
             case CRISV10_IND_BOUND:
                 LOG_DIS("bound size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_bound(dc, size);
+                insn_len += dec10_ind_bound(env, dc, size);
                 break;
             case CRISV10_IND_AND:
                 LOG_DIS("and size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_AND, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_AND, size);
                 break;
             case CRISV10_IND_OR:
                 LOG_DIS("or size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_OR, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_OR, size);
                 break;
             case CRISV10_IND_MOVX:
-                insn_len = dec10_alux_m(dc, CC_OP_MOVE);
+                insn_len = dec10_alux_m(env, dc, CC_OP_MOVE);
                 break;
             case CRISV10_IND_ADDX:
-                insn_len = dec10_alux_m(dc, CC_OP_ADD);
+                insn_len = dec10_alux_m(env, dc, CC_OP_ADD);
                 break;
             case CRISV10_IND_SUBX:
-                insn_len = dec10_alux_m(dc, CC_OP_SUB);
+                insn_len = dec10_alux_m(env, dc, CC_OP_SUB);
                 break;
             case CRISV10_IND_CMPX:
-                insn_len = dec10_alux_m(dc, CC_OP_CMP);
+                insn_len = dec10_alux_m(env, dc, CC_OP_CMP);
                 break;
             case CRISV10_IND_MUL:
                 /* This is a reg insn coded in the mem indir space.  */
@@ -1097,7 +1100,7 @@
                 dec10_reg_mul(dc, size, dc->ir & (1 << 10));
                 break;
             case CRISV10_IND_BDAP_M:
-                insn_len = dec10_bdap_m(dc, size);
+                insn_len = dec10_bdap_m(env, dc, size);
                 break;
             default:
                 LOG_DIS("pc=%x var-ind.%d %d r%d r%d\n",
@@ -1110,7 +1113,7 @@
 
     switch (dc->opcode) {
         case CRISV10_IND_MOVE_M_SPR:
-            insn_len = dec10_ind_move_m_pr(dc);
+            insn_len = dec10_ind_move_m_pr(env, dc);
             break;
         case CRISV10_IND_MOVE_SPR_M:
             insn_len = dec10_ind_move_pr_m(dc);
@@ -1119,7 +1122,7 @@
             if (dc->src == 15) {
                 LOG_DIS("jump.%d %d r%d r%d direct\n", size,
                          dc->opcode, dc->src, dc->dst);
-                imm = ldl_code(dc->pc + 2);
+                imm = cpu_ldl_code(env, dc->pc + 2);
                 if (dc->mode == CRISV10_MODE_AUTOINC)
                     insn_len += size;
 
@@ -1168,24 +1171,24 @@
             dc->delayed_branch--; /* v10 has no dslot here.  */
             break;
         case CRISV10_IND_MOVX:
-            insn_len = dec10_alux_m(dc, CC_OP_MOVE);
+            insn_len = dec10_alux_m(env, dc, CC_OP_MOVE);
             break;
         case CRISV10_IND_ADDX:
-            insn_len = dec10_alux_m(dc, CC_OP_ADD);
+            insn_len = dec10_alux_m(env, dc, CC_OP_ADD);
             break;
         case CRISV10_IND_SUBX:
-            insn_len = dec10_alux_m(dc, CC_OP_SUB);
+            insn_len = dec10_alux_m(env, dc, CC_OP_SUB);
             break;
         case CRISV10_IND_CMPX:
-            insn_len = dec10_alux_m(dc, CC_OP_CMP);
+            insn_len = dec10_alux_m(env, dc, CC_OP_CMP);
             break;
         case CRISV10_IND_DIP:
-            insn_len = dec10_dip(dc);
+            insn_len = dec10_dip(env, dc);
             break;
         case CRISV10_IND_BCC_M:
 
             cris_cc_mask(dc, 0);
-            imm = ldsw_code(dc->pc + 2);
+            imm = cpu_ldsw_code(env, dc->pc + 2);
             simm = (int16_t)imm;
             simm += 4;
 
@@ -1202,7 +1205,7 @@
     return insn_len;
 }
 
-static unsigned int crisv10_decoder(DisasContext *dc)
+static unsigned int crisv10_decoder(CPUCRISState *env, DisasContext *dc)
 {
     unsigned int insn_len = 2;
 
@@ -1210,7 +1213,7 @@
         tcg_gen_debug_insn_start(dc->pc);
 
     /* Load a halfword onto the instruction register.  */
-    dc->ir = lduw_code(dc->pc);
+    dc->ir = cpu_lduw_code(env, dc->pc);
 
     /* Now decode it.  */
     dc->opcode   = EXTRACT_FIELD(dc->ir, 6, 9);
@@ -1235,7 +1238,7 @@
             break;
         case CRISV10_MODE_AUTOINC:
         case CRISV10_MODE_INDIRECT:
-            insn_len = dec10_ind(dc);
+            insn_len = dec10_ind(env, dc);
             break;
     }
 
diff --git a/target-i386/cc_helper.c b/target-i386/cc_helper.c
index 07892f9..9422003 100644
--- a/target-i386/cc_helper.c
+++ b/target-i386/cc_helper.c
@@ -353,6 +353,16 @@
     env->eflags |= IF_MASK;
 }
 
+void helper_clac(CPUX86State *env)
+{
+    env->eflags &= ~AC_MASK;
+}
+
+void helper_stac(CPUX86State *env)
+{
+    env->eflags |= AC_MASK;
+}
+
 #if 0
 /* vm86plus instructions */
 void helper_cli_vm(CPUX86State *env)
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 423e009..f3708e6 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -61,15 +61,19 @@
     "tsc-deadline", "aes", "xsave", "osxsave",
     "avx", NULL, NULL, "hypervisor",
 };
+/* Feature names that are already defined on feature_name[] but are set on
+ * CPUID[8000_0001].EDX on AMD CPUs don't have their names on
+ * ext2_feature_name[]. They are copied automatically to cpuid_ext2_features
+ * if and only if CPU vendor is AMD.
+ */
 static const char *ext2_feature_name[] = {
-    "fpu", "vme", "de", "pse",
-    "tsc", "msr", "pae", "mce",
-    "cx8" /* AMD CMPXCHG8B */, "apic", NULL, "syscall",
-    "mtrr", "pge", "mca", "cmov",
-    "pat", "pse36", NULL, NULL /* Linux mp */,
-    "nx|xd", NULL, "mmxext", "mmx",
-    "fxsr", "fxsr_opt|ffxsr", "pdpe1gb" /* AMD Page1GB */, "rdtscp",
-    NULL, "lm|i64", "3dnowext", "3dnow",
+    NULL /* fpu */, NULL /* vme */, NULL /* de */, NULL /* pse */,
+    NULL /* tsc */, NULL /* msr */, NULL /* pae */, NULL /* mce */,
+    NULL /* cx8 */ /* AMD CMPXCHG8B */, NULL /* apic */, NULL, "syscall",
+    NULL /* mtrr */, NULL /* pge */, NULL /* mca */, NULL /* cmov */,
+    NULL /* pat */, NULL /* pse36 */, NULL, NULL /* Linux mp */,
+    "nx|xd", NULL, "mmxext", NULL /* mmx */,
+    NULL /* fxsr */, "fxsr_opt|ffxsr", "pdpe1gb" /* AMD Page1GB */, "rdtscp",
 };
 static const char *ext3_feature_name[] = {
     "lahf_lm" /* AMD LahfSahf */, "cmp_legacy", "svm", "extapic" /* AMD ExtApicSpace */,
@@ -100,6 +104,13 @@
     NULL, NULL, NULL, NULL,
 };
 
+static const char *cpuid_7_0_ebx_feature_name[] = {
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, "smep",
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, "smap", NULL, NULL, NULL,
+    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+};
+
 /* collects per-function cpuid data
  */
 typedef struct model_features_t {
@@ -215,14 +226,17 @@
                                     uint32_t *ext2_features,
                                     uint32_t *ext3_features,
                                     uint32_t *kvm_features,
-                                    uint32_t *svm_features)
+                                    uint32_t *svm_features,
+                                    uint32_t *cpuid_7_0_ebx_features)
 {
     if (!lookup_feature(features, flagname, NULL, feature_name) &&
         !lookup_feature(ext_features, flagname, NULL, ext_feature_name) &&
         !lookup_feature(ext2_features, flagname, NULL, ext2_feature_name) &&
         !lookup_feature(ext3_features, flagname, NULL, ext3_feature_name) &&
         !lookup_feature(kvm_features, flagname, NULL, kvm_feature_name) &&
-        !lookup_feature(svm_features, flagname, NULL, svm_feature_name))
+        !lookup_feature(svm_features, flagname, NULL, svm_feature_name) &&
+        !lookup_feature(cpuid_7_0_ebx_features, flagname, NULL,
+                        cpuid_7_0_ebx_feature_name))
             fprintf(stderr, "CPU feature %s not found\n", flagname);
 }
 
@@ -240,7 +254,6 @@
     uint32_t xlevel;
     char model_id[48];
     int vendor_override;
-    uint32_t flags;
     /* Store the results of Centaur's CPUID instructions */
     uint32_t ext4_features;
     uint32_t xlevel2;
@@ -259,7 +272,6 @@
           CPUID_MSR | CPUID_MCE | CPUID_CX8 | CPUID_PGE | CPUID_CMOV | \
           CPUID_PAT | CPUID_FXSR | CPUID_MMX | CPUID_SSE | CPUID_SSE2 | \
           CPUID_PAE | CPUID_SEP | CPUID_APIC)
-#define EXT2_FEATURE_MASK 0x0183F3FF
 
 #define TCG_FEATURES (CPUID_FP87 | CPUID_PSE | CPUID_TSC | CPUID_MSR | \
           CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | CPUID_SEP | \
@@ -277,7 +289,7 @@
           /* missing:
           CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_EST,
           CPUID_EXT_TM2, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_XSAVE */
-#define TCG_EXT2_FEATURES ((TCG_FEATURES & EXT2_FEATURE_MASK) | \
+#define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \
           CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \
           CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT)
           /* missing:
@@ -285,6 +297,7 @@
 #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \
           CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A)
 #define TCG_SVM_FEATURES 0
+#define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_SMAP)
 
 /* maintains list of cpu model definitions
  */
@@ -306,7 +319,7 @@
             CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA |
             CPUID_PSE36,
         .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_CX16 | CPUID_EXT_POPCNT,
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) |
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
         .ext3_features = CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM |
             CPUID_EXT3_ABM | CPUID_EXT3_SSE4A,
@@ -326,7 +339,7 @@
             CPUID_PSE36 | CPUID_VME | CPUID_HT,
         .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | CPUID_EXT_CX16 |
             CPUID_EXT_POPCNT,
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) |
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX |
             CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_MMXEXT |
             CPUID_EXT2_FFXSR | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP,
@@ -374,7 +387,7 @@
         /* Missing: CPUID_EXT_POPCNT, CPUID_EXT_MONITOR */
         .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_CX16,
         /* Missing: CPUID_EXT2_PDPE1GB, CPUID_EXT2_RDTSCP */
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) |
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
             CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
         /* Missing: CPUID_EXT3_LAHF_LM, CPUID_EXT3_CMP_LEG, CPUID_EXT3_EXTAPIC,
                     CPUID_EXT3_CR8LEG, CPUID_EXT3_ABM, CPUID_EXT3_SSE4A,
@@ -403,7 +416,7 @@
         .features = PPRO_FEATURES |
             CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | CPUID_PSE36,
         .ext_features = CPUID_EXT_SSE3,
-        .ext2_features = PPRO_FEATURES & EXT2_FEATURE_MASK,
+        .ext2_features = PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES,
         .ext3_features = 0,
         .xlevel = 0x80000008,
         .model_id = "Common 32-bit KVM processor"
@@ -468,8 +481,10 @@
         .family = 6,
         .model = 2,
         .stepping = 3,
-        .features = PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR | CPUID_MCA,
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) | CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
+        .features = PPRO_FEATURES | CPUID_PSE36 | CPUID_VME | CPUID_MTRR |
+            CPUID_MCA,
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
+            CPUID_EXT2_MMXEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT,
         .xlevel = 0x80000008,
     },
     {
@@ -485,11 +500,231 @@
             /* Some CPUs got no CPUID_SEP */
         .ext_features = CPUID_EXT_SSE3 | CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 |
             CPUID_EXT_DSCPL | CPUID_EXT_EST | CPUID_EXT_TM2 | CPUID_EXT_XTPR,
-        .ext2_features = (PPRO_FEATURES & EXT2_FEATURE_MASK) | CPUID_EXT2_NX,
+        .ext2_features = (PPRO_FEATURES & CPUID_EXT2_AMD_ALIASES) |
+            CPUID_EXT2_NX,
         .ext3_features = CPUID_EXT3_LAHF_LM,
         .xlevel = 0x8000000A,
         .model_id = "Intel(R) Atom(TM) CPU N270   @ 1.60GHz",
     },
+    {
+        .name = "Conroe",
+        .level = 2,
+        .vendor1 = CPUID_VENDOR_INTEL_1,
+        .vendor2 = CPUID_VENDOR_INTEL_2,
+        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .family = 6,
+        .model = 2,
+        .stepping = 3,
+        .features = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+             CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+             CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+             CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+             CPUID_DE | CPUID_FP87,
+        .ext_features = CPUID_EXT_SSSE3 | CPUID_EXT_SSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL,
+        .ext3_features = CPUID_EXT3_LAHF_LM,
+        .xlevel = 0x8000000A,
+        .model_id = "Intel Celeron_4x0 (Conroe/Merom Class Core 2)",
+    },
+    {
+        .name = "Penryn",
+        .level = 2,
+        .vendor1 = CPUID_VENDOR_INTEL_1,
+        .vendor2 = CPUID_VENDOR_INTEL_2,
+        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .family = 6,
+        .model = 2,
+        .stepping = 3,
+        .features = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+             CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+             CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+             CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+             CPUID_DE | CPUID_FP87,
+        .ext_features = CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
+             CPUID_EXT_SSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL,
+        .ext3_features = CPUID_EXT3_LAHF_LM,
+        .xlevel = 0x8000000A,
+        .model_id = "Intel Core 2 Duo P9xxx (Penryn Class Core 2)",
+    },
+    {
+        .name = "Nehalem",
+        .level = 2,
+        .vendor1 = CPUID_VENDOR_INTEL_1,
+        .vendor2 = CPUID_VENDOR_INTEL_2,
+        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .family = 6,
+        .model = 2,
+        .stepping = 3,
+        .features = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+             CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+             CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+             CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+             CPUID_DE | CPUID_FP87,
+        .ext_features = CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
+             CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_SSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
+        .ext3_features = CPUID_EXT3_LAHF_LM,
+        .xlevel = 0x8000000A,
+        .model_id = "Intel Core i7 9xx (Nehalem Class Core i7)",
+    },
+    {
+        .name = "Westmere",
+        .level = 11,
+        .vendor1 = CPUID_VENDOR_INTEL_1,
+        .vendor2 = CPUID_VENDOR_INTEL_2,
+        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .family = 6,
+        .model = 44,
+        .stepping = 1,
+        .features = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+             CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+             CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+             CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+             CPUID_DE | CPUID_FP87,
+        .ext_features = CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 |
+             CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 |
+             CPUID_EXT_SSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX,
+        .ext3_features = CPUID_EXT3_LAHF_LM,
+        .xlevel = 0x8000000A,
+        .model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)",
+    },
+    {
+        .name = "SandyBridge",
+        .level = 0xd,
+        .vendor1 = CPUID_VENDOR_INTEL_1,
+        .vendor2 = CPUID_VENDOR_INTEL_2,
+        .vendor3 = CPUID_VENDOR_INTEL_3,
+        .family = 6,
+        .model = 42,
+        .stepping = 1,
+        .features = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+             CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+             CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+             CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+             CPUID_DE | CPUID_FP87,
+        .ext_features = CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES |
+             CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_POPCNT |
+             CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
+             CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ |
+             CPUID_EXT_SSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX |
+             CPUID_EXT2_SYSCALL,
+        .ext3_features = CPUID_EXT3_LAHF_LM,
+        .xlevel = 0x8000000A,
+        .model_id = "Intel Xeon E312xx (Sandy Bridge)",
+    },
+    {
+        .name = "Opteron_G1",
+        .level = 5,
+        .vendor1 = CPUID_VENDOR_AMD_1,
+        .vendor2 = CPUID_VENDOR_AMD_2,
+        .vendor3 = CPUID_VENDOR_AMD_3,
+        .family = 15,
+        .model = 6,
+        .stepping = 1,
+        .features = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+             CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+             CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+             CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+             CPUID_DE | CPUID_FP87,
+        .ext_features = CPUID_EXT_SSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_FXSR | CPUID_EXT2_MMX |
+             CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
+             CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
+             CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
+             CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE | CPUID_EXT2_MSR |
+             CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE | CPUID_EXT2_FPU,
+        .xlevel = 0x80000008,
+        .model_id = "AMD Opteron 240 (Gen 1 Class Opteron)",
+    },
+    {
+        .name = "Opteron_G2",
+        .level = 5,
+        .vendor1 = CPUID_VENDOR_AMD_1,
+        .vendor2 = CPUID_VENDOR_AMD_2,
+        .vendor3 = CPUID_VENDOR_AMD_3,
+        .family = 15,
+        .model = 6,
+        .stepping = 1,
+        .features = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+             CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+             CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+             CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+             CPUID_DE | CPUID_FP87,
+        .ext_features = CPUID_EXT_CX16 | CPUID_EXT_SSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_FXSR |
+             CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 |
+             CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA |
+             CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL |
+             CPUID_EXT2_APIC | CPUID_EXT2_CX8 | CPUID_EXT2_MCE |
+             CPUID_EXT2_PAE | CPUID_EXT2_MSR | CPUID_EXT2_TSC | CPUID_EXT2_PSE |
+             CPUID_EXT2_DE | CPUID_EXT2_FPU,
+        .ext3_features = CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
+        .xlevel = 0x80000008,
+        .model_id = "AMD Opteron 22xx (Gen 2 Class Opteron)",
+    },
+    {
+        .name = "Opteron_G3",
+        .level = 5,
+        .vendor1 = CPUID_VENDOR_AMD_1,
+        .vendor2 = CPUID_VENDOR_AMD_2,
+        .vendor3 = CPUID_VENDOR_AMD_3,
+        .family = 15,
+        .model = 6,
+        .stepping = 1,
+        .features = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+             CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+             CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+             CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+             CPUID_DE | CPUID_FP87,
+        .ext_features = CPUID_EXT_POPCNT | CPUID_EXT_CX16 | CPUID_EXT_MONITOR |
+             CPUID_EXT_SSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_FXSR |
+             CPUID_EXT2_MMX | CPUID_EXT2_NX | CPUID_EXT2_PSE36 |
+             CPUID_EXT2_PAT | CPUID_EXT2_CMOV | CPUID_EXT2_MCA |
+             CPUID_EXT2_PGE | CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL |
+             CPUID_EXT2_APIC | CPUID_EXT2_CX8 | CPUID_EXT2_MCE |
+             CPUID_EXT2_PAE | CPUID_EXT2_MSR | CPUID_EXT2_TSC | CPUID_EXT2_PSE |
+             CPUID_EXT2_DE | CPUID_EXT2_FPU,
+        .ext3_features = CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A |
+             CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM,
+        .xlevel = 0x80000008,
+        .model_id = "AMD Opteron 23xx (Gen 3 Class Opteron)",
+    },
+    {
+        .name = "Opteron_G4",
+        .level = 0xd,
+        .vendor1 = CPUID_VENDOR_AMD_1,
+        .vendor2 = CPUID_VENDOR_AMD_2,
+        .vendor3 = CPUID_VENDOR_AMD_3,
+        .family = 21,
+        .model = 1,
+        .stepping = 2,
+        .features = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX |
+             CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA |
+             CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 |
+             CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE |
+             CPUID_DE | CPUID_FP87,
+        .ext_features = CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES |
+             CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 |
+             CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ |
+             CPUID_EXT_SSE3,
+        .ext2_features = CPUID_EXT2_LM | CPUID_EXT2_RDTSCP |
+             CPUID_EXT2_PDPE1GB | CPUID_EXT2_FXSR | CPUID_EXT2_MMX |
+             CPUID_EXT2_NX | CPUID_EXT2_PSE36 | CPUID_EXT2_PAT |
+             CPUID_EXT2_CMOV | CPUID_EXT2_MCA | CPUID_EXT2_PGE |
+             CPUID_EXT2_MTRR | CPUID_EXT2_SYSCALL | CPUID_EXT2_APIC |
+             CPUID_EXT2_CX8 | CPUID_EXT2_MCE | CPUID_EXT2_PAE | CPUID_EXT2_MSR |
+             CPUID_EXT2_TSC | CPUID_EXT2_PSE | CPUID_EXT2_DE | CPUID_EXT2_FPU,
+        .ext3_features = CPUID_EXT3_FMA4 | CPUID_EXT3_XOP |
+             CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE |
+             CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM |
+             CPUID_EXT3_LAHF_LM,
+        .xlevel = 0x8000001A,
+        .model_id = "AMD Opteron 62xx class CPU",
+    },
 };
 
 static int cpu_x86_fill_model_id(char *str)
@@ -846,7 +1081,7 @@
 {
     X86CPU *cpu = X86_CPU(obj);
     const int64_t min = 0;
-    const int64_t max = INT_MAX;
+    const int64_t max = INT64_MAX;
     int64_t value;
 
     visit_type_int(v, &value, name, errp);
@@ -873,10 +1108,12 @@
     uint32_t plus_features = 0, plus_ext_features = 0;
     uint32_t plus_ext2_features = 0, plus_ext3_features = 0;
     uint32_t plus_kvm_features = 0, plus_svm_features = 0;
+    uint32_t plus_7_0_ebx_features = 0;
     /* Features to be removed */
     uint32_t minus_features = 0, minus_ext_features = 0;
     uint32_t minus_ext2_features = 0, minus_ext3_features = 0;
     uint32_t minus_kvm_features = 0, minus_svm_features = 0;
+    uint32_t minus_7_0_ebx_features = 0;
     uint32_t numvalue;
 
     for (def = x86_defs; def; def = def->next)
@@ -903,8 +1140,8 @@
 #endif
 
     add_flagname_to_bitmaps("hypervisor", &plus_features,
-        &plus_ext_features, &plus_ext2_features, &plus_ext3_features,
-        &plus_kvm_features, &plus_svm_features);
+            &plus_ext_features, &plus_ext2_features, &plus_ext3_features,
+            &plus_kvm_features, &plus_svm_features,  &plus_7_0_ebx_features);
 
     featurestr = strtok(NULL, ",");
 
@@ -914,12 +1151,12 @@
             add_flagname_to_bitmaps(featurestr + 1, &plus_features,
                             &plus_ext_features, &plus_ext2_features,
                             &plus_ext3_features, &plus_kvm_features,
-                            &plus_svm_features);
+                            &plus_svm_features, &plus_7_0_ebx_features);
         } else if (featurestr[0] == '-') {
             add_flagname_to_bitmaps(featurestr + 1, &minus_features,
                             &minus_ext_features, &minus_ext2_features,
                             &minus_ext3_features, &minus_kvm_features,
-                            &minus_svm_features);
+                            &minus_svm_features, &minus_7_0_ebx_features);
         } else if ((val = strchr(featurestr, '='))) {
             *val = 0; val++;
             if (!strcmp(featurestr, "family")) {
@@ -1025,16 +1262,21 @@
     x86_cpu_def->ext3_features |= plus_ext3_features;
     x86_cpu_def->kvm_features |= plus_kvm_features;
     x86_cpu_def->svm_features |= plus_svm_features;
+    x86_cpu_def->cpuid_7_0_ebx_features |= plus_7_0_ebx_features;
     x86_cpu_def->features &= ~minus_features;
     x86_cpu_def->ext_features &= ~minus_ext_features;
     x86_cpu_def->ext2_features &= ~minus_ext2_features;
     x86_cpu_def->ext3_features &= ~minus_ext3_features;
     x86_cpu_def->kvm_features &= ~minus_kvm_features;
     x86_cpu_def->svm_features &= ~minus_svm_features;
+    x86_cpu_def->cpuid_7_0_ebx_features &= ~minus_7_0_ebx_features;
     if (check_cpuid) {
         if (check_features_against_host(x86_cpu_def) && enforce_cpuid)
             goto error;
     }
+    if (x86_cpu_def->cpuid_7_0_ebx_features && x86_cpu_def->level < 7) {
+        x86_cpu_def->level = 7;
+    }
     g_free(s);
     return 0;
 
@@ -1073,70 +1315,28 @@
         }
 }
 
-/* generate CPU information:
- * -?        list model names
- * -?model   list model names/IDs
- * -?dump    output all model (x86_def_t) data
- * -?cpuid   list all recognized cpuid flag names
- */
-void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf, const char *optarg)
+/* generate CPU information. */
+void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf)
 {
-    unsigned char model = !strcmp("?model", optarg);
-    unsigned char dump = !strcmp("?dump", optarg);
-    unsigned char cpuid = !strcmp("?cpuid", optarg);
     x86_def_t *def;
     char buf[256];
 
-    if (cpuid) {
-        (*cpu_fprintf)(f, "Recognized CPUID flags:\n");
-        listflags(buf, sizeof (buf), (uint32_t)~0, feature_name, 1);
-        (*cpu_fprintf)(f, "  f_edx: %s\n", buf);
-        listflags(buf, sizeof (buf), (uint32_t)~0, ext_feature_name, 1);
-        (*cpu_fprintf)(f, "  f_ecx: %s\n", buf);
-        listflags(buf, sizeof (buf), (uint32_t)~0, ext2_feature_name, 1);
-        (*cpu_fprintf)(f, "  extf_edx: %s\n", buf);
-        listflags(buf, sizeof (buf), (uint32_t)~0, ext3_feature_name, 1);
-        (*cpu_fprintf)(f, "  extf_ecx: %s\n", buf);
-        return;
-    }
     for (def = x86_defs; def; def = def->next) {
-        snprintf(buf, sizeof (buf), def->flags ? "[%s]": "%s", def->name);
-        if (model || dump) {
-            (*cpu_fprintf)(f, "x86 %16s  %-48s\n", buf, def->model_id);
-        } else {
-            (*cpu_fprintf)(f, "x86 %16s\n", buf);
-        }
-        if (dump) {
-            memcpy(buf, &def->vendor1, sizeof (def->vendor1));
-            memcpy(buf + 4, &def->vendor2, sizeof (def->vendor2));
-            memcpy(buf + 8, &def->vendor3, sizeof (def->vendor3));
-            buf[12] = '\0';
-            (*cpu_fprintf)(f,
-                "  family %d model %d stepping %d level %d xlevel 0x%x"
-                " vendor \"%s\"\n",
-                def->family, def->model, def->stepping, def->level,
-                def->xlevel, buf);
-            listflags(buf, sizeof (buf), def->features, feature_name, 0);
-            (*cpu_fprintf)(f, "  feature_edx %08x (%s)\n", def->features,
-                buf);
-            listflags(buf, sizeof (buf), def->ext_features, ext_feature_name,
-                0);
-            (*cpu_fprintf)(f, "  feature_ecx %08x (%s)\n", def->ext_features,
-                buf);
-            listflags(buf, sizeof (buf), def->ext2_features, ext2_feature_name,
-                0);
-            (*cpu_fprintf)(f, "  extfeature_edx %08x (%s)\n",
-                def->ext2_features, buf);
-            listflags(buf, sizeof (buf), def->ext3_features, ext3_feature_name,
-                0);
-            (*cpu_fprintf)(f, "  extfeature_ecx %08x (%s)\n",
-                def->ext3_features, buf);
-            (*cpu_fprintf)(f, "\n");
-        }
+        snprintf(buf, sizeof(buf), "%s", def->name);
+        (*cpu_fprintf)(f, "x86 %16s  %-48s\n", buf, def->model_id);
     }
     if (kvm_enabled()) {
         (*cpu_fprintf)(f, "x86 %16s\n", "[host]");
     }
+    (*cpu_fprintf)(f, "\nRecognized CPUID flags:\n");
+    listflags(buf, sizeof(buf), (uint32_t)~0, feature_name, 1);
+    (*cpu_fprintf)(f, "  %s\n", buf);
+    listflags(buf, sizeof(buf), (uint32_t)~0, ext_feature_name, 1);
+    (*cpu_fprintf)(f, "  %s\n", buf);
+    listflags(buf, sizeof(buf), (uint32_t)~0, ext2_feature_name, 1);
+    (*cpu_fprintf)(f, "  %s\n", buf);
+    listflags(buf, sizeof(buf), (uint32_t)~0, ext3_feature_name, 1);
+    (*cpu_fprintf)(f, "  %s\n", buf);
 }
 
 CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp)
@@ -1192,10 +1392,21 @@
     env->cpuid_kvm_features = def->kvm_features;
     env->cpuid_svm_features = def->svm_features;
     env->cpuid_ext4_features = def->ext4_features;
-    env->cpuid_7_0_ebx = def->cpuid_7_0_ebx_features;
+    env->cpuid_7_0_ebx_features = def->cpuid_7_0_ebx_features;
     env->cpuid_xlevel2 = def->xlevel2;
     object_property_set_int(OBJECT(cpu), (int64_t)def->tsc_khz * 1000,
                             "tsc-frequency", &error);
+
+    /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on
+     * CPUID[1].EDX.
+     */
+    if (env->cpuid_vendor1 == CPUID_VENDOR_AMD_1 &&
+            env->cpuid_vendor2 == CPUID_VENDOR_AMD_2 &&
+            env->cpuid_vendor3 == CPUID_VENDOR_AMD_3) {
+        env->cpuid_ext2_features &= ~CPUID_EXT2_AMD_ALIASES;
+        env->cpuid_ext2_features |= (def->features & CPUID_EXT2_AMD_ALIASES);
+    }
+
     if (!kvm_enabled()) {
         env->cpuid_features &= TCG_FEATURES;
         env->cpuid_ext_features &= TCG_EXT_FEATURES;
@@ -1216,109 +1427,6 @@
 }
 
 #if !defined(CONFIG_USER_ONLY)
-/* copy vendor id string to 32 bit register, nul pad as needed
- */
-static void cpyid(const char *s, uint32_t *id)
-{
-    char *d = (char *)id;
-    char i;
-
-    for (i = sizeof (*id); i--; )
-        *d++ = *s ? *s++ : '\0';
-}
-
-/* interpret radix and convert from string to arbitrary scalar,
- * otherwise flag failure
- */
-#define setscalar(pval, str, perr)                      \
-{                                                       \
-    char *pend;                                         \
-    unsigned long ul;                                   \
-                                                        \
-    ul = strtoul(str, &pend, 0);                        \
-    *str && !*pend ? (*pval = ul) : (*perr = 1);        \
-}
-
-/* map cpuid options to feature bits, otherwise return failure
- * (option tags in *str are delimited by whitespace)
- */
-static void setfeatures(uint32_t *pval, const char *str,
-    const char **featureset, int *perr)
-{
-    const char *p, *q;
-
-    for (q = p = str; *p || *q; q = p) {
-        while (iswhite(*p))
-            q = ++p;
-        while (*p && !iswhite(*p))
-            ++p;
-        if (!*q && !*p)
-            return;
-        if (!lookup_feature(pval, q, p, featureset)) {
-            fprintf(stderr, "error: feature \"%.*s\" not available in set\n",
-                (int)(p - q), q);
-            *perr = 1;
-            return;
-        }
-    }
-}
-
-/* map config file options to x86_def_t form
- */
-static int cpudef_setfield(const char *name, const char *str, void *opaque)
-{
-    x86_def_t *def = opaque;
-    int err = 0;
-
-    if (!strcmp(name, "name")) {
-        g_free((void *)def->name);
-        def->name = g_strdup(str);
-    } else if (!strcmp(name, "model_id")) {
-        strncpy(def->model_id, str, sizeof (def->model_id));
-    } else if (!strcmp(name, "level")) {
-        setscalar(&def->level, str, &err)
-    } else if (!strcmp(name, "vendor")) {
-        cpyid(&str[0], &def->vendor1);
-        cpyid(&str[4], &def->vendor2);
-        cpyid(&str[8], &def->vendor3);
-    } else if (!strcmp(name, "family")) {
-        setscalar(&def->family, str, &err)
-    } else if (!strcmp(name, "model")) {
-        setscalar(&def->model, str, &err)
-    } else if (!strcmp(name, "stepping")) {
-        setscalar(&def->stepping, str, &err)
-    } else if (!strcmp(name, "feature_edx")) {
-        setfeatures(&def->features, str, feature_name, &err);
-    } else if (!strcmp(name, "feature_ecx")) {
-        setfeatures(&def->ext_features, str, ext_feature_name, &err);
-    } else if (!strcmp(name, "extfeature_edx")) {
-        setfeatures(&def->ext2_features, str, ext2_feature_name, &err);
-    } else if (!strcmp(name, "extfeature_ecx")) {
-        setfeatures(&def->ext3_features, str, ext3_feature_name, &err);
-    } else if (!strcmp(name, "xlevel")) {
-        setscalar(&def->xlevel, str, &err)
-    } else {
-        fprintf(stderr, "error: unknown option [%s = %s]\n", name, str);
-        return (1);
-    }
-    if (err) {
-        fprintf(stderr, "error: bad option value [%s = %s]\n", name, str);
-        return (1);
-    }
-    return (0);
-}
-
-/* register config file entry as x86_def_t
- */
-static int cpudef_register(QemuOpts *opts, void *opaque)
-{
-    x86_def_t *def = g_malloc0(sizeof (x86_def_t));
-
-    qemu_opt_foreach(opts, cpudef_setfield, def, 1);
-    def->next = x86_defs;
-    x86_defs = def;
-    return (0);
-}
 
 void cpu_clear_apic_feature(CPUX86State *env)
 {
@@ -1327,8 +1435,7 @@
 
 #endif /* !CONFIG_USER_ONLY */
 
-/* register "cpudef" models defined in configuration file.  Here we first
- * preload any built-in definitions
+/* Initialize list of CPU models, filling some non-static fields if necessary
  */
 void x86_cpudef_setup(void)
 {
@@ -1336,24 +1443,23 @@
     static const char *model_with_versions[] = { "qemu32", "qemu64", "athlon" };
 
     for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); ++i) {
-        builtin_x86_defs[i].next = x86_defs;
-        builtin_x86_defs[i].flags = 1;
+        x86_def_t *def = &builtin_x86_defs[i];
+        def->next = x86_defs;
 
         /* Look for specific "cpudef" models that */
         /* have the QEMU version in .model_id */
         for (j = 0; j < ARRAY_SIZE(model_with_versions); j++) {
-            if (strcmp(model_with_versions[j], builtin_x86_defs[i].name) == 0) {
-                pstrcpy(builtin_x86_defs[i].model_id, sizeof(builtin_x86_defs[i].model_id), "QEMU Virtual CPU version ");
-                pstrcat(builtin_x86_defs[i].model_id, sizeof(builtin_x86_defs[i].model_id), qemu_get_version());
+            if (strcmp(model_with_versions[j], def->name) == 0) {
+                pstrcpy(def->model_id, sizeof(def->model_id),
+                        "QEMU Virtual CPU version ");
+                pstrcat(def->model_id, sizeof(def->model_id),
+                        qemu_get_version());
                 break;
             }
         }
 
-        x86_defs = &builtin_x86_defs[i];
+        x86_defs = def;
     }
-#if !defined(CONFIG_USER_ONLY)
-    qemu_opts_foreach(qemu_find_opts("cpudef"), cpudef_register, NULL, 0);
-#endif
 }
 
 static void get_cpuid_vendor(CPUX86State *env, uint32_t *ebx,
@@ -1474,7 +1580,7 @@
         /* Structured Extended Feature Flags Enumeration Leaf */
         if (count == 0) {
             *eax = 0; /* Maximum ECX value for sub-leaves */
-            *ebx = env->cpuid_7_0_ebx; /* Feature flags */
+            *ebx = env->cpuid_7_0_ebx_features; /* Feature flags */
             *ecx = 0; /* Reserved */
             *edx = 0; /* Reserved */
         } else {
@@ -1653,7 +1759,7 @@
 
     if (qemu_loglevel_mask(CPU_LOG_RESET)) {
         qemu_log("CPU Reset (CPU %d)\n", env->cpu_index);
-        log_cpu_state(env, X86_DUMP_FPU | X86_DUMP_CCOP);
+        log_cpu_state(env, CPU_DUMP_FPU | CPU_DUMP_CCOP);
     }
 
     xcc->parent_reset(s);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 3c57d8b..bc60cdb 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -123,8 +123,8 @@
 
 /* hidden flags - used internally by qemu to represent additional cpu
    states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not
-   redundant. We avoid using the IOPL_MASK, TF_MASK and VM_MASK bit
-   position to ease oring with eflags. */
+   redundant. We avoid using the IOPL_MASK, TF_MASK, VM_MASK and AC_MASK
+   bit positions to ease oring with eflags. */
 /* current cpl */
 #define HF_CPL_SHIFT         0
 /* true if soft mmu is being used */
@@ -147,10 +147,12 @@
 #define HF_CS64_SHIFT       15 /* only used on x86_64: 64 bit code segment  */
 #define HF_RF_SHIFT         16 /* must be same as eflags */
 #define HF_VM_SHIFT         17 /* must be same as eflags */
+#define HF_AC_SHIFT         18 /* must be same as eflags */
 #define HF_SMM_SHIFT        19 /* CPU in SMM mode */
 #define HF_SVME_SHIFT       20 /* SVME enabled (copy of EFER.SVME) */
 #define HF_SVMI_SHIFT       21 /* SVM intercepts are active */
 #define HF_OSFXSR_SHIFT     22 /* CR4.OSFXSR */
+#define HF_SMAP_SHIFT       23 /* CR4.SMAP */
 
 #define HF_CPL_MASK          (3 << HF_CPL_SHIFT)
 #define HF_SOFTMMU_MASK      (1 << HF_SOFTMMU_SHIFT)
@@ -168,10 +170,12 @@
 #define HF_CS64_MASK         (1 << HF_CS64_SHIFT)
 #define HF_RF_MASK           (1 << HF_RF_SHIFT)
 #define HF_VM_MASK           (1 << HF_VM_SHIFT)
+#define HF_AC_MASK           (1 << HF_AC_SHIFT)
 #define HF_SMM_MASK          (1 << HF_SMM_SHIFT)
 #define HF_SVME_MASK         (1 << HF_SVME_SHIFT)
 #define HF_SVMI_MASK         (1 << HF_SVMI_SHIFT)
 #define HF_OSFXSR_MASK       (1 << HF_OSFXSR_SHIFT)
+#define HF_SMAP_MASK         (1 << HF_SMAP_SHIFT)
 
 /* hflags2 */
 
@@ -210,6 +214,13 @@
 #define CR4_OSFXSR_SHIFT 9
 #define CR4_OSFXSR_MASK (1 << CR4_OSFXSR_SHIFT)
 #define CR4_OSXMMEXCPT_MASK  (1 << 10)
+#define CR4_VMXE_MASK   (1 << 13)
+#define CR4_SMXE_MASK   (1 << 14)
+#define CR4_FSGSBASE_MASK (1 << 16)
+#define CR4_PCIDE_MASK  (1 << 17)
+#define CR4_OSXSAVE_MASK (1 << 18)
+#define CR4_SMEP_MASK   (1 << 20)
+#define CR4_SMAP_MASK   (1 << 21)
 
 #define DR6_BD          (1 << 13)
 #define DR6_BS          (1 << 14)
@@ -382,6 +393,7 @@
 #define CPUID_PBE (1 << 31)
 
 #define CPUID_EXT_SSE3     (1 << 0)
+#define CPUID_EXT_PCLMULQDQ (1 << 1)
 #define CPUID_EXT_DTES64   (1 << 2)
 #define CPUID_EXT_MONITOR  (1 << 3)
 #define CPUID_EXT_DSCPL    (1 << 4)
@@ -401,14 +413,34 @@
 #define CPUID_EXT_MOVBE    (1 << 22)
 #define CPUID_EXT_POPCNT   (1 << 23)
 #define CPUID_EXT_TSC_DEADLINE_TIMER (1 << 24)
+#define CPUID_EXT_AES      (1 << 25)
 #define CPUID_EXT_XSAVE    (1 << 26)
 #define CPUID_EXT_OSXSAVE  (1 << 27)
+#define CPUID_EXT_AVX      (1 << 28)
 #define CPUID_EXT_HYPERVISOR  (1 << 31)
 
+#define CPUID_EXT2_FPU     (1 << 0)
+#define CPUID_EXT2_VME     (1 << 1)
+#define CPUID_EXT2_DE      (1 << 2)
+#define CPUID_EXT2_PSE     (1 << 3)
+#define CPUID_EXT2_TSC     (1 << 4)
+#define CPUID_EXT2_MSR     (1 << 5)
+#define CPUID_EXT2_PAE     (1 << 6)
+#define CPUID_EXT2_MCE     (1 << 7)
+#define CPUID_EXT2_CX8     (1 << 8)
+#define CPUID_EXT2_APIC    (1 << 9)
 #define CPUID_EXT2_SYSCALL (1 << 11)
+#define CPUID_EXT2_MTRR    (1 << 12)
+#define CPUID_EXT2_PGE     (1 << 13)
+#define CPUID_EXT2_MCA     (1 << 14)
+#define CPUID_EXT2_CMOV    (1 << 15)
+#define CPUID_EXT2_PAT     (1 << 16)
+#define CPUID_EXT2_PSE36   (1 << 17)
 #define CPUID_EXT2_MP      (1 << 19)
 #define CPUID_EXT2_NX      (1 << 20)
 #define CPUID_EXT2_MMXEXT  (1 << 22)
+#define CPUID_EXT2_MMX     (1 << 23)
+#define CPUID_EXT2_FXSR    (1 << 24)
 #define CPUID_EXT2_FFXSR   (1 << 25)
 #define CPUID_EXT2_PDPE1GB (1 << 26)
 #define CPUID_EXT2_RDTSCP  (1 << 27)
@@ -416,6 +448,17 @@
 #define CPUID_EXT2_3DNOWEXT (1 << 30)
 #define CPUID_EXT2_3DNOW   (1 << 31)
 
+/* CPUID[8000_0001].EDX bits that are aliase of CPUID[1].EDX bits on AMD CPUs */
+#define CPUID_EXT2_AMD_ALIASES (CPUID_EXT2_FPU | CPUID_EXT2_VME | \
+                                CPUID_EXT2_DE | CPUID_EXT2_PSE | \
+                                CPUID_EXT2_TSC | CPUID_EXT2_MSR | \
+                                CPUID_EXT2_PAE | CPUID_EXT2_MCE | \
+                                CPUID_EXT2_CX8 | CPUID_EXT2_APIC | \
+                                CPUID_EXT2_MTRR | CPUID_EXT2_PGE | \
+                                CPUID_EXT2_MCA | CPUID_EXT2_CMOV | \
+                                CPUID_EXT2_PAT | CPUID_EXT2_PSE36 | \
+                                CPUID_EXT2_MMX | CPUID_EXT2_FXSR)
+
 #define CPUID_EXT3_LAHF_LM (1 << 0)
 #define CPUID_EXT3_CMP_LEG (1 << 1)
 #define CPUID_EXT3_SVM     (1 << 2)
@@ -427,7 +470,9 @@
 #define CPUID_EXT3_3DNOWPREFETCH (1 << 8)
 #define CPUID_EXT3_OSVW    (1 << 9)
 #define CPUID_EXT3_IBS     (1 << 10)
+#define CPUID_EXT3_XOP     (1 << 11)
 #define CPUID_EXT3_SKINIT  (1 << 12)
+#define CPUID_EXT3_FMA4    (1 << 16)
 
 #define CPUID_SVM_NPT          (1 << 0)
 #define CPUID_SVM_LBRV         (1 << 1)
@@ -440,6 +485,9 @@
 #define CPUID_SVM_PAUSEFILTER  (1 << 10)
 #define CPUID_SVM_PFTHRESHOLD  (1 << 12)
 
+#define CPUID_7_0_EBX_SMEP     (1 << 7)
+#define CPUID_7_0_EBX_SMAP     (1 << 20)
+
 #define CPUID_VENDOR_INTEL_1 0x756e6547 /* "Genu" */
 #define CPUID_VENDOR_INTEL_2 0x49656e69 /* "ineI" */
 #define CPUID_VENDOR_INTEL_3 0x6c65746e /* "ntel" */
@@ -615,7 +663,7 @@
 #define CPU_NB_REGS CPU_NB_REGS32
 #endif
 
-#define NB_MMU_MODES 2
+#define NB_MMU_MODES 3
 
 typedef enum TPRAccess {
     TPR_ACCESS_READ,
@@ -745,7 +793,7 @@
     uint32_t cpuid_xlevel2;
     uint32_t cpuid_ext4_features;
     /* Flags from CPUID[EAX=7,ECX=0].EBX */
-    uint32_t cpuid_7_0_ebx;
+    uint32_t cpuid_7_0_ebx_features;
 
     /* MTRRs */
     uint64_t mtrr_fixed[11];
@@ -792,7 +840,7 @@
 
 X86CPU *cpu_x86_init(const char *cpu_model);
 int cpu_x86_exec(CPUX86State *s);
-void x86_cpu_list (FILE *f, fprintf_function cpu_fprintf, const char *optarg);
+void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf);
 void x86_cpudef_setup(void);
 int cpu_x86_support_mca_broadcast(CPUX86State *env);
 
@@ -948,10 +996,6 @@
 uint64_t cpu_get_tsc(CPUX86State *env);
 X86CPU *pc_new_cpu(const char *cpu_model);
 
-/* used to debug */
-#define X86_DUMP_FPU  0x0001 /* dump FPU state too */
-#define X86_DUMP_CCOP 0x0002 /* dump qemu flag cache */
-
 #define TARGET_PAGE_BITS 12
 
 #ifdef TARGET_X86_64
@@ -977,7 +1021,7 @@
 #define cpu_exec cpu_x86_exec
 #define cpu_gen_code cpu_x86_gen_code
 #define cpu_signal_handler cpu_x86_signal_handler
-#define cpu_list_id x86_cpu_list
+#define cpu_list x86_cpu_list
 #define cpudef_setup	x86_cpudef_setup
 
 #define CPU_SAVE_VERSION 12
@@ -985,10 +1029,15 @@
 /* MMU modes definitions */
 #define MMU_MODE0_SUFFIX _kernel
 #define MMU_MODE1_SUFFIX _user
-#define MMU_USER_IDX 1
+#define MMU_MODE2_SUFFIX _ksmap /* Kernel with SMAP override */
+#define MMU_KERNEL_IDX  0
+#define MMU_USER_IDX    1
+#define MMU_KSMAP_IDX   2
 static inline int cpu_mmu_index (CPUX86State *env)
 {
-    return (env->hflags & HF_CPL_MASK) == 3 ? 1 : 0;
+    return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX :
+        ((env->hflags & HF_SMAP_MASK) && (env->eflags & AC_MASK))
+        ? MMU_KSMAP_IDX : MMU_KERNEL_IDX;
 }
 
 #undef EAX
@@ -1074,7 +1123,7 @@
     *cs_base = env->segs[R_CS].base;
     *pc = *cs_base + env->eip;
     *flags = env->hflags |
-        (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK));
+        (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK));
 }
 
 void do_cpu_init(X86CPU *cpu);
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 8a5da3d..2ee7c6d 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -284,7 +284,7 @@
         cpu_fprintf(f, "\nDR6=" TARGET_FMT_lx " DR7=" TARGET_FMT_lx "\n",
                     env->dr[6], env->dr[7]);
     }
-    if (flags & X86_DUMP_CCOP) {
+    if (flags & CPU_DUMP_CCOP) {
         if ((unsigned)env->cc_op < CC_OP_NB)
             snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]);
         else
@@ -303,7 +303,7 @@
         }
     }
     cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer);
-    if (flags & X86_DUMP_FPU) {
+    if (flags & CPU_DUMP_FPU) {
         int fptag;
         fptag = 0;
         for(i = 0; i < 8; i++) {
@@ -443,17 +443,27 @@
 #if defined(DEBUG_MMU)
     printf("CR4 update: CR4=%08x\n", (uint32_t)env->cr[4]);
 #endif
-    if ((new_cr4 & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK)) !=
-        (env->cr[4] & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK))) {
+    if ((new_cr4 ^ env->cr[4]) &
+        (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK |
+         CR4_SMEP_MASK | CR4_SMAP_MASK)) {
         tlb_flush(env, 1);
     }
     /* SSE handling */
-    if (!(env->cpuid_features & CPUID_SSE))
+    if (!(env->cpuid_features & CPUID_SSE)) {
         new_cr4 &= ~CR4_OSFXSR_MASK;
-    if (new_cr4 & CR4_OSFXSR_MASK)
+    }
+    env->hflags &= ~HF_OSFXSR_MASK;
+    if (new_cr4 & CR4_OSFXSR_MASK) {
         env->hflags |= HF_OSFXSR_MASK;
-    else
-        env->hflags &= ~HF_OSFXSR_MASK;
+    }
+
+    if (!(env->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP)) {
+        new_cr4 &= ~CR4_SMAP_MASK;
+    }
+    env->hflags &= ~HF_SMAP_MASK;
+    if (new_cr4 & CR4_SMAP_MASK) {
+        env->hflags |= HF_SMAP_MASK;
+    }
 
     env->cr[4] = new_cr4;
 }
@@ -591,17 +601,38 @@
             /* 2 MB page */
             page_size = 2048 * 1024;
             ptep ^= PG_NX_MASK;
-            if ((ptep & PG_NX_MASK) && is_write1 == 2)
+            if ((ptep & PG_NX_MASK) && is_write1 == 2) {
                 goto do_fault_protect;
-            if (is_user) {
-                if (!(ptep & PG_USER_MASK))
+            }
+            switch (mmu_idx) {
+            case MMU_USER_IDX:
+                if (!(ptep & PG_USER_MASK)) {
                     goto do_fault_protect;
-                if (is_write && !(ptep & PG_RW_MASK))
+                }
+                if (is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
-            } else {
+                }
+                break;
+
+            case MMU_KERNEL_IDX:
+                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
+                /* fall through */
+            case MMU_KSMAP_IDX:
+                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
                 if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(ptep & PG_RW_MASK))
+                    is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
+                }
+                break;
+
+            default: /* cannot happen */
+                break;
             }
             is_dirty = is_write && !(pde & PG_DIRTY_MASK);
             if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
@@ -635,15 +666,35 @@
             ptep ^= PG_NX_MASK;
             if ((ptep & PG_NX_MASK) && is_write1 == 2)
                 goto do_fault_protect;
-            if (is_user) {
-                if (!(ptep & PG_USER_MASK))
+            switch (mmu_idx) {
+            case MMU_USER_IDX:
+                if (!(ptep & PG_USER_MASK)) {
                     goto do_fault_protect;
-                if (is_write && !(ptep & PG_RW_MASK))
+                }
+                if (is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
-            } else {
+                }
+                break;
+
+            case MMU_KERNEL_IDX:
+                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
+                /* fall through */
+            case MMU_KSMAP_IDX:
+                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
                 if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(ptep & PG_RW_MASK))
+                    is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
+                }
+                break;
+
+            default: /* cannot happen */
+                break;
             }
             is_dirty = is_write && !(pte & PG_DIRTY_MASK);
             if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
@@ -670,15 +721,35 @@
         /* if PSE bit is set, then we use a 4MB page */
         if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) {
             page_size = 4096 * 1024;
-            if (is_user) {
-                if (!(pde & PG_USER_MASK))
+            switch (mmu_idx) {
+            case MMU_USER_IDX:
+                if (!(pde & PG_USER_MASK)) {
                     goto do_fault_protect;
-                if (is_write && !(pde & PG_RW_MASK))
+                }
+                if (is_write && !(pde & PG_RW_MASK)) {
                     goto do_fault_protect;
-            } else {
+                }
+                break;
+
+            case MMU_KERNEL_IDX:
+                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
+                    (pde & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
+                /* fall through */
+            case MMU_KSMAP_IDX:
+                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
+                    (pde & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
                 if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(pde & PG_RW_MASK))
+                    is_write && !(pde & PG_RW_MASK)) {
                     goto do_fault_protect;
+                }
+                break;
+
+            default: /* cannot happen */
+                break;
             }
             is_dirty = is_write && !(pde & PG_DIRTY_MASK);
             if (!(pde & PG_ACCESSED_MASK) || is_dirty) {
@@ -707,15 +778,35 @@
             }
             /* combine pde and pte user and rw protections */
             ptep = pte & pde;
-            if (is_user) {
-                if (!(ptep & PG_USER_MASK))
+            switch (mmu_idx) {
+            case MMU_USER_IDX:
+                if (!(ptep & PG_USER_MASK)) {
                     goto do_fault_protect;
-                if (is_write && !(ptep & PG_RW_MASK))
+                }
+                if (is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
-            } else {
+                }
+                break;
+
+            case MMU_KERNEL_IDX:
+                if (is_write1 != 2 && (env->cr[4] & CR4_SMAP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
+                /* fall through */
+            case MMU_KSMAP_IDX:
+                if (is_write1 == 2 && (env->cr[4] & CR4_SMEP_MASK) &&
+                    (ptep & PG_USER_MASK)) {
+                    goto do_fault_protect;
+                }
                 if ((env->cr[0] & CR0_WP_MASK) &&
-                    is_write && !(ptep & PG_RW_MASK))
+                    is_write && !(ptep & PG_RW_MASK)) {
                     goto do_fault_protect;
+                }
+                break;
+
+            default: /* cannot happen */
+                break;
             }
             is_dirty = is_write && !(pte & PG_DIRTY_MASK);
             if (!(pte & PG_ACCESSED_MASK) || is_dirty) {
@@ -762,8 +853,9 @@
     if (is_user)
         error_code |= PG_ERROR_U_MASK;
     if (is_write1 == 2 &&
-        (env->efer & MSR_EFER_NXE) &&
-        (env->cr[4] & CR4_PAE_MASK))
+        (((env->efer & MSR_EFER_NXE) &&
+          (env->cr[4] & CR4_PAE_MASK)) ||
+         (env->cr[4] & CR4_SMEP_MASK)))
         error_code |= PG_ERROR_I_D_MASK;
     if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) {
         /* cr2 is not modified in case of exceptions */
diff --git a/target-i386/helper.h b/target-i386/helper.h
index ab6af63..93850ce 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -67,6 +67,8 @@
 DEF_HELPER_2(raise_exception, void, env, int)
 DEF_HELPER_1(cli, void, env)
 DEF_HELPER_1(sti, void, env)
+DEF_HELPER_1(clac, void, env)
+DEF_HELPER_1(stac, void, env)
 DEF_HELPER_1(set_inhibit_irq, void, env)
 DEF_HELPER_1(reset_inhibit_irq, void, env)
 DEF_HELPER_3(boundw, void, env, tl, int)
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6790180..5b18383 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -165,7 +165,7 @@
                      * so add missing bits according to the AMD spec:
                      */
                     cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX);
-                    ret |= cpuid_1_edx & 0x183f7ff;
+                    ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES;
                     break;
                 }
                 break;
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 5fff8d5..ff93374 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -31,7 +31,7 @@
 #ifdef DEBUG_PCALL
 # define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__)
 # define LOG_PCALL_STATE(env)                                  \
-    log_cpu_state_mask(CPU_LOG_PCALL, (env), X86_DUMP_CCOP)
+    log_cpu_state_mask(CPU_LOG_PCALL, (env), CPU_DUMP_CCOP)
 #else
 # define LOG_PCALL(...) do { } while (0)
 # define LOG_PCALL_STATE(env) do { } while (0)
@@ -1177,7 +1177,7 @@
                 qemu_log(" EAX=" TARGET_FMT_lx, EAX);
             }
             qemu_log("\n");
-            log_cpu_state(env, X86_DUMP_CCOP);
+            log_cpu_state(env, CPU_DUMP_CCOP);
 #if 0
             {
                 int i;
diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 8b04eb2..eea2fe9 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -47,7 +47,7 @@
     int i, offset;
 
     qemu_log_mask(CPU_LOG_INT, "SMM: enter\n");
-    log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+    log_cpu_state_mask(CPU_LOG_INT, env, CPU_DUMP_CCOP);
 
     env->hflags |= HF_SMM_MASK;
     cpu_smm_update(env);
@@ -295,7 +295,7 @@
     cpu_smm_update(env);
 
     qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n");
-    log_cpu_state_mask(CPU_LOG_INT, env, X86_DUMP_CCOP);
+    log_cpu_state_mask(CPU_LOG_INT, env, CPU_DUMP_CCOP);
 }
 
 #endif /* !CONFIG_USER_ONLY */
diff --git a/target-i386/translate.c b/target-i386/translate.c
index eb0cabc..0a7e4e3 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -107,6 +107,7 @@
     int cpuid_ext_features;
     int cpuid_ext2_features;
     int cpuid_ext3_features;
+    int cpuid_7_0_ebx_features;
 } DisasContext;
 
 static void gen_eob(DisasContext *s);
@@ -4202,8 +4203,9 @@
     target_ulong next_eip, tval;
     int rex_w, rex_r;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(pc_start);
+    }
     s->pc = pc_start;
     prefixes = 0;
     aflag = s->code32;
@@ -6555,7 +6557,7 @@
             }
             gen_pop_update(s);
             s->cc_op = CC_OP_EFLAGS;
-            /* abort translation because TF flag may change */
+            /* abort translation because TF/AC flag may change */
             gen_jmp_im(s->pc - s->cs_base);
             gen_eob(s);
         }
@@ -7205,6 +7207,24 @@
                     gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start));
                     gen_eob(s);
                     break;
+                case 2: /* clac */
+                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
+                        s->cpl != 0) {
+                        goto illegal_op;
+                    }
+                    gen_helper_clac(cpu_env);
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_eob(s);
+                    break;
+                case 3: /* stac */
+                    if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) ||
+                        s->cpl != 0) {
+                        goto illegal_op;
+                    }
+                    gen_helper_stac(cpu_env);
+                    gen_jmp_im(s->pc - s->cs_base);
+                    gen_eob(s);
+                    break;
                 default:
                     goto illegal_op;
                 }
@@ -7900,15 +7920,13 @@
     /* select memory access functions */
     dc->mem_index = 0;
     if (flags & HF_SOFTMMU_MASK) {
-        if (dc->cpl == 3)
-            dc->mem_index = 2 * 4;
-        else
-            dc->mem_index = 1 * 4;
+        dc->mem_index = (cpu_mmu_index(env) + 1) << 2;
     }
     dc->cpuid_features = env->cpuid_features;
     dc->cpuid_ext_features = env->cpuid_ext_features;
     dc->cpuid_ext2_features = env->cpuid_ext2_features;
     dc->cpuid_ext3_features = env->cpuid_ext3_features;
+    dc->cpuid_7_0_ebx_features = env->cpuid_7_0_ebx_features;
 #ifdef TARGET_X86_64
     dc->lma = (flags >> HF_LMA_SHIFT) & 1;
     dc->code64 = (flags >> HF_CS64_SHIFT) & 1;
diff --git a/target-lm32/Makefile.objs b/target-lm32/Makefile.objs
index 2e0e093..ca20f21 100644
--- a/target-lm32/Makefile.objs
+++ b/target-lm32/Makefile.objs
@@ -1,4 +1,2 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-lm32/helper.h b/target-lm32/helper.h
index 9d335ef..07f5670 100644
--- a/target-lm32/helper.h
+++ b/target-lm32/helper.h
@@ -1,14 +1,14 @@
 #include "def-helper.h"
 
-DEF_HELPER_1(raise_exception, void, i32)
-DEF_HELPER_0(hlt, void)
-DEF_HELPER_1(wcsr_im, void, i32)
-DEF_HELPER_1(wcsr_ip, void, i32)
-DEF_HELPER_1(wcsr_jtx, void, i32)
-DEF_HELPER_1(wcsr_jrx, void, i32)
-DEF_HELPER_0(rcsr_im, i32)
-DEF_HELPER_0(rcsr_ip, i32)
-DEF_HELPER_0(rcsr_jtx, i32)
-DEF_HELPER_0(rcsr_jrx, i32)
+DEF_HELPER_2(raise_exception, void, env, i32)
+DEF_HELPER_1(hlt, void, env)
+DEF_HELPER_2(wcsr_im, void, env, i32)
+DEF_HELPER_2(wcsr_ip, void, env, i32)
+DEF_HELPER_2(wcsr_jtx, void, env, i32)
+DEF_HELPER_2(wcsr_jrx, void, env, i32)
+DEF_HELPER_1(rcsr_im, i32, env)
+DEF_HELPER_1(rcsr_ip, i32, env)
+DEF_HELPER_1(rcsr_jtx, i32, env)
+DEF_HELPER_1(rcsr_jrx, i32, env)
 
 #include "def-helper.h"
diff --git a/target-lm32/op_helper.c b/target-lm32/op_helper.c
index 51edc1a..7b91d8c 100644
--- a/target-lm32/op_helper.c
+++ b/target-lm32/op_helper.c
@@ -1,6 +1,5 @@
 #include <assert.h>
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 #include "host-utils.h"
 
@@ -18,55 +17,55 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
-void helper_raise_exception(uint32_t index)
+void helper_raise_exception(CPULM32State *env, uint32_t index)
 {
     env->exception_index = index;
     cpu_loop_exit(env);
 }
 
-void helper_hlt(void)
+void helper_hlt(CPULM32State *env)
 {
     env->halted = 1;
     env->exception_index = EXCP_HLT;
     cpu_loop_exit(env);
 }
 
-void helper_wcsr_im(uint32_t im)
+void helper_wcsr_im(CPULM32State *env, uint32_t im)
 {
     lm32_pic_set_im(env->pic_state, im);
 }
 
-void helper_wcsr_ip(uint32_t im)
+void helper_wcsr_ip(CPULM32State *env, uint32_t im)
 {
     lm32_pic_set_ip(env->pic_state, im);
 }
 
-void helper_wcsr_jtx(uint32_t jtx)
+void helper_wcsr_jtx(CPULM32State *env, uint32_t jtx)
 {
     lm32_juart_set_jtx(env->juart_state, jtx);
 }
 
-void helper_wcsr_jrx(uint32_t jrx)
+void helper_wcsr_jrx(CPULM32State *env, uint32_t jrx)
 {
     lm32_juart_set_jrx(env->juart_state, jrx);
 }
 
-uint32_t helper_rcsr_im(void)
+uint32_t helper_rcsr_im(CPULM32State *env)
 {
     return lm32_pic_get_im(env->pic_state);
 }
 
-uint32_t helper_rcsr_ip(void)
+uint32_t helper_rcsr_ip(CPULM32State *env)
 {
     return lm32_pic_get_ip(env->pic_state);
 }
 
-uint32_t helper_rcsr_jtx(void)
+uint32_t helper_rcsr_jtx(CPULM32State *env)
 {
     return lm32_juart_get_jtx(env->juart_state);
 }
 
-uint32_t helper_rcsr_jrx(void)
+uint32_t helper_rcsr_jrx(CPULM32State *env)
 {
     return lm32_juart_get_jrx(env->juart_state);
 }
@@ -74,17 +73,12 @@
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPULM32State *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPULM32State *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPULM32State *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
-
     ret = cpu_lm32_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -98,7 +92,6 @@
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 #endif
 
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index 872a2ba..77c2866 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -116,7 +116,7 @@
 {
     TCGv_i32 tmp = tcg_const_i32(index);
 
-    gen_helper_raise_exception(tmp);
+    gen_helper_raise_exception(cpu_env, tmp);
     tcg_temp_free_i32(tmp);
 }
 
@@ -179,7 +179,7 @@
     } else  {
         if (dc->r0 == 0 && dc->r1 == 0 && dc->r2 == 0) {
             tcg_gen_movi_tl(cpu_pc, dc->pc + 4);
-            gen_helper_hlt();
+            gen_helper_hlt(cpu_env);
         } else {
             tcg_gen_and_tl(cpu_R[dc->r2], cpu_R[dc->r0], cpu_R[dc->r1]);
         }
@@ -601,10 +601,10 @@
         tcg_gen_mov_tl(cpu_R[dc->r2], cpu_ie);
         break;
     case CSR_IM:
-        gen_helper_rcsr_im(cpu_R[dc->r2]);
+        gen_helper_rcsr_im(cpu_R[dc->r2], cpu_env);
         break;
     case CSR_IP:
-        gen_helper_rcsr_ip(cpu_R[dc->r2]);
+        gen_helper_rcsr_ip(cpu_R[dc->r2], cpu_env);
         break;
     case CSR_CC:
         tcg_gen_mov_tl(cpu_R[dc->r2], cpu_cc);
@@ -622,10 +622,10 @@
         tcg_gen_mov_tl(cpu_R[dc->r2], cpu_deba);
         break;
     case CSR_JTX:
-        gen_helper_rcsr_jtx(cpu_R[dc->r2]);
+        gen_helper_rcsr_jtx(cpu_R[dc->r2], cpu_env);
         break;
     case CSR_JRX:
-        gen_helper_rcsr_jrx(cpu_R[dc->r2]);
+        gen_helper_rcsr_jrx(cpu_R[dc->r2], cpu_env);
         break;
     case CSR_ICC:
     case CSR_DCC:
@@ -812,7 +812,7 @@
         if (use_icount) {
             gen_io_start();
         }
-        gen_helper_wcsr_im(cpu_R[dc->r1]);
+        gen_helper_wcsr_im(cpu_env, cpu_R[dc->r1]);
         tcg_gen_movi_tl(cpu_pc, dc->pc + 4);
         if (use_icount) {
             gen_io_end();
@@ -824,7 +824,7 @@
         if (use_icount) {
             gen_io_start();
         }
-        gen_helper_wcsr_ip(cpu_R[dc->r1]);
+        gen_helper_wcsr_ip(cpu_env, cpu_R[dc->r1]);
         tcg_gen_movi_tl(cpu_pc, dc->pc + 4);
         if (use_icount) {
             gen_io_end();
@@ -844,10 +844,10 @@
         tcg_gen_mov_tl(cpu_deba, cpu_R[dc->r1]);
         break;
     case CSR_JTX:
-        gen_helper_wcsr_jtx(cpu_R[dc->r1]);
+        gen_helper_wcsr_jtx(cpu_env, cpu_R[dc->r1]);
         break;
     case CSR_JRX:
-        gen_helper_wcsr_jrx(cpu_R[dc->r1]);
+        gen_helper_wcsr_jrx(cpu_env, cpu_R[dc->r1]);
         break;
     case CSR_DC:
         tcg_gen_mov_tl(cpu_dc, cpu_R[dc->r1]);
@@ -940,15 +940,13 @@
     dec_cmpne
 };
 
-static inline void decode(DisasContext *dc)
+static inline void decode(DisasContext *dc, uint32_t ir)
 {
-    uint32_t ir;
-
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(dc->pc);
     }
 
-    dc->ir = ir = ldl_code(dc->pc);
+    dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
 
     /* try guessing 'empty' instruction memory, although it may be a valid
@@ -1068,7 +1066,7 @@
             gen_io_start();
         }
 
-        decode(dc);
+        decode(dc, cpu_ldl_code(env, dc->pc));
         dc->pc += 4;
         num_insns++;
 
diff --git a/target-m68k/Makefile.objs b/target-m68k/Makefile.objs
index cda6015..7eccfab 100644
--- a/target-m68k/Makefile.objs
+++ b/target-m68k/Makefile.objs
@@ -1,5 +1,3 @@
 obj-y += m68k-semi.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-m68k/helpers.h b/target-m68k/helpers.h
index cb8a0c7..8112b44 100644
--- a/target-m68k/helpers.h
+++ b/target-m68k/helpers.h
@@ -49,6 +49,6 @@
 DEF_HELPER_3(set_mac_extu, void, env, i32, i32)
 
 DEF_HELPER_2(flush_flags, void, env, i32)
-DEF_HELPER_1(raise_exception, void, i32)
+DEF_HELPER_2(raise_exception, void, env, i32)
 
 #include "def-helper.h"
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index 1971a57..aa00504 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -17,17 +17,16 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helpers.h"
 
 #if defined(CONFIG_USER_ONLY)
 
-void do_interrupt(CPUM68KState *env1)
+void do_interrupt(CPUM68KState *env)
 {
-    env1->exception_index = -1;
+    env->exception_index = -1;
 }
 
-void do_interrupt_m68k_hardirq(CPUM68KState *env1)
+void do_interrupt_m68k_hardirq(CPUM68KState *env)
 {
 }
 
@@ -54,16 +53,12 @@
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPUM68KState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUM68KState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUM68KState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = cpu_m68k_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -77,24 +72,23 @@
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 
-static void do_rte(void)
+static void do_rte(CPUM68KState *env)
 {
     uint32_t sp;
     uint32_t fmt;
 
     sp = env->aregs[7];
-    fmt = ldl_kernel(sp);
-    env->pc = ldl_kernel(sp + 4);
+    fmt = cpu_ldl_kernel(env, sp);
+    env->pc = cpu_ldl_kernel(env, sp + 4);
     sp |= (fmt >> 28) & 3;
     env->sr = fmt & 0xffff;
     m68k_switch_sp(env);
     env->aregs[7] = sp + 8;
 }
 
-static void do_interrupt_all(int is_hw)
+static void do_interrupt_all(CPUM68KState *env, int is_hw)
 {
     uint32_t sp;
     uint32_t fmt;
@@ -108,14 +102,14 @@
         switch (env->exception_index) {
         case EXCP_RTE:
             /* Return from an exception.  */
-            do_rte();
+            do_rte(env);
             return;
         case EXCP_HALT_INSN:
             if (semihosting_enabled
                     && (env->sr & SR_S) != 0
                     && (env->pc & 3) == 0
-                    && lduw_code(env->pc - 4) == 0x4e71
-                    && ldl_code(env->pc) == 0x4e7bf000) {
+                    && cpu_lduw_code(env, env->pc - 4) == 0x4e71
+                    && cpu_ldl_code(env, env->pc) == 0x4e7bf000) {
                 env->pc += 4;
                 do_m68k_semihosting(env, env->dregs[0]);
                 return;
@@ -151,44 +145,34 @@
     /* ??? This could cause MMU faults.  */
     sp &= ~3;
     sp -= 4;
-    stl_kernel(sp, retaddr);
+    cpu_stl_kernel(env, sp, retaddr);
     sp -= 4;
-    stl_kernel(sp, fmt);
+    cpu_stl_kernel(env, sp, fmt);
     env->aregs[7] = sp;
     /* Jump to vector.  */
-    env->pc = ldl_kernel(env->vbr + vector);
+    env->pc = cpu_ldl_kernel(env, env->vbr + vector);
 }
 
-void do_interrupt(CPUM68KState *env1)
+void do_interrupt(CPUM68KState *env)
 {
-    CPUM68KState *saved_env;
-
-    saved_env = env;
-    env = env1;
-    do_interrupt_all(0);
-    env = saved_env;
+    do_interrupt_all(env, 0);
 }
 
-void do_interrupt_m68k_hardirq(CPUM68KState *env1)
+void do_interrupt_m68k_hardirq(CPUM68KState *env)
 {
-    CPUM68KState *saved_env;
-
-    saved_env = env;
-    env = env1;
-    do_interrupt_all(1);
-    env = saved_env;
+    do_interrupt_all(env, 1);
 }
 #endif
 
-static void raise_exception(int tt)
+static void raise_exception(CPUM68KState *env, int tt)
 {
     env->exception_index = tt;
     cpu_loop_exit(env);
 }
 
-void HELPER(raise_exception)(uint32_t tt)
+void HELPER(raise_exception)(CPUM68KState *env, uint32_t tt)
 {
-    raise_exception(tt);
+    raise_exception(env, tt);
 }
 
 void HELPER(divu)(CPUM68KState *env, uint32_t word)
@@ -202,14 +186,12 @@
     num = env->div1;
     den = env->div2;
     /* ??? This needs to make sure the throwing location is accurate.  */
-    if (den == 0)
-        raise_exception(EXCP_DIV0);
+    if (den == 0) {
+        raise_exception(env, EXCP_DIV0);
+    }
     quot = num / den;
     rem = num % den;
     flags = 0;
-    /* Avoid using a PARAM1 of zero.  This breaks dyngen because it uses
-       the address of a symbol, and gcc knows symbols can't have address
-       zero.  */
     if (word && quot > 0xffff)
         flags |= CCF_V;
     if (quot == 0)
@@ -231,8 +213,9 @@
 
     num = env->div1;
     den = env->div2;
-    if (den == 0)
-        raise_exception(EXCP_DIV0);
+    if (den == 0) {
+        raise_exception(env, EXCP_DIV0);
+    }
     quot = num / den;
     rem = num % den;
     flags = 0;
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 9fc1e31..451ef74 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -150,18 +150,24 @@
 #define OS_SINGLE 4
 #define OS_DOUBLE 5
 
-typedef void (*disas_proc)(DisasContext *, uint16_t);
+typedef void (*disas_proc)(CPUM68KState *env, DisasContext *s, uint16_t insn);
 
 #ifdef DEBUG_DISPATCH
-#define DISAS_INSN(name) \
-  static void real_disas_##name (DisasContext *s, uint16_t insn); \
-  static void disas_##name (DisasContext *s, uint16_t insn) { \
-    qemu_log("Dispatch " #name "\n"); \
-    real_disas_##name(s, insn); } \
-  static void real_disas_##name (DisasContext *s, uint16_t insn)
+#define DISAS_INSN(name)                                                \
+    static void real_disas_##name(CPUM68KState *env, DisasContext *s,   \
+                                  uint16_t insn);                       \
+    static void disas_##name(CPUM68KState *env, DisasContext *s,        \
+                             uint16_t insn)                             \
+    {                                                                   \
+        qemu_log("Dispatch " #name "\n");                               \
+        real_disas_##name(s, env, insn);                                \
+    }                                                                   \
+    static void real_disas_##name(CPUM68KState *env, DisasContext *s,   \
+                                  uint16_t insn)
 #else
-#define DISAS_INSN(name) \
-  static void disas_##name (DisasContext *s, uint16_t insn)
+#define DISAS_INSN(name)                                                \
+    static void disas_##name(CPUM68KState *env, DisasContext *s,        \
+                             uint16_t insn)
 #endif
 
 /* Generate a load from the specified address.  Narrow values are
@@ -257,12 +263,12 @@
 }
 
 /* Read a 32-bit immediate constant.  */
-static inline uint32_t read_im32(DisasContext *s)
+static inline uint32_t read_im32(CPUM68KState *env, DisasContext *s)
 {
     uint32_t im;
-    im = ((uint32_t)lduw_code(s->pc)) << 16;
+    im = ((uint32_t)cpu_lduw_code(env, s->pc)) << 16;
     s->pc += 2;
-    im |= lduw_code(s->pc);
+    im |= cpu_lduw_code(env, s->pc);
     s->pc += 2;
     return im;
 }
@@ -288,7 +294,8 @@
 
 /* Handle a base + index + displacement effective addresss.
    A NULL_QREG base means pc-relative.  */
-static TCGv gen_lea_indexed(DisasContext *s, int opsize, TCGv base)
+static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, int opsize,
+                            TCGv base)
 {
     uint32_t offset;
     uint16_t ext;
@@ -297,7 +304,7 @@
     uint32_t bd, od;
 
     offset = s->pc;
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
     if ((ext & 0x800) == 0 && !m68k_feature(s->env, M68K_FEATURE_WORD_INDEX))
@@ -311,10 +318,10 @@
         if ((ext & 0x30) > 0x10) {
             /* base displacement */
             if ((ext & 0x30) == 0x20) {
-                bd = (int16_t)lduw_code(s->pc);
+                bd = (int16_t)cpu_lduw_code(env, s->pc);
                 s->pc += 2;
             } else {
-                bd = read_im32(s);
+                bd = read_im32(env, s);
             }
         } else {
             bd = 0;
@@ -360,10 +367,10 @@
             if ((ext & 3) > 1) {
                 /* outer displacement */
                 if ((ext & 3) == 2) {
-                    od = (int16_t)lduw_code(s->pc);
+                    od = (int16_t)cpu_lduw_code(env, s->pc);
                     s->pc += 2;
                 } else {
-                    od = read_im32(s);
+                    od = read_im32(env, s);
                 }
             } else {
                 od = 0;
@@ -492,7 +499,8 @@
 
 /* Generate code for an "effective address".  Does not adjust the base
    register for autoincrement addressing modes.  */
-static TCGv gen_lea(DisasContext *s, uint16_t insn, int opsize)
+static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn,
+                    int opsize)
 {
     TCGv reg;
     TCGv tmp;
@@ -514,29 +522,29 @@
     case 5: /* Indirect displacement.  */
         reg = AREG(insn, 0);
         tmp = tcg_temp_new();
-        ext = lduw_code(s->pc);
+        ext = cpu_lduw_code(env, s->pc);
         s->pc += 2;
         tcg_gen_addi_i32(tmp, reg, (int16_t)ext);
         return tmp;
     case 6: /* Indirect index + displacement.  */
         reg = AREG(insn, 0);
-        return gen_lea_indexed(s, opsize, reg);
+        return gen_lea_indexed(env, s, opsize, reg);
     case 7: /* Other */
         switch (insn & 7) {
         case 0: /* Absolute short.  */
-            offset = ldsw_code(s->pc);
+            offset = cpu_ldsw_code(env, s->pc);
             s->pc += 2;
             return tcg_const_i32(offset);
         case 1: /* Absolute long.  */
-            offset = read_im32(s);
+            offset = read_im32(env, s);
             return tcg_const_i32(offset);
         case 2: /* pc displacement  */
             offset = s->pc;
-            offset += ldsw_code(s->pc);
+            offset += cpu_ldsw_code(env, s->pc);
             s->pc += 2;
             return tcg_const_i32(offset);
         case 3: /* pc index+displacement.  */
-            return gen_lea_indexed(s, opsize, NULL_QREG);
+            return gen_lea_indexed(env, s, opsize, NULL_QREG);
         case 4: /* Immediate.  */
         default:
             return NULL_QREG;
@@ -548,15 +556,16 @@
 
 /* Helper function for gen_ea. Reuse the computed address between the
    for read/write operands.  */
-static inline TCGv gen_ea_once(DisasContext *s, uint16_t insn, int opsize,
-                              TCGv val, TCGv *addrp, ea_what what)
+static inline TCGv gen_ea_once(CPUM68KState *env, DisasContext *s,
+                               uint16_t insn, int opsize, TCGv val,
+                               TCGv *addrp, ea_what what)
 {
     TCGv tmp;
 
     if (addrp && what == EA_STORE) {
         tmp = *addrp;
     } else {
-        tmp = gen_lea(s, insn, opsize);
+        tmp = gen_lea(env, s, insn, opsize);
         if (IS_NULL_QREG(tmp))
             return tmp;
         if (addrp)
@@ -568,8 +577,8 @@
 /* Generate code to load/store a value ito/from an EA.  If VAL > 0 this is
    a write otherwise it is a read (0 == sign extend, -1 == zero extend).
    ADDRP is non-null for readwrite operands.  */
-static TCGv gen_ea(DisasContext *s, uint16_t insn, int opsize, TCGv val,
-                   TCGv *addrp, ea_what what)
+static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn,
+                   int opsize, TCGv val, TCGv *addrp, ea_what what)
 {
     TCGv reg;
     TCGv result;
@@ -609,7 +618,7 @@
             if (addrp && what == EA_STORE) {
                 tmp = *addrp;
             } else {
-                tmp = gen_lea(s, insn, opsize);
+                tmp = gen_lea(env, s, insn, opsize);
                 if (IS_NULL_QREG(tmp))
                     return tmp;
                 if (addrp)
@@ -626,33 +635,35 @@
         return result;
     case 5: /* Indirect displacement.  */
     case 6: /* Indirect index + displacement.  */
-        return gen_ea_once(s, insn, opsize, val, addrp, what);
+        return gen_ea_once(env, s, insn, opsize, val, addrp, what);
     case 7: /* Other */
         switch (insn & 7) {
         case 0: /* Absolute short.  */
         case 1: /* Absolute long.  */
         case 2: /* pc displacement  */
         case 3: /* pc index+displacement.  */
-            return gen_ea_once(s, insn, opsize, val, addrp, what);
+            return gen_ea_once(env, s, insn, opsize, val, addrp, what);
         case 4: /* Immediate.  */
             /* Sign extend values for consistency.  */
             switch (opsize) {
             case OS_BYTE:
-                if (what == EA_LOADS)
-                    offset = ldsb_code(s->pc + 1);
-                else
-                    offset = ldub_code(s->pc + 1);
+                if (what == EA_LOADS) {
+                    offset = cpu_ldsb_code(env, s->pc + 1);
+                } else {
+                    offset = cpu_ldub_code(env, s->pc + 1);
+                }
                 s->pc += 2;
                 break;
             case OS_WORD:
-                if (what == EA_LOADS)
-                    offset = ldsw_code(s->pc);
-                else
-                    offset = lduw_code(s->pc);
+                if (what == EA_LOADS) {
+                    offset = cpu_ldsw_code(env, s->pc);
+                } else {
+                    offset = cpu_lduw_code(env, s->pc);
+                }
                 s->pc += 2;
                 break;
             case OS_LONG:
-                offset = read_im32(s);
+                offset = read_im32(env, s);
                 break;
             default:
                 qemu_assert(0, "Bad immediate operand");
@@ -815,7 +826,7 @@
 {
     gen_flush_cc_op(s);
     gen_jmp_im(s, where);
-    gen_helper_raise_exception(tcg_const_i32(nr));
+    gen_helper_raise_exception(cpu_env, tcg_const_i32(nr));
 }
 
 static inline void gen_addr_fault(DisasContext *s)
@@ -823,20 +834,21 @@
     gen_exception(s, s->insn_pc, EXCP_ADDRESS);
 }
 
-#define SRC_EA(result, opsize, op_sign, addrp) do { \
-    result = gen_ea(s, insn, opsize, NULL_QREG, addrp, op_sign ? EA_LOADS : EA_LOADU); \
-    if (IS_NULL_QREG(result)) { \
-        gen_addr_fault(s); \
-        return; \
-    } \
+#define SRC_EA(env, result, opsize, op_sign, addrp) do {                \
+        result = gen_ea(env, s, insn, opsize, NULL_QREG, addrp,         \
+                        op_sign ? EA_LOADS : EA_LOADU);                 \
+        if (IS_NULL_QREG(result)) {                                     \
+            gen_addr_fault(s);                                          \
+            return;                                                     \
+        }                                                               \
     } while (0)
 
-#define DEST_EA(insn, opsize, val, addrp) do { \
-    TCGv ea_result = gen_ea(s, insn, opsize, val, addrp, EA_STORE); \
-    if (IS_NULL_QREG(ea_result)) { \
-        gen_addr_fault(s); \
-        return; \
-    } \
+#define DEST_EA(env, insn, opsize, val, addrp) do {                     \
+        TCGv ea_result = gen_ea(env, s, insn, opsize, val, addrp, EA_STORE); \
+        if (IS_NULL_QREG(ea_result)) {                                  \
+            gen_addr_fault(s);                                          \
+            return;                                                     \
+        }                                                               \
     } while (0)
 
 /* Generate a jump to an immediate address.  */
@@ -872,8 +884,7 @@
 DISAS_INSN(undef)
 {
     gen_exception(s, s->pc - 2, EXCP_UNSUPPORTED);
-    cpu_abort(cpu_single_env, "Illegal instruction: %04x @ %08x",
-              insn, s->pc - 2);
+    cpu_abort(env, "Illegal instruction: %04x @ %08x", insn, s->pc - 2);
 }
 
 DISAS_INSN(mulw)
@@ -890,7 +901,7 @@
         tcg_gen_ext16s_i32(tmp, reg);
     else
         tcg_gen_ext16u_i32(tmp, reg);
-    SRC_EA(src, OS_WORD, sign, NULL);
+    SRC_EA(env, src, OS_WORD, sign, NULL);
     tcg_gen_mul_i32(tmp, tmp, src);
     tcg_gen_mov_i32(reg, tmp);
     /* Unlike m68k, coldfire always clears the overflow bit.  */
@@ -911,7 +922,7 @@
     } else {
         tcg_gen_ext16u_i32(QREG_DIV1, reg);
     }
-    SRC_EA(src, OS_WORD, sign, NULL);
+    SRC_EA(env, src, OS_WORD, sign, NULL);
     tcg_gen_mov_i32(QREG_DIV2, src);
     if (sign) {
         gen_helper_divs(cpu_env, tcg_const_i32(1));
@@ -934,7 +945,7 @@
     TCGv reg;
     uint16_t ext;
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (ext & 0x87f8) {
         gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
@@ -943,7 +954,7 @@
     num = DREG(ext, 12);
     reg = DREG(ext, 0);
     tcg_gen_mov_i32(QREG_DIV1, num);
-    SRC_EA(den, OS_LONG, 0, NULL);
+    SRC_EA(env, den, OS_LONG, 0, NULL);
     tcg_gen_mov_i32(QREG_DIV2, den);
     if (ext & 0x0800) {
         gen_helper_divs(cpu_env, tcg_const_i32(0));
@@ -973,11 +984,11 @@
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     if (insn & 0x100) {
-        SRC_EA(tmp, OS_LONG, 0, &addr);
+        SRC_EA(env, tmp, OS_LONG, 0, &addr);
         src = reg;
     } else {
         tmp = reg;
-        SRC_EA(src, OS_LONG, 0, NULL);
+        SRC_EA(env, src, OS_LONG, 0, NULL);
     }
     if (add) {
         tcg_gen_add_i32(dest, tmp, src);
@@ -990,7 +1001,7 @@
     }
     gen_update_cc_add(dest, src);
     if (insn & 0x100) {
-        DEST_EA(insn, OS_LONG, dest, &addr);
+        DEST_EA(env, insn, OS_LONG, dest, &addr);
     } else {
         tcg_gen_mov_i32(reg, dest);
     }
@@ -1020,7 +1031,7 @@
     else
         opsize = OS_LONG;
     op = (insn >> 6) & 3;
-    SRC_EA(src1, opsize, 0, op ? &addr: NULL);
+    SRC_EA(env, src1, opsize, 0, op ? &addr: NULL);
     src2 = DREG(insn, 9);
     dest = tcg_temp_new();
 
@@ -1055,7 +1066,7 @@
         break;
     }
     if (op)
-        DEST_EA(insn, opsize, dest, &addr);
+        DEST_EA(env, insn, opsize, dest, &addr);
 }
 
 DISAS_INSN(sats)
@@ -1086,9 +1097,9 @@
     TCGv tmp;
     int is_load;
 
-    mask = lduw_code(s->pc);
+    mask = cpu_lduw_code(env, s->pc);
     s->pc += 2;
-    tmp = gen_lea(s, insn, OS_LONG);
+    tmp = gen_lea(env, s, insn, OS_LONG);
     if (IS_NULL_QREG(tmp)) {
         gen_addr_fault(s);
         return;
@@ -1130,14 +1141,14 @@
         opsize = OS_LONG;
     op = (insn >> 6) & 3;
 
-    bitnum = lduw_code(s->pc);
+    bitnum = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (bitnum & 0xff00) {
-        disas_undef(s, insn);
+        disas_undef(env, s, insn);
         return;
     }
 
-    SRC_EA(src1, opsize, 0, op ? &addr: NULL);
+    SRC_EA(env, src1, opsize, 0, op ? &addr: NULL);
 
     gen_flush_flags(s);
     if (opsize == OS_BYTE)
@@ -1172,7 +1183,7 @@
         default: /* btst */
             break;
         }
-        DEST_EA(insn, opsize, tmp, &addr);
+        DEST_EA(env, insn, opsize, tmp, &addr);
     }
 }
 
@@ -1185,8 +1196,8 @@
     TCGv addr;
 
     op = (insn >> 9) & 7;
-    SRC_EA(src1, OS_LONG, 0, (op == 6) ? NULL : &addr);
-    im = read_im32(s);
+    SRC_EA(env, src1, OS_LONG, 0, (op == 6) ? NULL : &addr);
+    im = read_im32(env, s);
     dest = tcg_temp_new();
     switch (op) {
     case 0: /* ori */
@@ -1225,7 +1236,7 @@
         abort();
     }
     if (op != 6) {
-        DEST_EA(insn, OS_LONG, dest, &addr);
+        DEST_EA(env, insn, OS_LONG, dest, &addr);
     }
 }
 
@@ -1257,7 +1268,7 @@
     default:
         abort();
     }
-    SRC_EA(src, opsize, 1, NULL);
+    SRC_EA(env, src, opsize, 1, NULL);
     op = (insn >> 6) & 7;
     if (op == 1) {
         /* movea */
@@ -1268,7 +1279,7 @@
         /* normal move */
         uint16_t dest_ea;
         dest_ea = ((insn >> 9) & 7) | (op << 3);
-        DEST_EA(dest_ea, opsize, src, NULL);
+        DEST_EA(env, dest_ea, opsize, src, NULL);
         /* This will be correct because loads sign extend.  */
         gen_logic_cc(s, src);
     }
@@ -1289,7 +1300,7 @@
     TCGv tmp;
 
     reg = AREG(insn, 9);
-    tmp = gen_lea(s, insn, OS_LONG);
+    tmp = gen_lea(env, s, insn, OS_LONG);
     if (IS_NULL_QREG(tmp)) {
         gen_addr_fault(s);
         return;
@@ -1314,7 +1325,7 @@
     default:
         abort();
     }
-    DEST_EA(insn, opsize, tcg_const_i32(0), NULL);
+    DEST_EA(env, insn, opsize, tcg_const_i32(0), NULL);
     gen_logic_cc(s, tcg_const_i32(0));
 }
 
@@ -1363,7 +1374,8 @@
     }
 }
 
-static void gen_set_sr(DisasContext *s, uint16_t insn, int ccr_only)
+static void gen_set_sr(CPUM68KState *env, DisasContext *s, uint16_t insn,
+                       int ccr_only)
 {
     TCGv tmp;
     TCGv reg;
@@ -1383,17 +1395,17 @@
     else if ((insn & 0x3f) == 0x3c)
       {
         uint16_t val;
-        val = lduw_code(s->pc);
+        val = cpu_lduw_code(env, s->pc);
         s->pc += 2;
         gen_set_sr_im(s, val, ccr_only);
       }
     else
-        disas_undef(s, insn);
+        disas_undef(env, s, insn);
 }
 
 DISAS_INSN(move_to_ccr)
 {
-    gen_set_sr(s, insn, 1);
+    gen_set_sr(env, s, insn, 1);
 }
 
 DISAS_INSN(not)
@@ -1424,7 +1436,7 @@
 {
     TCGv tmp;
 
-    tmp = gen_lea(s, insn, OS_LONG);
+    tmp = gen_lea(env, s, insn, OS_LONG);
     if (IS_NULL_QREG(tmp)) {
         gen_addr_fault(s);
         return;
@@ -1470,7 +1482,7 @@
     default:
         abort();
     }
-    SRC_EA(tmp, opsize, 1, NULL);
+    SRC_EA(env, tmp, opsize, 1, NULL);
     gen_logic_cc(s, tmp);
 }
 
@@ -1492,10 +1504,10 @@
     TCGv addr;
 
     dest = tcg_temp_new();
-    SRC_EA(src1, OS_BYTE, 1, &addr);
+    SRC_EA(env, src1, OS_BYTE, 1, &addr);
     gen_logic_cc(s, src1);
     tcg_gen_ori_i32(dest, src1, 0x80);
-    DEST_EA(insn, OS_BYTE, dest, &addr);
+    DEST_EA(env, insn, OS_BYTE, dest, &addr);
 }
 
 DISAS_INSN(mull)
@@ -1507,14 +1519,14 @@
 
     /* The upper 32 bits of the product are discarded, so
        muls.l and mulu.l are functionally equivalent.  */
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (ext & 0x87ff) {
         gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
         return;
     }
     reg = DREG(ext, 12);
-    SRC_EA(src1, OS_LONG, 0, NULL);
+    SRC_EA(env, src1, OS_LONG, 0, NULL);
     dest = tcg_temp_new();
     tcg_gen_mul_i32(dest, src1, reg);
     tcg_gen_mov_i32(reg, dest);
@@ -1528,7 +1540,7 @@
     TCGv reg;
     TCGv tmp;
 
-    offset = ldsw_code(s->pc);
+    offset = cpu_ldsw_code(env, s->pc);
     s->pc += 2;
     reg = AREG(insn, 0);
     tmp = tcg_temp_new();
@@ -1572,7 +1584,7 @@
 
     /* Load the target address first to ensure correct exception
        behavior.  */
-    tmp = gen_lea(s, insn, OS_LONG);
+    tmp = gen_lea(env, s, insn, OS_LONG);
     if (IS_NULL_QREG(tmp)) {
         gen_addr_fault(s);
         return;
@@ -1592,7 +1604,7 @@
     int val;
     TCGv addr;
 
-    SRC_EA(src1, OS_LONG, 0, &addr);
+    SRC_EA(env, src1, OS_LONG, 0, &addr);
     val = (insn >> 9) & 7;
     if (val == 0)
         val = 8;
@@ -1619,7 +1631,7 @@
         }
         gen_update_cc_add(dest, src2);
     }
-    DEST_EA(insn, OS_LONG, dest, &addr);
+    DEST_EA(env, insn, OS_LONG, dest, &addr);
 }
 
 DISAS_INSN(tpf)
@@ -1634,7 +1646,7 @@
     case 4: /* No extension words.  */
         break;
     default:
-        disas_undef(s, insn);
+        disas_undef(env, s, insn);
     }
 }
 
@@ -1649,10 +1661,10 @@
     op = (insn >> 8) & 0xf;
     offset = (int8_t)insn;
     if (offset == 0) {
-        offset = ldsw_code(s->pc);
+        offset = cpu_ldsw_code(env, s->pc);
         s->pc += 2;
     } else if (offset == -1) {
-        offset = read_im32(s);
+        offset = read_im32(env, s);
     }
     if (op == 1) {
         /* bsr */
@@ -1691,7 +1703,7 @@
         opsize = OS_WORD;
     else
         opsize = OS_BYTE;
-    SRC_EA(src, opsize, (insn & 0x80) == 0, NULL);
+    SRC_EA(env, src, opsize, (insn & 0x80) == 0, NULL);
     reg = DREG(insn, 9);
     tcg_gen_mov_i32(reg, src);
     gen_logic_cc(s, src);
@@ -1707,11 +1719,11 @@
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     if (insn & 0x100) {
-        SRC_EA(src, OS_LONG, 0, &addr);
+        SRC_EA(env, src, OS_LONG, 0, &addr);
         tcg_gen_or_i32(dest, src, reg);
-        DEST_EA(insn, OS_LONG, dest, &addr);
+        DEST_EA(env, insn, OS_LONG, dest, &addr);
     } else {
-        SRC_EA(src, OS_LONG, 0, NULL);
+        SRC_EA(env, src, OS_LONG, 0, NULL);
         tcg_gen_or_i32(dest, src, reg);
         tcg_gen_mov_i32(reg, dest);
     }
@@ -1723,7 +1735,7 @@
     TCGv src;
     TCGv reg;
 
-    SRC_EA(src, OS_LONG, 0, NULL);
+    SRC_EA(env, src, OS_LONG, 0, NULL);
     reg = AREG(insn, 9);
     tcg_gen_sub_i32(reg, reg, src);
 }
@@ -1749,7 +1761,7 @@
         val = -1;
     src = tcg_const_i32(val);
     gen_logic_cc(s, src);
-    DEST_EA(insn, OS_LONG, src, NULL);
+    DEST_EA(env, insn, OS_LONG, src, NULL);
 }
 
 DISAS_INSN(cmp)
@@ -1777,7 +1789,7 @@
     default:
         abort();
     }
-    SRC_EA(src, opsize, 1, NULL);
+    SRC_EA(env, src, opsize, 1, NULL);
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     tcg_gen_sub_i32(dest, reg, src);
@@ -1796,7 +1808,7 @@
     } else {
         opsize = OS_WORD;
     }
-    SRC_EA(src, opsize, 1, NULL);
+    SRC_EA(env, src, opsize, 1, NULL);
     reg = AREG(insn, 9);
     dest = tcg_temp_new();
     tcg_gen_sub_i32(dest, reg, src);
@@ -1811,12 +1823,12 @@
     TCGv dest;
     TCGv addr;
 
-    SRC_EA(src, OS_LONG, 0, &addr);
+    SRC_EA(env, src, OS_LONG, 0, &addr);
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     tcg_gen_xor_i32(dest, src, reg);
     gen_logic_cc(s, dest);
-    DEST_EA(insn, OS_LONG, dest, &addr);
+    DEST_EA(env, insn, OS_LONG, dest, &addr);
 }
 
 DISAS_INSN(and)
@@ -1829,11 +1841,11 @@
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     if (insn & 0x100) {
-        SRC_EA(src, OS_LONG, 0, &addr);
+        SRC_EA(env, src, OS_LONG, 0, &addr);
         tcg_gen_and_i32(dest, src, reg);
-        DEST_EA(insn, OS_LONG, dest, &addr);
+        DEST_EA(env, insn, OS_LONG, dest, &addr);
     } else {
-        SRC_EA(src, OS_LONG, 0, NULL);
+        SRC_EA(env, src, OS_LONG, 0, NULL);
         tcg_gen_and_i32(dest, src, reg);
         tcg_gen_mov_i32(reg, dest);
     }
@@ -1845,7 +1857,7 @@
     TCGv src;
     TCGv reg;
 
-    SRC_EA(src, OS_LONG, 0, NULL);
+    SRC_EA(env, src, OS_LONG, 0, NULL);
     reg = AREG(insn, 9);
     tcg_gen_add_i32(reg, reg, src);
 }
@@ -1934,13 +1946,13 @@
     uint32_t addr;
 
     addr = s->pc - 2;
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (ext != 0x46FC) {
         gen_exception(s, addr, EXCP_UNSUPPORTED);
         return;
     }
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (IS_USER(s) || (ext & SR_S) == 0) {
         gen_exception(s, addr, EXCP_PRIVILEGE);
@@ -1970,7 +1982,7 @@
         gen_exception(s, s->pc - 2, EXCP_PRIVILEGE);
         return;
     }
-    gen_set_sr(s, insn, 0);
+    gen_set_sr(env, s, insn, 0);
     gen_lookup_tb(s);
 }
 
@@ -2008,7 +2020,7 @@
         return;
     }
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
     gen_set_sr_im(s, ext, 0);
@@ -2035,7 +2047,7 @@
         return;
     }
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
     if (ext & 0x8000) {
@@ -2100,7 +2112,7 @@
     int set_dest;
     int opsize;
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     opmode = ext & 0x7f;
     switch ((ext >> 13) & 7) {
@@ -2136,7 +2148,7 @@
                 tcg_gen_addi_i32(tmp32, tmp32, -8);
                 break;
             case 5:
-                offset = ldsw_code(s->pc);
+                offset = cpu_ldsw_code(env, s->pc);
                 s->pc += 2;
                 tcg_gen_addi_i32(tmp32, tmp32, offset);
                 break;
@@ -2162,7 +2174,7 @@
         default:
             goto undef;
         }
-        DEST_EA(insn, opsize, tmp32, NULL);
+        DEST_EA(env, insn, opsize, tmp32, NULL);
         tcg_temp_free_i32(tmp32);
         return;
     case 4: /* fmove to control register.  */
@@ -2190,7 +2202,7 @@
                       (ext >> 10) & 7);
             goto undef;
         }
-        DEST_EA(insn, OS_LONG, tmp32, NULL);
+        DEST_EA(env, insn, OS_LONG, tmp32, NULL);
         break;
     case 6: /* fmovem */
     case 7:
@@ -2200,7 +2212,7 @@
             int i;
             if ((ext & 0x1f00) != 0x1000 || (ext & 0xff) == 0)
                 goto undef;
-            tmp32 = gen_lea(s, insn, OS_LONG);
+            tmp32 = gen_lea(env, s, insn, OS_LONG);
             if (IS_NULL_QREG(tmp32)) {
                 gen_addr_fault(s);
                 return;
@@ -2250,12 +2262,12 @@
                 tcg_gen_addi_i32(tmp32, tmp32, -8);
                 break;
             case 5:
-                offset = ldsw_code(s->pc);
+                offset = cpu_ldsw_code(env, s->pc);
                 s->pc += 2;
                 tcg_gen_addi_i32(tmp32, tmp32, offset);
                 break;
             case 7:
-                offset = ldsw_code(s->pc);
+                offset = cpu_ldsw_code(env, s->pc);
                 offset += s->pc - 2;
                 s->pc += 2;
                 tcg_gen_addi_i32(tmp32, tmp32, offset);
@@ -2275,7 +2287,7 @@
             }
             tcg_temp_free_i32(tmp32);
         } else {
-            SRC_EA(tmp32, opsize, 1, NULL);
+            SRC_EA(env, tmp32, opsize, 1, NULL);
             src = tcg_temp_new_i64();
             switch (opsize) {
             case OS_LONG:
@@ -2370,7 +2382,7 @@
 undef:
     /* FIXME: Is this right for offset addressing modes?  */
     s->pc -= 2;
-    disas_undef_fpu(s, insn);
+    disas_undef_fpu(env, s, insn);
 }
 
 DISAS_INSN(fbcc)
@@ -2381,10 +2393,10 @@
     int l1;
 
     addr = s->pc;
-    offset = ldsw_code(s->pc);
+    offset = cpu_ldsw_code(env, s->pc);
     s->pc += 2;
     if (insn & (1 << 6)) {
-        offset = (offset << 16) | lduw_code(s->pc);
+        offset = (offset << 16) | cpu_lduw_code(env, s->pc);
         s->pc += 2;
     }
 
@@ -2506,18 +2518,18 @@
         s->done_mac = 1;
     }
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
     acc = ((insn >> 7) & 1) | ((ext >> 3) & 2);
     dual = ((insn & 0x30) != 0 && (ext & 3) != 0);
     if (dual && !m68k_feature(s->env, M68K_FEATURE_CF_EMAC_B)) {
-        disas_undef(s, insn);
+        disas_undef(env, s, insn);
         return;
     }
     if (insn & 0x30) {
         /* MAC with load.  */
-        tmp = gen_lea(s, insn, OS_LONG);
+        tmp = gen_lea(env, s, insn, OS_LONG);
         addr = tcg_temp_new();
         tcg_gen_and_i32(addr, tmp, QREG_MAC_MASK);
         /* Load the value now to ensure correct exception behavior.
@@ -2731,7 +2743,7 @@
     int accnum;
     accnum = (insn >> 9) & 3;
     acc = MACREG(accnum);
-    SRC_EA(val, OS_LONG, 0, NULL);
+    SRC_EA(env, val, OS_LONG, 0, NULL);
     if (s->env->macsr & MACSR_FI) {
         tcg_gen_ext_i32_i64(acc, val);
         tcg_gen_shli_i64(acc, acc, 8);
@@ -2748,7 +2760,7 @@
 DISAS_INSN(to_macsr)
 {
     TCGv val;
-    SRC_EA(val, OS_LONG, 0, NULL);
+    SRC_EA(env, val, OS_LONG, 0, NULL);
     gen_helper_set_macsr(cpu_env, val);
     gen_lookup_tb(s);
 }
@@ -2756,7 +2768,7 @@
 DISAS_INSN(to_mask)
 {
     TCGv val;
-    SRC_EA(val, OS_LONG, 0, NULL);
+    SRC_EA(env, val, OS_LONG, 0, NULL);
     tcg_gen_ori_i32(QREG_MAC_MASK, val, 0xffff0000);
 }
 
@@ -2764,7 +2776,7 @@
 {
     TCGv val;
     TCGv acc;
-    SRC_EA(val, OS_LONG, 0, NULL);
+    SRC_EA(env, val, OS_LONG, 0, NULL);
     acc = tcg_const_i32((insn & 0x400) ? 2 : 0);
     if (s->env->macsr & MACSR_FI)
         gen_helper_set_mac_extf(cpu_env, val, acc);
@@ -2941,10 +2953,14 @@
 {
     uint16_t insn;
 
-    insn = lduw_code(s->pc);
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
+        tcg_gen_debug_insn_start(s->pc);
+    }
+
+    insn = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
-    opcode_table[insn](s, insn);
+    opcode_table[insn](env, s, insn);
 }
 
 /* generate intermediate code for basic block 'tb'.  */
@@ -3028,7 +3044,7 @@
             gen_flush_cc_op(dc);
             tcg_gen_movi_i32(QREG_PC, dc->pc);
         }
-        gen_helper_raise_exception(tcg_const_i32(EXCP_DEBUG));
+        gen_helper_raise_exception(cpu_env, tcg_const_i32(EXCP_DEBUG));
     } else {
         switch(dc->is_jmp) {
         case DISAS_NEXT:
diff --git a/target-microblaze/Makefile.objs b/target-microblaze/Makefile.objs
index 4b09e8c..afb87bc 100644
--- a/target-microblaze/Makefile.objs
+++ b/target-microblaze/Makefile.objs
@@ -1,4 +1,2 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += mmu.o machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-microblaze/helper.h b/target-microblaze/helper.h
index 9dcfb0f..a1a732c 100644
--- a/target-microblaze/helper.h
+++ b/target-microblaze/helper.h
@@ -1,39 +1,39 @@
 #include "def-helper.h"
 
-DEF_HELPER_1(raise_exception, void, i32)
-DEF_HELPER_0(debug, void)
+DEF_HELPER_2(raise_exception, void, env, i32)
+DEF_HELPER_1(debug, void, env)
 DEF_HELPER_FLAGS_3(carry, TCG_CALL_PURE | TCG_CALL_CONST, i32, i32, i32, i32)
 DEF_HELPER_2(cmp, i32, i32, i32)
 DEF_HELPER_2(cmpu, i32, i32, i32)
 DEF_HELPER_FLAGS_1(clz, TCG_CALL_PURE | TCG_CALL_CONST, i32, i32)
 
-DEF_HELPER_2(divs, i32, i32, i32)
-DEF_HELPER_2(divu, i32, i32, i32)
+DEF_HELPER_3(divs, i32, env, i32, i32)
+DEF_HELPER_3(divu, i32, env, i32, i32)
 
-DEF_HELPER_2(fadd, i32, i32, i32)
-DEF_HELPER_2(frsub, i32, i32, i32)
-DEF_HELPER_2(fmul, i32, i32, i32)
-DEF_HELPER_2(fdiv, i32, i32, i32)
-DEF_HELPER_1(flt, i32, i32)
-DEF_HELPER_1(fint, i32, i32)
-DEF_HELPER_1(fsqrt, i32, i32)
+DEF_HELPER_3(fadd, i32, env, i32, i32)
+DEF_HELPER_3(frsub, i32, env, i32, i32)
+DEF_HELPER_3(fmul, i32, env, i32, i32)
+DEF_HELPER_3(fdiv, i32, env, i32, i32)
+DEF_HELPER_2(flt, i32, env, i32)
+DEF_HELPER_2(fint, i32, env, i32)
+DEF_HELPER_2(fsqrt, i32, env, i32)
 
-DEF_HELPER_2(fcmp_un, i32, i32, i32)
-DEF_HELPER_2(fcmp_lt, i32, i32, i32)
-DEF_HELPER_2(fcmp_eq, i32, i32, i32)
-DEF_HELPER_2(fcmp_le, i32, i32, i32)
-DEF_HELPER_2(fcmp_gt, i32, i32, i32)
-DEF_HELPER_2(fcmp_ne, i32, i32, i32)
-DEF_HELPER_2(fcmp_ge, i32, i32, i32)
+DEF_HELPER_3(fcmp_un, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_lt, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_eq, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_le, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_gt, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_ne, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_ge, i32, env, i32, i32)
 
 DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_PURE | TCG_CALL_CONST, i32, i32, i32)
 #if !defined(CONFIG_USER_ONLY)
-DEF_HELPER_1(mmu_read, i32, i32)
-DEF_HELPER_2(mmu_write, void, i32, i32)
+DEF_HELPER_2(mmu_read, i32, env, i32)
+DEF_HELPER_3(mmu_write, void, env, i32, i32)
 #endif
 
-DEF_HELPER_4(memalign, void, i32, i32, i32, i32)
-DEF_HELPER_1(stackprot, void, i32)
+DEF_HELPER_5(memalign, void, env, i32, i32, i32, i32)
+DEF_HELPER_2(stackprot, void, env, i32)
 
 DEF_HELPER_2(get, i32, i32, i32)
 DEF_HELPER_3(put, void, i32, i32, i32)
diff --git a/target-microblaze/op_helper.c b/target-microblaze/op_helper.c
index 3b1f072..c9789f4 100644
--- a/target-microblaze/op_helper.c
+++ b/target-microblaze/op_helper.c
@@ -20,7 +20,6 @@
 
 #include <assert.h>
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 #include "host-utils.h"
 
@@ -42,17 +41,12 @@
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPUMBState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUMBState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUMBState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
-
     ret = cpu_mb_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -66,7 +60,6 @@
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 #endif
 
@@ -105,13 +98,13 @@
     return 0xdead0000 | id;
 }
 
-void helper_raise_exception(uint32_t index)
+void helper_raise_exception(CPUMBState *env, uint32_t index)
 {
     env->exception_index = index;
     cpu_loop_exit(env);
 }
 
-void helper_debug(void)
+void helper_debug(CPUMBState *env)
 {
     int i;
 
@@ -176,7 +169,7 @@
     return ncf;
 }
 
-static inline int div_prepare(uint32_t a, uint32_t b)
+static inline int div_prepare(CPUMBState *env, uint32_t a, uint32_t b)
 {
     if (b == 0) {
         env->sregs[SR_MSR] |= MSR_DZ;
@@ -184,7 +177,7 @@
         if ((env->sregs[SR_MSR] & MSR_EE)
             && !(env->pvr.regs[2] & PVR2_DIV_ZERO_EXC_MASK)) {
             env->sregs[SR_ESR] = ESR_EC_DIVZERO;
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
         }
         return 0;
     }
@@ -192,28 +185,30 @@
     return 1;
 }
 
-uint32_t helper_divs(uint32_t a, uint32_t b)
+uint32_t helper_divs(CPUMBState *env, uint32_t a, uint32_t b)
 {
-    if (!div_prepare(a, b))
+    if (!div_prepare(env, a, b)) {
         return 0;
+    }
     return (int32_t)a / (int32_t)b;
 }
 
-uint32_t helper_divu(uint32_t a, uint32_t b)
+uint32_t helper_divu(CPUMBState *env, uint32_t a, uint32_t b)
 {
-    if (!div_prepare(a, b))
+    if (!div_prepare(env, a, b)) {
         return 0;
+    }
     return a / b;
 }
 
 /* raise FPU exception.  */
-static void raise_fpu_exception(void)
+static void raise_fpu_exception(CPUMBState *env)
 {
     env->sregs[SR_ESR] = ESR_EC_FPU;
-    helper_raise_exception(EXCP_HW_EXCP);
+    helper_raise_exception(env, EXCP_HW_EXCP);
 }
 
-static void update_fpu_flags(int flags)
+static void update_fpu_flags(CPUMBState *env, int flags)
 {
     int raise = 0;
 
@@ -236,11 +231,11 @@
     if (raise
         && (env->pvr.regs[2] & PVR2_FPU_EXC_MASK)
         && (env->sregs[SR_MSR] & MSR_EE)) {
-        raise_fpu_exception();
+        raise_fpu_exception(env);
     }
 }
 
-uint32_t helper_fadd(uint32_t a, uint32_t b)
+uint32_t helper_fadd(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fd, fa, fb;
     int flags;
@@ -251,11 +246,11 @@
     fd.f = float32_add(fa.f, fb.f, &env->fp_status);
 
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
     return fd.l;
 }
 
-uint32_t helper_frsub(uint32_t a, uint32_t b)
+uint32_t helper_frsub(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fd, fa, fb;
     int flags;
@@ -265,11 +260,11 @@
     fb.l = b;
     fd.f = float32_sub(fb.f, fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
     return fd.l;
 }
 
-uint32_t helper_fmul(uint32_t a, uint32_t b)
+uint32_t helper_fmul(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fd, fa, fb;
     int flags;
@@ -279,12 +274,12 @@
     fb.l = b;
     fd.f = float32_mul(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
 
     return fd.l;
 }
 
-uint32_t helper_fdiv(uint32_t a, uint32_t b)
+uint32_t helper_fdiv(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fd, fa, fb;
     int flags;
@@ -294,12 +289,12 @@
     fb.l = b;
     fd.f = float32_div(fb.f, fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
 
     return fd.l;
 }
 
-uint32_t helper_fcmp_un(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_un(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     uint32_t r = 0;
@@ -308,7 +303,7 @@
     fb.l = b;
 
     if (float32_is_signaling_nan(fa.f) || float32_is_signaling_nan(fb.f)) {
-        update_fpu_flags(float_flag_invalid);
+        update_fpu_flags(env, float_flag_invalid);
         r = 1;
     }
 
@@ -319,7 +314,7 @@
     return r;
 }
 
-uint32_t helper_fcmp_lt(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_lt(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int r;
@@ -330,12 +325,12 @@
     fb.l = b;
     r = float32_lt(fb.f, fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
     return r;
 }
 
-uint32_t helper_fcmp_eq(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_eq(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags;
@@ -346,12 +341,12 @@
     fb.l = b;
     r = float32_eq_quiet(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
     return r;
 }
 
-uint32_t helper_fcmp_le(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_le(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags;
@@ -362,13 +357,13 @@
     set_float_exception_flags(0, &env->fp_status);
     r = float32_le(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
 
     return r;
 }
 
-uint32_t helper_fcmp_gt(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_gt(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags, r;
@@ -378,11 +373,11 @@
     set_float_exception_flags(0, &env->fp_status);
     r = float32_lt(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
     return r;
 }
 
-uint32_t helper_fcmp_ne(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_ne(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags, r;
@@ -392,12 +387,12 @@
     set_float_exception_flags(0, &env->fp_status);
     r = !float32_eq_quiet(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
     return r;
 }
 
-uint32_t helper_fcmp_ge(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_ge(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags, r;
@@ -407,12 +402,12 @@
     set_float_exception_flags(0, &env->fp_status);
     r = !float32_lt(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
     return r;
 }
 
-uint32_t helper_flt(uint32_t a)
+uint32_t helper_flt(CPUMBState *env, uint32_t a)
 {
     CPU_FloatU fd, fa;
 
@@ -421,7 +416,7 @@
     return fd.l;
 }
 
-uint32_t helper_fint(uint32_t a)
+uint32_t helper_fint(CPUMBState *env, uint32_t a)
 {
     CPU_FloatU fa;
     uint32_t r;
@@ -431,12 +426,12 @@
     fa.l = a;
     r = float32_to_int32(fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
 
     return r;
 }
 
-uint32_t helper_fsqrt(uint32_t a)
+uint32_t helper_fsqrt(CPUMBState *env, uint32_t a)
 {
     CPU_FloatU fd, fa;
     int flags;
@@ -445,7 +440,7 @@
     fa.l = a;
     fd.l = float32_sqrt(fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
 
     return fd.l;
 }
@@ -463,7 +458,8 @@
     return 0;
 }
 
-void helper_memalign(uint32_t addr, uint32_t dr, uint32_t wr, uint32_t mask)
+void helper_memalign(CPUMBState *env, uint32_t addr, uint32_t dr, uint32_t wr,
+                     uint32_t mask)
 {
     if (addr & mask) {
             qemu_log_mask(CPU_LOG_INT,
@@ -478,45 +474,39 @@
             if (!(env->sregs[SR_MSR] & MSR_EE)) {
                 return;
             }
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
     }
 }
 
-void helper_stackprot(uint32_t addr)
+void helper_stackprot(CPUMBState *env, uint32_t addr)
 {
     if (addr < env->slr || addr > env->shr) {
             qemu_log("Stack protector violation at %x %x %x\n",
                      addr, env->slr, env->shr);
             env->sregs[SR_EAR] = addr;
             env->sregs[SR_ESR] = ESR_EC_STACKPROT;
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
     }
 }
 
 #if !defined(CONFIG_USER_ONLY)
 /* Writes/reads to the MMU's special regs end up here.  */
-uint32_t helper_mmu_read(uint32_t rn)
+uint32_t helper_mmu_read(CPUMBState *env, uint32_t rn)
 {
     return mmu_read(env, rn);
 }
 
-void helper_mmu_write(uint32_t rn, uint32_t v)
+void helper_mmu_write(CPUMBState *env, uint32_t rn, uint32_t v)
 {
     mmu_write(env, rn, v);
 }
 
-void cpu_unassigned_access(CPUMBState *env1, target_phys_addr_t addr,
+void cpu_unassigned_access(CPUMBState *env, target_phys_addr_t addr,
                            int is_write, int is_exec, int is_asi, int size)
 {
-    CPUMBState *saved_env;
-
-    saved_env = env;
-    env = env1;
-
     qemu_log_mask(CPU_LOG_INT, "Unassigned " TARGET_FMT_plx " wr=%d exe=%d\n",
              addr, is_write, is_exec);
     if (!(env->sregs[SR_MSR] & MSR_EE)) {
-        env = saved_env;
         return;
     }
 
@@ -524,14 +514,13 @@
     if (is_exec) {
         if ((env->pvr.regs[2] & PVR2_IOPB_BUS_EXC_MASK)) {
             env->sregs[SR_ESR] = ESR_EC_INSN_BUS;
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
         }
     } else {
         if ((env->pvr.regs[2] & PVR2_DOPB_BUS_EXC_MASK)) {
             env->sregs[SR_ESR] = ESR_EC_DATA_BUS;
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
         }
     }
-    env = saved_env;
 }
 #endif
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 7470149..7d864b1 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -126,7 +126,7 @@
 
     t_sync_flags(dc);
     tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
-    gen_helper_raise_exception(tmp);
+    gen_helper_raise_exception(cpu_env, tmp);
     tcg_temp_free_i32(tmp);
     dc->is_jmp = DISAS_UPDATE;
 }
@@ -503,9 +503,9 @@
         sr &= 7;
         LOG_DIS("m%ss sr%d r%d imm=%x\n", to ? "t" : "f", sr, dc->ra, dc->imm);
         if (to)
-            gen_helper_mmu_write(tcg_const_tl(sr), cpu_R[dc->ra]);
+            gen_helper_mmu_write(cpu_env, tcg_const_tl(sr), cpu_R[dc->ra]);
         else
-            gen_helper_mmu_read(cpu_R[dc->rd], tcg_const_tl(sr));
+            gen_helper_mmu_read(cpu_R[dc->rd], cpu_env, tcg_const_tl(sr));
         return;
     }
 #endif
@@ -704,9 +704,11 @@
     }
 
     if (u)
-        gen_helper_divu(cpu_R[dc->rd], *(dec_alu_op_b(dc)), cpu_R[dc->ra]);
+        gen_helper_divu(cpu_R[dc->rd], cpu_env, *(dec_alu_op_b(dc)),
+                        cpu_R[dc->ra]);
     else
-        gen_helper_divs(cpu_R[dc->rd], *(dec_alu_op_b(dc)), cpu_R[dc->ra]);
+        gen_helper_divs(cpu_R[dc->rd], cpu_env, *(dec_alu_op_b(dc)),
+                        cpu_R[dc->ra]);
     if (!dc->rd)
         tcg_gen_movi_tl(cpu_R[dc->rd], 0);
 }
@@ -912,7 +914,7 @@
         tcg_gen_add_tl(*t, cpu_R[dc->ra], cpu_R[dc->rb]);
 
         if (stackprot) {
-            gen_helper_stackprot(*t);
+            gen_helper_stackprot(cpu_env, *t);
         }
         return t;
     }
@@ -930,7 +932,7 @@
     }
 
     if (stackprot) {
-        gen_helper_stackprot(*t);
+        gen_helper_stackprot(cpu_env, *t);
     }
     return t;
 }
@@ -1056,7 +1058,7 @@
         gen_load(dc, v, *addr, size);
 
         tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
-        gen_helper_memalign(*addr, tcg_const_tl(dc->rd),
+        gen_helper_memalign(cpu_env, *addr, tcg_const_tl(dc->rd),
                             tcg_const_tl(0), tcg_const_tl(size - 1));
         if (dc->rd) {
             if (rev) {
@@ -1218,7 +1220,7 @@
          *        the alignment checks in between the probe and the mem
          *        access.
          */
-        gen_helper_memalign(*addr, tcg_const_tl(dc->rd),
+        gen_helper_memalign(cpu_env, *addr, tcg_const_tl(dc->rd),
                             tcg_const_tl(1), tcg_const_tl(size - 1));
     }
 
@@ -1493,49 +1495,53 @@
 
     switch (fpu_insn) {
         case 0:
-            gen_helper_fadd(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            gen_helper_fadd(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
+                            cpu_R[dc->rb]);
             break;
 
         case 1:
-            gen_helper_frsub(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            gen_helper_frsub(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
+                             cpu_R[dc->rb]);
             break;
 
         case 2:
-            gen_helper_fmul(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            gen_helper_fmul(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
+                            cpu_R[dc->rb]);
             break;
 
         case 3:
-            gen_helper_fdiv(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            gen_helper_fdiv(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
+                            cpu_R[dc->rb]);
             break;
 
         case 4:
             switch ((dc->ir >> 4) & 7) {
                 case 0:
-                    gen_helper_fcmp_un(cpu_R[dc->rd],
+                    gen_helper_fcmp_un(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 1:
-                    gen_helper_fcmp_lt(cpu_R[dc->rd],
+                    gen_helper_fcmp_lt(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 2:
-                    gen_helper_fcmp_eq(cpu_R[dc->rd],
+                    gen_helper_fcmp_eq(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 3:
-                    gen_helper_fcmp_le(cpu_R[dc->rd],
+                    gen_helper_fcmp_le(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 4:
-                    gen_helper_fcmp_gt(cpu_R[dc->rd],
+                    gen_helper_fcmp_gt(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 5:
-                    gen_helper_fcmp_ne(cpu_R[dc->rd],
+                    gen_helper_fcmp_ne(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 6:
-                    gen_helper_fcmp_ge(cpu_R[dc->rd],
+                    gen_helper_fcmp_ge(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 default:
@@ -1552,21 +1558,21 @@
             if (!dec_check_fpuv2(dc)) {
                 return;
             }
-            gen_helper_flt(cpu_R[dc->rd], cpu_R[dc->ra]);
+            gen_helper_flt(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]);
             break;
 
         case 6:
             if (!dec_check_fpuv2(dc)) {
                 return;
             }
-            gen_helper_fint(cpu_R[dc->rd], cpu_R[dc->ra]);
+            gen_helper_fint(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]);
             break;
 
         case 7:
             if (!dec_check_fpuv2(dc)) {
                 return;
             }
-            gen_helper_fsqrt(cpu_R[dc->rd], cpu_R[dc->ra]);
+            gen_helper_fsqrt(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]);
             break;
 
         default:
@@ -1654,15 +1660,15 @@
     {{0, 0}, dec_null}
 };
 
-static inline void decode(DisasContext *dc)
+static inline void decode(DisasContext *dc, uint32_t ir)
 {
-    uint32_t ir;
     int i;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(dc->pc);
+    }
 
-    dc->ir = ir = ldl_code(dc->pc);
+    dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
 
     if (dc->ir)
@@ -1796,7 +1802,7 @@
             gen_io_start();
 
         dc->clear_imm = 1;
-	decode(dc);
+        decode(dc, cpu_ldl_code(env, dc->pc));
         if (dc->clear_imm)
             dc->tb_flags &= ~IMM_FLAG;
         dc->pc += 4;
@@ -1871,7 +1877,7 @@
         if (dc->is_jmp != DISAS_JUMP) {
             tcg_gen_movi_tl(cpu_SR[SR_PC], npc);
         }
-        gen_helper_raise_exception(tmp);
+        gen_helper_raise_exception(cpu_env, tmp);
         tcg_temp_free_i32(tmp);
     } else {
         switch(dc->is_jmp) {
diff --git a/target-mips/Makefile.objs b/target-mips/Makefile.objs
index 2e0e093..3eeeeac 100644
--- a/target-mips/Makefile.objs
+++ b/target-mips/Makefile.objs
@@ -1,4 +1,2 @@
-obj-y += translate.o op_helper.o helper.o cpu.o
+obj-y += translate.o op_helper.o lmi_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 88d92f1..b7a5112b 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -38,10 +38,10 @@
     uint32_t nb_tlb;
     uint32_t tlb_in_use;
     int (*map_address) (struct CPUMIPSState *env, target_phys_addr_t *physical, int *prot, target_ulong address, int rw, int access_type);
-    void (*helper_tlbwi) (void);
-    void (*helper_tlbwr) (void);
-    void (*helper_tlbp) (void);
-    void (*helper_tlbr) (void);
+    void (*helper_tlbwi)(struct CPUMIPSState *env);
+    void (*helper_tlbwr)(struct CPUMIPSState *env);
+    void (*helper_tlbp)(struct CPUMIPSState *env);
+    void (*helper_tlbr)(struct CPUMIPSState *env);
     union {
         struct {
             r4k_tlb_t tlb[MIPS_TLB_MAX];
@@ -485,10 +485,10 @@
                            target_ulong address, int rw, int access_type);
 int r4k_map_address (CPUMIPSState *env, target_phys_addr_t *physical, int *prot,
                      target_ulong address, int rw, int access_type);
-void r4k_helper_tlbwi (void);
-void r4k_helper_tlbwr (void);
-void r4k_helper_tlbp (void);
-void r4k_helper_tlbr (void);
+void r4k_helper_tlbwi(CPUMIPSState *env);
+void r4k_helper_tlbwr(CPUMIPSState *env);
+void r4k_helper_tlbp(CPUMIPSState *env);
+void r4k_helper_tlbr(CPUMIPSState *env);
 
 void cpu_unassigned_access(CPUMIPSState *env, target_phys_addr_t addr,
                            int is_write, int is_exec, int unused, int size);
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 76fb451..f35ed78 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -1,25 +1,25 @@
 #include "def-helper.h"
 
-DEF_HELPER_2(raise_exception_err, noreturn, i32, int)
-DEF_HELPER_1(raise_exception, noreturn, i32)
+DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int)
+DEF_HELPER_2(raise_exception, noreturn, env, i32)
 
 #ifdef TARGET_MIPS64
-DEF_HELPER_3(ldl, tl, tl, tl, int)
-DEF_HELPER_3(ldr, tl, tl, tl, int)
-DEF_HELPER_3(sdl, void, tl, tl, int)
-DEF_HELPER_3(sdr, void, tl, tl, int)
+DEF_HELPER_4(ldl, tl, env, tl, tl, int)
+DEF_HELPER_4(ldr, tl, env, tl, tl, int)
+DEF_HELPER_4(sdl, void, env, tl, tl, int)
+DEF_HELPER_4(sdr, void, env, tl, tl, int)
 #endif
-DEF_HELPER_3(lwl, tl, tl, tl, int)
-DEF_HELPER_3(lwr, tl, tl, tl, int)
-DEF_HELPER_3(swl, void, tl, tl, int)
-DEF_HELPER_3(swr, void, tl, tl, int)
+DEF_HELPER_4(lwl, tl, env, tl, tl, int)
+DEF_HELPER_4(lwr, tl, env, tl, tl, int)
+DEF_HELPER_4(swl, void, env, tl, tl, int)
+DEF_HELPER_4(swr, void, env, tl, tl, int)
 
 #ifndef CONFIG_USER_ONLY
-DEF_HELPER_2(ll, tl, tl, int)
-DEF_HELPER_3(sc, tl, tl, tl, int)
+DEF_HELPER_3(ll, tl, env, tl, int)
+DEF_HELPER_4(sc, tl, env, tl, tl, int)
 #ifdef TARGET_MIPS64
-DEF_HELPER_2(lld, tl, tl, int)
-DEF_HELPER_3(scd, tl, tl, tl, int)
+DEF_HELPER_3(lld, tl, env, tl, int)
+DEF_HELPER_4(scd, tl, env, tl, tl, int)
 #endif
 #endif
 
@@ -28,195 +28,195 @@
 #ifdef TARGET_MIPS64
 DEF_HELPER_FLAGS_1(dclo, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
 DEF_HELPER_FLAGS_1(dclz, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
-DEF_HELPER_2(dmult, void, tl, tl)
-DEF_HELPER_2(dmultu, void, tl, tl)
+DEF_HELPER_3(dmult, void, env, tl, tl)
+DEF_HELPER_3(dmultu, void, env, tl, tl)
 #endif
 
-DEF_HELPER_2(muls, tl, tl, tl)
-DEF_HELPER_2(mulsu, tl, tl, tl)
-DEF_HELPER_2(macc, tl, tl, tl)
-DEF_HELPER_2(maccu, tl, tl, tl)
-DEF_HELPER_2(msac, tl, tl, tl)
-DEF_HELPER_2(msacu, tl, tl, tl)
-DEF_HELPER_2(mulhi, tl, tl, tl)
-DEF_HELPER_2(mulhiu, tl, tl, tl)
-DEF_HELPER_2(mulshi, tl, tl, tl)
-DEF_HELPER_2(mulshiu, tl, tl, tl)
-DEF_HELPER_2(macchi, tl, tl, tl)
-DEF_HELPER_2(macchiu, tl, tl, tl)
-DEF_HELPER_2(msachi, tl, tl, tl)
-DEF_HELPER_2(msachiu, tl, tl, tl)
+DEF_HELPER_3(muls, tl, env, tl, tl)
+DEF_HELPER_3(mulsu, tl, env, tl, tl)
+DEF_HELPER_3(macc, tl, env, tl, tl)
+DEF_HELPER_3(maccu, tl, env, tl, tl)
+DEF_HELPER_3(msac, tl, env, tl, tl)
+DEF_HELPER_3(msacu, tl, env, tl, tl)
+DEF_HELPER_3(mulhi, tl, env, tl, tl)
+DEF_HELPER_3(mulhiu, tl, env, tl, tl)
+DEF_HELPER_3(mulshi, tl, env, tl, tl)
+DEF_HELPER_3(mulshiu, tl, env, tl, tl)
+DEF_HELPER_3(macchi, tl, env, tl, tl)
+DEF_HELPER_3(macchiu, tl, env, tl, tl)
+DEF_HELPER_3(msachi, tl, env, tl, tl)
+DEF_HELPER_3(msachiu, tl, env, tl, tl)
 
 #ifndef CONFIG_USER_ONLY
 /* CP0 helpers */
-DEF_HELPER_0(mfc0_mvpcontrol, tl)
-DEF_HELPER_0(mfc0_mvpconf0, tl)
-DEF_HELPER_0(mfc0_mvpconf1, tl)
-DEF_HELPER_0(mftc0_vpecontrol, tl)
-DEF_HELPER_0(mftc0_vpeconf0, tl)
-DEF_HELPER_0(mfc0_random, tl)
-DEF_HELPER_0(mfc0_tcstatus, tl)
-DEF_HELPER_0(mftc0_tcstatus, tl)
-DEF_HELPER_0(mfc0_tcbind, tl)
-DEF_HELPER_0(mftc0_tcbind, tl)
-DEF_HELPER_0(mfc0_tcrestart, tl)
-DEF_HELPER_0(mftc0_tcrestart, tl)
-DEF_HELPER_0(mfc0_tchalt, tl)
-DEF_HELPER_0(mftc0_tchalt, tl)
-DEF_HELPER_0(mfc0_tccontext, tl)
-DEF_HELPER_0(mftc0_tccontext, tl)
-DEF_HELPER_0(mfc0_tcschedule, tl)
-DEF_HELPER_0(mftc0_tcschedule, tl)
-DEF_HELPER_0(mfc0_tcschefback, tl)
-DEF_HELPER_0(mftc0_tcschefback, tl)
-DEF_HELPER_0(mfc0_count, tl)
-DEF_HELPER_0(mftc0_entryhi, tl)
-DEF_HELPER_0(mftc0_status, tl)
-DEF_HELPER_0(mftc0_cause, tl)
-DEF_HELPER_0(mftc0_epc, tl)
-DEF_HELPER_0(mftc0_ebase, tl)
-DEF_HELPER_1(mftc0_configx, tl, tl)
-DEF_HELPER_0(mfc0_lladdr, tl)
-DEF_HELPER_1(mfc0_watchlo, tl, i32)
-DEF_HELPER_1(mfc0_watchhi, tl, i32)
-DEF_HELPER_0(mfc0_debug, tl)
-DEF_HELPER_0(mftc0_debug, tl)
+DEF_HELPER_1(mfc0_mvpcontrol, tl, env)
+DEF_HELPER_1(mfc0_mvpconf0, tl, env)
+DEF_HELPER_1(mfc0_mvpconf1, tl, env)
+DEF_HELPER_1(mftc0_vpecontrol, tl, env)
+DEF_HELPER_1(mftc0_vpeconf0, tl, env)
+DEF_HELPER_1(mfc0_random, tl, env)
+DEF_HELPER_1(mfc0_tcstatus, tl, env)
+DEF_HELPER_1(mftc0_tcstatus, tl, env)
+DEF_HELPER_1(mfc0_tcbind, tl, env)
+DEF_HELPER_1(mftc0_tcbind, tl, env)
+DEF_HELPER_1(mfc0_tcrestart, tl, env)
+DEF_HELPER_1(mftc0_tcrestart, tl, env)
+DEF_HELPER_1(mfc0_tchalt, tl, env)
+DEF_HELPER_1(mftc0_tchalt, tl, env)
+DEF_HELPER_1(mfc0_tccontext, tl, env)
+DEF_HELPER_1(mftc0_tccontext, tl, env)
+DEF_HELPER_1(mfc0_tcschedule, tl, env)
+DEF_HELPER_1(mftc0_tcschedule, tl, env)
+DEF_HELPER_1(mfc0_tcschefback, tl, env)
+DEF_HELPER_1(mftc0_tcschefback, tl, env)
+DEF_HELPER_1(mfc0_count, tl, env)
+DEF_HELPER_1(mftc0_entryhi, tl, env)
+DEF_HELPER_1(mftc0_status, tl, env)
+DEF_HELPER_1(mftc0_cause, tl, env)
+DEF_HELPER_1(mftc0_epc, tl, env)
+DEF_HELPER_1(mftc0_ebase, tl, env)
+DEF_HELPER_2(mftc0_configx, tl, env, tl)
+DEF_HELPER_1(mfc0_lladdr, tl, env)
+DEF_HELPER_2(mfc0_watchlo, tl, env, i32)
+DEF_HELPER_2(mfc0_watchhi, tl, env, i32)
+DEF_HELPER_1(mfc0_debug, tl, env)
+DEF_HELPER_1(mftc0_debug, tl, env)
 #ifdef TARGET_MIPS64
-DEF_HELPER_0(dmfc0_tcrestart, tl)
-DEF_HELPER_0(dmfc0_tchalt, tl)
-DEF_HELPER_0(dmfc0_tccontext, tl)
-DEF_HELPER_0(dmfc0_tcschedule, tl)
-DEF_HELPER_0(dmfc0_tcschefback, tl)
-DEF_HELPER_0(dmfc0_lladdr, tl)
-DEF_HELPER_1(dmfc0_watchlo, tl, i32)
+DEF_HELPER_1(dmfc0_tcrestart, tl, env)
+DEF_HELPER_1(dmfc0_tchalt, tl, env)
+DEF_HELPER_1(dmfc0_tccontext, tl, env)
+DEF_HELPER_1(dmfc0_tcschedule, tl, env)
+DEF_HELPER_1(dmfc0_tcschefback, tl, env)
+DEF_HELPER_1(dmfc0_lladdr, tl, env)
+DEF_HELPER_2(dmfc0_watchlo, tl, env, i32)
 #endif /* TARGET_MIPS64 */
 
-DEF_HELPER_1(mtc0_index, void, tl)
-DEF_HELPER_1(mtc0_mvpcontrol, void, tl)
-DEF_HELPER_1(mtc0_vpecontrol, void, tl)
-DEF_HELPER_1(mttc0_vpecontrol, void, tl)
-DEF_HELPER_1(mtc0_vpeconf0, void, tl)
-DEF_HELPER_1(mttc0_vpeconf0, void, tl)
-DEF_HELPER_1(mtc0_vpeconf1, void, tl)
-DEF_HELPER_1(mtc0_yqmask, void, tl)
-DEF_HELPER_1(mtc0_vpeopt, void, tl)
-DEF_HELPER_1(mtc0_entrylo0, void, tl)
-DEF_HELPER_1(mtc0_tcstatus, void, tl)
-DEF_HELPER_1(mttc0_tcstatus, void, tl)
-DEF_HELPER_1(mtc0_tcbind, void, tl)
-DEF_HELPER_1(mttc0_tcbind, void, tl)
-DEF_HELPER_1(mtc0_tcrestart, void, tl)
-DEF_HELPER_1(mttc0_tcrestart, void, tl)
-DEF_HELPER_1(mtc0_tchalt, void, tl)
-DEF_HELPER_1(mttc0_tchalt, void, tl)
-DEF_HELPER_1(mtc0_tccontext, void, tl)
-DEF_HELPER_1(mttc0_tccontext, void, tl)
-DEF_HELPER_1(mtc0_tcschedule, void, tl)
-DEF_HELPER_1(mttc0_tcschedule, void, tl)
-DEF_HELPER_1(mtc0_tcschefback, void, tl)
-DEF_HELPER_1(mttc0_tcschefback, void, tl)
-DEF_HELPER_1(mtc0_entrylo1, void, tl)
-DEF_HELPER_1(mtc0_context, void, tl)
-DEF_HELPER_1(mtc0_pagemask, void, tl)
-DEF_HELPER_1(mtc0_pagegrain, void, tl)
-DEF_HELPER_1(mtc0_wired, void, tl)
-DEF_HELPER_1(mtc0_srsconf0, void, tl)
-DEF_HELPER_1(mtc0_srsconf1, void, tl)
-DEF_HELPER_1(mtc0_srsconf2, void, tl)
-DEF_HELPER_1(mtc0_srsconf3, void, tl)
-DEF_HELPER_1(mtc0_srsconf4, void, tl)
-DEF_HELPER_1(mtc0_hwrena, void, tl)
-DEF_HELPER_1(mtc0_count, void, tl)
-DEF_HELPER_1(mtc0_entryhi, void, tl)
-DEF_HELPER_1(mttc0_entryhi, void, tl)
-DEF_HELPER_1(mtc0_compare, void, tl)
-DEF_HELPER_1(mtc0_status, void, tl)
-DEF_HELPER_1(mttc0_status, void, tl)
-DEF_HELPER_1(mtc0_intctl, void, tl)
-DEF_HELPER_1(mtc0_srsctl, void, tl)
-DEF_HELPER_1(mtc0_cause, void, tl)
-DEF_HELPER_1(mttc0_cause, void, tl)
-DEF_HELPER_1(mtc0_ebase, void, tl)
-DEF_HELPER_1(mttc0_ebase, void, tl)
-DEF_HELPER_1(mtc0_config0, void, tl)
-DEF_HELPER_1(mtc0_config2, void, tl)
-DEF_HELPER_1(mtc0_lladdr, void, tl)
-DEF_HELPER_2(mtc0_watchlo, void, tl, i32)
-DEF_HELPER_2(mtc0_watchhi, void, tl, i32)
-DEF_HELPER_1(mtc0_xcontext, void, tl)
-DEF_HELPER_1(mtc0_framemask, void, tl)
-DEF_HELPER_1(mtc0_debug, void, tl)
-DEF_HELPER_1(mttc0_debug, void, tl)
-DEF_HELPER_1(mtc0_performance0, void, tl)
-DEF_HELPER_1(mtc0_taglo, void, tl)
-DEF_HELPER_1(mtc0_datalo, void, tl)
-DEF_HELPER_1(mtc0_taghi, void, tl)
-DEF_HELPER_1(mtc0_datahi, void, tl)
+DEF_HELPER_2(mtc0_index, void, env, tl)
+DEF_HELPER_2(mtc0_mvpcontrol, void, env, tl)
+DEF_HELPER_2(mtc0_vpecontrol, void, env, tl)
+DEF_HELPER_2(mttc0_vpecontrol, void, env, tl)
+DEF_HELPER_2(mtc0_vpeconf0, void, env, tl)
+DEF_HELPER_2(mttc0_vpeconf0, void, env, tl)
+DEF_HELPER_2(mtc0_vpeconf1, void, env, tl)
+DEF_HELPER_2(mtc0_yqmask, void, env, tl)
+DEF_HELPER_2(mtc0_vpeopt, void, env, tl)
+DEF_HELPER_2(mtc0_entrylo0, void, env, tl)
+DEF_HELPER_2(mtc0_tcstatus, void, env, tl)
+DEF_HELPER_2(mttc0_tcstatus, void, env, tl)
+DEF_HELPER_2(mtc0_tcbind, void, env, tl)
+DEF_HELPER_2(mttc0_tcbind, void, env, tl)
+DEF_HELPER_2(mtc0_tcrestart, void, env, tl)
+DEF_HELPER_2(mttc0_tcrestart, void, env, tl)
+DEF_HELPER_2(mtc0_tchalt, void, env, tl)
+DEF_HELPER_2(mttc0_tchalt, void, env, tl)
+DEF_HELPER_2(mtc0_tccontext, void, env, tl)
+DEF_HELPER_2(mttc0_tccontext, void, env, tl)
+DEF_HELPER_2(mtc0_tcschedule, void, env, tl)
+DEF_HELPER_2(mttc0_tcschedule, void, env, tl)
+DEF_HELPER_2(mtc0_tcschefback, void, env, tl)
+DEF_HELPER_2(mttc0_tcschefback, void, env, tl)
+DEF_HELPER_2(mtc0_entrylo1, void, env, tl)
+DEF_HELPER_2(mtc0_context, void, env, tl)
+DEF_HELPER_2(mtc0_pagemask, void, env, tl)
+DEF_HELPER_2(mtc0_pagegrain, void, env, tl)
+DEF_HELPER_2(mtc0_wired, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf0, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf1, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf2, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf3, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf4, void, env, tl)
+DEF_HELPER_2(mtc0_hwrena, void, env, tl)
+DEF_HELPER_2(mtc0_count, void, env, tl)
+DEF_HELPER_2(mtc0_entryhi, void, env, tl)
+DEF_HELPER_2(mttc0_entryhi, void, env, tl)
+DEF_HELPER_2(mtc0_compare, void, env, tl)
+DEF_HELPER_2(mtc0_status, void, env, tl)
+DEF_HELPER_2(mttc0_status, void, env, tl)
+DEF_HELPER_2(mtc0_intctl, void, env, tl)
+DEF_HELPER_2(mtc0_srsctl, void, env, tl)
+DEF_HELPER_2(mtc0_cause, void, env, tl)
+DEF_HELPER_2(mttc0_cause, void, env, tl)
+DEF_HELPER_2(mtc0_ebase, void, env, tl)
+DEF_HELPER_2(mttc0_ebase, void, env, tl)
+DEF_HELPER_2(mtc0_config0, void, env, tl)
+DEF_HELPER_2(mtc0_config2, void, env, tl)
+DEF_HELPER_2(mtc0_lladdr, void, env, tl)
+DEF_HELPER_3(mtc0_watchlo, void, env, tl, i32)
+DEF_HELPER_3(mtc0_watchhi, void, env, tl, i32)
+DEF_HELPER_2(mtc0_xcontext, void, env, tl)
+DEF_HELPER_2(mtc0_framemask, void, env, tl)
+DEF_HELPER_2(mtc0_debug, void, env, tl)
+DEF_HELPER_2(mttc0_debug, void, env, tl)
+DEF_HELPER_2(mtc0_performance0, void, env, tl)
+DEF_HELPER_2(mtc0_taglo, void, env, tl)
+DEF_HELPER_2(mtc0_datalo, void, env, tl)
+DEF_HELPER_2(mtc0_taghi, void, env, tl)
+DEF_HELPER_2(mtc0_datahi, void, env, tl)
 
 /* MIPS MT functions */
-DEF_HELPER_1(mftgpr, tl, i32);
-DEF_HELPER_1(mftlo, tl, i32)
-DEF_HELPER_1(mfthi, tl, i32)
-DEF_HELPER_1(mftacx, tl, i32)
-DEF_HELPER_0(mftdsp, tl)
-DEF_HELPER_2(mttgpr, void, tl, i32)
-DEF_HELPER_2(mttlo, void, tl, i32)
-DEF_HELPER_2(mtthi, void, tl, i32)
-DEF_HELPER_2(mttacx, void, tl, i32)
-DEF_HELPER_1(mttdsp, void, tl)
+DEF_HELPER_2(mftgpr, tl, env, i32);
+DEF_HELPER_2(mftlo, tl, env, i32)
+DEF_HELPER_2(mfthi, tl, env, i32)
+DEF_HELPER_2(mftacx, tl, env, i32)
+DEF_HELPER_1(mftdsp, tl, env)
+DEF_HELPER_3(mttgpr, void, env, tl, i32)
+DEF_HELPER_3(mttlo, void, env, tl, i32)
+DEF_HELPER_3(mtthi, void, env, tl, i32)
+DEF_HELPER_3(mttacx, void, env, tl, i32)
+DEF_HELPER_2(mttdsp, void, env, tl)
 DEF_HELPER_0(dmt, tl)
 DEF_HELPER_0(emt, tl)
-DEF_HELPER_0(dvpe, tl)
-DEF_HELPER_0(evpe, tl)
+DEF_HELPER_1(dvpe, tl, env)
+DEF_HELPER_1(evpe, tl, env)
 #endif /* !CONFIG_USER_ONLY */
 
 /* microMIPS functions */
-DEF_HELPER_3(lwm, void, tl, tl, i32);
-DEF_HELPER_3(swm, void, tl, tl, i32);
+DEF_HELPER_4(lwm, void, env, tl, tl, i32);
+DEF_HELPER_4(swm, void, env, tl, tl, i32);
 #ifdef TARGET_MIPS64
-DEF_HELPER_3(ldm, void, tl, tl, i32);
-DEF_HELPER_3(sdm, void, tl, tl, i32);
+DEF_HELPER_4(ldm, void, env, tl, tl, i32);
+DEF_HELPER_4(sdm, void, env, tl, tl, i32);
 #endif
 
 DEF_HELPER_2(fork, void, tl, tl)
-DEF_HELPER_1(yield, tl, tl)
+DEF_HELPER_2(yield, tl, env, tl)
 
 /* CP1 functions */
-DEF_HELPER_1(cfc1, tl, i32)
-DEF_HELPER_2(ctc1, void, tl, i32)
+DEF_HELPER_2(cfc1, tl, env, i32)
+DEF_HELPER_3(ctc1, void, env, tl, i32)
 
-DEF_HELPER_1(float_cvtd_s, i64, i32)
-DEF_HELPER_1(float_cvtd_w, i64, i32)
-DEF_HELPER_1(float_cvtd_l, i64, i64)
-DEF_HELPER_1(float_cvtl_d, i64, i64)
-DEF_HELPER_1(float_cvtl_s, i64, i32)
-DEF_HELPER_1(float_cvtps_pw, i64, i64)
-DEF_HELPER_1(float_cvtpw_ps, i64, i64)
-DEF_HELPER_1(float_cvts_d, i32, i64)
-DEF_HELPER_1(float_cvts_w, i32, i32)
-DEF_HELPER_1(float_cvts_l, i32, i64)
-DEF_HELPER_1(float_cvts_pl, i32, i32)
-DEF_HELPER_1(float_cvts_pu, i32, i32)
-DEF_HELPER_1(float_cvtw_s, i32, i32)
-DEF_HELPER_1(float_cvtw_d, i32, i64)
+DEF_HELPER_2(float_cvtd_s, i64, env, i32)
+DEF_HELPER_2(float_cvtd_w, i64, env, i32)
+DEF_HELPER_2(float_cvtd_l, i64, env, i64)
+DEF_HELPER_2(float_cvtl_d, i64, env, i64)
+DEF_HELPER_2(float_cvtl_s, i64, env, i32)
+DEF_HELPER_2(float_cvtps_pw, i64, env, i64)
+DEF_HELPER_2(float_cvtpw_ps, i64, env, i64)
+DEF_HELPER_2(float_cvts_d, i32, env, i64)
+DEF_HELPER_2(float_cvts_w, i32, env, i32)
+DEF_HELPER_2(float_cvts_l, i32, env, i64)
+DEF_HELPER_2(float_cvts_pl, i32, env, i32)
+DEF_HELPER_2(float_cvts_pu, i32, env, i32)
+DEF_HELPER_2(float_cvtw_s, i32, env, i32)
+DEF_HELPER_2(float_cvtw_d, i32, env, i64)
 
-DEF_HELPER_2(float_addr_ps, i64, i64, i64)
-DEF_HELPER_2(float_mulr_ps, i64, i64, i64)
+DEF_HELPER_3(float_addr_ps, i64, env, i64, i64)
+DEF_HELPER_3(float_mulr_ps, i64, env, i64, i64)
 
-#define FOP_PROTO(op)                       \
-DEF_HELPER_1(float_ ## op ## l_s, i64, i32) \
-DEF_HELPER_1(float_ ## op ## l_d, i64, i64) \
-DEF_HELPER_1(float_ ## op ## w_s, i32, i32) \
-DEF_HELPER_1(float_ ## op ## w_d, i32, i64)
+#define FOP_PROTO(op)                            \
+DEF_HELPER_2(float_ ## op ## l_s, i64, env, i32) \
+DEF_HELPER_2(float_ ## op ## l_d, i64, env, i64) \
+DEF_HELPER_2(float_ ## op ## w_s, i32, env, i32) \
+DEF_HELPER_2(float_ ## op ## w_d, i32, env, i64)
 FOP_PROTO(round)
 FOP_PROTO(trunc)
 FOP_PROTO(ceil)
 FOP_PROTO(floor)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                       \
-DEF_HELPER_1(float_ ## op ## _s, i32, i32)  \
-DEF_HELPER_1(float_ ## op ## _d, i64, i64)
+#define FOP_PROTO(op)                            \
+DEF_HELPER_2(float_ ## op ## _s, i32, env, i32)  \
+DEF_HELPER_2(float_ ## op ## _d, i64, env, i64)
 FOP_PROTO(sqrt)
 FOP_PROTO(rsqrt)
 FOP_PROTO(recip)
@@ -228,14 +228,20 @@
 DEF_HELPER_1(float_ ## op ## _ps, i64, i64)
 FOP_PROTO(abs)
 FOP_PROTO(chs)
+#undef FOP_PROTO
+
+#define FOP_PROTO(op)                            \
+DEF_HELPER_2(float_ ## op ## _s, i32, env, i32)  \
+DEF_HELPER_2(float_ ## op ## _d, i64, env, i64)  \
+DEF_HELPER_2(float_ ## op ## _ps, i64, env, i64)
 FOP_PROTO(recip1)
 FOP_PROTO(rsqrt1)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                             \
-DEF_HELPER_2(float_ ## op ## _s, i32, i32, i32)   \
-DEF_HELPER_2(float_ ## op ## _d, i64, i64, i64)   \
-DEF_HELPER_2(float_ ## op ## _ps, i64, i64, i64)
+#define FOP_PROTO(op)                                  \
+DEF_HELPER_3(float_ ## op ## _s, i32, env, i32, i32)   \
+DEF_HELPER_3(float_ ## op ## _d, i64, env, i64, i64)   \
+DEF_HELPER_3(float_ ## op ## _ps, i64, env, i64, i64)
 FOP_PROTO(add)
 FOP_PROTO(sub)
 FOP_PROTO(mul)
@@ -244,23 +250,23 @@
 FOP_PROTO(rsqrt2)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                                 \
-DEF_HELPER_3(float_ ## op ## _s, i32, i32, i32, i32)  \
-DEF_HELPER_3(float_ ## op ## _d, i64, i64, i64, i64)  \
-DEF_HELPER_3(float_ ## op ## _ps, i64, i64, i64, i64)
+#define FOP_PROTO(op)                                      \
+DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32)  \
+DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)  \
+DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64)
 FOP_PROTO(muladd)
 FOP_PROTO(mulsub)
 FOP_PROTO(nmuladd)
 FOP_PROTO(nmulsub)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                               \
-DEF_HELPER_3(cmp_d_ ## op, void, i64, i64, int)     \
-DEF_HELPER_3(cmpabs_d_ ## op, void, i64, i64, int)  \
-DEF_HELPER_3(cmp_s_ ## op, void, i32, i32, int)     \
-DEF_HELPER_3(cmpabs_s_ ## op, void, i32, i32, int)  \
-DEF_HELPER_3(cmp_ps_ ## op, void, i64, i64, int)    \
-DEF_HELPER_3(cmpabs_ps_ ## op, void, i64, i64, int)
+#define FOP_PROTO(op)                                    \
+DEF_HELPER_4(cmp_d_ ## op, void, env, i64, i64, int)     \
+DEF_HELPER_4(cmpabs_d_ ## op, void, env, i64, i64, int)  \
+DEF_HELPER_4(cmp_s_ ## op, void, env, i32, i32, int)     \
+DEF_HELPER_4(cmpabs_s_ ## op, void, env, i32, i32, int)  \
+DEF_HELPER_4(cmp_ps_ ## op, void, env, i64, i64, int)    \
+DEF_HELPER_4(cmpabs_ps_ ## op, void, env, i64, i64, int)
 FOP_PROTO(f)
 FOP_PROTO(un)
 FOP_PROTO(eq)
@@ -281,20 +287,79 @@
 
 /* Special functions */
 #ifndef CONFIG_USER_ONLY
-DEF_HELPER_0(tlbwi, void)
-DEF_HELPER_0(tlbwr, void)
-DEF_HELPER_0(tlbp, void)
-DEF_HELPER_0(tlbr, void)
-DEF_HELPER_0(di, tl)
-DEF_HELPER_0(ei, tl)
-DEF_HELPER_0(eret, void)
-DEF_HELPER_0(deret, void)
+DEF_HELPER_1(tlbwi, void, env)
+DEF_HELPER_1(tlbwr, void, env)
+DEF_HELPER_1(tlbp, void, env)
+DEF_HELPER_1(tlbr, void, env)
+DEF_HELPER_1(di, tl, env)
+DEF_HELPER_1(ei, tl, env)
+DEF_HELPER_1(eret, void, env)
+DEF_HELPER_1(deret, void, env)
 #endif /* !CONFIG_USER_ONLY */
-DEF_HELPER_0(rdhwr_cpunum, tl)
-DEF_HELPER_0(rdhwr_synci_step, tl)
-DEF_HELPER_0(rdhwr_cc, tl)
-DEF_HELPER_0(rdhwr_ccres, tl)
-DEF_HELPER_1(pmon, void, int)
-DEF_HELPER_0(wait, void)
+DEF_HELPER_1(rdhwr_cpunum, tl, env)
+DEF_HELPER_1(rdhwr_synci_step, tl, env)
+DEF_HELPER_1(rdhwr_cc, tl, env)
+DEF_HELPER_1(rdhwr_ccres, tl, env)
+DEF_HELPER_2(pmon, void, env, int)
+DEF_HELPER_1(wait, void, env)
+
+/* Loongson multimedia functions.  */
+DEF_HELPER_FLAGS_2(paddsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(paddb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(psubsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubush, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubsb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubusb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psubb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pshufh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(packsswh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(packsshb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(packushb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(punpcklhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpckhhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpcklbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpckhbh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpcklwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(punpckhwd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pavgh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pavgb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmaxsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pminsh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmaxub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pminub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pcmpeqw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpgtw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpeqh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpgth, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpeqb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pcmpgtb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(psllw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psllh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psrlw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psrlh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psraw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(psrah, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pmullh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmulhh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmulhuh, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(pmaddhw, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+
+DEF_HELPER_FLAGS_2(pasubub, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64, i64)
+DEF_HELPER_FLAGS_1(biadd, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
+DEF_HELPER_FLAGS_1(pmovmskb, TCG_CALL_CONST | TCG_CALL_PURE, i64, i64)
 
 #include "def-helper.h"
diff --git a/target-mips/lmi_helper.c b/target-mips/lmi_helper.c
new file mode 100644
index 0000000..1b24353
--- /dev/null
+++ b/target-mips/lmi_helper.c
@@ -0,0 +1,744 @@
+/*
+ *  Loongson Multimedia Instruction emulation helpers for QEMU.
+ *
+ *  Copyright (c) 2011  Richard Henderson <rth@twiddle.net>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "cpu.h"
+#include "helper.h"
+
+/* If the byte ordering doesn't matter, i.e. all columns are treated
+   identically, then this union can be used directly.  If byte ordering
+   does matter, we generally ignore dumping to memory.  */
+typedef union {
+    uint8_t  ub[8];
+    int8_t   sb[8];
+    uint16_t uh[4];
+    int16_t  sh[4];
+    uint32_t uw[2];
+    int32_t  sw[2];
+    uint64_t d;
+} LMIValue;
+
+/* Some byte ordering issues can be mitigated by XORing in the following.  */
+#ifdef HOST_WORDS_BIGENDIAN
+# define BYTE_ORDER_XOR(N) N
+#else
+# define BYTE_ORDER_XOR(N) 0
+#endif
+
+#define SATSB(x)  (x < -0x80 ? -0x80 : x > 0x7f ? 0x7f : x)
+#define SATUB(x)  (x > 0xff ? 0xff : x)
+
+#define SATSH(x)  (x < -0x8000 ? -0x8000 : x > 0x7fff ? 0x7fff : x)
+#define SATUH(x)  (x > 0xffff ? 0xffff : x)
+
+#define SATSW(x) \
+    (x < -0x80000000ll ? -0x80000000ll : x > 0x7fffffff ? 0x7fffffff : x)
+#define SATUW(x)  (x > 0xffffffffull ? 0xffffffffull : x)
+
+uint64_t helper_paddsb(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; ++i) {
+        int r = vs.sb[i] + vt.sb[i];
+        vs.sb[i] = SATSB(r);
+    }
+    return vs.d;
+}
+
+uint64_t helper_paddusb(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; ++i) {
+        int r = vs.ub[i] + vt.ub[i];
+        vs.ub[i] = SATUB(r);
+    }
+    return vs.d;
+}
+
+uint64_t helper_paddsh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; ++i) {
+        int r = vs.sh[i] + vt.sh[i];
+        vs.sh[i] = SATSH(r);
+    }
+    return vs.d;
+}
+
+uint64_t helper_paddush(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; ++i) {
+        int r = vs.uh[i] + vt.uh[i];
+        vs.uh[i] = SATUH(r);
+    }
+    return vs.d;
+}
+
+uint64_t helper_paddb(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; ++i) {
+        vs.ub[i] += vt.ub[i];
+    }
+    return vs.d;
+}
+
+uint64_t helper_paddh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; ++i) {
+        vs.uh[i] += vt.uh[i];
+    }
+    return vs.d;
+}
+
+uint64_t helper_paddw(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 2; ++i) {
+        vs.uw[i] += vt.uw[i];
+    }
+    return vs.d;
+}
+
+uint64_t helper_psubsb(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; ++i) {
+        int r = vs.sb[i] - vt.sb[i];
+        vs.sb[i] = SATSB(r);
+    }
+    return vs.d;
+}
+
+uint64_t helper_psubusb(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; ++i) {
+        int r = vs.ub[i] - vt.ub[i];
+        vs.ub[i] = SATUB(r);
+    }
+    return vs.d;
+}
+
+uint64_t helper_psubsh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; ++i) {
+        int r = vs.sh[i] - vt.sh[i];
+        vs.sh[i] = SATSH(r);
+    }
+    return vs.d;
+}
+
+uint64_t helper_psubush(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; ++i) {
+        int r = vs.uh[i] - vt.uh[i];
+        vs.uh[i] = SATUH(r);
+    }
+    return vs.d;
+}
+
+uint64_t helper_psubb(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; ++i) {
+        vs.ub[i] -= vt.ub[i];
+    }
+    return vs.d;
+}
+
+uint64_t helper_psubh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; ++i) {
+        vs.uh[i] -= vt.uh[i];
+    }
+    return vs.d;
+}
+
+uint64_t helper_psubw(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned int i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 2; ++i) {
+        vs.uw[i] -= vt.uw[i];
+    }
+    return vs.d;
+}
+
+uint64_t helper_pshufh(uint64_t fs, uint64_t ft)
+{
+    unsigned host = BYTE_ORDER_XOR(3);
+    LMIValue vd, vs;
+    unsigned i;
+
+    vs.d = fs;
+    vd.d = 0;
+    for (i = 0; i < 4; i++, ft >>= 2) {
+        vd.uh[i ^ host] = vs.uh[(ft & 3) ^ host];
+    }
+    return vd.d;
+}
+
+uint64_t helper_packsswh(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    int64_t tmp;
+
+    tmp = (int32_t)(fs >> 0);
+    tmp = SATSH(tmp);
+    fd |= (tmp & 0xffff) << 0;
+
+    tmp = (int32_t)(fs >> 32);
+    tmp = SATSH(tmp);
+    fd |= (tmp & 0xffff) << 16;
+
+    tmp = (int32_t)(ft >> 0);
+    tmp = SATSH(tmp);
+    fd |= (tmp & 0xffff) << 32;
+
+    tmp = (int32_t)(ft >> 32);
+    tmp = SATSH(tmp);
+    fd |= (tmp & 0xffff) << 48;
+
+    return fd;
+}
+
+uint64_t helper_packsshb(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int i;
+
+    for (i = 0; i < 4; ++i) {
+        int16_t tmp = fs >> (i * 16);
+        tmp = SATSB(tmp);
+        fd |= (uint64_t)(tmp & 0xff) << (i * 8);
+    }
+    for (i = 0; i < 4; ++i) {
+        int16_t tmp = ft >> (i * 16);
+        tmp = SATSB(tmp);
+        fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
+    }
+
+    return fd;
+}
+
+uint64_t helper_packushb(uint64_t fs, uint64_t ft)
+{
+    uint64_t fd = 0;
+    unsigned int i;
+
+    for (i = 0; i < 4; ++i) {
+        int16_t tmp = fs >> (i * 16);
+        tmp = SATUB(tmp);
+        fd |= (uint64_t)(tmp & 0xff) << (i * 8);
+    }
+    for (i = 0; i < 4; ++i) {
+        int16_t tmp = ft >> (i * 16);
+        tmp = SATUB(tmp);
+        fd |= (uint64_t)(tmp & 0xff) << (i * 8 + 32);
+    }
+
+    return fd;
+}
+
+uint64_t helper_punpcklwd(uint64_t fs, uint64_t ft)
+{
+    return (fs & 0xffffffff) | (ft << 32);
+}
+
+uint64_t helper_punpckhwd(uint64_t fs, uint64_t ft)
+{
+    return (fs >> 32) | (ft & ~0xffffffffull);
+}
+
+uint64_t helper_punpcklhw(uint64_t fs, uint64_t ft)
+{
+    unsigned host = BYTE_ORDER_XOR(3);
+    LMIValue vd, vs, vt;
+
+    vs.d = fs;
+    vt.d = ft;
+    vd.uh[0 ^ host] = vs.uh[0 ^ host];
+    vd.uh[1 ^ host] = vt.uh[0 ^ host];
+    vd.uh[2 ^ host] = vs.uh[1 ^ host];
+    vd.uh[3 ^ host] = vt.uh[1 ^ host];
+
+    return vd.d;
+}
+
+uint64_t helper_punpckhhw(uint64_t fs, uint64_t ft)
+{
+    unsigned host = BYTE_ORDER_XOR(3);
+    LMIValue vd, vs, vt;
+
+    vs.d = fs;
+    vt.d = ft;
+    vd.uh[0 ^ host] = vs.uh[2 ^ host];
+    vd.uh[1 ^ host] = vt.uh[2 ^ host];
+    vd.uh[2 ^ host] = vs.uh[3 ^ host];
+    vd.uh[3 ^ host] = vt.uh[3 ^ host];
+
+    return vd.d;
+}
+
+uint64_t helper_punpcklbh(uint64_t fs, uint64_t ft)
+{
+    unsigned host = BYTE_ORDER_XOR(7);
+    LMIValue vd, vs, vt;
+
+    vs.d = fs;
+    vt.d = ft;
+    vd.ub[0 ^ host] = vs.ub[0 ^ host];
+    vd.ub[1 ^ host] = vt.ub[0 ^ host];
+    vd.ub[2 ^ host] = vs.ub[1 ^ host];
+    vd.ub[3 ^ host] = vt.ub[1 ^ host];
+    vd.ub[4 ^ host] = vs.ub[2 ^ host];
+    vd.ub[5 ^ host] = vt.ub[2 ^ host];
+    vd.ub[6 ^ host] = vs.ub[3 ^ host];
+    vd.ub[7 ^ host] = vt.ub[3 ^ host];
+
+    return vd.d;
+}
+
+uint64_t helper_punpckhbh(uint64_t fs, uint64_t ft)
+{
+    unsigned host = BYTE_ORDER_XOR(7);
+    LMIValue vd, vs, vt;
+
+    vs.d = fs;
+    vt.d = ft;
+    vd.ub[0 ^ host] = vs.ub[4 ^ host];
+    vd.ub[1 ^ host] = vt.ub[4 ^ host];
+    vd.ub[2 ^ host] = vs.ub[5 ^ host];
+    vd.ub[3 ^ host] = vt.ub[5 ^ host];
+    vd.ub[4 ^ host] = vs.ub[6 ^ host];
+    vd.ub[5 ^ host] = vt.ub[6 ^ host];
+    vd.ub[6 ^ host] = vs.ub[7 ^ host];
+    vd.ub[7 ^ host] = vt.ub[7 ^ host];
+
+    return vd.d;
+}
+
+uint64_t helper_pavgh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; i++) {
+        vs.uh[i] = (vs.uh[i] + vt.uh[i] + 1) >> 1;
+    }
+    return vs.d;
+}
+
+uint64_t helper_pavgb(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; i++) {
+        vs.ub[i] = (vs.ub[i] + vt.ub[i] + 1) >> 1;
+    }
+    return vs.d;
+}
+
+uint64_t helper_pmaxsh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; i++) {
+        vs.sh[i] = (vs.sh[i] >= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_pminsh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; i++) {
+        vs.sh[i] = (vs.sh[i] <= vt.sh[i] ? vs.sh[i] : vt.sh[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_pmaxub(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; i++) {
+        vs.ub[i] = (vs.ub[i] >= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_pminub(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; i++) {
+        vs.ub[i] = (vs.ub[i] <= vt.ub[i] ? vs.ub[i] : vt.ub[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_pcmpeqw(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 2; i++) {
+        vs.uw[i] = -(vs.uw[i] == vt.uw[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_pcmpgtw(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 2; i++) {
+        vs.uw[i] = -(vs.uw[i] > vt.uw[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_pcmpeqh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; i++) {
+        vs.uh[i] = -(vs.uh[i] == vt.uh[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_pcmpgth(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; i++) {
+        vs.uh[i] = -(vs.uh[i] > vt.uh[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_pcmpeqb(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; i++) {
+        vs.ub[i] = -(vs.ub[i] == vt.ub[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_pcmpgtb(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; i++) {
+        vs.ub[i] = -(vs.ub[i] > vt.ub[i]);
+    }
+    return vs.d;
+}
+
+uint64_t helper_psllw(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs;
+    unsigned i;
+
+    ft &= 0x7f;
+    if (ft > 31) {
+        return 0;
+    }
+    vs.d = fs;
+    for (i = 0; i < 2; ++i) {
+        vs.uw[i] <<= ft;
+    }
+    return vs.d;
+}
+
+uint64_t helper_psrlw(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs;
+    unsigned i;
+
+    ft &= 0x7f;
+    if (ft > 31) {
+        return 0;
+    }
+    vs.d = fs;
+    for (i = 0; i < 2; ++i) {
+        vs.uw[i] >>= ft;
+    }
+    return vs.d;
+}
+
+uint64_t helper_psraw(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs;
+    unsigned i;
+
+    ft &= 0x7f;
+    if (ft > 31) {
+        ft = 31;
+    }
+    vs.d = fs;
+    for (i = 0; i < 2; ++i) {
+        vs.sw[i] >>= ft;
+    }
+    return vs.d;
+}
+
+uint64_t helper_psllh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs;
+    unsigned i;
+
+    ft &= 0x7f;
+    if (ft > 15) {
+        return 0;
+    }
+    vs.d = fs;
+    for (i = 0; i < 4; ++i) {
+        vs.uh[i] <<= ft;
+    }
+    return vs.d;
+}
+
+uint64_t helper_psrlh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs;
+    unsigned i;
+
+    ft &= 0x7f;
+    if (ft > 15) {
+        return 0;
+    }
+    vs.d = fs;
+    for (i = 0; i < 4; ++i) {
+        vs.uh[i] >>= ft;
+    }
+    return vs.d;
+}
+
+uint64_t helper_psrah(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs;
+    unsigned i;
+
+    ft &= 0x7f;
+    if (ft > 15) {
+        ft = 15;
+    }
+    vs.d = fs;
+    for (i = 0; i < 4; ++i) {
+        vs.sh[i] >>= ft;
+    }
+    return vs.d;
+}
+
+uint64_t helper_pmullh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; ++i) {
+        vs.sh[i] *= vt.sh[i];
+    }
+    return vs.d;
+}
+
+uint64_t helper_pmulhh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; ++i) {
+        int32_t r = vs.sh[i] * vt.sh[i];
+        vs.sh[i] = r >> 16;
+    }
+    return vs.d;
+}
+
+uint64_t helper_pmulhuh(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 4; ++i) {
+        uint32_t r = vs.uh[i] * vt.uh[i];
+        vs.uh[i] = r >> 16;
+    }
+    return vs.d;
+}
+
+uint64_t helper_pmaddhw(uint64_t fs, uint64_t ft)
+{
+    unsigned host = BYTE_ORDER_XOR(3);
+    LMIValue vs, vt;
+    uint32_t p0, p1;
+
+    vs.d = fs;
+    vt.d = ft;
+    p0  = vs.sh[0 ^ host] * vt.sh[0 ^ host];
+    p0 += vs.sh[1 ^ host] * vt.sh[1 ^ host];
+    p1  = vs.sh[2 ^ host] * vt.sh[2 ^ host];
+    p1 += vs.sh[3 ^ host] * vt.sh[3 ^ host];
+
+    return ((uint64_t)p1 << 32) | p0;
+}
+
+uint64_t helper_pasubub(uint64_t fs, uint64_t ft)
+{
+    LMIValue vs, vt;
+    unsigned i;
+
+    vs.d = fs;
+    vt.d = ft;
+    for (i = 0; i < 8; ++i) {
+        int r = vs.ub[i] - vt.ub[i];
+        vs.ub[i] = (r < 0 ? -r : r);
+    }
+    return vs.d;
+}
+
+uint64_t helper_biadd(uint64_t fs)
+{
+    unsigned i, fd;
+
+    for (i = fd = 0; i < 8; ++i) {
+        fd += (fs >> (i * 8)) & 0xff;
+    }
+    return fd & 0xffff;
+}
+
+uint64_t helper_pmovmskb(uint64_t fs)
+{
+    unsigned fd = 0;
+
+    fd |= ((fs >>  7) & 1) << 0;
+    fd |= ((fs >> 15) & 1) << 1;
+    fd |= ((fs >> 23) & 1) << 2;
+    fd |= ((fs >> 31) & 1) << 3;
+    fd |= ((fs >> 39) & 1) << 4;
+    fd |= ((fs >> 47) & 1) << 5;
+    fd |= ((fs >> 55) & 1) << 6;
+    fd |= ((fs >> 63) & 1) << 7;
+
+    return fd & 0xff;
+}
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 3d242aa..ce5ddaf 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -18,8 +18,6 @@
  */
 #include <stdlib.h>
 #include "cpu.h"
-#include "dyngen-exec.h"
-
 #include "host-utils.h"
 
 #include "helper.h"
@@ -35,7 +33,8 @@
 /*****************************************************************************/
 /* Exceptions processing helpers */
 
-void helper_raise_exception_err (uint32_t exception, int error_code)
+void helper_raise_exception_err(CPUMIPSState *env, uint32_t exception,
+                                int error_code)
 {
 #if 1
     if (exception < 0x100)
@@ -46,13 +45,13 @@
     cpu_loop_exit(env);
 }
 
-void helper_raise_exception (uint32_t exception)
+void helper_raise_exception(CPUMIPSState *env, uint32_t exception)
 {
-    helper_raise_exception_err(exception, 0);
+    helper_raise_exception_err(env, exception, 0);
 }
 
 #if !defined(CONFIG_USER_ONLY)
-static void do_restore_state(uintptr_t pc)
+static void do_restore_state(CPUMIPSState *env, uintptr_t pc)
 {
     TranslationBlock *tb;
 
@@ -65,20 +64,22 @@
 
 #if defined(CONFIG_USER_ONLY)
 #define HELPER_LD(name, insn, type)                                     \
-static inline type do_##name(target_ulong addr, int mem_idx)            \
+static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             int mem_idx)                               \
 {                                                                       \
     return (type) insn##_raw(addr);                                     \
 }
 #else
 #define HELPER_LD(name, insn, type)                                     \
-static inline type do_##name(target_ulong addr, int mem_idx)            \
+static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             int mem_idx)                               \
 {                                                                       \
     switch (mem_idx)                                                    \
     {                                                                   \
-    case 0: return (type) insn##_kernel(addr); break;                   \
-    case 1: return (type) insn##_super(addr); break;                    \
+    case 0: return (type) cpu_##insn##_kernel(env, addr); break;        \
+    case 1: return (type) cpu_##insn##_super(env, addr); break;         \
     default:                                                            \
-    case 2: return (type) insn##_user(addr); break;                     \
+    case 2: return (type) cpu_##insn##_user(env, addr); break;          \
     }                                                                   \
 }
 #endif
@@ -91,20 +92,22 @@
 
 #if defined(CONFIG_USER_ONLY)
 #define HELPER_ST(name, insn, type)                                     \
-static inline void do_##name(target_ulong addr, type val, int mem_idx)  \
+static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             type val, int mem_idx)                     \
 {                                                                       \
     insn##_raw(addr, val);                                              \
 }
 #else
 #define HELPER_ST(name, insn, type)                                     \
-static inline void do_##name(target_ulong addr, type val, int mem_idx)  \
+static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             type val, int mem_idx)                     \
 {                                                                       \
     switch (mem_idx)                                                    \
     {                                                                   \
-    case 0: insn##_kernel(addr, val); break;                            \
-    case 1: insn##_super(addr, val); break;                             \
+    case 0: cpu_##insn##_kernel(env, addr, val); break;                 \
+    case 1: cpu_##insn##_super(env, addr, val); break;                  \
     default:                                                            \
-    case 2: insn##_user(addr, val); break;                              \
+    case 2: cpu_##insn##_user(env, addr, val); break;                   \
     }                                                                   \
 }
 #endif
@@ -138,12 +141,12 @@
 #endif /* TARGET_MIPS64 */
 
 /* 64 bits arithmetic for 32 bits hosts */
-static inline uint64_t get_HILO (void)
+static inline uint64_t get_HILO(CPUMIPSState *env)
 {
     return ((uint64_t)(env->active_tc.HI[0]) << 32) | (uint32_t)env->active_tc.LO[0];
 }
 
-static inline target_ulong set_HIT0_LO(uint64_t HILO)
+static inline target_ulong set_HIT0_LO(CPUMIPSState *env, uint64_t HILO)
 {
     target_ulong tmp;
     env->active_tc.LO[0] = (int32_t)(HILO & 0xFFFFFFFF);
@@ -151,7 +154,7 @@
     return tmp;
 }
 
-static inline target_ulong set_HI_LOT0(uint64_t HILO)
+static inline target_ulong set_HI_LOT0(CPUMIPSState *env, uint64_t HILO)
 {
     target_ulong tmp = env->active_tc.LO[0] = (int32_t)(HILO & 0xFFFFFFFF);
     env->active_tc.HI[0] = (int32_t)(HILO >> 32);
@@ -159,91 +162,110 @@
 }
 
 /* Multiplication variants of the vr54xx. */
-target_ulong helper_muls (target_ulong arg1, target_ulong arg2)
+target_ulong helper_muls(CPUMIPSState *env, target_ulong arg1,
+                         target_ulong arg2)
 {
-    return set_HI_LOT0(0 - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
+    return set_HI_LOT0(env, 0 - ((int64_t)(int32_t)arg1 *
+                                 (int64_t)(int32_t)arg2));
 }
 
-target_ulong helper_mulsu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulsu(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    return set_HI_LOT0(0 - (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
+    return set_HI_LOT0(env, 0 - (uint64_t)(uint32_t)arg1 *
+                       (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_macc (target_ulong arg1, target_ulong arg2)
+target_ulong helper_macc(CPUMIPSState *env, target_ulong arg1,
+                         target_ulong arg2)
 {
-    return set_HI_LOT0((int64_t)get_HILO() + (int64_t)(int32_t)arg1 *
-                                             (int64_t)(int32_t)arg2);
+    return set_HI_LOT0(env, (int64_t)get_HILO(env) + (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_macchi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_macchi(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    return set_HIT0_LO((int64_t)get_HILO() + (int64_t)(int32_t)arg1 *
-                                             (int64_t)(int32_t)arg2);
+    return set_HIT0_LO(env, (int64_t)get_HILO(env) + (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_maccu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_maccu(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    return set_HI_LOT0((uint64_t)get_HILO() + (uint64_t)(uint32_t)arg1 *
-                                              (uint64_t)(uint32_t)arg2);
+    return set_HI_LOT0(env, (uint64_t)get_HILO(env) +
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_macchiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_macchiu(CPUMIPSState *env, target_ulong arg1,
+                            target_ulong arg2)
 {
-    return set_HIT0_LO((uint64_t)get_HILO() + (uint64_t)(uint32_t)arg1 *
-                                              (uint64_t)(uint32_t)arg2);
+    return set_HIT0_LO(env, (uint64_t)get_HILO(env) +
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_msac (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msac(CPUMIPSState *env, target_ulong arg1,
+                         target_ulong arg2)
 {
-    return set_HI_LOT0((int64_t)get_HILO() - (int64_t)(int32_t)arg1 *
-                                             (int64_t)(int32_t)arg2);
+    return set_HI_LOT0(env, (int64_t)get_HILO(env) - (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_msachi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msachi(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    return set_HIT0_LO((int64_t)get_HILO() - (int64_t)(int32_t)arg1 *
-                                             (int64_t)(int32_t)arg2);
+    return set_HIT0_LO(env, (int64_t)get_HILO(env) - (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_msacu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msacu(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    return set_HI_LOT0((uint64_t)get_HILO() - (uint64_t)(uint32_t)arg1 *
-                                              (uint64_t)(uint32_t)arg2);
+    return set_HI_LOT0(env, (uint64_t)get_HILO(env) -
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_msachiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msachiu(CPUMIPSState *env, target_ulong arg1,
+                            target_ulong arg2)
 {
-    return set_HIT0_LO((uint64_t)get_HILO() - (uint64_t)(uint32_t)arg1 *
-                                              (uint64_t)(uint32_t)arg2);
+    return set_HIT0_LO(env, (uint64_t)get_HILO(env) -
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_mulhi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulhi(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    return set_HIT0_LO((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
+    return set_HIT0_LO(env, (int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_mulhiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulhiu(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    return set_HIT0_LO((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
+    return set_HIT0_LO(env, (uint64_t)(uint32_t)arg1 *
+                       (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_mulshi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulshi(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    return set_HIT0_LO(0 - (int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
+    return set_HIT0_LO(env, 0 - (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_mulshiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulshiu(CPUMIPSState *env, target_ulong arg1,
+                            target_ulong arg2)
 {
-    return set_HIT0_LO(0 - (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
+    return set_HIT0_LO(env, 0 - (uint64_t)(uint32_t)arg1 *
+                       (uint64_t)(uint32_t)arg2);
 }
 
 #ifdef TARGET_MIPS64
-void helper_dmult (target_ulong arg1, target_ulong arg2)
+void helper_dmult(CPUMIPSState *env, target_ulong arg1, target_ulong arg2)
 {
     muls64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
 }
 
-void helper_dmultu (target_ulong arg1, target_ulong arg2)
+void helper_dmultu(CPUMIPSState *env, target_ulong arg1, target_ulong arg2)
 {
     mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
 }
@@ -251,7 +273,9 @@
 
 #ifndef CONFIG_USER_ONLY
 
-static inline target_phys_addr_t do_translate_address(target_ulong address, int rw)
+static inline target_phys_addr_t do_translate_address(CPUMIPSState *env,
+                                                      target_ulong address,
+                                                      int rw)
 {
     target_phys_addr_t lladdr;
 
@@ -265,10 +289,10 @@
 }
 
 #define HELPER_LD_ATOMIC(name, insn)                                          \
-target_ulong helper_##name(target_ulong arg, int mem_idx)                     \
+target_ulong helper_##name(CPUMIPSState *env, target_ulong arg, int mem_idx)  \
 {                                                                             \
-    env->lladdr = do_translate_address(arg, 0);                               \
-    env->llval = do_##insn(arg, mem_idx);                                     \
+    env->lladdr = do_translate_address(env, arg, 0);                          \
+    env->llval = do_##insn(env, arg, mem_idx);                                \
     return env->llval;                                                        \
 }
 HELPER_LD_ATOMIC(ll, lw)
@@ -278,18 +302,19 @@
 #undef HELPER_LD_ATOMIC
 
 #define HELPER_ST_ATOMIC(name, ld_insn, st_insn, almask)                      \
-target_ulong helper_##name(target_ulong arg1, target_ulong arg2, int mem_idx) \
+target_ulong helper_##name(CPUMIPSState *env, target_ulong arg1,              \
+                           target_ulong arg2, int mem_idx)                    \
 {                                                                             \
     target_long tmp;                                                          \
                                                                               \
     if (arg2 & almask) {                                                      \
         env->CP0_BadVAddr = arg2;                                             \
-        helper_raise_exception(EXCP_AdES);                                    \
+        helper_raise_exception(env, EXCP_AdES);                               \
     }                                                                         \
-    if (do_translate_address(arg2, 1) == env->lladdr) {                       \
-        tmp = do_##ld_insn(arg2, mem_idx);                                    \
+    if (do_translate_address(env, arg2, 1) == env->lladdr) {                  \
+        tmp = do_##ld_insn(env, arg2, mem_idx);                               \
         if (tmp == env->llval) {                                              \
-            do_##st_insn(arg2, arg1, mem_idx);                                \
+            do_##st_insn(env, arg2, arg1, mem_idx);                           \
             return 1;                                                         \
         }                                                                     \
     }                                                                         \
@@ -310,80 +335,84 @@
 #define GET_OFFSET(addr, offset) (addr - (offset))
 #endif
 
-target_ulong helper_lwl(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_lwl(CPUMIPSState *env, target_ulong arg1,
+                        target_ulong arg2, int mem_idx)
 {
     target_ulong tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
 
     if (GET_LMASK(arg2) <= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, 1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
         arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
     }
 
     if (GET_LMASK(arg2) <= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, 2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
         arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
     }
 
     if (GET_LMASK(arg2) == 0) {
-        tmp = do_lbu(GET_OFFSET(arg2, 3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
         arg1 = (arg1 & 0xFFFFFF00) | tmp;
     }
     return (int32_t)arg1;
 }
 
-target_ulong helper_lwr(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_lwr(CPUMIPSState *env, target_ulong arg1,
+                        target_ulong arg2, int mem_idx)
 {
     target_ulong tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0xFFFFFF00) | tmp;
 
     if (GET_LMASK(arg2) >= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, -1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
         arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
     }
 
     if (GET_LMASK(arg2) >= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, -2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
         arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
     }
 
     if (GET_LMASK(arg2) == 3) {
-        tmp = do_lbu(GET_OFFSET(arg2, -3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
         arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
     }
     return (int32_t)arg1;
 }
 
-void helper_swl(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_swl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)(arg1 >> 24), mem_idx);
+    do_sb(env, arg2, (uint8_t)(arg1 >> 24), mem_idx);
 
     if (GET_LMASK(arg2) <= 2)
-        do_sb(GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK(arg2) <= 1)
-        do_sb(GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK(arg2) == 0)
-        do_sb(GET_OFFSET(arg2, 3), (uint8_t)arg1, mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 3), (uint8_t)arg1, mem_idx);
 }
 
-void helper_swr(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_swr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)arg1, mem_idx);
+    do_sb(env, arg2, (uint8_t)arg1, mem_idx);
 
     if (GET_LMASK(arg2) >= 1)
-        do_sb(GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK(arg2) >= 2)
-        do_sb(GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK(arg2) == 3)
-        do_sb(GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
 }
 
 #if defined(TARGET_MIPS64)
@@ -396,167 +425,172 @@
 #define GET_LMASK64(v) (((v) & 7) ^ 7)
 #endif
 
-target_ulong helper_ldl(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_ldl(CPUMIPSState *env, target_ulong arg1,
+                        target_ulong arg2, int mem_idx)
 {
     uint64_t tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
 
     if (GET_LMASK64(arg2) <= 6) {
-        tmp = do_lbu(GET_OFFSET(arg2, 1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
         arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
     }
 
     if (GET_LMASK64(arg2) <= 5) {
-        tmp = do_lbu(GET_OFFSET(arg2, 2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
         arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
     }
 
     if (GET_LMASK64(arg2) <= 4) {
-        tmp = do_lbu(GET_OFFSET(arg2, 3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
         arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
     }
 
     if (GET_LMASK64(arg2) <= 3) {
-        tmp = do_lbu(GET_OFFSET(arg2, 4), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 4), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
     }
 
     if (GET_LMASK64(arg2) <= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, 5), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 5), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
     }
 
     if (GET_LMASK64(arg2) <= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, 6), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 6), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
     }
 
     if (GET_LMASK64(arg2) == 0) {
-        tmp = do_lbu(GET_OFFSET(arg2, 7), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 7), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
     }
 
     return arg1;
 }
 
-target_ulong helper_ldr(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_ldr(CPUMIPSState *env, target_ulong arg1,
+                        target_ulong arg2, int mem_idx)
 {
     uint64_t tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
 
     if (GET_LMASK64(arg2) >= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, -1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp  << 8);
     }
 
     if (GET_LMASK64(arg2) >= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, -2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
     }
 
     if (GET_LMASK64(arg2) >= 3) {
-        tmp = do_lbu(GET_OFFSET(arg2, -3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
     }
 
     if (GET_LMASK64(arg2) >= 4) {
-        tmp = do_lbu(GET_OFFSET(arg2, -4), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -4), mem_idx);
         arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
     }
 
     if (GET_LMASK64(arg2) >= 5) {
-        tmp = do_lbu(GET_OFFSET(arg2, -5), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -5), mem_idx);
         arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
     }
 
     if (GET_LMASK64(arg2) >= 6) {
-        tmp = do_lbu(GET_OFFSET(arg2, -6), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -6), mem_idx);
         arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
     }
 
     if (GET_LMASK64(arg2) == 7) {
-        tmp = do_lbu(GET_OFFSET(arg2, -7), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -7), mem_idx);
         arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
     }
 
     return arg1;
 }
 
-void helper_sdl(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_sdl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)(arg1 >> 56), mem_idx);
+    do_sb(env, arg2, (uint8_t)(arg1 >> 56), mem_idx);
 
     if (GET_LMASK64(arg2) <= 6)
-        do_sb(GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 48), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 48), mem_idx);
 
     if (GET_LMASK64(arg2) <= 5)
-        do_sb(GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 40), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 40), mem_idx);
 
     if (GET_LMASK64(arg2) <= 4)
-        do_sb(GET_OFFSET(arg2, 3), (uint8_t)(arg1 >> 32), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 3), (uint8_t)(arg1 >> 32), mem_idx);
 
     if (GET_LMASK64(arg2) <= 3)
-        do_sb(GET_OFFSET(arg2, 4), (uint8_t)(arg1 >> 24), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 4), (uint8_t)(arg1 >> 24), mem_idx);
 
     if (GET_LMASK64(arg2) <= 2)
-        do_sb(GET_OFFSET(arg2, 5), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 5), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK64(arg2) <= 1)
-        do_sb(GET_OFFSET(arg2, 6), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 6), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK64(arg2) <= 0)
-        do_sb(GET_OFFSET(arg2, 7), (uint8_t)arg1, mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 7), (uint8_t)arg1, mem_idx);
 }
 
-void helper_sdr(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_sdr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)arg1, mem_idx);
+    do_sb(env, arg2, (uint8_t)arg1, mem_idx);
 
     if (GET_LMASK64(arg2) >= 1)
-        do_sb(GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK64(arg2) >= 2)
-        do_sb(GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK64(arg2) >= 3)
-        do_sb(GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
 
     if (GET_LMASK64(arg2) >= 4)
-        do_sb(GET_OFFSET(arg2, -4), (uint8_t)(arg1 >> 32), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -4), (uint8_t)(arg1 >> 32), mem_idx);
 
     if (GET_LMASK64(arg2) >= 5)
-        do_sb(GET_OFFSET(arg2, -5), (uint8_t)(arg1 >> 40), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -5), (uint8_t)(arg1 >> 40), mem_idx);
 
     if (GET_LMASK64(arg2) >= 6)
-        do_sb(GET_OFFSET(arg2, -6), (uint8_t)(arg1 >> 48), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -6), (uint8_t)(arg1 >> 48), mem_idx);
 
     if (GET_LMASK64(arg2) == 7)
-        do_sb(GET_OFFSET(arg2, -7), (uint8_t)(arg1 >> 56), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -7), (uint8_t)(arg1 >> 56), mem_idx);
 }
 #endif /* TARGET_MIPS64 */
 
 static const int multiple_regs[] = { 16, 17, 18, 19, 20, 21, 22, 23, 30 };
 
-void helper_lwm (target_ulong addr, target_ulong reglist, uint32_t mem_idx)
+void helper_lwm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
 #ifdef CONFIG_USER_ONLY
 #undef ldfun
-#define ldfun ldl_raw
+#define ldfun(env, addr) ldl_raw(addr)
 #else
-    uint32_t (*ldfun)(target_ulong);
+    uint32_t (*ldfun)(CPUMIPSState *env, target_ulong);
 
     switch (mem_idx)
     {
-    case 0: ldfun = ldl_kernel; break;
-    case 1: ldfun = ldl_super; break;
+    case 0: ldfun = cpu_ldl_kernel; break;
+    case 1: ldfun = cpu_ldl_super; break;
     default:
-    case 2: ldfun = ldl_user; break;
+    case 2: ldfun = cpu_ldl_user; break;
     }
 #endif
 
@@ -564,32 +598,33 @@
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            env->active_tc.gpr[multiple_regs[i]] = (target_long) ldfun(addr);
+            env->active_tc.gpr[multiple_regs[i]] = (target_long)ldfun(env, addr);
             addr += 4;
         }
     }
 
     if (do_r31) {
-        env->active_tc.gpr[31] = (target_long) ldfun(addr);
+        env->active_tc.gpr[31] = (target_long)ldfun(env, addr);
     }
 }
 
-void helper_swm (target_ulong addr, target_ulong reglist, uint32_t mem_idx)
+void helper_swm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
 #ifdef CONFIG_USER_ONLY
 #undef stfun
-#define stfun stl_raw
+#define stfun(env, addr, val) stl_raw(addr, val)
 #else
-    void (*stfun)(target_ulong, uint32_t);
+    void (*stfun)(CPUMIPSState *env, target_ulong, uint32_t);
 
     switch (mem_idx)
     {
-    case 0: stfun = stl_kernel; break;
-    case 1: stfun = stl_super; break;
+    case 0: stfun = cpu_stl_kernel; break;
+    case 1: stfun = cpu_stl_super; break;
      default:
-    case 2: stfun = stl_user; break;
+    case 2: stfun = cpu_stl_user; break;
     }
 #endif
 
@@ -597,33 +632,34 @@
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            stfun(addr, env->active_tc.gpr[multiple_regs[i]]);
+            stfun(env, addr, env->active_tc.gpr[multiple_regs[i]]);
             addr += 4;
         }
     }
 
     if (do_r31) {
-        stfun(addr, env->active_tc.gpr[31]);
+        stfun(env, addr, env->active_tc.gpr[31]);
     }
 }
 
 #if defined(TARGET_MIPS64)
-void helper_ldm (target_ulong addr, target_ulong reglist, uint32_t mem_idx)
+void helper_ldm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
 #ifdef CONFIG_USER_ONLY
 #undef ldfun
-#define ldfun ldq_raw
+#define ldfun(env, addr) ldq_raw(addr)
 #else
-    uint64_t (*ldfun)(target_ulong);
+    uint64_t (*ldfun)(CPUMIPSState *env, target_ulong);
 
     switch (mem_idx)
     {
-    case 0: ldfun = ldq_kernel; break;
-    case 1: ldfun = ldq_super; break;
+    case 0: ldfun = cpu_ldq_kernel; break;
+    case 1: ldfun = cpu_ldq_super; break;
     default:
-    case 2: ldfun = ldq_user; break;
+    case 2: ldfun = cpu_ldq_user; break;
     }
 #endif
 
@@ -631,32 +667,33 @@
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            env->active_tc.gpr[multiple_regs[i]] = ldfun(addr);
+            env->active_tc.gpr[multiple_regs[i]] = ldfun(env, addr);
             addr += 8;
         }
     }
 
     if (do_r31) {
-        env->active_tc.gpr[31] = ldfun(addr);
+        env->active_tc.gpr[31] = ldfun(env, addr);
     }
 }
 
-void helper_sdm (target_ulong addr, target_ulong reglist, uint32_t mem_idx)
+void helper_sdm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
 #ifdef CONFIG_USER_ONLY
 #undef stfun
-#define stfun stq_raw
+#define stfun(env, addr, val) stq_raw(addr, val)
 #else
-    void (*stfun)(target_ulong, uint64_t);
+    void (*stfun)(CPUMIPSState *env, target_ulong, uint64_t);
 
     switch (mem_idx)
     {
-    case 0: stfun = stq_kernel; break;
-    case 1: stfun = stq_super; break;
+    case 0: stfun = cpu_stq_kernel; break;
+    case 1: stfun = cpu_stq_super; break;
      default:
-    case 2: stfun = stq_user; break;
+    case 2: stfun = cpu_stq_user; break;
     }
 #endif
 
@@ -664,13 +701,13 @@
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            stfun(addr, env->active_tc.gpr[multiple_regs[i]]);
+            stfun(env, addr, env->active_tc.gpr[multiple_regs[i]]);
             addr += 8;
         }
     }
 
     if (do_r31) {
-        stfun(addr, env->active_tc.gpr[31]);
+        stfun(env, addr, env->active_tc.gpr[31]);
     }
 }
 #endif
@@ -723,7 +760,7 @@
    FIXME: This code assumes that all VPEs have the same number of TCs,
           which depends on runtime setup. Can probably be fixed by
           walking the list of CPUMIPSStates.  */
-static CPUMIPSState *mips_cpu_map_tc(int *tc)
+static CPUMIPSState *mips_cpu_map_tc(CPUMIPSState *env, int *tc)
 {
     CPUMIPSState *other;
     int vpe_idx, nr_threads = env->nr_threads;
@@ -750,7 +787,7 @@
    These helper call synchronizes the regs for a given cpu.  */
 
 /* Called for updates to CP0_Status.  */
-static void sync_c0_status(CPUMIPSState *cpu, int tc)
+static void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc)
 {
     int32_t tcstatus, *tcst;
     uint32_t v = cpu->CP0_Status;
@@ -785,7 +822,8 @@
 }
 
 /* Called for updates to CP0_TCStatus.  */
-static void sync_c0_tcstatus(CPUMIPSState *cpu, int tc, target_ulong v)
+static void sync_c0_tcstatus(CPUMIPSState *cpu, int tc,
+                             target_ulong v)
 {
     uint32_t status;
     uint32_t tcu, tmx, tasid, tksu;
@@ -834,35 +872,35 @@
 }
 
 /* CP0 helpers */
-target_ulong helper_mfc0_mvpcontrol (void)
+target_ulong helper_mfc0_mvpcontrol(CPUMIPSState *env)
 {
     return env->mvp->CP0_MVPControl;
 }
 
-target_ulong helper_mfc0_mvpconf0 (void)
+target_ulong helper_mfc0_mvpconf0(CPUMIPSState *env)
 {
     return env->mvp->CP0_MVPConf0;
 }
 
-target_ulong helper_mfc0_mvpconf1 (void)
+target_ulong helper_mfc0_mvpconf1(CPUMIPSState *env)
 {
     return env->mvp->CP0_MVPConf1;
 }
 
-target_ulong helper_mfc0_random (void)
+target_ulong helper_mfc0_random(CPUMIPSState *env)
 {
     return (int32_t)cpu_mips_get_random(env);
 }
 
-target_ulong helper_mfc0_tcstatus (void)
+target_ulong helper_mfc0_tcstatus(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCStatus;
 }
 
-target_ulong helper_mftc0_tcstatus(void)
+target_ulong helper_mftc0_tcstatus(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCStatus;
@@ -870,15 +908,15 @@
         return other->tcs[other_tc].CP0_TCStatus;
 }
 
-target_ulong helper_mfc0_tcbind (void)
+target_ulong helper_mfc0_tcbind(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCBind;
 }
 
-target_ulong helper_mftc0_tcbind(void)
+target_ulong helper_mftc0_tcbind(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCBind;
@@ -886,15 +924,15 @@
         return other->tcs[other_tc].CP0_TCBind;
 }
 
-target_ulong helper_mfc0_tcrestart (void)
+target_ulong helper_mfc0_tcrestart(CPUMIPSState *env)
 {
     return env->active_tc.PC;
 }
 
-target_ulong helper_mftc0_tcrestart(void)
+target_ulong helper_mftc0_tcrestart(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.PC;
@@ -902,15 +940,15 @@
         return other->tcs[other_tc].PC;
 }
 
-target_ulong helper_mfc0_tchalt (void)
+target_ulong helper_mfc0_tchalt(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCHalt;
 }
 
-target_ulong helper_mftc0_tchalt(void)
+target_ulong helper_mftc0_tchalt(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCHalt;
@@ -918,15 +956,15 @@
         return other->tcs[other_tc].CP0_TCHalt;
 }
 
-target_ulong helper_mfc0_tccontext (void)
+target_ulong helper_mfc0_tccontext(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCContext;
 }
 
-target_ulong helper_mftc0_tccontext(void)
+target_ulong helper_mftc0_tccontext(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCContext;
@@ -934,15 +972,15 @@
         return other->tcs[other_tc].CP0_TCContext;
 }
 
-target_ulong helper_mfc0_tcschedule (void)
+target_ulong helper_mfc0_tcschedule(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCSchedule;
 }
 
-target_ulong helper_mftc0_tcschedule(void)
+target_ulong helper_mftc0_tcschedule(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCSchedule;
@@ -950,15 +988,15 @@
         return other->tcs[other_tc].CP0_TCSchedule;
 }
 
-target_ulong helper_mfc0_tcschefback (void)
+target_ulong helper_mfc0_tcschefback(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCScheFBack;
 }
 
-target_ulong helper_mftc0_tcschefback(void)
+target_ulong helper_mftc0_tcschefback(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCScheFBack;
@@ -966,24 +1004,24 @@
         return other->tcs[other_tc].CP0_TCScheFBack;
 }
 
-target_ulong helper_mfc0_count (void)
+target_ulong helper_mfc0_count(CPUMIPSState *env)
 {
     return (int32_t)cpu_mips_get_count(env);
 }
 
-target_ulong helper_mftc0_entryhi(void)
+target_ulong helper_mftc0_entryhi(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_EntryHi;
 }
 
-target_ulong helper_mftc0_cause(void)
+target_ulong helper_mftc0_cause(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     int32_t tccause;
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc) {
         tccause = other->CP0_Cause;
@@ -994,30 +1032,30 @@
     return tccause;
 }
 
-target_ulong helper_mftc0_status(void)
+target_ulong helper_mftc0_status(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_Status;
 }
 
-target_ulong helper_mfc0_lladdr (void)
+target_ulong helper_mfc0_lladdr(CPUMIPSState *env)
 {
     return (int32_t)(env->lladdr >> env->CP0_LLAddr_shift);
 }
 
-target_ulong helper_mfc0_watchlo (uint32_t sel)
+target_ulong helper_mfc0_watchlo(CPUMIPSState *env, uint32_t sel)
 {
     return (int32_t)env->CP0_WatchLo[sel];
 }
 
-target_ulong helper_mfc0_watchhi (uint32_t sel)
+target_ulong helper_mfc0_watchhi(CPUMIPSState *env, uint32_t sel)
 {
     return env->CP0_WatchHi[sel];
 }
 
-target_ulong helper_mfc0_debug (void)
+target_ulong helper_mfc0_debug(CPUMIPSState *env)
 {
     target_ulong t0 = env->CP0_Debug;
     if (env->hflags & MIPS_HFLAG_DM)
@@ -1026,11 +1064,11 @@
     return t0;
 }
 
-target_ulong helper_mftc0_debug(void)
+target_ulong helper_mftc0_debug(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     int32_t tcstatus;
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         tcstatus = other->active_tc.CP0_Debug_tcstatus;
@@ -1043,43 +1081,43 @@
 }
 
 #if defined(TARGET_MIPS64)
-target_ulong helper_dmfc0_tcrestart (void)
+target_ulong helper_dmfc0_tcrestart(CPUMIPSState *env)
 {
     return env->active_tc.PC;
 }
 
-target_ulong helper_dmfc0_tchalt (void)
+target_ulong helper_dmfc0_tchalt(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCHalt;
 }
 
-target_ulong helper_dmfc0_tccontext (void)
+target_ulong helper_dmfc0_tccontext(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCContext;
 }
 
-target_ulong helper_dmfc0_tcschedule (void)
+target_ulong helper_dmfc0_tcschedule(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCSchedule;
 }
 
-target_ulong helper_dmfc0_tcschefback (void)
+target_ulong helper_dmfc0_tcschefback(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCScheFBack;
 }
 
-target_ulong helper_dmfc0_lladdr (void)
+target_ulong helper_dmfc0_lladdr(CPUMIPSState *env)
 {
     return env->lladdr >> env->CP0_LLAddr_shift;
 }
 
-target_ulong helper_dmfc0_watchlo (uint32_t sel)
+target_ulong helper_dmfc0_watchlo(CPUMIPSState *env, uint32_t sel)
 {
     return env->CP0_WatchLo[sel];
 }
 #endif /* TARGET_MIPS64 */
 
-void helper_mtc0_index (target_ulong arg1)
+void helper_mtc0_index(CPUMIPSState *env, target_ulong arg1)
 {
     int num = 1;
     unsigned int tmp = env->tlb->nb_tlb;
@@ -1091,7 +1129,7 @@
     env->CP0_Index = (env->CP0_Index & 0x80000000) | (arg1 & (num - 1));
 }
 
-void helper_mtc0_mvpcontrol (target_ulong arg1)
+void helper_mtc0_mvpcontrol(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
     uint32_t newval;
@@ -1108,7 +1146,7 @@
     env->mvp->CP0_MVPControl = newval;
 }
 
-void helper_mtc0_vpecontrol (target_ulong arg1)
+void helper_mtc0_vpecontrol(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask;
     uint32_t newval;
@@ -1125,10 +1163,10 @@
     env->CP0_VPEControl = newval;
 }
 
-void helper_mttc0_vpecontrol(target_ulong arg1)
+void helper_mttc0_vpecontrol(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
     uint32_t mask;
     uint32_t newval;
 
@@ -1141,23 +1179,23 @@
     other->CP0_VPEControl = newval;
 }
 
-target_ulong helper_mftc0_vpecontrol(void)
+target_ulong helper_mftc0_vpecontrol(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
     /* FIXME: Mask away return zero on read bits.  */
     return other->CP0_VPEControl;
 }
 
-target_ulong helper_mftc0_vpeconf0(void)
+target_ulong helper_mftc0_vpeconf0(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_VPEConf0;
 }
 
-void helper_mtc0_vpeconf0 (target_ulong arg1)
+void helper_mtc0_vpeconf0(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
     uint32_t newval;
@@ -1174,10 +1212,10 @@
     env->CP0_VPEConf0 = newval;
 }
 
-void helper_mttc0_vpeconf0(target_ulong arg1)
+void helper_mttc0_vpeconf0(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
     uint32_t mask = 0;
     uint32_t newval;
 
@@ -1188,7 +1226,7 @@
     other->CP0_VPEConf0 = newval;
 }
 
-void helper_mtc0_vpeconf1 (target_ulong arg1)
+void helper_mtc0_vpeconf1(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
     uint32_t newval;
@@ -1206,25 +1244,25 @@
     env->CP0_VPEConf1 = newval;
 }
 
-void helper_mtc0_yqmask (target_ulong arg1)
+void helper_mtc0_yqmask(CPUMIPSState *env, target_ulong arg1)
 {
     /* Yield qualifier inputs not implemented. */
     env->CP0_YQMask = 0x00000000;
 }
 
-void helper_mtc0_vpeopt (target_ulong arg1)
+void helper_mtc0_vpeopt(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_VPEOpt = arg1 & 0x0000ffff;
 }
 
-void helper_mtc0_entrylo0 (target_ulong arg1)
+void helper_mtc0_entrylo0(CPUMIPSState *env, target_ulong arg1)
 {
     /* Large physaddr (PABITS) not implemented */
     /* 1k pages not implemented */
     env->CP0_EntryLo0 = arg1 & 0x3FFFFFFF;
 }
 
-void helper_mtc0_tcstatus (target_ulong arg1)
+void helper_mtc0_tcstatus(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = env->CP0_TCStatus_rw_bitmask;
     uint32_t newval;
@@ -1235,10 +1273,10 @@
     sync_c0_tcstatus(env, env->current_tc, newval);
 }
 
-void helper_mttc0_tcstatus (target_ulong arg1)
+void helper_mttc0_tcstatus(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.CP0_TCStatus = arg1;
@@ -1247,7 +1285,7 @@
     sync_c0_tcstatus(other, other_tc, arg1);
 }
 
-void helper_mtc0_tcbind (target_ulong arg1)
+void helper_mtc0_tcbind(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = (1 << CP0TCBd_TBE);
     uint32_t newval;
@@ -1258,12 +1296,12 @@
     env->active_tc.CP0_TCBind = newval;
 }
 
-void helper_mttc0_tcbind (target_ulong arg1)
+void helper_mttc0_tcbind(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     uint32_t mask = (1 << CP0TCBd_TBE);
     uint32_t newval;
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other->mvp->CP0_MVPControl & (1 << CP0MVPCo_VPC))
         mask |= (1 << CP0TCBd_CurVPE);
@@ -1276,7 +1314,7 @@
     }
 }
 
-void helper_mtc0_tcrestart (target_ulong arg1)
+void helper_mtc0_tcrestart(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.PC = arg1;
     env->active_tc.CP0_TCStatus &= ~(1 << CP0TCSt_TDS);
@@ -1284,10 +1322,10 @@
     /* MIPS16 not implemented. */
 }
 
-void helper_mttc0_tcrestart (target_ulong arg1)
+void helper_mttc0_tcrestart(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc) {
         other->active_tc.PC = arg1;
@@ -1302,7 +1340,7 @@
     }
 }
 
-void helper_mtc0_tchalt (target_ulong arg1)
+void helper_mtc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCHalt = arg1 & 0x1;
 
@@ -1314,10 +1352,10 @@
     }
 }
 
-void helper_mttc0_tchalt (target_ulong arg1)
+void helper_mttc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
 
@@ -1333,15 +1371,15 @@
     }
 }
 
-void helper_mtc0_tccontext (target_ulong arg1)
+void helper_mtc0_tccontext(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCContext = arg1;
 }
 
-void helper_mttc0_tccontext (target_ulong arg1)
+void helper_mttc0_tccontext(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.CP0_TCContext = arg1;
@@ -1349,15 +1387,15 @@
         other->tcs[other_tc].CP0_TCContext = arg1;
 }
 
-void helper_mtc0_tcschedule (target_ulong arg1)
+void helper_mtc0_tcschedule(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCSchedule = arg1;
 }
 
-void helper_mttc0_tcschedule (target_ulong arg1)
+void helper_mttc0_tcschedule(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.CP0_TCSchedule = arg1;
@@ -1365,15 +1403,15 @@
         other->tcs[other_tc].CP0_TCSchedule = arg1;
 }
 
-void helper_mtc0_tcschefback (target_ulong arg1)
+void helper_mtc0_tcschefback(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCScheFBack = arg1;
 }
 
-void helper_mttc0_tcschefback (target_ulong arg1)
+void helper_mttc0_tcschefback(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.CP0_TCScheFBack = arg1;
@@ -1381,25 +1419,25 @@
         other->tcs[other_tc].CP0_TCScheFBack = arg1;
 }
 
-void helper_mtc0_entrylo1 (target_ulong arg1)
+void helper_mtc0_entrylo1(CPUMIPSState *env, target_ulong arg1)
 {
     /* Large physaddr (PABITS) not implemented */
     /* 1k pages not implemented */
     env->CP0_EntryLo1 = arg1 & 0x3FFFFFFF;
 }
 
-void helper_mtc0_context (target_ulong arg1)
+void helper_mtc0_context(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Context = (env->CP0_Context & 0x007FFFFF) | (arg1 & ~0x007FFFFF);
 }
 
-void helper_mtc0_pagemask (target_ulong arg1)
+void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1)
 {
     /* 1k pages not implemented */
     env->CP0_PageMask = arg1 & (0x1FFFFFFF & (TARGET_PAGE_MASK << 1));
 }
 
-void helper_mtc0_pagegrain (target_ulong arg1)
+void helper_mtc0_pagegrain(CPUMIPSState *env, target_ulong arg1)
 {
     /* SmartMIPS not implemented */
     /* Large physaddr (PABITS) not implemented */
@@ -1407,47 +1445,47 @@
     env->CP0_PageGrain = 0;
 }
 
-void helper_mtc0_wired (target_ulong arg1)
+void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Wired = arg1 % env->tlb->nb_tlb;
 }
 
-void helper_mtc0_srsconf0 (target_ulong arg1)
+void helper_mtc0_srsconf0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf0 |= arg1 & env->CP0_SRSConf0_rw_bitmask;
 }
 
-void helper_mtc0_srsconf1 (target_ulong arg1)
+void helper_mtc0_srsconf1(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf1 |= arg1 & env->CP0_SRSConf1_rw_bitmask;
 }
 
-void helper_mtc0_srsconf2 (target_ulong arg1)
+void helper_mtc0_srsconf2(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf2 |= arg1 & env->CP0_SRSConf2_rw_bitmask;
 }
 
-void helper_mtc0_srsconf3 (target_ulong arg1)
+void helper_mtc0_srsconf3(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf3 |= arg1 & env->CP0_SRSConf3_rw_bitmask;
 }
 
-void helper_mtc0_srsconf4 (target_ulong arg1)
+void helper_mtc0_srsconf4(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf4 |= arg1 & env->CP0_SRSConf4_rw_bitmask;
 }
 
-void helper_mtc0_hwrena (target_ulong arg1)
+void helper_mtc0_hwrena(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_HWREna = arg1 & 0x0000000F;
 }
 
-void helper_mtc0_count (target_ulong arg1)
+void helper_mtc0_count(CPUMIPSState *env, target_ulong arg1)
 {
     cpu_mips_store_count(env, arg1);
 }
 
-void helper_mtc0_entryhi (target_ulong arg1)
+void helper_mtc0_entryhi(CPUMIPSState *env, target_ulong arg1)
 {
     target_ulong old, val;
 
@@ -1466,21 +1504,21 @@
         cpu_mips_tlb_flush(env, 1);
 }
 
-void helper_mttc0_entryhi(target_ulong arg1)
+void helper_mttc0_entryhi(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     other->CP0_EntryHi = arg1;
     sync_c0_entryhi(other, other_tc);
 }
 
-void helper_mtc0_compare (target_ulong arg1)
+void helper_mtc0_compare(CPUMIPSState *env, target_ulong arg1)
 {
     cpu_mips_store_compare(env, arg1);
 }
 
-void helper_mtc0_status (target_ulong arg1)
+void helper_mtc0_status(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t val, old;
     uint32_t mask = env->CP0_Status_rw_bitmask;
@@ -1489,7 +1527,7 @@
     old = env->CP0_Status;
     env->CP0_Status = (env->CP0_Status & ~mask) | val;
     if (env->CP0_Config3 & (1 << CP0C3_MT)) {
-        sync_c0_status(env, env->current_tc);
+        sync_c0_status(env, env, env->current_tc);
     } else {
         compute_hflags(env);
     }
@@ -1508,22 +1546,22 @@
     }
 }
 
-void helper_mttc0_status(target_ulong arg1)
+void helper_mttc0_status(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     other->CP0_Status = arg1 & ~0xf1000018;
-    sync_c0_status(other, other_tc);
+    sync_c0_status(env, other, other_tc);
 }
 
-void helper_mtc0_intctl (target_ulong arg1)
+void helper_mtc0_intctl(CPUMIPSState *env, target_ulong arg1)
 {
     /* vectored interrupts not implemented, no performance counters. */
     env->CP0_IntCtl = (env->CP0_IntCtl & ~0x000003e0) | (arg1 & 0x000003e0);
 }
 
-void helper_mtc0_srsctl (target_ulong arg1)
+void helper_mtc0_srsctl(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = (0xf << CP0SRSCtl_ESS) | (0xf << CP0SRSCtl_PSS);
     env->CP0_SRSCtl = (env->CP0_SRSCtl & ~mask) | (arg1 & mask);
@@ -1557,52 +1595,52 @@
     }
 }
 
-void helper_mtc0_cause(target_ulong arg1)
+void helper_mtc0_cause(CPUMIPSState *env, target_ulong arg1)
 {
     mtc0_cause(env, arg1);
 }
 
-void helper_mttc0_cause(target_ulong arg1)
+void helper_mttc0_cause(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     mtc0_cause(other, arg1);
 }
 
-target_ulong helper_mftc0_epc(void)
+target_ulong helper_mftc0_epc(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_EPC;
 }
 
-target_ulong helper_mftc0_ebase(void)
+target_ulong helper_mftc0_ebase(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_EBase;
 }
 
-void helper_mtc0_ebase (target_ulong arg1)
+void helper_mtc0_ebase(CPUMIPSState *env, target_ulong arg1)
 {
     /* vectored interrupts not implemented */
     env->CP0_EBase = (env->CP0_EBase & ~0x3FFFF000) | (arg1 & 0x3FFFF000);
 }
 
-void helper_mttc0_ebase(target_ulong arg1)
+void helper_mttc0_ebase(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
     other->CP0_EBase = (other->CP0_EBase & ~0x3FFFF000) | (arg1 & 0x3FFFF000);
 }
 
-target_ulong helper_mftc0_configx(target_ulong idx)
+target_ulong helper_mftc0_configx(CPUMIPSState *env, target_ulong idx)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     switch (idx) {
     case 0: return other->CP0_Config0;
@@ -1618,49 +1656,49 @@
     return 0;
 }
 
-void helper_mtc0_config0 (target_ulong arg1)
+void helper_mtc0_config0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Config0 = (env->CP0_Config0 & 0x81FFFFF8) | (arg1 & 0x00000007);
 }
 
-void helper_mtc0_config2 (target_ulong arg1)
+void helper_mtc0_config2(CPUMIPSState *env, target_ulong arg1)
 {
     /* tertiary/secondary caches not implemented */
     env->CP0_Config2 = (env->CP0_Config2 & 0x8FFF0FFF);
 }
 
-void helper_mtc0_lladdr (target_ulong arg1)
+void helper_mtc0_lladdr(CPUMIPSState *env, target_ulong arg1)
 {
     target_long mask = env->CP0_LLAddr_rw_bitmask;
     arg1 = arg1 << env->CP0_LLAddr_shift;
     env->lladdr = (env->lladdr & ~mask) | (arg1 & mask);
 }
 
-void helper_mtc0_watchlo (target_ulong arg1, uint32_t sel)
+void helper_mtc0_watchlo(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     /* Watch exceptions for instructions, data loads, data stores
        not implemented. */
     env->CP0_WatchLo[sel] = (arg1 & ~0x7);
 }
 
-void helper_mtc0_watchhi (target_ulong arg1, uint32_t sel)
+void helper_mtc0_watchhi(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     env->CP0_WatchHi[sel] = (arg1 & 0x40FF0FF8);
     env->CP0_WatchHi[sel] &= ~(env->CP0_WatchHi[sel] & arg1 & 0x7);
 }
 
-void helper_mtc0_xcontext (target_ulong arg1)
+void helper_mtc0_xcontext(CPUMIPSState *env, target_ulong arg1)
 {
     target_ulong mask = (1ULL << (env->SEGBITS - 7)) - 1;
     env->CP0_XContext = (env->CP0_XContext & mask) | (arg1 & ~mask);
 }
 
-void helper_mtc0_framemask (target_ulong arg1)
+void helper_mtc0_framemask(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Framemask = arg1; /* XXX */
 }
 
-void helper_mtc0_debug (target_ulong arg1)
+void helper_mtc0_debug(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Debug = (env->CP0_Debug & 0x8C03FC1F) | (arg1 & 0x13300120);
     if (arg1 & (1 << CP0DB_DM))
@@ -1669,11 +1707,11 @@
         env->hflags &= ~MIPS_HFLAG_DM;
 }
 
-void helper_mttc0_debug(target_ulong arg1)
+void helper_mttc0_debug(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     uint32_t val = arg1 & ((1 << CP0DB_SSt) | (1 << CP0DB_Halt));
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     /* XXX: Might be wrong, check with EJTAG spec. */
     if (other_tc == other->current_tc)
@@ -1685,36 +1723,36 @@
                      (arg1 & ~((1 << CP0DB_SSt) | (1 << CP0DB_Halt)));
 }
 
-void helper_mtc0_performance0 (target_ulong arg1)
+void helper_mtc0_performance0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Performance0 = arg1 & 0x000007ff;
 }
 
-void helper_mtc0_taglo (target_ulong arg1)
+void helper_mtc0_taglo(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_TagLo = arg1 & 0xFFFFFCF6;
 }
 
-void helper_mtc0_datalo (target_ulong arg1)
+void helper_mtc0_datalo(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_DataLo = arg1; /* XXX */
 }
 
-void helper_mtc0_taghi (target_ulong arg1)
+void helper_mtc0_taghi(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_TagHi = arg1; /* XXX */
 }
 
-void helper_mtc0_datahi (target_ulong arg1)
+void helper_mtc0_datahi(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_DataHi = arg1; /* XXX */
 }
 
 /* MIPS MT functions */
-target_ulong helper_mftgpr(uint32_t sel)
+target_ulong helper_mftgpr(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.gpr[sel];
@@ -1722,10 +1760,10 @@
         return other->tcs[other_tc].gpr[sel];
 }
 
-target_ulong helper_mftlo(uint32_t sel)
+target_ulong helper_mftlo(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.LO[sel];
@@ -1733,10 +1771,10 @@
         return other->tcs[other_tc].LO[sel];
 }
 
-target_ulong helper_mfthi(uint32_t sel)
+target_ulong helper_mfthi(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.HI[sel];
@@ -1744,10 +1782,10 @@
         return other->tcs[other_tc].HI[sel];
 }
 
-target_ulong helper_mftacx(uint32_t sel)
+target_ulong helper_mftacx(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.ACX[sel];
@@ -1755,10 +1793,10 @@
         return other->tcs[other_tc].ACX[sel];
 }
 
-target_ulong helper_mftdsp(void)
+target_ulong helper_mftdsp(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.DSPControl;
@@ -1766,10 +1804,10 @@
         return other->tcs[other_tc].DSPControl;
 }
 
-void helper_mttgpr(target_ulong arg1, uint32_t sel)
+void helper_mttgpr(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.gpr[sel] = arg1;
@@ -1777,10 +1815,10 @@
         other->tcs[other_tc].gpr[sel] = arg1;
 }
 
-void helper_mttlo(target_ulong arg1, uint32_t sel)
+void helper_mttlo(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.LO[sel] = arg1;
@@ -1788,10 +1826,10 @@
         other->tcs[other_tc].LO[sel] = arg1;
 }
 
-void helper_mtthi(target_ulong arg1, uint32_t sel)
+void helper_mtthi(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.HI[sel] = arg1;
@@ -1799,10 +1837,10 @@
         other->tcs[other_tc].HI[sel] = arg1;
 }
 
-void helper_mttacx(target_ulong arg1, uint32_t sel)
+void helper_mttacx(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.ACX[sel] = arg1;
@@ -1810,10 +1848,10 @@
         other->tcs[other_tc].ACX[sel] = arg1;
 }
 
-void helper_mttdsp(target_ulong arg1)
+void helper_mttdsp(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.DSPControl = arg1;
@@ -1834,7 +1872,7 @@
     return 0;
 }
 
-target_ulong helper_dvpe(void)
+target_ulong helper_dvpe(CPUMIPSState *env)
 {
     CPUMIPSState *other_cpu = first_cpu;
     target_ulong prev = env->mvp->CP0_MVPControl;
@@ -1850,7 +1888,7 @@
     return prev;
 }
 
-target_ulong helper_evpe(void)
+target_ulong helper_evpe(CPUMIPSState *env)
 {
     CPUMIPSState *other_cpu = first_cpu;
     target_ulong prev = env->mvp->CP0_MVPControl;
@@ -1876,7 +1914,7 @@
     // TODO: store to TC register
 }
 
-target_ulong helper_yield(target_ulong arg)
+target_ulong helper_yield(CPUMIPSState *env, target_ulong arg)
 {
     target_long arg1 = arg;
 
@@ -1887,13 +1925,13 @@
                 env->active_tc.CP0_TCStatus & (1 << CP0TCSt_DT)) {
                 env->CP0_VPEControl &= ~(0x7 << CP0VPECo_EXCPT);
                 env->CP0_VPEControl |= 4 << CP0VPECo_EXCPT;
-                helper_raise_exception(EXCP_THREAD);
+                helper_raise_exception(env, EXCP_THREAD);
             }
         }
     } else if (arg1 == 0) {
         if (0 /* TODO: TC underflow */) {
             env->CP0_VPEControl &= ~(0x7 << CP0VPECo_EXCPT);
-            helper_raise_exception(EXCP_THREAD);
+            helper_raise_exception(env, EXCP_THREAD);
         } else {
             // TODO: Deallocate TC
         }
@@ -1901,7 +1939,7 @@
         /* Yield qualifier inputs not implemented. */
         env->CP0_VPEControl &= ~(0x7 << CP0VPECo_EXCPT);
         env->CP0_VPEControl |= 2 << CP0VPECo_EXCPT;
-        helper_raise_exception(EXCP_THREAD);
+        helper_raise_exception(env, EXCP_THREAD);
     }
     return env->CP0_YQMask;
 }
@@ -1923,7 +1961,7 @@
     }
 }
 
-static void r4k_fill_tlb (int idx)
+static void r4k_fill_tlb(CPUMIPSState *env, int idx)
 {
     r4k_tlb_t *tlb;
 
@@ -1946,7 +1984,7 @@
     tlb->PFN[1] = (env->CP0_EntryLo1 >> 6) << 12;
 }
 
-void r4k_helper_tlbwi (void)
+void r4k_helper_tlbwi(CPUMIPSState *env)
 {
     int idx;
 
@@ -1958,18 +1996,18 @@
     r4k_mips_tlb_flush_extra (env, env->tlb->nb_tlb);
 
     r4k_invalidate_tlb(env, idx, 0);
-    r4k_fill_tlb(idx);
+    r4k_fill_tlb(env, idx);
 }
 
-void r4k_helper_tlbwr (void)
+void r4k_helper_tlbwr(CPUMIPSState *env)
 {
     int r = cpu_mips_get_random(env);
 
     r4k_invalidate_tlb(env, r, 1);
-    r4k_fill_tlb(r);
+    r4k_fill_tlb(env, r);
 }
 
-void r4k_helper_tlbp (void)
+void r4k_helper_tlbp(CPUMIPSState *env)
 {
     r4k_tlb_t *tlb;
     target_ulong mask;
@@ -2011,7 +2049,7 @@
     }
 }
 
-void r4k_helper_tlbr (void)
+void r4k_helper_tlbr(CPUMIPSState *env)
 {
     r4k_tlb_t *tlb;
     uint8_t ASID;
@@ -2035,28 +2073,28 @@
                         (tlb->C1 << 3) | (tlb->PFN[1] >> 6);
 }
 
-void helper_tlbwi(void)
+void helper_tlbwi(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbwi();
+    env->tlb->helper_tlbwi(env);
 }
 
-void helper_tlbwr(void)
+void helper_tlbwr(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbwr();
+    env->tlb->helper_tlbwr(env);
 }
 
-void helper_tlbp(void)
+void helper_tlbp(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbp();
+    env->tlb->helper_tlbp(env);
 }
 
-void helper_tlbr(void)
+void helper_tlbr(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbr();
+    env->tlb->helper_tlbr(env);
 }
 
 /* Specials */
-target_ulong helper_di (void)
+target_ulong helper_di(CPUMIPSState *env)
 {
     target_ulong t0 = env->CP0_Status;
 
@@ -2064,7 +2102,7 @@
     return t0;
 }
 
-target_ulong helper_ei (void)
+target_ulong helper_ei(CPUMIPSState *env)
 {
     target_ulong t0 = env->CP0_Status;
 
@@ -2072,7 +2110,7 @@
     return t0;
 }
 
-static void debug_pre_eret (void)
+static void debug_pre_eret(CPUMIPSState *env)
 {
     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
         qemu_log("ERET: PC " TARGET_FMT_lx " EPC " TARGET_FMT_lx,
@@ -2085,7 +2123,7 @@
     }
 }
 
-static void debug_post_eret (void)
+static void debug_post_eret(CPUMIPSState *env)
 {
     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
         qemu_log("  =>  PC " TARGET_FMT_lx " EPC " TARGET_FMT_lx,
@@ -2103,7 +2141,7 @@
     }
 }
 
-static void set_pc (target_ulong error_pc)
+static void set_pc(CPUMIPSState *env, target_ulong error_pc)
 {
     env->active_tc.PC = error_pc & ~(target_ulong)1;
     if (error_pc & 1) {
@@ -2113,78 +2151,78 @@
     }
 }
 
-void helper_eret (void)
+void helper_eret(CPUMIPSState *env)
 {
-    debug_pre_eret();
+    debug_pre_eret(env);
     if (env->CP0_Status & (1 << CP0St_ERL)) {
-        set_pc(env->CP0_ErrorEPC);
+        set_pc(env, env->CP0_ErrorEPC);
         env->CP0_Status &= ~(1 << CP0St_ERL);
     } else {
-        set_pc(env->CP0_EPC);
+        set_pc(env, env->CP0_EPC);
         env->CP0_Status &= ~(1 << CP0St_EXL);
     }
     compute_hflags(env);
-    debug_post_eret();
+    debug_post_eret(env);
     env->lladdr = 1;
 }
 
-void helper_deret (void)
+void helper_deret(CPUMIPSState *env)
 {
-    debug_pre_eret();
-    set_pc(env->CP0_DEPC);
+    debug_pre_eret(env);
+    set_pc(env, env->CP0_DEPC);
 
     env->hflags &= MIPS_HFLAG_DM;
     compute_hflags(env);
-    debug_post_eret();
+    debug_post_eret(env);
     env->lladdr = 1;
 }
 #endif /* !CONFIG_USER_ONLY */
 
-target_ulong helper_rdhwr_cpunum(void)
+target_ulong helper_rdhwr_cpunum(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 0)))
         return env->CP0_EBase & 0x3ff;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-target_ulong helper_rdhwr_synci_step(void)
+target_ulong helper_rdhwr_synci_step(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 1)))
         return env->SYNCI_Step;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-target_ulong helper_rdhwr_cc(void)
+target_ulong helper_rdhwr_cc(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 2)))
         return env->CP0_Count;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-target_ulong helper_rdhwr_ccres(void)
+target_ulong helper_rdhwr_ccres(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 3)))
         return env->CCRes;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-void helper_pmon (int function)
+void helper_pmon(CPUMIPSState *env, int function)
 {
     function /= 2;
     switch (function) {
@@ -2210,16 +2248,17 @@
     }
 }
 
-void helper_wait (void)
+void helper_wait(CPUMIPSState *env)
 {
     env->halted = 1;
     cpu_reset_interrupt(env, CPU_INTERRUPT_WAKE);
-    helper_raise_exception(EXCP_HLT);
+    helper_raise_exception(env, EXCP_HLT);
 }
 
 #if !defined(CONFIG_USER_ONLY)
 
-static void QEMU_NORETURN do_unaligned_access(target_ulong addr, int is_write,
+static void QEMU_NORETURN do_unaligned_access(CPUMIPSState *env,
+                                              target_ulong addr, int is_write,
                                               int is_user, uintptr_t retaddr);
 
 #define MMUSUFFIX _mmu
@@ -2237,23 +2276,20 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
-static void do_unaligned_access(target_ulong addr, int is_write,
-                                int is_user, uintptr_t retaddr)
+static void do_unaligned_access(CPUMIPSState *env, target_ulong addr,
+                                int is_write, int is_user, uintptr_t retaddr)
 {
     env->CP0_BadVAddr = addr;
-    do_restore_state (retaddr);
-    helper_raise_exception ((is_write == 1) ? EXCP_AdES : EXCP_AdEL);
+    do_restore_state(env, retaddr);
+    helper_raise_exception(env, (is_write == 1) ? EXCP_AdES : EXCP_AdEL);
 }
 
-void tlb_fill(CPUMIPSState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUMIPSState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUMIPSState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = cpu_mips_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
         if (retaddr) {
@@ -2265,20 +2301,17 @@
                 cpu_restore_state(tb, env, retaddr);
             }
         }
-        helper_raise_exception_err(env->exception_index, env->error_code);
+        helper_raise_exception_err(env, env->exception_index, env->error_code);
     }
-    env = saved_env;
 }
 
-void cpu_unassigned_access(CPUMIPSState *env1, target_phys_addr_t addr,
+void cpu_unassigned_access(CPUMIPSState *env, target_phys_addr_t addr,
                            int is_write, int is_exec, int unused, int size)
 {
-    env = env1;
-
     if (is_exec)
-        helper_raise_exception(EXCP_IBE);
+        helper_raise_exception(env, EXCP_IBE);
     else
-        helper_raise_exception(EXCP_DBE);
+        helper_raise_exception(env, EXCP_DBE);
 }
 #endif /* !CONFIG_USER_ONLY */
 
@@ -2307,7 +2340,7 @@
 #define RESTORE_FLUSH_MODE \
     set_flush_to_zero((env->active_fpu.fcr31 & (1 << 24)) != 0, &env->active_fpu.fp_status);
 
-target_ulong helper_cfc1 (uint32_t reg)
+target_ulong helper_cfc1(CPUMIPSState *env, uint32_t reg)
 {
     target_ulong arg1;
 
@@ -2332,7 +2365,7 @@
     return arg1;
 }
 
-void helper_ctc1 (target_ulong arg1, uint32_t reg)
+void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t reg)
 {
     switch(reg) {
     case 25:
@@ -2366,7 +2399,7 @@
     RESTORE_FLUSH_MODE;
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     if ((GET_FP_ENABLE(env->active_fpu.fcr31) | 0x20) & GET_FP_CAUSE(env->active_fpu.fcr31))
-        helper_raise_exception(EXCP_FPE);
+        helper_raise_exception(env, EXCP_FPE);
 }
 
 static inline int ieee_ex_to_mips(int xcpt)
@@ -2392,13 +2425,13 @@
     return ret;
 }
 
-static inline void update_fcr31(void)
+static inline void update_fcr31(CPUMIPSState *env)
 {
     int tmp = ieee_ex_to_mips(get_float_exception_flags(&env->active_fpu.fp_status));
 
     SET_FP_CAUSE(env->active_fpu.fcr31, tmp);
     if (GET_FP_ENABLE(env->active_fpu.fcr31) & tmp)
-        helper_raise_exception(EXCP_FPE);
+        helper_raise_exception(env, EXCP_FPE);
     else
         UPDATE_FP_FLAGS(env->active_fpu.fcr31, tmp);
 }
@@ -2409,71 +2442,71 @@
    paired single lower "pl", paired single upper "pu".  */
 
 /* unary operations, modifying fp status  */
-uint64_t helper_float_sqrt_d(uint64_t fdt0)
+uint64_t helper_float_sqrt_d(CPUMIPSState *env, uint64_t fdt0)
 {
     return float64_sqrt(fdt0, &env->active_fpu.fp_status);
 }
 
-uint32_t helper_float_sqrt_s(uint32_t fst0)
+uint32_t helper_float_sqrt_s(CPUMIPSState *env, uint32_t fst0)
 {
     return float32_sqrt(fst0, &env->active_fpu.fp_status);
 }
 
-uint64_t helper_float_cvtd_s(uint32_t fst0)
+uint64_t helper_float_cvtd_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float32_to_float64(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint64_t helper_float_cvtd_w(uint32_t wt0)
+uint64_t helper_float_cvtd_w(CPUMIPSState *env, uint32_t wt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = int32_to_float64(wt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint64_t helper_float_cvtd_l(uint64_t dt0)
+uint64_t helper_float_cvtd_l(CPUMIPSState *env, uint64_t dt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = int64_to_float64(dt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint64_t helper_float_cvtl_d(uint64_t fdt0)
+uint64_t helper_float_cvtl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_cvtl_s(uint32_t fst0)
+uint64_t helper_float_cvtl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_cvtps_pw(uint64_t dt0)
+uint64_t helper_float_cvtps_pw(CPUMIPSState *env, uint64_t dt0)
 {
     uint32_t fst2;
     uint32_t fsth2;
@@ -2481,11 +2514,11 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int32_to_float32(dt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     fsth2 = int32_to_float32(dt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_cvtpw_ps(uint64_t fdt0)
+uint64_t helper_float_cvtpw_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
     uint32_t wth2;
@@ -2493,7 +2526,7 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     wth2 = float32_to_int32(fdt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID)) {
         wt2 = FLOAT_SNAN32;
         wth2 = FLOAT_SNAN32;
@@ -2501,81 +2534,81 @@
     return ((uint64_t)wth2 << 32) | wt2;
 }
 
-uint32_t helper_float_cvts_d(uint64_t fdt0)
+uint32_t helper_float_cvts_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float64_to_float32(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint32_t helper_float_cvts_w(uint32_t wt0)
+uint32_t helper_float_cvts_w(CPUMIPSState *env, uint32_t wt0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int32_to_float32(wt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint32_t helper_float_cvts_l(uint64_t dt0)
+uint32_t helper_float_cvts_l(CPUMIPSState *env, uint64_t dt0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int64_to_float32(dt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint32_t helper_float_cvts_pl(uint32_t wt0)
+uint32_t helper_float_cvts_pl(CPUMIPSState *env, uint32_t wt0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = wt0;
-    update_fcr31();
+    update_fcr31(env);
     return wt2;
 }
 
-uint32_t helper_float_cvts_pu(uint32_t wth0)
+uint32_t helper_float_cvts_pu(CPUMIPSState *env, uint32_t wth0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = wth0;
-    update_fcr31();
+    update_fcr31(env);
     return wt2;
 }
 
-uint32_t helper_float_cvtw_s(uint32_t fst0)
+uint32_t helper_float_cvtw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_cvtw_d(uint64_t fdt0)
+uint32_t helper_float_cvtw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_roundl_d(uint64_t fdt0)
+uint64_t helper_float_roundl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
@@ -2583,13 +2616,13 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_roundl_s(uint32_t fst0)
+uint64_t helper_float_roundl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
@@ -2597,13 +2630,13 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_roundw_d(uint64_t fdt0)
+uint32_t helper_float_roundw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
@@ -2611,13 +2644,13 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_roundw_s(uint32_t fst0)
+uint32_t helper_float_roundw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
@@ -2625,61 +2658,61 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_truncl_d(uint64_t fdt0)
+uint64_t helper_float_truncl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float64_to_int64_round_to_zero(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_truncl_s(uint32_t fst0)
+uint64_t helper_float_truncl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float32_to_int64_round_to_zero(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_truncw_d(uint64_t fdt0)
+uint32_t helper_float_truncw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float64_to_int32_round_to_zero(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_truncw_s(uint32_t fst0)
+uint32_t helper_float_truncw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32_round_to_zero(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_ceill_d(uint64_t fdt0)
+uint64_t helper_float_ceill_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
@@ -2687,13 +2720,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_ceill_s(uint32_t fst0)
+uint64_t helper_float_ceill_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
@@ -2701,13 +2734,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_ceilw_d(uint64_t fdt0)
+uint32_t helper_float_ceilw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
@@ -2715,13 +2748,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_ceilw_s(uint32_t fst0)
+uint32_t helper_float_ceilw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
@@ -2729,13 +2762,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_floorl_d(uint64_t fdt0)
+uint64_t helper_float_floorl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
@@ -2743,13 +2776,13 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_floorl_s(uint32_t fst0)
+uint64_t helper_float_floorl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
@@ -2757,13 +2790,13 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_floorw_d(uint64_t fdt0)
+uint32_t helper_float_floorw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
@@ -2771,13 +2804,13 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_floorw_s(uint32_t fst0)
+uint32_t helper_float_floorw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
@@ -2785,7 +2818,7 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
@@ -2815,69 +2848,69 @@
 #undef FLOAT_UNOP
 
 /* MIPS specific unary operations */
-uint64_t helper_float_recip_d(uint64_t fdt0)
+uint64_t helper_float_recip_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_recip_s(uint32_t fst0)
+uint32_t helper_float_recip_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_rsqrt_d(uint64_t fdt0)
+uint64_t helper_float_rsqrt_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_rsqrt_s(uint32_t fst0)
+uint32_t helper_float_rsqrt_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_recip1_d(uint64_t fdt0)
+uint64_t helper_float_recip1_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_recip1_s(uint32_t fst0)
+uint32_t helper_float_recip1_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_recip1_ps(uint64_t fdt0)
+uint64_t helper_float_recip1_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
     uint32_t fsth2;
@@ -2885,33 +2918,33 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     fsth2 = float32_div(FLOAT_ONE32, fdt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_rsqrt1_d(uint64_t fdt0)
+uint64_t helper_float_rsqrt1_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_rsqrt1_s(uint32_t fst0)
+uint32_t helper_float_rsqrt1_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_rsqrt1_ps(uint64_t fdt0)
+uint64_t helper_float_rsqrt1_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
     uint32_t fsth2;
@@ -2921,39 +2954,43 @@
     fsth2 = float32_sqrt(fdt0 >> 32, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
     fsth2 = float32_div(FLOAT_ONE32, fsth2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-#define FLOAT_OP(name, p) void helper_float_##name##_##p(void)
+#define FLOAT_OP(name, p) void helper_float_##name##_##p(CPUMIPSState *env)
 
 /* binary operations */
 #define FLOAT_BINOP(name)                                          \
-uint64_t helper_float_ ## name ## _d(uint64_t fdt0, uint64_t fdt1)     \
+uint64_t helper_float_ ## name ## _d(CPUMIPSState *env,            \
+                                     uint64_t fdt0, uint64_t fdt1) \
 {                                                                  \
     uint64_t dt2;                                                  \
                                                                    \
     set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     dt2 = float64_ ## name (fdt0, fdt1, &env->active_fpu.fp_status);     \
-    update_fcr31();                                                \
+    update_fcr31(env);                                             \
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID)                \
         dt2 = FLOAT_QNAN64;                                        \
     return dt2;                                                    \
 }                                                                  \
                                                                    \
-uint32_t helper_float_ ## name ## _s(uint32_t fst0, uint32_t fst1)     \
+uint32_t helper_float_ ## name ## _s(CPUMIPSState *env,            \
+                                     uint32_t fst0, uint32_t fst1) \
 {                                                                  \
     uint32_t wt2;                                                  \
                                                                    \
     set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
-    update_fcr31();                                                \
+    update_fcr31(env);                                             \
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID)                \
         wt2 = FLOAT_QNAN32;                                        \
     return wt2;                                                    \
 }                                                                  \
                                                                    \
-uint64_t helper_float_ ## name ## _ps(uint64_t fdt0, uint64_t fdt1)    \
+uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,           \
+                                      uint64_t fdt0,               \
+                                      uint64_t fdt1)               \
 {                                                                  \
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;                             \
     uint32_t fsth0 = fdt0 >> 32;                                   \
@@ -2965,7 +3002,7 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
     wth2 = float32_ ## name (fsth0, fsth1, &env->active_fpu.fp_status);  \
-    update_fcr31();                                                \
+    update_fcr31(env);                                             \
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID) {              \
         wt2 = FLOAT_QNAN32;                                        \
         wth2 = FLOAT_QNAN32;                                       \
@@ -2981,22 +3018,28 @@
 
 /* ternary operations */
 #define FLOAT_TERNOP(name1, name2)                                        \
-uint64_t helper_float_ ## name1 ## name2 ## _d(uint64_t fdt0, uint64_t fdt1,  \
-                                           uint64_t fdt2)                 \
+uint64_t helper_float_ ## name1 ## name2 ## _d(CPUMIPSState *env,         \
+                                               uint64_t fdt0,             \
+                                               uint64_t fdt1,             \
+                                               uint64_t fdt2)             \
 {                                                                         \
     fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status);          \
     return float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status);          \
 }                                                                         \
                                                                           \
-uint32_t helper_float_ ## name1 ## name2 ## _s(uint32_t fst0, uint32_t fst1,  \
-                                           uint32_t fst2)                 \
+uint32_t helper_float_ ## name1 ## name2 ## _s(CPUMIPSState *env,         \
+                                               uint32_t fst0,             \
+                                               uint32_t fst1,             \
+                                               uint32_t fst2)             \
 {                                                                         \
     fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
     return float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
 }                                                                         \
                                                                           \
-uint64_t helper_float_ ## name1 ## name2 ## _ps(uint64_t fdt0, uint64_t fdt1, \
-                                            uint64_t fdt2)                \
+uint64_t helper_float_ ## name1 ## name2 ## _ps(CPUMIPSState *env,        \
+                                                uint64_t fdt0,            \
+                                                uint64_t fdt1,            \
+                                                uint64_t fdt2)            \
 {                                                                         \
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
     uint32_t fsth0 = fdt0 >> 32;                                          \
@@ -3018,24 +3061,30 @@
 
 /* negated ternary operations */
 #define FLOAT_NTERNOP(name1, name2)                                       \
-uint64_t helper_float_n ## name1 ## name2 ## _d(uint64_t fdt0, uint64_t fdt1, \
-                                           uint64_t fdt2)                 \
+uint64_t helper_float_n ## name1 ## name2 ## _d(CPUMIPSState *env,        \
+                                                uint64_t fdt0,            \
+                                                uint64_t fdt1,            \
+                                                uint64_t fdt2)            \
 {                                                                         \
     fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status);          \
     fdt2 = float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status);          \
     return float64_chs(fdt2);                                             \
 }                                                                         \
                                                                           \
-uint32_t helper_float_n ## name1 ## name2 ## _s(uint32_t fst0, uint32_t fst1, \
-                                           uint32_t fst2)                 \
+uint32_t helper_float_n ## name1 ## name2 ## _s(CPUMIPSState *env,        \
+                                                uint32_t fst0,            \
+                                                uint32_t fst1,            \
+                                                uint32_t fst2)            \
 {                                                                         \
     fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
     fst2 = float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
     return float32_chs(fst2);                                             \
 }                                                                         \
                                                                           \
-uint64_t helper_float_n ## name1 ## name2 ## _ps(uint64_t fdt0, uint64_t fdt1,\
-                                           uint64_t fdt2)                 \
+uint64_t helper_float_n ## name1 ## name2 ## _ps(CPUMIPSState *env,       \
+                                                 uint64_t fdt0,           \
+                                                 uint64_t fdt1,           \
+                                                 uint64_t fdt2)           \
 {                                                                         \
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
     uint32_t fsth0 = fdt0 >> 32;                                          \
@@ -3058,25 +3107,25 @@
 #undef FLOAT_NTERNOP
 
 /* MIPS specific binary operations */
-uint64_t helper_float_recip2_d(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_recip2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_mul(fdt0, fdt2, &env->active_fpu.fp_status);
     fdt2 = float64_chs(float64_sub(fdt2, FLOAT_ONE64, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_recip2_s(uint32_t fst0, uint32_t fst2)
+uint32_t helper_float_recip2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_recip2_ps(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_recip2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -3088,31 +3137,31 @@
     fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status));
     fsth2 = float32_chs(float32_sub(fsth2, FLOAT_ONE32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_rsqrt2_d(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_rsqrt2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_mul(fdt0, fdt2, &env->active_fpu.fp_status);
     fdt2 = float64_sub(fdt2, FLOAT_ONE64, &env->active_fpu.fp_status);
     fdt2 = float64_chs(float64_div(fdt2, FLOAT_TWO64, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_rsqrt2_s(uint32_t fst0, uint32_t fst2)
+uint32_t helper_float_rsqrt2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
     fst2 = float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_rsqrt2_ps(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_rsqrt2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -3126,11 +3175,11 @@
     fsth2 = float32_sub(fsth2, FLOAT_ONE32, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->active_fpu.fp_status));
     fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_addr_ps(uint64_t fdt0, uint64_t fdt1)
+uint64_t helper_float_addr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -3142,11 +3191,11 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_add (fst0, fsth0, &env->active_fpu.fp_status);
     fsth2 = float32_add (fst1, fsth1, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_mulr_ps(uint64_t fdt0, uint64_t fdt1)
+uint64_t helper_float_mulr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -3158,31 +3207,33 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul (fst0, fsth0, &env->active_fpu.fp_status);
     fsth2 = float32_mul (fst1, fsth1, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
 /* compare operations */
 #define FOP_COND_D(op, cond)                                   \
-void helper_cmp_d_ ## op (uint64_t fdt0, uint64_t fdt1, int cc)    \
+void helper_cmp_d_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
+                         uint64_t fdt1, int cc)                \
 {                                                              \
     int c;                                                     \
     set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                         \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
         CLEAR_FP_COND(cc, env->active_fpu);                    \
 }                                                              \
-void helper_cmpabs_d_ ## op (uint64_t fdt0, uint64_t fdt1, int cc) \
+void helper_cmpabs_d_ ## op(CPUMIPSState *env, uint64_t fdt0,  \
+                            uint64_t fdt1, int cc)             \
 {                                                              \
     int c;                                                     \
     set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     fdt0 = float64_abs(fdt0);                                  \
     fdt1 = float64_abs(fdt1);                                  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                         \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -3211,25 +3262,27 @@
 FOP_COND_D(ngt, float64_unordered(fdt1, fdt0, &env->active_fpu.fp_status)  || float64_le(fdt0, fdt1, &env->active_fpu.fp_status))
 
 #define FOP_COND_S(op, cond)                                   \
-void helper_cmp_s_ ## op (uint32_t fst0, uint32_t fst1, int cc)    \
+void helper_cmp_s_ ## op(CPUMIPSState *env, uint32_t fst0,     \
+                         uint32_t fst1, int cc)                \
 {                                                              \
     int c;                                                     \
     set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                         \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
         CLEAR_FP_COND(cc, env->active_fpu);                    \
 }                                                              \
-void helper_cmpabs_s_ ## op (uint32_t fst0, uint32_t fst1, int cc) \
+void helper_cmpabs_s_ ## op(CPUMIPSState *env, uint32_t fst0,  \
+                            uint32_t fst1, int cc)             \
 {                                                              \
     int c;                                                     \
     set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     fst0 = float32_abs(fst0);                                  \
     fst1 = float32_abs(fst1);                                  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                         \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -3258,7 +3311,8 @@
 FOP_COND_S(ngt, float32_unordered(fst1, fst0, &env->active_fpu.fp_status)  || float32_le(fst0, fst1, &env->active_fpu.fp_status))
 
 #define FOP_COND_PS(op, condl, condh)                           \
-void helper_cmp_ps_ ## op (uint64_t fdt0, uint64_t fdt1, int cc)    \
+void helper_cmp_ps_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
+                          uint64_t fdt1, int cc)                \
 {                                                               \
     uint32_t fst0, fsth0, fst1, fsth1;                          \
     int ch, cl;                                                 \
@@ -3269,7 +3323,7 @@
     fsth1 = fdt1 >> 32;                                         \
     cl = condl;                                                 \
     ch = condh;                                                 \
-    update_fcr31();                                             \
+    update_fcr31(env);                                          \
     if (cl)                                                     \
         SET_FP_COND(cc, env->active_fpu);                       \
     else                                                        \
@@ -3279,7 +3333,8 @@
     else                                                        \
         CLEAR_FP_COND(cc + 1, env->active_fpu);                 \
 }                                                               \
-void helper_cmpabs_ps_ ## op (uint64_t fdt0, uint64_t fdt1, int cc) \
+void helper_cmpabs_ps_ ## op(CPUMIPSState *env, uint64_t fdt0,  \
+                             uint64_t fdt1, int cc)             \
 {                                                               \
     uint32_t fst0, fsth0, fst1, fsth1;                          \
     int ch, cl;                                                 \
@@ -3289,7 +3344,7 @@
     fsth1 = float32_abs(fdt1 >> 32);                            \
     cl = condl;                                                 \
     ch = condh;                                                 \
-    update_fcr31();                                             \
+    update_fcr31(env);                                          \
     if (cl)                                                     \
         SET_FP_COND(cc, env->active_fpu);                       \
     else                                                        \
diff --git a/target-mips/translate.c b/target-mips/translate.c
index a884f75..454e5cc 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -28,7 +28,7 @@
 #define GEN_HELPER 1
 #include "helper.h"
 
-//#define MIPS_DEBUG_DISAS
+#define MIPS_DEBUG_DISAS 0
 //#define MIPS_DEBUG_SIGN_EXTENSIONS
 
 /* MIPS major opcodes */
@@ -446,6 +446,103 @@
     OPC_BC2     = (0x08 << 21) | OPC_CP2,
 };
 
+#define MASK_LMI(op)  (MASK_OP_MAJOR(op) | (op & (0x1F << 21)) | (op & 0x1F))
+
+enum {
+    OPC_PADDSH  = (24 << 21) | (0x00) | OPC_CP2,
+    OPC_PADDUSH = (25 << 21) | (0x00) | OPC_CP2,
+    OPC_PADDH   = (26 << 21) | (0x00) | OPC_CP2,
+    OPC_PADDW   = (27 << 21) | (0x00) | OPC_CP2,
+    OPC_PADDSB  = (28 << 21) | (0x00) | OPC_CP2,
+    OPC_PADDUSB = (29 << 21) | (0x00) | OPC_CP2,
+    OPC_PADDB   = (30 << 21) | (0x00) | OPC_CP2,
+    OPC_PADDD   = (31 << 21) | (0x00) | OPC_CP2,
+
+    OPC_PSUBSH  = (24 << 21) | (0x01) | OPC_CP2,
+    OPC_PSUBUSH = (25 << 21) | (0x01) | OPC_CP2,
+    OPC_PSUBH   = (26 << 21) | (0x01) | OPC_CP2,
+    OPC_PSUBW   = (27 << 21) | (0x01) | OPC_CP2,
+    OPC_PSUBSB  = (28 << 21) | (0x01) | OPC_CP2,
+    OPC_PSUBUSB = (29 << 21) | (0x01) | OPC_CP2,
+    OPC_PSUBB   = (30 << 21) | (0x01) | OPC_CP2,
+    OPC_PSUBD   = (31 << 21) | (0x01) | OPC_CP2,
+
+    OPC_PSHUFH   = (24 << 21) | (0x02) | OPC_CP2,
+    OPC_PACKSSWH = (25 << 21) | (0x02) | OPC_CP2,
+    OPC_PACKSSHB = (26 << 21) | (0x02) | OPC_CP2,
+    OPC_PACKUSHB = (27 << 21) | (0x02) | OPC_CP2,
+    OPC_XOR_CP2  = (28 << 21) | (0x02) | OPC_CP2,
+    OPC_NOR_CP2  = (29 << 21) | (0x02) | OPC_CP2,
+    OPC_AND_CP2  = (30 << 21) | (0x02) | OPC_CP2,
+    OPC_PANDN    = (31 << 21) | (0x02) | OPC_CP2,
+
+    OPC_PUNPCKLHW = (24 << 21) | (0x03) | OPC_CP2,
+    OPC_PUNPCKHHW = (25 << 21) | (0x03) | OPC_CP2,
+    OPC_PUNPCKLBH = (26 << 21) | (0x03) | OPC_CP2,
+    OPC_PUNPCKHBH = (27 << 21) | (0x03) | OPC_CP2,
+    OPC_PINSRH_0  = (28 << 21) | (0x03) | OPC_CP2,
+    OPC_PINSRH_1  = (29 << 21) | (0x03) | OPC_CP2,
+    OPC_PINSRH_2  = (30 << 21) | (0x03) | OPC_CP2,
+    OPC_PINSRH_3  = (31 << 21) | (0x03) | OPC_CP2,
+
+    OPC_PAVGH   = (24 << 21) | (0x08) | OPC_CP2,
+    OPC_PAVGB   = (25 << 21) | (0x08) | OPC_CP2,
+    OPC_PMAXSH  = (26 << 21) | (0x08) | OPC_CP2,
+    OPC_PMINSH  = (27 << 21) | (0x08) | OPC_CP2,
+    OPC_PMAXUB  = (28 << 21) | (0x08) | OPC_CP2,
+    OPC_PMINUB  = (29 << 21) | (0x08) | OPC_CP2,
+
+    OPC_PCMPEQW = (24 << 21) | (0x09) | OPC_CP2,
+    OPC_PCMPGTW = (25 << 21) | (0x09) | OPC_CP2,
+    OPC_PCMPEQH = (26 << 21) | (0x09) | OPC_CP2,
+    OPC_PCMPGTH = (27 << 21) | (0x09) | OPC_CP2,
+    OPC_PCMPEQB = (28 << 21) | (0x09) | OPC_CP2,
+    OPC_PCMPGTB = (29 << 21) | (0x09) | OPC_CP2,
+
+    OPC_PSLLW   = (24 << 21) | (0x0A) | OPC_CP2,
+    OPC_PSLLH   = (25 << 21) | (0x0A) | OPC_CP2,
+    OPC_PMULLH  = (26 << 21) | (0x0A) | OPC_CP2,
+    OPC_PMULHH  = (27 << 21) | (0x0A) | OPC_CP2,
+    OPC_PMULUW  = (28 << 21) | (0x0A) | OPC_CP2,
+    OPC_PMULHUH = (29 << 21) | (0x0A) | OPC_CP2,
+
+    OPC_PSRLW     = (24 << 21) | (0x0B) | OPC_CP2,
+    OPC_PSRLH     = (25 << 21) | (0x0B) | OPC_CP2,
+    OPC_PSRAW     = (26 << 21) | (0x0B) | OPC_CP2,
+    OPC_PSRAH     = (27 << 21) | (0x0B) | OPC_CP2,
+    OPC_PUNPCKLWD = (28 << 21) | (0x0B) | OPC_CP2,
+    OPC_PUNPCKHWD = (29 << 21) | (0x0B) | OPC_CP2,
+
+    OPC_ADDU_CP2 = (24 << 21) | (0x0C) | OPC_CP2,
+    OPC_OR_CP2   = (25 << 21) | (0x0C) | OPC_CP2,
+    OPC_ADD_CP2  = (26 << 21) | (0x0C) | OPC_CP2,
+    OPC_DADD_CP2 = (27 << 21) | (0x0C) | OPC_CP2,
+    OPC_SEQU_CP2 = (28 << 21) | (0x0C) | OPC_CP2,
+    OPC_SEQ_CP2  = (29 << 21) | (0x0C) | OPC_CP2,
+
+    OPC_SUBU_CP2 = (24 << 21) | (0x0D) | OPC_CP2,
+    OPC_PASUBUB  = (25 << 21) | (0x0D) | OPC_CP2,
+    OPC_SUB_CP2  = (26 << 21) | (0x0D) | OPC_CP2,
+    OPC_DSUB_CP2 = (27 << 21) | (0x0D) | OPC_CP2,
+    OPC_SLTU_CP2 = (28 << 21) | (0x0D) | OPC_CP2,
+    OPC_SLT_CP2  = (29 << 21) | (0x0D) | OPC_CP2,
+
+    OPC_SLL_CP2  = (24 << 21) | (0x0E) | OPC_CP2,
+    OPC_DSLL_CP2 = (25 << 21) | (0x0E) | OPC_CP2,
+    OPC_PEXTRH   = (26 << 21) | (0x0E) | OPC_CP2,
+    OPC_PMADDHW  = (27 << 21) | (0x0E) | OPC_CP2,
+    OPC_SLEU_CP2 = (28 << 21) | (0x0E) | OPC_CP2,
+    OPC_SLE_CP2  = (29 << 21) | (0x0E) | OPC_CP2,
+
+    OPC_SRL_CP2  = (24 << 21) | (0x0F) | OPC_CP2,
+    OPC_DSRL_CP2 = (25 << 21) | (0x0F) | OPC_CP2,
+    OPC_SRA_CP2  = (26 << 21) | (0x0F) | OPC_CP2,
+    OPC_DSRA_CP2 = (27 << 21) | (0x0F) | OPC_CP2,
+    OPC_BIADD    = (28 << 21) | (0x0F) | OPC_CP2,
+    OPC_PMOVMSKB = (29 << 21) | (0x0F) | OPC_CP2,
+};
+
+
 #define MASK_CP3(op)       MASK_OP_MAJOR(op) | (op & 0x3F)
 
 enum {
@@ -483,27 +580,45 @@
 
 #include "gen-icount.h"
 
-#define gen_helper_0i(name, arg) do {                             \
+#define gen_helper_0e0i(name, arg) do {                           \
     TCGv_i32 helper_tmp = tcg_const_i32(arg);                     \
-    gen_helper_##name(helper_tmp);                                \
+    gen_helper_##name(cpu_env, helper_tmp);                       \
     tcg_temp_free_i32(helper_tmp);                                \
     } while(0)
 
-#define gen_helper_1i(name, arg1, arg2) do {                      \
+#define gen_helper_0e1i(name, arg1, arg2) do {                    \
     TCGv_i32 helper_tmp = tcg_const_i32(arg2);                    \
-    gen_helper_##name(arg1, helper_tmp);                          \
+    gen_helper_##name(cpu_env, arg1, helper_tmp);                 \
     tcg_temp_free_i32(helper_tmp);                                \
     } while(0)
 
-#define gen_helper_2i(name, arg1, arg2, arg3) do {                \
+#define gen_helper_1e0i(name, ret, arg1) do {                     \
+    TCGv_i32 helper_tmp = tcg_const_i32(arg1);                    \
+    gen_helper_##name(ret, cpu_env, helper_tmp);                  \
+    tcg_temp_free_i32(helper_tmp);                                \
+    } while(0)
+
+#define gen_helper_1e1i(name, ret, arg1, arg2) do {               \
+    TCGv_i32 helper_tmp = tcg_const_i32(arg2);                    \
+    gen_helper_##name(ret, cpu_env, arg1, helper_tmp);            \
+    tcg_temp_free_i32(helper_tmp);                                \
+    } while(0)
+
+#define gen_helper_0e2i(name, arg1, arg2, arg3) do {              \
     TCGv_i32 helper_tmp = tcg_const_i32(arg3);                    \
-    gen_helper_##name(arg1, arg2, helper_tmp);                    \
+    gen_helper_##name(cpu_env, arg1, arg2, helper_tmp);           \
     tcg_temp_free_i32(helper_tmp);                                \
     } while(0)
 
-#define gen_helper_3i(name, arg1, arg2, arg3, arg4) do {          \
+#define gen_helper_1e2i(name, ret, arg1, arg2, arg3) do {         \
+    TCGv_i32 helper_tmp = tcg_const_i32(arg3);                    \
+    gen_helper_##name(ret, cpu_env, arg1, arg2, helper_tmp);      \
+    tcg_temp_free_i32(helper_tmp);                                \
+    } while(0)
+
+#define gen_helper_0e3i(name, arg1, arg2, arg3, arg4) do {        \
     TCGv_i32 helper_tmp = tcg_const_i32(arg4);                    \
-    gen_helper_##name(arg1, arg2, arg3, helper_tmp);              \
+    gen_helper_##name(cpu_env, arg1, arg2, arg3, helper_tmp);     \
     tcg_temp_free_i32(helper_tmp);                                \
     } while(0)
 
@@ -548,22 +663,25 @@
       "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23",
       "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", };
 
-#ifdef MIPS_DEBUG_DISAS
-#define MIPS_DEBUG(fmt, ...)                         \
-        qemu_log_mask(CPU_LOG_TB_IN_ASM,                \
-                       TARGET_FMT_lx ": %08x " fmt "\n", \
-                       ctx->pc, ctx->opcode , ## __VA_ARGS__)
-#define LOG_DISAS(...) qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__)
-#else
-#define MIPS_DEBUG(fmt, ...) do { } while(0)
-#define LOG_DISAS(...) do { } while (0)
-#endif
+#define MIPS_DEBUG(fmt, ...)                                                  \
+    do {                                                                      \
+        if (MIPS_DEBUG_DISAS) {                                               \
+            qemu_log_mask(CPU_LOG_TB_IN_ASM,                                  \
+                          TARGET_FMT_lx ": %08x " fmt "\n",                   \
+                          ctx->pc, ctx->opcode , ## __VA_ARGS__);             \
+        }                                                                     \
+    } while (0)
+
+#define LOG_DISAS(...)                                                        \
+    do {                                                                      \
+        if (MIPS_DEBUG_DISAS) {                                               \
+            qemu_log_mask(CPU_LOG_TB_IN_ASM, ## __VA_ARGS__);                 \
+        }                                                                     \
+    } while (0)
 
 #define MIPS_INVAL(op)                                                        \
-do {                                                                          \
     MIPS_DEBUG("Invalid %s %03x %03x %03x", op, ctx->opcode >> 26,            \
-               ctx->opcode & 0x3F, ((ctx->opcode >> 16) & 0x1F));             \
-} while (0)
+               ctx->opcode & 0x3F, ((ctx->opcode >> 16) & 0x1F))
 
 /* General purpose registers moves. */
 static inline void gen_load_gpr (TCGv t, int reg)
@@ -748,7 +866,7 @@
     TCGv_i32 texcp = tcg_const_i32(excp);
     TCGv_i32 terr = tcg_const_i32(err);
     save_cpu_state(ctx, 1);
-    gen_helper_raise_exception_err(texcp, terr);
+    gen_helper_raise_exception_err(cpu_env, texcp, terr);
     tcg_temp_free_i32(terr);
     tcg_temp_free_i32(texcp);
 }
@@ -757,7 +875,7 @@
 generate_exception (DisasContext *ctx, int excp)
 {
     save_cpu_state(ctx, 1);
-    gen_helper_0i(raise_exception, excp);
+    gen_helper_0e0i(raise_exception, excp);
 }
 
 /* Addresses computation */
@@ -871,22 +989,22 @@
     gen_ldcmp_fpr##bits (ctx, fp0, fs);                                       \
     gen_ldcmp_fpr##bits (ctx, fp1, ft);                                       \
     switch (n) {                                                              \
-    case  0: gen_helper_2i(cmp ## type ## _ ## fmt ## _f, fp0, fp1, cc);    break;\
-    case  1: gen_helper_2i(cmp ## type ## _ ## fmt ## _un, fp0, fp1, cc);   break;\
-    case  2: gen_helper_2i(cmp ## type ## _ ## fmt ## _eq, fp0, fp1, cc);   break;\
-    case  3: gen_helper_2i(cmp ## type ## _ ## fmt ## _ueq, fp0, fp1, cc);  break;\
-    case  4: gen_helper_2i(cmp ## type ## _ ## fmt ## _olt, fp0, fp1, cc);  break;\
-    case  5: gen_helper_2i(cmp ## type ## _ ## fmt ## _ult, fp0, fp1, cc);  break;\
-    case  6: gen_helper_2i(cmp ## type ## _ ## fmt ## _ole, fp0, fp1, cc);  break;\
-    case  7: gen_helper_2i(cmp ## type ## _ ## fmt ## _ule, fp0, fp1, cc);  break;\
-    case  8: gen_helper_2i(cmp ## type ## _ ## fmt ## _sf, fp0, fp1, cc);   break;\
-    case  9: gen_helper_2i(cmp ## type ## _ ## fmt ## _ngle, fp0, fp1, cc); break;\
-    case 10: gen_helper_2i(cmp ## type ## _ ## fmt ## _seq, fp0, fp1, cc);  break;\
-    case 11: gen_helper_2i(cmp ## type ## _ ## fmt ## _ngl, fp0, fp1, cc);  break;\
-    case 12: gen_helper_2i(cmp ## type ## _ ## fmt ## _lt, fp0, fp1, cc);   break;\
-    case 13: gen_helper_2i(cmp ## type ## _ ## fmt ## _nge, fp0, fp1, cc);  break;\
-    case 14: gen_helper_2i(cmp ## type ## _ ## fmt ## _le, fp0, fp1, cc);   break;\
-    case 15: gen_helper_2i(cmp ## type ## _ ## fmt ## _ngt, fp0, fp1, cc);  break;\
+    case  0: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _f, fp0, fp1, cc);    break;\
+    case  1: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _un, fp0, fp1, cc);   break;\
+    case  2: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _eq, fp0, fp1, cc);   break;\
+    case  3: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ueq, fp0, fp1, cc);  break;\
+    case  4: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _olt, fp0, fp1, cc);  break;\
+    case  5: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ult, fp0, fp1, cc);  break;\
+    case  6: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ole, fp0, fp1, cc);  break;\
+    case  7: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ule, fp0, fp1, cc);  break;\
+    case  8: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _sf, fp0, fp1, cc);   break;\
+    case  9: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ngle, fp0, fp1, cc); break;\
+    case 10: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _seq, fp0, fp1, cc);  break;\
+    case 11: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ngl, fp0, fp1, cc);  break;\
+    case 12: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _lt, fp0, fp1, cc);   break;\
+    case 13: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _nge, fp0, fp1, cc);  break;\
+    case 14: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _le, fp0, fp1, cc);   break;\
+    case 15: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ngt, fp0, fp1, cc);  break;\
     default: abort();                                                         \
     }                                                                         \
     tcg_temp_free_i##bits (fp0);                                              \
@@ -948,7 +1066,7 @@
 #define OP_LD_ATOMIC(insn,fname)                                           \
 static inline void op_ld_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)    \
 {                                                                          \
-    gen_helper_2i(insn, ret, arg1, ctx->mem_idx);                          \
+    gen_helper_1e1i(insn, ret, arg1, ctx->mem_idx);                        \
 }
 #endif
 OP_LD_ATOMIC(ll,ld32s);
@@ -975,7 +1093,7 @@
     tcg_gen_movi_tl(t0, rt | ((almask << 3) & 0x20));                        \
     tcg_gen_st_tl(t0, cpu_env, offsetof(CPUMIPSState, llreg));                   \
     tcg_gen_st_tl(arg1, cpu_env, offsetof(CPUMIPSState, llnewval));              \
-    gen_helper_0i(raise_exception, EXCP_SC);                                 \
+    gen_helper_0e0i(raise_exception, EXCP_SC);                               \
     gen_set_label(l2);                                                       \
     tcg_gen_movi_tl(t0, 0);                                                  \
     gen_store_gpr(t0, rt);                                                   \
@@ -986,7 +1104,7 @@
 static inline void op_st_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
 {                                                                            \
     TCGv t0 = tcg_temp_new();                                                \
-    gen_helper_3i(insn, t0, arg1, arg2, ctx->mem_idx);                       \
+    gen_helper_1e2i(insn, t0, arg1, arg2, ctx->mem_idx);                     \
     gen_store_gpr(t0, rt);                                                   \
     tcg_temp_free(t0);                                                       \
 }
@@ -1066,14 +1184,14 @@
     case OPC_LDL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(ldl, t1, t1, t0, ctx->mem_idx);
+        gen_helper_1e2i(ldl, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "ldl";
         break;
     case OPC_LDR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(ldr, t1, t1, t0, ctx->mem_idx);
+        gen_helper_1e2i(ldr, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "ldr";
         break;
@@ -1127,14 +1245,14 @@
     case OPC_LWL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(lwl, t1, t1, t0, ctx->mem_idx);
+        gen_helper_1e2i(lwl, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "lwl";
         break;
     case OPC_LWR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(lwr, t1, t1, t0, ctx->mem_idx);
+        gen_helper_1e2i(lwr, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "lwr";
         break;
@@ -1170,12 +1288,12 @@
         break;
     case OPC_SDL:
         save_cpu_state(ctx, 1);
-        gen_helper_2i(sdl, t1, t0, ctx->mem_idx);
+        gen_helper_0e2i(sdl, t1, t0, ctx->mem_idx);
         opn = "sdl";
         break;
     case OPC_SDR:
         save_cpu_state(ctx, 1);
-        gen_helper_2i(sdr, t1, t0, ctx->mem_idx);
+        gen_helper_0e2i(sdr, t1, t0, ctx->mem_idx);
         opn = "sdr";
         break;
 #endif
@@ -1196,12 +1314,12 @@
         break;
     case OPC_SWL:
         save_cpu_state(ctx, 1);
-        gen_helper_2i(swl, t1, t0, ctx->mem_idx);
+        gen_helper_0e2i(swl, t1, t0, ctx->mem_idx);
         opn = "swl";
         break;
     case OPC_SWR:
         save_cpu_state(ctx, 1);
-        gen_helper_2i(swr, t1, t0, ctx->mem_idx);
+        gen_helper_0e2i(swr, t1, t0, ctx->mem_idx);
         opn = "swr";
         break;
     }
@@ -1413,7 +1531,8 @@
 }
 
 /* Logic with immediate operand */
-static void gen_logic_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int16_t imm)
+static void gen_logic_imm(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                          int rt, int rs, int16_t imm)
 {
     target_ulong uimm;
     const char *opn = "imm logic";
@@ -1456,7 +1575,8 @@
 }
 
 /* Set on less than with immediate operand */
-static void gen_slt_imm (CPUMIPSState *env, uint32_t opc, int rt, int rs, int16_t imm)
+static void gen_slt_imm(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                        int rt, int rs, int16_t imm)
 {
     target_ulong uimm = (target_long)imm; /* Sign extend to 32/64 bits */
     const char *opn = "imm arith";
@@ -1757,7 +1877,8 @@
 }
 
 /* Conditional move */
-static void gen_cond_move (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_cond_move(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                          int rd, int rs, int rt)
 {
     const char *opn = "cond move";
     int l1;
@@ -1795,7 +1916,8 @@
 }
 
 /* Logic */
-static void gen_logic (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_logic(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                      int rd, int rs, int rt)
 {
     const char *opn = "logic";
 
@@ -1856,7 +1978,8 @@
 }
 
 /* Set on lower than */
-static void gen_slt (CPUMIPSState *env, uint32_t opc, int rd, int rs, int rt)
+static void gen_slt(CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
+                    int rd, int rs, int rt)
 {
     const char *opn = "slt";
     TCGv t0, t1;
@@ -2138,11 +2261,11 @@
         opn = "ddivu";
         break;
     case OPC_DMULT:
-        gen_helper_dmult(t0, t1);
+        gen_helper_dmult(cpu_env, t0, t1);
         opn = "dmult";
         break;
     case OPC_DMULTU:
-        gen_helper_dmultu(t0, t1);
+        gen_helper_dmultu(cpu_env, t0, t1);
         opn = "dmultu";
         break;
 #endif
@@ -2254,59 +2377,59 @@
 
     switch (opc) {
     case OPC_VR54XX_MULS:
-        gen_helper_muls(t0, t0, t1);
+        gen_helper_muls(t0, cpu_env, t0, t1);
         opn = "muls";
         break;
     case OPC_VR54XX_MULSU:
-        gen_helper_mulsu(t0, t0, t1);
+        gen_helper_mulsu(t0, cpu_env, t0, t1);
         opn = "mulsu";
         break;
     case OPC_VR54XX_MACC:
-        gen_helper_macc(t0, t0, t1);
+        gen_helper_macc(t0, cpu_env, t0, t1);
         opn = "macc";
         break;
     case OPC_VR54XX_MACCU:
-        gen_helper_maccu(t0, t0, t1);
+        gen_helper_maccu(t0, cpu_env, t0, t1);
         opn = "maccu";
         break;
     case OPC_VR54XX_MSAC:
-        gen_helper_msac(t0, t0, t1);
+        gen_helper_msac(t0, cpu_env, t0, t1);
         opn = "msac";
         break;
     case OPC_VR54XX_MSACU:
-        gen_helper_msacu(t0, t0, t1);
+        gen_helper_msacu(t0, cpu_env, t0, t1);
         opn = "msacu";
         break;
     case OPC_VR54XX_MULHI:
-        gen_helper_mulhi(t0, t0, t1);
+        gen_helper_mulhi(t0, cpu_env, t0, t1);
         opn = "mulhi";
         break;
     case OPC_VR54XX_MULHIU:
-        gen_helper_mulhiu(t0, t0, t1);
+        gen_helper_mulhiu(t0, cpu_env, t0, t1);
         opn = "mulhiu";
         break;
     case OPC_VR54XX_MULSHI:
-        gen_helper_mulshi(t0, t0, t1);
+        gen_helper_mulshi(t0, cpu_env, t0, t1);
         opn = "mulshi";
         break;
     case OPC_VR54XX_MULSHIU:
-        gen_helper_mulshiu(t0, t0, t1);
+        gen_helper_mulshiu(t0, cpu_env, t0, t1);
         opn = "mulshiu";
         break;
     case OPC_VR54XX_MACCHI:
-        gen_helper_macchi(t0, t0, t1);
+        gen_helper_macchi(t0, cpu_env, t0, t1);
         opn = "macchi";
         break;
     case OPC_VR54XX_MACCHIU:
-        gen_helper_macchiu(t0, t0, t1);
+        gen_helper_macchiu(t0, cpu_env, t0, t1);
         opn = "macchiu";
         break;
     case OPC_VR54XX_MSACHI:
-        gen_helper_msachi(t0, t0, t1);
+        gen_helper_msachi(t0, cpu_env, t0, t1);
         opn = "msachi";
         break;
     case OPC_VR54XX_MSACHIU:
-        gen_helper_msachiu(t0, t0, t1);
+        gen_helper_msachiu(t0, cpu_env, t0, t1);
         opn = "msachiu";
         break;
     default:
@@ -2362,8 +2485,8 @@
 }
 
 /* Godson integer instructions */
-static void gen_loongson_integer (DisasContext *ctx, uint32_t opc,
-                                int rd, int rs, int rt)
+static void gen_loongson_integer(DisasContext *ctx, uint32_t opc,
+                                 int rd, int rs, int rt)
 {
     const char *opn = "loongson";
     TCGv t0, t1;
@@ -2576,6 +2699,278 @@
     tcg_temp_free(t1);
 }
 
+/* Loongson multimedia instructions */
+static void gen_loongson_multimedia(DisasContext *ctx, int rd, int rs, int rt)
+{
+    const char *opn = "loongson_cp2";
+    uint32_t opc, shift_max;
+    TCGv_i64 t0, t1;
+
+    opc = MASK_LMI(ctx->opcode);
+    switch (opc) {
+    case OPC_ADD_CP2:
+    case OPC_SUB_CP2:
+    case OPC_DADD_CP2:
+    case OPC_DSUB_CP2:
+        t0 = tcg_temp_local_new_i64();
+        t1 = tcg_temp_local_new_i64();
+        break;
+    default:
+        t0 = tcg_temp_new_i64();
+        t1 = tcg_temp_new_i64();
+        break;
+    }
+
+    gen_load_fpr64(ctx, t0, rs);
+    gen_load_fpr64(ctx, t1, rt);
+
+#define LMI_HELPER(UP, LO) \
+    case OPC_##UP: gen_helper_##LO(t0, t0, t1); opn = #LO; break
+#define LMI_HELPER_1(UP, LO) \
+    case OPC_##UP: gen_helper_##LO(t0, t0); opn = #LO; break
+#define LMI_DIRECT(UP, LO, OP) \
+    case OPC_##UP: tcg_gen_##OP##_i64(t0, t0, t1); opn = #LO; break
+
+    switch (opc) {
+    LMI_HELPER(PADDSH, paddsh);
+    LMI_HELPER(PADDUSH, paddush);
+    LMI_HELPER(PADDH, paddh);
+    LMI_HELPER(PADDW, paddw);
+    LMI_HELPER(PADDSB, paddsb);
+    LMI_HELPER(PADDUSB, paddusb);
+    LMI_HELPER(PADDB, paddb);
+
+    LMI_HELPER(PSUBSH, psubsh);
+    LMI_HELPER(PSUBUSH, psubush);
+    LMI_HELPER(PSUBH, psubh);
+    LMI_HELPER(PSUBW, psubw);
+    LMI_HELPER(PSUBSB, psubsb);
+    LMI_HELPER(PSUBUSB, psubusb);
+    LMI_HELPER(PSUBB, psubb);
+
+    LMI_HELPER(PSHUFH, pshufh);
+    LMI_HELPER(PACKSSWH, packsswh);
+    LMI_HELPER(PACKSSHB, packsshb);
+    LMI_HELPER(PACKUSHB, packushb);
+
+    LMI_HELPER(PUNPCKLHW, punpcklhw);
+    LMI_HELPER(PUNPCKHHW, punpckhhw);
+    LMI_HELPER(PUNPCKLBH, punpcklbh);
+    LMI_HELPER(PUNPCKHBH, punpckhbh);
+    LMI_HELPER(PUNPCKLWD, punpcklwd);
+    LMI_HELPER(PUNPCKHWD, punpckhwd);
+
+    LMI_HELPER(PAVGH, pavgh);
+    LMI_HELPER(PAVGB, pavgb);
+    LMI_HELPER(PMAXSH, pmaxsh);
+    LMI_HELPER(PMINSH, pminsh);
+    LMI_HELPER(PMAXUB, pmaxub);
+    LMI_HELPER(PMINUB, pminub);
+
+    LMI_HELPER(PCMPEQW, pcmpeqw);
+    LMI_HELPER(PCMPGTW, pcmpgtw);
+    LMI_HELPER(PCMPEQH, pcmpeqh);
+    LMI_HELPER(PCMPGTH, pcmpgth);
+    LMI_HELPER(PCMPEQB, pcmpeqb);
+    LMI_HELPER(PCMPGTB, pcmpgtb);
+
+    LMI_HELPER(PSLLW, psllw);
+    LMI_HELPER(PSLLH, psllh);
+    LMI_HELPER(PSRLW, psrlw);
+    LMI_HELPER(PSRLH, psrlh);
+    LMI_HELPER(PSRAW, psraw);
+    LMI_HELPER(PSRAH, psrah);
+
+    LMI_HELPER(PMULLH, pmullh);
+    LMI_HELPER(PMULHH, pmulhh);
+    LMI_HELPER(PMULHUH, pmulhuh);
+    LMI_HELPER(PMADDHW, pmaddhw);
+
+    LMI_HELPER(PASUBUB, pasubub);
+    LMI_HELPER_1(BIADD, biadd);
+    LMI_HELPER_1(PMOVMSKB, pmovmskb);
+
+    LMI_DIRECT(PADDD, paddd, add);
+    LMI_DIRECT(PSUBD, psubd, sub);
+    LMI_DIRECT(XOR_CP2, xor, xor);
+    LMI_DIRECT(NOR_CP2, nor, nor);
+    LMI_DIRECT(AND_CP2, and, and);
+    LMI_DIRECT(PANDN, pandn, andc);
+    LMI_DIRECT(OR, or, or);
+
+    case OPC_PINSRH_0:
+        tcg_gen_deposit_i64(t0, t0, t1, 0, 16);
+        opn = "pinsrh_0";
+        break;
+    case OPC_PINSRH_1:
+        tcg_gen_deposit_i64(t0, t0, t1, 16, 16);
+        opn = "pinsrh_1";
+        break;
+    case OPC_PINSRH_2:
+        tcg_gen_deposit_i64(t0, t0, t1, 32, 16);
+        opn = "pinsrh_2";
+        break;
+    case OPC_PINSRH_3:
+        tcg_gen_deposit_i64(t0, t0, t1, 48, 16);
+        opn = "pinsrh_3";
+        break;
+
+    case OPC_PEXTRH:
+        tcg_gen_andi_i64(t1, t1, 3);
+        tcg_gen_shli_i64(t1, t1, 4);
+        tcg_gen_shr_i64(t0, t0, t1);
+        tcg_gen_ext16u_i64(t0, t0);
+        opn = "pextrh";
+        break;
+
+    case OPC_ADDU_CP2:
+        tcg_gen_add_i64(t0, t0, t1);
+        tcg_gen_ext32s_i64(t0, t0);
+        opn = "addu";
+        break;
+    case OPC_SUBU_CP2:
+        tcg_gen_sub_i64(t0, t0, t1);
+        tcg_gen_ext32s_i64(t0, t0);
+        opn = "addu";
+        break;
+
+    case OPC_SLL_CP2:
+        opn = "sll";
+        shift_max = 32;
+        goto do_shift;
+    case OPC_SRL_CP2:
+        opn = "srl";
+        shift_max = 32;
+        goto do_shift;
+    case OPC_SRA_CP2:
+        opn = "sra";
+        shift_max = 32;
+        goto do_shift;
+    case OPC_DSLL_CP2:
+        opn = "dsll";
+        shift_max = 64;
+        goto do_shift;
+    case OPC_DSRL_CP2:
+        opn = "dsrl";
+        shift_max = 64;
+        goto do_shift;
+    case OPC_DSRA_CP2:
+        opn = "dsra";
+        shift_max = 64;
+        goto do_shift;
+    do_shift:
+        /* Make sure shift count isn't TCG undefined behaviour.  */
+        tcg_gen_andi_i64(t1, t1, shift_max - 1);
+
+        switch (opc) {
+        case OPC_SLL_CP2:
+        case OPC_DSLL_CP2:
+            tcg_gen_shl_i64(t0, t0, t1);
+            break;
+        case OPC_SRA_CP2:
+        case OPC_DSRA_CP2:
+            /* Since SRA is UndefinedResult without sign-extended inputs,
+               we can treat SRA and DSRA the same.  */
+            tcg_gen_sar_i64(t0, t0, t1);
+            break;
+        case OPC_SRL_CP2:
+            /* We want to shift in zeros for SRL; zero-extend first.  */
+            tcg_gen_ext32u_i64(t0, t0);
+            /* FALLTHRU */
+        case OPC_DSRL_CP2:
+            tcg_gen_shr_i64(t0, t0, t1);
+            break;
+        }
+
+        if (shift_max == 32) {
+            tcg_gen_ext32s_i64(t0, t0);
+        }
+
+        /* Shifts larger than MAX produce zero.  */
+        tcg_gen_setcondi_i64(TCG_COND_LTU, t1, t1, shift_max);
+        tcg_gen_neg_i64(t1, t1);
+        tcg_gen_and_i64(t0, t0, t1);
+        break;
+
+    case OPC_ADD_CP2:
+    case OPC_DADD_CP2:
+        {
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            int lab = gen_new_label();
+
+            tcg_gen_mov_i64(t2, t0);
+            tcg_gen_add_i64(t0, t1, t2);
+            if (opc == OPC_ADD_CP2) {
+                tcg_gen_ext32s_i64(t0, t0);
+            }
+            tcg_gen_xor_i64(t1, t1, t2);
+            tcg_gen_xor_i64(t2, t2, t0);
+            tcg_gen_andc_i64(t1, t2, t1);
+            tcg_temp_free_i64(t2);
+            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
+            generate_exception(ctx, EXCP_OVERFLOW);
+            gen_set_label(lab);
+
+            opn = (opc == OPC_ADD_CP2 ? "add" : "dadd");
+            break;
+        }
+
+    case OPC_SUB_CP2:
+    case OPC_DSUB_CP2:
+        {
+            TCGv_i64 t2 = tcg_temp_new_i64();
+            int lab = gen_new_label();
+
+            tcg_gen_mov_i64(t2, t0);
+            tcg_gen_sub_i64(t0, t1, t2);
+            if (opc == OPC_SUB_CP2) {
+                tcg_gen_ext32s_i64(t0, t0);
+            }
+            tcg_gen_xor_i64(t1, t1, t2);
+            tcg_gen_xor_i64(t2, t2, t0);
+            tcg_gen_and_i64(t1, t1, t2);
+            tcg_temp_free_i64(t2);
+            tcg_gen_brcondi_i64(TCG_COND_GE, t1, 0, lab);
+            generate_exception(ctx, EXCP_OVERFLOW);
+            gen_set_label(lab);
+
+            opn = (opc == OPC_SUB_CP2 ? "sub" : "dsub");
+            break;
+        }
+
+    case OPC_PMULUW:
+        tcg_gen_ext32u_i64(t0, t0);
+        tcg_gen_ext32u_i64(t1, t1);
+        tcg_gen_mul_i64(t0, t0, t1);
+        opn = "pmuluw";
+        break;
+
+    case OPC_SEQU_CP2:
+    case OPC_SEQ_CP2:
+    case OPC_SLTU_CP2:
+    case OPC_SLT_CP2:
+    case OPC_SLEU_CP2:
+    case OPC_SLE_CP2:
+        /* ??? Document is unclear: Set FCC[CC].  Does that mean the
+           FD field is the CC field?  */
+    default:
+        MIPS_INVAL(opn);
+        generate_exception(ctx, EXCP_RI);
+        return;
+    }
+
+#undef LMI_HELPER
+#undef LMI_DIRECT
+
+    gen_store_fpr64(ctx, t0, rd);
+
+    (void)opn; /* avoid a compiler warning */
+    MIPS_DEBUG("%s %s, %s, %s", opn,
+               fregnames[rd], fregnames[rs], fregnames[rt]);
+    tcg_temp_free_i64(t0);
+    tcg_temp_free_i64(t1);
+}
+
 /* Traps */
 static void gen_trap (DisasContext *ctx, uint32_t opc,
                       int rs, int rt, int16_t imm)
@@ -2683,7 +3078,7 @@
         gen_save_pc(dest);
         if (ctx->singlestep_enabled) {
             save_cpu_state(ctx, 0);
-            gen_helper_0i(raise_exception, EXCP_DEBUG);
+            gen_helper_0e0i(raise_exception, EXCP_DEBUG);
         }
         tcg_gen_exit_tb(0);
     }
@@ -3187,17 +3582,17 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpcontrol(arg);
+            gen_helper_mfc0_mvpcontrol(arg, cpu_env);
             rn = "MVPControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf0(arg);
+            gen_helper_mfc0_mvpconf0(arg, cpu_env);
             rn = "MVPConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf1(arg);
+            gen_helper_mfc0_mvpconf1(arg, cpu_env);
             rn = "MVPConf1";
             break;
         default:
@@ -3207,7 +3602,7 @@
     case 1:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_random(arg);
+            gen_helper_mfc0_random(arg, cpu_env);
             rn = "Random";
             break;
         case 1:
@@ -3258,37 +3653,37 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcstatus(arg);
+            gen_helper_mfc0_tcstatus(arg, cpu_env);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcbind(arg);
+            gen_helper_mfc0_tcbind(arg, cpu_env);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcrestart(arg);
+            gen_helper_mfc0_tcrestart(arg, cpu_env);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tchalt(arg);
+            gen_helper_mfc0_tchalt(arg, cpu_env);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tccontext(arg);
+            gen_helper_mfc0_tccontext(arg, cpu_env);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcschedule(arg);
+            gen_helper_mfc0_tcschedule(arg, cpu_env);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcschefback(arg);
+            gen_helper_mfc0_tcschefback(arg, cpu_env);
             rn = "TCScheFBack";
             break;
         default:
@@ -3399,7 +3794,7 @@
             /* Mark as an IO operation because we read the time.  */
             if (use_icount)
                 gen_io_start();
-            gen_helper_mfc0_count(arg);
+            gen_helper_mfc0_count(arg, cpu_env);
             if (use_icount) {
                 gen_io_end();
             }
@@ -3531,7 +3926,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_lladdr(arg);
+            gen_helper_mfc0_lladdr(arg, cpu_env);
             rn = "LLAddr";
             break;
         default:
@@ -3541,7 +3936,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mfc0_watchlo, arg, sel);
+            gen_helper_1e0i(mfc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -3551,7 +3946,7 @@
     case 19:
         switch (sel) {
         case 0 ...7:
-            gen_helper_1i(mfc0_watchhi, arg, sel);
+            gen_helper_1e0i(mfc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -3590,7 +3985,7 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_debug(arg); /* EJTAG support */
+            gen_helper_mfc0_debug(arg, cpu_env); /* EJTAG support */
             rn = "Debug";
             break;
         case 1:
@@ -3765,12 +4160,12 @@
     case 0:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_index(arg);
+            gen_helper_mtc0_index(cpu_env, arg);
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_mvpcontrol(arg);
+            gen_helper_mtc0_mvpcontrol(cpu_env, arg);
             rn = "MVPControl";
             break;
         case 2:
@@ -3795,22 +4190,22 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpecontrol(arg);
+            gen_helper_mtc0_vpecontrol(cpu_env, arg);
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf0(arg);
+            gen_helper_mtc0_vpeconf0(cpu_env, arg);
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf1(arg);
+            gen_helper_mtc0_vpeconf1(cpu_env, arg);
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_yqmask(arg);
+            gen_helper_mtc0_yqmask(cpu_env, arg);
             rn = "YQMask";
             break;
         case 5:
@@ -3825,7 +4220,7 @@
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeopt(arg);
+            gen_helper_mtc0_vpeopt(cpu_env, arg);
             rn = "VPEOpt";
             break;
         default:
@@ -3835,42 +4230,42 @@
     case 2:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo0(arg);
+            gen_helper_mtc0_entrylo0(cpu_env, arg);
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcstatus(arg);
+            gen_helper_mtc0_tcstatus(cpu_env, arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcbind(arg);
+            gen_helper_mtc0_tcbind(cpu_env, arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcrestart(arg);
+            gen_helper_mtc0_tcrestart(cpu_env, arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tchalt(arg);
+            gen_helper_mtc0_tchalt(cpu_env, arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tccontext(arg);
+            gen_helper_mtc0_tccontext(cpu_env, arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschedule(arg);
+            gen_helper_mtc0_tcschedule(cpu_env, arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschefback(arg);
+            gen_helper_mtc0_tcschefback(cpu_env, arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -3880,7 +4275,7 @@
     case 3:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo1(arg);
+            gen_helper_mtc0_entrylo1(cpu_env, arg);
             rn = "EntryLo1";
             break;
         default:
@@ -3890,11 +4285,11 @@
     case 4:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_context(arg);
+            gen_helper_mtc0_context(cpu_env, arg);
             rn = "Context";
             break;
         case 1:
-//            gen_helper_mtc0_contextconfig(arg); /* SmartMIPS ASE */
+//            gen_helper_mtc0_contextconfig(cpu_env, arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //            break;
         default:
@@ -3904,12 +4299,12 @@
     case 5:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_pagemask(arg);
+            gen_helper_mtc0_pagemask(cpu_env, arg);
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_pagegrain(arg);
+            gen_helper_mtc0_pagegrain(cpu_env, arg);
             rn = "PageGrain";
             break;
         default:
@@ -3919,32 +4314,32 @@
     case 6:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_wired(arg);
+            gen_helper_mtc0_wired(cpu_env, arg);
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf0(arg);
+            gen_helper_mtc0_srsconf0(cpu_env, arg);
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf1(arg);
+            gen_helper_mtc0_srsconf1(cpu_env, arg);
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf2(arg);
+            gen_helper_mtc0_srsconf2(cpu_env, arg);
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf3(arg);
+            gen_helper_mtc0_srsconf3(cpu_env, arg);
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf4(arg);
+            gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
         default:
@@ -3955,7 +4350,7 @@
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_hwrena(arg);
+            gen_helper_mtc0_hwrena(cpu_env, arg);
             rn = "HWREna";
             break;
         default:
@@ -3969,7 +4364,7 @@
     case 9:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_count(arg);
+            gen_helper_mtc0_count(cpu_env, arg);
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -3980,7 +4375,7 @@
     case 10:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entryhi(arg);
+            gen_helper_mtc0_entryhi(cpu_env, arg);
             rn = "EntryHi";
             break;
         default:
@@ -3990,7 +4385,7 @@
     case 11:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_compare(arg);
+            gen_helper_mtc0_compare(cpu_env, arg);
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
@@ -4002,7 +4397,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_status(arg);
+            gen_helper_mtc0_status(cpu_env, arg);
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -4010,14 +4405,14 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_intctl(arg);
+            gen_helper_mtc0_intctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsctl(arg);
+            gen_helper_mtc0_srsctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
@@ -4037,7 +4432,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_cause(arg);
+            gen_helper_mtc0_cause(cpu_env, arg);
             rn = "Cause";
             break;
         default:
@@ -4062,7 +4457,7 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_ebase(arg);
+            gen_helper_mtc0_ebase(cpu_env, arg);
             rn = "EBase";
             break;
         default:
@@ -4072,7 +4467,7 @@
     case 16:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_config0(arg);
+            gen_helper_mtc0_config0(cpu_env, arg);
             rn = "Config";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4082,7 +4477,7 @@
             rn = "Config1";
             break;
         case 2:
-            gen_helper_mtc0_config2(arg);
+            gen_helper_mtc0_config2(cpu_env, arg);
             rn = "Config2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4109,7 +4504,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_lladdr(arg);
+            gen_helper_mtc0_lladdr(cpu_env, arg);
             rn = "LLAddr";
             break;
         default:
@@ -4119,7 +4514,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchlo, arg, sel);
+            gen_helper_0e1i(mtc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -4129,7 +4524,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchhi, arg, sel);
+            gen_helper_0e1i(mtc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -4141,7 +4536,7 @@
         case 0:
 #if defined(TARGET_MIPS64)
             check_insn(env, ctx, ISA_MIPS3);
-            gen_helper_mtc0_xcontext(arg);
+            gen_helper_mtc0_xcontext(cpu_env, arg);
             rn = "XContext";
             break;
 #endif
@@ -4153,7 +4548,7 @@
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_helper_mtc0_framemask(arg);
+            gen_helper_mtc0_framemask(cpu_env, arg);
             rn = "Framemask";
             break;
         default:
@@ -4167,20 +4562,20 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_debug(arg); /* EJTAG support */
+            gen_helper_mtc0_debug(cpu_env, arg); /* EJTAG support */
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_mtc0_tracecontrol(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol(cpu_env, arg); /* PDtrace support */
             rn = "TraceControl";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
 //            break;
         case 2:
-//            gen_helper_mtc0_tracecontrol2(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol2(cpu_env, arg); /* PDtrace support */
             rn = "TraceControl2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4188,13 +4583,13 @@
         case 3:
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
-//            gen_helper_mtc0_usertracedata(arg); /* PDtrace support */
+//            gen_helper_mtc0_usertracedata(cpu_env, arg); /* PDtrace support */
             rn = "UserTraceData";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
 //            break;
         case 4:
-//            gen_helper_mtc0_tracebpc(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracebpc(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceBPC";
@@ -4217,7 +4612,7 @@
     case 25:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_performance0(arg);
+            gen_helper_mtc0_performance0(cpu_env, arg);
             rn = "Performance0";
             break;
         case 1:
@@ -4272,14 +4667,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taglo(arg);
+            gen_helper_mtc0_taglo(cpu_env, arg);
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datalo(arg);
+            gen_helper_mtc0_datalo(cpu_env, arg);
             rn = "DataLo";
             break;
         default:
@@ -4292,14 +4687,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taghi(arg);
+            gen_helper_mtc0_taghi(cpu_env, arg);
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datahi(arg);
+            gen_helper_mtc0_datahi(cpu_env, arg);
             rn = "DataHi";
             break;
         default:
@@ -4364,17 +4759,17 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpcontrol(arg);
+            gen_helper_mfc0_mvpcontrol(arg, cpu_env);
             rn = "MVPControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf0(arg);
+            gen_helper_mfc0_mvpconf0(arg, cpu_env);
             rn = "MVPConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf1(arg);
+            gen_helper_mfc0_mvpconf1(arg, cpu_env);
             rn = "MVPConf1";
             break;
         default:
@@ -4384,7 +4779,7 @@
     case 1:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_random(arg);
+            gen_helper_mfc0_random(arg, cpu_env);
             rn = "Random";
             break;
         case 1:
@@ -4434,37 +4829,37 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcstatus(arg);
+            gen_helper_mfc0_tcstatus(arg, cpu_env);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcbind(arg);
+            gen_helper_mfc0_tcbind(arg, cpu_env);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcrestart(arg);
+            gen_helper_dmfc0_tcrestart(arg, cpu_env);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tchalt(arg);
+            gen_helper_dmfc0_tchalt(arg, cpu_env);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tccontext(arg);
+            gen_helper_dmfc0_tccontext(arg, cpu_env);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcschedule(arg);
+            gen_helper_dmfc0_tcschedule(arg, cpu_env);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcschefback(arg);
+            gen_helper_dmfc0_tcschefback(arg, cpu_env);
             rn = "TCScheFBack";
             break;
         default:
@@ -4572,7 +4967,7 @@
             /* Mark as an IO operation because we read the time.  */
             if (use_icount)
                 gen_io_start();
-            gen_helper_mfc0_count(arg);
+            gen_helper_mfc0_count(arg, cpu_env);
             if (use_icount) {
                 gen_io_end();
             }
@@ -4701,7 +5096,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_dmfc0_lladdr(arg);
+            gen_helper_dmfc0_lladdr(arg, cpu_env);
             rn = "LLAddr";
             break;
         default:
@@ -4711,7 +5106,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(dmfc0_watchlo, arg, sel);
+            gen_helper_1e0i(dmfc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -4721,7 +5116,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mfc0_watchhi, arg, sel);
+            gen_helper_1e0i(mfc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -4757,23 +5152,23 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_debug(arg); /* EJTAG support */
+            gen_helper_mfc0_debug(arg, cpu_env); /* EJTAG support */
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_dmfc0_tracecontrol(arg); /* PDtrace support */
+//            gen_helper_dmfc0_tracecontrol(arg, cpu_env); /* PDtrace support */
             rn = "TraceControl";
 //            break;
         case 2:
-//            gen_helper_dmfc0_tracecontrol2(arg); /* PDtrace support */
+//            gen_helper_dmfc0_tracecontrol2(arg, cpu_env); /* PDtrace support */
             rn = "TraceControl2";
 //            break;
         case 3:
-//            gen_helper_dmfc0_usertracedata(arg); /* PDtrace support */
+//            gen_helper_dmfc0_usertracedata(arg, cpu_env); /* PDtrace support */
             rn = "UserTraceData";
 //            break;
         case 4:
-//            gen_helper_dmfc0_tracebpc(arg); /* PDtrace support */
+//            gen_helper_dmfc0_tracebpc(arg, cpu_env); /* PDtrace support */
             rn = "TraceBPC";
 //            break;
         default:
@@ -4931,12 +5326,12 @@
     case 0:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_index(arg);
+            gen_helper_mtc0_index(cpu_env, arg);
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_mvpcontrol(arg);
+            gen_helper_mtc0_mvpcontrol(cpu_env, arg);
             rn = "MVPControl";
             break;
         case 2:
@@ -4961,22 +5356,22 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpecontrol(arg);
+            gen_helper_mtc0_vpecontrol(cpu_env, arg);
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf0(arg);
+            gen_helper_mtc0_vpeconf0(cpu_env, arg);
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf1(arg);
+            gen_helper_mtc0_vpeconf1(cpu_env, arg);
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_yqmask(arg);
+            gen_helper_mtc0_yqmask(cpu_env, arg);
             rn = "YQMask";
             break;
         case 5:
@@ -4991,7 +5386,7 @@
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeopt(arg);
+            gen_helper_mtc0_vpeopt(cpu_env, arg);
             rn = "VPEOpt";
             break;
         default:
@@ -5001,42 +5396,42 @@
     case 2:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo0(arg);
+            gen_helper_mtc0_entrylo0(cpu_env, arg);
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcstatus(arg);
+            gen_helper_mtc0_tcstatus(cpu_env, arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcbind(arg);
+            gen_helper_mtc0_tcbind(cpu_env, arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcrestart(arg);
+            gen_helper_mtc0_tcrestart(cpu_env, arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tchalt(arg);
+            gen_helper_mtc0_tchalt(cpu_env, arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tccontext(arg);
+            gen_helper_mtc0_tccontext(cpu_env, arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschedule(arg);
+            gen_helper_mtc0_tcschedule(cpu_env, arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschefback(arg);
+            gen_helper_mtc0_tcschefback(cpu_env, arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -5046,7 +5441,7 @@
     case 3:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo1(arg);
+            gen_helper_mtc0_entrylo1(cpu_env, arg);
             rn = "EntryLo1";
             break;
         default:
@@ -5056,11 +5451,11 @@
     case 4:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_context(arg);
+            gen_helper_mtc0_context(cpu_env, arg);
             rn = "Context";
             break;
         case 1:
-//           gen_helper_mtc0_contextconfig(arg); /* SmartMIPS ASE */
+//           gen_helper_mtc0_contextconfig(cpu_env, arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //           break;
         default:
@@ -5070,12 +5465,12 @@
     case 5:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_pagemask(arg);
+            gen_helper_mtc0_pagemask(cpu_env, arg);
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_pagegrain(arg);
+            gen_helper_mtc0_pagegrain(cpu_env, arg);
             rn = "PageGrain";
             break;
         default:
@@ -5085,32 +5480,32 @@
     case 6:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_wired(arg);
+            gen_helper_mtc0_wired(cpu_env, arg);
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf0(arg);
+            gen_helper_mtc0_srsconf0(cpu_env, arg);
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf1(arg);
+            gen_helper_mtc0_srsconf1(cpu_env, arg);
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf2(arg);
+            gen_helper_mtc0_srsconf2(cpu_env, arg);
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf3(arg);
+            gen_helper_mtc0_srsconf3(cpu_env, arg);
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf4(arg);
+            gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
         default:
@@ -5121,7 +5516,7 @@
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_hwrena(arg);
+            gen_helper_mtc0_hwrena(cpu_env, arg);
             rn = "HWREna";
             break;
         default:
@@ -5135,7 +5530,7 @@
     case 9:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_count(arg);
+            gen_helper_mtc0_count(cpu_env, arg);
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -5148,7 +5543,7 @@
     case 10:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entryhi(arg);
+            gen_helper_mtc0_entryhi(cpu_env, arg);
             rn = "EntryHi";
             break;
         default:
@@ -5158,7 +5553,7 @@
     case 11:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_compare(arg);
+            gen_helper_mtc0_compare(cpu_env, arg);
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
@@ -5172,7 +5567,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_status(arg);
+            gen_helper_mtc0_status(cpu_env, arg);
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -5180,14 +5575,14 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_intctl(arg);
+            gen_helper_mtc0_intctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsctl(arg);
+            gen_helper_mtc0_srsctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
@@ -5212,7 +5607,7 @@
             if (use_icount) {
                 gen_io_start();
             }
-            gen_helper_mtc0_cause(arg);
+            gen_helper_mtc0_cause(cpu_env, arg);
             if (use_icount) {
                 gen_io_end();
             }
@@ -5242,7 +5637,7 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_ebase(arg);
+            gen_helper_mtc0_ebase(cpu_env, arg);
             rn = "EBase";
             break;
         default:
@@ -5252,7 +5647,7 @@
     case 16:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_config0(arg);
+            gen_helper_mtc0_config0(cpu_env, arg);
             rn = "Config";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -5262,7 +5657,7 @@
             rn = "Config1";
             break;
         case 2:
-            gen_helper_mtc0_config2(arg);
+            gen_helper_mtc0_config2(cpu_env, arg);
             rn = "Config2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -5280,7 +5675,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_lladdr(arg);
+            gen_helper_mtc0_lladdr(cpu_env, arg);
             rn = "LLAddr";
             break;
         default:
@@ -5290,7 +5685,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchlo, arg, sel);
+            gen_helper_0e1i(mtc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -5300,7 +5695,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchhi, arg, sel);
+            gen_helper_0e1i(mtc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -5311,7 +5706,7 @@
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS3);
-            gen_helper_mtc0_xcontext(arg);
+            gen_helper_mtc0_xcontext(cpu_env, arg);
             rn = "XContext";
             break;
         default:
@@ -5322,7 +5717,7 @@
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_helper_mtc0_framemask(arg);
+            gen_helper_mtc0_framemask(cpu_env, arg);
             rn = "Framemask";
             break;
         default:
@@ -5336,32 +5731,32 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_debug(arg); /* EJTAG support */
+            gen_helper_mtc0_debug(cpu_env, arg); /* EJTAG support */
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_mtc0_tracecontrol(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceControl";
 //            break;
         case 2:
-//            gen_helper_mtc0_tracecontrol2(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol2(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceControl2";
 //            break;
         case 3:
-//            gen_helper_mtc0_usertracedata(arg); /* PDtrace support */
+//            gen_helper_mtc0_usertracedata(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "UserTraceData";
 //            break;
         case 4:
-//            gen_helper_mtc0_tracebpc(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracebpc(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceBPC";
@@ -5384,35 +5779,35 @@
     case 25:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_performance0(arg);
+            gen_helper_mtc0_performance0(cpu_env, arg);
             rn = "Performance0";
             break;
         case 1:
-//            gen_helper_mtc0_performance1(arg);
+//            gen_helper_mtc0_performance1(cpu_env, arg);
             rn = "Performance1";
 //            break;
         case 2:
-//            gen_helper_mtc0_performance2(arg);
+//            gen_helper_mtc0_performance2(cpu_env, arg);
             rn = "Performance2";
 //            break;
         case 3:
-//            gen_helper_mtc0_performance3(arg);
+//            gen_helper_mtc0_performance3(cpu_env, arg);
             rn = "Performance3";
 //            break;
         case 4:
-//            gen_helper_mtc0_performance4(arg);
+//            gen_helper_mtc0_performance4(cpu_env, arg);
             rn = "Performance4";
 //            break;
         case 5:
-//            gen_helper_mtc0_performance5(arg);
+//            gen_helper_mtc0_performance5(cpu_env, arg);
             rn = "Performance5";
 //            break;
         case 6:
-//            gen_helper_mtc0_performance6(arg);
+//            gen_helper_mtc0_performance6(cpu_env, arg);
             rn = "Performance6";
 //            break;
         case 7:
-//            gen_helper_mtc0_performance7(arg);
+//            gen_helper_mtc0_performance7(cpu_env, arg);
             rn = "Performance7";
 //            break;
         default:
@@ -5439,14 +5834,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taglo(arg);
+            gen_helper_mtc0_taglo(cpu_env, arg);
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datalo(arg);
+            gen_helper_mtc0_datalo(cpu_env, arg);
             rn = "DataLo";
             break;
         default:
@@ -5459,14 +5854,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taghi(arg);
+            gen_helper_mtc0_taghi(cpu_env, arg);
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datahi(arg);
+            gen_helper_mtc0_datahi(cpu_env, arg);
             rn = "DataHi";
             break;
         default:
@@ -5533,10 +5928,10 @@
         case 1:
             switch (sel) {
             case 1:
-                gen_helper_mftc0_vpecontrol(t0);
+                gen_helper_mftc0_vpecontrol(t0, cpu_env);
                 break;
             case 2:
-                gen_helper_mftc0_vpeconf0(t0);
+                gen_helper_mftc0_vpeconf0(t0, cpu_env);
                 break;
             default:
                 goto die;
@@ -5546,25 +5941,25 @@
         case 2:
             switch (sel) {
             case 1:
-                gen_helper_mftc0_tcstatus(t0);
+                gen_helper_mftc0_tcstatus(t0, cpu_env);
                 break;
             case 2:
-                gen_helper_mftc0_tcbind(t0);
+                gen_helper_mftc0_tcbind(t0, cpu_env);
                 break;
             case 3:
-                gen_helper_mftc0_tcrestart(t0);
+                gen_helper_mftc0_tcrestart(t0, cpu_env);
                 break;
             case 4:
-                gen_helper_mftc0_tchalt(t0);
+                gen_helper_mftc0_tchalt(t0, cpu_env);
                 break;
             case 5:
-                gen_helper_mftc0_tccontext(t0);
+                gen_helper_mftc0_tccontext(t0, cpu_env);
                 break;
             case 6:
-                gen_helper_mftc0_tcschedule(t0);
+                gen_helper_mftc0_tcschedule(t0, cpu_env);
                 break;
             case 7:
-                gen_helper_mftc0_tcschefback(t0);
+                gen_helper_mftc0_tcschefback(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5574,7 +5969,7 @@
         case 10:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_entryhi(t0);
+                gen_helper_mftc0_entryhi(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5583,7 +5978,7 @@
         case 12:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_status(t0);
+                gen_helper_mftc0_status(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5592,7 +5987,7 @@
         case 13:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_cause(t0);
+                gen_helper_mftc0_cause(t0, cpu_env);
                 break;
             default:
                 goto die;
@@ -5602,7 +5997,7 @@
         case 14:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_epc(t0);
+                gen_helper_mftc0_epc(t0, cpu_env);
                 break;
             default:
                 goto die;
@@ -5612,7 +6007,7 @@
         case 15:
             switch (sel) {
             case 1:
-                gen_helper_mftc0_ebase(t0);
+                gen_helper_mftc0_ebase(t0, cpu_env);
                 break;
             default:
                 goto die;
@@ -5622,7 +6017,7 @@
         case 16:
             switch (sel) {
             case 0 ... 7:
-                gen_helper_mftc0_configx(t0, tcg_const_tl(sel));
+                gen_helper_mftc0_configx(t0, cpu_env, tcg_const_tl(sel));
                 break;
             default:
                 goto die;
@@ -5632,7 +6027,7 @@
         case 23:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_debug(t0);
+                gen_helper_mftc0_debug(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5645,49 +6040,49 @@
     } else switch (sel) {
     /* GPR registers. */
     case 0:
-        gen_helper_1i(mftgpr, t0, rt);
+        gen_helper_1e0i(mftgpr, t0, rt);
         break;
     /* Auxiliary CPU registers */
     case 1:
         switch (rt) {
         case 0:
-            gen_helper_1i(mftlo, t0, 0);
+            gen_helper_1e0i(mftlo, t0, 0);
             break;
         case 1:
-            gen_helper_1i(mfthi, t0, 0);
+            gen_helper_1e0i(mfthi, t0, 0);
             break;
         case 2:
-            gen_helper_1i(mftacx, t0, 0);
+            gen_helper_1e0i(mftacx, t0, 0);
             break;
         case 4:
-            gen_helper_1i(mftlo, t0, 1);
+            gen_helper_1e0i(mftlo, t0, 1);
             break;
         case 5:
-            gen_helper_1i(mfthi, t0, 1);
+            gen_helper_1e0i(mfthi, t0, 1);
             break;
         case 6:
-            gen_helper_1i(mftacx, t0, 1);
+            gen_helper_1e0i(mftacx, t0, 1);
             break;
         case 8:
-            gen_helper_1i(mftlo, t0, 2);
+            gen_helper_1e0i(mftlo, t0, 2);
             break;
         case 9:
-            gen_helper_1i(mfthi, t0, 2);
+            gen_helper_1e0i(mfthi, t0, 2);
             break;
         case 10:
-            gen_helper_1i(mftacx, t0, 2);
+            gen_helper_1e0i(mftacx, t0, 2);
             break;
         case 12:
-            gen_helper_1i(mftlo, t0, 3);
+            gen_helper_1e0i(mftlo, t0, 3);
             break;
         case 13:
-            gen_helper_1i(mfthi, t0, 3);
+            gen_helper_1e0i(mfthi, t0, 3);
             break;
         case 14:
-            gen_helper_1i(mftacx, t0, 3);
+            gen_helper_1e0i(mftacx, t0, 3);
             break;
         case 16:
-            gen_helper_mftdsp(t0);
+            gen_helper_mftdsp(t0, cpu_env);
             break;
         default:
             goto die;
@@ -5712,7 +6107,7 @@
         break;
     case 3:
         /* XXX: For now we support only a single FPU context. */
-        gen_helper_1i(cfc1, t0, rt);
+        gen_helper_1e0i(cfc1, t0, rt);
         break;
     /* COP2: Not implemented. */
     case 4:
@@ -5751,10 +6146,10 @@
         case 1:
             switch (sel) {
             case 1:
-                gen_helper_mttc0_vpecontrol(t0);
+                gen_helper_mttc0_vpecontrol(cpu_env, t0);
                 break;
             case 2:
-                gen_helper_mttc0_vpeconf0(t0);
+                gen_helper_mttc0_vpeconf0(cpu_env, t0);
                 break;
             default:
                 goto die;
@@ -5764,25 +6159,25 @@
         case 2:
             switch (sel) {
             case 1:
-                gen_helper_mttc0_tcstatus(t0);
+                gen_helper_mttc0_tcstatus(cpu_env, t0);
                 break;
             case 2:
-                gen_helper_mttc0_tcbind(t0);
+                gen_helper_mttc0_tcbind(cpu_env, t0);
                 break;
             case 3:
-                gen_helper_mttc0_tcrestart(t0);
+                gen_helper_mttc0_tcrestart(cpu_env, t0);
                 break;
             case 4:
-                gen_helper_mttc0_tchalt(t0);
+                gen_helper_mttc0_tchalt(cpu_env, t0);
                 break;
             case 5:
-                gen_helper_mttc0_tccontext(t0);
+                gen_helper_mttc0_tccontext(cpu_env, t0);
                 break;
             case 6:
-                gen_helper_mttc0_tcschedule(t0);
+                gen_helper_mttc0_tcschedule(cpu_env, t0);
                 break;
             case 7:
-                gen_helper_mttc0_tcschefback(t0);
+                gen_helper_mttc0_tcschefback(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5792,7 +6187,7 @@
         case 10:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_entryhi(t0);
+                gen_helper_mttc0_entryhi(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5801,7 +6196,7 @@
         case 12:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_status(t0);
+                gen_helper_mttc0_status(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5810,7 +6205,7 @@
         case 13:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_cause(t0);
+                gen_helper_mttc0_cause(cpu_env, t0);
                 break;
             default:
                 goto die;
@@ -5820,7 +6215,7 @@
         case 15:
             switch (sel) {
             case 1:
-                gen_helper_mttc0_ebase(t0);
+                gen_helper_mttc0_ebase(cpu_env, t0);
                 break;
             default:
                 goto die;
@@ -5830,7 +6225,7 @@
         case 23:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_debug(t0);
+                gen_helper_mttc0_debug(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5843,49 +6238,49 @@
     } else switch (sel) {
     /* GPR registers. */
     case 0:
-        gen_helper_1i(mttgpr, t0, rd);
+        gen_helper_0e1i(mttgpr, t0, rd);
         break;
     /* Auxiliary CPU registers */
     case 1:
         switch (rd) {
         case 0:
-            gen_helper_1i(mttlo, t0, 0);
+            gen_helper_0e1i(mttlo, t0, 0);
             break;
         case 1:
-            gen_helper_1i(mtthi, t0, 0);
+            gen_helper_0e1i(mtthi, t0, 0);
             break;
         case 2:
-            gen_helper_1i(mttacx, t0, 0);
+            gen_helper_0e1i(mttacx, t0, 0);
             break;
         case 4:
-            gen_helper_1i(mttlo, t0, 1);
+            gen_helper_0e1i(mttlo, t0, 1);
             break;
         case 5:
-            gen_helper_1i(mtthi, t0, 1);
+            gen_helper_0e1i(mtthi, t0, 1);
             break;
         case 6:
-            gen_helper_1i(mttacx, t0, 1);
+            gen_helper_0e1i(mttacx, t0, 1);
             break;
         case 8:
-            gen_helper_1i(mttlo, t0, 2);
+            gen_helper_0e1i(mttlo, t0, 2);
             break;
         case 9:
-            gen_helper_1i(mtthi, t0, 2);
+            gen_helper_0e1i(mtthi, t0, 2);
             break;
         case 10:
-            gen_helper_1i(mttacx, t0, 2);
+            gen_helper_0e1i(mttacx, t0, 2);
             break;
         case 12:
-            gen_helper_1i(mttlo, t0, 3);
+            gen_helper_0e1i(mttlo, t0, 3);
             break;
         case 13:
-            gen_helper_1i(mtthi, t0, 3);
+            gen_helper_0e1i(mtthi, t0, 3);
             break;
         case 14:
-            gen_helper_1i(mttacx, t0, 3);
+            gen_helper_0e1i(mttacx, t0, 3);
             break;
         case 16:
-            gen_helper_mttdsp(t0);
+            gen_helper_mttdsp(cpu_env, t0);
             break;
         default:
             goto die;
@@ -5910,7 +6305,7 @@
         break;
     case 3:
         /* XXX: For now we support only a single FPU context. */
-        gen_helper_1i(ctc1, t0, rd);
+        gen_helper_0e1i(ctc1, t0, rd);
         break;
     /* COP2: Not implemented. */
     case 4:
@@ -5995,30 +6390,30 @@
         opn = "tlbwi";
         if (!env->tlb->helper_tlbwi)
             goto die;
-        gen_helper_tlbwi();
+        gen_helper_tlbwi(cpu_env);
         break;
     case OPC_TLBWR:
         opn = "tlbwr";
         if (!env->tlb->helper_tlbwr)
             goto die;
-        gen_helper_tlbwr();
+        gen_helper_tlbwr(cpu_env);
         break;
     case OPC_TLBP:
         opn = "tlbp";
         if (!env->tlb->helper_tlbp)
             goto die;
-        gen_helper_tlbp();
+        gen_helper_tlbp(cpu_env);
         break;
     case OPC_TLBR:
         opn = "tlbr";
         if (!env->tlb->helper_tlbr)
             goto die;
-        gen_helper_tlbr();
+        gen_helper_tlbr(cpu_env);
         break;
     case OPC_ERET:
         opn = "eret";
         check_insn(env, ctx, ISA_MIPS2);
-        gen_helper_eret();
+        gen_helper_eret(cpu_env);
         ctx->bstate = BS_EXCP;
         break;
     case OPC_DERET:
@@ -6028,7 +6423,7 @@
             MIPS_INVAL(opn);
             generate_exception(ctx, EXCP_RI);
         } else {
-            gen_helper_deret();
+            gen_helper_deret(cpu_env);
             ctx->bstate = BS_EXCP;
         }
         break;
@@ -6039,7 +6434,7 @@
         ctx->pc += 4;
         save_cpu_state(ctx, 1);
         ctx->pc -= 4;
-        gen_helper_wait();
+        gen_helper_wait(cpu_env);
         ctx->bstate = BS_EXCP;
         break;
     default:
@@ -6340,13 +6735,13 @@
         opn = "mtc1";
         break;
     case OPC_CFC1:
-        gen_helper_1i(cfc1, t0, fs);
+        gen_helper_1e0i(cfc1, t0, fs);
         gen_store_gpr(t0, rt);
         opn = "cfc1";
         break;
     case OPC_CTC1:
         gen_load_gpr(t0, rt);
-        gen_helper_1i(ctc1, t0, fs);
+        gen_helper_0e1i(ctc1, t0, fs);
         opn = "ctc1";
         break;
 #if defined(TARGET_MIPS64)
@@ -6543,7 +6938,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_add_s(fp0, fp0, fp1);
+            gen_helper_float_add_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6558,7 +6953,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_sub_s(fp0, fp0, fp1);
+            gen_helper_float_sub_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6573,7 +6968,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_mul_s(fp0, fp0, fp1);
+            gen_helper_float_mul_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6588,7 +6983,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_div_s(fp0, fp0, fp1);
+            gen_helper_float_div_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6601,7 +6996,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_sqrt_s(fp0, fp0);
+            gen_helper_float_sqrt_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6646,7 +7041,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_roundl_s(fp64, fp32);
+            gen_helper_float_roundl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6660,7 +7055,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_truncl_s(fp64, fp32);
+            gen_helper_float_truncl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6674,7 +7069,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_ceill_s(fp64, fp32);
+            gen_helper_float_ceill_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6688,7 +7083,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_floorl_s(fp64, fp32);
+            gen_helper_float_floorl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6700,7 +7095,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_roundw_s(fp0, fp0);
+            gen_helper_float_roundw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6711,7 +7106,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_truncw_s(fp0, fp0);
+            gen_helper_float_truncw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6722,7 +7117,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_ceilw_s(fp0, fp0);
+            gen_helper_float_ceilw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6733,7 +7128,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_floorw_s(fp0, fp0);
+            gen_helper_float_floorw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6781,7 +7176,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_recip_s(fp0, fp0);
+            gen_helper_float_recip_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6793,7 +7188,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_rsqrt_s(fp0, fp0);
+            gen_helper_float_rsqrt_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6807,7 +7202,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_recip2_s(fp0, fp0, fp1);
+            gen_helper_float_recip2_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6820,7 +7215,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_recip1_s(fp0, fp0);
+            gen_helper_float_recip1_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6832,7 +7227,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_rsqrt1_s(fp0, fp0);
+            gen_helper_float_rsqrt1_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6846,7 +7241,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_rsqrt2_s(fp0, fp0, fp1);
+            gen_helper_float_rsqrt2_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6860,7 +7255,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_cvtd_s(fp64, fp32);
+            gen_helper_float_cvtd_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6872,7 +7267,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_cvtw_s(fp0, fp0);
+            gen_helper_float_cvtw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6885,7 +7280,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_cvtl_s(fp64, fp32);
+            gen_helper_float_cvtl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6941,7 +7336,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_add_d(fp0, fp0, fp1);
+            gen_helper_float_add_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6957,7 +7352,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_sub_d(fp0, fp0, fp1);
+            gen_helper_float_sub_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6973,7 +7368,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_mul_d(fp0, fp0, fp1);
+            gen_helper_float_mul_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6989,7 +7384,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_div_d(fp0, fp0, fp1);
+            gen_helper_float_div_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7003,7 +7398,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_sqrt_d(fp0, fp0);
+            gen_helper_float_sqrt_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7050,7 +7445,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_roundl_d(fp0, fp0);
+            gen_helper_float_roundl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7062,7 +7457,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_truncl_d(fp0, fp0);
+            gen_helper_float_truncl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7074,7 +7469,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_ceill_d(fp0, fp0);
+            gen_helper_float_ceill_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7086,7 +7481,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_floorl_d(fp0, fp0);
+            gen_helper_float_floorl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7099,7 +7494,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_roundw_d(fp32, fp64);
+            gen_helper_float_roundw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7113,7 +7508,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_truncw_d(fp32, fp64);
+            gen_helper_float_truncw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7127,7 +7522,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_ceilw_d(fp32, fp64);
+            gen_helper_float_ceilw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7141,7 +7536,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_floorw_d(fp32, fp64);
+            gen_helper_float_floorw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7190,7 +7585,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_recip_d(fp0, fp0);
+            gen_helper_float_recip_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7202,7 +7597,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_rsqrt_d(fp0, fp0);
+            gen_helper_float_rsqrt_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7216,7 +7611,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_recip2_d(fp0, fp0, fp1);
+            gen_helper_float_recip2_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7229,7 +7624,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_recip1_d(fp0, fp0);
+            gen_helper_float_recip1_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7241,7 +7636,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_rsqrt1_d(fp0, fp0);
+            gen_helper_float_rsqrt1_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7255,7 +7650,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_rsqrt2_d(fp0, fp0, fp1);
+            gen_helper_float_rsqrt2_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7293,7 +7688,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_cvts_d(fp32, fp64);
+            gen_helper_float_cvts_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7307,7 +7702,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_cvtw_d(fp32, fp64);
+            gen_helper_float_cvtw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7320,7 +7715,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtl_d(fp0, fp0);
+            gen_helper_float_cvtl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7331,7 +7726,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_cvts_w(fp0, fp0);
+            gen_helper_float_cvts_w(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7344,7 +7739,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_cvtd_w(fp64, fp32);
+            gen_helper_float_cvtd_w(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -7358,7 +7753,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_cvts_l(fp32, fp64);
+            gen_helper_float_cvts_l(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7371,7 +7766,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtd_l(fp0, fp0);
+            gen_helper_float_cvtd_l(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7383,7 +7778,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtps_pw(fp0, fp0);
+            gen_helper_float_cvtps_pw(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7397,7 +7792,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_add_ps(fp0, fp0, fp1);
+            gen_helper_float_add_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7412,7 +7807,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_sub_ps(fp0, fp0, fp1);
+            gen_helper_float_sub_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7427,7 +7822,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_mul_ps(fp0, fp0, fp1);
+            gen_helper_float_mul_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7515,7 +7910,7 @@
 
             gen_load_fpr64(ctx, fp0, ft);
             gen_load_fpr64(ctx, fp1, fs);
-            gen_helper_float_addr_ps(fp0, fp0, fp1);
+            gen_helper_float_addr_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7530,7 +7925,7 @@
 
             gen_load_fpr64(ctx, fp0, ft);
             gen_load_fpr64(ctx, fp1, fs);
-            gen_helper_float_mulr_ps(fp0, fp0, fp1);
+            gen_helper_float_mulr_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7545,7 +7940,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_recip2_ps(fp0, fp0, fp1);
+            gen_helper_float_recip2_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7558,7 +7953,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_recip1_ps(fp0, fp0);
+            gen_helper_float_recip1_ps(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7570,7 +7965,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_rsqrt1_ps(fp0, fp0);
+            gen_helper_float_rsqrt1_ps(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7584,7 +7979,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_rsqrt2_ps(fp0, fp0, fp1);
+            gen_helper_float_rsqrt2_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7597,7 +7992,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32h(fp0, fs);
-            gen_helper_float_cvts_pu(fp0, fp0);
+            gen_helper_float_cvts_pu(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7609,7 +8004,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtpw_ps(fp0, fp0);
+            gen_helper_float_cvtpw_ps(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7621,7 +8016,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_cvts_pl(fp0, fp0);
+            gen_helper_float_cvts_pl(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7887,7 +8282,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_muladd_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_muladd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -7906,7 +8301,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_muladd_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_muladd_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7924,7 +8319,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_muladd_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_muladd_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7942,7 +8337,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_mulsub_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_mulsub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -7961,7 +8356,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_mulsub_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_mulsub_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7979,7 +8374,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_mulsub_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_mulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7997,7 +8392,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_nmuladd_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmuladd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -8016,7 +8411,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmuladd_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmuladd_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8034,7 +8429,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmuladd_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmuladd_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8052,7 +8447,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_nmulsub_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmulsub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -8071,7 +8466,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmulsub_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmulsub_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8089,7 +8484,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmulsub_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8122,22 +8517,22 @@
     switch (rd) {
     case 0:
         save_cpu_state(ctx, 1);
-        gen_helper_rdhwr_cpunum(t0);
+        gen_helper_rdhwr_cpunum(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
     case 1:
         save_cpu_state(ctx, 1);
-        gen_helper_rdhwr_synci_step(t0);
+        gen_helper_rdhwr_synci_step(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
     case 2:
         save_cpu_state(ctx, 1);
-        gen_helper_rdhwr_cc(t0);
+        gen_helper_rdhwr_cc(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
     case 3:
         save_cpu_state(ctx, 1);
-        gen_helper_rdhwr_ccres(t0);
+        gen_helper_rdhwr_ccres(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
     case 29:
@@ -8214,7 +8609,7 @@
             }
             if (ctx->singlestep_enabled) {
                 save_cpu_state(ctx, 0);
-                gen_helper_0i(raise_exception, EXCP_DEBUG);
+                gen_helper_0e0i(raise_exception, EXCP_DEBUG);
             }
             tcg_gen_exit_tb(0);
             break;
@@ -8678,7 +9073,7 @@
 static int decode_extended_mips16_opc (CPUMIPSState *env, DisasContext *ctx,
                                        int *is_branch)
 {
-    int extend = lduw_code(ctx->pc + 2);
+    int extend = cpu_lduw_code(env, ctx->pc + 2);
     int op, rx, ry, funct, sa;
     int16_t imm, offset;
 
@@ -8760,10 +9155,10 @@
         gen_arith_imm(env, ctx, OPC_ADDIU, rx, rx, imm);
         break;
     case M16_OPC_SLTI:
-        gen_slt_imm(env, OPC_SLTI, 24, rx, imm);
+        gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm);
         break;
     case M16_OPC_SLTIU:
-        gen_slt_imm(env, OPC_SLTIU, 24, rx, imm);
+        gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm);
         break;
     case M16_OPC_I8:
         switch (funct) {
@@ -8904,7 +9299,7 @@
         /* No delay slot, so just process as a normal instruction */
         break;
     case M16_OPC_JAL:
-        offset = lduw_code(ctx->pc + 2);
+        offset = cpu_lduw_code(env, ctx->pc + 2);
         offset = (((ctx->opcode & 0x1f) << 21)
                   | ((ctx->opcode >> 5) & 0x1f) << 16
                   | offset) << 2;
@@ -8974,15 +9369,13 @@
     case M16_OPC_SLTI:
         {
             int16_t imm = (uint8_t) ctx->opcode;
-
-            gen_slt_imm(env, OPC_SLTI, 24, rx, imm);
+            gen_slt_imm(env, ctx, OPC_SLTI, 24, rx, imm);
         }
         break;
     case M16_OPC_SLTIU:
         {
             int16_t imm = (uint8_t) ctx->opcode;
-
-            gen_slt_imm(env, OPC_SLTIU, 24, rx, imm);
+            gen_slt_imm(env, ctx, OPC_SLTIU, 24, rx, imm);
         }
         break;
     case M16_OPC_I8:
@@ -9057,8 +9450,7 @@
     case M16_OPC_CMPI:
         {
             int16_t imm = (uint8_t) ctx->opcode;
-
-            gen_logic_imm(env, OPC_XORI, 24, rx, imm);
+            gen_logic_imm(env, ctx, OPC_XORI, 24, rx, imm);
         }
         break;
 #if defined(TARGET_MIPS64)
@@ -9170,10 +9562,10 @@
             }
             break;
         case RR_SLT:
-            gen_slt(env, OPC_SLT, 24, rx, ry);
+            gen_slt(env, ctx, OPC_SLT, 24, rx, ry);
             break;
         case RR_SLTU:
-            gen_slt(env, OPC_SLTU, 24, rx, ry);
+            gen_slt(env, ctx, OPC_SLTU, 24, rx, ry);
             break;
         case RR_BREAK:
             generate_exception(ctx, EXCP_BREAK);
@@ -9194,22 +9586,22 @@
             break;
 #endif
         case RR_CMP:
-            gen_logic(env, OPC_XOR, 24, rx, ry);
+            gen_logic(env, ctx, OPC_XOR, 24, rx, ry);
             break;
         case RR_NEG:
             gen_arith(env, ctx, OPC_SUBU, rx, 0, ry);
             break;
         case RR_AND:
-            gen_logic(env, OPC_AND, rx, rx, ry);
+            gen_logic(env, ctx, OPC_AND, rx, rx, ry);
             break;
         case RR_OR:
-            gen_logic(env, OPC_OR, rx, rx, ry);
+            gen_logic(env, ctx, OPC_OR, rx, rx, ry);
             break;
         case RR_XOR:
-            gen_logic(env, OPC_XOR, rx, rx, ry);
+            gen_logic(env, ctx, OPC_XOR, rx, rx, ry);
             break;
         case RR_NOT:
-            gen_logic(env, OPC_NOR, rx, ry, 0);
+            gen_logic(env, ctx, OPC_NOR, rx, ry, 0);
             break;
         case RR_MFHI:
             gen_HILO(ctx, OPC_MFHI, rx);
@@ -9831,12 +10223,13 @@
     int rs = mmreg(uMIPS_RS(ctx->opcode));
     int encoded = ZIMM(ctx->opcode, 0, 4);
 
-    gen_logic_imm(env, OPC_ANDI, rd, rs, decoded_imm[encoded]);
+    gen_logic_imm(env, ctx, OPC_ANDI, rd, rs, decoded_imm[encoded]);
 }
 
 static void gen_ldst_multiple (DisasContext *ctx, uint32_t opc, int reglist,
                                int base, int16_t offset)
 {
+    const char *opn = "ldst_multiple";
     TCGv t0, t1;
     TCGv_i32 t2;
 
@@ -9855,20 +10248,25 @@
     save_cpu_state(ctx, 1);
     switch (opc) {
     case LWM32:
-        gen_helper_lwm(t0, t1, t2);
+        gen_helper_lwm(cpu_env, t0, t1, t2);
+        opn = "lwm";
         break;
     case SWM32:
-        gen_helper_swm(t0, t1, t2);
+        gen_helper_swm(cpu_env, t0, t1, t2);
+        opn = "swm";
         break;
 #ifdef TARGET_MIPS64
     case LDM:
-        gen_helper_ldm(t0, t1, t2);
+        gen_helper_ldm(cpu_env, t0, t1, t2);
+        opn = "ldm";
         break;
     case SDM:
-        gen_helper_sdm(t0, t1, t2);
+        gen_helper_sdm(cpu_env, t0, t1, t2);
+        opn = "sdm";
         break;
 #endif
     }
+    (void)opn;
     MIPS_DEBUG("%s, %x, %d(%s)", opn, reglist, offset, regnames[base]);
     tcg_temp_free(t0);
     tcg_temp_free(t1);
@@ -9887,25 +10285,25 @@
     case NOT16 + 1:
     case NOT16 + 2:
     case NOT16 + 3:
-        gen_logic(env, OPC_NOR, rd, rs, 0);
+        gen_logic(env, ctx, OPC_NOR, rd, rs, 0);
         break;
     case XOR16 + 0:
     case XOR16 + 1:
     case XOR16 + 2:
     case XOR16 + 3:
-        gen_logic(env, OPC_XOR, rd, rd, rs);
+        gen_logic(env, ctx, OPC_XOR, rd, rd, rs);
         break;
     case AND16 + 0:
     case AND16 + 1:
     case AND16 + 2:
     case AND16 + 3:
-        gen_logic(env, OPC_AND, rd, rd, rs);
+        gen_logic(env, ctx, OPC_AND, rd, rd, rs);
         break;
     case OR16 + 0:
     case OR16 + 1:
     case OR16 + 2:
     case OR16 + 3:
-        gen_logic(env, OPC_OR, rd, rd, rs);
+        gen_logic(env, ctx, OPC_OR, rd, rd, rs);
         break;
     case LWM16 + 0:
     case LWM16 + 1:
@@ -10287,7 +10685,7 @@
                 TCGv t0 = tcg_temp_new();
 
                 save_cpu_state(ctx, 1);
-                gen_helper_di(t0);
+                gen_helper_di(t0, cpu_env);
                 gen_store_gpr(t0, rs);
                 /* Stop translation as we may have switched the execution mode */
                 ctx->bstate = BS_STOP;
@@ -10300,7 +10698,7 @@
                 TCGv t0 = tcg_temp_new();
 
                 save_cpu_state(ctx, 1);
-                gen_helper_ei(t0);
+                gen_helper_ei(t0, cpu_env);
                 gen_store_gpr(t0, rs);
                 /* Stop translation as we may have switched the execution mode */
                 ctx->bstate = BS_STOP;
@@ -10635,7 +11033,7 @@
     uint32_t op, minor, mips32_op;
     uint32_t cond, fmt, cc;
 
-    insn = lduw_code(ctx->pc + 2);
+    insn = cpu_lduw_code(env, ctx->pc + 2);
     ctx->opcode = (ctx->opcode << 16) | insn;
 
     rt = (ctx->opcode >> 21) & 0x1f;
@@ -10719,7 +11117,7 @@
             case XOR32:
                 mips32_op = OPC_XOR;
             do_logic:
-                gen_logic(env, mips32_op, rd, rs, rt);
+                gen_logic(env, ctx, mips32_op, rd, rs, rt);
                 break;
                 /* Set less than */
             case SLT:
@@ -10728,7 +11126,7 @@
             case SLTU:
                 mips32_op = OPC_SLTU;
             do_slt:
-                gen_slt(env, mips32_op, rd, rs, rt);
+                gen_slt(env, ctx, mips32_op, rd, rs, rt);
                 break;
             default:
                 goto pool32a_invalid;
@@ -10744,7 +11142,7 @@
             case MOVZ:
                 mips32_op = OPC_MOVZ;
             do_cmov:
-                gen_cond_move(env, mips32_op, rd, rs, rt);
+                gen_cond_move(env, ctx, mips32_op, rd, rs, rt);
                 break;
             case LWXS:
                 gen_ldxs(ctx, rs, rt, rd);
@@ -11157,7 +11555,7 @@
                target. */
             break;
         case LUI:
-            gen_logic_imm(env, OPC_LUI, rs, -1, imm);
+            gen_logic_imm(env, ctx, OPC_LUI, rs, -1, imm);
             break;
         case SYNCI:
             break;
@@ -11276,7 +11674,7 @@
     case ANDI32:
         mips32_op = OPC_ANDI;
     do_logici:
-        gen_logic_imm(env, mips32_op, rt, rs, imm);
+        gen_logic_imm(env, ctx, mips32_op, rt, rs, imm);
         break;
 
         /* Set less than immediate */
@@ -11286,7 +11684,7 @@
     case SLTIU32:
         mips32_op = OPC_SLTIU;
     do_slti:
-        gen_slt_imm(env, mips32_op, rt, rs, imm);
+        gen_slt_imm(env, ctx, mips32_op, rt, rs, imm);
         break;
     case JALX32:
         offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
@@ -11726,8 +12124,9 @@
         gen_set_label(l1);
     }
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(ctx->pc);
+    }
 
     op = MASK_OP_MAJOR(ctx->opcode);
     rs = (ctx->opcode >> 21) & 0x1f;
@@ -11763,7 +12162,7 @@
         case OPC_MOVZ:
             check_insn(env, ctx, ISA_MIPS4 | ISA_MIPS32 |
                                  INSN_LOONGSON2E | INSN_LOONGSON2F);
-            gen_cond_move(env, op1, rd, rs, rt);
+            gen_cond_move(env, ctx, op1, rd, rs, rt);
             break;
         case OPC_ADD ... OPC_SUBU:
             gen_arith(env, ctx, op1, rd, rs, rt);
@@ -11790,13 +12189,13 @@
             break;
         case OPC_SLT:          /* Set on less than */
         case OPC_SLTU:
-            gen_slt(env, op1, rd, rs, rt);
+            gen_slt(env, ctx, op1, rd, rs, rt);
             break;
         case OPC_AND:          /* Logic*/
         case OPC_OR:
         case OPC_NOR:
         case OPC_XOR:
-            gen_logic(env, op1, rd, rs, rt);
+            gen_logic(env, ctx, op1, rd, rs, rt);
             break;
         case OPC_MULT ... OPC_DIVU:
             if (sa) {
@@ -11827,7 +12226,7 @@
             MIPS_INVAL("PMON / selsl");
             generate_exception(ctx, EXCP_RI);
 #else
-            gen_helper_0i(pmon, sa);
+            gen_helper_0e0i(pmon, sa);
 #endif
             break;
         case OPC_SYSCALL:
@@ -12045,7 +12444,7 @@
 
                 save_cpu_state(ctx, 1);
                 gen_load_gpr(t0, rs);
-                gen_helper_yield(t0, t0);
+                gen_helper_yield(t0, cpu_env, t0);
                 gen_store_gpr(t0, rd);
                 tcg_temp_free(t0);
             }
@@ -12144,18 +12543,18 @@
                     break;
                 case OPC_DVPE:
                     check_insn(env, ctx, ASE_MT);
-                    gen_helper_dvpe(t0);
+                    gen_helper_dvpe(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_EVPE:
                     check_insn(env, ctx, ASE_MT);
-                    gen_helper_evpe(t0);
+                    gen_helper_evpe(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_DI:
                     check_insn(env, ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
-                    gen_helper_di(t0);
+                    gen_helper_di(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     /* Stop translation as we may have switched the execution mode */
                     ctx->bstate = BS_STOP;
@@ -12163,7 +12562,7 @@
                 case OPC_EI:
                     check_insn(env, ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
-                    gen_helper_ei(t0);
+                    gen_helper_ei(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     /* Stop translation as we may have switched the execution mode */
                     ctx->bstate = BS_STOP;
@@ -12197,13 +12596,13 @@
          break;
     case OPC_SLTI: /* Set on less than with immediate opcode */
     case OPC_SLTIU:
-         gen_slt_imm(env, op, rt, rs, imm);
+         gen_slt_imm(env, ctx, op, rt, rs, imm);
          break;
     case OPC_ANDI: /* Arithmetic with immediate opcode */
     case OPC_LUI:
     case OPC_ORI:
     case OPC_XORI:
-         gen_logic_imm(env, op, rt, rs, imm);
+         gen_logic_imm(env, ctx, op, rt, rs, imm);
          break;
     case OPC_J ... OPC_JAL: /* Jump */
          offset = (int32_t)(ctx->opcode & 0x3FFFFFF) << 2;
@@ -12298,10 +12697,14 @@
     case OPC_LDC2:
     case OPC_SWC2:
     case OPC_SDC2:
-    case OPC_CP2:
         /* COP2: Not implemented. */
         generate_exception_err(ctx, EXCP_CpU, 2);
         break;
+    case OPC_CP2:
+        check_insn(env, ctx, INSN_LOONGSON2F);
+        /* Note that these instructions use different fields.  */
+        gen_loongson_multimedia(ctx, sa, rd, rt);
+        break;
 
     case OPC_CP3:
         if (env->CP0_Config1 & (1 << CP0C1_FP)) {
@@ -12432,7 +12835,7 @@
                 if (bp->pc == ctx.pc) {
                     save_cpu_state(&ctx, 1);
                     ctx.bstate = BS_BRANCH;
-                    gen_helper_0i(raise_exception, EXCP_DEBUG);
+                    gen_helper_0e0i(raise_exception, EXCP_DEBUG);
                     /* Include the breakpoint location or the tb won't
                      * be flushed when it must be.  */
                     ctx.pc += 4;
@@ -12458,14 +12861,14 @@
 
         is_branch = 0;
         if (!(ctx.hflags & MIPS_HFLAG_M16)) {
-            ctx.opcode = ldl_code(ctx.pc);
+            ctx.opcode = cpu_ldl_code(env, ctx.pc);
             insn_bytes = 4;
             decode_opc(env, &ctx, &is_branch);
         } else if (env->insn_flags & ASE_MICROMIPS) {
-            ctx.opcode = lduw_code(ctx.pc);
+            ctx.opcode = cpu_lduw_code(env, ctx.pc);
             insn_bytes = decode_micromips_opc(env, &ctx, &is_branch);
         } else if (env->insn_flags & ASE_MIPS16) {
-            ctx.opcode = lduw_code(ctx.pc);
+            ctx.opcode = cpu_lduw_code(env, ctx.pc);
             insn_bytes = decode_mips16_opc(env, &ctx, &is_branch);
         } else {
             generate_exception(&ctx, EXCP_RI);
@@ -12502,7 +12905,7 @@
         gen_io_end();
     if (env->singlestep_enabled && ctx.bstate != BS_BRANCH) {
         save_cpu_state(&ctx, ctx.bstate == BS_NONE);
-        gen_helper_0i(raise_exception, EXCP_DEBUG);
+        gen_helper_0e0i(raise_exception, EXCP_DEBUG);
     } else {
         switch (ctx.bstate) {
         case BS_STOP:
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 325ba09..e2cad3a 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -1715,7 +1715,7 @@
             gen_opc_icount[k] = num_insns;
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(dc->pc);
         }
 
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index ca2fc21..faf4404 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -1079,7 +1079,6 @@
     int mmu_idx;         /* precomputed MMU index to speed up mem accesses */
 
     /* Power management */
-    int power_mode;
     int (*check_pow)(CPUPPCState *env);
 
 #if !defined(CONFIG_USER_ONLY)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index f638b2a..f39b4f6 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -287,23 +287,6 @@
     for (index = ARRAY_SIZE(r->element)-1; index >= 0; index--)
 #endif
 
-/* If X is a NaN, store the corresponding QNaN into RESULT.  Otherwise,
- * execute the following block.  */
-#define DO_HANDLE_NAN(result, x)                        \
-    if (float32_is_any_nan(x)) {                        \
-        CPU_FloatU __f;                                 \
-        __f.f = x;                                      \
-        __f.l = __f.l | (1 << 22);  /* Set QNaN bit. */ \
-        result = __f.f;                                 \
-    } else
-
-#define HANDLE_NAN1(result, x)                  \
-    DO_HANDLE_NAN(result, x)
-#define HANDLE_NAN2(result, x, y)                       \
-    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y)
-#define HANDLE_NAN3(result, x, y, z)                                    \
-    DO_HANDLE_NAN(result, x) DO_HANDLE_NAN(result, y) DO_HANDLE_NAN(result, z)
-
 /* Saturating arithmetic helpers.  */
 #define SATCVT(from, to, from_type, to_type, min, max)          \
     static inline to_type cvt##from##to(from_type x, int *sat)  \
@@ -409,15 +392,29 @@
         int i;                                                          \
                                                                         \
         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
-            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
-                r->f[i] = func(a->f[i], b->f[i], &env->vec_status);     \
-            }                                                           \
+            r->f[i] = func(a->f[i], b->f[i], &env->vec_status);         \
         }                                                               \
     }
 VARITHFP(addfp, float32_add)
 VARITHFP(subfp, float32_sub)
+VARITHFP(minfp, float32_min)
+VARITHFP(maxfp, float32_max)
 #undef VARITHFP
 
+#define VARITHFPFMA(suffix, type)                                       \
+    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
+                           ppc_avr_t *b, ppc_avr_t *c)                  \
+    {                                                                   \
+        int i;                                                          \
+        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
+            r->f[i] = float32_muladd(a->f[i], c->f[i], b->f[i],         \
+                                     type, &env->vec_status);           \
+        }                                                               \
+    }
+VARITHFPFMA(maddfp, 0);
+VARITHFPFMA(nmsubfp, float_muladd_negate_result | float_muladd_negate_c);
+#undef VARITHFPFMA
+
 #define VARITHSAT_CASE(type, op, cvt, element)                          \
     {                                                                   \
         type result = (type)a->element[i] op (type)b->element[i];       \
@@ -649,27 +646,6 @@
 VCT(sxs, cvtsdsw, s32)
 #undef VCT
 
-void helper_vmaddfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
-                    ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation in higher precision and round
-             * once at the end.  */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_add(t, bf, &env->vec_status);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vmhaddshs(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
                       ppc_avr_t *b, ppc_avr_t *c)
 {
@@ -730,27 +706,6 @@
 #undef VMINMAX_DO
 #undef VMINMAX
 
-#define VMINMAXFP(suffix, rT, rF)                                       \
-    void helper_v##suffix(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, \
-                          ppc_avr_t *b)                                 \
-    {                                                                   \
-        int i;                                                          \
-                                                                        \
-        for (i = 0; i < ARRAY_SIZE(r->f); i++) {                        \
-            HANDLE_NAN2(r->f[i], a->f[i], b->f[i]) {                    \
-                if (float32_lt_quiet(a->f[i], b->f[i],                  \
-                                     &env->vec_status)) {               \
-                    r->f[i] = rT->f[i];                                 \
-                } else {                                                \
-                    r->f[i] = rF->f[i];                                 \
-                }                                                       \
-            }                                                           \
-        }                                                               \
-    }
-VMINMAXFP(minfp, a, b)
-VMINMAXFP(maxfp, b, a)
-#undef VMINMAXFP
-
 void helper_vmladduhm(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, ppc_avr_t *c)
 {
     int i;
@@ -930,28 +885,6 @@
 #undef VMUL_DO
 #undef VMUL
 
-void helper_vnmsubfp(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a,
-                     ppc_avr_t *b, ppc_avr_t *c)
-{
-    int i;
-
-    for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN3(r->f[i], a->f[i], b->f[i], c->f[i]) {
-            /* Need to do the computation is higher precision and round
-             * once at the end.  */
-            float64 af, bf, cf, t;
-
-            af = float32_to_float64(a->f[i], &env->vec_status);
-            bf = float32_to_float64(b->f[i], &env->vec_status);
-            cf = float32_to_float64(c->f[i], &env->vec_status);
-            t = float64_mul(af, cf, &env->vec_status);
-            t = float64_sub(t, bf, &env->vec_status);
-            t = float64_chs(t);
-            r->f[i] = float64_to_float32(t, &env->vec_status);
-        }
-    }
-}
-
 void helper_vperm(CPUPPCState *env, ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b,
                   ppc_avr_t *c)
 {
@@ -1039,9 +972,7 @@
     int i;
 
     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
-        }
+        r->f[i] = float32_div(float32_one, b->f[i], &env->vec_status);
     }
 }
 
@@ -1054,9 +985,7 @@
                                                                 \
         set_float_rounding_mode(rounding, &s);                  \
         for (i = 0; i < ARRAY_SIZE(r->f); i++) {                \
-            HANDLE_NAN1(r->f[i], b->f[i]) {                     \
-                r->f[i] = float32_round_to_int (b->f[i], &s);   \
-            }                                                   \
+            r->f[i] = float32_round_to_int (b->f[i], &s);       \
         }                                                       \
     }
 VRFI(n, float_round_nearest_even)
@@ -1089,11 +1018,9 @@
     int i;
 
     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            float32 t = float32_sqrt(b->f[i], &env->vec_status);
+        float32 t = float32_sqrt(b->f[i], &env->vec_status);
 
-            r->f[i] = float32_div(float32_one, t, &env->vec_status);
-        }
+        r->f[i] = float32_div(float32_one, t, &env->vec_status);
     }
 }
 
@@ -1109,9 +1036,7 @@
     int i;
 
     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            r->f[i] = float32_exp2(b->f[i], &env->vec_status);
-        }
+        r->f[i] = float32_exp2(b->f[i], &env->vec_status);
     }
 }
 
@@ -1120,9 +1045,7 @@
     int i;
 
     for (i = 0; i < ARRAY_SIZE(r->f); i++) {
-        HANDLE_NAN1(r->f[i], b->f[i]) {
-            r->f[i] = float32_log2(b->f[i], &env->vec_status);
-        }
+        r->f[i] = float32_log2(b->f[i], &env->vec_status);
     }
 }
 
@@ -1473,10 +1396,6 @@
 #undef UPKHI
 #undef UPKLO
 
-#undef DO_HANDLE_NAN
-#undef HANDLE_NAN1
-#undef HANDLE_NAN2
-#undef HANDLE_NAN3
 #undef VECTOR_FOR_INORDER_I
 #undef HI_IDX
 #undef LO_IDX
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index a31d278..5cbe98a 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -60,6 +60,7 @@
 static int cap_ppc_smt;
 static int cap_ppc_rma;
 static int cap_spapr_tce;
+static int cap_hior;
 
 /* XXX We have a race condition where we actually have a level triggered
  *     interrupt, but the infrastructure can't expose that yet, so the guest
@@ -86,6 +87,7 @@
     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
+    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 
     if (!cap_interrupt_level) {
         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
@@ -469,6 +471,54 @@
         env->tlb_dirty = false;
     }
 
+    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
+        struct kvm_sregs sregs;
+
+        sregs.pvr = env->spr[SPR_PVR];
+
+        sregs.u.s.sdr1 = env->spr[SPR_SDR1];
+
+        /* Sync SLB */
+#ifdef TARGET_PPC64
+        for (i = 0; i < 64; i++) {
+            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
+            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
+        }
+#endif
+
+        /* Sync SRs */
+        for (i = 0; i < 16; i++) {
+            sregs.u.s.ppc32.sr[i] = env->sr[i];
+        }
+
+        /* Sync BATs */
+        for (i = 0; i < 8; i++) {
+            /* Beware. We have to swap upper and lower bits here */
+            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
+                | env->DBAT[1][i];
+            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
+                | env->IBAT[1][i];
+        }
+
+        ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
+        if (ret) {
+            return ret;
+        }
+    }
+
+    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
+        uint64_t hior = env->spr[SPR_HIOR];
+        struct kvm_one_reg reg = {
+            .id = KVM_REG_PPC_HIOR,
+            .addr = (uintptr_t) &hior,
+        };
+
+        ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
+        if (ret) {
+            return ret;
+        }
+    }
+
     return ret;
 }
 
@@ -795,7 +845,7 @@
             break;
         }
         if (!strncmp(line, field, field_len)) {
-            strncpy(value, line, len);
+            pstrcpy(value, len, line);
             ret = 0;
             break;
         }
@@ -946,52 +996,14 @@
 void kvmppc_set_papr(CPUPPCState *env)
 {
     struct kvm_enable_cap cap = {};
-    struct kvm_one_reg reg = {};
-    struct kvm_sregs sregs = {};
     int ret;
-    uint64_t hior = env->spr[SPR_HIOR];
 
     cap.cap = KVM_CAP_PPC_PAPR;
     ret = kvm_vcpu_ioctl(env, KVM_ENABLE_CAP, &cap);
 
     if (ret) {
-        goto fail;
+        cpu_abort(env, "This KVM version does not support PAPR\n");
     }
-
-    /*
-     * XXX We set HIOR here. It really should be a qdev property of
-     *     the CPU node, but we don't have CPUs converted to qdev yet.
-     *
-     *     Once we have qdev CPUs, move HIOR to a qdev property and
-     *     remove this chunk.
-     */
-    reg.id = KVM_REG_PPC_HIOR;
-    reg.addr = (uintptr_t)&hior;
-    ret = kvm_vcpu_ioctl(env, KVM_SET_ONE_REG, &reg);
-    if (ret) {
-        fprintf(stderr, "Couldn't set HIOR. Maybe you're running an old \n"
-                        "kernel with support for HV KVM but no PAPR PR \n"
-                        "KVM in which case things will work. If they don't \n"
-                        "please update your host kernel!\n");
-    }
-
-    /* Set SDR1 so kernel space finds the HTAB */
-    ret = kvm_vcpu_ioctl(env, KVM_GET_SREGS, &sregs);
-    if (ret) {
-        goto fail;
-    }
-
-    sregs.u.s.sdr1 = env->spr[SPR_SDR1];
-
-    ret = kvm_vcpu_ioctl(env, KVM_SET_SREGS, &sregs);
-    if (ret) {
-        goto fail;
-    }
-
-    return;
-
-fail:
-    cpu_abort(env, "This KVM version does not support PAPR\n");
 }
 
 int kvmppc_smt_threads(void)
@@ -999,6 +1011,7 @@
     return cap_ppc_smt ? cap_ppc_smt : 1;
 }
 
+#ifdef TARGET_PPC64
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
 {
     void *rma;
@@ -1042,6 +1055,16 @@
     return size;
 }
 
+uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
+{
+    if (cap_ppc_rma >= 2) {
+        return current_size;
+    }
+    return MIN(current_size,
+               getrampagesize() << (hash_shift - 7));
+}
+#endif
+
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
 {
     struct kvm_create_spapr_tce args = {
@@ -1101,6 +1124,44 @@
     return 0;
 }
 
+int kvmppc_reset_htab(int shift_hint)
+{
+    uint32_t shift = shift_hint;
+
+    if (!kvm_enabled()) {
+        /* Full emulation, tell caller to allocate htab itself */
+        return 0;
+    }
+    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
+        int ret;
+        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
+        if (ret == -ENOTTY) {
+            /* At least some versions of PR KVM advertise the
+             * capability, but don't implement the ioctl().  Oops.
+             * Return 0 so that we allocate the htab in qemu, as is
+             * correct for PR. */
+            return 0;
+        } else if (ret < 0) {
+            return ret;
+        }
+        return shift;
+    }
+
+    /* We have a kernel that predates the htab reset calls.  For PR
+     * KVM, we need to allocate the htab ourselves, for an HV KVM of
+     * this era, it has allocated a 16MB fixed size hash table
+     * already.  Kernels of this era have the GET_PVINFO capability
+     * only on PR, so we use this hack to determine the right
+     * answer */
+    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
+        /* PR - tell caller to allocate htab */
+        return 0;
+    } else {
+        /* HV - assume 16MB kernel allocated htab */
+        return 24;
+    }
+}
+
 static inline uint32_t mfpvr(void)
 {
     uint32_t pvr;
diff --git a/target-ppc/kvm_ppc.h b/target-ppc/kvm_ppc.h
index e2f8703..baad6eb 100644
--- a/target-ppc/kvm_ppc.h
+++ b/target-ppc/kvm_ppc.h
@@ -27,6 +27,8 @@
 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem);
 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd);
 int kvmppc_remove_spapr_tce(void *table, int pfd, uint32_t window_size);
+int kvmppc_reset_htab(int shift_hint);
+uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift);
 #endif /* !CONFIG_USER_ONLY */
 const ppc_def_t *kvmppc_host_cpu_def(void);
 int kvmppc_fixup_cpu(CPUPPCState *env);
@@ -94,6 +96,23 @@
 {
     return -1;
 }
+
+static inline int kvmppc_reset_htab(int shift_hint)
+{
+    return -1;
+}
+
+static inline uint64_t kvmppc_rma_size(uint64_t current_size,
+                                       unsigned int hash_shift)
+{
+    return ram_size;
+}
+
+static inline int kvmppc_update_sdr1(CPUPPCState *env)
+{
+    return 0;
+}
+
 #endif /* !CONFIG_USER_ONLY */
 
 static inline const ppc_def_t *kvmppc_host_cpu_def(void)
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index d6c2ee4..21ce757 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -82,7 +82,7 @@
     qemu_put_betls(f, &env->hflags);
     qemu_put_betls(f, &env->hflags_nmsr);
     qemu_put_sbe32s(f, &env->mmu_idx);
-    qemu_put_sbe32s(f, &env->power_mode);
+    qemu_put_sbe32(f, 0);
 }
 
 int cpu_load(QEMUFile *f, void *opaque, int version_id)
@@ -167,7 +167,7 @@
     qemu_get_betls(f, &env->hflags);
     qemu_get_betls(f, &env->hflags_nmsr);
     qemu_get_sbe32s(f, &env->mmu_idx);
-    qemu_get_sbe32s(f, &env->power_mode);
+    qemu_get_sbe32(f); /* Discard unused power_mode */
 
     return 0;
 }
diff --git a/target-ppc/mmu_helper.c b/target-ppc/mmu_helper.c
index d2664ac..532b114 100644
--- a/target-ppc/mmu_helper.c
+++ b/target-ppc/mmu_helper.c
@@ -1032,12 +1032,10 @@
         return -1;
     }
     *raddrp = (tlb->RPN & mask) | (address & ~mask);
-#if (TARGET_PHYS_ADDR_BITS >= 36)
     if (ext) {
         /* Extend the physical address to 36 bits */
-        *raddrp |= (target_phys_addr_t)(tlb->RPN & 0xF) << 32;
+        *raddrp |= (uint64_t)(tlb->RPN & 0xF) << 32;
     }
-#endif
 
     return 0;
 }
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ac915cc..1042268 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9690,8 +9690,9 @@
         LOG_DISAS("translate opcode %08x (%02x %02x %02x) (%s)\n",
                     ctx.opcode, opc1(ctx.opcode), opc2(ctx.opcode),
                     opc3(ctx.opcode), little_endian ? "little" : "big");
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(ctx.nip);
+        }
         ctx.nip += 4;
         table = env->opcodes;
         num_insns++;
diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index fba2b42..a972287 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -10423,6 +10423,14 @@
     env->pending_interrupts = 0;
     env->exception_index = POWERPC_EXCP_NONE;
     env->error_code = 0;
+
+#if defined(TARGET_PPC64) && !defined(CONFIG_USER_ONLY)
+    env->vpa = 0;
+    env->slb_shadow = 0;
+    env->dispatch_trace_log = 0;
+    env->dtl_size = 0;
+#endif /* TARGET_PPC64 */
+
     /* Flush all TLBs */
     tlb_flush(env, 1);
 }
diff --git a/target-s390x/helper.c b/target-s390x/helper.c
index a5741ec..22256b0 100644
--- a/target-s390x/helper.c
+++ b/target-s390x/helper.c
@@ -511,7 +511,8 @@
         break;
     }
 
-    qemu_log("%s: code=0x%x ilc=%d\n", __func__, env->int_pgm_code, ilc);
+    qemu_log_mask(CPU_LOG_INT, "%s: code=0x%x ilc=%d\n",
+                  __func__, env->int_pgm_code, ilc);
 
     lowcore = cpu_physical_memory_map(env->psa, &len, 1);
 
@@ -575,8 +576,8 @@
 
 void do_interrupt(CPUS390XState *env)
 {
-    qemu_log("%s: %d at pc=%" PRIx64 "\n", __func__, env->exception_index,
-             env->psw.addr);
+    qemu_log_mask(CPU_LOG_INT, "%s: %d at pc=%" PRIx64 "\n",
+                  __func__, env->exception_index, env->psw.addr);
 
     s390_add_running_cpu(env);
     /* handle external interrupts */
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index 2938ac9..e9b3cae 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -53,7 +53,8 @@
 #ifndef CONFIG_USER_ONLY
 void program_interrupt(CPUS390XState *env, uint32_t code, int ilc)
 {
-    qemu_log("program interrupt at %#" PRIx64 "\n", env->psw.addr);
+    qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n",
+                  env->psw.addr);
 
     if (kvm_enabled()) {
 #ifdef CONFIG_KVM
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 66119cd..db464cc 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -79,6 +79,14 @@
 {
     int i;
 
+    if (env->cc_op > 3) {
+        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %15s\n",
+                    env->psw.mask, env->psw.addr, cc_name(env->cc_op));
+    } else {
+        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %02x\n",
+                    env->psw.mask, env->psw.addr, env->cc_op);
+    }
+
     for (i = 0; i < 16; i++) {
         cpu_fprintf(f, "R%02d=%016" PRIx64, i, env->regs[i]);
         if ((i % 4) == 3) {
@@ -97,8 +105,6 @@
         }
     }
 
-    cpu_fprintf(f, "\n");
-
 #ifndef CONFIG_USER_ONLY
     for (i = 0; i < 16; i++) {
         cpu_fprintf(f, "C%02d=%016" PRIx64, i, env->cregs[i]);
@@ -110,22 +116,14 @@
     }
 #endif
 
-    cpu_fprintf(f, "\n");
-
-    if (env->cc_op > 3) {
-        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %15s\n",
-                    env->psw.mask, env->psw.addr, cc_name(env->cc_op));
-    } else {
-        cpu_fprintf(f, "PSW=mask %016" PRIx64 " addr %016" PRIx64 " cc %02x\n",
-                    env->psw.mask, env->psw.addr, env->cc_op);
-    }
-
 #ifdef DEBUG_INLINE_BRANCHES
     for (i = 0; i < CC_OP_MAX; i++) {
         cpu_fprintf(f, "  %15s = %10ld\t%10ld\n", cc_name(i),
                     inline_branch_miss[i], inline_branch_hit[i]);
     }
 #endif
+
+    cpu_fprintf(f, "\n");
 }
 
 static TCGv_i64 psw_addr;
@@ -274,21 +272,21 @@
 #endif
 }
 
-static inline uint64_t ld_code2(uint64_t pc)
+static inline uint64_t ld_code2(CPUS390XState *env, uint64_t pc)
 {
-    return (uint64_t)cpu_lduw_code(cpu_single_env, pc);
+    return (uint64_t)cpu_lduw_code(env, pc);
 }
 
-static inline uint64_t ld_code4(uint64_t pc)
+static inline uint64_t ld_code4(CPUS390XState *env, uint64_t pc)
 {
-    return (uint64_t)cpu_ldl_code(cpu_single_env, pc);
+    return (uint64_t)cpu_ldl_code(env, pc);
 }
 
-static inline uint64_t ld_code6(uint64_t pc)
+static inline uint64_t ld_code6(CPUS390XState *env, uint64_t pc)
 {
     uint64_t opc;
-    opc = (uint64_t)cpu_lduw_code(cpu_single_env, pc) << 32;
-    opc |= (uint64_t)(uint32_t)cpu_ldl_code(cpu_single_env, pc + 2);
+    opc = (uint64_t)cpu_lduw_code(env, pc) << 32;
+    opc |= (uint64_t)(uint32_t)cpu_ldl_code(env, pc + 2);
     return opc;
 }
 
@@ -319,7 +317,7 @@
 
 #ifdef CONFIG_USER_ONLY
 
-static void gen_illegal_opcode(DisasContext *s, int ilc)
+static void gen_illegal_opcode(CPUS390XState *env, DisasContext *s, int ilc)
 {
     TCGv_i32 tmp = tcg_const_i32(EXCP_SPEC);
     update_psw_addr(s);
@@ -331,20 +329,20 @@
 
 #else /* CONFIG_USER_ONLY */
 
-static void debug_print_inst(DisasContext *s, int ilc)
+static void debug_print_inst(CPUS390XState *env, DisasContext *s, int ilc)
 {
 #ifdef DEBUG_ILLEGAL_INSTRUCTIONS
     uint64_t inst = 0;
 
     switch (ilc & 3) {
     case 1:
-        inst = ld_code2(s->pc);
+        inst = ld_code2(env, s->pc);
         break;
     case 2:
-        inst = ld_code4(s->pc);
+        inst = ld_code4(env, s->pc);
         break;
     case 3:
-        inst = ld_code6(s->pc);
+        inst = ld_code6(env, s->pc);
         break;
     }
 
@@ -353,11 +351,12 @@
 #endif
 }
 
-static void gen_program_exception(DisasContext *s, int ilc, int code)
+static void gen_program_exception(CPUS390XState *env, DisasContext *s, int ilc,
+                                  int code)
 {
     TCGv_i32 tmp;
 
-    debug_print_inst(s, ilc);
+    debug_print_inst(env, s, ilc);
 
     /* remember what pgm exeption this was */
     tmp = tcg_const_i32(code);
@@ -385,20 +384,21 @@
 }
 
 
-static void gen_illegal_opcode(DisasContext *s, int ilc)
+static void gen_illegal_opcode(CPUS390XState *env, DisasContext *s, int ilc)
 {
-    gen_program_exception(s, ilc, PGM_SPECIFICATION);
+    gen_program_exception(env, s, ilc, PGM_SPECIFICATION);
 }
 
-static void gen_privileged_exception(DisasContext *s, int ilc)
+static void gen_privileged_exception(CPUS390XState *env, DisasContext *s,
+                                     int ilc)
 {
-    gen_program_exception(s, ilc, PGM_PRIVILEGED);
+    gen_program_exception(env, s, ilc, PGM_PRIVILEGED);
 }
 
-static void check_privileged(DisasContext *s, int ilc)
+static void check_privileged(CPUS390XState *env, DisasContext *s, int ilc)
 {
     if (s->tb->flags & (PSW_MASK_PSTATE >> 32)) {
-        gen_privileged_exception(s, ilc);
+        gen_privileged_exception(env, s, ilc);
     }
 }
 
@@ -1460,7 +1460,8 @@
     set_cc_static(s);
 }
 
-static void disas_e3(DisasContext* s, int op, int r1, int x2, int b2, int d2)
+static void disas_e3(CPUS390XState *env, DisasContext* s, int op, int r1,
+                     int x2, int b2, int d2)
 {
     TCGv_i64 addr, tmp, tmp2, tmp3, tmp4;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -1925,14 +1926,14 @@
         break;
     default:
         LOG_DISAS("illegal e3 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         break;
     }
     tcg_temp_free_i64(addr);
 }
 
 #ifndef CONFIG_USER_ONLY
-static void disas_e5(DisasContext* s, uint64_t insn)
+static void disas_e5(CPUS390XState *env, DisasContext* s, uint64_t insn)
 {
     TCGv_i64 tmp, tmp2;
     int op = (insn >> 32) & 0xff;
@@ -1950,7 +1951,7 @@
         break;
     default:
         LOG_DISAS("illegal e5 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         break;
     }
 
@@ -1959,7 +1960,8 @@
 }
 #endif
 
-static void disas_eb(DisasContext *s, int op, int r1, int r3, int b2, int d2)
+static void disas_eb(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int r3, int b2, int d2)
 {
     TCGv_i64 tmp, tmp2, tmp3, tmp4;
     TCGv_i32 tmp32_1, tmp32_2;
@@ -2102,7 +2104,7 @@
 #ifndef CONFIG_USER_ONLY
     case 0x2f: /* LCTLG     R1,R3,D2(B2)     [RSE] */
         /* Load Control */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
         tmp32_2 = tcg_const_i32(r3);
@@ -2114,7 +2116,7 @@
         break;
     case 0x25: /* STCTG     R1,R3,D2(B2)     [RSE] */
         /* Store Control */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
         tmp32_2 = tcg_const_i32(r3);
@@ -2191,13 +2193,13 @@
         break;
     default:
         LOG_DISAS("illegal eb operation 0x%x\n", op);
-        gen_illegal_opcode(s, ilc);
+        gen_illegal_opcode(env, s, ilc);
         break;
     }
 }
 
-static void disas_ed(DisasContext *s, int op, int r1, int x2, int b2, int d2,
-                     int r1b)
+static void disas_ed(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int x2, int b2, int d2, int r1b)
 {
     TCGv_i32 tmp_r1, tmp32;
     TCGv_i64 addr, tmp;
@@ -2311,14 +2313,15 @@
         break;
     default:
         LOG_DISAS("illegal ed operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         return;
     }
     tcg_temp_free_i32(tmp_r1);
     tcg_temp_free_i64(addr);
 }
 
-static void disas_a5(DisasContext *s, int op, int r1, int i2)
+static void disas_a5(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int i2)
 {
     TCGv_i64 tmp, tmp2;
     TCGv_i32 tmp32;
@@ -2467,12 +2470,13 @@
         break;
     default:
         LOG_DISAS("illegal a5 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 2);
+        gen_illegal_opcode(env, s, 2);
         return;
     }
 }
 
-static void disas_a7(DisasContext *s, int op, int r1, int i2)
+static void disas_a7(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int i2)
 {
     TCGv_i64 tmp, tmp2;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -2604,12 +2608,13 @@
         break;
     default:
         LOG_DISAS("illegal a7 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 2);
+        gen_illegal_opcode(env, s, 2);
         return;
     }
 }
 
-static void disas_b2(DisasContext *s, int op, uint32_t insn)
+static void disas_b2(CPUS390XState *env, DisasContext *s, int op,
+                     uint32_t insn)
 {
     TCGv_i64 tmp, tmp2, tmp3;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -2708,7 +2713,7 @@
 #ifndef CONFIG_USER_ONLY
     case 0x02: /* STIDP     D2(B2)     [S] */
         /* Store CPU ID */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2717,7 +2722,7 @@
         break;
     case 0x04: /* SCK       D2(B2)     [S] */
         /* Set Clock */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2736,7 +2741,7 @@
         break;
     case 0x06: /* SCKC     D2(B2)     [S] */
         /* Set Clock Comparator */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2745,7 +2750,7 @@
         break;
     case 0x07: /* STCKC    D2(B2)     [S] */
         /* Store Clock Comparator */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2754,7 +2759,7 @@
         break;
     case 0x08: /* SPT      D2(B2)     [S] */
         /* Set CPU Timer */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2763,7 +2768,7 @@
         break;
     case 0x09: /* STPT     D2(B2)     [S] */
         /* Store CPU Timer */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2772,7 +2777,7 @@
         break;
     case 0x0a: /* SPKA     D2(B2)     [S] */
         /* Set PSW Key from Address */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -2784,12 +2789,12 @@
         break;
     case 0x0d: /* PTLB                [S] */
         /* Purge TLB */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         gen_helper_ptlb(cpu_env);
         break;
     case 0x10: /* SPX      D2(B2)     [S] */
         /* Set Prefix Register */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2798,7 +2803,7 @@
         break;
     case 0x11: /* STPX     D2(B2)     [S] */
         /* Store Prefix */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -2809,7 +2814,7 @@
         break;
     case 0x12: /* STAP     D2(B2)     [S] */
         /* Store CPU Address */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -2823,7 +2828,7 @@
         break;
     case 0x21: /* IPTE     R1,R2      [RRE] */
         /* Invalidate PTE */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp = load_reg(r1);
@@ -2834,7 +2839,7 @@
         break;
     case 0x29: /* ISKE     R1,R2      [RRE] */
         /* Insert Storage Key Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp = load_reg(r2);
@@ -2846,7 +2851,7 @@
         break;
     case 0x2a: /* RRBE     R1,R2      [RRE] */
         /* Set Storage Key Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp32_1 = load_reg32(r1);
@@ -2858,7 +2863,7 @@
         break;
     case 0x2b: /* SSKE     R1,R2      [RRE] */
         /* Set Storage Key Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp32_1 = load_reg32(r1);
@@ -2869,12 +2874,12 @@
         break;
     case 0x34: /* STCH ? */
         /* Store Subchannel */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         gen_op_movi_cc(s, 3);
         break;
     case 0x46: /* STURA    R1,R2      [RRE] */
         /* Store Using Real Address */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp32_1 = load_reg32(r1);
@@ -2886,7 +2891,7 @@
         break;
     case 0x50: /* CSP      R1,R2      [RRE] */
         /* Compare And Swap And Purge */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp32_1 = tcg_const_i32(r1);
@@ -2898,7 +2903,7 @@
         break;
     case 0x5f: /* CHSC ? */
         /* Channel Subsystem Call */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         gen_op_movi_cc(s, 3);
         break;
     case 0x78: /* STCKE    D2(B2)     [S] */
@@ -2912,7 +2917,7 @@
         break;
     case 0x79: /* SACF    D2(B2)     [S] */
         /* Store Clock Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2924,7 +2929,7 @@
         s->is_jmp = DISAS_EXCP;
         break;
     case 0x7d: /* STSI     D2,(B2)     [S] */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = load_reg32(0);
@@ -2950,7 +2955,7 @@
         break;
     case 0xb1: /* STFL     D2(B2)     [S] */
         /* Store Facility List (CPU features) at 200 */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         tmp2 = tcg_const_i64(0xc0000000);
         tmp = tcg_const_i64(200);
         tcg_gen_qemu_st32(tmp2, tmp, get_mem_index(s));
@@ -2959,7 +2964,7 @@
         break;
     case 0xb2: /* LPSWE    D2(B2)     [S] */
         /* Load PSW Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -2976,7 +2981,7 @@
         break;
     case 0x20: /* SERVC     R1,R2     [RRE] */
         /* SCLP Service call (PV hypercall) */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         potential_page_fault(s);
         tmp32_1 = load_reg32(r2);
         tmp = load_reg(r1);
@@ -2988,12 +2993,13 @@
 #endif
     default:
         LOG_DISAS("illegal b2 operation 0x%x\n", op);
-        gen_illegal_opcode(s, ilc);
+        gen_illegal_opcode(env, s, ilc);
         break;
     }
 }
 
-static void disas_b3(DisasContext *s, int op, int m3, int r1, int r2)
+static void disas_b3(CPUS390XState *env, DisasContext *s, int op, int m3,
+                     int r1, int r2)
 {
     TCGv_i64 tmp;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -3263,7 +3269,7 @@
         break;
     default:
         LOG_DISAS("illegal b3 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 2);
+        gen_illegal_opcode(env, s, 2);
         break;
     }
 
@@ -3271,7 +3277,8 @@
 #undef FP_HELPER
 }
 
-static void disas_b9(DisasContext *s, int op, int r1, int r2)
+static void disas_b9(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int r2)
 {
     TCGv_i64 tmp, tmp2, tmp3;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -3654,12 +3661,12 @@
         break;
     default:
         LOG_DISAS("illegal b9 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 2);
+        gen_illegal_opcode(env, s, 2);
         break;
     }
 }
 
-static void disas_c0(DisasContext *s, int op, int r1, int i2)
+static void disas_c0(CPUS390XState *env, DisasContext *s, int op, int r1, int i2)
 {
     TCGv_i64 tmp;
     TCGv_i32 tmp32_1, tmp32_2;
@@ -3755,12 +3762,13 @@
         break;
     default:
         LOG_DISAS("illegal c0 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         break;
     }
 }
 
-static void disas_c2(DisasContext *s, int op, int r1, int i2)
+static void disas_c2(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int i2)
 {
     TCGv_i64 tmp, tmp2, tmp3;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -3832,7 +3840,7 @@
         break;
     default:
         LOG_DISAS("illegal c2 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         break;
     }
 }
@@ -3854,7 +3862,7 @@
     }
 }
 
-static void disas_s390_insn(DisasContext *s)
+static void disas_s390_insn(CPUS390XState *env, DisasContext *s)
 {
     TCGv_i64 tmp, tmp2, tmp3, tmp4;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3, tmp32_4;
@@ -3865,7 +3873,7 @@
     int ilc;
     int l1;
 
-    opc = cpu_ldub_code(cpu_single_env, s->pc);
+    opc = cpu_ldub_code(env, s->pc);
     LOG_DISAS("opc 0x%x\n", opc);
 
     ilc = get_ilc(opc);
@@ -3873,12 +3881,12 @@
     switch (opc) {
 #ifndef CONFIG_USER_ONLY
     case 0x01: /* SAM */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         /* set addressing mode, but we only do 64bit anyways */
         break;
 #endif
     case 0x6: /* BCTR     R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tcg_gen_subi_i32(tmp32_1, tmp32_1, 1);
@@ -3904,7 +3912,7 @@
         }
         break;
     case 0x7: /* BCR    M1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         if (r2) {
             tmp = load_reg(r2);
@@ -3916,7 +3924,7 @@
         }
         break;
     case 0xa: /* SVC    I         [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         debug_insn(insn);
         i = insn & 0xff;
         update_psw_addr(s);
@@ -3933,7 +3941,7 @@
         tcg_temp_free_i32(tmp32_3);
         break;
     case 0xd: /* BASR   R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp = tcg_const_i64(pc_to_link_info(s, s->pc + 2));
         store_reg(r1, tmp);
@@ -3946,7 +3954,7 @@
         tcg_temp_free_i64(tmp);
         break;
     case 0xe: /* MVCL   R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = tcg_const_i32(r1);
         tmp32_2 = tcg_const_i32(r2);
@@ -3957,7 +3965,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x10: /* LPR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         set_cc_abs32(s, tmp32_1);
@@ -3966,7 +3974,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x11: /* LNR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         set_cc_nabs32(s, tmp32_1);
@@ -3975,7 +3983,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x12: /* LTR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         if (r1 != r2) {
@@ -3985,7 +3993,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x13: /* LCR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         tcg_gen_neg_i32(tmp32_1, tmp32_1);
@@ -3996,7 +4004,7 @@
     case 0x14: /* NR     R1,R2     [RR] */
     case 0x16: /* OR     R1,R2     [RR] */
     case 0x17: /* XR     R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_2 = load_reg32(r2);
         tmp32_1 = load_reg32(r1);
@@ -4007,7 +4015,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x18: /* LR     R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         store_reg32(r1, tmp32_1);
@@ -4015,7 +4023,7 @@
         break;
     case 0x15: /* CLR    R1,R2     [RR] */
     case 0x19: /* CR     R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r2);
@@ -4029,7 +4037,7 @@
         break;
     case 0x1a: /* AR     R1,R2     [RR] */
     case 0x1e: /* ALR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r2);
@@ -4047,7 +4055,7 @@
         break;
     case 0x1b: /* SR     R1,R2     [RR] */
     case 0x1f: /* SLR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r2);
@@ -4065,7 +4073,7 @@
         break;
     case 0x1c: /* MR     R1,R2     [RR] */
         /* reg(r1, r1+1) = reg(r1+1) * reg(r2) */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp2 = load_reg(r2);
         tmp3 = load_reg((r1 + 1) & 15);
@@ -4079,7 +4087,7 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x1d: /* DR     R1,R2               [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r1 + 1);
@@ -4114,21 +4122,21 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x28: /* LDR    R1,R2               [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp = load_freg(r2);
         store_freg(r1, tmp);
         tcg_temp_free_i64(tmp);
         break;
     case 0x38: /* LER    R1,R2               [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_freg32(r2);
         store_freg32(r1, tmp32_1);
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x40: /* STH    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_reg(r1);
         tcg_gen_qemu_st16(tmp2, tmp, get_mem_index(s));
@@ -4136,13 +4144,13 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x41:        /* la */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         store_reg(r1, tmp); /* FIXME: 31/24-bit addressing */
         tcg_temp_free_i64(tmp);
         break;
     case 0x42: /* STC    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_reg(r1);
         tcg_gen_qemu_st8(tmp2, tmp, get_mem_index(s));
@@ -4150,7 +4158,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x43: /* IC     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld8u(tmp2, tmp, get_mem_index(s));
@@ -4159,7 +4167,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x44: /* EX     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_reg(r1);
         tmp3 = tcg_const_i64(s->pc + 4);
@@ -4172,7 +4180,7 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x46: /* BCT    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tcg_temp_free_i64(tmp);
 
@@ -4196,14 +4204,14 @@
         tcg_temp_free_i64(tmp);
         break;
     case 0x47: /* BC     M1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         gen_bcr(s, r1, tmp, s->pc + 4);
         tcg_temp_free_i64(tmp);
         s->is_jmp = DISAS_TB_JUMP;
         break;
     case 0x48: /* LH     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld16s(tmp2, tmp, get_mem_index(s));
@@ -4212,7 +4220,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x49: /* CH     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = tcg_temp_new_i32();
@@ -4228,7 +4236,7 @@
     case 0x4a: /* AH     R1,D2(X2,B2)     [RX] */
     case 0x4b: /* SH     R1,D2(X2,B2)     [RX] */
     case 0x4c: /* MH     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = load_reg32(r1);
@@ -4261,7 +4269,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x4d: /* BAS    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_const_i64(pc_to_link_info(s, s->pc + 4));
         store_reg(r1, tmp2);
@@ -4271,7 +4279,7 @@
         s->is_jmp = DISAS_JUMP;
         break;
     case 0x4e: /* CVD    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4283,7 +4291,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x50: /* st r1, d2(x2, b2) */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_reg(r1);
         tcg_gen_qemu_st32(tmp2, tmp, get_mem_index(s));
@@ -4291,7 +4299,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x55: /* CL     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4307,7 +4315,7 @@
     case 0x54: /* N      R1,D2(X2,B2)     [RX] */
     case 0x56: /* O      R1,D2(X2,B2)     [RX] */
     case 0x57: /* X      R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = load_reg32(r1);
@@ -4323,7 +4331,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x58: /* l r1, d2(x2, b2) */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4335,7 +4343,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x59: /* C      R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4352,7 +4360,7 @@
     case 0x5b: /* S      R1,D2(X2,B2)     [RX] */
     case 0x5e: /* AL     R1,D2(X2,B2)     [RX] */
     case 0x5f: /* SL     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = tcg_temp_new_i32();
@@ -4395,7 +4403,7 @@
         break;
     case 0x5c: /* M      R1,D2(X2,B2)        [RX] */
         /* reg(r1, r1+1) = reg(r1+1) * *(s32*)addr */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld32s(tmp2, tmp, get_mem_index(s));
@@ -4411,7 +4419,7 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x5d: /* D      R1,D2(X2,B2)        [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp3 = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r1 + 1);
@@ -4445,7 +4453,7 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x60: /* STD    R1,D2(X2,B2)        [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_freg(r1);
         tcg_gen_qemu_st64(tmp2, tmp, get_mem_index(s));
@@ -4453,7 +4461,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x68: /* LD    R1,D2(X2,B2)        [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld64(tmp2, tmp, get_mem_index(s));
@@ -4462,7 +4470,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x70: /* STE R1,D2(X2,B2) [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = load_freg32(r1);
@@ -4473,7 +4481,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x71: /* MS      R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = load_reg32(r1);
@@ -4488,7 +4496,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x78: /* LE     R1,D2(X2,B2)        [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4502,8 +4510,8 @@
 #ifndef CONFIG_USER_ONLY
     case 0x80: /* SSM      D2(B2)       [S] */
         /* Set System Mask */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -4518,8 +4526,8 @@
         break;
     case 0x82: /* LPSW     D2(B2)       [S] */
         /* Load PSW */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -4536,9 +4544,9 @@
         break;
     case 0x83: /* DIAG     R1,R3,D2     [RS] */
         /* Diagnose call (KVM hypercall) */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         potential_page_fault(s);
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp32_1 = tcg_const_i32(insn & 0xfff);
         tmp2 = load_reg(2);
@@ -4553,7 +4561,7 @@
     case 0x88: /* SRL    R1,D2(B2)        [RS] */
     case 0x89: /* SLL    R1,D2(B2)        [RS] */
     case 0x8a: /* SRA    R1,D2(B2)        [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = load_reg32(r1);
@@ -4582,7 +4590,7 @@
     case 0x8c: /* SRDL   R1,D2(B2)        [RS] */
     case 0x8d: /* SLDL   R1,D2(B2)        [RS] */
     case 0x8e: /* SRDA   R1,D2(B2)        [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2); /* shift */
         tmp2 = tcg_temp_new_i64();
@@ -4611,7 +4619,7 @@
         break;
     case 0x98: /* LM     R1,R3,D2(B2)     [RS] */
     case 0x90: /* STM    R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
 
         tmp = get_address(s, 0, b2, d2);
@@ -4637,7 +4645,7 @@
         tcg_temp_free_i64(tmp4);
         break;
     case 0x91: /* TM     D1(B1),I2        [SI] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_const_i64(i2);
         tcg_gen_qemu_ld8u(tmp, tmp, get_mem_index(s));
@@ -4646,7 +4654,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x92: /* MVI    D1(B1),I2        [SI] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_const_i64(i2);
         tcg_gen_qemu_st8(tmp2, tmp, get_mem_index(s));
@@ -4656,7 +4664,7 @@
     case 0x94: /* NI     D1(B1),I2        [SI] */
     case 0x96: /* OI     D1(B1),I2        [SI] */
     case 0x97: /* XI     D1(B1),I2        [SI] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld8u(tmp2, tmp, get_mem_index(s));
@@ -4679,7 +4687,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x95: /* CLI    D1(B1),I2        [SI] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld8u(tmp2, tmp, get_mem_index(s));
@@ -4688,7 +4696,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x9a: /* LAM      R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4700,7 +4708,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x9b: /* STAM     R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4712,21 +4720,21 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xa5:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         r1 = (insn >> 20) & 0xf;
         op = (insn >> 16) & 0xf;
         i2 = insn & 0xffff;
-        disas_a5(s, op, r1, i2);
+        disas_a5(env, s, op, r1, i2);
         break;
     case 0xa7:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         r1 = (insn >> 20) & 0xf;
         op = (insn >> 16) & 0xf;
         i2 = (short)insn;
-        disas_a7(s, op, r1, i2);
+        disas_a7(env, s, op, r1, i2);
         break;
     case 0xa8: /* MVCLE   R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4739,7 +4747,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xa9: /* CLCLE   R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4754,8 +4762,8 @@
 #ifndef CONFIG_USER_ONLY
     case 0xac: /* STNSM   D1(B1),I2     [SI] */
     case 0xad: /* STOSM   D1(B1),I2     [SI] */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_shri_i64(tmp2, psw_mask, 56);
@@ -4770,8 +4778,8 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0xae: /* SIGP   R1,R3,D2(B2)     [RS] */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = load_reg(r3);
@@ -4784,8 +4792,8 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0xb1: /* LRA    R1,D2(X2, B2)     [RX] */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp32_1 = tcg_const_i32(r1);
         potential_page_fault(s);
@@ -4796,7 +4804,7 @@
         break;
 #endif
     case 0xb2:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         op = (insn >> 16) & 0xff;
         switch (op) {
         case 0x9c: /* STFPC    D2(B2) [S] */
@@ -4813,23 +4821,23 @@
             tcg_temp_free_i64(tmp2);
             break;
         default:
-            disas_b2(s, op, insn);
+            disas_b2(env, s, op, insn);
             break;
         }
         break;
     case 0xb3:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         op = (insn >> 16) & 0xff;
         r3 = (insn >> 12) & 0xf; /* aka m3 */
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
-        disas_b3(s, op, r3, r1, r2);
+        disas_b3(env, s, op, r3, r1, r2);
         break;
 #ifndef CONFIG_USER_ONLY
     case 0xb6: /* STCTL     R1,R3,D2(B2)     [RS] */
         /* Store Control */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4842,8 +4850,8 @@
         break;
     case 0xb7: /* LCTL      R1,R3,D2(B2)     [RS] */
         /* Load Control */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4856,14 +4864,14 @@
         break;
 #endif
     case 0xb9:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         op = (insn >> 16) & 0xff;
-        disas_b9(s, op, r1, r2);
+        disas_b9(env, s, op, r1, r2);
         break;
     case 0xba: /* CS     R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4876,7 +4884,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xbd: /* CLM    R1,M3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = load_reg32(r1);
@@ -4889,7 +4897,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xbe: /* STCM R1,M3,D2(B2) [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = load_reg32(r1);
@@ -4901,7 +4909,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xbf: /* ICM    R1,M3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         if (r3 == 15) {
             /* effectively a 32-bit load */
@@ -4956,16 +4964,16 @@
         break;
     case 0xc0:
     case 0xc2:
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         r1 = (insn >> 36) & 0xf;
         op = (insn >> 32) & 0xf;
         i2 = (int)insn;
         switch (opc) {
         case 0xc0:
-            disas_c0(s, op, r1, i2);
+            disas_c0(env, s, op, r1, i2);
             break;
         case 0xc2:
-            disas_c2(s, op, r1, i2);
+            disas_c2(env, s, op, r1, i2);
             break;
         default:
             tcg_abort();
@@ -4978,7 +4986,7 @@
     case 0xd7: /* XC     D1(L,B1),D2(B2)         [SS] */
     case 0xdc: /* TR     D1(L,B1),D2(B2)         [SS] */
     case 0xf3: /* UNPK   D1(L1,B1),D2(L2,B2)     [SS] */
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         vl = tcg_const_i32((insn >> 32) & 0xff);
         b1 = (insn >> 28) & 0xf;
         b2 = (insn >> 12) & 0xf;
@@ -5026,9 +5034,9 @@
 #ifndef CONFIG_USER_ONLY
     case 0xda: /* MVCP     D1(R1,B1),D2(B2),R3   [SS] */
     case 0xdb: /* MVCS     D1(R1,B1),D2(B2),R3   [SS] */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         potential_page_fault(s);
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         r1 = (insn >> 36) & 0xf;
         r3 = (insn >> 32) & 0xf;
         b1 = (insn >> 28) & 0xf;
@@ -5051,7 +5059,7 @@
         break;
 #endif
     case 0xe3:
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         debug_insn(insn);
         op = insn & 0xff;
         r1 = (insn >> 36) & 0xf;
@@ -5059,19 +5067,19 @@
         b2 = (insn >> 28) & 0xf;
         d2 = ((int)((((insn >> 16) & 0xfff)
            | ((insn << 4) & 0xff000)) << 12)) >> 12;
-        disas_e3(s, op,  r1, x2, b2, d2 );
+        disas_e3(env, s, op,  r1, x2, b2, d2 );
         break;
 #ifndef CONFIG_USER_ONLY
     case 0xe5:
         /* Test Protection */
-        check_privileged(s, ilc);
-        insn = ld_code6(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code6(env, s->pc);
         debug_insn(insn);
-        disas_e5(s, insn);
+        disas_e5(env, s, insn);
         break;
 #endif
     case 0xeb:
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         debug_insn(insn);
         op = insn & 0xff;
         r1 = (insn >> 36) & 0xf;
@@ -5079,10 +5087,10 @@
         b2 = (insn >> 28) & 0xf;
         d2 = ((int)((((insn >> 16) & 0xfff)
            | ((insn << 4) & 0xff000)) << 12)) >> 12;
-        disas_eb(s, op, r1, r3, b2, d2);
+        disas_eb(env, s, op, r1, r3, b2, d2);
         break;
     case 0xed:
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         debug_insn(insn);
         op = insn & 0xff;
         r1 = (insn >> 36) & 0xf;
@@ -5090,11 +5098,11 @@
         b2 = (insn >> 28) & 0xf;
         d2 = (short)((insn >> 16) & 0xfff);
         r1b = (insn >> 12) & 0xf;
-        disas_ed(s, op, r1, x2, b2, d2, r1b);
+        disas_ed(env, s, op, r1, x2, b2, d2, r1b);
         break;
     default:
         qemu_log_mask(LOG_UNIMP, "unimplemented opcode 0x%x\n", opc);
-        gen_illegal_opcode(s, ilc);
+        gen_illegal_opcode(env, s, ilc);
         break;
     }
 
@@ -5163,11 +5171,12 @@
         if (num_insns + 1 == max_insns && (tb->cflags & CF_LAST_IO)) {
             gen_io_start();
         }
-#if defined(S390X_DEBUG_DISAS_VERBOSE)
-        LOG_DISAS("pc " TARGET_FMT_lx "\n",
-                  dc.pc);
-#endif
-        disas_s390_insn(&dc);
+
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
+            tcg_gen_debug_insn_start(dc.pc);
+        }
+
+        disas_s390_insn(env, &dc);
 
         num_insns++;
         if (env->singlestep_enabled) {
@@ -5209,7 +5218,6 @@
         tb->icount = num_insns;
     }
 #if defined(S390X_DEBUG_DISAS)
-    log_cpu_state_mask(CPU_LOG_TB_CPU, env, 0);
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
         qemu_log("IN: %s\n", lookup_symbol(pc_start));
         log_target_disas(pc_start, dc.pc - pc_start, 1);
diff --git a/target-sh4/Makefile.objs b/target-sh4/Makefile.objs
index 2e0e093..ca20f21 100644
--- a/target-sh4/Makefile.objs
+++ b/target-sh4/Makefile.objs
@@ -1,4 +1,2 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index 95e3c7c..6c1a47d 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -1,54 +1,50 @@
 #include "def-helper.h"
 
-DEF_HELPER_0(ldtlb, void)
-DEF_HELPER_0(raise_illegal_instruction, void)
-DEF_HELPER_0(raise_slot_illegal_instruction, void)
-DEF_HELPER_0(raise_fpu_disable, void)
-DEF_HELPER_0(raise_slot_fpu_disable, void)
-DEF_HELPER_0(debug, void)
-DEF_HELPER_1(sleep, void, i32)
-DEF_HELPER_1(trapa, void, i32)
+DEF_HELPER_1(ldtlb, void, env)
+DEF_HELPER_1(raise_illegal_instruction, noreturn, env)
+DEF_HELPER_1(raise_slot_illegal_instruction, noreturn, env)
+DEF_HELPER_1(raise_fpu_disable, noreturn, env)
+DEF_HELPER_1(raise_slot_fpu_disable, noreturn, env)
+DEF_HELPER_1(debug, noreturn, env)
+DEF_HELPER_1(sleep, noreturn, env)
+DEF_HELPER_2(trapa, noreturn, env, i32)
 
-DEF_HELPER_2(movcal, void, i32, i32)
-DEF_HELPER_0(discard_movcal_backup, void)
-DEF_HELPER_1(ocbi, void, i32)
+DEF_HELPER_3(movcal, void, env, i32, i32)
+DEF_HELPER_1(discard_movcal_backup, void, env)
+DEF_HELPER_2(ocbi, void, env, i32)
 
-DEF_HELPER_2(addv, i32, i32, i32)
-DEF_HELPER_2(addc, i32, i32, i32)
-DEF_HELPER_2(subv, i32, i32, i32)
-DEF_HELPER_2(subc, i32, i32, i32)
-DEF_HELPER_2(div1, i32, i32, i32)
-DEF_HELPER_2(macl, void, i32, i32)
-DEF_HELPER_2(macw, void, i32, i32)
+DEF_HELPER_3(div1, i32, env, i32, i32)
+DEF_HELPER_3(macl, void, env, i32, i32)
+DEF_HELPER_3(macw, void, env, i32, i32)
 
-DEF_HELPER_1(ld_fpscr, void, i32)
+DEF_HELPER_2(ld_fpscr, void, env, i32)
 
-DEF_HELPER_1(fabs_FT, f32, f32)
-DEF_HELPER_1(fabs_DT, f64, f64)
-DEF_HELPER_2(fadd_FT, f32, f32, f32)
-DEF_HELPER_2(fadd_DT, f64, f64, f64)
-DEF_HELPER_1(fcnvsd_FT_DT, f64, f32)
-DEF_HELPER_1(fcnvds_DT_FT, f32, f64)
+DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_CONST | TCG_CALL_PURE, f64, f64)
+DEF_HELPER_3(fadd_FT, f32, env, f32, f32)
+DEF_HELPER_3(fadd_DT, f64, env, f64, f64)
+DEF_HELPER_2(fcnvsd_FT_DT, f64, env, f32)
+DEF_HELPER_2(fcnvds_DT_FT, f32, env, f64)
 
-DEF_HELPER_2(fcmp_eq_FT, void, f32, f32)
-DEF_HELPER_2(fcmp_eq_DT, void, f64, f64)
-DEF_HELPER_2(fcmp_gt_FT, void, f32, f32)
-DEF_HELPER_2(fcmp_gt_DT, void, f64, f64)
-DEF_HELPER_2(fdiv_FT, f32, f32, f32)
-DEF_HELPER_2(fdiv_DT, f64, f64, f64)
-DEF_HELPER_1(float_FT, f32, i32)
-DEF_HELPER_1(float_DT, f64, i32)
-DEF_HELPER_3(fmac_FT, f32, f32, f32, f32)
-DEF_HELPER_2(fmul_FT, f32, f32, f32)
-DEF_HELPER_2(fmul_DT, f64, f64, f64)
-DEF_HELPER_1(fneg_T, f32, f32)
-DEF_HELPER_2(fsub_FT, f32, f32, f32)
-DEF_HELPER_2(fsub_DT, f64, f64, f64)
-DEF_HELPER_1(fsqrt_FT, f32, f32)
-DEF_HELPER_1(fsqrt_DT, f64, f64)
-DEF_HELPER_1(ftrc_FT, i32, f32)
-DEF_HELPER_1(ftrc_DT, i32, f64)
-DEF_HELPER_2(fipr, void, i32, i32)
-DEF_HELPER_1(ftrv, void, i32)
+DEF_HELPER_3(fcmp_eq_FT, void, env, f32, f32)
+DEF_HELPER_3(fcmp_eq_DT, void, env, f64, f64)
+DEF_HELPER_3(fcmp_gt_FT, void, env, f32, f32)
+DEF_HELPER_3(fcmp_gt_DT, void, env, f64, f64)
+DEF_HELPER_3(fdiv_FT, f32, env, f32, f32)
+DEF_HELPER_3(fdiv_DT, f64, env, f64, f64)
+DEF_HELPER_2(float_FT, f32, env, i32)
+DEF_HELPER_2(float_DT, f64, env, i32)
+DEF_HELPER_4(fmac_FT, f32, env, f32, f32, f32)
+DEF_HELPER_3(fmul_FT, f32, env, f32, f32)
+DEF_HELPER_3(fmul_DT, f64, env, f64, f64)
+DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_3(fsub_FT, f32, env, f32, f32)
+DEF_HELPER_3(fsub_DT, f64, env, f64, f64)
+DEF_HELPER_2(fsqrt_FT, f32, env, f32)
+DEF_HELPER_2(fsqrt_DT, f64, env, f64)
+DEF_HELPER_2(ftrc_FT, i32, env, f32)
+DEF_HELPER_2(ftrc_DT, i32, env, f64)
+DEF_HELPER_3(fipr, void, env, i32, i32)
+DEF_HELPER_2(ftrv, void, env, i32)
 
 #include "def-helper.h"
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 4054791..60ec4cb 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -19,10 +19,10 @@
 #include <assert.h>
 #include <stdlib.h>
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 
-static void cpu_restore_state_from_retaddr(uintptr_t retaddr)
+static inline void cpu_restore_state_from_retaddr(CPUSH4State *env,
+                                                  uintptr_t retaddr)
 {
     TranslationBlock *tb;
 
@@ -53,26 +53,22 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
-void tlb_fill(CPUSH4State *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUSH4State *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    CPUSH4State *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = cpu_sh4_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
         /* now we have a real cpu fault */
-        cpu_restore_state_from_retaddr(retaddr);
+        cpu_restore_state_from_retaddr(env, retaddr);
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 
 #endif
 
-void helper_ldtlb(void)
+void helper_ldtlb(CPUSH4State *env)
 {
 #ifdef CONFIG_USER_ONLY
     /* XXXXX */
@@ -82,55 +78,53 @@
 #endif
 }
 
-static inline void raise_exception(int index, uintptr_t retaddr)
+static inline void QEMU_NORETURN raise_exception(CPUSH4State *env, int index,
+                                                 uintptr_t retaddr)
 {
     env->exception_index = index;
-    cpu_restore_state_from_retaddr(retaddr);
+    cpu_restore_state_from_retaddr(env, retaddr);
     cpu_loop_exit(env);
 }
 
-void helper_raise_illegal_instruction(void)
+void helper_raise_illegal_instruction(CPUSH4State *env)
 {
-    raise_exception(0x180, GETPC());
+    raise_exception(env, 0x180, 0);
 }
 
-void helper_raise_slot_illegal_instruction(void)
+void helper_raise_slot_illegal_instruction(CPUSH4State *env)
 {
-    raise_exception(0x1a0, GETPC());
+    raise_exception(env, 0x1a0, 0);
 }
 
-void helper_raise_fpu_disable(void)
+void helper_raise_fpu_disable(CPUSH4State *env)
 {
-    raise_exception(0x800, GETPC());
+    raise_exception(env, 0x800, 0);
 }
 
-void helper_raise_slot_fpu_disable(void)
+void helper_raise_slot_fpu_disable(CPUSH4State *env)
 {
-    raise_exception(0x820, GETPC());
+    raise_exception(env, 0x820, 0);
 }
 
-void helper_debug(void)
+void helper_debug(CPUSH4State *env)
 {
-    env->exception_index = EXCP_DEBUG;
-    cpu_loop_exit(env);
+    raise_exception(env, EXCP_DEBUG, 0);
 }
 
-void helper_sleep(uint32_t next_pc)
+void helper_sleep(CPUSH4State *env)
 {
     env->halted = 1;
     env->in_sleep = 1;
-    env->exception_index = EXCP_HLT;
-    env->pc = next_pc;
-    cpu_loop_exit(env);
+    raise_exception(env, EXCP_HLT, 0);
 }
 
-void helper_trapa(uint32_t tra)
+void helper_trapa(CPUSH4State *env, uint32_t tra)
 {
     env->tra = tra << 2;
-    raise_exception(0x160, GETPC());
+    raise_exception(env, 0x160, 0);
 }
 
-void helper_movcal(uint32_t address, uint32_t value)
+void helper_movcal(CPUSH4State *env, uint32_t address, uint32_t value)
 {
     if (cpu_sh4_is_cached (env, address))
     {
@@ -144,7 +138,7 @@
     }
 }
 
-void helper_discard_movcal_backup(void)
+void helper_discard_movcal_backup(CPUSH4State *env)
 {
     memory_content *current = env->movcal_backup;
 
@@ -158,7 +152,7 @@
     } 
 }
 
-void helper_ocbi(uint32_t address)
+void helper_ocbi(CPUSH4State *env, uint32_t address)
 {
     memory_content **current = &(env->movcal_backup);
     while (*current)
@@ -167,7 +161,7 @@
 	if ((a & ~0x1F) == (address & ~0x1F))
 	{
 	    memory_content *next = (*current)->next;
-	    stl(a, (*current)->value);
+            cpu_stl_data(env, a, (*current)->value);
 	    
 	    if (next == NULL)
 	    {
@@ -181,51 +175,6 @@
     }
 }
 
-uint32_t helper_addc(uint32_t arg0, uint32_t arg1)
-{
-    uint32_t tmp0, tmp1;
-
-    tmp1 = arg0 + arg1;
-    tmp0 = arg1;
-    arg1 = tmp1 + (env->sr & 1);
-    if (tmp0 > tmp1)
-	env->sr |= SR_T;
-    else
-	env->sr &= ~SR_T;
-    if (tmp1 > arg1)
-	env->sr |= SR_T;
-    return arg1;
-}
-
-uint32_t helper_addv(uint32_t arg0, uint32_t arg1)
-{
-    uint32_t dest, src, ans;
-
-    if ((int32_t) arg1 >= 0)
-	dest = 0;
-    else
-	dest = 1;
-    if ((int32_t) arg0 >= 0)
-	src = 0;
-    else
-	src = 1;
-    src += dest;
-    arg1 += arg0;
-    if ((int32_t) arg1 >= 0)
-	ans = 0;
-    else
-	ans = 1;
-    ans += dest;
-    if (src == 0 || src == 2) {
-	if (ans == 1)
-	    env->sr |= SR_T;
-	else
-	    env->sr &= ~SR_T;
-    } else
-	env->sr &= ~SR_T;
-    return arg1;
-}
-
 #define T (env->sr & SR_T)
 #define Q (env->sr & SR_Q ? 1 : 0)
 #define M (env->sr & SR_M ? 1 : 0)
@@ -236,7 +185,7 @@
 #define SETM env->sr |= SR_M
 #define CLRM env->sr &= ~SR_M
 
-uint32_t helper_div1(uint32_t arg0, uint32_t arg1)
+uint32_t helper_div1(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     uint32_t tmp0, tmp2;
     uint8_t old_q, tmp1 = 0xff;
@@ -344,7 +293,7 @@
     return arg1;
 }
 
-void helper_macl(uint32_t arg0, uint32_t arg1)
+void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     int64_t res;
 
@@ -360,7 +309,7 @@
     }
 }
 
-void helper_macw(uint32_t arg0, uint32_t arg1)
+void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     int64_t res;
 
@@ -379,62 +328,17 @@
     }
 }
 
-uint32_t helper_subc(uint32_t arg0, uint32_t arg1)
-{
-    uint32_t tmp0, tmp1;
-
-    tmp1 = arg1 - arg0;
-    tmp0 = arg1;
-    arg1 = tmp1 - (env->sr & SR_T);
-    if (tmp0 < tmp1)
-	env->sr |= SR_T;
-    else
-	env->sr &= ~SR_T;
-    if (tmp1 < arg1)
-	env->sr |= SR_T;
-    return arg1;
-}
-
-uint32_t helper_subv(uint32_t arg0, uint32_t arg1)
-{
-    int32_t dest, src, ans;
-
-    if ((int32_t) arg1 >= 0)
-	dest = 0;
-    else
-	dest = 1;
-    if ((int32_t) arg0 >= 0)
-	src = 0;
-    else
-	src = 1;
-    src += dest;
-    arg1 -= arg0;
-    if ((int32_t) arg1 >= 0)
-	ans = 0;
-    else
-	ans = 1;
-    ans += dest;
-    if (src == 1) {
-	if (ans == 1)
-	    env->sr |= SR_T;
-	else
-	    env->sr &= ~SR_T;
-    } else
-	env->sr &= ~SR_T;
-    return arg1;
-}
-
-static inline void set_t(void)
+static inline void set_t(CPUSH4State *env)
 {
     env->sr |= SR_T;
 }
 
-static inline void clr_t(void)
+static inline void clr_t(CPUSH4State *env)
 {
     env->sr &= ~SR_T;
 }
 
-void helper_ld_fpscr(uint32_t val)
+void helper_ld_fpscr(CPUSH4State *env, uint32_t val)
 {
     env->fpscr = val & FPSCR_MASK;
     if ((val & FPSCR_RM_MASK) == FPSCR_RM_ZERO) {
@@ -445,7 +349,7 @@
     set_flush_to_zero((val & FPSCR_DN) != 0, &env->fp_status);
 }
 
-static void update_fpscr(uintptr_t retaddr)
+static void update_fpscr(CPUSH4State *env, uintptr_t retaddr)
 {
     int xcpt, cause, enable;
 
@@ -479,9 +383,7 @@
         cause = (env->fpscr & FPSCR_CAUSE_MASK) >> FPSCR_CAUSE_SHIFT;
         enable = (env->fpscr & FPSCR_ENABLE_MASK) >> FPSCR_ENABLE_SHIFT;
         if (cause & enable) {
-            cpu_restore_state_from_retaddr(retaddr);
-            env->exception_index = 0x120;
-            cpu_loop_exit(env);
+            raise_exception(env, 0x120, retaddr);
         }
     }
 }
@@ -496,156 +398,155 @@
     return float64_abs(t0);
 }
 
-float32 helper_fadd_FT(float32 t0, float32 t1)
+float32 helper_fadd_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_add(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fadd_DT(float64 t0, float64 t1)
+float64 helper_fadd_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_add(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-void helper_fcmp_eq_FT(float32 t0, float32 t1)
+void helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     int relation;
 
     set_float_exception_flags(0, &env->fp_status);
     relation = float32_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
-        update_fpscr(GETPC());
+        update_fpscr(env, GETPC());
     } else if (relation == float_relation_equal) {
-	set_t();
+        set_t(env);
     } else {
-	clr_t();
+        clr_t(env);
     }
 }
 
-void helper_fcmp_eq_DT(float64 t0, float64 t1)
+void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     int relation;
 
     set_float_exception_flags(0, &env->fp_status);
     relation = float64_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
-        update_fpscr(GETPC());
+        update_fpscr(env, GETPC());
     } else if (relation == float_relation_equal) {
-	set_t();
+        set_t(env);
     } else {
-	clr_t();
+        clr_t(env);
     }
 }
 
-void helper_fcmp_gt_FT(float32 t0, float32 t1)
+void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     int relation;
 
     set_float_exception_flags(0, &env->fp_status);
     relation = float32_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
-        update_fpscr(GETPC());
+        update_fpscr(env, GETPC());
     } else if (relation == float_relation_greater) {
-	set_t();
+        set_t(env);
     } else {
-	clr_t();
+        clr_t(env);
     }
 }
 
-void helper_fcmp_gt_DT(float64 t0, float64 t1)
+void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     int relation;
 
     set_float_exception_flags(0, &env->fp_status);
     relation = float64_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
-        update_fpscr(GETPC());
+        update_fpscr(env, GETPC());
     } else if (relation == float_relation_greater) {
-	set_t();
+        set_t(env);
     } else {
-	clr_t();
+        clr_t(env);
     }
 }
 
-float64 helper_fcnvsd_FT_DT(float32 t0)
+float64 helper_fcnvsd_FT_DT(CPUSH4State *env, float32 t0)
 {
     float64 ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = float32_to_float64(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-float32 helper_fcnvds_DT_FT(float64 t0)
+float32 helper_fcnvds_DT_FT(CPUSH4State *env, float64 t0)
 {
     float32 ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = float64_to_float32(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-float32 helper_fdiv_FT(float32 t0, float32 t1)
+float32 helper_fdiv_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_div(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fdiv_DT(float64 t0, float64 t1)
+float64 helper_fdiv_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_div(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float32 helper_float_FT(uint32_t t0)
+float32 helper_float_FT(CPUSH4State *env, uint32_t t0)
 {
     float32 ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = int32_to_float32(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-float64 helper_float_DT(uint32_t t0)
+float64 helper_float_DT(CPUSH4State *env, uint32_t t0)
 {
     float64 ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = int32_to_float64(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-float32 helper_fmac_FT(float32 t0, float32 t1, float32 t2)
+float32 helper_fmac_FT(CPUSH4State *env, float32 t0, float32 t1, float32 t2)
 {
     set_float_exception_flags(0, &env->fp_status);
-    t0 = float32_mul(t0, t1, &env->fp_status);
-    t0 = float32_add(t0, t2, &env->fp_status);
-    update_fpscr(GETPC());
+    t0 = float32_muladd(t0, t1, t2, 0, &env->fp_status);
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float32 helper_fmul_FT(float32 t0, float32 t1)
+float32 helper_fmul_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_mul(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fmul_DT(float64 t0, float64 t1)
+float64 helper_fmul_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_mul(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
@@ -654,57 +555,57 @@
     return float32_chs(t0);
 }
 
-float32 helper_fsqrt_FT(float32 t0)
+float32 helper_fsqrt_FT(CPUSH4State *env, float32 t0)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_sqrt(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fsqrt_DT(float64 t0)
+float64 helper_fsqrt_DT(CPUSH4State *env, float64 t0)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_sqrt(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float32 helper_fsub_FT(float32 t0, float32 t1)
+float32 helper_fsub_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_sub(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fsub_DT(float64 t0, float64 t1)
+float64 helper_fsub_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_sub(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-uint32_t helper_ftrc_FT(float32 t0)
+uint32_t helper_ftrc_FT(CPUSH4State *env, float32 t0)
 {
     uint32_t ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = float32_to_int32_round_to_zero(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-uint32_t helper_ftrc_DT(float64 t0)
+uint32_t helper_ftrc_DT(CPUSH4State *env, float64 t0)
 {
     uint32_t ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = float64_to_int32_round_to_zero(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-void helper_fipr(uint32_t m, uint32_t n)
+void helper_fipr(CPUSH4State *env, uint32_t m, uint32_t n)
 {
     int bank, i;
     float32 r, p;
@@ -719,12 +620,12 @@
                         &env->fp_status);
         r = float32_add(r, p, &env->fp_status);
     }
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
 
     env->fregs[bank + n + 3] = r;
 }
 
-void helper_ftrv(uint32_t n)
+void helper_ftrv(CPUSH4State *env, uint32_t n)
 {
     int bank_matrix, bank_vector;
     int i, j;
@@ -743,7 +644,7 @@
             r[i] = float32_add(r[i], p, &env->fp_status);
         }
     }
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
 
     for (i = 0 ; i < 4 ; i++) {
         env->fregs[bank_vector + i] = r[i];
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 6532ad2..9d955eb 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -18,7 +18,6 @@
  */
 
 #define DEBUG_DISAS
-#define SH4_DEBUG_DISAS
 //#define SH4_SINGLE_STEP
 
 #include "cpu.h"
@@ -32,8 +31,6 @@
 typedef struct DisasContext {
     struct TranslationBlock *tb;
     target_ulong pc;
-    uint32_t sr;
-    uint32_t fpscr;
     uint16_t opcode;
     uint32_t flags;
     int bstate;
@@ -47,7 +44,7 @@
 #if defined(CONFIG_USER_ONLY)
 #define IS_USER(ctx) 1
 #else
-#define IS_USER(ctx) (!(ctx->sr & SR_MD))
+#define IS_USER(ctx) (!(ctx->flags & SR_MD))
 #endif
 
 enum {
@@ -276,7 +273,7 @@
     } else {
         tcg_gen_movi_i32(cpu_pc, dest);
         if (ctx->singlestep_enabled)
-            gen_helper_debug();
+            gen_helper_debug(cpu_env);
         tcg_gen_exit_tb(0);
     }
 }
@@ -288,7 +285,7 @@
 	   delayed jump as immediate jump are conditinal jumps */
 	tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
 	if (ctx->singlestep_enabled)
-	    gen_helper_debug();
+            gen_helper_debug(cpu_env);
 	tcg_gen_exit_tb(0);
     } else {
 	gen_goto_tb(ctx, 0, ctx->delayed_pc);
@@ -339,16 +336,6 @@
     gen_jump(ctx);
 }
 
-static inline void gen_set_t(void)
-{
-    tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T);
-}
-
-static inline void gen_clr_t(void)
-{
-    tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
-}
-
 static inline void gen_cmp(int cond, TCGv t0, TCGv t1)
 {
     TCGv t;
@@ -423,44 +410,47 @@
 #define B11_8 ((ctx->opcode >> 8) & 0xf)
 #define B15_12 ((ctx->opcode >> 12) & 0xf)
 
-#define REG(x) ((x) < 8 && (ctx->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB) ? \
-		(cpu_gregs[x + 16]) : (cpu_gregs[x]))
+#define REG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) == (SR_MD | SR_RB) \
+                ? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
-#define ALTREG(x) ((x) < 8 && (ctx->sr & (SR_MD | SR_RB)) != (SR_MD | SR_RB) \
+#define ALTREG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) != (SR_MD | SR_RB)\
 		? (cpu_gregs[x + 16]) : (cpu_gregs[x]))
 
-#define FREG(x) (ctx->fpscr & FPSCR_FR ? (x) ^ 0x10 : (x))
+#define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x))
 #define XHACK(x) ((((x) & 1 ) << 4) | ((x) & 0xe))
-#define XREG(x) (ctx->fpscr & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
+#define XREG(x) (ctx->flags & FPSCR_FR ? XHACK(x) ^ 0x10 : XHACK(x))
 #define DREG(x) FREG(x) /* Assumes lsb of (x) is always 0 */
 
 #define CHECK_NOT_DELAY_SLOT \
   if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))     \
   {                                                           \
-      gen_helper_raise_slot_illegal_instruction();            \
-      ctx->bstate = BS_EXCP;                                  \
+      tcg_gen_movi_i32(cpu_pc, ctx->pc);                      \
+      gen_helper_raise_slot_illegal_instruction(cpu_env);     \
+      ctx->bstate = BS_BRANCH;                                \
       return;                                                 \
   }
 
 #define CHECK_PRIVILEGED                                        \
   if (IS_USER(ctx)) {                                           \
+      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
       if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
-         gen_helper_raise_slot_illegal_instruction();           \
+          gen_helper_raise_slot_illegal_instruction(cpu_env);   \
       } else {                                                  \
-         gen_helper_raise_illegal_instruction();                \
+          gen_helper_raise_illegal_instruction(cpu_env);        \
       }                                                         \
-      ctx->bstate = BS_EXCP;                                    \
+      ctx->bstate = BS_BRANCH;                                  \
       return;                                                   \
   }
 
 #define CHECK_FPU_ENABLED                                       \
   if (ctx->flags & SR_FD) {                                     \
+      tcg_gen_movi_i32(cpu_pc, ctx->pc);                        \
       if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
-          gen_helper_raise_slot_fpu_disable();                  \
+          gen_helper_raise_slot_fpu_disable(cpu_env);           \
       } else {                                                  \
-          gen_helper_raise_fpu_disable();                       \
+          gen_helper_raise_fpu_disable(cpu_env);                \
       }                                                         \
-      ctx->bstate = BS_EXCP;                                    \
+      ctx->bstate = BS_BRANCH;                                  \
       return;                                                   \
   }
 
@@ -492,7 +482,7 @@
 	  if (opcode != 0x0093 /* ocbi */
 	      && opcode != 0x00c3 /* movca.l */)
 	      {
-		  gen_helper_discard_movcal_backup ();
+                  gen_helper_discard_movcal_backup(cpu_env);
 		  ctx->has_movcal = 0;
 	      }
 	}
@@ -519,11 +509,11 @@
 	tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_S);
 	return;
     case 0x0008:		/* clrt */
-	gen_clr_t();
+        tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
 	return;
     case 0x0038:		/* ldtlb */
 	CHECK_PRIVILEGED
-	gen_helper_ldtlb();
+        gen_helper_ldtlb(cpu_env);
 	return;
     case 0x002b:		/* rte */
 	CHECK_PRIVILEGED
@@ -537,21 +527,22 @@
 	tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_S);
 	return;
     case 0x0018:		/* sett */
-	gen_set_t();
+        tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T);
 	return;
     case 0xfbfd:		/* frchg */
 	tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR);
 	ctx->bstate = BS_STOP;
 	return;
     case 0xf3fd:		/* fschg */
-	tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ);
+        tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_SZ);
 	ctx->bstate = BS_STOP;
 	return;
     case 0x0009:		/* nop */
 	return;
     case 0x001b:		/* sleep */
 	CHECK_PRIVILEGED
-	gen_helper_sleep(tcg_const_i32(ctx->pc + 2));
+        tcg_gen_movi_i32(cpu_pc, ctx->pc + 2);
+        gen_helper_sleep(cpu_env);
 	return;
     }
 
@@ -732,17 +723,7 @@
 	}
 	return;
     case 0x6009:		/* swap.w Rm,Rn */
-	{
-	    TCGv high, low;
-	    high = tcg_temp_new();
-	    tcg_gen_shli_i32(high, REG(B7_4), 16);
-	    low = tcg_temp_new();
-	    tcg_gen_shri_i32(low, REG(B7_4), 16);
-	    tcg_gen_ext16u_i32(low, low);
-	    tcg_gen_or_i32(REG(B11_8), high, low);
-	    tcg_temp_free(low);
-	    tcg_temp_free(high);
-	}
+        tcg_gen_rotli_i32(REG(B11_8), REG(B7_4), 16);
 	return;
     case 0x200d:		/* xtrct Rm,Rn */
 	{
@@ -751,7 +732,6 @@
 	    tcg_gen_shli_i32(high, REG(B7_4), 16);
 	    low = tcg_temp_new();
 	    tcg_gen_shri_i32(low, REG(B11_8), 16);
-	    tcg_gen_ext16u_i32(low, low);
 	    tcg_gen_or_i32(REG(B11_8), high, low);
 	    tcg_temp_free(low);
 	    tcg_temp_free(high);
@@ -761,10 +741,43 @@
 	tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4));
 	return;
     case 0x300e:		/* addc Rm,Rn */
-	gen_helper_addc(REG(B11_8), REG(B7_4), REG(B11_8));
+        {
+            TCGv t0, t1, t2;
+            t0 = tcg_temp_new();
+            tcg_gen_andi_i32(t0, cpu_sr, SR_T);
+            t1 = tcg_temp_new();
+            tcg_gen_add_i32(t1, REG(B7_4), REG(B11_8));
+            tcg_gen_add_i32(t0, t0, t1);
+            t2 = tcg_temp_new();
+            tcg_gen_setcond_i32(TCG_COND_GTU, t2, REG(B11_8), t1);
+            tcg_gen_setcond_i32(TCG_COND_GTU, t1, t1, t0);
+            tcg_gen_or_i32(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_temp_free(t1);
+            tcg_gen_mov_i32(REG(B11_8), t0);
+            tcg_temp_free(t0);
+        }
 	return;
     case 0x300f:		/* addv Rm,Rn */
-	gen_helper_addv(REG(B11_8), REG(B7_4), REG(B11_8));
+        {
+            TCGv t0, t1, t2;
+            t0 = tcg_temp_new();
+            tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8));
+            t1 = tcg_temp_new();
+            tcg_gen_xor_i32(t1, t0, REG(B11_8));
+            t2 = tcg_temp_new();
+            tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8));
+            tcg_gen_andc_i32(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_shri_i32(t1, t1, 31);
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_temp_free(t1);
+            tcg_gen_mov_i32(REG(B7_4), t0);
+            tcg_temp_free(t0);
+        }
 	return;
     case 0x2009:		/* and Rm,Rn */
 	tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4));
@@ -817,7 +830,7 @@
 	}
 	return;
     case 0x3004:		/* div1 Rm,Rn */
-	gen_helper_div1(REG(B11_8), REG(B7_4), REG(B11_8));
+        gen_helper_div1(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
 	return;
     case 0x300d:		/* dmuls.l Rm,Rn */
 	{
@@ -870,7 +883,7 @@
 	    tcg_gen_qemu_ld32s(arg0, REG(B7_4), ctx->memidx);
 	    arg1 = tcg_temp_new();
 	    tcg_gen_qemu_ld32s(arg1, REG(B11_8), ctx->memidx);
-	    gen_helper_macl(arg0, arg1);
+            gen_helper_macl(cpu_env, arg0, arg1);
 	    tcg_temp_free(arg1);
 	    tcg_temp_free(arg0);
 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
@@ -884,7 +897,7 @@
 	    tcg_gen_qemu_ld32s(arg0, REG(B7_4), ctx->memidx);
 	    arg1 = tcg_temp_new();
 	    tcg_gen_qemu_ld32s(arg1, REG(B11_8), ctx->memidx);
-	    gen_helper_macw(arg0, arg1);
+            gen_helper_macw(cpu_env, arg0, arg1);
 	    tcg_temp_free(arg1);
 	    tcg_temp_free(arg0);
 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
@@ -1013,10 +1026,43 @@
 	tcg_gen_sub_i32(REG(B11_8), REG(B11_8), REG(B7_4));
 	return;
     case 0x300a:		/* subc Rm,Rn */
-	gen_helper_subc(REG(B11_8), REG(B7_4), REG(B11_8));
+        {
+            TCGv t0, t1, t2;
+            t0 = tcg_temp_new();
+            tcg_gen_andi_i32(t0, cpu_sr, SR_T);
+            t1 = tcg_temp_new();
+            tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4));
+            tcg_gen_sub_i32(t0, t1, t0);
+            t2 = tcg_temp_new();
+            tcg_gen_setcond_i32(TCG_COND_LTU, t2, REG(B11_8), t1);
+            tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0);
+            tcg_gen_or_i32(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_temp_free(t1);
+            tcg_gen_mov_i32(REG(B11_8), t0);
+            tcg_temp_free(t0);
+        }
 	return;
     case 0x300b:		/* subv Rm,Rn */
-	gen_helper_subv(REG(B11_8), REG(B7_4), REG(B11_8));
+        {
+            TCGv t0, t1, t2;
+            t0 = tcg_temp_new();
+            tcg_gen_sub_i32(t0, REG(B11_8), REG(B7_4));
+            t1 = tcg_temp_new();
+            tcg_gen_xor_i32(t1, t0, REG(B7_4));
+            t2 = tcg_temp_new();
+            tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4));
+            tcg_gen_and_i32(t1, t1, t2);
+            tcg_temp_free(t2);
+            tcg_gen_shri_i32(t1, t1, 31);
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
+            tcg_gen_or_i32(cpu_sr, cpu_sr, t1);
+            tcg_temp_free(t1);
+            tcg_gen_mov_i32(REG(B11_8), t0);
+            tcg_temp_free(t0);
+        }
 	return;
     case 0x2008:		/* tst Rm,Rn */
 	{
@@ -1031,7 +1077,7 @@
 	return;
     case 0xf00c: /* fmov {F,D,X}Rm,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv_i64 fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, XREG(B7_4));
 	    gen_store_fpr64(fp, XREG(B11_8));
@@ -1042,7 +1088,7 @@
 	return;
     case 0xf00a: /* fmov {F,D,X}Rm,@Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B7_4);
 	    tcg_gen_addi_i32(addr_hi, REG(B11_8), 4);
@@ -1055,7 +1101,7 @@
 	return;
     case 0xf008: /* fmov @Rm,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
@@ -1068,7 +1114,7 @@
 	return;
     case 0xf009: /* fmov @Rm+,{F,D,X}Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv addr_hi = tcg_temp_new();
 	    int fr = XREG(B11_8);
 	    tcg_gen_addi_i32(addr_hi, REG(B7_4), 4);
@@ -1083,7 +1129,7 @@
 	return;
     case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_SZ) {
+        if (ctx->flags & FPSCR_SZ) {
 	    TCGv addr = tcg_temp_new_i32();
 	    int fr = XREG(B7_4);
 	    tcg_gen_subi_i32(addr, REG(B11_8), 4);
@@ -1106,7 +1152,7 @@
 	{
 	    TCGv addr = tcg_temp_new_i32();
 	    tcg_gen_add_i32(addr, REG(B7_4), REG(0));
-	    if (ctx->fpscr & FPSCR_SZ) {
+            if (ctx->flags & FPSCR_SZ) {
 		int fr = XREG(B11_8);
 		tcg_gen_qemu_ld32u(cpu_fregs[fr	 ], addr, ctx->memidx);
 		tcg_gen_addi_i32(addr, addr, 4);
@@ -1122,7 +1168,7 @@
 	{
 	    TCGv addr = tcg_temp_new();
 	    tcg_gen_add_i32(addr, REG(B11_8), REG(0));
-	    if (ctx->fpscr & FPSCR_SZ) {
+            if (ctx->flags & FPSCR_SZ) {
 		int fr = XREG(B7_4);
 		tcg_gen_qemu_ld32u(cpu_fregs[fr	 ], addr, ctx->memidx);
 		tcg_gen_addi_i32(addr, addr, 4);
@@ -1141,7 +1187,7 @@
     case 0xf005: /* fcmp/gt Rm,Rn - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
 	{
 	    CHECK_FPU_ENABLED
-	    if (ctx->fpscr & FPSCR_PR) {
+            if (ctx->flags & FPSCR_PR) {
                 TCGv_i64 fp0, fp1;
 
 		if (ctx->opcode & 0x0110)
@@ -1152,22 +1198,22 @@
 		gen_load_fpr64(fp1, DREG(B7_4));
                 switch (ctx->opcode & 0xf00f) {
                 case 0xf000:		/* fadd Rm,Rn */
-                    gen_helper_fadd_DT(fp0, fp0, fp1);
+                    gen_helper_fadd_DT(fp0, cpu_env, fp0, fp1);
                     break;
                 case 0xf001:		/* fsub Rm,Rn */
-                    gen_helper_fsub_DT(fp0, fp0, fp1);
+                    gen_helper_fsub_DT(fp0, cpu_env, fp0, fp1);
                     break;
                 case 0xf002:		/* fmul Rm,Rn */
-                    gen_helper_fmul_DT(fp0, fp0, fp1);
+                    gen_helper_fmul_DT(fp0, cpu_env, fp0, fp1);
                     break;
                 case 0xf003:		/* fdiv Rm,Rn */
-                    gen_helper_fdiv_DT(fp0, fp0, fp1);
+                    gen_helper_fdiv_DT(fp0, cpu_env, fp0, fp1);
                     break;
                 case 0xf004:		/* fcmp/eq Rm,Rn */
-                    gen_helper_fcmp_eq_DT(fp0, fp1);
+                    gen_helper_fcmp_eq_DT(cpu_env, fp0, fp1);
                     return;
                 case 0xf005:		/* fcmp/gt Rm,Rn */
-                    gen_helper_fcmp_gt_DT(fp0, fp1);
+                    gen_helper_fcmp_gt_DT(cpu_env, fp0, fp1);
                     return;
                 }
 		gen_store_fpr64(fp0, DREG(B11_8));
@@ -1176,22 +1222,32 @@
 	    } else {
                 switch (ctx->opcode & 0xf00f) {
                 case 0xf000:		/* fadd Rm,Rn */
-                    gen_helper_fadd_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fadd_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                       cpu_fregs[FREG(B11_8)],
+                                       cpu_fregs[FREG(B7_4)]);
                     break;
                 case 0xf001:		/* fsub Rm,Rn */
-                    gen_helper_fsub_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fsub_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                       cpu_fregs[FREG(B11_8)],
+                                       cpu_fregs[FREG(B7_4)]);
                     break;
                 case 0xf002:		/* fmul Rm,Rn */
-                    gen_helper_fmul_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fmul_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                       cpu_fregs[FREG(B11_8)],
+                                       cpu_fregs[FREG(B7_4)]);
                     break;
                 case 0xf003:		/* fdiv Rm,Rn */
-                    gen_helper_fdiv_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fdiv_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                       cpu_fregs[FREG(B11_8)],
+                                       cpu_fregs[FREG(B7_4)]);
                     break;
                 case 0xf004:		/* fcmp/eq Rm,Rn */
-                    gen_helper_fcmp_eq_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fcmp_eq_FT(cpu_env, cpu_fregs[FREG(B11_8)],
+                                          cpu_fregs[FREG(B7_4)]);
                     return;
                 case 0xf005:		/* fcmp/gt Rm,Rn */
-                    gen_helper_fcmp_gt_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fcmp_gt_FT(cpu_env, cpu_fregs[FREG(B11_8)],
+                                          cpu_fregs[FREG(B7_4)]);
                     return;
                 }
 	    }
@@ -1200,11 +1256,12 @@
     case 0xf00e: /* fmac FR0,RM,Rn */
         {
             CHECK_FPU_ENABLED
-            if (ctx->fpscr & FPSCR_PR) {
+            if (ctx->flags & FPSCR_PR) {
                 break; /* illegal instruction */
             } else {
-                gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)],
-                                   cpu_fregs[FREG(0)], cpu_fregs[FREG(B7_4)], cpu_fregs[FREG(B11_8)]);
+                gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                   cpu_fregs[FREG(0)], cpu_fregs[FREG(B7_4)],
+                                   cpu_fregs[FREG(B11_8)]);
                 return;
             }
         }
@@ -1355,8 +1412,9 @@
 	{
 	    TCGv imm;
 	    CHECK_NOT_DELAY_SLOT
+            tcg_gen_movi_i32(cpu_pc, ctx->pc);
 	    imm = tcg_const_i32(B7_0);
-	    gen_helper_trapa(imm);
+            gen_helper_trapa(cpu_env, imm);
 	    tcg_temp_free(imm);
 	    ctx->bstate = BS_BRANCH;
 	}
@@ -1531,7 +1589,7 @@
 	LDST(fpul, 0x405a, 0x4056, 0x005a, 0x4052, {CHECK_FPU_ENABLED})
     case 0x406a:		/* lds Rm,FPSCR */
 	CHECK_FPU_ENABLED
-	gen_helper_ld_fpscr(REG(B11_8));
+        gen_helper_ld_fpscr(cpu_env, REG(B11_8));
 	ctx->bstate = BS_STOP;
 	return;
     case 0x4066:		/* lds.l @Rm+,FPSCR */
@@ -1540,7 +1598,7 @@
 	    TCGv addr = tcg_temp_new();
 	    tcg_gen_qemu_ld32s(addr, REG(B11_8), ctx->memidx);
 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
-	    gen_helper_ld_fpscr(addr);
+            gen_helper_ld_fpscr(cpu_env, addr);
 	    tcg_temp_free(addr);
 	    ctx->bstate = BS_STOP;
 	}
@@ -1567,7 +1625,7 @@
         {
             TCGv val = tcg_temp_new();
             tcg_gen_qemu_ld32u(val, REG(B11_8), ctx->memidx);
-            gen_helper_movcal (REG(B11_8), val);            
+            gen_helper_movcal(cpu_env, REG(B11_8), val);
             tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
         }
         ctx->has_movcal = 1;
@@ -1594,7 +1652,7 @@
         */
         if (ctx->features & SH_FEATURE_SH4A) {
 	    int label = gen_new_label();
-	    gen_clr_t();
+            tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T);
 	    tcg_gen_or_i32(cpu_sr, cpu_sr, cpu_ldst);
 	    tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label);
 	    tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
@@ -1619,7 +1677,7 @@
 	    break;
     case 0x0093:		/* ocbi @Rn */
 	{
-	    gen_helper_ocbi (REG(B11_8));
+            gen_helper_ocbi(cpu_env, REG(B11_8));
 	}
 	return;
     case 0x00a3:		/* ocbp @Rn */
@@ -1728,32 +1786,32 @@
 	return;
     case 0xf02d: /* float FPUL,FRn/DRn - FPSCR: R[PR,Enable.I]/W[Cause,Flag] */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_PR) {
+        if (ctx->flags & FPSCR_PR) {
 	    TCGv_i64 fp;
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    fp = tcg_temp_new_i64();
-	    gen_helper_float_DT(fp, cpu_fpul);
+            gen_helper_float_DT(fp, cpu_env, cpu_fpul);
 	    gen_store_fpr64(fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	}
 	else {
-	    gen_helper_float_FT(cpu_fregs[FREG(B11_8)], cpu_fpul);
+            gen_helper_float_FT(cpu_fregs[FREG(B11_8)], cpu_env, cpu_fpul);
 	}
 	return;
     case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_PR) {
+        if (ctx->flags & FPSCR_PR) {
 	    TCGv_i64 fp;
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, DREG(B11_8));
-	    gen_helper_ftrc_DT(cpu_fpul, fp);
+            gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp);
 	    tcg_temp_free_i64(fp);
 	}
 	else {
-	    gen_helper_ftrc_FT(cpu_fpul, cpu_fregs[FREG(B11_8)]);
+            gen_helper_ftrc_FT(cpu_fpul, cpu_env, cpu_fregs[FREG(B11_8)]);
 	}
 	return;
     case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */
@@ -1764,7 +1822,7 @@
 	return;
     case 0xf05d: /* fabs FRn/DRn */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_PR) {
+        if (ctx->flags & FPSCR_PR) {
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
@@ -1778,16 +1836,17 @@
 	return;
     case 0xf06d: /* fsqrt FRn */
 	CHECK_FPU_ENABLED
-	if (ctx->fpscr & FPSCR_PR) {
+        if (ctx->flags & FPSCR_PR) {
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, DREG(B11_8));
-	    gen_helper_fsqrt_DT(fp, fp);
+            gen_helper_fsqrt_DT(fp, cpu_env, fp);
 	    gen_store_fpr64(fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	} else {
-	    gen_helper_fsqrt_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
+            gen_helper_fsqrt_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                cpu_fregs[FREG(B11_8)]);
 	}
 	return;
     case 0xf07d: /* fsrra FRn */
@@ -1795,13 +1854,13 @@
 	break;
     case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
-	if (!(ctx->fpscr & FPSCR_PR)) {
+        if (!(ctx->flags & FPSCR_PR)) {
 	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0);
 	}
 	return;
     case 0xf09d: /* fldi1 FRn - FPSCR: R[PR] */
 	CHECK_FPU_ENABLED
-	if (!(ctx->fpscr & FPSCR_PR)) {
+        if (!(ctx->flags & FPSCR_PR)) {
 	    tcg_gen_movi_i32(cpu_fregs[FREG(B11_8)], 0x3f800000);
 	}
 	return;
@@ -1809,7 +1868,7 @@
 	CHECK_FPU_ENABLED
 	{
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_helper_fcnvsd_FT_DT(fp, cpu_fpul);
+            gen_helper_fcnvsd_FT_DT(fp, cpu_env, cpu_fpul);
 	    gen_store_fpr64(fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	}
@@ -1819,17 +1878,17 @@
 	{
 	    TCGv_i64 fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, DREG(B11_8));
-	    gen_helper_fcnvds_DT_FT(cpu_fpul, fp);
+            gen_helper_fcnvds_DT_FT(cpu_fpul, cpu_env, fp);
 	    tcg_temp_free_i64(fp);
 	}
 	return;
     case 0xf0ed: /* fipr FVm,FVn */
         CHECK_FPU_ENABLED
-        if ((ctx->fpscr & FPSCR_PR) == 0) {
+        if ((ctx->flags & FPSCR_PR) == 0) {
             TCGv m, n;
             m = tcg_const_i32((ctx->opcode >> 8) & 3);
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
-            gen_helper_fipr(m, n);
+            gen_helper_fipr(cpu_env, m, n);
             tcg_temp_free(m);
             tcg_temp_free(n);
             return;
@@ -1838,10 +1897,10 @@
     case 0xf0fd: /* ftrv XMTRX,FVn */
         CHECK_FPU_ENABLED
         if ((ctx->opcode & 0x0300) == 0x0100 &&
-            (ctx->fpscr & FPSCR_PR) == 0) {
+            (ctx->flags & FPSCR_PR) == 0) {
             TCGv n;
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
-            gen_helper_ftrv(n);
+            gen_helper_ftrv(cpu_env, n);
             tcg_temp_free(n);
             return;
         }
@@ -1852,19 +1911,20 @@
 	    ctx->opcode, ctx->pc);
     fflush(stderr);
 #endif
+    tcg_gen_movi_i32(cpu_pc, ctx->pc);
     if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
-       gen_helper_raise_slot_illegal_instruction();
+        gen_helper_raise_slot_illegal_instruction(cpu_env);
     } else {
-       gen_helper_raise_illegal_instruction();
+        gen_helper_raise_illegal_instruction(cpu_env);
     }
-    ctx->bstate = BS_EXCP;
+    ctx->bstate = BS_BRANCH;
 }
 
 static void decode_opc(DisasContext * ctx)
 {
     uint32_t old_flags = ctx->flags;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(ctx->pc);
     }
 
@@ -1911,16 +1971,14 @@
     ctx.pc = pc_start;
     ctx.flags = (uint32_t)tb->flags;
     ctx.bstate = BS_NONE;
-    ctx.sr = env->sr;
-    ctx.fpscr = env->fpscr;
-    ctx.memidx = (env->sr & SR_MD) == 0 ? 1 : 0;
+    ctx.memidx = (ctx.flags & SR_MD) == 0 ? 1 : 0;
     /* We don't know if the delayed pc came from a dynamic or static branch,
        so assume it is a dynamic branch.  */
     ctx.delayed_pc = -1; /* use delayed pc from env pointer */
     ctx.tb = tb;
     ctx.singlestep_enabled = env->singlestep_enabled;
     ctx.features = env->features;
-    ctx.has_movcal = (tb->flags & TB_FLAG_PENDING_MOVCA);
+    ctx.has_movcal = (ctx.flags & TB_FLAG_PENDING_MOVCA);
 
     ii = -1;
     num_insns = 0;
@@ -1934,8 +1992,8 @@
                 if (ctx.pc == bp->pc) {
 		    /* We have hit a breakpoint - make sure PC is up-to-date */
 		    tcg_gen_movi_i32(cpu_pc, ctx.pc);
-		    gen_helper_debug();
-		    ctx.bstate = BS_EXCP;
+                    gen_helper_debug(cpu_env);
+                    ctx.bstate = BS_BRANCH;
 		    break;
 		}
 	    }
@@ -1958,7 +2016,7 @@
 	fprintf(stderr, "Loading opcode at address 0x%08x\n", ctx.pc);
 	fflush(stderr);
 #endif
-	ctx.opcode = lduw_code(ctx.pc);
+        ctx.opcode = cpu_lduw_code(env, ctx.pc);
 	decode_opc(&ctx);
         num_insns++;
 	ctx.pc += 2;
@@ -1975,7 +2033,7 @@
         gen_io_end();
     if (env->singlestep_enabled) {
         tcg_gen_movi_i32(cpu_pc, ctx.pc);
-        gen_helper_debug();
+        gen_helper_debug(cpu_env);
     } else {
 	switch (ctx.bstate) {
         case BS_STOP:
@@ -2010,9 +2068,6 @@
     }
 
 #ifdef DEBUG_DISAS
-#ifdef SH4_DEBUG_DISAS
-    qemu_log_mask(CPU_LOG_TB_IN_ASM, "\n");
-#endif
     if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
 	qemu_log("IN:\n");	/* , lookup_symbol(pc_start)); */
 	log_target_disas(pc_start, ctx.pc - pc_start, 0);
diff --git a/target-sparc/Makefile.objs b/target-sparc/Makefile.objs
index a93e07d..9fc42ea 100644
--- a/target-sparc/Makefile.objs
+++ b/target-sparc/Makefile.objs
@@ -4,5 +4,3 @@
 obj-$(TARGET_SPARC) += int32_helper.o
 obj-$(TARGET_SPARC64) += int64_helper.o
 obj-$(TARGET_SPARC64) += vis_helper.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-sparc/cpu.c b/target-sparc/cpu.c
index f7c004c..882d306 100644
--- a/target-sparc/cpu.c
+++ b/target-sparc/cpu.c
@@ -643,7 +643,7 @@
 {
     unsigned int i;
     const sparc_def_t *def = NULL;
-    char *s = strdup(cpu_model);
+    char *s = g_strdup(cpu_model);
     char *featurestr, *name = strtok(s, ",");
     uint32_t plus_features = 0;
     uint32_t minus_features = 0;
@@ -735,7 +735,7 @@
 #ifdef DEBUG_FEATURES
     print_features(stderr, fprintf, cpu_def->features, NULL);
 #endif
-    free(s);
+    g_free(s);
     return 0;
 
  error:
@@ -792,7 +792,6 @@
 
     cpu_fprintf(f, "pc: " TARGET_FMT_lx "  npc: " TARGET_FMT_lx "\n", env->pc,
                 env->npc);
-    cpu_fprintf(f, "General Registers:\n");
 
     for (i = 0; i < 8; i++) {
         if (i % REGS_PER_LINE == 0) {
@@ -803,7 +802,6 @@
             cpu_fprintf(f, "\n");
         }
     }
-    cpu_fprintf(f, "\nCurrent Register Window:\n");
     for (x = 0; x < 3; x++) {
         for (i = 0; i < 8; i++) {
             if (i % REGS_PER_LINE == 0) {
@@ -817,10 +815,10 @@
             }
         }
     }
-    cpu_fprintf(f, "\nFloating Point Registers:\n");
+
     for (i = 0; i < TARGET_DPREGS; i++) {
         if ((i & 3) == 0) {
-            cpu_fprintf(f, "%%f%02d:", i * 2);
+            cpu_fprintf(f, "%%f%02d: ", i * 2);
         }
         cpu_fprintf(f, " %016" PRIx64, env->fpr[i].ll);
         if ((i & 3) == 3) {
@@ -850,6 +848,7 @@
     cpu_fprintf(f, "fsr: " TARGET_FMT_lx " y: " TARGET_FMT_lx "\n",
                 env->fsr, env->y);
 #endif
+    cpu_fprintf(f, "\n");
 }
 
 static void sparc_cpu_initfn(Object *obj)
diff --git a/target-sparc/cpu.h b/target-sparc/cpu.h
index e16b7b3..214d01d 100644
--- a/target-sparc/cpu.h
+++ b/target-sparc/cpu.h
@@ -714,6 +714,7 @@
 void QEMU_NORETURN do_unaligned_access(CPUSPARCState *env, target_ulong addr,
                                        int is_write, int is_user,
                                        uintptr_t retaddr);
+void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr);
 
 #define TB_FLAG_FPU_ENABLED (1 << 4)
 #define TB_FLAG_AM_ENABLED (1 << 5)
diff --git a/target-sparc/helper.c b/target-sparc/helper.c
index 65e1740..556ac28 100644
--- a/target-sparc/helper.c
+++ b/target-sparc/helper.c
@@ -75,6 +75,7 @@
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
+        cpu_restore_state2(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
 
@@ -113,6 +114,7 @@
     x1 = (b & 0xffffffff);
 
     if (x1 == 0) {
+        cpu_restore_state2(env, GETPC());
         helper_raise_exception(env, TT_DIV_ZERO);
     }
 
@@ -139,3 +141,87 @@
 {
     return helper_sdiv_common(env, a, b, 1);
 }
+
+#ifdef TARGET_SPARC64
+int64_t helper_sdivx(CPUSPARCState *env, int64_t a, int64_t b)
+{
+    if (b == 0) {
+        /* Raise divide by zero trap.  */
+        cpu_restore_state2(env, GETPC());
+        helper_raise_exception(env, TT_DIV_ZERO);
+    } else if (b == -1) {
+        /* Avoid overflow trap with i386 divide insn.  */
+        return -a;
+    } else {
+        return a / b;
+    }
+}
+
+uint64_t helper_udivx(CPUSPARCState *env, uint64_t a, uint64_t b)
+{
+    if (b == 0) {
+        /* Raise divide by zero trap.  */
+        cpu_restore_state2(env, GETPC());
+        helper_raise_exception(env, TT_DIV_ZERO);
+    }
+    return a / b;
+}
+#endif
+
+target_ulong helper_taddcctv(CPUSPARCState *env, target_ulong src1,
+                             target_ulong src2)
+{
+    target_ulong dst;
+
+    /* Tag overflow occurs if either input has bits 0 or 1 set.  */
+    if ((src1 | src2) & 3) {
+        goto tag_overflow;
+    }
+
+    dst = src1 + src2;
+
+    /* Tag overflow occurs if the addition overflows.  */
+    if (~(src1 ^ src2) & (src1 ^ dst) & (1u << 31)) {
+        goto tag_overflow;
+    }
+
+    /* Only modify the CC after any exceptions have been generated.  */
+    env->cc_op = CC_OP_TADDTV;
+    env->cc_src = src1;
+    env->cc_src2 = src2;
+    env->cc_dst = dst;
+    return dst;
+
+ tag_overflow:
+    cpu_restore_state2(env, GETPC());
+    helper_raise_exception(env, TT_TOVF);
+}
+
+target_ulong helper_tsubcctv(CPUSPARCState *env, target_ulong src1,
+                             target_ulong src2)
+{
+    target_ulong dst;
+
+    /* Tag overflow occurs if either input has bits 0 or 1 set.  */
+    if ((src1 | src2) & 3) {
+        goto tag_overflow;
+    }
+
+    dst = src1 - src2;
+
+    /* Tag overflow occurs if the subtraction overflows.  */
+    if ((src1 ^ src2) & (src1 ^ dst) & (1u << 31)) {
+        goto tag_overflow;
+    }
+
+    /* Only modify the CC after any exceptions have been generated.  */
+    env->cc_op = CC_OP_TSUBTV;
+    env->cc_src = src1;
+    env->cc_src2 = src2;
+    env->cc_dst = dst;
+    return dst;
+
+ tag_overflow:
+    cpu_restore_state2(env, GETPC());
+    helper_raise_exception(env, TT_TOVF);
+}
diff --git a/target-sparc/helper.h b/target-sparc/helper.h
index e3c7fdd..e1ae3c7 100644
--- a/target-sparc/helper.h
+++ b/target-sparc/helper.h
@@ -38,6 +38,12 @@
 DEF_HELPER_3(udiv_cc, tl, env, tl, tl)
 DEF_HELPER_3(sdiv, tl, env, tl, tl)
 DEF_HELPER_3(sdiv_cc, tl, env, tl, tl)
+DEF_HELPER_3(taddcctv, tl, env, tl, tl)
+DEF_HELPER_3(tsubcctv, tl, env, tl, tl)
+#ifdef TARGET_SPARC64
+DEF_HELPER_3(sdivx, s64, env, s64, s64)
+DEF_HELPER_3(udivx, i64, env, i64, i64)
+#endif
 DEF_HELPER_3(ldqf, void, env, tl, int)
 DEF_HELPER_3(stqf, void, env, tl, int)
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
diff --git a/target-sparc/int32_helper.c b/target-sparc/int32_helper.c
index 5e33d50..9ac5aac 100644
--- a/target-sparc/int32_helper.c
+++ b/target-sparc/int32_helper.c
@@ -21,7 +21,7 @@
 #include "trace.h"
 #include "sysemu.h"
 
-//#define DEBUG_PCALL
+#define DEBUG_PCALL
 
 #ifdef DEBUG_PCALL
 static const char * const excp_names[0x80] = {
@@ -78,10 +78,7 @@
             }
         }
 
-        qemu_log("%6d: %s (v=%02x) pc=%08x npc=%08x SP=%08x\n",
-                count, name, intno,
-                env->pc,
-                env->npc, env->regwptr[6]);
+        qemu_log("%6d: %s (v=%02x)\n", count, name, intno);
         log_cpu_state(env, 0);
 #if 0
         {
diff --git a/target-sparc/int64_helper.c b/target-sparc/int64_helper.c
index 5e3eff7..5d0bc6c 100644
--- a/target-sparc/int64_helper.c
+++ b/target-sparc/int64_helper.c
@@ -21,7 +21,7 @@
 #include "helper.h"
 #include "trace.h"
 
-//#define DEBUG_PCALL
+#define DEBUG_PCALL
 
 #ifdef DEBUG_PCALL
 static const char * const excp_names[0x80] = {
@@ -84,11 +84,7 @@
             }
         }
 
-        qemu_log("%6d: %s (v=%04x) pc=%016" PRIx64 " npc=%016" PRIx64
-                " SP=%016" PRIx64 "\n",
-                count, name, intno,
-                env->pc,
-                env->npc, env->regwptr[6]);
+        qemu_log("%6d: %s (v=%04x)\n", count, name, intno);
         log_cpu_state(env, 0);
 #if 0
         {
diff --git a/target-sparc/ldst_helper.c b/target-sparc/ldst_helper.c
index 9bec7a9..2ca9a5c 100644
--- a/target-sparc/ldst_helper.c
+++ b/target-sparc/ldst_helper.c
@@ -2390,9 +2390,8 @@
 #endif
 #endif
 
-#if !defined(CONFIG_USER_ONLY)
 /* XXX: make it generic ? */
-static void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr)
+void cpu_restore_state2(CPUSPARCState *env, uintptr_t retaddr)
 {
     TranslationBlock *tb;
 
@@ -2407,6 +2406,7 @@
     }
 }
 
+#if !defined(CONFIG_USER_ONLY)
 void do_unaligned_access(CPUSPARCState *env, target_ulong addr, int is_write,
                          int is_user, uintptr_t retaddr)
 {
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index b95f91c..472eb51 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -86,6 +86,13 @@
     int n_t32;
 } DisasContext;
 
+typedef struct {
+    TCGCond cond;
+    bool is_bool;
+    bool g1, g2;
+    TCGv c1, c2;
+} DisasCompare;
+
 // This function uses non-native bit order
 #define GET_FIELD(X, FROM, TO)                                  \
     ((X) >> (31 - (TO)) & ((1 << ((TO) - (FROM) + 1)) - 1))
@@ -329,43 +336,6 @@
     tcg_gen_andi_tl(reg, reg, 0x1);
 }
 
-static inline void gen_add_tv(TCGv dst, TCGv src1, TCGv src2)
-{
-    TCGv r_temp;
-    TCGv_i32 r_const;
-    int l1;
-
-    l1 = gen_new_label();
-
-    r_temp = tcg_temp_new();
-    tcg_gen_xor_tl(r_temp, src1, src2);
-    tcg_gen_not_tl(r_temp, r_temp);
-    tcg_gen_xor_tl(cpu_tmp0, src1, dst);
-    tcg_gen_and_tl(r_temp, r_temp, cpu_tmp0);
-    tcg_gen_andi_tl(r_temp, r_temp, (1ULL << 31));
-    tcg_gen_brcondi_tl(TCG_COND_EQ, r_temp, 0, l1);
-    r_const = tcg_const_i32(TT_TOVF);
-    gen_helper_raise_exception(cpu_env, r_const);
-    tcg_temp_free_i32(r_const);
-    gen_set_label(l1);
-    tcg_temp_free(r_temp);
-}
-
-static inline void gen_tag_tv(TCGv src1, TCGv src2)
-{
-    int l1;
-    TCGv_i32 r_const;
-
-    l1 = gen_new_label();
-    tcg_gen_or_tl(cpu_tmp0, src1, src2);
-    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0x3);
-    tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_tmp0, 0, l1);
-    r_const = tcg_const_i32(TT_TOVF);
-    gen_helper_raise_exception(cpu_env, r_const);
-    tcg_temp_free_i32(r_const);
-    gen_set_label(l1);
-}
-
 static inline void gen_op_addi_cc(TCGv dst, TCGv src1, target_long src2)
 {
     tcg_gen_mov_tl(cpu_cc_src, src1);
@@ -510,45 +480,6 @@
     }
 }
 
-static inline void gen_op_tadd_cc(TCGv dst, TCGv src1, TCGv src2)
-{
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
-
-static inline void gen_op_tadd_ccTV(TCGv dst, TCGv src1, TCGv src2)
-{
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    gen_tag_tv(cpu_cc_src, cpu_cc_src2);
-    tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    gen_add_tv(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
-
-static inline void gen_sub_tv(TCGv dst, TCGv src1, TCGv src2)
-{
-    TCGv r_temp;
-    TCGv_i32 r_const;
-    int l1;
-
-    l1 = gen_new_label();
-
-    r_temp = tcg_temp_new();
-    tcg_gen_xor_tl(r_temp, src1, src2);
-    tcg_gen_xor_tl(cpu_tmp0, src1, dst);
-    tcg_gen_and_tl(r_temp, r_temp, cpu_tmp0);
-    tcg_gen_andi_tl(r_temp, r_temp, (1ULL << 31));
-    tcg_gen_brcondi_tl(TCG_COND_EQ, r_temp, 0, l1);
-    r_const = tcg_const_i32(TT_TOVF);
-    gen_helper_raise_exception(cpu_env, r_const);
-    tcg_temp_free_i32(r_const);
-    gen_set_label(l1);
-    tcg_temp_free(r_temp);
-}
-
 static inline void gen_op_subi_cc(TCGv dst, TCGv src1, target_long src2, DisasContext *dc)
 {
     tcg_gen_mov_tl(cpu_cc_src, src1);
@@ -649,42 +580,23 @@
     }
 }
 
-static inline void gen_op_tsub_cc(TCGv dst, TCGv src1, TCGv src2)
-{
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
-
-static inline void gen_op_tsub_ccTV(TCGv dst, TCGv src1, TCGv src2)
-{
-    tcg_gen_mov_tl(cpu_cc_src, src1);
-    tcg_gen_mov_tl(cpu_cc_src2, src2);
-    gen_tag_tv(cpu_cc_src, cpu_cc_src2);
-    tcg_gen_sub_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    gen_sub_tv(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
-    tcg_gen_mov_tl(dst, cpu_cc_dst);
-}
-
 static inline void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
 {
-    TCGv r_temp;
-    int l1;
+    TCGv r_temp, zero;
 
-    l1 = gen_new_label();
     r_temp = tcg_temp_new();
 
     /* old op:
     if (!(env->y & 1))
         T1 = 0;
     */
+    zero = tcg_const_tl(0);
     tcg_gen_andi_tl(cpu_cc_src, src1, 0xffffffff);
     tcg_gen_andi_tl(r_temp, cpu_y, 0x1);
     tcg_gen_andi_tl(cpu_cc_src2, src2, 0xffffffff);
-    tcg_gen_brcondi_tl(TCG_COND_NE, r_temp, 0, l1);
-    tcg_gen_movi_tl(cpu_cc_src2, 0);
-    gen_set_label(l1);
+    tcg_gen_movcond_tl(TCG_COND_EQ, cpu_cc_src2, r_temp, zero,
+                       zero, cpu_cc_src2);
+    tcg_temp_free(zero);
 
     // b2 = T0 & 1;
     // env->y = (b2 << 31) | (env->y >> 1);
@@ -761,44 +673,6 @@
     gen_op_multiply(dst, src1, src2, 1);
 }
 
-#ifdef TARGET_SPARC64
-static inline void gen_trap_ifdivzero_tl(TCGv divisor)
-{
-    TCGv_i32 r_const;
-    int l1;
-
-    l1 = gen_new_label();
-    tcg_gen_brcondi_tl(TCG_COND_NE, divisor, 0, l1);
-    r_const = tcg_const_i32(TT_DIV_ZERO);
-    gen_helper_raise_exception(cpu_env, r_const);
-    tcg_temp_free_i32(r_const);
-    gen_set_label(l1);
-}
-
-static inline void gen_op_sdivx(TCGv dst, TCGv src1, TCGv src2)
-{
-    int l1, l2;
-    TCGv r_temp1, r_temp2;
-
-    l1 = gen_new_label();
-    l2 = gen_new_label();
-    r_temp1 = tcg_temp_local_new();
-    r_temp2 = tcg_temp_local_new();
-    tcg_gen_mov_tl(r_temp1, src1);
-    tcg_gen_mov_tl(r_temp2, src2);
-    gen_trap_ifdivzero_tl(r_temp2);
-    tcg_gen_brcondi_tl(TCG_COND_NE, r_temp1, INT64_MIN, l1);
-    tcg_gen_brcondi_tl(TCG_COND_NE, r_temp2, -1, l1);
-    tcg_gen_movi_i64(dst, INT64_MIN);
-    tcg_gen_br(l2);
-    gen_set_label(l1);
-    tcg_gen_div_i64(dst, r_temp1, r_temp2);
-    gen_set_label(l2);
-    tcg_temp_free(r_temp1);
-    tcg_temp_free(r_temp2);
-}
-#endif
-
 // 1
 static inline void gen_op_eval_ba(TCGv dst)
 {
@@ -1098,45 +972,40 @@
     gen_goto_tb(dc, 1, pc2 + 4, pc2 + 8);
 }
 
-static inline void gen_generic_branch(target_ulong npc1, target_ulong npc2,
-                                      TCGv r_cond)
+static inline void gen_generic_branch(DisasContext *dc)
 {
-    int l1, l2;
+    TCGv npc0 = tcg_const_tl(dc->jump_pc[0]);
+    TCGv npc1 = tcg_const_tl(dc->jump_pc[1]);
+    TCGv zero = tcg_const_tl(0);
 
-    l1 = gen_new_label();
-    l2 = gen_new_label();
+    tcg_gen_movcond_tl(TCG_COND_NE, cpu_npc, cpu_cond, zero, npc0, npc1);
 
-    tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1);
-
-    tcg_gen_movi_tl(cpu_npc, npc1);
-    tcg_gen_br(l2);
-
-    gen_set_label(l1);
-    tcg_gen_movi_tl(cpu_npc, npc2);
-    gen_set_label(l2);
+    tcg_temp_free(npc0);
+    tcg_temp_free(npc1);
+    tcg_temp_free(zero);
 }
 
 /* call this function before using the condition register as it may
    have been set for a jump */
-static inline void flush_cond(DisasContext *dc, TCGv cond)
+static inline void flush_cond(DisasContext *dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cond);
+        gen_generic_branch(dc);
         dc->npc = DYNAMIC_PC;
     }
 }
 
-static inline void save_npc(DisasContext *dc, TCGv cond)
+static inline void save_npc(DisasContext *dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cond);
+        gen_generic_branch(dc);
         dc->npc = DYNAMIC_PC;
     } else if (dc->npc != DYNAMIC_PC) {
         tcg_gen_movi_tl(cpu_npc, dc->npc);
     }
 }
 
-static inline void save_state(DisasContext *dc, TCGv cond)
+static inline void save_state(DisasContext *dc)
 {
     tcg_gen_movi_tl(cpu_pc, dc->pc);
     /* flush pending conditional evaluations before exposing cpu state */
@@ -1144,13 +1013,13 @@
         dc->cc_op = CC_OP_FLAGS;
         gen_helper_compute_psr(cpu_env);
     }
-    save_npc(dc, cond);
+    save_npc(dc);
 }
 
-static inline void gen_mov_pc_npc(DisasContext *dc, TCGv cond)
+static inline void gen_mov_pc_npc(DisasContext *dc)
 {
     if (dc->npc == JUMP_PC) {
-        gen_generic_branch(dc->jump_pc[0], dc->jump_pc[1], cond);
+        gen_generic_branch(dc);
         tcg_gen_mov_tl(cpu_pc, cpu_npc);
         dc->pc = DYNAMIC_PC;
     } else if (dc->npc == DYNAMIC_PC) {
@@ -1167,82 +1036,178 @@
     tcg_gen_addi_tl(cpu_npc, cpu_npc, 4);
 }
 
-static inline void gen_cond(TCGv r_dst, unsigned int cc, unsigned int cond,
-                            DisasContext *dc)
+static void free_compare(DisasCompare *cmp)
 {
+    if (!cmp->g1) {
+        tcg_temp_free(cmp->c1);
+    }
+    if (!cmp->g2) {
+        tcg_temp_free(cmp->c2);
+    }
+}
+
+static void gen_compare(DisasCompare *cmp, bool xcc, unsigned int cond,
+                        DisasContext *dc)
+{
+    static int subcc_cond[16] = {
+        -1, /* never */
+        TCG_COND_EQ,
+        TCG_COND_LE,
+        TCG_COND_LT,
+        TCG_COND_LEU,
+        TCG_COND_LTU,
+        -1, /* neg */
+        -1, /* overflow */
+        -1, /* always */
+        TCG_COND_NE,
+        TCG_COND_GT,
+        TCG_COND_GE,
+        TCG_COND_GTU,
+        TCG_COND_GEU,
+        -1, /* pos */
+        -1, /* no overflow */
+    };
+
     TCGv_i32 r_src;
+    TCGv r_dst;
 
 #ifdef TARGET_SPARC64
-    if (cc)
+    if (xcc) {
         r_src = cpu_xcc;
-    else
+    } else {
         r_src = cpu_psr;
+    }
 #else
     r_src = cpu_psr;
 #endif
+
     switch (dc->cc_op) {
-    case CC_OP_FLAGS:
+    case CC_OP_SUB:
+        switch (cond) {
+        case 6:  /* neg */
+        case 14: /* pos */
+            cmp->cond = (cond == 6 ? TCG_COND_LT : TCG_COND_GE);
+            cmp->is_bool = false;
+            cmp->g2 = false;
+            cmp->c2 = tcg_const_tl(0);
+#ifdef TARGET_SPARC64
+            if (!xcc) {
+                cmp->g1 = false;
+                cmp->c1 = tcg_temp_new();
+                tcg_gen_ext32s_tl(cmp->c1, cpu_cc_dst);
+                break;
+            }
+#endif
+            cmp->g1 = true;
+            cmp->c1 = cpu_cc_dst;
+            break;
+
+        case 0: /* never */
+        case 8: /* always */
+        case 7: /* overflow */
+        case 15: /* !overflow */
+            goto do_dynamic;
+
+        default:
+            cmp->cond = subcc_cond[cond];
+            cmp->is_bool = false;
+#ifdef TARGET_SPARC64
+            if (!xcc) {
+                /* Note that sign-extension works for unsigned compares as
+                   long as both operands are sign-extended.  */
+                cmp->g1 = cmp->g2 = false;
+                cmp->c1 = tcg_temp_new();
+                cmp->c2 = tcg_temp_new();
+                tcg_gen_ext32s_tl(cmp->c1, cpu_cc_src);
+                tcg_gen_ext32s_tl(cmp->c2, cpu_cc_src2);
+            }
+#endif
+            cmp->g1 = cmp->g2 = true;
+            cmp->c1 = cpu_cc_src;
+            cmp->c2 = cpu_cc_src2;
+            break;
+        }
         break;
+
     default:
+    do_dynamic:
         gen_helper_compute_psr(cpu_env);
         dc->cc_op = CC_OP_FLAGS;
-        break;
-    }
-    switch (cond) {
-    case 0x0:
-        gen_op_eval_bn(r_dst);
-        break;
-    case 0x1:
-        gen_op_eval_be(r_dst, r_src);
-        break;
-    case 0x2:
-        gen_op_eval_ble(r_dst, r_src);
-        break;
-    case 0x3:
-        gen_op_eval_bl(r_dst, r_src);
-        break;
-    case 0x4:
-        gen_op_eval_bleu(r_dst, r_src);
-        break;
-    case 0x5:
-        gen_op_eval_bcs(r_dst, r_src);
-        break;
-    case 0x6:
-        gen_op_eval_bneg(r_dst, r_src);
-        break;
-    case 0x7:
-        gen_op_eval_bvs(r_dst, r_src);
-        break;
-    case 0x8:
-        gen_op_eval_ba(r_dst);
-        break;
-    case 0x9:
-        gen_op_eval_bne(r_dst, r_src);
-        break;
-    case 0xa:
-        gen_op_eval_bg(r_dst, r_src);
-        break;
-    case 0xb:
-        gen_op_eval_bge(r_dst, r_src);
-        break;
-    case 0xc:
-        gen_op_eval_bgu(r_dst, r_src);
-        break;
-    case 0xd:
-        gen_op_eval_bcc(r_dst, r_src);
-        break;
-    case 0xe:
-        gen_op_eval_bpos(r_dst, r_src);
-        break;
-    case 0xf:
-        gen_op_eval_bvc(r_dst, r_src);
+        /* FALLTHRU */
+
+    case CC_OP_FLAGS:
+        /* We're going to generate a boolean result.  */
+        cmp->cond = TCG_COND_NE;
+        cmp->is_bool = true;
+        cmp->g1 = cmp->g2 = false;
+        cmp->c1 = r_dst = tcg_temp_new();
+        cmp->c2 = tcg_const_tl(0);
+
+        switch (cond) {
+        case 0x0:
+            gen_op_eval_bn(r_dst);
+            break;
+        case 0x1:
+            gen_op_eval_be(r_dst, r_src);
+            break;
+        case 0x2:
+            gen_op_eval_ble(r_dst, r_src);
+            break;
+        case 0x3:
+            gen_op_eval_bl(r_dst, r_src);
+            break;
+        case 0x4:
+            gen_op_eval_bleu(r_dst, r_src);
+            break;
+        case 0x5:
+            gen_op_eval_bcs(r_dst, r_src);
+            break;
+        case 0x6:
+            gen_op_eval_bneg(r_dst, r_src);
+            break;
+        case 0x7:
+            gen_op_eval_bvs(r_dst, r_src);
+            break;
+        case 0x8:
+            gen_op_eval_ba(r_dst);
+            break;
+        case 0x9:
+            gen_op_eval_bne(r_dst, r_src);
+            break;
+        case 0xa:
+            gen_op_eval_bg(r_dst, r_src);
+            break;
+        case 0xb:
+            gen_op_eval_bge(r_dst, r_src);
+            break;
+        case 0xc:
+            gen_op_eval_bgu(r_dst, r_src);
+            break;
+        case 0xd:
+            gen_op_eval_bcc(r_dst, r_src);
+            break;
+        case 0xe:
+            gen_op_eval_bpos(r_dst, r_src);
+            break;
+        case 0xf:
+            gen_op_eval_bvc(r_dst, r_src);
+            break;
+        }
         break;
     }
 }
 
-static inline void gen_fcond(TCGv r_dst, unsigned int cc, unsigned int cond)
+static void gen_fcompare(DisasCompare *cmp, unsigned int cc, unsigned int cond)
 {
     unsigned int offset;
+    TCGv r_dst;
+
+    /* For now we still generate a straight boolean result.  */
+    cmp->cond = TCG_COND_NE;
+    cmp->is_bool = true;
+    cmp->g1 = cmp->g2 = false;
+    cmp->c1 = r_dst = tcg_temp_new();
+    cmp->c2 = tcg_const_tl(0);
 
     switch (cc) {
     default:
@@ -1312,6 +1277,37 @@
     }
 }
 
+static void gen_cond(TCGv r_dst, unsigned int cc, unsigned int cond,
+                     DisasContext *dc)
+{
+    DisasCompare cmp;
+    gen_compare(&cmp, cc, cond, dc);
+
+    /* The interface is to return a boolean in r_dst.  */
+    if (cmp.is_bool) {
+        tcg_gen_mov_tl(r_dst, cmp.c1);
+    } else {
+        tcg_gen_setcond_tl(cmp.cond, r_dst, cmp.c1, cmp.c2);
+    }
+
+    free_compare(&cmp);
+}
+
+static void gen_fcond(TCGv r_dst, unsigned int cc, unsigned int cond)
+{
+    DisasCompare cmp;
+    gen_fcompare(&cmp, cc, cond);
+
+    /* The interface is to return a boolean in r_dst.  */
+    if (cmp.is_bool) {
+        tcg_gen_mov_tl(r_dst, cmp.c1);
+    } else {
+        tcg_gen_setcond_tl(cmp.cond, r_dst, cmp.c1, cmp.c2);
+    }
+
+    free_compare(&cmp);
+}
+
 #ifdef TARGET_SPARC64
 // Inverted logic
 static const int gen_tcg_cond_reg[8] = {
@@ -1325,20 +1321,29 @@
     TCG_COND_LT,
 };
 
+static void gen_compare_reg(DisasCompare *cmp, int cond, TCGv r_src)
+{
+    cmp->cond = tcg_invert_cond(gen_tcg_cond_reg[cond]);
+    cmp->is_bool = false;
+    cmp->g1 = true;
+    cmp->g2 = false;
+    cmp->c1 = r_src;
+    cmp->c2 = tcg_const_tl(0);
+}
+
 static inline void gen_cond_reg(TCGv r_dst, int cond, TCGv r_src)
 {
-    int l1;
+    DisasCompare cmp;
+    gen_compare_reg(&cmp, cond, r_src);
 
-    l1 = gen_new_label();
-    tcg_gen_movi_tl(r_dst, 0);
-    tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], r_src, 0, l1);
-    tcg_gen_movi_tl(r_dst, 1);
-    gen_set_label(l1);
+    /* The interface is to return a boolean in r_dst.  */
+    tcg_gen_setcond_tl(cmp.cond, r_dst, cmp.c1, cmp.c2);
+
+    free_compare(&cmp);
 }
 #endif
 
-static void do_branch(DisasContext *dc, int32_t offset, uint32_t insn, int cc,
-                      TCGv r_cond)
+static void do_branch(DisasContext *dc, int32_t offset, uint32_t insn, int cc)
 {
     unsigned int cond = GET_FIELD(insn, 3, 6), a = (insn & (1 << 29));
     target_ulong target = dc->pc + offset;
@@ -1368,10 +1373,10 @@
             tcg_gen_mov_tl(cpu_pc, cpu_npc);
         }
     } else {
-        flush_cond(dc, r_cond);
-        gen_cond(r_cond, cc, cond, dc);
+        flush_cond(dc);
+        gen_cond(cpu_cond, cc, cond, dc);
         if (a) {
-            gen_branch_a(dc, target, dc->npc, r_cond);
+            gen_branch_a(dc, target, dc->npc, cpu_cond);
             dc->is_br = 1;
         } else {
             dc->pc = dc->npc;
@@ -1387,8 +1392,7 @@
     }
 }
 
-static void do_fbranch(DisasContext *dc, int32_t offset, uint32_t insn, int cc,
-                      TCGv r_cond)
+static void do_fbranch(DisasContext *dc, int32_t offset, uint32_t insn, int cc)
 {
     unsigned int cond = GET_FIELD(insn, 3, 6), a = (insn & (1 << 29));
     target_ulong target = dc->pc + offset;
@@ -1418,10 +1422,10 @@
             tcg_gen_mov_tl(cpu_pc, cpu_npc);
         }
     } else {
-        flush_cond(dc, r_cond);
-        gen_fcond(r_cond, cc, cond);
+        flush_cond(dc);
+        gen_fcond(cpu_cond, cc, cond);
         if (a) {
-            gen_branch_a(dc, target, dc->npc, r_cond);
+            gen_branch_a(dc, target, dc->npc, cpu_cond);
             dc->is_br = 1;
         } else {
             dc->pc = dc->npc;
@@ -1439,7 +1443,7 @@
 
 #ifdef TARGET_SPARC64
 static void do_branch_reg(DisasContext *dc, int32_t offset, uint32_t insn,
-                          TCGv r_cond, TCGv r_reg)
+                          TCGv r_reg)
 {
     unsigned int cond = GET_FIELD_SP(insn, 25, 27), a = (insn & (1 << 29));
     target_ulong target = dc->pc + offset;
@@ -1447,10 +1451,10 @@
     if (unlikely(AM_CHECK(dc))) {
         target &= 0xffffffffULL;
     }
-    flush_cond(dc, r_cond);
-    gen_cond_reg(r_cond, cond, r_reg);
+    flush_cond(dc);
+    gen_cond_reg(cpu_cond, cond, r_reg);
     if (a) {
-        gen_branch_a(dc, target, dc->npc, r_cond);
+        gen_branch_a(dc, target, dc->npc, cpu_cond);
         dc->is_br = 1;
     } else {
         dc->pc = dc->npc;
@@ -1617,13 +1621,13 @@
     tcg_temp_free_i32(r_const);
 }
 
-static int gen_trap_ifnofpu(DisasContext *dc, TCGv r_cond)
+static int gen_trap_ifnofpu(DisasContext *dc)
 {
 #if !defined(CONFIG_USER_ONLY)
     if (!dc->fpu_enabled) {
         TCGv_i32 r_const;
 
-        save_state(dc, r_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_NFPU_INSN);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free_i32(r_const);
@@ -2210,6 +2214,57 @@
 }
 
 #ifdef TARGET_SPARC64
+static void gen_fmovs(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
+{
+    TCGv_i32 c32, zero, dst, s1, s2;
+
+    /* We have two choices here: extend the 32 bit data and use movcond_i64,
+       or fold the comparison down to 32 bits and use movcond_i32.  Choose
+       the later.  */
+    c32 = tcg_temp_new_i32();
+    if (cmp->is_bool) {
+        tcg_gen_trunc_i64_i32(c32, cmp->c1);
+    } else {
+        TCGv_i64 c64 = tcg_temp_new_i64();
+        tcg_gen_setcond_i64(cmp->cond, c64, cmp->c1, cmp->c2);
+        tcg_gen_trunc_i64_i32(c32, c64);
+        tcg_temp_free_i64(c64);
+    }
+
+    s1 = gen_load_fpr_F(dc, rs);
+    s2 = gen_load_fpr_F(dc, rd);
+    dst = gen_dest_fpr_F();
+    zero = tcg_const_i32(0);
+
+    tcg_gen_movcond_i32(TCG_COND_NE, dst, c32, zero, s1, s2);
+
+    tcg_temp_free_i32(c32);
+    tcg_temp_free_i32(zero);
+    gen_store_fpr_F(dc, rd, dst);
+}
+
+static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
+{
+    TCGv_i64 dst = gen_dest_fpr_D();
+    tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, cmp->c2,
+                        gen_load_fpr_D(dc, rs),
+                        gen_load_fpr_D(dc, rd));
+    gen_store_fpr_D(dc, rd, dst);
+}
+
+static void gen_fmovq(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
+{
+    int qd = QFPREG(rd);
+    int qs = QFPREG(rs);
+
+    tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2], cmp->c1, cmp->c2,
+                        cpu_fpr[qs / 2], cpu_fpr[qd / 2]);
+    tcg_gen_movcond_i64(cmp->cond, cpu_fpr[qd / 2 + 1], cmp->c1, cmp->c2,
+                        cpu_fpr[qs / 2 + 1], cpu_fpr[qd / 2 + 1]);
+
+    gen_update_fprs_dirty(qd);
+}
+
 static inline void gen_load_trap_state_at_tl(TCGv_ptr r_tsptr, TCGv_ptr cpu_env)
 {
     TCGv_i32 r_tl = tcg_temp_new_i32();
@@ -2394,8 +2449,9 @@
     TCGv_i64 cpu_src1_64, cpu_src2_64, cpu_dst_64;
     target_long simm;
 
-    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
         tcg_gen_debug_insn_start(dc->pc);
+    }
 
     opc = GET_FIELD(insn, 0, 1);
 
@@ -2420,9 +2476,9 @@
                     target <<= 2;
                     cc = GET_FIELD_SP(insn, 20, 21);
                     if (cc == 0)
-                        do_branch(dc, target, insn, 0, cpu_cond);
+                        do_branch(dc, target, insn, 0);
                     else if (cc == 2)
-                        do_branch(dc, target, insn, 1, cpu_cond);
+                        do_branch(dc, target, insn, 1);
                     else
                         goto illegal_insn;
                     goto jmp_insn;
@@ -2434,18 +2490,19 @@
                     target = sign_extend(target, 16);
                     target <<= 2;
                     cpu_src1 = get_src1(insn, cpu_src1);
-                    do_branch_reg(dc, target, insn, cpu_cond, cpu_src1);
+                    do_branch_reg(dc, target, insn, cpu_src1);
                     goto jmp_insn;
                 }
             case 0x5:           /* V9 FBPcc */
                 {
                     int cc = GET_FIELD_SP(insn, 20, 21);
-                    if (gen_trap_ifnofpu(dc, cpu_cond))
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
+                    }
                     target = GET_FIELD_SP(insn, 0, 18);
                     target = sign_extend(target, 19);
                     target <<= 2;
-                    do_fbranch(dc, target, insn, cc, cpu_cond);
+                    do_fbranch(dc, target, insn, cc);
                     goto jmp_insn;
                 }
 #else
@@ -2459,17 +2516,18 @@
                     target = GET_FIELD(insn, 10, 31);
                     target = sign_extend(target, 22);
                     target <<= 2;
-                    do_branch(dc, target, insn, 0, cpu_cond);
+                    do_branch(dc, target, insn, 0);
                     goto jmp_insn;
                 }
             case 0x6:           /* FBN+x */
                 {
-                    if (gen_trap_ifnofpu(dc, cpu_cond))
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
+                    }
                     target = GET_FIELD(insn, 10, 31);
                     target = sign_extend(target, 22);
                     target <<= 2;
-                    do_fbranch(dc, target, insn, 0, cpu_cond);
+                    do_fbranch(dc, target, insn, 0);
                     goto jmp_insn;
                 }
             case 0x4:           /* SETHI */
@@ -2498,7 +2556,7 @@
             gen_movl_TN_reg(15, r_const);
             tcg_temp_free(r_const);
             target += dc->pc;
-            gen_mov_pc_npc(dc, cpu_cond);
+            gen_mov_pc_npc(dc);
 #ifdef TARGET_SPARC64
             if (unlikely(AM_CHECK(dc))) {
                 target &= 0xffffffffULL;
@@ -2511,70 +2569,89 @@
         {
             unsigned int xop = GET_FIELD(insn, 7, 12);
             if (xop == 0x3a) {  /* generate trap */
-                int cond;
+                int cond = GET_FIELD(insn, 3, 6);
+                TCGv_i32 trap;
+                int l1 = -1, mask;
 
-                cpu_src1 = get_src1(insn, cpu_src1);
-                if (IS_IMM) {
-                    rs2 = GET_FIELD(insn, 25, 31);
-                    tcg_gen_addi_tl(cpu_dst, cpu_src1, rs2);
-                } else {
-                    rs2 = GET_FIELD(insn, 27, 31);
-                    if (rs2 != 0) {
-                        gen_movl_reg_TN(rs2, cpu_src2);
-                        tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
-                    } else
-                        tcg_gen_mov_tl(cpu_dst, cpu_src1);
+                if (cond == 0) {
+                    /* Trap never.  */
+                    break;
                 }
 
-                cond = GET_FIELD(insn, 3, 6);
-                if (cond == 0x8) { /* Trap Always */
-                    save_state(dc, cpu_cond);
-                    if ((dc->def->features & CPU_FEATURE_HYPV) &&
-                        supervisor(dc))
-                        tcg_gen_andi_tl(cpu_dst, cpu_dst, UA2005_HTRAP_MASK);
-                    else
-                        tcg_gen_andi_tl(cpu_dst, cpu_dst, V8_TRAP_MASK);
-                    tcg_gen_addi_tl(cpu_dst, cpu_dst, TT_TRAP);
-                    tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_dst);
-                    gen_helper_raise_exception(cpu_env, cpu_tmp32);
+                save_state(dc);
 
-                } else if (cond != 0) {
-                    TCGv r_cond = tcg_temp_new();
-                    int l1;
+                if (cond != 8) {
+                    /* Conditional trap.  */
+                    DisasCompare cmp;
 #ifdef TARGET_SPARC64
                     /* V9 icc/xcc */
                     int cc = GET_FIELD_SP(insn, 11, 12);
-
-                    save_state(dc, cpu_cond);
-                    if (cc == 0)
-                        gen_cond(r_cond, 0, cond, dc);
-                    else if (cc == 2)
-                        gen_cond(r_cond, 1, cond, dc);
-                    else
+                    if (cc == 0) {
+                        gen_compare(&cmp, 0, cond, dc);
+                    } else if (cc == 2) {
+                        gen_compare(&cmp, 1, cond, dc);
+                    } else {
                         goto illegal_insn;
+                    }
 #else
-                    save_state(dc, cpu_cond);
-                    gen_cond(r_cond, 0, cond, dc);
+                    gen_compare(&cmp, 0, cond, dc);
 #endif
                     l1 = gen_new_label();
-                    tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1);
-
-                    if ((dc->def->features & CPU_FEATURE_HYPV) &&
-                        supervisor(dc))
-                        tcg_gen_andi_tl(cpu_dst, cpu_dst, UA2005_HTRAP_MASK);
-                    else
-                        tcg_gen_andi_tl(cpu_dst, cpu_dst, V8_TRAP_MASK);
-                    tcg_gen_addi_tl(cpu_dst, cpu_dst, TT_TRAP);
-                    tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_dst);
-                    gen_helper_raise_exception(cpu_env, cpu_tmp32);
-
-                    gen_set_label(l1);
-                    tcg_temp_free(r_cond);
+                    tcg_gen_brcond_tl(tcg_invert_cond(cmp.cond),
+                                      cmp.c1, cmp.c2, l1);
+                    free_compare(&cmp);
                 }
-                gen_op_next_insn();
-                tcg_gen_exit_tb(0);
-                dc->is_br = 1;
-                goto jmp_insn;
+
+                mask = ((dc->def->features & CPU_FEATURE_HYPV) && supervisor(dc)
+                        ? UA2005_HTRAP_MASK : V8_TRAP_MASK);
+
+                /* Don't use the normal temporaries, as they may well have
+                   gone out of scope with the branch above.  While we're
+                   doing that we might as well pre-truncate to 32-bit.  */
+                trap = tcg_temp_new_i32();
+
+                rs1 = GET_FIELD_SP(insn, 14, 18);
+                if (IS_IMM) {
+                    rs2 = GET_FIELD_SP(insn, 0, 6);
+                    if (rs1 == 0) {
+                        tcg_gen_movi_i32(trap, (rs2 & mask) + TT_TRAP);
+                        /* Signal that the trap value is fully constant.  */
+                        mask = 0;
+                    } else {
+                        TCGv t1 = tcg_temp_new();
+                        gen_movl_reg_TN(rs1, t1);
+                        tcg_gen_trunc_tl_i32(trap, t1);
+                        tcg_temp_free(t1);
+                        tcg_gen_addi_i32(trap, trap, rs2);
+                    }
+                } else {
+                    TCGv t1 = tcg_temp_new();
+                    TCGv t2 = tcg_temp_new();
+                    rs2 = GET_FIELD_SP(insn, 0, 4);
+                    gen_movl_reg_TN(rs1, t1);
+                    gen_movl_reg_TN(rs2, t2);
+                    tcg_gen_add_tl(t1, t1, t2);
+                    tcg_gen_trunc_tl_i32(trap, t1);
+                    tcg_temp_free(t1);
+                    tcg_temp_free(t2);
+                }
+                if (mask != 0) {
+                    tcg_gen_andi_i32(trap, trap, mask);
+                    tcg_gen_addi_i32(trap, trap, TT_TRAP);
+                }
+
+                gen_helper_raise_exception(cpu_env, trap);
+                tcg_temp_free_i32(trap);
+
+                if (cond == 8) {
+                    /* An unconditional trap ends the TB.  */
+                    dc->is_br = 1;
+                    goto jmp_insn;
+                } else {
+                    /* A conditional trap falls through to the next insn.  */
+                    gen_set_label(l1);
+                    break;
+                }
             } else if (xop == 0x28) {
                 rs1 = GET_FIELD(insn, 13, 17);
                 switch(rs1) {
@@ -2644,8 +2721,9 @@
                 case 0xf: /* V9 membar */
                     break; /* no effect */
                 case 0x13: /* Graphics Status */
-                    if (gen_trap_ifnofpu(dc, cpu_cond))
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
+                    }
                     gen_movl_TN_reg(rd, cpu_gsr);
                     break;
                 case 0x16: /* Softint */
@@ -2852,7 +2930,7 @@
                 break;
             } else if (xop == 0x2b) { /* rdtbr / V9 flushw */
 #ifdef TARGET_SPARC64
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 gen_helper_flushw(cpu_env);
 #else
                 if (!supervisor(dc))
@@ -2862,13 +2940,14 @@
                 break;
 #endif
             } else if (xop == 0x34) {   /* FPU Operations */
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
+                }
                 gen_op_clear_ieee_excp_and_FTT();
                 rs1 = GET_FIELD(insn, 13, 17);
                 rs2 = GET_FIELD(insn, 27, 31);
                 xop = GET_FIELD(insn, 18, 26);
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 switch (xop) {
                 case 0x1: /* fmovs */
                     cpu_src1_32 = gen_load_fpr_F(dc, rs2);
@@ -3036,216 +3115,121 @@
 #ifdef TARGET_SPARC64
                 int cond;
 #endif
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
+                }
                 gen_op_clear_ieee_excp_and_FTT();
                 rs1 = GET_FIELD(insn, 13, 17);
                 rs2 = GET_FIELD(insn, 27, 31);
                 xop = GET_FIELD(insn, 18, 26);
-                save_state(dc, cpu_cond);
-#ifdef TARGET_SPARC64
-                if ((xop & 0x11f) == 0x005) { // V9 fmovsr
-                    int l1;
+                save_state(dc);
 
-                    l1 = gen_new_label();
-                    cond = GET_FIELD_SP(insn, 14, 17);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
-                                       0, l1);
-                    cpu_src1_32 = gen_load_fpr_F(dc, rs2);
-                    gen_store_fpr_F(dc, rd, cpu_src1_32);
-                    gen_set_label(l1);
+#ifdef TARGET_SPARC64
+#define FMOVR(sz)                                                  \
+                do {                                               \
+                    DisasCompare cmp;                              \
+                    cond = GET_FIELD_SP(insn, 14, 17);             \
+                    cpu_src1 = get_src1(insn, cpu_src1);           \
+                    gen_compare_reg(&cmp, cond, cpu_src1);         \
+                    gen_fmov##sz(dc, &cmp, rd, rs2);               \
+                    free_compare(&cmp);                            \
+                } while (0)
+
+                if ((xop & 0x11f) == 0x005) { /* V9 fmovsr */
+                    FMOVR(s);
                     break;
                 } else if ((xop & 0x11f) == 0x006) { // V9 fmovdr
-                    int l1;
-
-                    l1 = gen_new_label();
-                    cond = GET_FIELD_SP(insn, 14, 17);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
-                                       0, l1);
-                    cpu_src1_64 = gen_load_fpr_D(dc, rs2);
-                    gen_store_fpr_D(dc, rd, cpu_src1_64);
-                    gen_set_label(l1);
+                    FMOVR(d);
                     break;
                 } else if ((xop & 0x11f) == 0x007) { // V9 fmovqr
-                    int l1;
-
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    l1 = gen_new_label();
-                    cond = GET_FIELD_SP(insn, 14, 17);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond], cpu_src1,
-                                       0, l1);
-                    gen_move_Q(rd, rs2);
-                    gen_set_label(l1);
+                    FMOVR(q);
                     break;
                 }
+#undef FMOVR
 #endif
                 switch (xop) {
 #ifdef TARGET_SPARC64
-#define FMOVSCC(fcc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
+#define FMOVCC(fcc, sz)                                                 \
+                    do {                                                \
+                        DisasCompare cmp;                               \
                         cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_fcond(r_cond, fcc, cond);                   \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        cpu_src1_32 = gen_load_fpr_F(dc, rs2);          \
-                        gen_store_fpr_F(dc, rd, cpu_src1_32);           \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
-#define FMOVDCC(fcc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
-                        cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_fcond(r_cond, fcc, cond);                   \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        cpu_src1_64 = gen_load_fpr_D(dc, rs2);          \
-                        gen_store_fpr_D(dc, rd, cpu_src1_64);           \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
-#define FMOVQCC(fcc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
-                        cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_fcond(r_cond, fcc, cond);                   \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        gen_move_Q(rd, rs2);                            \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
+                        gen_fcompare(&cmp, fcc, cond);                  \
+                        gen_fmov##sz(dc, &cmp, rd, rs2);                \
+                        free_compare(&cmp);                             \
+                    } while (0)
+
                     case 0x001: /* V9 fmovscc %fcc0 */
-                        FMOVSCC(0);
+                        FMOVCC(0, s);
                         break;
                     case 0x002: /* V9 fmovdcc %fcc0 */
-                        FMOVDCC(0);
+                        FMOVCC(0, d);
                         break;
                     case 0x003: /* V9 fmovqcc %fcc0 */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(0);
+                        FMOVCC(0, q);
                         break;
                     case 0x041: /* V9 fmovscc %fcc1 */
-                        FMOVSCC(1);
+                        FMOVCC(1, s);
                         break;
                     case 0x042: /* V9 fmovdcc %fcc1 */
-                        FMOVDCC(1);
+                        FMOVCC(1, d);
                         break;
                     case 0x043: /* V9 fmovqcc %fcc1 */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(1);
+                        FMOVCC(1, q);
                         break;
                     case 0x081: /* V9 fmovscc %fcc2 */
-                        FMOVSCC(2);
+                        FMOVCC(2, s);
                         break;
                     case 0x082: /* V9 fmovdcc %fcc2 */
-                        FMOVDCC(2);
+                        FMOVCC(2, d);
                         break;
                     case 0x083: /* V9 fmovqcc %fcc2 */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(2);
+                        FMOVCC(2, q);
                         break;
                     case 0x0c1: /* V9 fmovscc %fcc3 */
-                        FMOVSCC(3);
+                        FMOVCC(3, s);
                         break;
                     case 0x0c2: /* V9 fmovdcc %fcc3 */
-                        FMOVDCC(3);
+                        FMOVCC(3, d);
                         break;
                     case 0x0c3: /* V9 fmovqcc %fcc3 */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(3);
+                        FMOVCC(3, q);
                         break;
-#undef FMOVSCC
-#undef FMOVDCC
-#undef FMOVQCC
-#define FMOVSCC(icc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
+#undef FMOVCC
+#define FMOVCC(xcc, sz)                                                 \
+                    do {                                                \
+                        DisasCompare cmp;                               \
                         cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_cond(r_cond, icc, cond, dc);                \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        cpu_src1_32 = gen_load_fpr_F(dc, rs2);          \
-                        gen_store_fpr_F(dc, rd, cpu_src1_32);           \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
-#define FMOVDCC(icc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
-                        cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_cond(r_cond, icc, cond, dc);                \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        cpu_src1_64 = gen_load_fpr_D(dc, rs2);          \
-                        gen_store_fpr_D(dc, rd, cpu_src1_64);           \
-                        gen_update_fprs_dirty(DFPREG(rd));              \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
-#define FMOVQCC(icc)                                                    \
-                    {                                                   \
-                        TCGv r_cond;                                    \
-                        int l1;                                         \
-                                                                        \
-                        l1 = gen_new_label();                           \
-                        r_cond = tcg_temp_new();                        \
-                        cond = GET_FIELD_SP(insn, 14, 17);              \
-                        gen_cond(r_cond, icc, cond, dc);                \
-                        tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond,         \
-                                           0, l1);                      \
-                        gen_move_Q(rd, rs2);                            \
-                        gen_set_label(l1);                              \
-                        tcg_temp_free(r_cond);                          \
-                    }
+                        gen_compare(&cmp, xcc, cond, dc);               \
+                        gen_fmov##sz(dc, &cmp, rd, rs2);                \
+                        free_compare(&cmp);                             \
+                    } while (0)
 
                     case 0x101: /* V9 fmovscc %icc */
-                        FMOVSCC(0);
+                        FMOVCC(0, s);
                         break;
                     case 0x102: /* V9 fmovdcc %icc */
-                        FMOVDCC(0);
+                        FMOVCC(0, d);
                         break;
                     case 0x103: /* V9 fmovqcc %icc */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(0);
+                        FMOVCC(0, q);
                         break;
                     case 0x181: /* V9 fmovscc %xcc */
-                        FMOVSCC(1);
+                        FMOVCC(1, s);
                         break;
                     case 0x182: /* V9 fmovdcc %xcc */
-                        FMOVDCC(1);
+                        FMOVCC(1, d);
                         break;
                     case 0x183: /* V9 fmovqcc %xcc */
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        FMOVQCC(1);
+                        FMOVCC(1, q);
                         break;
-#undef FMOVSCC
-#undef FMOVDCC
-#undef FMOVQCC
+#undef FMOVCC
 #endif
                     case 0x51: /* fcmps, V9 %fcc */
                         cpu_src1_32 = gen_load_fpr_F(dc, rs1);
@@ -3547,17 +3531,7 @@
                         break;
 #ifdef TARGET_SPARC64
                     case 0xd: /* V9 udivx */
-                        {
-                            TCGv r_temp1, r_temp2;
-                            r_temp1 = tcg_temp_local_new();
-                            r_temp2 = tcg_temp_local_new();
-                            tcg_gen_mov_tl(r_temp1, cpu_src1);
-                            tcg_gen_mov_tl(r_temp2, cpu_src2);
-                            gen_trap_ifdivzero_tl(r_temp2);
-                            tcg_gen_divu_i64(cpu_dst, r_temp1, r_temp2);
-                            tcg_temp_free(r_temp1);
-                            tcg_temp_free(r_temp2);
-                        }
+                        gen_helper_udivx(cpu_dst, cpu_env, cpu_src1, cpu_src2);
                         break;
 #endif
                     case 0xe: /* udiv */
@@ -3591,29 +3565,27 @@
                     cpu_src2 = get_src2(insn, cpu_src2);
                     switch (xop) {
                     case 0x20: /* taddcc */
-                        gen_op_tadd_cc(cpu_dst, cpu_src1, cpu_src2);
+                        gen_op_add_cc(cpu_dst, cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TADD);
                         dc->cc_op = CC_OP_TADD;
                         break;
                     case 0x21: /* tsubcc */
-                        gen_op_tsub_cc(cpu_dst, cpu_src1, cpu_src2);
+                        gen_op_sub_cc(cpu_dst, cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TSUB);
                         dc->cc_op = CC_OP_TSUB;
                         break;
                     case 0x22: /* taddcctv */
-                        save_state(dc, cpu_cond);
-                        gen_op_tadd_ccTV(cpu_dst, cpu_src1, cpu_src2);
+                        gen_helper_taddcctv(cpu_dst, cpu_env,
+                                            cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
-                        tcg_gen_movi_i32(cpu_cc_op, CC_OP_TADDTV);
                         dc->cc_op = CC_OP_TADDTV;
                         break;
                     case 0x23: /* tsubcctv */
-                        save_state(dc, cpu_cond);
-                        gen_op_tsub_ccTV(cpu_dst, cpu_src1, cpu_src2);
+                        gen_helper_tsubcctv(cpu_dst, cpu_env,
+                                            cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
-                        tcg_gen_movi_i32(cpu_cc_op, CC_OP_TSUBTV);
                         dc->cc_op = CC_OP_TSUBTV;
                         break;
                     case 0x24: /* mulscc */
@@ -3687,7 +3659,7 @@
                             case 0x6: /* V9 wrfprs */
                                 tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
                                 tcg_gen_trunc_tl_i32(cpu_fprs, cpu_dst);
-                                save_state(dc, cpu_cond);
+                                save_state(dc);
                                 gen_op_next_insn();
                                 tcg_gen_exit_tb(0);
                                 dc->is_br = 1;
@@ -3700,8 +3672,9 @@
 #endif
                                 break;
                             case 0x13: /* Graphics Status */
-                                if (gen_trap_ifnofpu(dc, cpu_cond))
+                                if (gen_trap_ifnofpu(dc)) {
                                     goto jmp_insn;
+                                }
                                 tcg_gen_xor_tl(cpu_gsr, cpu_src1, cpu_src2);
                                 break;
                             case 0x14: /* Softint set */
@@ -3813,7 +3786,7 @@
                             gen_helper_wrpsr(cpu_env, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_FLAGS);
                             dc->cc_op = CC_OP_FLAGS;
-                            save_state(dc, cpu_cond);
+                            save_state(dc);
                             gen_op_next_insn();
                             tcg_gen_exit_tb(0);
                             dc->is_br = 1;
@@ -3893,7 +3866,7 @@
                                     TCGv r_tmp = tcg_temp_local_new();
 
                                     tcg_gen_mov_tl(r_tmp, cpu_tmp0);
-                                    save_state(dc, cpu_cond);
+                                    save_state(dc);
                                     gen_helper_wrpstate(cpu_env, r_tmp);
                                     tcg_temp_free(r_tmp);
                                     dc->npc = DYNAMIC_PC;
@@ -3904,7 +3877,7 @@
                                     TCGv r_tmp = tcg_temp_local_new();
 
                                     tcg_gen_mov_tl(r_tmp, cpu_tmp0);
-                                    save_state(dc, cpu_cond);
+                                    save_state(dc);
                                     tcg_gen_trunc_tl_i32(cpu_tmp32, r_tmp);
                                     tcg_temp_free(r_tmp);
                                     tcg_gen_st_i32(cpu_tmp32, cpu_env,
@@ -3986,7 +3959,7 @@
                             switch (rd) {
                             case 0: // hpstate
                                 // XXX gen_op_wrhpstate();
-                                save_state(dc, cpu_cond);
+                                save_state(dc);
                                 gen_op_next_insn();
                                 tcg_gen_exit_tb(0);
                                 dc->is_br = 1;
@@ -4026,42 +3999,38 @@
                         {
                             int cc = GET_FIELD_SP(insn, 11, 12);
                             int cond = GET_FIELD_SP(insn, 14, 17);
-                            TCGv r_cond;
-                            int l1;
+                            DisasCompare cmp;
 
-                            r_cond = tcg_temp_new();
                             if (insn & (1 << 18)) {
-                                if (cc == 0)
-                                    gen_cond(r_cond, 0, cond, dc);
-                                else if (cc == 2)
-                                    gen_cond(r_cond, 1, cond, dc);
-                                else
+                                if (cc == 0) {
+                                    gen_compare(&cmp, 0, cond, dc);
+                                } else if (cc == 2) {
+                                    gen_compare(&cmp, 1, cond, dc);
+                                } else {
                                     goto illegal_insn;
+                                }
                             } else {
-                                gen_fcond(r_cond, cc, cond);
+                                gen_fcompare(&cmp, cc, cond);
                             }
 
-                            l1 = gen_new_label();
-
-                            tcg_gen_brcondi_tl(TCG_COND_EQ, r_cond, 0, l1);
-                            if (IS_IMM) {       /* immediate */
-                                TCGv r_const;
-
+                            /* The get_src2 above loaded the normal 13-bit
+                               immediate field, not the 11-bit field we have
+                               in movcc.  But it did handle the reg case.  */
+                            if (IS_IMM) {
                                 simm = GET_FIELD_SPs(insn, 0, 10);
-                                r_const = tcg_const_tl(simm);
-                                gen_movl_TN_reg(rd, r_const);
-                                tcg_temp_free(r_const);
-                            } else {
-                                rs2 = GET_FIELD_SP(insn, 0, 4);
-                                gen_movl_reg_TN(rs2, cpu_tmp0);
-                                gen_movl_TN_reg(rd, cpu_tmp0);
+                                tcg_gen_movi_tl(cpu_src2, simm);
                             }
-                            gen_set_label(l1);
-                            tcg_temp_free(r_cond);
+
+                            gen_movl_reg_TN(rd, cpu_dst);
+                            tcg_gen_movcond_tl(cmp.cond, cpu_dst,
+                                               cmp.c1, cmp.c2,
+                                               cpu_src2, cpu_dst);
+                            free_compare(&cmp);
+                            gen_movl_TN_reg(rd, cpu_dst);
                             break;
                         }
                     case 0x2d: /* V9 sdivx */
-                        gen_op_sdivx(cpu_dst, cpu_src1, cpu_src2);
+                        gen_helper_sdivx(cpu_dst, cpu_env, cpu_src1, cpu_src2);
                         gen_movl_TN_reg(rd, cpu_dst);
                         break;
                     case 0x2e: /* V9 popc */
@@ -4073,27 +4042,24 @@
                     case 0x2f: /* V9 movr */
                         {
                             int cond = GET_FIELD_SP(insn, 10, 12);
-                            int l1;
+                            DisasCompare cmp;
 
-                            cpu_src1 = get_src1(insn, cpu_src1);
+                            gen_compare_reg(&cmp, cond, cpu_src1);
 
-                            l1 = gen_new_label();
-
-                            tcg_gen_brcondi_tl(gen_tcg_cond_reg[cond],
-                                              cpu_src1, 0, l1);
-                            if (IS_IMM) {       /* immediate */
-                                TCGv r_const;
-
+                            /* The get_src2 above loaded the normal 13-bit
+                               immediate field, not the 10-bit field we have
+                               in movr.  But it did handle the reg case.  */
+                            if (IS_IMM) {
                                 simm = GET_FIELD_SPs(insn, 0, 9);
-                                r_const = tcg_const_tl(simm);
-                                gen_movl_TN_reg(rd, r_const);
-                                tcg_temp_free(r_const);
-                            } else {
-                                rs2 = GET_FIELD_SP(insn, 0, 4);
-                                gen_movl_reg_TN(rs2, cpu_tmp0);
-                                gen_movl_TN_reg(rd, cpu_tmp0);
+                                tcg_gen_movi_tl(cpu_src2, simm);
                             }
-                            gen_set_label(l1);
+
+                            gen_movl_reg_TN(rd, cpu_dst);
+                            tcg_gen_movcond_tl(cmp.cond, cpu_dst,
+                                               cmp.c1, cmp.c2,
+                                               cpu_src2, cpu_dst);
+                            free_compare(&cmp);
+                            gen_movl_TN_reg(rd, cpu_dst);
                             break;
                         }
 #endif
@@ -4106,8 +4072,9 @@
                 int opf = GET_FIELD_SP(insn, 5, 13);
                 rs1 = GET_FIELD(insn, 13, 17);
                 rs2 = GET_FIELD(insn, 27, 31);
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
+                }
 
                 switch (opf) {
                 case 0x000: /* VIS I edge8cc */
@@ -4553,7 +4520,7 @@
             } else if (xop == 0x39) { /* V9 return */
                 TCGv_i32 r_const;
 
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 cpu_src1 = get_src1(insn, cpu_src1);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 19, 31);
@@ -4567,7 +4534,7 @@
                         tcg_gen_mov_tl(cpu_dst, cpu_src1);
                 }
                 gen_helper_restore(cpu_env);
-                gen_mov_pc_npc(dc, cpu_cond);
+                gen_mov_pc_npc(dc);
                 r_const = tcg_const_i32(3);
                 gen_helper_check_align(cpu_env, cpu_dst, r_const);
                 tcg_temp_free_i32(r_const);
@@ -4597,7 +4564,7 @@
                         r_pc = tcg_const_tl(dc->pc);
                         gen_movl_TN_reg(rd, r_pc);
                         tcg_temp_free(r_pc);
-                        gen_mov_pc_npc(dc, cpu_cond);
+                        gen_mov_pc_npc(dc);
                         r_const = tcg_const_i32(3);
                         gen_helper_check_align(cpu_env, cpu_dst, r_const);
                         tcg_temp_free_i32(r_const);
@@ -4613,7 +4580,7 @@
 
                         if (!supervisor(dc))
                             goto priv_insn;
-                        gen_mov_pc_npc(dc, cpu_cond);
+                        gen_mov_pc_npc(dc);
                         r_const = tcg_const_i32(3);
                         gen_helper_check_align(cpu_env, cpu_dst, r_const);
                         tcg_temp_free_i32(r_const);
@@ -4629,12 +4596,12 @@
                     /* nop */
                     break;
                 case 0x3c:      /* save */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_helper_save(cpu_env);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
                 case 0x3d:      /* restore */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_helper_restore(cpu_env);
                     gen_movl_TN_reg(rd, cpu_dst);
                     break;
@@ -4717,7 +4684,7 @@
                     else {
                         TCGv_i32 r_const;
 
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                         r_const = tcg_const_i32(7);
                         /* XXX remove alignment check */
                         gen_helper_check_align(cpu_env, cpu_addr, r_const);
@@ -4768,7 +4735,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 4, 0);
                     break;
                 case 0x11:      /* lduba, load unsigned byte alternate */
@@ -4778,7 +4745,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 1, 0);
                     break;
                 case 0x12:      /* lduha, load unsigned halfword alternate */
@@ -4788,7 +4755,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 2, 0);
                     break;
                 case 0x13:      /* ldda, load double word alternate */
@@ -4800,7 +4767,7 @@
 #endif
                     if (rd & 1)
                         goto illegal_insn;
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldda_asi(cpu_val, cpu_addr, insn, rd);
                     goto skip_move;
                 case 0x19:      /* ldsba, load signed byte alternate */
@@ -4810,7 +4777,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 1, 1);
                     break;
                 case 0x1a:      /* ldsha, load signed halfword alternate */
@@ -4820,7 +4787,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 2, 1);
                     break;
                 case 0x1d:      /* ldstuba -- XXX: should be atomically */
@@ -4830,7 +4797,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldstub_asi(cpu_val, cpu_addr, insn);
                     break;
                 case 0x1f:      /* swapa, swap reg with alt. memory. Also
@@ -4842,7 +4809,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_movl_reg_TN(rd, cpu_val);
                     gen_swap_asi(cpu_val, cpu_addr, insn);
                     break;
@@ -4864,28 +4831,28 @@
                     tcg_gen_qemu_ld64(cpu_val, cpu_addr, dc->mem_idx);
                     break;
                 case 0x18: /* V9 ldswa */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 4, 1);
                     break;
                 case 0x1b: /* V9 ldxa */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ld_asi(cpu_val, cpu_addr, insn, 8, 0);
                     break;
                 case 0x2d: /* V9 prefetch, no effect */
                     goto skip_move;
                 case 0x30: /* V9 ldfa */
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldf_asi(cpu_addr, insn, 4, rd);
                     gen_update_fprs_dirty(rd);
                     goto skip_move;
                 case 0x33: /* V9 lddfa */
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldf_asi(cpu_addr, insn, 8, DFPREG(rd));
                     gen_update_fprs_dirty(DFPREG(rd));
                     goto skip_move;
@@ -4893,10 +4860,10 @@
                     goto skip_move;
                 case 0x32: /* V9 ldqfa */
                     CHECK_FPU_FEATURE(dc, FLOAT128);
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_ldf_asi(cpu_addr, insn, 16, QFPREG(rd));
                     gen_update_fprs_dirty(QFPREG(rd));
                     goto skip_move;
@@ -4909,9 +4876,10 @@
             skip_move: ;
 #endif
             } else if (xop >= 0x20 && xop < 0x24) {
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
-                save_state(dc, cpu_cond);
+                }
+                save_state(dc);
                 switch (xop) {
                 case 0x20:      /* ldf, load fpreg */
                     gen_address_mask(dc, cpu_addr);
@@ -4982,7 +4950,7 @@
                     else {
                         TCGv_i32 r_const;
 
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                         gen_address_mask(dc, cpu_addr);
                         r_const = tcg_const_i32(7);
                         /* XXX remove alignment check */
@@ -5001,7 +4969,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_st_asi(cpu_val, cpu_addr, insn, 4);
                     dc->npc = DYNAMIC_PC;
                     break;
@@ -5012,7 +4980,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_st_asi(cpu_val, cpu_addr, insn, 1);
                     dc->npc = DYNAMIC_PC;
                     break;
@@ -5023,7 +4991,7 @@
                     if (!supervisor(dc))
                         goto priv_insn;
 #endif
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_st_asi(cpu_val, cpu_addr, insn, 2);
                     dc->npc = DYNAMIC_PC;
                     break;
@@ -5037,7 +5005,7 @@
                     if (rd & 1)
                         goto illegal_insn;
                     else {
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                         gen_stda_asi(cpu_val, cpu_addr, insn, rd);
                     }
                     break;
@@ -5048,7 +5016,7 @@
                     tcg_gen_qemu_st64(cpu_val, cpu_addr, dc->mem_idx);
                     break;
                 case 0x1e: /* V9 stxa */
-                    save_state(dc, cpu_cond);
+                    save_state(dc);
                     gen_st_asi(cpu_val, cpu_addr, insn, 8);
                     dc->npc = DYNAMIC_PC;
                     break;
@@ -5057,9 +5025,10 @@
                     goto illegal_insn;
                 }
             } else if (xop > 0x23 && xop < 0x28) {
-                if (gen_trap_ifnofpu(dc, cpu_cond))
+                if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
-                save_state(dc, cpu_cond);
+                }
+                save_state(dc);
                 switch (xop) {
                 case 0x24: /* stf, store fpreg */
                     gen_address_mask(dc, cpu_addr);
@@ -5101,8 +5070,9 @@
 #else
                     if (!supervisor(dc))
                         goto priv_insn;
-                    if (gen_trap_ifnofpu(dc, cpu_cond))
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
+                    }
                     goto nfq_insn;
 #endif
 #endif
@@ -5115,11 +5085,11 @@
                     goto illegal_insn;
                 }
             } else if (xop > 0x33 && xop < 0x3f) {
-                save_state(dc, cpu_cond);
+                save_state(dc);
                 switch (xop) {
 #ifdef TARGET_SPARC64
                 case 0x34: /* V9 stfa */
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
                     gen_stf_asi(cpu_addr, insn, 4, rd);
@@ -5129,7 +5099,7 @@
                         TCGv_i32 r_const;
 
                         CHECK_FPU_FEATURE(dc, FLOAT128);
-                        if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                        if (gen_trap_ifnofpu(dc)) {
                             goto jmp_insn;
                         }
                         r_const = tcg_const_i32(7);
@@ -5139,7 +5109,7 @@
                     }
                     break;
                 case 0x37: /* V9 stdfa */
-                    if (gen_trap_ifnofpu(dc, cpu_cond)) {
+                    if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
                     gen_stf_asi(cpu_addr, insn, 8, DFPREG(rd));
@@ -5185,7 +5155,7 @@
     {
         TCGv_i32 r_const;
 
-        save_state(dc, cpu_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_ILL_INSN);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free_i32(r_const);
@@ -5196,7 +5166,7 @@
     {
         TCGv_i32 r_const;
 
-        save_state(dc, cpu_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_UNIMP_FLUSH);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free_i32(r_const);
@@ -5208,7 +5178,7 @@
     {
         TCGv_i32 r_const;
 
-        save_state(dc, cpu_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_PRIV_INSN);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free_i32(r_const);
@@ -5217,13 +5187,13 @@
     goto egress;
 #endif
  nfpu_insn:
-    save_state(dc, cpu_cond);
+    save_state(dc);
     gen_op_fpexception_im(FSR_FTT_UNIMPFPOP);
     dc->is_br = 1;
     goto egress;
 #if !defined(CONFIG_USER_ONLY) && !defined(TARGET_SPARC64)
  nfq_insn:
-    save_state(dc, cpu_cond);
+    save_state(dc);
     gen_op_fpexception_im(FSR_FTT_SEQ_ERROR);
     dc->is_br = 1;
     goto egress;
@@ -5233,7 +5203,7 @@
     {
         TCGv r_const;
 
-        save_state(dc, cpu_cond);
+        save_state(dc);
         r_const = tcg_const_i32(TT_NCP_INSN);
         gen_helper_raise_exception(cpu_env, r_const);
         tcg_temp_free(r_const);
@@ -5279,16 +5249,6 @@
     dc->singlestep = (env->singlestep_enabled || singlestep);
     gen_opc_end = gen_opc_buf + OPC_MAX_SIZE;
 
-    cpu_tmp0 = tcg_temp_new();
-    cpu_tmp32 = tcg_temp_new_i32();
-    cpu_tmp64 = tcg_temp_new_i64();
-
-    cpu_dst = tcg_temp_local_new();
-
-    // loads and stores
-    cpu_val = tcg_temp_local_new();
-    cpu_addr = tcg_temp_local_new();
-
     num_insns = 0;
     max_insns = tb->cflags & CF_COUNT_MASK;
     if (max_insns == 0)
@@ -5299,7 +5259,7 @@
             QTAILQ_FOREACH(bp, &env->breakpoints, entry) {
                 if (bp->pc == dc->pc) {
                     if (dc->pc != pc_start)
-                        save_state(dc, cpu_cond);
+                        save_state(dc);
                     gen_helper_debug(cpu_env);
                     tcg_gen_exit_tb(0);
                     dc->is_br = 1;
@@ -5324,9 +5284,24 @@
             gen_io_start();
         last_pc = dc->pc;
         insn = cpu_ldl_code(env, dc->pc);
+
+        cpu_tmp0 = tcg_temp_new();
+        cpu_tmp32 = tcg_temp_new_i32();
+        cpu_tmp64 = tcg_temp_new_i64();
+        cpu_dst = tcg_temp_new();
+        cpu_val = tcg_temp_new();
+        cpu_addr = tcg_temp_new();
+
         disas_sparc_insn(dc, insn);
         num_insns++;
 
+        tcg_temp_free(cpu_addr);
+        tcg_temp_free(cpu_val);
+        tcg_temp_free(cpu_dst);
+        tcg_temp_free_i64(cpu_tmp64);
+        tcg_temp_free_i32(cpu_tmp32);
+        tcg_temp_free(cpu_tmp0);
+
         if (dc->is_br)
             break;
         /* if the next PC is different, we abort now */
@@ -5346,24 +5321,19 @@
              num_insns < max_insns);
 
  exit_gen_loop:
-    tcg_temp_free(cpu_addr);
-    tcg_temp_free(cpu_val);
-    tcg_temp_free(cpu_dst);
-    tcg_temp_free_i64(cpu_tmp64);
-    tcg_temp_free_i32(cpu_tmp32);
-    tcg_temp_free(cpu_tmp0);
-
-    if (tb->cflags & CF_LAST_IO)
+    if (tb->cflags & CF_LAST_IO) {
         gen_io_end();
+    }
     if (!dc->is_br) {
         if (dc->pc != DYNAMIC_PC &&
             (dc->npc != DYNAMIC_PC && dc->npc != JUMP_PC)) {
             /* static PC and NPC: we can use direct chaining */
             gen_goto_tb(dc, 0, dc->pc, dc->npc);
         } else {
-            if (dc->pc != DYNAMIC_PC)
+            if (dc->pc != DYNAMIC_PC) {
                 tcg_gen_movi_tl(cpu_pc, dc->pc);
-            save_npc(dc, cpu_cond);
+            }
+            save_npc(dc);
             tcg_gen_exit_tb(0);
         }
     }
diff --git a/target-unicore32/Makefile.objs b/target-unicore32/Makefile.objs
index 777f01f..8e143da 100644
--- a/target-unicore32/Makefile.objs
+++ b/target-unicore32/Makefile.objs
@@ -2,5 +2,3 @@
 obj-y += ucf64_helper.o
 
 obj-$(CONFIG_SOFTMMU) += machine.o softmmu.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-unicore32/helper.h b/target-unicore32/helper.h
index 305318a..a4b8149 100644
--- a/target-unicore32/helper.h
+++ b/target-unicore32/helper.h
@@ -17,26 +17,26 @@
 DEF_HELPER_1(clz, i32, i32)
 DEF_HELPER_1(clo, i32, i32)
 
-DEF_HELPER_1(exception, void, i32)
+DEF_HELPER_2(exception, void, env, i32)
 
-DEF_HELPER_2(asr_write, void, i32, i32)
-DEF_HELPER_0(asr_read, i32)
+DEF_HELPER_3(asr_write, void, env, i32, i32)
+DEF_HELPER_1(asr_read, i32, env)
 
-DEF_HELPER_1(get_user_reg, i32, i32)
-DEF_HELPER_2(set_user_reg, void, i32, i32)
+DEF_HELPER_2(get_user_reg, i32, env, i32)
+DEF_HELPER_3(set_user_reg, void, env, i32, i32)
 
-DEF_HELPER_2(add_cc, i32, i32, i32)
-DEF_HELPER_2(adc_cc, i32, i32, i32)
-DEF_HELPER_2(sub_cc, i32, i32, i32)
-DEF_HELPER_2(sbc_cc, i32, i32, i32)
+DEF_HELPER_3(add_cc, i32, env, i32, i32)
+DEF_HELPER_3(adc_cc, i32, env, i32, i32)
+DEF_HELPER_3(sub_cc, i32, env, i32, i32)
+DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
 DEF_HELPER_2(shl, i32, i32, i32)
 DEF_HELPER_2(shr, i32, i32, i32)
 DEF_HELPER_2(sar, i32, i32, i32)
-DEF_HELPER_2(shl_cc, i32, i32, i32)
-DEF_HELPER_2(shr_cc, i32, i32, i32)
-DEF_HELPER_2(sar_cc, i32, i32, i32)
-DEF_HELPER_2(ror_cc, i32, i32, i32)
+DEF_HELPER_3(shl_cc, i32, env, i32, i32)
+DEF_HELPER_3(shr_cc, i32, env, i32, i32)
+DEF_HELPER_3(sar_cc, i32, env, i32, i32)
+DEF_HELPER_3(ror_cc, i32, env, i32, i32)
 
 DEF_HELPER_1(ucf64_get_fpscr, i32, env)
 DEF_HELPER_2(ucf64_set_fpscr, void, env, i32)
diff --git a/target-unicore32/op_helper.c b/target-unicore32/op_helper.c
index c63789d..f474d1b 100644
--- a/target-unicore32/op_helper.c
+++ b/target-unicore32/op_helper.c
@@ -9,19 +9,18 @@
  * later version. See the COPYING file in the top-level directory.
  */
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
-void HELPER(exception)(uint32_t excp)
+void HELPER(exception)(CPUUniCore32State *env, uint32_t excp)
 {
     env->exception_index = excp;
     cpu_loop_exit(env);
 }
 
-static target_ulong asr_read(void)
+static target_ulong asr_read(CPUUniCore32State *env)
 {
     int ZF;
     ZF = (env->ZF == 0);
@@ -29,24 +28,18 @@
         (env->CF << 29) | ((env->VF & 0x80000000) >> 3);
 }
 
-target_ulong cpu_asr_read(CPUUniCore32State *env1)
+target_ulong cpu_asr_read(CPUUniCore32State *env)
 {
-    CPUUniCore32State *saved_env;
-    target_ulong ret;
-
-    saved_env = env;
-    env = env1;
-    ret = asr_read();
-    env = saved_env;
-    return ret;
+    return asr_read(env);
 }
 
-target_ulong HELPER(asr_read)(void)
+target_ulong HELPER(asr_read)(CPUUniCore32State *env)
 {
-    return asr_read();
+    return asr_read(env);
 }
 
-static void asr_write(target_ulong val, target_ulong mask)
+static void asr_write(CPUUniCore32State *env, target_ulong val,
+                      target_ulong mask)
 {
     if (mask & ASR_NZCV) {
         env->ZF = (~val) & ASR_Z;
@@ -62,23 +55,19 @@
     env->uncached_asr = (env->uncached_asr & ~mask) | (val & mask);
 }
 
-void cpu_asr_write(CPUUniCore32State *env1, target_ulong val, target_ulong mask)
+void cpu_asr_write(CPUUniCore32State *env, target_ulong val, target_ulong mask)
 {
-    CPUUniCore32State *saved_env;
-
-    saved_env = env;
-    env = env1;
-    asr_write(val, mask);
-    env = saved_env;
+    asr_write(env, val, mask);
 }
 
-void HELPER(asr_write)(target_ulong val, target_ulong mask)
+void HELPER(asr_write)(CPUUniCore32State *env, target_ulong val,
+                       target_ulong mask)
 {
-    asr_write(val, mask);
+    asr_write(env, val, mask);
 }
 
 /* Access to user mode registers from privileged modes.  */
-uint32_t HELPER(get_user_reg)(uint32_t regno)
+uint32_t HELPER(get_user_reg)(CPUUniCore32State *env, uint32_t regno)
 {
     uint32_t val;
 
@@ -92,7 +81,7 @@
     return val;
 }
 
-void HELPER(set_user_reg)(uint32_t regno, uint32_t val)
+void HELPER(set_user_reg)(CPUUniCore32State *env, uint32_t regno, uint32_t val)
 {
     if (regno == 29) {
         env->banked_r29[0] = val;
@@ -107,7 +96,7 @@
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
 
-uint32_t HELPER(add_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_cc)(CPUUniCore32State *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     result = a + b;
@@ -117,7 +106,7 @@
     return result;
 }
 
-uint32_t HELPER(adc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(adc_cc)(CPUUniCore32State *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -132,7 +121,7 @@
     return result;
 }
 
-uint32_t HELPER(sub_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_cc)(CPUUniCore32State *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     result = a - b;
@@ -142,7 +131,7 @@
     return result;
 }
 
-uint32_t HELPER(sbc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(sbc_cc)(CPUUniCore32State *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -186,7 +175,7 @@
     return (int32_t)x >> shift;
 }
 
-uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shl_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -203,7 +192,7 @@
     return x;
 }
 
-uint32_t HELPER(shr_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shr_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -220,7 +209,7 @@
     return x;
 }
 
-uint32_t HELPER(sar_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(sar_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -233,7 +222,7 @@
     return x;
 }
 
-uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(ror_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 {
     int shift1, shift;
     shift1 = i & 0xff;
@@ -264,16 +253,13 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
-void tlb_fill(CPUUniCore32State *env1, target_ulong addr, int is_write,
-        int mmu_idx, uintptr_t retaddr)
+void tlb_fill(CPUUniCore32State *env, target_ulong addr, int is_write,
+              int mmu_idx, uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUUniCore32State *saved_env;
     unsigned long pc;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = uc32_cpu_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -287,6 +273,5 @@
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 #endif
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index 188bf8c..c3cdafa 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -253,7 +253,7 @@
 static inline void gen_set_asr(TCGv var, uint32_t mask)
 {
     TCGv tmp_mask = tcg_const_i32(mask);
-    gen_helper_asr_write(var, tmp_mask);
+    gen_helper_asr_write(cpu_env, var, tmp_mask);
     tcg_temp_free_i32(tmp_mask);
 }
 /* Set NZCV flags from the high 4 bits of var.  */
@@ -263,7 +263,7 @@
 {
     TCGv tmp = new_tmp();
     tcg_gen_movi_i32(tmp, excp);
-    gen_helper_exception(tmp);
+    gen_helper_exception(cpu_env, tmp);
     dead_tmp(tmp);
 }
 
@@ -416,16 +416,16 @@
     if (flags) {
         switch (shiftop) {
         case 0:
-            gen_helper_shl_cc(var, var, shift);
+            gen_helper_shl_cc(var, cpu_env, var, shift);
             break;
         case 1:
-            gen_helper_shr_cc(var, var, shift);
+            gen_helper_shr_cc(var, cpu_env, var, shift);
             break;
         case 2:
-            gen_helper_sar_cc(var, var, shift);
+            gen_helper_sar_cc(var, cpu_env, var, shift);
             break;
         case 3:
-            gen_helper_ror_cc(var, var, shift);
+            gen_helper_ror_cc(var, cpu_env, var, shift);
             break;
         }
     } else {
@@ -1323,11 +1323,11 @@
             if (IS_USER(s)) {
                 ILLEGAL;
             }
-            gen_helper_sub_cc(tmp, tmp, tmp2);
+            gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             gen_exception_return(s, tmp);
         } else {
             if (UCOP_SET_S) {
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 tcg_gen_sub_i32(tmp, tmp, tmp2);
             }
@@ -1336,7 +1336,7 @@
         break;
     case 0x03:
         if (UCOP_SET_S) {
-            gen_helper_sub_cc(tmp, tmp2, tmp);
+            gen_helper_sub_cc(tmp, cpu_env, tmp2, tmp);
         } else {
             tcg_gen_sub_i32(tmp, tmp2, tmp);
         }
@@ -1344,7 +1344,7 @@
         break;
     case 0x04:
         if (UCOP_SET_S) {
-            gen_helper_add_cc(tmp, tmp, tmp2);
+            gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
         } else {
             tcg_gen_add_i32(tmp, tmp, tmp2);
         }
@@ -1352,7 +1352,7 @@
         break;
     case 0x05:
         if (UCOP_SET_S) {
-            gen_helper_adc_cc(tmp, tmp, tmp2);
+            gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
         } else {
             gen_add_carry(tmp, tmp, tmp2);
         }
@@ -1360,7 +1360,7 @@
         break;
     case 0x06:
         if (UCOP_SET_S) {
-            gen_helper_sbc_cc(tmp, tmp, tmp2);
+            gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
         } else {
             gen_sub_carry(tmp, tmp, tmp2);
         }
@@ -1368,7 +1368,7 @@
         break;
     case 0x07:
         if (UCOP_SET_S) {
-            gen_helper_sbc_cc(tmp, tmp2, tmp);
+            gen_helper_sbc_cc(tmp, cpu_env, tmp2, tmp);
         } else {
             gen_sub_carry(tmp, tmp2, tmp);
         }
@@ -1390,13 +1390,13 @@
         break;
     case 0x0a:
         if (UCOP_SET_S) {
-            gen_helper_sub_cc(tmp, tmp, tmp2);
+            gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
         }
         dead_tmp(tmp);
         break;
     case 0x0b:
         if (UCOP_SET_S) {
-            gen_helper_add_cc(tmp, tmp, tmp2);
+            gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
         }
         dead_tmp(tmp);
         break;
@@ -1536,7 +1536,7 @@
             tmp = load_cpu_field(bsr);
         } else {
             tmp = new_tmp();
-            gen_helper_asr_read(tmp);
+            gen_helper_asr_read(tmp, cpu_env);
         }
         store_reg(s, UCOP_REG_D, tmp);
         return;
@@ -1760,7 +1760,7 @@
                     gen_bx(s, tmp);
                 } else if (user) {
                     tmp2 = tcg_const_i32(reg);
-                    gen_helper_set_user_reg(tmp2, tmp);
+                    gen_helper_set_user_reg(cpu_env, tmp2, tmp);
                     tcg_temp_free_i32(tmp2);
                     dead_tmp(tmp);
                 } else if (reg == UCOP_REG_N) {
@@ -1778,7 +1778,7 @@
                 } else if (user) {
                     tmp = new_tmp();
                     tmp2 = tcg_const_i32(reg);
-                    gen_helper_get_user_reg(tmp, tmp2);
+                    gen_helper_get_user_reg(tmp, cpu_env, tmp2);
                     tcg_temp_free_i32(tmp2);
                 } else {
                     tmp = load_reg(s, reg);
@@ -1861,7 +1861,11 @@
 {
     unsigned int insn;
 
-    insn = ldl_code(s->pc);
+    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
+        tcg_gen_debug_insn_start(s->pc);
+    }
+
+    insn = cpu_ldl_code(env, s->pc);
     s->pc += 4;
 
     /* UniCore instructions class:
@@ -1928,8 +1932,6 @@
         }
         ILLEGAL;
     }
-
-    return;
 }
 
 /* generate intermediate code in gen_opc_buf and gen_opparam_buf for
diff --git a/target-xtensa/cpu.h b/target-xtensa/cpu.h
index 177094a..7348277 100644
--- a/target-xtensa/cpu.h
+++ b/target-xtensa/cpu.h
@@ -36,6 +36,7 @@
 #include "config.h"
 #include "qemu-common.h"
 #include "cpu-defs.h"
+#include "fpu/softfloat.h"
 
 #define TARGET_HAS_ICE 1
 
@@ -325,6 +326,8 @@
     uint32_t sregs[256];
     uint32_t uregs[256];
     uint32_t phys_regs[MAX_NAREG];
+    float32 fregs[16];
+    float_status fp_status;
 
     xtensa_tlb_entry itlb[7][MAX_TLB_WAY_SIZE];
     xtensa_tlb_entry dtlb[10][MAX_TLB_WAY_SIZE];
@@ -465,6 +468,8 @@
 #define XTENSA_TBFLAG_LITBASE 0x8
 #define XTENSA_TBFLAG_DEBUG 0x10
 #define XTENSA_TBFLAG_ICOUNT 0x20
+#define XTENSA_TBFLAG_CPENABLE_MASK 0x3fc0
+#define XTENSA_TBFLAG_CPENABLE_SHIFT 6
 
 static inline void cpu_get_tb_cpu_state(CPUXtensaState *env, target_ulong *pc,
         target_ulong *cs_base, int *flags)
@@ -488,6 +493,9 @@
             *flags |= XTENSA_TBFLAG_ICOUNT;
         }
     }
+    if (xtensa_option_enabled(env->config, XTENSA_OPTION_COPROCESSOR)) {
+        *flags |= env->sregs[CPENABLE] << XTENSA_TBFLAG_CPENABLE_SHIFT;
+    }
 }
 
 #include "cpu-all.h"
diff --git a/target-xtensa/helper.h b/target-xtensa/helper.h
index 152fec0..4cc0088 100644
--- a/target-xtensa/helper.h
+++ b/target-xtensa/helper.h
@@ -36,4 +36,25 @@
 DEF_HELPER_3(wsr_dbreaka, void, env, i32, i32)
 DEF_HELPER_3(wsr_dbreakc, void, env, i32, i32)
 
+DEF_HELPER_2(wur_fcr, void, env, i32)
+DEF_HELPER_FLAGS_1(abs_s, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_FLAGS_1(neg_s, TCG_CALL_CONST | TCG_CALL_PURE, f32, f32)
+DEF_HELPER_3(add_s, f32, env, f32, f32)
+DEF_HELPER_3(sub_s, f32, env, f32, f32)
+DEF_HELPER_3(mul_s, f32, env, f32, f32)
+DEF_HELPER_4(madd_s, f32, env, f32, f32, f32)
+DEF_HELPER_4(msub_s, f32, env, f32, f32, f32)
+DEF_HELPER_FLAGS_3(ftoi, TCG_CALL_CONST | TCG_CALL_PURE, i32, f32, i32, i32)
+DEF_HELPER_FLAGS_3(ftoui, TCG_CALL_CONST | TCG_CALL_PURE, i32, f32, i32, i32)
+DEF_HELPER_3(itof, f32, env, i32, i32)
+DEF_HELPER_3(uitof, f32, env, i32, i32)
+
+DEF_HELPER_4(un_s, void, env, i32, f32, f32)
+DEF_HELPER_4(oeq_s, void, env, i32, f32, f32)
+DEF_HELPER_4(ueq_s, void, env, i32, f32, f32)
+DEF_HELPER_4(olt_s, void, env, i32, f32, f32)
+DEF_HELPER_4(ult_s, void, env, i32, f32, f32)
+DEF_HELPER_4(ole_s, void, env, i32, f32, f32)
+DEF_HELPER_4(ule_s, void, env, i32, f32, f32)
+
 #include "def-helper.h"
diff --git a/target-xtensa/op_helper.c b/target-xtensa/op_helper.c
index 2659c0e..ae0c099 100644
--- a/target-xtensa/op_helper.c
+++ b/target-xtensa/op_helper.c
@@ -771,3 +771,137 @@
     }
     env->sregs[DBREAKC + i] = v;
 }
+
+void HELPER(wur_fcr)(CPUXtensaState *env, uint32_t v)
+{
+    static const int rounding_mode[] = {
+        float_round_nearest_even,
+        float_round_to_zero,
+        float_round_up,
+        float_round_down,
+    };
+
+    env->uregs[FCR] = v & 0xfffff07f;
+    set_float_rounding_mode(rounding_mode[v & 3], &env->fp_status);
+}
+
+float32 HELPER(abs_s)(float32 v)
+{
+    return float32_abs(v);
+}
+
+float32 HELPER(neg_s)(float32 v)
+{
+    return float32_chs(v);
+}
+
+float32 HELPER(add_s)(CPUXtensaState *env, float32 a, float32 b)
+{
+    return float32_add(a, b, &env->fp_status);
+}
+
+float32 HELPER(sub_s)(CPUXtensaState *env, float32 a, float32 b)
+{
+    return float32_sub(a, b, &env->fp_status);
+}
+
+float32 HELPER(mul_s)(CPUXtensaState *env, float32 a, float32 b)
+{
+    return float32_mul(a, b, &env->fp_status);
+}
+
+float32 HELPER(madd_s)(CPUXtensaState *env, float32 a, float32 b, float32 c)
+{
+    return float32_muladd(b, c, a, 0,
+            &env->fp_status);
+}
+
+float32 HELPER(msub_s)(CPUXtensaState *env, float32 a, float32 b, float32 c)
+{
+    return float32_muladd(b, c, a, float_muladd_negate_product,
+            &env->fp_status);
+}
+
+uint32_t HELPER(ftoi)(float32 v, uint32_t rounding_mode, uint32_t scale)
+{
+    float_status fp_status = {0};
+
+    set_float_rounding_mode(rounding_mode, &fp_status);
+    return float32_to_int32(
+            float32_scalbn(v, scale, &fp_status), &fp_status);
+}
+
+uint32_t HELPER(ftoui)(float32 v, uint32_t rounding_mode, uint32_t scale)
+{
+    float_status fp_status = {0};
+    float32 res;
+
+    set_float_rounding_mode(rounding_mode, &fp_status);
+
+    res = float32_scalbn(v, scale, &fp_status);
+
+    if (float32_is_neg(v) && !float32_is_any_nan(v)) {
+        return float32_to_int32(res, &fp_status);
+    } else {
+        return float32_to_uint32(res, &fp_status);
+    }
+}
+
+float32 HELPER(itof)(CPUXtensaState *env, uint32_t v, uint32_t scale)
+{
+    return float32_scalbn(int32_to_float32(v, &env->fp_status),
+            (int32_t)scale, &env->fp_status);
+}
+
+float32 HELPER(uitof)(CPUXtensaState *env, uint32_t v, uint32_t scale)
+{
+    return float32_scalbn(uint32_to_float32(v, &env->fp_status),
+            (int32_t)scale, &env->fp_status);
+}
+
+static inline void set_br(CPUXtensaState *env, bool v, uint32_t br)
+{
+    if (v) {
+        env->sregs[BR] |= br;
+    } else {
+        env->sregs[BR] &= ~br;
+    }
+}
+
+void HELPER(un_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    set_br(env, float32_unordered_quiet(a, b, &env->fp_status), br);
+}
+
+void HELPER(oeq_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    set_br(env, float32_eq_quiet(a, b, &env->fp_status), br);
+}
+
+void HELPER(ueq_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    int v = float32_compare_quiet(a, b, &env->fp_status);
+    set_br(env, v == float_relation_equal || v == float_relation_unordered, br);
+}
+
+void HELPER(olt_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    set_br(env, float32_lt_quiet(a, b, &env->fp_status), br);
+}
+
+void HELPER(ult_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    int v = float32_compare_quiet(a, b, &env->fp_status);
+    set_br(env, v == float_relation_less || v == float_relation_unordered, br);
+}
+
+void HELPER(ole_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    set_br(env, float32_le_quiet(a, b, &env->fp_status), br);
+}
+
+void HELPER(ule_s)(CPUXtensaState *env, uint32_t br, float32 a, float32 b)
+{
+    int v = float32_compare_quiet(a, b, &env->fp_status);
+    set_br(env, v != float_relation_greater, br);
+}
diff --git a/target-xtensa/overlay_tool.h b/target-xtensa/overlay_tool.h
index a3a5650..e395053 100644
--- a/target-xtensa/overlay_tool.h
+++ b/target-xtensa/overlay_tool.h
@@ -58,6 +58,7 @@
     XCHAL_OPTION(XCHAL_HAVE_SEXT, XTENSA_OPTION_MISC_OP_SEXT) | \
     XCHAL_OPTION(XCHAL_HAVE_CLAMPS, XTENSA_OPTION_MISC_OP_CLAMPS) | \
     XCHAL_OPTION(XCHAL_HAVE_CP, XTENSA_OPTION_COPROCESSOR) | \
+    XCHAL_OPTION(XCHAL_HAVE_BOOLEANS, XTENSA_OPTION_BOOLEAN) | \
     XCHAL_OPTION(XCHAL_HAVE_FP, XTENSA_OPTION_FP_COPROCESSOR) | \
     XCHAL_OPTION(XCHAL_HAVE_RELEASE_SYNC, XTENSA_OPTION_MP_SYNCHRO) | \
     XCHAL_OPTION(XCHAL_HAVE_S32C1I, XTENSA_OPTION_CONDITIONAL_STORE) | \
diff --git a/target-xtensa/translate.c b/target-xtensa/translate.c
index 1900bd5..82e8ccc 100644
--- a/target-xtensa/translate.c
+++ b/target-xtensa/translate.c
@@ -65,11 +65,14 @@
     bool debug;
     bool icount;
     TCGv_i32 next_icount;
+
+    unsigned cpenable;
 } DisasContext;
 
 static TCGv_ptr cpu_env;
 static TCGv_i32 cpu_pc;
 static TCGv_i32 cpu_R[16];
+static TCGv_i32 cpu_FR[16];
 static TCGv_i32 cpu_SR[256];
 static TCGv_i32 cpu_UR[256];
 
@@ -155,6 +158,12 @@
         "ar8", "ar9", "ar10", "ar11",
         "ar12", "ar13", "ar14", "ar15",
     };
+    static const char * const fregnames[] = {
+        "f0", "f1", "f2", "f3",
+        "f4", "f5", "f6", "f7",
+        "f8", "f9", "f10", "f11",
+        "f12", "f13", "f14", "f15",
+    };
     int i;
 
     cpu_env = tcg_global_reg_new_ptr(TCG_AREG0, "env");
@@ -167,6 +176,12 @@
                 regnames[i]);
     }
 
+    for (i = 0; i < 16; i++) {
+        cpu_FR[i] = tcg_global_mem_new_i32(TCG_AREG0,
+                offsetof(CPUXtensaState, fregs[i]),
+                fregnames[i]);
+    }
+
     for (i = 0; i < 256; ++i) {
         if (sregnames[i]) {
             cpu_SR[i] = tcg_global_mem_new_i32(TCG_AREG0,
@@ -318,6 +333,15 @@
     }
 }
 
+static void gen_check_cpenable(DisasContext *dc, unsigned cp)
+{
+    if (option_enabled(dc, XTENSA_OPTION_COPROCESSOR) &&
+            !(dc->cpenable & (1 << cp))) {
+        gen_exception_cause(dc, COPROCESSOR0_DISABLED + cp);
+        dc->is_jmp = DISAS_UPDATE;
+    }
+}
+
 static void gen_jump_slot(DisasContext *dc, TCGv dest, int slot)
 {
     tcg_gen_mov_i32(cpu_pc, dest);
@@ -566,6 +590,13 @@
     }
 }
 
+static void gen_wsr_cpenable(DisasContext *dc, uint32_t sr, TCGv_i32 v)
+{
+    tcg_gen_andi_i32(cpu_SR[sr], v, 0xff);
+    /* This can change tb->flags, so exit tb */
+    gen_jumpi_check_loop_end(dc, -1);
+}
+
 static void gen_wsr_intset(DisasContext *dc, uint32_t sr, TCGv_i32 v)
 {
     tcg_gen_andi_i32(cpu_SR[sr], v,
@@ -668,6 +699,7 @@
         [DBREAKA + 1] = gen_wsr_dbreaka,
         [DBREAKC] = gen_wsr_dbreakc,
         [DBREAKC + 1] = gen_wsr_dbreakc,
+        [CPENABLE] = gen_wsr_cpenable,
         [INTSET] = gen_wsr_intset,
         [INTCLEAR] = gen_wsr_intclear,
         [INTENABLE] = gen_wsr_intenable,
@@ -692,6 +724,23 @@
     }
 }
 
+static void gen_wur(uint32_t ur, TCGv_i32 s)
+{
+    switch (ur) {
+    case FCR:
+        gen_helper_wur_fcr(cpu_env, s);
+        break;
+
+    case FSR:
+        tcg_gen_andi_i32(cpu_UR[ur], s, 0xffffff80);
+        break;
+
+    default:
+        tcg_gen_mov_i32(cpu_UR[ur], s);
+        break;
+    }
+}
+
 static void gen_load_store_alignment(DisasContext *dc, int shift,
         TCGv_i32 addr, bool no_hw_alignment)
 {
@@ -1761,13 +1810,11 @@
 
             case 15: /*WUR*/
                 gen_window_check1(dc, RRR_T);
-                {
-                    if (uregnames[RSR_SR]) {
-                        tcg_gen_mov_i32(cpu_UR[RSR_SR], cpu_R[RRR_T]);
-                    } else {
-                        qemu_log("WUR %d not implemented, ", RSR_SR);
-                        TBD();
-                    }
+                if (uregnames[RSR_SR]) {
+                    gen_wur(RSR_SR, cpu_R[RRR_T]);
+                } else {
+                    qemu_log("WUR %d not implemented, ", RSR_SR);
+                    TBD();
                 }
                 break;
 
@@ -1778,7 +1825,7 @@
         case 5:
             gen_window_check2(dc, RRR_R, RRR_T);
             {
-                int shiftimm = RRR_S | (OP1 << 4);
+                int shiftimm = RRR_S | ((OP1 & 1) << 4);
                 int maskimm = (1 << (OP2 + 1)) - 1;
 
                 TCGv_i32 tmp = tcg_temp_new_i32();
@@ -1797,8 +1844,34 @@
             break;
 
         case 8: /*LSCXp*/
-            HAS_OPTION(XTENSA_OPTION_COPROCESSOR);
-            TBD();
+            switch (OP2) {
+            case 0: /*LSXf*/
+            case 1: /*LSXUf*/
+            case 4: /*SSXf*/
+            case 5: /*SSXUf*/
+                HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
+                gen_window_check2(dc, RRR_S, RRR_T);
+                gen_check_cpenable(dc, 0);
+                {
+                    TCGv_i32 addr = tcg_temp_new_i32();
+                    tcg_gen_add_i32(addr, cpu_R[RRR_S], cpu_R[RRR_T]);
+                    gen_load_store_alignment(dc, 2, addr, false);
+                    if (OP2 & 0x4) {
+                        tcg_gen_qemu_st32(cpu_FR[RRR_R], addr, dc->cring);
+                    } else {
+                        tcg_gen_qemu_ld32u(cpu_FR[RRR_R], addr, dc->cring);
+                    }
+                    if (OP2 & 0x1) {
+                        tcg_gen_mov_i32(cpu_R[RRR_S], addr);
+                    }
+                    tcg_temp_free(addr);
+                }
+                break;
+
+            default: /*reserved*/
+                RESERVED();
+                break;
+            }
             break;
 
         case 9: /*LSC4*/
@@ -1836,12 +1909,213 @@
 
         case 10: /*FP0*/
             HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
-            TBD();
+            switch (OP2) {
+            case 0: /*ADD.Sf*/
+                gen_check_cpenable(dc, 0);
+                gen_helper_add_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 1: /*SUB.Sf*/
+                gen_check_cpenable(dc, 0);
+                gen_helper_sub_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 2: /*MUL.Sf*/
+                gen_check_cpenable(dc, 0);
+                gen_helper_mul_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 4: /*MADD.Sf*/
+                gen_check_cpenable(dc, 0);
+                gen_helper_madd_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 5: /*MSUB.Sf*/
+                gen_check_cpenable(dc, 0);
+                gen_helper_msub_s(cpu_FR[RRR_R], cpu_env,
+                        cpu_FR[RRR_R], cpu_FR[RRR_S], cpu_FR[RRR_T]);
+                break;
+
+            case 8: /*ROUND.Sf*/
+            case 9: /*TRUNC.Sf*/
+            case 10: /*FLOOR.Sf*/
+            case 11: /*CEIL.Sf*/
+            case 14: /*UTRUNC.Sf*/
+                gen_window_check1(dc, RRR_R);
+                gen_check_cpenable(dc, 0);
+                {
+                    static const unsigned rounding_mode_const[] = {
+                        float_round_nearest_even,
+                        float_round_to_zero,
+                        float_round_down,
+                        float_round_up,
+                        [6] = float_round_to_zero,
+                    };
+                    TCGv_i32 rounding_mode = tcg_const_i32(
+                            rounding_mode_const[OP2 & 7]);
+                    TCGv_i32 scale = tcg_const_i32(RRR_T);
+
+                    if (OP2 == 14) {
+                        gen_helper_ftoui(cpu_R[RRR_R], cpu_FR[RRR_S],
+                                rounding_mode, scale);
+                    } else {
+                        gen_helper_ftoi(cpu_R[RRR_R], cpu_FR[RRR_S],
+                                rounding_mode, scale);
+                    }
+
+                    tcg_temp_free(rounding_mode);
+                    tcg_temp_free(scale);
+                }
+                break;
+
+            case 12: /*FLOAT.Sf*/
+            case 13: /*UFLOAT.Sf*/
+                gen_window_check1(dc, RRR_S);
+                gen_check_cpenable(dc, 0);
+                {
+                    TCGv_i32 scale = tcg_const_i32(-RRR_T);
+
+                    if (OP2 == 13) {
+                        gen_helper_uitof(cpu_FR[RRR_R], cpu_env,
+                                cpu_R[RRR_S], scale);
+                    } else {
+                        gen_helper_itof(cpu_FR[RRR_R], cpu_env,
+                                cpu_R[RRR_S], scale);
+                    }
+                    tcg_temp_free(scale);
+                }
+                break;
+
+            case 15: /*FP1OP*/
+                switch (RRR_T) {
+                case 0: /*MOV.Sf*/
+                    gen_check_cpenable(dc, 0);
+                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    break;
+
+                case 1: /*ABS.Sf*/
+                    gen_check_cpenable(dc, 0);
+                    gen_helper_abs_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    break;
+
+                case 4: /*RFRf*/
+                    gen_window_check1(dc, RRR_R);
+                    gen_check_cpenable(dc, 0);
+                    tcg_gen_mov_i32(cpu_R[RRR_R], cpu_FR[RRR_S]);
+                    break;
+
+                case 5: /*WFRf*/
+                    gen_window_check1(dc, RRR_S);
+                    gen_check_cpenable(dc, 0);
+                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_R[RRR_S]);
+                    break;
+
+                case 6: /*NEG.Sf*/
+                    gen_check_cpenable(dc, 0);
+                    gen_helper_neg_s(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    break;
+
+                default: /*reserved*/
+                    RESERVED();
+                    break;
+                }
+                break;
+
+            default: /*reserved*/
+                RESERVED();
+                break;
+            }
             break;
 
         case 11: /*FP1*/
             HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
-            TBD();
+
+#define gen_compare(rel, br, a, b) \
+    do { \
+        TCGv_i32 bit = tcg_const_i32(1 << br); \
+        \
+        gen_check_cpenable(dc, 0); \
+        gen_helper_##rel(cpu_env, bit, cpu_FR[a], cpu_FR[b]); \
+        tcg_temp_free(bit); \
+    } while (0)
+
+            switch (OP2) {
+            case 1: /*UN.Sf*/
+                gen_compare(un_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 2: /*OEQ.Sf*/
+                gen_compare(oeq_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 3: /*UEQ.Sf*/
+                gen_compare(ueq_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 4: /*OLT.Sf*/
+                gen_compare(olt_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 5: /*ULT.Sf*/
+                gen_compare(ult_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 6: /*OLE.Sf*/
+                gen_compare(ole_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+            case 7: /*ULE.Sf*/
+                gen_compare(ule_s, RRR_R, RRR_S, RRR_T);
+                break;
+
+#undef gen_compare
+
+            case 8: /*MOVEQZ.Sf*/
+            case 9: /*MOVNEZ.Sf*/
+            case 10: /*MOVLTZ.Sf*/
+            case 11: /*MOVGEZ.Sf*/
+                gen_window_check1(dc, RRR_T);
+                gen_check_cpenable(dc, 0);
+                {
+                    static const TCGCond cond[] = {
+                        TCG_COND_NE,
+                        TCG_COND_EQ,
+                        TCG_COND_GE,
+                        TCG_COND_LT
+                    };
+                    int label = gen_new_label();
+                    tcg_gen_brcondi_i32(cond[OP2 - 8], cpu_R[RRR_T], 0, label);
+                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    gen_set_label(label);
+                }
+                break;
+
+            case 12: /*MOVF.Sf*/
+            case 13: /*MOVT.Sf*/
+                HAS_OPTION(XTENSA_OPTION_BOOLEAN);
+                gen_check_cpenable(dc, 0);
+                {
+                    int label = gen_new_label();
+                    TCGv_i32 tmp = tcg_temp_new_i32();
+
+                    tcg_gen_andi_i32(tmp, cpu_SR[BR], 1 << RRR_T);
+                    tcg_gen_brcondi_i32(
+                            OP2 & 1 ? TCG_COND_EQ : TCG_COND_NE,
+                            tmp, 0, label);
+                    tcg_gen_mov_i32(cpu_FR[RRR_R], cpu_FR[RRR_S]);
+                    gen_set_label(label);
+                    tcg_temp_free(tmp);
+                }
+                break;
+
+            default: /*reserved*/
+                RESERVED();
+                break;
+            }
             break;
 
         default: /*reserved*/
@@ -2072,8 +2346,34 @@
         break;
 
     case 3: /*LSCIp*/
-        HAS_OPTION(XTENSA_OPTION_COPROCESSOR);
-        TBD();
+        switch (RRI8_R) {
+        case 0: /*LSIf*/
+        case 4: /*SSIf*/
+        case 8: /*LSIUf*/
+        case 12: /*SSIUf*/
+            HAS_OPTION(XTENSA_OPTION_FP_COPROCESSOR);
+            gen_window_check1(dc, RRI8_S);
+            gen_check_cpenable(dc, 0);
+            {
+                TCGv_i32 addr = tcg_temp_new_i32();
+                tcg_gen_addi_i32(addr, cpu_R[RRI8_S], RRI8_IMM8 << 2);
+                gen_load_store_alignment(dc, 2, addr, false);
+                if (RRI8_R & 0x4) {
+                    tcg_gen_qemu_st32(cpu_FR[RRI8_T], addr, dc->cring);
+                } else {
+                    tcg_gen_qemu_ld32u(cpu_FR[RRI8_T], addr, dc->cring);
+                }
+                if (RRI8_R & 0x8) {
+                    tcg_gen_mov_i32(cpu_R[RRI8_S], addr);
+                }
+                tcg_temp_free(addr);
+            }
+            break;
+
+        default: /*reserved*/
+            RESERVED();
+            break;
+        }
         break;
 
     case 4: /*MAC16d*/
@@ -2502,7 +2802,9 @@
         break;
     }
 
-    gen_check_loop_end(dc, 0);
+    if (dc->is_jmp == DISAS_NEXT) {
+        gen_check_loop_end(dc, 0);
+    }
     dc->pc = dc->next_pc;
 
     return;
@@ -2569,6 +2871,8 @@
     dc.ccount_delta = 0;
     dc.debug = tb->flags & XTENSA_TBFLAG_DEBUG;
     dc.icount = tb->flags & XTENSA_TBFLAG_ICOUNT;
+    dc.cpenable = (tb->flags & XTENSA_TBFLAG_CPENABLE_MASK) >>
+        XTENSA_TBFLAG_CPENABLE_SHIFT;
 
     init_litbase(&dc);
     init_sar_tracker(&dc);
@@ -2601,7 +2905,7 @@
             gen_opc_icount[lj] = insn_count;
         }
 
-        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
+        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT))) {
             tcg_gen_debug_insn_start(dc.pc);
         }
 
@@ -2710,6 +3014,16 @@
         cpu_fprintf(f, "AR%02d=%08x%c", i, env->phys_regs[i],
                 (i % 4) == 3 ? '\n' : ' ');
     }
+
+    if (xtensa_option_enabled(env->config, XTENSA_OPTION_FP_COPROCESSOR)) {
+        cpu_fprintf(f, "\n");
+
+        for (i = 0; i < 16; ++i) {
+            cpu_fprintf(f, "F%02d=%08x (%+10.8e)%c", i,
+                    float32_val(env->fregs[i]),
+                    *(float *)&env->fregs[i], (i % 2) == 1 ? '\n' : ' ');
+        }
+    }
 }
 
 void restore_state_to_opc(CPUXtensaState *env, TranslationBlock *tb, int pc_pos)
diff --git a/targphys.h b/targphys.h
index bd4938f..08cade9 100644
--- a/targphys.h
+++ b/targphys.h
@@ -3,25 +3,10 @@
 #ifndef TARGPHYS_H
 #define TARGPHYS_H
 
-#ifdef TARGET_PHYS_ADDR_BITS
+#define TARGET_PHYS_ADDR_BITS 64
 /* target_phys_addr_t is the type of a physical address (its size can
    be different from 'target_ulong').  */
 
-#if TARGET_PHYS_ADDR_BITS == 32
-typedef uint32_t target_phys_addr_t;
-#define TARGET_PHYS_ADDR_MAX UINT32_MAX
-#define TARGET_FMT_plx "%08x"
-/* Format strings for printing target_phys_addr_t types.
- * These are recommended over the less flexible TARGET_FMT_plx,
- * which is retained for the benefit of existing code.
- */
-#define TARGET_PRIdPHYS PRId32
-#define TARGET_PRIiPHYS PRIi32
-#define TARGET_PRIoPHYS PRIo32
-#define TARGET_PRIuPHYS PRIu32
-#define TARGET_PRIxPHYS PRIx32
-#define TARGET_PRIXPHYS PRIX32
-#elif TARGET_PHYS_ADDR_BITS == 64
 typedef uint64_t target_phys_addr_t;
 #define TARGET_PHYS_ADDR_MAX UINT64_MAX
 #define TARGET_FMT_plx "%016" PRIx64
@@ -31,7 +16,5 @@
 #define TARGET_PRIuPHYS PRIu64
 #define TARGET_PRIxPHYS PRIx64
 #define TARGET_PRIXPHYS PRIX64
-#endif
-#endif
 
 #endif
diff --git a/tcg/README b/tcg/README
index cfdfd96..aa86992 100644
--- a/tcg/README
+++ b/tcg/README
@@ -88,8 +88,7 @@
 
 * Branches:
 
-Use the instruction 'br' to jump to a label. Use 'jmp' to jump to an
-explicit address. Conditional branches can only jump to labels.
+Use the instruction 'br' to jump to a label.
 
 3.3) Code Optimizations
 
@@ -129,10 +128,6 @@
 
 ********* Jumps/Labels
 
-* jmp t0
-
-Absolute jump to address t0 (pointer type).
-
 * set_label $label
 
 Define label 'label' at the current program point.
@@ -141,7 +136,7 @@
 
 Jump to label.
 
-* brcond_i32/i64 cond, t0, t1, label
+* brcond_i32/i64 t0, t1, cond, label
 
 Conditional jump if t0 cond t1 is true. cond can be:
     TCG_COND_EQ
@@ -301,12 +296,18 @@
 
 ********* Conditional moves
 
-* setcond_i32/i64 cond, dest, t1, t2
+* setcond_i32/i64 dest, t1, t2, cond
 
 dest = (t1 cond t2)
 
 Set DEST to 1 if (T1 cond T2) is true, otherwise set to 0.
 
+* movcond_i32/i64 dest, c1, c2, v1, v2, cond
+
+dest = (c1 cond c2 ? v1 : v2)
+
+Set DEST to V1 if (C1 cond C2) is true, otherwise set to V2.
+
 ********* Type conversions
 
 * ext_i32_i64 t0, t1
@@ -354,7 +355,7 @@
 32-bit host code generators, but are not to be emitted by guest translators.
 They are emitted as needed by inline functions within "tcg-op.h".
 
-* brcond2_i32 cond, t0_low, t0_high, t1_low, t1_high, label
+* brcond2_i32 t0_low, t0_high, t1_low, t1_high, cond, label
 
 Similar to brcond, except that the 64-bit values T0 and T1
 are formed from two 32-bit arguments.
@@ -371,7 +372,7 @@
 Similar to mul, except two 32-bit (unsigned) inputs T1 and T2 yielding
 the full 64-bit product T0.  The later is returned in two 32-bit outputs.
 
-* setcond2_i32 cond, dest, t1_low, t1_high, t2_low, t2_high
+* setcond2_i32 dest, t1_low, t1_high, t2_low, t2_high, cond
 
 Similar to setcond, except that the 64-bit values T1 and T2 are
 formed from two 32-bit arguments.  The result is a 32-bit value.
@@ -386,7 +387,8 @@
 
 Exit the current TB and jump to the TB index 'index' (constant) if the
 current TB was linked to this TB. Otherwise execute the next
-instructions.
+instructions. Only indices 0 and 1 are valid and tcg_gen_goto_tb may be issued
+at most once with each slot index per TB.
 
 * qemu_ld8u t0, t1, flags
 qemu_ld8s t0, t1, flags
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index cf0ca3d..737200e 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -145,12 +145,6 @@
     }
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 4;
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
@@ -176,7 +170,7 @@
            so don't use these. */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-#if defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+#if TARGET_LONG_BITS == 64
         /* If we're passing env to the helper as r0 and need a regpair
          * for the address then r2 will be overwritten as we're setting
          * up the args to the helper.
@@ -204,8 +198,7 @@
            use these. */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-#if defined(CONFIG_SOFTMMU) && \
-    defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+#if defined(CONFIG_SOFTMMU) && (TARGET_LONG_BITS == 64)
         /* Avoid clashes with registers being used for helper args */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
@@ -223,7 +216,7 @@
 #ifdef CONFIG_SOFTMMU
         /* r2 is still needed to load data_reg, so don't use it. */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
-#if defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+#if TARGET_LONG_BITS == 64
         /* Avoid clashes with registers being used for helper args */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
 #endif
@@ -342,7 +335,7 @@
     COND_AL = 0xe,
 };
 
-static const uint8_t tcg_cond_to_arm_cond[10] = {
+static const uint8_t tcg_cond_to_arm_cond[] = {
     [TCG_COND_EQ] = COND_EQ,
     [TCG_COND_NE] = COND_NE,
     [TCG_COND_LT] = COND_LT,
@@ -954,7 +947,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -972,25 +964,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 /* Helper routines for marshalling helper function arguments into
  * the correct registers and stack.
@@ -1203,9 +1176,7 @@
      * trash by moving the earlier arguments into them.
      */
     argreg = TCG_REG_R0;
-#ifdef CONFIG_TCG_PASS_AREG0
     argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
-#endif
 #if TARGET_LONG_BITS == 64
     argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
 #else
@@ -1226,20 +1197,11 @@
     case 1:
     case 2:
     default:
-        if (data_reg != TCG_REG_R0) {
-            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
-        }
+        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
         break;
     case 3:
-        if (data_reg != TCG_REG_R0) {
-            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                            data_reg, 0, TCG_REG_R0, SHIFT_IMM_LSL(0));
-        }
-        if (data_reg2 != TCG_REG_R1) {
-            tcg_out_dat_reg(s, COND_AL, ARITH_MOV,
-                            data_reg2, 0, TCG_REG_R1, SHIFT_IMM_LSL(0));
-        }
+        tcg_out_mov_reg(s, COND_AL, data_reg, TCG_REG_R0);
+        tcg_out_mov_reg(s, COND_AL, data_reg2, TCG_REG_R1);
         break;
     }
 
@@ -1421,9 +1383,7 @@
      * trash by moving the earlier arguments into them.
      */
     argreg = TCG_REG_R0;
-#ifdef CONFIG_TCG_PASS_AREG0
     argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
-#endif
 #if TARGET_LONG_BITS == 64
     argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
 #else
@@ -1561,12 +1521,6 @@
         else
             tcg_out_callr(s, COND_AL, args[0]);
         break;
-    case INDEX_op_jmp:
-        if (const_args[0])
-            tcg_out_goto(s, COND_AL, args[0]);
-        else
-            tcg_out_bx(s, COND_AL, args[0]);
-        break;
     case INDEX_op_br:
         tcg_out_goto_label(s, COND_AL, args[0]);
         break;
@@ -1800,7 +1754,6 @@
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index f90b834..e2299ca 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -73,11 +73,11 @@
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
 enum {
-    /* Note: must be synced with dyngen-exec.h */
     TCG_AREG0 = TCG_REG_R6,
 };
 
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 2885212..de500ae 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -175,12 +175,6 @@
     *insn_ptr = insn;
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 4;
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
@@ -738,7 +732,7 @@
     }
 }
 
-static const uint8_t tcg_cond_to_cmp_cond[10] =
+static const uint8_t tcg_cond_to_cmp_cond[] =
 {
     [TCG_COND_EQ] = COND_EQ,
     [TCG_COND_NE] = COND_EQ | COND_FALSE,
@@ -826,13 +820,15 @@
 {
     switch (cond) {
     case TCG_COND_EQ:
-    case TCG_COND_NE:
-        tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, al, bl, blconst);
-        tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+        tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, al, bl, blconst);
+        tcg_out_brcond(s, TCG_COND_EQ, ah, bh, bhconst, label_index);
         break;
-
+    case TCG_COND_NE:
+        tcg_out_brcond(s, TCG_COND_NE, al, bl, bhconst, label_index);
+        tcg_out_brcond(s, TCG_COND_NE, ah, bh, bhconst, label_index);
+        break;
     default:
-        tcg_out_brcond(s, cond, ah, bh, bhconst, label_index);
+        tcg_out_brcond(s, tcg_high_cond(cond), ah, bh, bhconst, label_index);
         tcg_out_comclr(s, TCG_COND_NE, TCG_REG_R0, ah, bh, bhconst);
         tcg_out_brcond(s, tcg_unsigned_cond(cond),
                        al, bl, blconst, label_index);
@@ -853,9 +849,8 @@
 {
     int scratch = TCG_REG_R20;
 
-    if (ret != al && ret != ah
-        && (blconst || ret != bl)
-        && (bhconst || ret != bh)) {
+    /* Note that the low parts are fully consumed before scratch is set.  */
+    if (ret != ah && (bhconst || ret != bh)) {
         scratch = ret;
     }
 
@@ -867,22 +862,52 @@
         tcg_out_movi(s, TCG_TYPE_I32, scratch, cond == TCG_COND_NE);
         break;
 
-    default:
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+    case TCG_COND_LT:
+    case TCG_COND_LTU:
+        /* Optimize compares with low part zero.  */
+        if (bl == 0) {
+            tcg_out_setcond(s, cond, ret, ah, bh, bhconst);
+            return;
+        }
+        /* FALLTHRU */
+
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        /* <= : ah < bh | (ah == bh && al <= bl) */
         tcg_out_setcond(s, tcg_unsigned_cond(cond), scratch, al, bl, blconst);
         tcg_out_comclr(s, TCG_COND_EQ, TCG_REG_R0, ah, bh, bhconst);
         tcg_out_movi(s, TCG_TYPE_I32, scratch, 0);
-        tcg_out_comclr(s, cond, TCG_REG_R0, ah, bh, bhconst);
+        tcg_out_comclr(s, tcg_invert_cond(tcg_high_cond(cond)),
+                       TCG_REG_R0, ah, bh, bhconst);
         tcg_out_movi(s, TCG_TYPE_I32, scratch, 1);
         break;
+
+    default:
+        tcg_abort();
     }
 
     tcg_out_mov(s, TCG_TYPE_I32, ret, scratch);
 }
 
+static void tcg_out_movcond(TCGContext *s, int cond, TCGArg ret,
+                            TCGArg c1, TCGArg c2, int c2const,
+                            TCGArg v1, int v1const)
+{
+    tcg_out_comclr(s, tcg_invert_cond(cond), TCG_REG_R0, c1, c2, c2const);
+    if (v1const) {
+        tcg_out_movi(s, TCG_TYPE_I32, ret, v1);
+    } else {
+        tcg_out_mov(s, TCG_TYPE_I32, ret, v1);
+    }
+}
+
 #if defined(CONFIG_SOFTMMU)
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -900,25 +925,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 /* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
    the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
@@ -963,10 +969,11 @@
         tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
     }
 
-    /* Compute the value that ought to appear in the TLB for a hit, namely, the page
-       of the address.  We include the low N bits of the address to catch unaligned
-       accesses and force them onto the slow path.  Do this computation after having
-       issued the load from the TLB slot to give the load time to complete.  */
+    /* Compute the value that ought to appear in the TLB for a hit, namely,
+       the page of the address.  We include the low N bits of the address
+       to catch unaligned accesses and force them onto the slow path.  Do
+       this computation after having issued the load from the TLB slot to
+       give the load time to complete.  */
     tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
 
     /* If not equal, jump to lab_miss. */
@@ -979,6 +986,36 @@
 
     return ret;
 }
+
+static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst)
+{
+    if (argno < 4) {
+        if (vconst) {
+            tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+        } else {
+            tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
+        }
+    } else {
+        if (vconst && v != 0) {
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v);
+            v = TCG_REG_R20;
+        }
+        tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK,
+                   TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4));
+    }
+    return argno + 1;
+}
+
+static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh)
+{
+    /* 64-bit arguments must go in even reg pairs and stack slots.  */
+    if (argno & 1) {
+        argno++;
+    }
+    argno = tcg_out_arg_reg32(s, argno, vl, false);
+    argno = tcg_out_arg_reg32(s, argno, vh, false);
+    return argno;
+}
 #endif
 
 static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
@@ -1059,41 +1096,36 @@
     /* Note that addrhi_reg is only used for 64-bit guests.  */
     int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
     int mem_index = *args;
-    int lab1, lab2, argreg, offset;
+    int lab1, lab2, argno, offset;
 
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
     offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc & 3, lab1, offset);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+                              addrhi_reg, opc & 3, lab1, offset);
 
     /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+               (offset ? TCG_REG_R1 : TCG_REG_R25),
                offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
-    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
+    tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
+                           TCG_REG_R20, opc);
     tcg_out_branch(s, lab2, 1);
 
     /* TLB Miss.  */
     /* label1: */
     tcg_out_label(s, lab1, s->code_ptr);
 
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    argno = 0;
+    argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+        argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+    } else {
+        argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
     }
-    tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+    argno = tcg_out_arg_reg32(s, argno, mem_index, true);
 
-#ifdef CONFIG_TCG_PASS_AREG0
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
-#endif
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);
 
     switch (opc) {
@@ -1129,8 +1161,8 @@
 #endif
 }
 
-static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
-                                   int addr_reg, int opc)
+static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg,
+                                   int datahi_reg, int addr_reg, int opc)
 {
 #ifdef TARGET_WORDS_BIGENDIAN
     const int bswap = 0;
@@ -1183,17 +1215,18 @@
     /* Note that addrhi_reg is only used for 64-bit guests.  */
     int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
     int mem_index = *args;
-    int lab1, lab2, argreg, offset;
+    int lab1, lab2, argno, next, offset;
 
     lab1 = gen_new_label();
     lab2 = gen_new_label();
 
     offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
-    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
-                              opc, lab1, offset);
+    offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
+                              addrhi_reg, opc, lab1, offset);
 
     /* TLB Hit.  */
-    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
+    tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
+               (offset ? TCG_REG_R1 : TCG_REG_R25),
                offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
 
     /* There are no indexed stores, so we must do this addition explitly.
@@ -1206,65 +1239,46 @@
     /* label1: */
     tcg_out_label(s, lab1, s->code_ptr);
 
-    argreg = TCG_REG_R26;
-    tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
+    argno = 0;
+    argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
     if (TARGET_LONG_BITS == 64) {
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
+        argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
+    } else {
+        argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
     }
 
+    next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20);
     switch(opc) {
     case 0:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        tcg_out_andi(s, next, datalo_reg, 0xff);
+        argno = tcg_out_arg_reg32(s, argno, next, false);
         break;
     case 1:
-        tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        tcg_out_andi(s, next, datalo_reg, 0xffff);
+        argno = tcg_out_arg_reg32(s, argno, next, false);
         break;
     case 2:
-        tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
-        tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
+        argno = tcg_out_arg_reg32(s, argno, datalo_reg, false);
         break;
     case 3:
-        /* Because of the alignment required by the 64-bit data argument,
-           we will always use R23/R24.  Also, we will always run out of
-           argument registers for storing mem_index, so that will have
-           to go on the stack.  */
-        if (mem_index == 0) {
-            argreg = TCG_REG_R0;
-        } else {
-            argreg = TCG_REG_R20;
-            tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
-        }
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
-        tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
-        tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK,
-                   TCG_TARGET_CALL_STACK_OFFSET - 4);
+        argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg);
         break;
     default:
         tcg_abort();
     }
+    argno = tcg_out_arg_reg32(s, argno, mem_index, true);
 
-#ifdef CONFIG_TCG_PASS_AREG0
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
-#endif
     tcg_out_call(s, qemu_st_helpers[opc]);
 
     /* label2: */
     tcg_out_label(s, lab2, s->code_ptr);
 #else
-    /* There are no indexed stores, so if GUEST_BASE is set we must do the add
-       explicitly.  Careful to avoid R20, which is used for the bswaps to follow.  */
+    /* There are no indexed stores, so if GUEST_BASE is set we must do
+       the add explicitly.  Careful to avoid R20, which is used for the
+       bswaps to follow.  */
     if (GUEST_BASE != 0) {
-        tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
+        tcg_out_arith(s, TCG_REG_R31, addrlo_reg,
+                      TCG_GUEST_BASE_REG, INSN_ADDL);
         addrlo_reg = TCG_REG_R31;
     }
     tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);
@@ -1326,11 +1340,6 @@
         }
         break;
 
-    case INDEX_op_jmp:
-        fprintf(stderr, "unimplemented jmp\n");
-        tcg_abort();
-        break;
-
     case INDEX_op_br:
         tcg_out_branch(s, args[0], 1);
         break;
@@ -1499,6 +1508,11 @@
                          args[3], const_args[3], args[4], const_args[4]);
         break;
 
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2], const_args[2],
+                        args[3], const_args[3]);
+        break;
+
     case INDEX_op_add2_i32:
         tcg_out_add2(s, args[0], args[1], args[2], args[3],
                      args[4], args[5], const_args[4]);
@@ -1560,7 +1574,6 @@
     { INDEX_op_goto_tb, { } },
 
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "r" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
@@ -1607,6 +1620,10 @@
     { INDEX_op_setcond_i32, { "r", "rZ", "rI" } },
     { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rI", "rI" } },
 
+    /* ??? We can actually support a signed 14-bit arg3, but we
+       only have existing constraints for a signed 11-bit.  */
+    { INDEX_op_movcond_i32, { "r", "rZ", "rI", "rI", "0" } },
+
     { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rI", "rZ" } },
     { INDEX_op_sub2_i32, { "r", "r", "rI", "rZ", "rK", "rZ" } },
 
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index d4bf6fe..5351353 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -96,6 +96,7 @@
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_movcond_i32      1
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub rd, 0, rs */
@@ -104,7 +105,6 @@
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
-/* Note: must be synced with dyngen-exec.h */
 #define TCG_AREG0 TCG_REG_R17
 
 
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index da17bba..4952c05 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -75,9 +75,7 @@
     TCG_REG_R8,
     TCG_REG_R9,
 #else
-    TCG_REG_EAX,
-    TCG_REG_EDX,
-    TCG_REG_ECX
+    /* 32 bit mode uses stack based calling convention (GCC default). */
 #endif
 };
 
@@ -88,6 +86,18 @@
 #endif
 };
 
+/* Registers used with L constraint, which are the first argument 
+   registers on x86_64, and two random call clobbered registers on
+   i386. */
+#if TCG_TARGET_REG_BITS == 64
+# define TCG_REG_L0 tcg_target_call_iarg_regs[0]
+# define TCG_REG_L1 tcg_target_call_iarg_regs[1]
+# define TCG_REG_L2 tcg_target_call_iarg_regs[2]
+#else
+# define TCG_REG_L0 TCG_REG_EAX
+# define TCG_REG_L1 TCG_REG_EDX
+#endif
+
 static uint8_t *tb_ret_addr;
 
 static void patch_reloc(uint8_t *code_ptr, int type,
@@ -114,16 +124,6 @@
     }
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    if (TCG_TARGET_REG_BITS == 64) {
-        return 6;
-    }
-
-    return 0;
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
@@ -179,18 +179,16 @@
         /* qemu_ld/st address constraint */
     case 'L':
         ct->ct |= TCG_CT_REG;
-        if (TCG_TARGET_REG_BITS == 64) {
+#if TCG_TARGET_REG_BITS == 64
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
-            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
-            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
-#ifdef CONFIG_TCG_PASS_AREG0
-            tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
-#endif
-        } else {
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2);
+#else
             tcg_regset_set32(ct->u.regs, 0, 0xff);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
-            tcg_regset_reset_reg(ct->u.regs, TCG_REG_EDX);
-        }
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0);
+            tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1);
+#endif
         break;
 
     case 'e':
@@ -251,6 +249,7 @@
 #define OPC_ADD_GvEv	(OPC_ARITH_GvEv | (ARITH_ADD << 3))
 #define OPC_BSWAP	(0xc8 | P_EXT)
 #define OPC_CALL_Jz	(0xe8)
+#define OPC_CMOVCC      (0x40 | P_EXT)  /* ... plus condition code */
 #define OPC_CMP_GvEv	(OPC_ARITH_GvEv | (ARITH_CMP << 3))
 #define OPC_DEC_r32	(0x48)
 #define OPC_IMUL_GvEv	(0xaf | P_EXT)
@@ -265,6 +264,7 @@
 #define OPC_MOVB_EvGv	(0x88)		/* stores, more or less */
 #define OPC_MOVL_EvGv	(0x89)		/* stores, more or less */
 #define OPC_MOVL_GvEv	(0x8b)		/* loads, more or less */
+#define OPC_MOVB_EvIz   (0xc6)
 #define OPC_MOVL_EvIz	(0xc7)
 #define OPC_MOVL_Iv     (0xb8)
 #define OPC_MOVSBL	(0xbe | P_EXT)
@@ -338,7 +338,7 @@
 #define JCC_JLE 0xe
 #define JCC_JG  0xf
 
-static const uint8_t tcg_cond_to_jcc[10] = {
+static const uint8_t tcg_cond_to_jcc[] = {
     [TCG_COND_EQ] = JCC_JE,
     [TCG_COND_NE] = JCC_JNE,
     [TCG_COND_LT] = JCC_JL,
@@ -937,6 +937,24 @@
 }
 #endif
 
+static void tcg_out_movcond32(TCGContext *s, TCGCond cond, TCGArg dest,
+                              TCGArg c1, TCGArg c2, int const_c2,
+                              TCGArg v1)
+{
+    tcg_out_cmp(s, c1, c2, const_c2, 0);
+    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond], dest, v1);
+}
+
+#if TCG_TARGET_REG_BITS == 64
+static void tcg_out_movcond64(TCGContext *s, TCGCond cond, TCGArg dest,
+                              TCGArg c1, TCGArg c2, int const_c2,
+                              TCGArg v1)
+{
+    tcg_out_cmp(s, c1, c2, const_c2, P_REXW);
+    tcg_out_modrm(s, OPC_CMOVCC | tcg_cond_to_jcc[cond] | P_REXW, dest, v1);
+}
+#endif
+
 static void tcg_out_branch(TCGContext *s, int call, tcg_target_long dest)
 {
     tcg_target_long disp = dest - (tcg_target_long)s->code_ptr - 5;
@@ -965,7 +983,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void *qemu_ld_helpers[4] = {
@@ -983,25 +1000,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 /* Perform the TLB load and compare.
 
@@ -1031,8 +1029,8 @@
                                     uint8_t **label_ptr, int which)
 {
     const int addrlo = args[addrlo_idx];
-    const int r0 = tcg_target_call_iarg_regs[0];
-    const int r1 = tcg_target_call_iarg_regs[1];
+    const int r0 = TCG_REG_L0;
+    const int r1 = TCG_REG_L1;
     TCGType type = TCG_TYPE_I32;
     int rexw = 0;
 
@@ -1194,8 +1192,7 @@
                      label_ptr, offsetof(CPUTLBEntry, addr_read));
 
     /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+    tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1220,26 +1217,18 @@
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_push(s, TCG_AREG0);
     stack_adjust += 4;
-#endif
 #else
     /* The first argument is already loaded with addrlo.  */
     arg_idx = 1;
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                  mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
-#endif
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
@@ -1305,11 +1294,9 @@
                use the ADDR32 prefix.  For now, do nothing.  */
 
             if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64,
-                             tcg_target_call_iarg_regs[0], GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW,
-                            tcg_target_call_iarg_regs[0], base);
-                base = tcg_target_call_iarg_regs[0];
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+                base = TCG_REG_L0;
                 offset = 0;
             }
         }
@@ -1330,8 +1317,8 @@
     /* ??? Ideally we wouldn't need a scratch register.  For user-only,
        we could perform the bswap twice to restore the original value
        instead of moving to the scratch.  But as it is, the L constraint
-       means that the second argument reg is definitely free here.  */
-    int scratch = tcg_target_call_iarg_regs[1];
+       means that TCG_REG_L1 is definitely free here.  */
+    const int scratch = TCG_REG_L1;
 
     switch (sizeop) {
     case 0:
@@ -1404,8 +1391,7 @@
                      label_ptr, offsetof(CPUTLBEntry, addr_write));
 
     /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, data_reg, data_reg2,
-                           tcg_target_call_iarg_regs[0], 0, opc);
+    tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
 
     /* jmp label2 */
     tcg_out8(s, OPC_JMP_short);
@@ -1436,26 +1422,18 @@
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_push(s, TCG_AREG0);
     stack_adjust += 4;
-#endif
 #else
     tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
-                tcg_target_call_iarg_regs[1], data_reg);
-    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
+                TCG_REG_L1, data_reg);
+    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index);
     stack_adjust = 0;
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
-#endif
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
+    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
 #endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
@@ -1482,11 +1460,9 @@
                use the ADDR32 prefix.  For now, do nothing.  */
 
             if (offset != GUEST_BASE) {
-                tcg_out_movi(s, TCG_TYPE_I64,
-                             tcg_target_call_iarg_regs[0], GUEST_BASE);
-                tgen_arithr(s, ARITH_ADD + P_REXW,
-                            tcg_target_call_iarg_regs[0], base);
-                base = tcg_target_call_iarg_regs[0];
+                tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
+                tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
+                base = TCG_REG_L0;
                 offset = 0;
             }
         }
@@ -1537,14 +1513,6 @@
             tcg_out_modrm(s, OPC_GRP5, EXT5_CALLN_Ev, args[0]);
         }
         break;
-    case INDEX_op_jmp:
-        if (const_args[0]) {
-            tcg_out_jmp(s, args[0]);
-        } else {
-            /* jmp *reg */
-            tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
-        }
-        break;
     case INDEX_op_br:
         tcg_out_jxx(s, JCC_JMP, args[0], 0);
         break;
@@ -1573,18 +1541,35 @@
         break;
 
     OP_32_64(st8):
-        tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
-                             args[0], args[1], args[2]);
+        if (const_args[0]) {
+            tcg_out_modrm_offset(s, OPC_MOVB_EvIz,
+                                 0, args[1], args[2]);
+            tcg_out8(s, args[0]);
+        } else {
+            tcg_out_modrm_offset(s, OPC_MOVB_EvGv | P_REXB_R,
+                                 args[0], args[1], args[2]);
+        }
         break;
     OP_32_64(st16):
-        tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
-                             args[0], args[1], args[2]);
+        if (const_args[0]) {
+            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_DATA16,
+                                 0, args[1], args[2]);
+            tcg_out16(s, args[0]);
+        } else {
+            tcg_out_modrm_offset(s, OPC_MOVL_EvGv | P_DATA16,
+                                 args[0], args[1], args[2]);
+        }
         break;
 #if TCG_TARGET_REG_BITS == 64
     case INDEX_op_st32_i64:
 #endif
     case INDEX_op_st_i32:
-        tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+        if (const_args[0]) {
+            tcg_out_modrm_offset(s, OPC_MOVL_EvIz, 0, args[1], args[2]);
+            tcg_out32(s, args[0]);
+        } else {
+            tcg_out_st(s, TCG_TYPE_I32, args[0], args[1], args[2]);
+        }
         break;
 
     OP_32_64(add):
@@ -1680,6 +1665,10 @@
         tcg_out_setcond32(s, args[3], args[0], args[1],
                           args[2], const_args[2]);
         break;
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond32(s, args[5], args[0], args[1],
+                          args[2], const_args[2], args[3]);
+        break;
 
     OP_32_64(bswap16):
         tcg_out_rolw_8(s, args[0]);
@@ -1788,7 +1777,13 @@
         tcg_out_ld(s, TCG_TYPE_I64, args[0], args[1], args[2]);
         break;
     case INDEX_op_st_i64:
-        tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+        if (const_args[0]) {
+            tcg_out_modrm_offset(s, OPC_MOVL_EvIz | P_REXW,
+                                 0, args[1], args[2]);
+            tcg_out32(s, args[0]);
+        } else {
+            tcg_out_st(s, TCG_TYPE_I64, args[0], args[1], args[2]);
+        }
         break;
     case INDEX_op_qemu_ld32s:
         tcg_out_qemu_ld(s, args, 2 | 4);
@@ -1802,6 +1797,10 @@
         tcg_out_setcond64(s, args[3], args[0], args[1],
                           args[2], const_args[2]);
         break;
+    case INDEX_op_movcond_i64:
+        tcg_out_movcond64(s, args[5], args[0], args[1],
+                          args[2], const_args[2], args[3]);
+        break;
 
     case INDEX_op_bswap64_i64:
         tcg_out_bswap64(s, args[0]);
@@ -1841,7 +1840,6 @@
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
     { INDEX_op_mov_i32, { "r", "r" } },
     { INDEX_op_movi_i32, { "r" } },
@@ -1850,9 +1848,9 @@
     { INDEX_op_ld16u_i32, { "r", "r" } },
     { INDEX_op_ld16s_i32, { "r", "r" } },
     { INDEX_op_ld_i32, { "r", "r" } },
-    { INDEX_op_st8_i32, { "q", "r" } },
-    { INDEX_op_st16_i32, { "r", "r" } },
-    { INDEX_op_st_i32, { "r", "r" } },
+    { INDEX_op_st8_i32, { "qi", "r" } },
+    { INDEX_op_st16_i32, { "ri", "r" } },
+    { INDEX_op_st_i32, { "ri", "r" } },
 
     { INDEX_op_add_i32, { "r", "r", "ri" } },
     { INDEX_op_sub_i32, { "r", "0", "ri" } },
@@ -1886,6 +1884,9 @@
     { INDEX_op_setcond_i32, { "q", "r", "ri" } },
 
     { INDEX_op_deposit_i32, { "Q", "0", "Q" } },
+#if TCG_TARGET_HAS_movcond_i32
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "0" } },
+#endif
 
 #if TCG_TARGET_REG_BITS == 32
     { INDEX_op_mulu2_i32, { "a", "d", "a", "r" } },
@@ -1903,10 +1904,10 @@
     { INDEX_op_ld32u_i64, { "r", "r" } },
     { INDEX_op_ld32s_i64, { "r", "r" } },
     { INDEX_op_ld_i64, { "r", "r" } },
-    { INDEX_op_st8_i64, { "r", "r" } },
-    { INDEX_op_st16_i64, { "r", "r" } },
-    { INDEX_op_st32_i64, { "r", "r" } },
-    { INDEX_op_st_i64, { "r", "r" } },
+    { INDEX_op_st8_i64, { "ri", "r" } },
+    { INDEX_op_st16_i64, { "ri", "r" } },
+    { INDEX_op_st32_i64, { "ri", "r" } },
+    { INDEX_op_st_i64, { "re", "r" } },
 
     { INDEX_op_add_i64, { "r", "0", "re" } },
     { INDEX_op_mul_i64, { "r", "0", "re" } },
@@ -1940,6 +1941,7 @@
     { INDEX_op_ext32u_i64, { "r", "r" } },
 
     { INDEX_op_deposit_i64, { "Q", "0", "Q" } },
+    { INDEX_op_movcond_i64, { "r", "r", "re", "r", "0" } },
 #endif
 
 #if TCG_TARGET_REG_BITS == 64
@@ -2038,15 +2040,17 @@
 #if TCG_TARGET_REG_BITS == 32
     tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
                (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
-    tcg_out_ld(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[1], TCG_REG_ESP,
-               (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4);
+    tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+    /* jmp *tb.  */
+    tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
+		         (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
+			 + stack_addend);
 #else
     tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
-#endif
     tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
-
     /* jmp *tb.  */
     tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
+#endif
 
     /* TB epilogue */
     tb_ret_addr = s->code_ptr;
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index c3cfe05..ace63ba 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -67,7 +67,11 @@
 /* used for function call generation */
 #define TCG_REG_CALL_STACK TCG_REG_ESP 
 #define TCG_TARGET_STACK_ALIGN 16
+#if defined(_WIN64)
+#define TCG_TARGET_CALL_STACK_OFFSET 32
+#else
 #define TCG_TARGET_CALL_STACK_OFFSET 0
+#endif
 
 /* optional instructions */
 #define TCG_TARGET_HAS_div2_i32         1
@@ -86,6 +90,12 @@
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      1
+#if defined(__x86_64__) || defined(__i686__)
+/* Use cmov only if the compiler is already doing so.  */
+#define TCG_TARGET_HAS_movcond_i32      1
+#else
+#define TCG_TARGET_HAS_movcond_i32      0
+#endif
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -107,6 +117,7 @@
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      1
+#define TCG_TARGET_HAS_movcond_i64      1
 #endif
 
 #define TCG_TARGET_deposit_i32_valid(ofs, len) \
@@ -116,7 +127,6 @@
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
-/* Note: must be synced with dyngen-exec.h */
 #if TCG_TARGET_REG_BITS == 64
 # define TCG_AREG0 TCG_REG_R14
 #else
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index dc588db..705712f 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -176,12 +176,6 @@
     TCG_REG_R8
 };
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 8;
-}
-
 /*
  * opcode formation
  */
@@ -1452,7 +1446,6 @@
                                TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
 }
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -1461,16 +1454,6 @@
     helper_ldl_mmu,
     helper_ldq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-#endif
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1530,7 +1513,6 @@
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_bundle(s, mII,
                    tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
@@ -1539,7 +1521,6 @@
                                TCG_REG_R57, 0, TCG_REG_R56),
                    tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
                                TCG_REG_R56, 0, TCG_AREG0));
-#endif
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
@@ -1570,7 +1551,6 @@
     }
 }
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
    uintxx_t val, int mmu_idx) */
 static const void * const qemu_st_helpers[4] = {
@@ -1579,16 +1559,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1658,7 +1628,6 @@
         data_reg = TCG_REG_R2;
     }
 
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_bundle(s, mII,
                    tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
@@ -1674,15 +1643,6 @@
                                TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
-#else
-    tcg_out_bundle(s, miB,
-                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
-                               data_reg, TCG_REG_R3),
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                               TCG_REG_B0, TCG_REG_B6));
-#endif
 }
 
 #else /* !CONFIG_SOFTMMU */
@@ -1956,9 +1916,6 @@
     case INDEX_op_goto_tb:
         tcg_out_goto_tb(s, args[0]);
         break;
-    case INDEX_op_jmp:
-        tcg_out_jmp(s, args[0]);
-        break;
 
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
@@ -2196,7 +2153,6 @@
     { INDEX_op_call, { "r" } },
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
-    { INDEX_op_jmp, { "r" } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
     { INDEX_op_movi_i32, { "r" } },
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 0631b9f..368aee4 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -133,6 +133,8 @@
 #define TCG_TARGET_HAS_rot_i64          1
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i32      0
+#define TCG_TARGET_HAS_movcond_i64      0
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub r1, r0, r3 */
@@ -140,7 +142,6 @@
 #define TCG_TARGET_HAS_not_i32          0 /* xor r1, -1, r3 */
 #define TCG_TARGET_HAS_not_i64          0 /* xor r1, -1, r3 */
 
-/* Note: must be synced with dyngen-exec.h */
 #define TCG_AREG0 TCG_REG_R7
 
 /* Guest base is supported */
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 1006e28..7e4013e 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -68,7 +68,7 @@
 #endif
 
 /* check if we really need so many registers :P */
-static const int tcg_target_reg_alloc_order[] = {
+static const TCGReg tcg_target_reg_alloc_order[] = {
     TCG_REG_S0,
     TCG_REG_S1,
     TCG_REG_S2,
@@ -94,14 +94,14 @@
     TCG_REG_V1
 };
 
-static const int tcg_target_call_iarg_regs[4] = {
+static const TCGReg tcg_target_call_iarg_regs[4] = {
     TCG_REG_A0,
     TCG_REG_A1,
     TCG_REG_A2,
     TCG_REG_A3
 };
 
-static const int tcg_target_call_oarg_regs[2] = {
+static const TCGReg tcg_target_call_oarg_regs[2] = {
     TCG_REG_V0,
     TCG_REG_V1
 };
@@ -185,12 +185,6 @@
     }
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 4;
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
@@ -217,7 +211,7 @@
         tcg_regset_set(ct->u.regs, 0xffffffff);
 #if defined(CONFIG_SOFTMMU)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
-# if defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+# if (TARGET_LONG_BITS == 64)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
 # endif
 #endif
@@ -227,12 +221,11 @@
         tcg_regset_set(ct->u.regs, 0xffffffff);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
 #if defined(CONFIG_SOFTMMU)
-# if (defined(CONFIG_TCG_PASS_AREG0) && TARGET_LONG_BITS == 32) || \
-     (!defined(CONFIG_TCG_PASS_AREG0) && TARGET_LONG_BITS == 64)
+# if (TARGET_LONG_BITS == 32)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
 # endif
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
-# if defined(CONFIG_TCG_PASS_AREG0) && TARGET_LONG_BITS == 64
+# if TARGET_LONG_BITS == 64
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
 # endif
 #endif
@@ -279,6 +272,8 @@
 enum {
     OPC_BEQ      = 0x04 << 26,
     OPC_BNE      = 0x05 << 26,
+    OPC_BLEZ     = 0x06 << 26,
+    OPC_BGTZ     = 0x07 << 26,
     OPC_ADDIU    = 0x09 << 26,
     OPC_SLTI     = 0x0A << 26,
     OPC_SLTIU    = 0x0B << 26,
@@ -299,12 +294,16 @@
     OPC_SPECIAL  = 0x00 << 26,
     OPC_SLL      = OPC_SPECIAL | 0x00,
     OPC_SRL      = OPC_SPECIAL | 0x02,
+    OPC_ROTR     = OPC_SPECIAL | (0x01 << 21) | 0x02,
     OPC_SRA      = OPC_SPECIAL | 0x03,
     OPC_SLLV     = OPC_SPECIAL | 0x04,
     OPC_SRLV     = OPC_SPECIAL | 0x06,
+    OPC_ROTRV    = OPC_SPECIAL | (0x01 <<  6) | 0x06,
     OPC_SRAV     = OPC_SPECIAL | 0x07,
     OPC_JR       = OPC_SPECIAL | 0x08,
     OPC_JALR     = OPC_SPECIAL | 0x09,
+    OPC_MOVZ     = OPC_SPECIAL | 0x0A,
+    OPC_MOVN     = OPC_SPECIAL | 0x0B,
     OPC_MFHI     = OPC_SPECIAL | 0x10,
     OPC_MFLO     = OPC_SPECIAL | 0x12,
     OPC_MULT     = OPC_SPECIAL | 0x18,
@@ -320,7 +319,13 @@
     OPC_SLT      = OPC_SPECIAL | 0x2A,
     OPC_SLTU     = OPC_SPECIAL | 0x2B,
 
+    OPC_REGIMM   = 0x01 << 26,
+    OPC_BLTZ     = OPC_REGIMM | (0x00 << 16),
+    OPC_BGEZ     = OPC_REGIMM | (0x01 << 16),
+
     OPC_SPECIAL3 = 0x1f << 26,
+    OPC_INS      = OPC_SPECIAL3 | 0x004,
+    OPC_WSBH     = OPC_SPECIAL3 | 0x0a0,
     OPC_SEB      = OPC_SPECIAL3 | 0x420,
     OPC_SEH      = OPC_SPECIAL3 | 0x620,
 };
@@ -328,7 +333,8 @@
 /*
  * Type reg
  */
-static inline void tcg_out_opc_reg(TCGContext *s, int opc, int rd, int rs, int rt)
+static inline void tcg_out_opc_reg(TCGContext *s, int opc,
+                                   TCGReg rd, TCGReg rs, TCGReg rt)
 {
     int32_t inst;
 
@@ -342,7 +348,8 @@
 /*
  * Type immediate
  */
-static inline void tcg_out_opc_imm(TCGContext *s, int opc, int rt, int rs, int imm)
+static inline void tcg_out_opc_imm(TCGContext *s, int opc,
+                                   TCGReg rt, TCGReg rs, TCGArg imm)
 {
     int32_t inst;
 
@@ -356,7 +363,8 @@
 /*
  * Type branch
  */
-static inline void tcg_out_opc_br(TCGContext *s, int opc, int rt, int rs)
+static inline void tcg_out_opc_br(TCGContext *s, int opc,
+                                  TCGReg rt, TCGReg rs)
 {
     /* We pay attention here to not modify the branch target by reading
        the existing value and using it again. This ensure that caches and
@@ -369,7 +377,8 @@
 /*
  * Type sa
  */
-static inline void tcg_out_opc_sa(TCGContext *s, int opc, int rd, int rt, int sa)
+static inline void tcg_out_opc_sa(TCGContext *s, int opc,
+                                  TCGReg rd, TCGReg rt, TCGArg sa)
 {
     int32_t inst;
 
@@ -408,38 +417,47 @@
     }
 }
 
-static inline void tcg_out_bswap16(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap16(TCGContext *s, TCGReg ret, TCGReg arg)
 {
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+    tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+#else
     /* ret and arg can't be register at */
     if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
         tcg_abort();
     }
 
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
-    tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0x00ff);
-
     tcg_out_opc_sa(s, OPC_SLL, ret, arg, 8);
     tcg_out_opc_imm(s, OPC_ANDI, ret, ret, 0xff00);
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
 }
 
-static inline void tcg_out_bswap16s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap16s(TCGContext *s, TCGReg ret, TCGReg arg)
 {
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+    tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+    tcg_out_opc_reg(s, OPC_SEH, ret, 0, ret);
+#else
     /* ret and arg can't be register at */
     if (ret == TCG_REG_AT || arg == TCG_REG_AT) {
         tcg_abort();
     }
 
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
-    tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff);
-
     tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
     tcg_out_opc_sa(s, OPC_SRA, ret, ret, 16);
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
 }
 
-static inline void tcg_out_bswap32(TCGContext *s, int ret, int arg)
+static inline void tcg_out_bswap32(TCGContext *s, TCGReg ret, TCGReg arg)
 {
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+    tcg_out_opc_reg(s, OPC_WSBH, ret, 0, arg);
+    tcg_out_opc_sa(s, OPC_ROTR, ret, ret, 16);
+#else
     /* ret and arg must be different and can't be register at */
     if (ret == arg || ret == TCG_REG_AT || arg == TCG_REG_AT) {
         tcg_abort();
@@ -457,11 +475,12 @@
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_AT, arg, 8);
     tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_AT, TCG_REG_AT, 0xff00);
     tcg_out_opc_reg(s, OPC_OR, ret, ret, TCG_REG_AT);
+#endif
 }
 
-static inline void tcg_out_ext8s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_ext8s(TCGContext *s, TCGReg ret, TCGReg arg)
 {
-#ifdef _MIPS_ARCH_MIPS32R2
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
     tcg_out_opc_reg(s, OPC_SEB, ret, 0, arg);
 #else
     tcg_out_opc_sa(s, OPC_SLL, ret, arg, 24);
@@ -469,9 +488,9 @@
 #endif
 }
 
-static inline void tcg_out_ext16s(TCGContext *s, int ret, int arg)
+static inline void tcg_out_ext16s(TCGContext *s, TCGReg ret, TCGReg arg)
 {
-#ifdef _MIPS_ARCH_MIPS32R2
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
     tcg_out_opc_reg(s, OPC_SEH, ret, 0, arg);
 #else
     tcg_out_opc_sa(s, OPC_SLL, ret, arg, 16);
@@ -479,8 +498,8 @@
 #endif
 }
 
-static inline void tcg_out_ldst(TCGContext *s, int opc, int arg,
-                              int arg1, tcg_target_long arg2)
+static inline void tcg_out_ldst(TCGContext *s, int opc, TCGArg arg,
+                                TCGReg arg1, TCGArg arg2)
 {
     if (arg2 == (int16_t) arg2) {
         tcg_out_opc_imm(s, opc, arg, arg1, arg2);
@@ -503,7 +522,7 @@
     tcg_out_ldst(s, OPC_SW, arg, arg1, arg2);
 }
 
-static inline void tcg_out_addi(TCGContext *s, int reg, tcg_target_long val)
+static inline void tcg_out_addi(TCGContext *s, TCGReg reg, TCGArg val)
 {
     if (val == (int16_t)val) {
         tcg_out_opc_imm(s, OPC_ADDIU, reg, reg, val);
@@ -544,7 +563,7 @@
 #undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG
 #define DEFINE_TCG_OUT_CALL_IARG_GET_ARG(A) \
     tcg_out_movi(s, TCG_TYPE_I32, A, arg);
-DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, uint32_t arg)
+DEFINE_TCG_OUT_CALL_IARG(tcg_out_call_iarg_imm32, TCGArg arg)
 #undef DEFINE_TCG_OUT_CALL_IARG_GET_ARG
 
 /* We don't use the macro for this one to avoid an unnecessary reg-reg
@@ -574,8 +593,8 @@
 #endif
 }
 
-static void tcg_out_brcond(TCGContext *s, TCGCond cond, int arg1,
-                           int arg2, int label_index)
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGArg arg1,
+                           TCGArg arg2, int label_index)
 {
     TCGLabel *l = &s->labels[label_index];
 
@@ -587,32 +606,48 @@
         tcg_out_opc_br(s, OPC_BNE, arg1, arg2);
         break;
     case TCG_COND_LT:
-        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
-        tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+        if (arg2 == 0) {
+            tcg_out_opc_br(s, OPC_BLTZ, 0, arg1);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
+            tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+        }
         break;
     case TCG_COND_LTU:
         tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2);
         tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
         break;
     case TCG_COND_GE:
-        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
-        tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+        if (arg2 == 0) {
+            tcg_out_opc_br(s, OPC_BGEZ, 0, arg1);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg1, arg2);
+            tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+        }
         break;
     case TCG_COND_GEU:
         tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg1, arg2);
         tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
         break;
     case TCG_COND_LE:
-        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
-        tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+        if (arg2 == 0) {
+            tcg_out_opc_br(s, OPC_BLEZ, 0, arg1);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
+            tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
+        }
         break;
     case TCG_COND_LEU:
         tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1);
         tcg_out_opc_br(s, OPC_BEQ, TCG_REG_AT, TCG_REG_ZERO);
         break;
     case TCG_COND_GT:
-        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
-        tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+        if (arg2 == 0) {
+            tcg_out_opc_br(s, OPC_BGTZ, 0, arg1);
+        } else {
+            tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, arg2, arg1);
+            tcg_out_opc_br(s, OPC_BNE, TCG_REG_AT, TCG_REG_ZERO);
+        }
         break;
     case TCG_COND_GTU:
         tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, arg2, arg1);
@@ -632,8 +667,9 @@
 
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_brcond2(TCGContext *s, TCGCond cond, int arg1,
-                            int arg2, int arg3, int arg4, int label_index)
+static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGArg arg1,
+                            TCGArg arg2, TCGArg arg3, TCGArg arg4,
+                            int label_index)
 {
     void *label_ptr;
 
@@ -695,8 +731,70 @@
     reloc_pc16(label_ptr, (tcg_target_long) s->code_ptr);
 }
 
-static void tcg_out_setcond(TCGContext *s, TCGCond cond, int ret,
-                            int arg1, int arg2)
+static void tcg_out_movcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGArg c1, TCGArg c2, TCGArg v)
+{
+    switch (cond) {
+    case TCG_COND_EQ:
+        if (c1 == 0) {
+            tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c2);
+        } else if (c2 == 0) {
+            tcg_out_opc_reg(s, OPC_MOVZ, ret, v, c1);
+        } else {
+            tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2);
+            tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        }
+        break;
+    case TCG_COND_NE:
+        if (c1 == 0) {
+            tcg_out_opc_reg(s, OPC_MOVN, ret, v, c2);
+        } else if (c2 == 0) {
+            tcg_out_opc_reg(s, OPC_MOVN, ret, v, c1);
+        } else {
+            tcg_out_opc_reg(s, OPC_XOR, TCG_REG_AT, c1, c2);
+            tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        }
+        break;
+    case TCG_COND_LT:
+        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2);
+        tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_LTU:
+        tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2);
+        tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_GE:
+        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c1, c2);
+        tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_GEU:
+        tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c1, c2);
+        tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_LE:
+        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1);
+        tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_LEU:
+        tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1);
+        tcg_out_opc_reg(s, OPC_MOVZ, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_GT:
+        tcg_out_opc_reg(s, OPC_SLT, TCG_REG_AT, c2, c1);
+        tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        break;
+    case TCG_COND_GTU:
+        tcg_out_opc_reg(s, OPC_SLTU, TCG_REG_AT, c2, c1);
+        tcg_out_opc_reg(s, OPC_MOVN, ret, v, TCG_REG_AT);
+        break;
+    default:
+        tcg_abort();
+        break;
+    }
+}
+
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGArg arg1, TCGArg arg2)
 {
     switch (cond) {
     case TCG_COND_EQ:
@@ -755,8 +853,8 @@
 
 /* XXX: we implement it at the target level to avoid having to
    handle cross basic blocks temporaries */
-static void tcg_out_setcond2(TCGContext *s, TCGCond cond, int ret,
-                             int arg1, int arg2, int arg3, int arg4)
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+                             TCGArg arg1, TCGArg arg2, TCGArg arg3, TCGArg arg4)
 {
     switch (cond) {
     case TCG_COND_EQ:
@@ -821,7 +919,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -839,42 +936,22 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 #endif
 
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_regl, addr_meml;
-    int data_regl, data_regh, data_reg1, data_reg2;
-    int mem_index, s_bits;
+    TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
 #if defined(CONFIG_SOFTMMU)
     void *label1_ptr, *label2_ptr;
     int arg_num;
-#endif
-#if TARGET_LONG_BITS == 64
-# if defined(CONFIG_SOFTMMU)
+    int mem_index, s_bits;
+    int addr_meml;
+# if TARGET_LONG_BITS == 64
     uint8_t *label3_ptr;
+    TCGReg addr_regh;
+    int addr_memh;
 # endif
-    int addr_regh, addr_memh;
 #endif
     data_regl = *args++;
     if (opc == 3)
@@ -882,11 +959,22 @@
     else
         data_regh = 0;
     addr_regl = *args++;
-#if TARGET_LONG_BITS == 64
+#if defined(CONFIG_SOFTMMU)
+# if TARGET_LONG_BITS == 64
     addr_regh = *args++;
-#endif
+#  if defined(TCG_TARGET_WORDS_BIGENDIAN)
+    addr_memh = 0;
+    addr_meml = 4;
+#  else
+    addr_memh = 4;
+    addr_meml = 0;
+#  endif
+# else
+    addr_meml = 0;
+# endif
     mem_index = *args;
     s_bits = opc & 3;
+#endif
 
     if (opc == 3) {
 #if defined(TCG_TARGET_WORDS_BIGENDIAN)
@@ -900,18 +988,6 @@
         data_reg1 = data_regl;
         data_reg2 = 0;
     }
-#if TARGET_LONG_BITS == 64
-# if defined(TCG_TARGET_WORDS_BIGENDIAN)
-    addr_memh = 0;
-    addr_meml = 4;
-# else
-    addr_memh = 4;
-    addr_meml = 0;
-# endif
-#else
-    addr_meml = 0;
-#endif
-
 #if defined(CONFIG_SOFTMMU)
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
     tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_A0, TCG_REG_A0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
@@ -942,9 +1018,7 @@
 
     /* slow path */
     arg_num = 0;
-# ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0);
-# endif
 # if TARGET_LONG_BITS == 64
     tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh);
 # else
@@ -1052,23 +1126,45 @@
 static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
                             int opc)
 {
-    int addr_regl, addr_meml;
-    int data_regl, data_regh, data_reg1, data_reg2;
-    int mem_index, s_bits;
+    TCGReg addr_regl, data_regl, data_regh, data_reg1, data_reg2;
 #if defined(CONFIG_SOFTMMU)
     uint8_t *label1_ptr, *label2_ptr;
     int arg_num;
+    int mem_index, s_bits;
+    int addr_meml;
 #endif
 #if TARGET_LONG_BITS == 64
 # if defined(CONFIG_SOFTMMU)
     uint8_t *label3_ptr;
+    TCGReg addr_regh;
+    int addr_memh;
 # endif
-    int addr_regh, addr_memh;
 #endif
-
     data_regl = *args++;
     if (opc == 3) {
         data_regh = *args++;
+    } else {
+        data_regh = 0;
+    }
+    addr_regl = *args++;
+#if defined(CONFIG_SOFTMMU)
+# if TARGET_LONG_BITS == 64
+    addr_regh = *args++;
+#  if defined(TCG_TARGET_WORDS_BIGENDIAN)
+    addr_memh = 0;
+    addr_meml = 4;
+#  else
+    addr_memh = 4;
+    addr_meml = 0;
+#  endif
+# else
+    addr_meml = 0;
+# endif
+    mem_index = *args;
+    s_bits = opc;
+#endif
+
+    if (opc == 3) {
 #if defined(TCG_TARGET_WORDS_BIGENDIAN)
         data_reg1 = data_regh;
         data_reg2 = data_regl;
@@ -1079,23 +1175,7 @@
     } else {
         data_reg1 = data_regl;
         data_reg2 = 0;
-        data_regh = 0;
     }
-    addr_regl = *args++;
-#if TARGET_LONG_BITS == 64
-    addr_regh = *args++;
-# if defined(TCG_TARGET_WORDS_BIGENDIAN)
-    addr_memh = 0;
-    addr_meml = 4;
-# else
-    addr_memh = 4;
-    addr_meml = 0;
-# endif
-#else
-    addr_meml = 0;
-#endif
-    mem_index = *args;
-    s_bits = opc;
 
 #if defined(CONFIG_SOFTMMU)
     tcg_out_opc_sa(s, OPC_SRL, TCG_REG_A0, addr_regl, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
@@ -1127,9 +1207,7 @@
 
     /* slow path */
     arg_num = 0;
-# ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0);
-# endif
 # if TARGET_LONG_BITS == 64
     tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh);
 # else
@@ -1182,7 +1260,8 @@
         break;
     case 1:
         if (TCG_NEED_BSWAP) {
-            tcg_out_bswap16(s, TCG_REG_T0, data_reg1);
+            tcg_out_opc_imm(s, OPC_ANDI, TCG_REG_T0, data_reg1, 0xffff);
+            tcg_out_bswap16(s, TCG_REG_T0, TCG_REG_T0);
             tcg_out_opc_imm(s, OPC_SH, TCG_REG_T0, TCG_REG_A0, 0);
         } else {
             tcg_out_opc_imm(s, OPC_SH, data_reg1, TCG_REG_A0, 0);
@@ -1243,10 +1322,6 @@
         tcg_out_opc_reg(s, OPC_JALR, TCG_REG_RA, args[0], 0);
         tcg_out_nop(s);
         break;
-    case INDEX_op_jmp:
-        tcg_out_opc_reg(s, OPC_JR, 0, args[0], 0);
-        tcg_out_nop(s);
-        break;
     case INDEX_op_br:
         tcg_out_brcond(s, TCG_COND_EQ, TCG_REG_ZERO, TCG_REG_ZERO, args[0]);
         break;
@@ -1402,6 +1477,31 @@
             tcg_out_opc_reg(s, OPC_SRLV, args[0], args[2], args[1]);
         }
         break;
+    case INDEX_op_rotl_i32:
+        if (const_args[2]) {
+            tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], 0x20 - args[2]);
+        } else {
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_AT, 32);
+            tcg_out_opc_reg(s, OPC_SUBU, TCG_REG_AT, TCG_REG_AT, args[2]);
+            tcg_out_opc_reg(s, OPC_ROTRV, args[0], TCG_REG_AT, args[1]);
+        }
+        break;
+    case INDEX_op_rotr_i32:
+        if (const_args[2]) {
+            tcg_out_opc_sa(s, OPC_ROTR, args[0], args[1], args[2]);
+        } else {
+            tcg_out_opc_reg(s, OPC_ROTRV, args[0], args[2], args[1]);
+        }
+        break;
+
+    /* The bswap routines do not work on non-R2 CPU. In that case
+       we let TCG generating the corresponding code. */
+    case INDEX_op_bswap16_i32:
+        tcg_out_bswap16(s, args[0], args[1]);
+        break;
+    case INDEX_op_bswap32_i32:
+        tcg_out_bswap32(s, args[0], args[1]);
+        break;
 
     case INDEX_op_ext8s_i32:
         tcg_out_ext8s(s, args[0], args[1]);
@@ -1410,6 +1510,11 @@
         tcg_out_ext16s(s, args[0], args[1]);
         break;
 
+    case INDEX_op_deposit_i32:
+        tcg_out_opc_imm(s, OPC_INS, args[0], args[2],
+                        ((args[3] + args[4] - 1) << 11) | (args[3] << 6));
+        break;
+
     case INDEX_op_brcond_i32:
         tcg_out_brcond(s, args[2], args[0], args[1], args[3]);
         break;
@@ -1417,6 +1522,10 @@
         tcg_out_brcond2(s, args[4], args[0], args[1], args[2], args[3], args[5]);
         break;
 
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond(s, args[5], args[0], args[1], args[2], args[3]);
+        break;
+
     case INDEX_op_setcond_i32:
         tcg_out_setcond(s, args[3], args[0], args[1], args[2]);
         break;
@@ -1464,7 +1573,6 @@
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "C" } },
-    { INDEX_op_jmp, { "r" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
@@ -1478,34 +1586,42 @@
     { INDEX_op_st16_i32, { "rZ", "r" } },
     { INDEX_op_st_i32, { "rZ", "r" } },
 
-    { INDEX_op_add_i32, { "r", "rZ", "rJZ" } },
+    { INDEX_op_add_i32, { "r", "rZ", "rJ" } },
     { INDEX_op_mul_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_mulu2_i32, { "r", "r", "rZ", "rZ" } },
     { INDEX_op_div_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_divu_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_rem_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_remu_i32, { "r", "rZ", "rZ" } },
-    { INDEX_op_sub_i32, { "r", "rZ", "rJZ" } },
+    { INDEX_op_sub_i32, { "r", "rZ", "rJ" } },
 
-    { INDEX_op_and_i32, { "r", "rZ", "rIZ" } },
+    { INDEX_op_and_i32, { "r", "rZ", "rI" } },
     { INDEX_op_nor_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_not_i32, { "r", "rZ" } },
     { INDEX_op_or_i32, { "r", "rZ", "rIZ" } },
     { INDEX_op_xor_i32, { "r", "rZ", "rIZ" } },
 
-    { INDEX_op_shl_i32, { "r", "rZ", "riZ" } },
-    { INDEX_op_shr_i32, { "r", "rZ", "riZ" } },
-    { INDEX_op_sar_i32, { "r", "rZ", "riZ" } },
+    { INDEX_op_shl_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_shr_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_sar_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_rotr_i32, { "r", "rZ", "ri" } },
+    { INDEX_op_rotl_i32, { "r", "rZ", "ri" } },
+
+    { INDEX_op_bswap16_i32, { "r", "r" } },
+    { INDEX_op_bswap32_i32, { "r", "r" } },
 
     { INDEX_op_ext8s_i32, { "r", "rZ" } },
     { INDEX_op_ext16s_i32, { "r", "rZ" } },
 
+    { INDEX_op_deposit_i32, { "r", "0", "rZ" } },
+
     { INDEX_op_brcond_i32, { "rZ", "rZ" } },
+    { INDEX_op_movcond_i32, { "r", "rZ", "rZ", "rZ", "0" } },
     { INDEX_op_setcond_i32, { "r", "rZ", "rZ" } },
     { INDEX_op_setcond2_i32, { "r", "rZ", "rZ", "rZ", "rZ" } },
 
-    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
-    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJZ", "rJZ" } },
+    { INDEX_op_add2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
+    { INDEX_op_sub2_i32, { "r", "r", "rZ", "rZ", "rJ", "rJ" } },
     { INDEX_op_brcond2_i32, { "rZ", "rZ", "rZ", "rZ" } },
 
 #if TARGET_LONG_BITS == 32
@@ -1545,7 +1661,6 @@
     TCG_REG_S5,
     TCG_REG_S6,
     TCG_REG_S7,
-    TCG_REG_GP,
     TCG_REG_FP,
     TCG_REG_RA,       /* should be last for ABI compliance */
 };
@@ -1555,11 +1670,15 @@
 {
     int i, frame_size;
 
-    /* reserve some stack space */
+    /* reserve some stack space, also for TCG temps. */
     frame_size = ARRAY_SIZE(tcg_target_callee_save_regs) * 4
-                 + TCG_STATIC_CALL_ARGS_SIZE;
+                 + TCG_STATIC_CALL_ARGS_SIZE
+                 + CPU_TEMP_BUF_NLONGS * sizeof(long);
     frame_size = (frame_size + TCG_TARGET_STACK_ALIGN - 1) &
                  ~(TCG_TARGET_STACK_ALIGN - 1);
+    tcg_set_frame(s, TCG_REG_SP, ARRAY_SIZE(tcg_target_callee_save_regs) * 4
+                  + TCG_STATIC_CALL_ARGS_SIZE,
+                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 
     /* TB prologue */
     tcg_out_addi(s, TCG_REG_SP, -frame_size);
@@ -1611,8 +1730,7 @@
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_T0);   /* internal use */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_RA);   /* return address */
     tcg_regset_set_reg(s->reserved_regs, TCG_REG_SP);   /* stack pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_GP);   /* global pointer */
 
     tcg_add_target_add_op_defs(mips_op_defs);
-    tcg_set_frame(s, TCG_AREG0, offsetof(CPUArchState, temp_buf),
-                  CPU_TEMP_BUF_NLONGS * sizeof(long));
 }
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index d3c804d..7020d65 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -80,23 +80,40 @@
 #define TCG_TARGET_HAS_div_i32          1
 #define TCG_TARGET_HAS_not_i32          1
 #define TCG_TARGET_HAS_nor_i32          1
-#define TCG_TARGET_HAS_rot_i32          0
 #define TCG_TARGET_HAS_ext8s_i32        1
 #define TCG_TARGET_HAS_ext16s_i32       1
-#define TCG_TARGET_HAS_bswap32_i32      0
-#define TCG_TARGET_HAS_bswap16_i32      0
 #define TCG_TARGET_HAS_andc_i32         0
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_eqv_i32          0
 #define TCG_TARGET_HAS_nand_i32         0
+
+/* optional instructions only implemented on MIPS4, MIPS32 and Loongson 2 */
+#if (defined(__mips_isa_rev) && (__mips_isa_rev >= 1)) || \
+    defined(_MIPS_ARCH_LOONGSON2E) || defined(_MIPS_ARCH_LOONGSON2F) || \
+    defined(_MIPS_ARCH_MIPS4)
+#define TCG_TARGET_HAS_movcond_i32      1
+#else
+#define TCG_TARGET_HAS_movcond_i32      0
+#endif
+
+/* optional instructions only implemented on MIPS32R2 */
+#if defined(__mips_isa_rev) && (__mips_isa_rev >= 2)
+#define TCG_TARGET_HAS_bswap16_i32      1
+#define TCG_TARGET_HAS_bswap32_i32      1
+#define TCG_TARGET_HAS_rot_i32          1
+#define TCG_TARGET_HAS_deposit_i32      1
+#else
+#define TCG_TARGET_HAS_bswap16_i32      0
+#define TCG_TARGET_HAS_bswap32_i32      0
+#define TCG_TARGET_HAS_rot_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#endif
 
 /* optional instructions automatically implemented */
 #define TCG_TARGET_HAS_neg_i32          0 /* sub  rd, zero, rt   */
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi rt, rs, 0xff   */
 #define TCG_TARGET_HAS_ext16u_i32       0 /* andi rt, rs, 0xffff */
 
-/* Note: must be synced with dyngen-exec.h */
 #define TCG_AREG0 TCG_REG_S0
 
 /* guest base is supported */
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9c65474..edb2b0e 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -39,8 +39,6 @@
     TCG_TEMP_UNDEF = 0,
     TCG_TEMP_CONST,
     TCG_TEMP_COPY,
-    TCG_TEMP_HAS_COPY,
-    TCG_TEMP_ANY
 } tcg_temp_state;
 
 struct tcg_temp_info {
@@ -52,39 +50,19 @@
 
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
 
-/* Reset TEMP's state to TCG_TEMP_ANY.  If TEMP was a representative of some
-   class of equivalent temp's, a new representative should be chosen in this
-   class. */
-static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
+/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
+   the copy flag from the left temp.  */
+static void reset_temp(TCGArg temp)
 {
-    int i;
-    TCGArg new_base = (TCGArg)-1;
-    if (temps[temp].state == TCG_TEMP_HAS_COPY) {
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
-            if (i >= nb_globals) {
-                temps[i].state = TCG_TEMP_HAS_COPY;
-                new_base = i;
-                break;
-            }
+    if (temps[temp].state == TCG_TEMP_COPY) {
+        if (temps[temp].prev_copy == temps[temp].next_copy) {
+            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
+        } else {
+            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
+            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
         }
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
-            if (new_base == (TCGArg)-1) {
-                temps[i].state = TCG_TEMP_ANY;
-            } else {
-                temps[i].val = new_base;
-            }
-        }
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
-    } else if (temps[temp].state == TCG_TEMP_COPY) {
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
-        new_base = temps[temp].val;
     }
-    temps[temp].state = TCG_TEMP_ANY;
-    if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
-        temps[new_base].state = TCG_TEMP_ANY;
-    }
+    temps[temp].state = TCG_TEMP_UNDEF;
 }
 
 static int op_bits(TCGOpcode op)
@@ -107,36 +85,83 @@
     }
 }
 
-static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
-                            TCGArg src, int nb_temps, int nb_globals)
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
 {
-        reset_temp(dst, nb_temps, nb_globals);
-        assert(temps[src].state != TCG_TEMP_COPY);
-        /* Don't try to copy if one of temps is a global or either one
-           is local and another is register */
-        if (src >= nb_globals && dst >= nb_globals &&
-            tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
-            assert(temps[src].state != TCG_TEMP_CONST);
-            if (temps[src].state != TCG_TEMP_HAS_COPY) {
-                temps[src].state = TCG_TEMP_HAS_COPY;
+    TCGArg i;
+
+    /* If this is already a global, we can't do better. */
+    if (temp < s->nb_globals) {
+        return temp;
+    }
+
+    /* Search for a global first. */
+    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+        if (i < s->nb_globals) {
+            return i;
+        }
+    }
+
+    /* If it is a temp, search for a temp local. */
+    if (!s->temps[temp].temp_local) {
+        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
+            if (s->temps[i].temp_local) {
+                return i;
+            }
+        }
+    }
+
+    /* Failure to find a better representation, return the same temp. */
+    return temp;
+}
+
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
+{
+    TCGArg i;
+
+    if (arg1 == arg2) {
+        return true;
+    }
+
+    if (temps[arg1].state != TCG_TEMP_COPY
+        || temps[arg2].state != TCG_TEMP_COPY) {
+        return false;
+    }
+
+    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
+        if (i == arg2) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
+                            TCGArg dst, TCGArg src)
+{
+        reset_temp(dst);
+        assert(temps[src].state != TCG_TEMP_CONST);
+
+        if (s->temps[src].type == s->temps[dst].type) {
+            if (temps[src].state != TCG_TEMP_COPY) {
+                temps[src].state = TCG_TEMP_COPY;
                 temps[src].next_copy = src;
                 temps[src].prev_copy = src;
             }
             temps[dst].state = TCG_TEMP_COPY;
-            temps[dst].val = src;
             temps[dst].next_copy = temps[src].next_copy;
             temps[dst].prev_copy = src;
             temps[temps[dst].next_copy].prev_copy = dst;
             temps[src].next_copy = dst;
         }
+
         gen_args[0] = dst;
         gen_args[1] = src;
 }
 
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
-                             int nb_temps, int nb_globals)
+static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
 {
-        reset_temp(dst, nb_temps, nb_globals);
+        reset_temp(dst);
         temps[dst].state = TCG_TEMP_CONST;
         temps[dst].val = val;
         gen_args[0] = dst;
@@ -267,6 +292,102 @@
     return res;
 }
 
+/* Return 2 if the condition can't be simplified, and the result
+   of the condition (0 or 1) if it can */
+static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
+                                       TCGArg y, TCGCond c)
+{
+    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
+        switch (op_bits(op)) {
+        case 32:
+            switch (c) {
+            case TCG_COND_EQ:
+                return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
+            case TCG_COND_NE:
+                return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
+            case TCG_COND_LT:
+                return (int32_t)temps[x].val < (int32_t)temps[y].val;
+            case TCG_COND_GE:
+                return (int32_t)temps[x].val >= (int32_t)temps[y].val;
+            case TCG_COND_LE:
+                return (int32_t)temps[x].val <= (int32_t)temps[y].val;
+            case TCG_COND_GT:
+                return (int32_t)temps[x].val > (int32_t)temps[y].val;
+            case TCG_COND_LTU:
+                return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
+            case TCG_COND_GEU:
+                return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
+            case TCG_COND_LEU:
+                return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
+            case TCG_COND_GTU:
+                return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
+            default:
+                break;
+            }
+            break;
+        case 64:
+            switch (c) {
+            case TCG_COND_EQ:
+                return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
+            case TCG_COND_NE:
+                return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
+            case TCG_COND_LT:
+                return (int64_t)temps[x].val < (int64_t)temps[y].val;
+            case TCG_COND_GE:
+                return (int64_t)temps[x].val >= (int64_t)temps[y].val;
+            case TCG_COND_LE:
+                return (int64_t)temps[x].val <= (int64_t)temps[y].val;
+            case TCG_COND_GT:
+                return (int64_t)temps[x].val > (int64_t)temps[y].val;
+            case TCG_COND_LTU:
+                return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
+            case TCG_COND_GEU:
+                return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
+            case TCG_COND_LEU:
+                return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
+            case TCG_COND_GTU:
+                return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
+            default:
+                break;
+            }
+            break;
+        }
+    } else if (temps_are_copies(x, y)) {
+        switch (c) {
+        case TCG_COND_GT:
+        case TCG_COND_LTU:
+        case TCG_COND_LT:
+        case TCG_COND_GTU:
+        case TCG_COND_NE:
+            return 0;
+        case TCG_COND_GE:
+        case TCG_COND_GEU:
+        case TCG_COND_LE:
+        case TCG_COND_LEU:
+        case TCG_COND_EQ:
+            return 1;
+        default:
+            break;
+        }
+    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
+        switch (c) {
+        case TCG_COND_LTU:
+            return 0;
+        case TCG_COND_GEU:
+            return 1;
+        default:
+            return 2;
+        }
+    } else {
+        return 2;
+    }
+
+    fprintf(stderr,
+            "Unrecognized bitness %d or condition %d in "
+            "do_constant_folding_cond.\n", op_bits(op), c);
+    tcg_abort();
+}
+
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -276,12 +397,12 @@
     const TCGOpDef *def;
     TCGArg *gen_args;
     TCGArg tmp;
+    TCGCond cond;
+
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
-       If this temp is a copy of other ones then this equivalence class'
-       representative is kept in VALS' element.
-       If this temp is neither copy nor constant then corresponding VALS'
-       element is unused. */
+       If this temp is a copy of other ones then the other copies are
+       available through the doubly linked circular list. */
 
     nb_temps = s->nb_temps;
     nb_globals = s->nb_globals;
@@ -293,11 +414,18 @@
         op = gen_opc_buf[op_index];
         def = &tcg_op_defs[op];
         /* Do copy propagation */
-        if (!(def->flags & (TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS))) {
-            assert(op != INDEX_op_call);
+        if (op == INDEX_op_call) {
+            int nb_oargs = args[0] >> 16;
+            int nb_iargs = args[0] & 0xffff;
+            for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
+                if (temps[args[i]].state == TCG_TEMP_COPY) {
+                    args[i] = find_better_copy(s, args[i]);
+                }
+            }
+        } else {
             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
                 if (temps[args[i]].state == TCG_TEMP_COPY) {
-                    args[i] = temps[args[i]].val;
+                    args[i] = find_better_copy(s, args[i]);
                 }
             }
         }
@@ -312,17 +440,77 @@
         CASE_OP_32_64(eqv):
         CASE_OP_32_64(nand):
         CASE_OP_32_64(nor):
-            if (temps[args[1]].state == TCG_TEMP_CONST) {
+            /* Prefer the constant in second argument, and then the form
+               op a, a, b, which is better handled on non-RISC hosts. */
+            if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
+                && temps[args[2]].state != TCG_TEMP_CONST)) {
                 tmp = args[1];
                 args[1] = args[2];
                 args[2] = tmp;
             }
             break;
+        CASE_OP_32_64(brcond):
+            if (temps[args[0]].state == TCG_TEMP_CONST
+                && temps[args[1]].state != TCG_TEMP_CONST) {
+                tmp = args[0];
+                args[0] = args[1];
+                args[1] = tmp;
+                args[2] = tcg_swap_cond(args[2]);
+            }
+            break;
+        CASE_OP_32_64(setcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state != TCG_TEMP_CONST) {
+                tmp = args[1];
+                args[1] = args[2];
+                args[2] = tmp;
+                args[3] = tcg_swap_cond(args[3]);
+            }
+            break;
+        CASE_OP_32_64(movcond):
+            cond = args[5];
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state != TCG_TEMP_CONST) {
+                tmp = args[1];
+                args[1] = args[2];
+                args[2] = tmp;
+                cond = tcg_swap_cond(cond);
+            }
+            /* For movcond, we canonicalize the "false" input reg to match
+               the destination reg so that the tcg backend can implement
+               a "move if true" operation.  */
+            if (args[0] == args[3]) {
+                tmp = args[3];
+                args[3] = args[4];
+                args[4] = tmp;
+                cond = tcg_invert_cond(cond);
+            }
+            args[5] = cond;
+        default:
+            break;
+        }
+
+        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
+        switch (op) {
+        CASE_OP_32_64(shl):
+        CASE_OP_32_64(shr):
+        CASE_OP_32_64(sar):
+        CASE_OP_32_64(rotl):
+        CASE_OP_32_64(rotr):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[1]].val == 0) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
+                args += 3;
+                gen_args += 2;
+                continue;
+            }
+            break;
         default:
             break;
         }
 
-        /* Simplify expression if possible. */
+        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
         switch (op) {
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
@@ -331,50 +519,75 @@
         CASE_OP_32_64(sar):
         CASE_OP_32_64(rotl):
         CASE_OP_32_64(rotr):
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(xor):
             if (temps[args[1]].state == TCG_TEMP_CONST) {
                 /* Proceed with possible constant folding. */
                 break;
             }
             if (temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0) {
-                if ((temps[args[0]].state == TCG_TEMP_COPY
-                    && temps[args[0]].val == args[1])
-                    || args[0] == args[1]) {
-                    args += 3;
+                if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(s, gen_args, args[0], args[1],
-                                    nb_temps, nb_globals);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                     gen_args += 2;
-                    args += 3;
                 }
+                args += 3;
                 continue;
             }
             break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
+        switch (op) {
+        CASE_OP_32_64(and):
         CASE_OP_32_64(mul):
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
                 gen_opc_buf[op_index] = op_to_movi(op);
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
                 args += 3;
                 gen_args += 2;
                 continue;
             }
             break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, a => mov r, a" cases */
+        switch (op) {
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
-            if (args[1] == args[2]) {
-                if (args[1] == args[0]) {
-                    args += 3;
+            if (temps_are_copies(args[1], args[2])) {
+                if (temps_are_copies(args[0], args[1])) {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
-                    tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
-                                    nb_globals);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                     gen_args += 2;
-                    args += 3;
                 }
+                args += 3;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, a => movi r, 0" cases */
+        switch (op) {
+        CASE_OP_32_64(sub):
+        CASE_OP_32_64(xor):
+            if (temps_are_copies(args[1], args[2])) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0);
+                gen_args += 2;
+                args += 3;
                 continue;
             }
             break;
@@ -387,16 +600,13 @@
            allocator where needed and possible.  Also detect copies. */
         switch (op) {
         CASE_OP_32_64(mov):
-            if ((temps[args[1]].state == TCG_TEMP_COPY
-                && temps[args[1]].val == args[0])
-                || args[0] == args[1]) {
+            if (temps_are_copies(args[0], args[1])) {
                 args += 2;
                 gen_opc_buf[op_index] = INDEX_op_nop;
                 break;
             }
             if (temps[args[1]].state != TCG_TEMP_CONST) {
-                tcg_opt_gen_mov(s, gen_args, args[0], args[1],
-                                nb_temps, nb_globals);
+                tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
                 gen_args += 2;
                 args += 2;
                 break;
@@ -408,7 +618,7 @@
             args[1] = temps[args[1]].val;
             /* fallthrough */
         CASE_OP_32_64(movi):
-            tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
+            tcg_opt_gen_movi(gen_args, args[0], args[1]);
             gen_args += 2;
             args += 2;
             break;
@@ -423,18 +633,15 @@
             if (temps[args[1]].state == TCG_TEMP_CONST) {
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
-                gen_args += 2;
-                args += 2;
-                break;
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
-                gen_args += 2;
-                args += 2;
-                break;
             }
+            gen_args += 2;
+            args += 2;
+            break;
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -456,28 +663,110 @@
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val,
                                           temps[args[2]].val);
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-                args += 3;
-                break;
             } else {
-                reset_temp(args[0], nb_temps, nb_globals);
+                reset_temp(args[0]);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
                 gen_args += 3;
-                args += 3;
-                break;
             }
+            args += 3;
+            break;
+        CASE_OP_32_64(deposit):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tmp = ((1ull << args[4]) - 1);
+                tmp = (temps[args[1]].val & ~(tmp << args[3]))
+                      | ((temps[args[2]].val & tmp) << args[3]);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else {
+                reset_temp(args[0]);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args[4] = args[4];
+                gen_args += 5;
+            }
+            args += 5;
+            break;
+        CASE_OP_32_64(setcond):
+            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
+            if (tmp != 2) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else {
+                reset_temp(args[0]);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args += 4;
+            }
+            args += 4;
+            break;
+        CASE_OP_32_64(brcond):
+            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
+            if (tmp != 2) {
+                if (tmp) {
+                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                    gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[3];
+                    gen_args += 1;
+                } else {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+            } else {
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                reset_temp(args[0]);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args += 4;
+            }
+            args += 4;
+            break;
+        CASE_OP_32_64(movcond):
+            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
+            if (tmp != 2) {
+                if (temps_are_copies(args[0], args[4-tmp])) {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
+                    gen_opc_buf[op_index] = op_to_movi(op);
+                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
+                    gen_args += 2;
+                } else {
+                    gen_opc_buf[op_index] = op_to_mov(op);
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
+                    gen_args += 2;
+                }
+            } else {
+                reset_temp(args[0]);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args[4] = args[4];
+                gen_args[5] = args[5];
+                gen_args += 6;
+            }
+            args += 6;
+            break;
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
                 for (i = 0; i < nb_globals; i++) {
-                    reset_temp(i, nb_temps, nb_globals);
+                    reset_temp(i);
                 }
             }
             for (i = 0; i < (args[0] >> 16); i++) {
-                reset_temp(args[i + 1], nb_temps, nb_globals);
+                reset_temp(args[i + 1]);
             }
             i = nb_call_args + 3;
             while (i) {
@@ -487,22 +776,17 @@
                 i--;
             }
             break;
-        case INDEX_op_set_label:
-        case INDEX_op_jmp:
-        case INDEX_op_br:
-        CASE_OP_32_64(brcond):
-            memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-            for (i = 0; i < def->nb_args; i++) {
-                *gen_args = *args;
-                args++;
-                gen_args++;
-            }
-            break;
         default:
             /* Default case: we do know nothing about operation so no
-               propagation is done.  We only trash output args.  */
-            for (i = 0; i < def->nb_oargs; i++) {
-                reset_temp(args[i], nb_temps, nb_globals);
+               propagation is done.  We trash everything if the operation
+               is the end of a basic block, otherwise we only trash the
+               output args.  */
+            if (def->flags & TCG_OPF_BB_END) {
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+            } else {
+                for (i = 0; i < def->nb_oargs; i++) {
+                    reset_temp(args[i]);
+                }
             }
             for (i = 0; i < def->nb_args; i++) {
                 gen_args[i] = args[i];
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 0cff181..60b7b92 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -221,12 +221,6 @@
     }
 }
 
-/* maximum number of register used for input function arguments */
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return ARRAY_SIZE (tcg_target_call_iarg_regs);
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
@@ -248,7 +242,6 @@
         tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
 #if TARGET_LONG_BITS == 64
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
@@ -256,11 +249,6 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7);
 #endif
 #endif
-#else /* !AREG0 */
-#if TARGET_LONG_BITS == 64
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
-#endif
-#endif
         break;
     case 'K':                   /* qemu_st[8..32] constraint */
         ct->ct |= TCG_CT_REG;
@@ -268,7 +256,6 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
 #if TARGET_LONG_BITS == 64
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7);
@@ -276,11 +263,6 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R8);
 #endif
 #endif
-#else /* !AREG0 */
-#if TARGET_LONG_BITS == 64
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
-#endif
-#endif
         break;
     case 'M':                   /* qemu_st64 constraint */
         ct->ct |= TCG_CT_REG;
@@ -290,12 +272,10 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7);
-#if defined(CONFIG_TCG_PASS_AREG0)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R8);
 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R9);
 #endif
-#endif
         break;
 #else
     case 'L':
@@ -404,6 +384,7 @@
 #define ORC    XO31(412)
 #define EQV    XO31(284)
 #define NAND   XO31(476)
+#define ISEL   XO31( 15)
 
 #define LBZX   XO31( 87)
 #define LHZX   XO31(279)
@@ -456,7 +437,7 @@
     CR_SO
 };
 
-static const uint32_t tcg_to_bc[10] = {
+static const uint32_t tcg_to_bc[] = {
     [TCG_COND_EQ]  = BC | BI (7, CR_EQ) | BO_COND_TRUE,
     [TCG_COND_NE]  = BC | BI (7, CR_EQ) | BO_COND_FALSE,
     [TCG_COND_LT]  = BC | BI (7, CR_LT) | BO_COND_TRUE,
@@ -541,7 +522,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -559,25 +539,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 #endif
 
 static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
@@ -647,9 +608,7 @@
 
     /* slow path */
     ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
-#endif
 #if TARGET_LONG_BITS == 32
     tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
 #else
@@ -849,9 +808,7 @@
 
     /* slow path */
     ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
-#endif
 #if TARGET_LONG_BITS == 32
     tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
 #else
@@ -1307,6 +1264,72 @@
         );
 }
 
+static void tcg_out_movcond (TCGContext *s, TCGCond cond,
+                             TCGArg dest,
+                             TCGArg c1, TCGArg c2,
+                             TCGArg v1, TCGArg v2,
+                             int const_c2)
+{
+    tcg_out_cmp (s, cond, c1, c2, const_c2, 7);
+
+    if (1) {
+        /* At least here on 7747A bit twiddling hacks are outperformed
+           by jumpy code (the testing was not scientific) */
+        if (dest == v2) {
+            cond = tcg_invert_cond (cond);
+            v2 = v1;
+        }
+        else {
+            if (dest != v1) {
+                tcg_out_mov (s, TCG_TYPE_I32, dest, v1);
+            }
+        }
+        /* Branch forward over one insn */
+        tcg_out32 (s, tcg_to_bc[cond] | 8);
+        tcg_out_mov (s, TCG_TYPE_I32, dest, v2);
+    }
+    else {
+        /* isel version, "if (1)" above should be replaced once a way
+           to figure out availability of isel on the underlying
+           hardware is found */
+        int tab, bc;
+
+        switch (cond) {
+        case TCG_COND_EQ:
+            tab = TAB (dest, v1, v2);
+            bc = CR_EQ;
+            break;
+        case TCG_COND_NE:
+            tab = TAB (dest, v2, v1);
+            bc = CR_EQ;
+            break;
+        case TCG_COND_LTU:
+        case TCG_COND_LT:
+            tab = TAB (dest, v1, v2);
+            bc = CR_LT;
+            break;
+        case TCG_COND_GEU:
+        case TCG_COND_GE:
+            tab = TAB (dest, v2, v1);
+            bc = CR_LT;
+            break;
+        case TCG_COND_LEU:
+        case TCG_COND_LE:
+            tab = TAB (dest, v2, v1);
+            bc = CR_GT;
+            break;
+        case TCG_COND_GTU:
+        case TCG_COND_GT:
+            tab = TAB (dest, v1, v2);
+            bc = CR_GT;
+            break;
+        default:
+            tcg_abort ();
+        }
+        tcg_out32 (s, ISEL | tab | ((bc + 28) << 6));
+    }
+}
+
 static void tcg_out_brcond (TCGContext *s, TCGCond cond,
                             TCGArg arg1, TCGArg arg2, int const_arg2,
                             int label_index)
@@ -1394,15 +1417,6 @@
     case INDEX_op_call:
         tcg_out_call (s, args[0], const_args[0]);
         break;
-    case INDEX_op_jmp:
-        if (const_args[0]) {
-            tcg_out_b (s, 0, args[0]);
-        }
-        else {
-            tcg_out32 (s, MTSPR | RS (args[0]) | CTR);
-            tcg_out32 (s, BCCTR | BO_ALWAYS);
-        }
-        break;
     case INDEX_op_movi_i32:
         tcg_out_movi(s, TCG_TYPE_I32, args[0], args[1]);
         break;
@@ -1864,6 +1878,13 @@
             );
         break;
 
+    case INDEX_op_movcond_i32:
+        tcg_out_movcond (s, args[5], args[0],
+                         args[1], args[2],
+                         args[3], args[4],
+                         const_args[2]);
+        break;
+
     default:
         tcg_dump_ops (s);
         tcg_abort ();
@@ -1874,7 +1895,6 @@
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
@@ -1960,6 +1980,7 @@
     { INDEX_op_ext16u_i32, { "r", "r" } },
 
     { INDEX_op_deposit_i32, { "r", "0", "r" } },
+    { INDEX_op_movcond_i32, { "r", "r", "ri", "r", "r" } },
 
     { -1 },
 };
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 2f37fd2..3259d89 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -92,6 +92,7 @@
 #define TCG_TARGET_HAS_nand_i32         1
 #define TCG_TARGET_HAS_nor_i32          1
 #define TCG_TARGET_HAS_deposit_i32      1
+#define TCG_TARGET_HAS_movcond_i32      1
 
 #define TCG_AREG0 TCG_REG_R27
 
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 27a0ae8..5403fc1 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -208,12 +208,6 @@
     }
 }
 
-/* maximum number of register used for input function arguments */
-static int tcg_target_get_call_iarg_regs_count (int flags)
-{
-    return ARRAY_SIZE (tcg_target_call_iarg_regs);
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint (TCGArgConstraint *ct, const char **pct_str)
 {
@@ -235,10 +229,8 @@
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
 #ifdef CONFIG_SOFTMMU
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5);
 #endif
-#endif
         break;
     case 'S':                   /* qemu_st constraint */
         ct->ct |= TCG_CT_REG;
@@ -247,10 +239,8 @@
 #ifdef CONFIG_SOFTMMU
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5);
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R6);
 #endif
-#endif
         break;
     case 'Z':
         ct->ct |= TCG_CT_CONST_U32;
@@ -428,7 +418,7 @@
     CR_SO
 };
 
-static const uint32_t tcg_to_bc[10] = {
+static const uint32_t tcg_to_bc[] = {
     [TCG_COND_EQ]  = BC | BI (7, CR_EQ) | BO_COND_TRUE,
     [TCG_COND_NE]  = BC | BI (7, CR_EQ) | BO_COND_FALSE,
     [TCG_COND_LT]  = BC | BI (7, CR_LT) | BO_COND_TRUE,
@@ -558,7 +548,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -576,25 +565,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
                               int addr_reg, int s_bits, int offset)
@@ -676,9 +646,7 @@
 
     /* slow path */
     ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
-#endif
     tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
 
@@ -827,9 +795,7 @@
 
     /* slow path */
     ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
-#endif
     tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
     tcg_out_rld (s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
     tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
@@ -1279,15 +1245,6 @@
     case INDEX_op_call:
         tcg_out_call (s, args[0], const_args[0]);
         break;
-    case INDEX_op_jmp:
-        if (const_args[0]) {
-            tcg_out_b (s, 0, args[0]);
-        }
-        else {
-            tcg_out32 (s, MTSPR | RS (args[0]) | CTR);
-            tcg_out32 (s, BCCTR | BO_ALWAYS);
-        }
-        break;
     case INDEX_op_movi_i32:
         tcg_out_movi (s, TCG_TYPE_I32, args[0], args[1]);
         break;
@@ -1622,7 +1579,6 @@
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 97eec08..57569e8 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -83,6 +83,7 @@
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #define TCG_TARGET_HAS_div_i64          1
 #define TCG_TARGET_HAS_rot_i64          0
@@ -103,6 +104,7 @@
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i64      0
 
 #define TCG_AREG0 TCG_REG_R27
 
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 99b5339..fd9286f 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -268,7 +268,7 @@
 #define S390_CC_ALWAYS  15
 
 /* Condition codes that result from a COMPARE and COMPARE LOGICAL.  */
-static const uint8_t tcg_cond_to_s390_cond[10] = {
+static const uint8_t tcg_cond_to_s390_cond[] = {
     [TCG_COND_EQ]  = S390_CC_EQ,
     [TCG_COND_NE]  = S390_CC_NE,
     [TCG_COND_LT]  = S390_CC_LT,
@@ -284,7 +284,7 @@
 /* Condition codes that result from a LOAD AND TEST.  Here, we have no
    unsigned instruction variation, however since the test is vs zero we
    can re-map the outcomes appropriately.  */
-static const uint8_t tcg_cond_to_ltr_cond[10] = {
+static const uint8_t tcg_cond_to_ltr_cond[] = {
     [TCG_COND_EQ]  = S390_CC_EQ,
     [TCG_COND_NE]  = S390_CC_NE,
     [TCG_COND_LT]  = S390_CC_LT,
@@ -301,7 +301,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -319,25 +318,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 #endif
 
 static uint8_t *tb_ret_addr;
@@ -376,11 +356,6 @@
     }
 }
 
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return sizeof(tcg_target_call_iarg_regs) / sizeof(int);
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
@@ -1138,7 +1113,7 @@
 static int tgen_cmp(TCGContext *s, TCGType type, TCGCond c, TCGReg r1,
                     TCGArg c2, int c2const)
 {
-    bool is_unsigned = (c > TCG_COND_GT);
+    bool is_unsigned = is_unsigned_cond(c);
     if (c2const) {
         if (c2 == 0) {
             if (type == TCG_TYPE_I32) {
@@ -1507,7 +1482,6 @@
             tcg_abort();
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
         /* XXX/FIXME: suboptimal */
         tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
                     tcg_target_call_iarg_regs[2]);
@@ -1517,11 +1491,9 @@
                     tcg_target_call_iarg_regs[0]);
         tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                     TCG_AREG0);
-#endif
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
         /* XXX/FIXME: suboptimal */
         tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
                     tcg_target_call_iarg_regs[1]);
@@ -1529,7 +1501,6 @@
                     tcg_target_call_iarg_regs[0]);
         tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                     TCG_AREG0);
-#endif
         tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
 
         /* sign extension */
@@ -2066,11 +2037,6 @@
         break;
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
-    case INDEX_op_jmp:
-        /* This one is obsolete and never emitted.  */
-        tcg_abort();
-        break;
-
     default:
         fprintf(stderr,"unimplemented opc 0x%x\n",opc);
         tcg_abort();
@@ -2081,7 +2047,6 @@
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index d12f90b..ed55c33 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -63,6 +63,7 @@
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div2_i64         1
@@ -84,6 +85,7 @@
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
 #define TCG_TARGET_HAS_GUEST_BASE
@@ -96,7 +98,6 @@
 #define TCG_TARGET_EXTEND_ARGS 1
 
 enum {
-    /* Note: must be synced with dyngen-exec.h */
     TCG_AREG0 = TCG_REG_R10,
 };
 
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 247a278..0c32baa 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,10 +59,14 @@
 };
 #endif
 
-#ifdef CONFIG_TCG_PASS_AREG0
-#define ARG_OFFSET 1
+/* Define some temporary registers.  T2 is used for constant generation.  */
+#define TCG_REG_T1  TCG_REG_G1
+#define TCG_REG_T2  TCG_REG_O7
+
+#ifdef CONFIG_USE_GUEST_BASE
+# define TCG_GUEST_BASE_REG TCG_REG_I5
 #else
-#define ARG_OFFSET 0
+# define TCG_GUEST_BASE_REG TCG_REG_G0
 #endif
 
 static const int tcg_target_reg_alloc_order[] = {
@@ -74,11 +78,25 @@
     TCG_REG_L5,
     TCG_REG_L6,
     TCG_REG_L7,
+
     TCG_REG_I0,
     TCG_REG_I1,
     TCG_REG_I2,
     TCG_REG_I3,
     TCG_REG_I4,
+    TCG_REG_I5,
+
+    TCG_REG_G2,
+    TCG_REG_G3,
+    TCG_REG_G4,
+    TCG_REG_G5,
+
+    TCG_REG_O0,
+    TCG_REG_O1,
+    TCG_REG_O2,
+    TCG_REG_O3,
+    TCG_REG_O4,
+    TCG_REG_O5,
 };
 
 static const int tcg_target_call_iarg_regs[6] = {
@@ -137,12 +155,6 @@
     }
 }
 
-/* maximum number of register used for input function arguments */
-static inline int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return 6;
-}
-
 /* parse target specific constraints */
 static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str)
 {
@@ -161,9 +173,6 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
-#ifdef CONFIG_TCG_PASS_AREG0
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3);
-#endif
         break;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
@@ -242,7 +251,7 @@
 #define ARITH_XOR  (INSN_OP(2) | INSN_OP3(0x03))
 #define ARITH_SUB  (INSN_OP(2) | INSN_OP3(0x04))
 #define ARITH_SUBCC (INSN_OP(2) | INSN_OP3(0x14))
-#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x10))
+#define ARITH_ADDX (INSN_OP(2) | INSN_OP3(0x08))
 #define ARITH_SUBX (INSN_OP(2) | INSN_OP3(0x0c))
 #define ARITH_UMUL (INSN_OP(2) | INSN_OP3(0x0a))
 #define ARITH_UDIV (INSN_OP(2) | INSN_OP3(0x0e))
@@ -294,6 +303,16 @@
 #define ASI_PRIMARY_LITTLE 0x88
 #endif
 
+#define LDUH_LE    (LDUHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSH_LE    (LDSHA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDUW_LE    (LDUWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDSW_LE    (LDSWA | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define LDX_LE     (LDXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
+
+#define STH_LE     (STHA  | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STW_LE     (STWA  | INSN_ASI(ASI_PRIMARY_LITTLE))
+#define STX_LE     (STXA  | INSN_ASI(ASI_PRIMARY_LITTLE))
+
 static inline void tcg_out_arith(TCGContext *s, int rd, int rs1, int rs2,
                                  int op)
 {
@@ -359,71 +378,50 @@
         tcg_out_sethi(s, ret, ~arg);
         tcg_out_arithi(s, ret, ret, (arg & 0x3ff) | -0x400, ARITH_XOR);
     } else {
-        tcg_out_movi_imm32(s, TCG_REG_I4, arg >> (TCG_TARGET_REG_BITS / 2));
-        tcg_out_arithi(s, TCG_REG_I4, TCG_REG_I4, 32, SHIFT_SLLX);
-        tcg_out_movi_imm32(s, ret, arg);
-        tcg_out_arith(s, ret, ret, TCG_REG_I4, ARITH_OR);
+        tcg_out_movi_imm32(s, ret, arg >> (TCG_TARGET_REG_BITS / 2));
+        tcg_out_arithi(s, ret, ret, 32, SHIFT_SLLX);
+        tcg_out_movi_imm32(s, TCG_REG_T2, arg);
+        tcg_out_arith(s, ret, ret, TCG_REG_T2, ARITH_OR);
     }
 }
 
-static inline void tcg_out_ld_raw(TCGContext *s, int ret,
-                                  tcg_target_long arg)
+static inline void tcg_out_ldst_rr(TCGContext *s, int data, int a1,
+                                   int a2, int op)
 {
-    tcg_out_sethi(s, ret, arg);
-    tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
-              INSN_IMM13(arg & 0x3ff));
+    tcg_out32(s, op | INSN_RD(data) | INSN_RS1(a1) | INSN_RS2(a2));
 }
 
-static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
-                                  tcg_target_long arg)
+static inline void tcg_out_ldst(TCGContext *s, int ret, int addr,
+                                int offset, int op)
 {
-    if (!check_fit_tl(arg, 10))
-        tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ffULL);
-    if (TCG_TARGET_REG_BITS == 64) {
-        tcg_out32(s, LDX | INSN_RD(ret) | INSN_RS1(ret) |
-                  INSN_IMM13(arg & 0x3ff));
-    } else {
-        tcg_out32(s, LDUW | INSN_RD(ret) | INSN_RS1(ret) |
-                  INSN_IMM13(arg & 0x3ff));
-    }
-}
-
-static inline void tcg_out_ldst(TCGContext *s, int ret, int addr, int offset, int op)
-{
-    if (check_fit_tl(offset, 13))
+    if (check_fit_tl(offset, 13)) {
         tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(addr) |
                   INSN_IMM13(offset));
-    else {
-        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
-        tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
-                  INSN_RS2(addr));
+    } else {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, offset);
+        tcg_out_ldst_rr(s, ret, addr, TCG_REG_T1, op);
     }
 }
 
-static inline void tcg_out_ldst_asi(TCGContext *s, int ret, int addr,
-                                    int offset, int op, int asi)
-{
-    tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, offset);
-    tcg_out32(s, op | INSN_RD(ret) | INSN_RS1(TCG_REG_I5) |
-              INSN_ASI(asi) | INSN_RS2(addr));
-}
-
 static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret,
                               TCGReg arg1, tcg_target_long arg2)
 {
-    if (type == TCG_TYPE_I32)
-        tcg_out_ldst(s, ret, arg1, arg2, LDUW);
-    else
-        tcg_out_ldst(s, ret, arg1, arg2, LDX);
+    tcg_out_ldst(s, ret, arg1, arg2, (type == TCG_TYPE_I32 ? LDUW : LDX));
 }
 
 static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg,
                               TCGReg arg1, tcg_target_long arg2)
 {
-    if (type == TCG_TYPE_I32)
-        tcg_out_ldst(s, arg, arg1, arg2, STW);
-    else
-        tcg_out_ldst(s, arg, arg1, arg2, STX);
+    tcg_out_ldst(s, arg, arg1, arg2, (type == TCG_TYPE_I32 ? STW : STX));
+}
+
+static inline void tcg_out_ld_ptr(TCGContext *s, int ret,
+                                  tcg_target_long arg)
+{
+    if (!check_fit_tl(arg, 10)) {
+        tcg_out_movi(s, TCG_TYPE_PTR, ret, arg & ~0x3ff);
+    }
+    tcg_out_ld(s, TCG_TYPE_PTR, ret, ret, arg & 0x3ff);
 }
 
 static inline void tcg_out_sety(TCGContext *s, int rs)
@@ -442,20 +440,21 @@
         if (check_fit_tl(val, 13))
             tcg_out_arithi(s, reg, reg, val, ARITH_ADD);
         else {
-            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_I5, val);
-            tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_ADD);
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_T1, val);
+            tcg_out_arith(s, reg, reg, TCG_REG_T1, ARITH_ADD);
         }
     }
 }
 
-static inline void tcg_out_andi(TCGContext *s, int reg, tcg_target_long val)
+static inline void tcg_out_andi(TCGContext *s, int rd, int rs,
+                                tcg_target_long val)
 {
     if (val != 0) {
         if (check_fit_tl(val, 13))
-            tcg_out_arithi(s, reg, reg, val, ARITH_AND);
+            tcg_out_arithi(s, rd, rs, val, ARITH_AND);
         else {
-            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, val);
-            tcg_out_arith(s, reg, reg, TCG_REG_I5, ARITH_AND);
+            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_T1, val);
+            tcg_out_arith(s, rd, rs, TCG_REG_T1, ARITH_AND);
         }
     }
 }
@@ -467,8 +466,8 @@
     if (uns) {
         tcg_out_sety(s, TCG_REG_G0);
     } else {
-        tcg_out_arithi(s, TCG_REG_I5, rs1, 31, SHIFT_SRA);
-        tcg_out_sety(s, TCG_REG_I5);
+        tcg_out_arithi(s, TCG_REG_T1, rs1, 31, SHIFT_SRA);
+        tcg_out_sety(s, TCG_REG_T1);
     }
 
     tcg_out_arithc(s, rd, rs1, val2, val2const,
@@ -483,34 +482,37 @@
 static void tcg_out_branch_i32(TCGContext *s, int opc, int label_index)
 {
     TCGLabel *l = &s->labels[label_index];
+    uint32_t off22;
 
     if (l->has_value) {
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2)
-                      | INSN_OFF22(l->u.value - (unsigned long)s->code_ptr)));
+        off22 = INSN_OFF22(l->u.value - (unsigned long)s->code_ptr);
     } else {
+        /* Make sure to preserve destinations during retranslation.  */
+        off22 = *(uint32_t *)s->code_ptr & INSN_OFF22(-1);
         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP22, label_index, 0);
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | 0));
     }
+    tcg_out32(s, INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x2) | off22);
 }
 
 #if TCG_TARGET_REG_BITS == 64
 static void tcg_out_branch_i64(TCGContext *s, int opc, int label_index)
 {
     TCGLabel *l = &s->labels[label_index];
+    uint32_t off19;
 
     if (l->has_value) {
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
-                      (0x5 << 19) |
-                      INSN_OFF19(l->u.value - (unsigned long)s->code_ptr)));
+        off19 = INSN_OFF19(l->u.value - (unsigned long)s->code_ptr);
     } else {
+        /* Make sure to preserve destinations during retranslation.  */
+        off19 = *(uint32_t *)s->code_ptr & INSN_OFF19(-1);
         tcg_out_reloc(s, s->code_ptr, R_SPARC_WDISP19, label_index, 0);
-        tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
-                      (0x5 << 19) | 0));
     }
+    tcg_out32(s, (INSN_OP(0) | INSN_COND(opc, 0) | INSN_OP2(0x1) |
+                  (0x5 << 19) | off19));
 }
 #endif
 
-static const uint8_t tcg_cond_to_bcond[10] = {
+static const uint8_t tcg_cond_to_bcond[] = {
     [TCG_COND_EQ] = COND_E,
     [TCG_COND_NE] = COND_NE,
     [TCG_COND_LT] = COND_L,
@@ -614,8 +616,8 @@
     case TCG_COND_GTU:
     case TCG_COND_GEU:
         if (c2const && c2 != 0) {
-            tcg_out_movi_imm13(s, TCG_REG_I5, c2);
-            c2 = TCG_REG_I5;
+            tcg_out_movi_imm13(s, TCG_REG_T1, c2);
+            c2 = TCG_REG_T1;
         }
         t = c1, c1 = c2, c2 = t, c2const = 0;
         cond = tcg_swap_cond(cond);
@@ -627,18 +629,10 @@
 
     default:
         tcg_out_cmp(s, c1, c2, c2const);
-#if defined(__sparc_v9__) || defined(__sparc_v8plus__)
         tcg_out_movi_imm13(s, ret, 0);
-        tcg_out32 (s, ARITH_MOVCC | INSN_RD(ret)
-                   | INSN_RS1(tcg_cond_to_bcond[cond])
-                   | MOVCC_ICC | INSN_IMM11(1));
-#else
-        t = gen_new_label();
-        tcg_out_branch_i32(s, INSN_COND(tcg_cond_to_bcond[cond], 1), t);
-        tcg_out_movi_imm13(s, ret, 1);
-        tcg_out_movi_imm13(s, ret, 0);
-        tcg_out_label(s, t, s->code_ptr);
-#endif
+        tcg_out32(s, ARITH_MOVCC | INSN_RD(ret)
+                  | INSN_RS1(tcg_cond_to_bcond[cond])
+                  | MOVCC_ICC | INSN_IMM11(1));
         return;
     }
 
@@ -670,15 +664,15 @@
 
     switch (cond) {
     case TCG_COND_EQ:
-        tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_I5, al, bl, blconst);
+        tcg_out_setcond_i32(s, TCG_COND_EQ, TCG_REG_T1, al, bl, blconst);
         tcg_out_setcond_i32(s, TCG_COND_EQ, ret, ah, bh, bhconst);
-        tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_AND);
+        tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_AND);
         break;
 
     case TCG_COND_NE:
-        tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_I5, al, al, blconst);
+        tcg_out_setcond_i32(s, TCG_COND_NE, TCG_REG_T1, al, al, blconst);
         tcg_out_setcond_i32(s, TCG_COND_NE, ret, ah, bh, bhconst);
-        tcg_out_arith(s, ret, ret, TCG_REG_I5, ARITH_OR);
+        tcg_out_arith(s, ret, ret, TCG_REG_T1, ARITH_OR);
         break;
 
     default:
@@ -701,21 +695,42 @@
 /* Generate global QEMU prologue and epilogue code */
 static void tcg_target_qemu_prologue(TCGContext *s)
 {
-    tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_CALL_STACK_OFFSET,
-                  CPU_TEMP_BUF_NLONGS * (int)sizeof(long));
+    int tmp_buf_size, frame_size;
+
+    /* The TCG temp buffer is at the top of the frame, immediately
+       below the frame pointer.  */
+    tmp_buf_size = CPU_TEMP_BUF_NLONGS * (int)sizeof(long);
+    tcg_set_frame(s, TCG_REG_I6, TCG_TARGET_STACK_BIAS - tmp_buf_size,
+                  tmp_buf_size);
+
+    /* TCG_TARGET_CALL_STACK_OFFSET includes the stack bias, but is
+       otherwise the minimal frame usable by callees.  */
+    frame_size = TCG_TARGET_CALL_STACK_OFFSET - TCG_TARGET_STACK_BIAS;
+    frame_size += TCG_STATIC_CALL_ARGS_SIZE + tmp_buf_size;
+    frame_size += TCG_TARGET_STACK_ALIGN - 1;
+    frame_size &= -TCG_TARGET_STACK_ALIGN;
     tcg_out32(s, SAVE | INSN_RD(TCG_REG_O6) | INSN_RS1(TCG_REG_O6) |
-              INSN_IMM13(-(TCG_TARGET_STACK_MINFRAME +
-                           CPU_TEMP_BUF_NLONGS * (int)sizeof(long))));
+              INSN_IMM13(-frame_size));
+
+#ifdef CONFIG_USE_GUEST_BASE
+    if (GUEST_BASE != 0) {
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_GUEST_BASE_REG, GUEST_BASE);
+        tcg_regset_set_reg(s->reserved_regs, TCG_GUEST_BASE_REG);
+    }
+#endif
+
     tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I1) |
               INSN_RS2(TCG_REG_G0));
-    tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_I0);
+    /* delay slot */
+    tcg_out_nop(s);
+
+    /* No epilogue required.  We issue ret + restore directly in the TB.  */
 }
 
 #if defined(CONFIG_SOFTMMU)
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -733,441 +748,309 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static const void * const qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
 
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static const void * const qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
-#endif
+/* Perform the TLB load and compare.
 
-#if TARGET_LONG_BITS == 32
-#define TARGET_LD_OP LDUW
-#else
-#define TARGET_LD_OP LDX
-#endif
+   Inputs:
+   ADDRLO_IDX contains the index into ARGS of the low part of the
+   address; the high part of the address is at ADDR_LOW_IDX+1.
 
-#if defined(CONFIG_SOFTMMU)
-#if HOST_LONG_BITS == 32
-#define TARGET_ADDEND_LD_OP LDUW
-#else
-#define TARGET_ADDEND_LD_OP LDX
-#endif
-#endif
+   MEM_INDEX and S_BITS are the memory context and log2 size of the load.
 
-#ifdef __arch64__
-#define HOST_LD_OP LDX
-#define HOST_ST_OP STX
-#define HOST_SLL_OP SHIFT_SLLX
-#define HOST_SRA_OP SHIFT_SRAX
-#else
-#define HOST_LD_OP LDUW
-#define HOST_ST_OP STW
-#define HOST_SLL_OP SHIFT_SLL
-#define HOST_SRA_OP SHIFT_SRA
-#endif
+   WHICH is the offset into the CPUTLBEntry structure of the slot to read.
+   This should be offsetof addr_read or addr_write.
 
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
-                            int opc)
+   The result of the TLB comparison is in %[ix]cc.  The sanitized address
+   is in the returned register, maybe %o0.  The TLB addend is in %o1.  */
+
+static int tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index,
+                            int s_bits, const TCGArg *args, int which)
 {
-    int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
-#if defined(CONFIG_SOFTMMU)
-    uint32_t *label1_ptr, *label2_ptr;
-#endif
+    const int addrlo = args[addrlo_idx];
+    const int r0 = TCG_REG_O0;
+    const int r1 = TCG_REG_O1;
+    const int r2 = TCG_REG_O2;
+    int addr = addrlo;
+    int tlb_ofs;
 
-    data_reg = *args++;
-    addr_reg = *args++;
-    mem_index = *args;
-    s_bits = opc & 3;
+    if (TCG_TARGET_REG_BITS == 32 && TARGET_LONG_BITS == 64) {
+        /* Assemble the 64-bit address in R0.  */
+        tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
+        tcg_out_arithi(s, r1, args[addrlo_idx + 1], 32, SHIFT_SLLX);
+        tcg_out_arith(s, r0, r0, r1, ARITH_OR);
+    }
 
-    arg0 = TCG_REG_O0;
-    arg1 = TCG_REG_O1;
-    arg2 = TCG_REG_O2;
+    /* Shift the page number down to tlb-entry.  */
+    tcg_out_arithi(s, r1, addrlo,
+                   TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS, SHIFT_SRL);
 
-#if defined(CONFIG_SOFTMMU)
-    /* srl addr_reg, x, arg1 */
-    tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
-                   SHIFT_SRL);
-    /* and addr_reg, x, arg0 */
-    tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
-                   ARITH_AND);
+    /* Mask out the page offset, except for the required alignment.  */
+    tcg_out_andi(s, r0, addr, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
 
-    /* and arg1, x, arg1 */
-    tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
+    /* Compute tlb index, modulo tlb size.  */
+    tcg_out_andi(s, r1, r1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
 
-    /* add arg1, x, arg1 */
-    tcg_out_addi(s, arg1, offsetof(CPUArchState,
-                                   tlb_table[mem_index][0].addr_read));
+    /* Relative to the current ENV.  */
+    tcg_out_arith(s, r1, TCG_AREG0, r1, ARITH_ADD);
 
-    /* add env, arg1, arg1 */
-    tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
+    /* Find a base address that can load both tlb comparator and addend.  */
+    tlb_ofs = offsetof(CPUArchState, tlb_table[mem_index][0]);
+    if (!check_fit_tl(tlb_ofs + sizeof(CPUTLBEntry), 13)) {
+        tcg_out_addi(s, r1, tlb_ofs);
+        tlb_ofs = 0;
+    }
 
-    /* ld [arg1], arg2 */
-    tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
-              INSN_RS2(TCG_REG_G0));
+    /* Load the tlb comparator and the addend.  */
+    tcg_out_ld(s, TCG_TYPE_TL, r2, r1, tlb_ofs + which);
+    tcg_out_ld(s, TCG_TYPE_PTR, r1, r1, tlb_ofs+offsetof(CPUTLBEntry, addend));
 
     /* subcc arg0, arg2, %g0 */
-    tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
+    tcg_out_cmp(s, r0, r2, 0);
 
-    /* will become:
-       be label1
-        or
-       be,pt %xcc label1 */
-    label1_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, 0);
+    /* If the guest address must be zero-extended, do so now.  */
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+        tcg_out_arithi(s, r0, addrlo, 0, SHIFT_SRL);
+        return r0;
+    }
+    return addrlo;
+}
+#endif /* CONFIG_SOFTMMU */
 
-    /* mov (delay slot) */
-    tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
+static const int qemu_ld_opc[8] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+    LDUB, LDUH, LDUW, LDX, LDSB, LDSH, LDSW, LDX
+#else
+    LDUB, LDUH_LE, LDUW_LE, LDX_LE, LDSB, LDSH_LE, LDSW_LE, LDX_LE
+#endif
+};
 
-    /* mov */
-    tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
+static const int qemu_st_opc[4] = {
+#ifdef TARGET_WORDS_BIGENDIAN
+    STB, STH, STW, STX
+#else
+    STB, STH_LE, STW_LE, STX_LE
+#endif
+};
+
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int sizeop)
+{
+    int addrlo_idx = 1, datalo, datahi, addr_reg;
+#if defined(CONFIG_SOFTMMU)
+    int memi_idx, memi, s_bits, n;
+    uint32_t *label_ptr[2];
 #endif
 
-    /* XXX: move that code at the end of the TB */
+    datahi = datalo = args[0];
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        datahi = args[1];
+        addrlo_idx = 2;
+    }
+
+#if defined(CONFIG_SOFTMMU)
+    memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+    memi = args[memi_idx];
+    s_bits = sizeop & 3;
+
+    addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, s_bits, args,
+                                offsetof(CPUTLBEntry, addr_read));
+
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        int reg64;
+
+        /* bne,pn %[xi]cc, label0 */
+        label_ptr[0] = (uint32_t *)s->code_ptr;
+        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_NE, 0) | INSN_OP2(0x1)
+                      | ((TARGET_LONG_BITS == 64) << 21)));
+
+        /* TLB Hit.  */
+        /* Load all 64-bits into an O/G register.  */
+        reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
+        tcg_out_ldst_rr(s, reg64, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+
+        /* Move the two 32-bit pieces into the destination registers.  */
+        tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+        if (reg64 != datalo) {
+            tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+        }
+
+        /* b,a,pt label1 */
+        label_ptr[1] = (uint32_t *)s->code_ptr;
+        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x1)
+                      | (1 << 29) | (1 << 19)));
+    } else {
+        /* The fast path is exactly one insn.  Thus we can perform the
+           entire TLB Hit in the (annulled) delay slot of the branch
+           over the TLB Miss case.  */
+
+        /* beq,a,pt %[xi]cc, label0 */
+        label_ptr[0] = NULL;
+        label_ptr[1] = (uint32_t *)s->code_ptr;
+        tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
+                      | ((TARGET_LONG_BITS == 64) << 21)
+                      | (1 << 29) | (1 << 19)));
+        /* delay slot */
+        tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_ld_opc[sizeop]);
+    }
+
+    /* TLB Miss.  */
+
+    if (label_ptr[0]) {
+        *label_ptr[0] |= INSN_OFF19((unsigned long)s->code_ptr -
+                                    (unsigned long)label_ptr[0]);
+    }
+    n = 0;
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+                    args[addrlo_idx + 1]);
+    }
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+                args[addrlo_idx]);
+
     /* qemu_ld_helper[s_bits](arg0, arg1) */
     tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_ld_helpers[s_bits]
                            - (tcg_target_ulong)s->code_ptr) >> 2)
                          & 0x3fffffff));
-    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
-       global registers */
-    // delay slot
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                 sizeof(long), HOST_ST_OP);
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                 sizeof(long), HOST_LD_OP);
+    /* delay slot */
+    tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[n], memi);
 
-    /* data_reg = sign_extend(arg0) */
-    switch(opc) {
+    n = tcg_target_call_oarg_regs[0];
+    /* datalo = sign_extend(arg0) */
+    switch (sizeop) {
     case 0 | 4:
-        /* sll arg0, 24/56, data_reg */
-        tcg_out_arithi(s, data_reg, arg0, (int)sizeof(tcg_target_long) * 8 - 8,
-                       HOST_SLL_OP);
-        /* sra data_reg, 24/56, data_reg */
-        tcg_out_arithi(s, data_reg, data_reg,
-                       (int)sizeof(tcg_target_long) * 8 - 8, HOST_SRA_OP);
+        /* Recall that SRA sign extends from bit 31 through bit 63.  */
+        tcg_out_arithi(s, datalo, n, 24, SHIFT_SLL);
+        tcg_out_arithi(s, datalo, datalo, 24, SHIFT_SRA);
         break;
     case 1 | 4:
-        /* sll arg0, 16/48, data_reg */
-        tcg_out_arithi(s, data_reg, arg0,
-                       (int)sizeof(tcg_target_long) * 8 - 16, HOST_SLL_OP);
-        /* sra data_reg, 16/48, data_reg */
-        tcg_out_arithi(s, data_reg, data_reg,
-                       (int)sizeof(tcg_target_long) * 8 - 16, HOST_SRA_OP);
+        tcg_out_arithi(s, datalo, n, 16, SHIFT_SLL);
+        tcg_out_arithi(s, datalo, datalo, 16, SHIFT_SRA);
         break;
     case 2 | 4:
-        /* sll arg0, 32, data_reg */
-        tcg_out_arithi(s, data_reg, arg0, 32, HOST_SLL_OP);
-        /* sra data_reg, 32, data_reg */
-        tcg_out_arithi(s, data_reg, data_reg, 32, HOST_SRA_OP);
+        tcg_out_arithi(s, datalo, n, 0, SHIFT_SRA);
         break;
+    case 3:
+        if (TCG_TARGET_REG_BITS == 32) {
+            tcg_out_mov(s, TCG_TYPE_REG, datahi, n);
+            tcg_out_mov(s, TCG_TYPE_REG, datalo, n + 1);
+            break;
+        }
+        /* FALLTHRU */
     case 0:
     case 1:
     case 2:
-    case 3:
     default:
         /* mov */
-        tcg_out_mov(s, TCG_TYPE_REG, data_reg, arg0);
+        tcg_out_mov(s, TCG_TYPE_REG, datalo, n);
         break;
     }
 
-    /* will become:
-       ba label2 */
-    label2_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, 0);
-
-    /* nop (delay slot */
-    tcg_out_nop(s);
-
-    /* label1: */
-#if TARGET_LONG_BITS == 32
-    /* be label1 */
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
-                   INSN_OFF22((unsigned long)s->code_ptr -
-                              (unsigned long)label1_ptr));
+    *label_ptr[1] |= INSN_OFF19((unsigned long)s->code_ptr -
+                                (unsigned long)label_ptr[1]);
 #else
-    /* be,pt %xcc label1 */
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
-                   (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
-                              (unsigned long)label1_ptr));
-#endif
-
-    /* ld [arg1 + x], arg1 */
-    tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
-                 offsetof(CPUTLBEntry, addr_read), TARGET_ADDEND_LD_OP);
-
-#if TARGET_LONG_BITS == 32
-    /* and addr_reg, x, arg0 */
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
-    tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
-    /* add arg0, arg1, arg0 */
-    tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
-#else
-    /* add addr_reg, arg1, arg0 */
-    tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
-#endif
-
-#else
-    arg0 = addr_reg;
-#endif
-
-    switch(opc) {
-    case 0:
-        /* ldub [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDUB);
-        break;
-    case 0 | 4:
-        /* ldsb [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDSB);
-        break;
-    case 1:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* lduh [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDUH);
-#else
-        /* lduha [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUHA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 1 | 4:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* ldsh [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDSH);
-#else
-        /* ldsha [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSHA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 2:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* lduw [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDUW);
-#else
-        /* lduwa [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDUWA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 2 | 4:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* ldsw [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDSW);
-#else
-        /* ldswa [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDSWA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 3:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* ldx [arg0], data_reg */
-        tcg_out_ldst(s, data_reg, arg0, 0, LDX);
-#else
-        /* ldxa [arg0] ASI_PRIMARY_LITTLE, data_reg */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, LDXA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    default:
-        tcg_abort();
+    addr_reg = args[addrlo_idx];
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+        tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL);
+        addr_reg = TCG_REG_T1;
     }
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        int reg64 = (datalo < 16 ? datalo : TCG_REG_O0);
 
-#if defined(CONFIG_SOFTMMU)
-    /* label2: */
-    *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
-                   INSN_OFF22((unsigned long)s->code_ptr -
-                              (unsigned long)label2_ptr));
-#endif
+        tcg_out_ldst_rr(s, reg64, addr_reg,
+                        (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+                        qemu_ld_opc[sizeop]);
+
+        tcg_out_arithi(s, datahi, reg64, 32, SHIFT_SRLX);
+        if (reg64 != datalo) {
+            tcg_out_mov(s, TCG_TYPE_I32, datalo, reg64);
+        }
+    } else {
+        tcg_out_ldst_rr(s, datalo, addr_reg,
+                        (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+                        qemu_ld_opc[sizeop]);
+    }
+#endif /* CONFIG_SOFTMMU */
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
-                            int opc)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int sizeop)
 {
-    int addr_reg, data_reg, arg0, arg1, arg2, mem_index, s_bits;
+    int addrlo_idx = 1, datalo, datahi, addr_reg;
 #if defined(CONFIG_SOFTMMU)
-    uint32_t *label1_ptr, *label2_ptr;
+    int memi_idx, memi, n;
+    uint32_t *label_ptr;
 #endif
 
-    data_reg = *args++;
-    addr_reg = *args++;
-    mem_index = *args;
-
-    s_bits = opc;
-
-    arg0 = TCG_REG_O0;
-    arg1 = TCG_REG_O1;
-    arg2 = TCG_REG_O2;
-
-#if defined(CONFIG_SOFTMMU)
-    /* srl addr_reg, x, arg1 */
-    tcg_out_arithi(s, arg1, addr_reg, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS,
-                   SHIFT_SRL);
-
-    /* and addr_reg, x, arg0 */
-    tcg_out_arithi(s, arg0, addr_reg, TARGET_PAGE_MASK | ((1 << s_bits) - 1),
-                   ARITH_AND);
-
-    /* and arg1, x, arg1 */
-    tcg_out_andi(s, arg1, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS);
-
-    /* add arg1, x, arg1 */
-    tcg_out_addi(s, arg1, offsetof(CPUArchState,
-                                   tlb_table[mem_index][0].addr_write));
-
-    /* add env, arg1, arg1 */
-    tcg_out_arith(s, arg1, TCG_AREG0, arg1, ARITH_ADD);
-
-    /* ld [arg1], arg2 */
-    tcg_out32(s, TARGET_LD_OP | INSN_RD(arg2) | INSN_RS1(arg1) |
-              INSN_RS2(TCG_REG_G0));
-
-    /* subcc arg0, arg2, %g0 */
-    tcg_out_arith(s, TCG_REG_G0, arg0, arg2, ARITH_SUBCC);
-
-    /* will become:
-       be label1
-        or
-       be,pt %xcc label1 */
-    label1_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, 0);
-
-    /* mov (delay slot) */
-    tcg_out_mov(s, TCG_TYPE_PTR, arg0, addr_reg);
-
-    /* mov */
-    tcg_out_mov(s, TCG_TYPE_REG, arg1, data_reg);
-
-    /* mov */
-    tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
-
-#ifdef CONFIG_TCG_PASS_AREG0
-    /* XXX/FIXME: suboptimal */
-    tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
-                tcg_target_call_iarg_regs[2]);
-    tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
-                tcg_target_call_iarg_regs[1]);
-    tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
-                tcg_target_call_iarg_regs[0]);
-    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
-                TCG_AREG0);
-#endif
-    /* XXX: move that code at the end of the TB */
-    /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
-    tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[s_bits]
-                           - (tcg_target_ulong)s->code_ptr) >> 2)
-                         & 0x3fffffff));
-    /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
-       global registers */
-    // delay slot
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                 sizeof(long), HOST_ST_OP);
-    tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                 TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                 sizeof(long), HOST_LD_OP);
-
-    /* will become:
-       ba label2 */
-    label2_ptr = (uint32_t *)s->code_ptr;
-    tcg_out32(s, 0);
-
-    /* nop (delay slot) */
-    tcg_out_nop(s);
-
-#if TARGET_LONG_BITS == 32
-    /* be label1 */
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x2) |
-                   INSN_OFF22((unsigned long)s->code_ptr -
-                              (unsigned long)label1_ptr));
-#else
-    /* be,pt %xcc label1 */
-    *label1_ptr = (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1) |
-                   (0x5 << 19) | INSN_OFF19((unsigned long)s->code_ptr -
-                              (unsigned long)label1_ptr));
-#endif
-
-    /* ld [arg1 + x], arg1 */
-    tcg_out_ldst(s, arg1, arg1, offsetof(CPUTLBEntry, addend) -
-                 offsetof(CPUTLBEntry, addr_write), TARGET_ADDEND_LD_OP);
-
-#if TARGET_LONG_BITS == 32
-    /* and addr_reg, x, arg0 */
-    tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_I5, 0xffffffff);
-    tcg_out_arith(s, arg0, addr_reg, TCG_REG_I5, ARITH_AND);
-    /* add arg0, arg1, arg0 */
-    tcg_out_arith(s, arg0, arg0, arg1, ARITH_ADD);
-#else
-    /* add addr_reg, arg1, arg0 */
-    tcg_out_arith(s, arg0, addr_reg, arg1, ARITH_ADD);
-#endif
-
-#else
-    arg0 = addr_reg;
-#endif
-
-    switch(opc) {
-    case 0:
-        /* stb data_reg, [arg0] */
-        tcg_out_ldst(s, data_reg, arg0, 0, STB);
-        break;
-    case 1:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* sth data_reg, [arg0] */
-        tcg_out_ldst(s, data_reg, arg0, 0, STH);
-#else
-        /* stha data_reg, [arg0] ASI_PRIMARY_LITTLE */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, STHA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 2:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* stw data_reg, [arg0] */
-        tcg_out_ldst(s, data_reg, arg0, 0, STW);
-#else
-        /* stwa data_reg, [arg0] ASI_PRIMARY_LITTLE */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, STWA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    case 3:
-#ifdef TARGET_WORDS_BIGENDIAN
-        /* stx data_reg, [arg0] */
-        tcg_out_ldst(s, data_reg, arg0, 0, STX);
-#else
-        /* stxa data_reg, [arg0] ASI_PRIMARY_LITTLE */
-        tcg_out_ldst_asi(s, data_reg, arg0, 0, STXA, ASI_PRIMARY_LITTLE);
-#endif
-        break;
-    default:
-        tcg_abort();
+    datahi = datalo = args[0];
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        datahi = args[1];
+        addrlo_idx = 2;
     }
 
 #if defined(CONFIG_SOFTMMU)
-    /* label2: */
-    *label2_ptr = (INSN_OP(0) | INSN_COND(COND_A, 0) | INSN_OP2(0x2) |
-                   INSN_OFF22((unsigned long)s->code_ptr -
-                              (unsigned long)label2_ptr));
-#endif
+    memi_idx = addrlo_idx + 1 + (TARGET_LONG_BITS > TCG_TARGET_REG_BITS);
+    memi = args[memi_idx];
+
+    addr_reg = tcg_out_tlb_load(s, addrlo_idx, memi, sizeop, args,
+                                offsetof(CPUTLBEntry, addr_write));
+
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        /* Reconstruct the full 64-bit value.  */
+        tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
+        tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+        tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
+        datalo = TCG_REG_O2;
+    }
+
+    /* The fast path is exactly one insn.  Thus we can perform the entire
+       TLB Hit in the (annulled) delay slot of the branch over TLB Miss.  */
+    /* beq,a,pt %[xi]cc, label0 */
+    label_ptr = (uint32_t *)s->code_ptr;
+    tcg_out32(s, (INSN_OP(0) | INSN_COND(COND_E, 0) | INSN_OP2(0x1)
+                  | ((TARGET_LONG_BITS == 64) << 21)
+                  | (1 << 29) | (1 << 19)));
+    /* delay slot */
+    tcg_out_ldst_rr(s, datalo, addr_reg, TCG_REG_O1, qemu_st_opc[sizeop]);
+
+    /* TLB Miss.  */
+
+    n = 0;
+    tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[n++], TCG_AREG0);
+    if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+                    args[addrlo_idx + 1]);
+    }
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++],
+                args[addrlo_idx]);
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datahi);
+    }
+    tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n++], datalo);
+
+    /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
+    tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[sizeop]
+                           - (tcg_target_ulong)s->code_ptr) >> 2)
+                         & 0x3fffffff));
+    /* delay slot */
+    tcg_out_movi(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[n], memi);
+
+    *label_ptr |= INSN_OFF19((unsigned long)s->code_ptr -
+                             (unsigned long)label_ptr);
+#else
+    addr_reg = args[addrlo_idx];
+    if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 32) {
+        tcg_out_arithi(s, TCG_REG_T1, addr_reg, 0, SHIFT_SRL);
+        addr_reg = TCG_REG_T1;
+    }
+    if (TCG_TARGET_REG_BITS == 32 && sizeop == 3) {
+        tcg_out_arithi(s, TCG_REG_T1, datalo, 0, SHIFT_SRL);
+        tcg_out_arithi(s, TCG_REG_O2, datahi, 32, SHIFT_SLLX);
+        tcg_out_arith(s, TCG_REG_O2, TCG_REG_T1, TCG_REG_O2, ARITH_OR);
+        datalo = TCG_REG_O2;
+    }
+    tcg_out_ldst_rr(s, datalo, addr_reg,
+                    (GUEST_BASE ? TCG_GUEST_BASE_REG : TCG_REG_G0),
+                    qemu_st_opc[sizeop]);
+#endif /* CONFIG_SOFTMMU */
 }
 
 static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
@@ -1186,41 +1069,34 @@
     case INDEX_op_goto_tb:
         if (s->tb_jmp_offset) {
             /* direct jump method */
-            tcg_out_sethi(s, TCG_REG_I5, args[0] & 0xffffe000);
-            tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) |
-                      INSN_IMM13((args[0] & 0x1fff)));
+            uint32_t old_insn = *(uint32_t *)s->code_ptr;
             s->tb_jmp_offset[args[0]] = s->code_ptr - s->code_buf;
+            /* Make sure to preserve links during retranslation.  */
+            tcg_out32(s, CALL | (old_insn & ~INSN_OP(-1)));
         } else {
             /* indirect jump method */
-            tcg_out_ld_ptr(s, TCG_REG_I5, (tcg_target_long)(s->tb_next + args[0]));
-            tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_I5) |
+            tcg_out_ld_ptr(s, TCG_REG_T1,
+                           (tcg_target_long)(s->tb_next + args[0]));
+            tcg_out32(s, JMPL | INSN_RD(TCG_REG_G0) | INSN_RS1(TCG_REG_T1) |
                       INSN_RS2(TCG_REG_G0));
         }
         tcg_out_nop(s);
         s->tb_next_offset[args[0]] = s->code_ptr - s->code_buf;
         break;
     case INDEX_op_call:
-        if (const_args[0])
+        if (const_args[0]) {
             tcg_out32(s, CALL | ((((tcg_target_ulong)args[0]
                                    - (tcg_target_ulong)s->code_ptr) >> 2)
                                  & 0x3fffffff));
-        else {
-            tcg_out_ld_ptr(s, TCG_REG_I5,
+        } else {
+            tcg_out_ld_ptr(s, TCG_REG_T1,
                            (tcg_target_long)(s->tb_next + args[0]));
-            tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_I5) |
+            tcg_out32(s, JMPL | INSN_RD(TCG_REG_O7) | INSN_RS1(TCG_REG_T1) |
                       INSN_RS2(TCG_REG_G0));
         }
-        /* Store AREG0 in stack to avoid ugly glibc bugs that mangle
-           global registers */
-        // delay slot
-        tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                     TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                     sizeof(long), HOST_ST_OP);
-        tcg_out_ldst(s, TCG_AREG0, TCG_REG_CALL_STACK,
-                     TCG_TARGET_CALL_STACK_OFFSET - TCG_STATIC_CALL_ARGS_SIZE -
-                     sizeof(long), HOST_LD_OP);
+        /* delay slot */
+        tcg_out_nop(s);
         break;
-    case INDEX_op_jmp:
     case INDEX_op_br:
         tcg_out_branch_i32(s, COND_A, args[0]);
         tcg_out_nop(s);
@@ -1290,13 +1166,16 @@
         goto gen_arith;
     case INDEX_op_shl_i32:
         c = SHIFT_SLL;
-        goto gen_arith;
+    do_shift32:
+        /* Limit immediate shift count lest we create an illegal insn.  */
+        tcg_out_arithc(s, args[0], args[1], args[2] & 31, const_args[2], c);
+        break;
     case INDEX_op_shr_i32:
         c = SHIFT_SRL;
-        goto gen_arith;
+        goto do_shift32;
     case INDEX_op_sar_i32:
         c = SHIFT_SRA;
-        goto gen_arith;
+        goto do_shift32;
     case INDEX_op_mul_i32:
         c = ARITH_UMUL;
         goto gen_arith;
@@ -1317,11 +1196,11 @@
 
     case INDEX_op_rem_i32:
     case INDEX_op_remu_i32:
-        tcg_out_div32(s, TCG_REG_I5, args[1], args[2], const_args[2],
+        tcg_out_div32(s, TCG_REG_T1, args[1], args[2], const_args[2],
                       opc == INDEX_op_remu_i32);
-        tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2],
+        tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2],
                        ARITH_UMUL);
-        tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB);
+        tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB);
         break;
 
     case INDEX_op_brcond_i32:
@@ -1386,6 +1265,9 @@
         tcg_out_qemu_ld(s, args, 2 | 4);
         break;
 #endif
+    case INDEX_op_qemu_ld64:
+        tcg_out_qemu_ld(s, args, 3);
+        break;
     case INDEX_op_qemu_st8:
         tcg_out_qemu_st(s, args, 0);
         break;
@@ -1395,6 +1277,9 @@
     case INDEX_op_qemu_st32:
         tcg_out_qemu_st(s, args, 2);
         break;
+    case INDEX_op_qemu_st64:
+        tcg_out_qemu_st(s, args, 3);
+        break;
 
 #if TCG_TARGET_REG_BITS == 64
     case INDEX_op_movi_i64:
@@ -1411,13 +1296,16 @@
         break;
     case INDEX_op_shl_i64:
         c = SHIFT_SLLX;
-        goto gen_arith;
+    do_shift64:
+        /* Limit immediate shift count lest we create an illegal insn.  */
+        tcg_out_arithc(s, args[0], args[1], args[2] & 63, const_args[2], c);
+        break;
     case INDEX_op_shr_i64:
         c = SHIFT_SRLX;
-        goto gen_arith;
+        goto do_shift64;
     case INDEX_op_sar_i64:
         c = SHIFT_SRAX;
-        goto gen_arith;
+        goto do_shift64;
     case INDEX_op_mul_i64:
         c = ARITH_MULX;
         goto gen_arith;
@@ -1429,11 +1317,11 @@
         goto gen_arith;
     case INDEX_op_rem_i64:
     case INDEX_op_remu_i64:
-        tcg_out_arithc(s, TCG_REG_I5, args[1], args[2], const_args[2],
+        tcg_out_arithc(s, TCG_REG_T1, args[1], args[2], const_args[2],
                        opc == INDEX_op_rem_i64 ? ARITH_SDIVX : ARITH_UDIVX);
-        tcg_out_arithc(s, TCG_REG_I5, TCG_REG_I5, args[2], const_args[2],
+        tcg_out_arithc(s, TCG_REG_T1, TCG_REG_T1, args[2], const_args[2],
                        ARITH_MULX);
-        tcg_out_arith(s, args[0], args[1], TCG_REG_I5, ARITH_SUB);
+        tcg_out_arith(s, args[0], args[1], TCG_REG_T1, ARITH_SUB);
         break;
     case INDEX_op_ext32s_i64:
         if (const_args[1]) {
@@ -1459,13 +1347,6 @@
                             args[2], const_args[2]);
         break;
 
-    case INDEX_op_qemu_ld64:
-        tcg_out_qemu_ld(s, args, 3);
-        break;
-    case INDEX_op_qemu_st64:
-        tcg_out_qemu_st(s, args, 3);
-        break;
-
 #endif
     gen_arith:
         tcg_out_arithc(s, args[0], args[1], args[2], const_args[2], c);
@@ -1485,7 +1366,6 @@
     { INDEX_op_exit_tb, { } },
     { INDEX_op_goto_tb, { } },
     { INDEX_op_call, { "ri" } },
-    { INDEX_op_jmp, { "ri" } },
     { INDEX_op_br, { } },
 
     { INDEX_op_mov_i32, { "r", "r" } },
@@ -1530,20 +1410,6 @@
     { INDEX_op_mulu2_i32, { "r", "r", "r", "rJ" } },
 #endif
 
-    { INDEX_op_qemu_ld8u, { "r", "L" } },
-    { INDEX_op_qemu_ld8s, { "r", "L" } },
-    { INDEX_op_qemu_ld16u, { "r", "L" } },
-    { INDEX_op_qemu_ld16s, { "r", "L" } },
-    { INDEX_op_qemu_ld32, { "r", "L" } },
-#if TCG_TARGET_REG_BITS == 64
-    { INDEX_op_qemu_ld32u, { "r", "L" } },
-    { INDEX_op_qemu_ld32s, { "r", "L" } },
-#endif
-
-    { INDEX_op_qemu_st8, { "L", "L" } },
-    { INDEX_op_qemu_st16, { "L", "L" } },
-    { INDEX_op_qemu_st32, { "L", "L" } },
-
 #if TCG_TARGET_REG_BITS == 64
     { INDEX_op_mov_i64, { "r", "r" } },
     { INDEX_op_movi_i64, { "r" } },
@@ -1558,8 +1424,6 @@
     { INDEX_op_st16_i64, { "r", "r" } },
     { INDEX_op_st32_i64, { "r", "r" } },
     { INDEX_op_st_i64, { "r", "r" } },
-    { INDEX_op_qemu_ld64, { "L", "L" } },
-    { INDEX_op_qemu_st64, { "L", "L" } },
 
     { INDEX_op_add_i64, { "r", "r", "rJ" } },
     { INDEX_op_mul_i64, { "r", "r", "rJ" } },
@@ -1587,6 +1451,47 @@
     { INDEX_op_brcond_i64, { "r", "rJ" } },
     { INDEX_op_setcond_i64, { "r", "r", "rJ" } },
 #endif
+
+#if TCG_TARGET_REG_BITS == 64
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
+    { INDEX_op_qemu_ld32u, { "r", "L" } },
+    { INDEX_op_qemu_ld32s, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L" } },
+#elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
+    { INDEX_op_qemu_ld8u, { "r", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L" } },
+    { INDEX_op_qemu_ld64, { "r", "r", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L", "L" } },
+#else
+    { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld32, { "r", "L", "L" } },
+    { INDEX_op_qemu_ld64, { "L", "L", "L", "L" } },
+
+    { INDEX_op_qemu_st8, { "L", "L", "L" } },
+    { INDEX_op_qemu_st16, { "L", "L", "L" } },
+    { INDEX_op_qemu_st32, { "L", "L", "L" } },
+    { INDEX_op_qemu_st64, { "L", "L", "L", "L" } },
+#endif
+
     { -1 },
 };
 
@@ -1613,25 +1518,23 @@
                      (1 << TCG_REG_O7));
 
     tcg_regset_clear(s->reserved_regs);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0);
-#if TCG_TARGET_REG_BITS == 64
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I4); // for internal use
-#endif
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I5); // for internal use
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6);
-    tcg_regset_set_reg(s->reserved_regs, TCG_REG_O7);
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G0); /* zero */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G6); /* reserved for os */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_G7); /* thread pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I6); /* frame pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_I7); /* return address */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_O6); /* stack pointer */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T1); /* for internal use */
+    tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */
+
     tcg_add_target_add_op_defs(sparc_op_defs);
 }
 
 #if TCG_TARGET_REG_BITS == 64
 # define ELF_HOST_MACHINE  EM_SPARCV9
-#elif defined(__sparc_v8plus__)
+#else
 # define ELF_HOST_MACHINE  EM_SPARC32PLUS
 # define ELF_HOST_FLAGS    EF_SPARC_32PLUS
-#else
-# define ELF_HOST_MACHINE  EM_SPARC
 #endif
 
 typedef struct {
@@ -1687,3 +1590,18 @@
 
     tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
 }
+
+void tb_set_jmp_target1(uintptr_t jmp_addr, uintptr_t addr)
+{
+    uint32_t *ptr = (uint32_t *)jmp_addr;
+    tcg_target_long disp = (tcg_target_long)(addr - jmp_addr) >> 2;
+
+    /* We can reach the entire address space for 32-bit.  For 64-bit
+       the code_gen_buffer can't be larger than 2GB.  */
+    if (TCG_TARGET_REG_BITS == 64 && !check_fit_tl(disp, 30)) {
+        tcg_abort();
+    }
+
+    *ptr = CALL | (disp & 0x3fffffff);
+    flush_icache_range(jmp_addr, jmp_addr + 4);
+}
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index ee2274d..6314ffb 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -66,22 +66,19 @@
 #define TCG_CT_CONST_S13 0x200
 
 /* used for function call generation */
-#define TCG_REG_CALL_STACK TCG_REG_I6
-#ifdef __arch64__
-// Reserve space for AREG0
-#define TCG_TARGET_STACK_MINFRAME (176 + 4 * (int)sizeof(long) + \
-                                   TCG_STATIC_CALL_ARGS_SIZE)
-#define TCG_TARGET_CALL_STACK_OFFSET (2047 - 16)
-#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_REG_CALL_STACK TCG_REG_O6
+
+#if TCG_TARGET_REG_BITS == 64
+#define TCG_TARGET_STACK_BIAS           2047
+#define TCG_TARGET_STACK_ALIGN          16
+#define TCG_TARGET_CALL_STACK_OFFSET    (128 + 6*8 + TCG_TARGET_STACK_BIAS)
 #else
-// AREG0 + one word for alignment
-#define TCG_TARGET_STACK_MINFRAME (92 + (2 + 1) * (int)sizeof(long) + \
-                                   TCG_STATIC_CALL_ARGS_SIZE)
-#define TCG_TARGET_CALL_STACK_OFFSET TCG_TARGET_STACK_MINFRAME
-#define TCG_TARGET_STACK_ALIGN 8
+#define TCG_TARGET_STACK_BIAS           0
+#define TCG_TARGET_STACK_ALIGN          8
+#define TCG_TARGET_CALL_STACK_OFFSET    (64 + 4 + 6*4)
 #endif
 
-#ifdef __arch64__
+#if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_EXTEND_ARGS 1
 #endif
 
@@ -102,6 +99,7 @@
 #define TCG_TARGET_HAS_nand_i32         0
 #define TCG_TARGET_HAS_nor_i32          0
 #define TCG_TARGET_HAS_deposit_i32      0
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_div_i64          1
@@ -123,16 +121,12 @@
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
-/* Note: must be synced with dyngen-exec.h */
-#ifdef CONFIG_SOLARIS
-#define TCG_AREG0 TCG_REG_G2
-#elif defined(__sparc_v9__)
-#define TCG_AREG0 TCG_REG_G5
-#else
-#define TCG_AREG0 TCG_REG_G6
-#endif
+#define TCG_TARGET_HAS_GUEST_BASE
+
+#define TCG_AREG0 TCG_REG_I0
 
 static inline void flush_icache_range(tcg_target_ulong start,
                                       tcg_target_ulong stop)
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 169d3b2..5518458 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -518,18 +518,34 @@
     }
 }
 
-static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
+static inline void tcg_gen_andi_i32(TCGv_i32 ret, TCGv_i32 arg1, uint32_t arg2)
 {
-    /* some cases can be optimized here */
-    if (arg2 == 0) {
+    TCGv_i32 t0;
+    /* Some cases can be optimized here.  */
+    switch (arg2) {
+    case 0:
         tcg_gen_movi_i32(ret, 0);
-    } else if (arg2 == 0xffffffff) {
+        return;
+    case 0xffffffffu:
         tcg_gen_mov_i32(ret, arg1);
-    } else {
-        TCGv_i32 t0 = tcg_const_i32(arg2);
-        tcg_gen_and_i32(ret, arg1, t0);
-        tcg_temp_free_i32(t0);
+        return;
+    case 0xffu:
+        /* Don't recurse with tcg_gen_ext8u_i32.  */
+        if (TCG_TARGET_HAS_ext8u_i32) {
+            tcg_gen_op2_i32(INDEX_op_ext8u_i32, ret, arg1);
+            return;
+        }
+        break;
+    case 0xffffu:
+        if (TCG_TARGET_HAS_ext16u_i32) {
+            tcg_gen_op2_i32(INDEX_op_ext16u_i32, ret, arg1);
+            return;
+        }
+        break;
     }
+    t0 = tcg_const_i32(arg2);
+    tcg_gen_and_i32(ret, arg1, t0);
+    tcg_temp_free_i32(t0);
 }
 
 static inline void tcg_gen_or_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -543,9 +559,9 @@
 
 static inline void tcg_gen_ori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    /* some cases can be optimized here */
-    if (arg2 == 0xffffffff) {
-        tcg_gen_movi_i32(ret, 0xffffffff);
+    /* Some cases can be optimized here.  */
+    if (arg2 == -1) {
+        tcg_gen_movi_i32(ret, -1);
     } else if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
     } else {
@@ -566,9 +582,12 @@
 
 static inline void tcg_gen_xori_i32(TCGv_i32 ret, TCGv_i32 arg1, int32_t arg2)
 {
-    /* some cases can be optimized here */
+    /* Some cases can be optimized here.  */
     if (arg2 == 0) {
         tcg_gen_mov_i32(ret, arg1);
+    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i32) {
+        /* Don't recurse with tcg_gen_not_i32.  */
+        tcg_gen_op2_i32(INDEX_op_not_i32, ret, arg1);
     } else {
         TCGv_i32 t0 = tcg_const_i32(arg2);
         tcg_gen_xor_i32(ret, arg1, t0);
@@ -627,29 +646,49 @@
 static inline void tcg_gen_brcond_i32(TCGCond cond, TCGv_i32 arg1,
                                       TCGv_i32 arg2, int label_index)
 {
-    tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op4ii_i32(INDEX_op_brcond_i32, arg1, arg2, cond, label_index);
+    }
 }
 
 static inline void tcg_gen_brcondi_i32(TCGCond cond, TCGv_i32 arg1,
                                        int32_t arg2, int label_index)
 {
-    TCGv_i32 t0 = tcg_const_i32(arg2);
-    tcg_gen_brcond_i32(cond, arg1, t0, label_index);
-    tcg_temp_free_i32(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        TCGv_i32 t0 = tcg_const_i32(arg2);
+        tcg_gen_brcond_i32(cond, arg1, t0, label_index);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_setcond_i32(TCGCond cond, TCGv_i32 ret,
                                        TCGv_i32 arg1, TCGv_i32 arg2)
 {
-    tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(ret, 0);
+    } else {
+        tcg_gen_op4i_i32(INDEX_op_setcond_i32, ret, arg1, arg2, cond);
+    }
 }
 
 static inline void tcg_gen_setcondi_i32(TCGCond cond, TCGv_i32 ret,
                                         TCGv_i32 arg1, int32_t arg2)
 {
-    TCGv_i32 t0 = tcg_const_i32(arg2);
-    tcg_gen_setcond_i32(cond, ret, arg1, t0);
-    tcg_temp_free_i32(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(ret, 0);
+    } else {
+        TCGv_i32 t0 = tcg_const_i32(arg2);
+        tcg_gen_setcond_i32(cond, ret, arg1, t0);
+        tcg_temp_free_i32(t0);
+    }
 }
 
 static inline void tcg_gen_mul_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
@@ -945,17 +984,27 @@
 static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
                                       TCGv_i64 arg2, int label_index)
 {
-    tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
-                      TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
-                      TCGV_HIGH(arg2), cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op6ii_i32(INDEX_op_brcond2_i32,
+                          TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
+                          TCGV_HIGH(arg2), cond, label_index);
+    }
 }
 
 static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
-                     TCGV_LOW(arg1), TCGV_HIGH(arg1),
-                     TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i32(TCGV_LOW(ret), 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i32(TCGV_LOW(ret), 0);
+    } else {
+        tcg_gen_op6i_i32(INDEX_op_setcond2_i32, TCGV_LOW(ret),
+                         TCGV_LOW(arg1), TCGV_HIGH(arg1),
+                         TCGV_LOW(arg2), TCGV_HIGH(arg2), cond);
+    }
     tcg_gen_movi_i32(TCGV_HIGH(ret), 0);
 }
 
@@ -1120,9 +1169,38 @@
     }
 }
 
-static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
+static inline void tcg_gen_andi_i64(TCGv_i64 ret, TCGv_i64 arg1, uint64_t arg2)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
+    TCGv_i64 t0;
+    /* Some cases can be optimized here.  */
+    switch (arg2) {
+    case 0:
+        tcg_gen_movi_i64(ret, 0);
+        return;
+    case 0xffffffffffffffffull:
+        tcg_gen_mov_i64(ret, arg1);
+        return;
+    case 0xffull:
+        /* Don't recurse with tcg_gen_ext8u_i32.  */
+        if (TCG_TARGET_HAS_ext8u_i64) {
+            tcg_gen_op2_i64(INDEX_op_ext8u_i64, ret, arg1);
+            return;
+        }
+        break;
+    case 0xffffu:
+        if (TCG_TARGET_HAS_ext16u_i64) {
+            tcg_gen_op2_i64(INDEX_op_ext16u_i64, ret, arg1);
+            return;
+        }
+        break;
+    case 0xffffffffull:
+        if (TCG_TARGET_HAS_ext32u_i64) {
+            tcg_gen_op2_i64(INDEX_op_ext32u_i64, ret, arg1);
+            return;
+        }
+        break;
+    }
+    t0 = tcg_const_i64(arg2);
     tcg_gen_and_i64(ret, arg1, t0);
     tcg_temp_free_i64(t0);
 }
@@ -1138,9 +1216,16 @@
 
 static inline void tcg_gen_ori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
-    tcg_gen_or_i64(ret, arg1, t0);
-    tcg_temp_free_i64(t0);
+    /* Some cases can be optimized here.  */
+    if (arg2 == -1) {
+        tcg_gen_movi_i64(ret, -1);
+    } else if (arg2 == 0) {
+        tcg_gen_mov_i64(ret, arg1);
+    } else {
+        TCGv_i64 t0 = tcg_const_i64(arg2);
+        tcg_gen_or_i64(ret, arg1, t0);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 static inline void tcg_gen_xor_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1154,9 +1239,17 @@
 
 static inline void tcg_gen_xori_i64(TCGv_i64 ret, TCGv_i64 arg1, int64_t arg2)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
-    tcg_gen_xor_i64(ret, arg1, t0);
-    tcg_temp_free_i64(t0);
+    /* Some cases can be optimized here.  */
+    if (arg2 == 0) {
+        tcg_gen_mov_i64(ret, arg1);
+    } else if (arg2 == -1 && TCG_TARGET_HAS_not_i64) {
+        /* Don't recurse with tcg_gen_not_i64.  */
+        tcg_gen_op2_i64(INDEX_op_not_i64, ret, arg1);
+    } else {
+        TCGv_i64 t0 = tcg_const_i64(arg2);
+        tcg_gen_xor_i64(ret, arg1, t0);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 static inline void tcg_gen_shl_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1210,13 +1303,23 @@
 static inline void tcg_gen_brcond_i64(TCGCond cond, TCGv_i64 arg1,
                                       TCGv_i64 arg2, int label_index)
 {
-    tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        tcg_gen_op4ii_i64(INDEX_op_brcond_i64, arg1, arg2, cond, label_index);
+    }
 }
 
 static inline void tcg_gen_setcond_i64(TCGCond cond, TCGv_i64 ret,
                                        TCGv_i64 arg1, TCGv_i64 arg2)
 {
-    tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_movi_i64(ret, 1);
+    } else if (cond == TCG_COND_NEVER) {
+        tcg_gen_movi_i64(ret, 0);
+    } else {
+        tcg_gen_op4i_i64(INDEX_op_setcond_i64, ret, arg1, arg2, cond);
+    }
 }
 
 static inline void tcg_gen_mul_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1334,9 +1437,13 @@
 static inline void tcg_gen_brcondi_i64(TCGCond cond, TCGv_i64 arg1,
                                        int64_t arg2, int label_index)
 {
-    TCGv_i64 t0 = tcg_const_i64(arg2);
-    tcg_gen_brcond_i64(cond, arg1, t0, label_index);
-    tcg_temp_free_i64(t0);
+    if (cond == TCG_COND_ALWAYS) {
+        tcg_gen_br(label_index);
+    } else if (cond != TCG_COND_NEVER) {
+        TCGv_i64 t0 = tcg_const_i64(arg2);
+        tcg_gen_brcond_i64(cond, arg1, t0, label_index);
+        tcg_temp_free_i64(t0);
+    }
 }
 
 static inline void tcg_gen_setcondi_i64(TCGCond cond, TCGv_i64 ret,
@@ -1746,36 +1853,6 @@
 #endif
 }
 
-static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low, TCGv_i32 high)
-{
-#if TCG_TARGET_REG_BITS == 32
-    tcg_gen_mov_i32(TCGV_LOW(dest), low);
-    tcg_gen_mov_i32(TCGV_HIGH(dest), high);
-#else
-    TCGv_i64 tmp = tcg_temp_new_i64();
-    /* This extension is only needed for type correctness.
-       We may be able to do better given target specific information.  */
-    tcg_gen_extu_i32_i64(tmp, high);
-    tcg_gen_shli_i64(tmp, tmp, 32);
-    tcg_gen_extu_i32_i64(dest, low);
-    tcg_gen_or_i64(dest, dest, tmp);
-    tcg_temp_free_i64(tmp);
-#endif
-}
-
-static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low, TCGv_i64 high)
-{
-#if TCG_TARGET_REG_BITS == 32
-    tcg_gen_concat_i32_i64(dest, TCGV_LOW(low), TCGV_LOW(high));
-#else
-    TCGv_i64 tmp = tcg_temp_new_i64();
-    tcg_gen_ext32u_i64(dest, low);
-    tcg_gen_shli_i64(tmp, high, 32);
-    tcg_gen_or_i64(dest, dest, tmp);
-    tcg_temp_free_i64(tmp);
-#endif
-}
-
 static inline void tcg_gen_andc_i32(TCGv_i32 ret, TCGv_i32 arg1, TCGv_i32 arg2)
 {
     if (TCG_TARGET_HAS_andc_i32) {
@@ -2048,6 +2125,10 @@
     uint32_t mask;
     TCGv_i32 t1;
 
+    tcg_debug_assert(ofs < 32);
+    tcg_debug_assert(len <= 32);
+    tcg_debug_assert(ofs + len <= 32);
+
     if (ofs == 0 && len == 32) {
         tcg_gen_mov_i32(ret, arg2);
         return;
@@ -2079,6 +2160,10 @@
     uint64_t mask;
     TCGv_i64 t1;
 
+    tcg_debug_assert(ofs < 64);
+    tcg_debug_assert(len <= 64);
+    tcg_debug_assert(ofs + len <= 64);
+
     if (ofs == 0 && len == 64) {
         tcg_gen_mov_i64(ret, arg2);
         return;
@@ -2118,6 +2203,102 @@
     tcg_temp_free_i64(t1);
 }
 
+static inline void tcg_gen_concat_i32_i64(TCGv_i64 dest, TCGv_i32 low,
+                                          TCGv_i32 high)
+{
+#if TCG_TARGET_REG_BITS == 32
+    tcg_gen_mov_i32(TCGV_LOW(dest), low);
+    tcg_gen_mov_i32(TCGV_HIGH(dest), high);
+#else
+    TCGv_i64 tmp = tcg_temp_new_i64();
+    /* These extensions are only needed for type correctness.
+       We may be able to do better given target specific information.  */
+    tcg_gen_extu_i32_i64(tmp, high);
+    tcg_gen_extu_i32_i64(dest, low);
+    /* If deposit is available, use it.  Otherwise use the extra
+       knowledge that we have of the zero-extensions above.  */
+    if (TCG_TARGET_HAS_deposit_i64 && TCG_TARGET_deposit_i64_valid(32, 32)) {
+        tcg_gen_deposit_i64(dest, dest, tmp, 32, 32);
+    } else {
+        tcg_gen_shli_i64(tmp, tmp, 32);
+        tcg_gen_or_i64(dest, dest, tmp);
+    }
+    tcg_temp_free_i64(tmp);
+#endif
+}
+
+static inline void tcg_gen_concat32_i64(TCGv_i64 dest, TCGv_i64 low,
+                                        TCGv_i64 high)
+{
+    tcg_gen_deposit_i64(dest, low, high, 32, 32);
+}
+
+static inline void tcg_gen_movcond_i32(TCGCond cond, TCGv_i32 ret,
+                                       TCGv_i32 c1, TCGv_i32 c2,
+                                       TCGv_i32 v1, TCGv_i32 v2)
+{
+    if (TCG_TARGET_HAS_movcond_i32) {
+        tcg_gen_op6i_i32(INDEX_op_movcond_i32, ret, c1, c2, v1, v2, cond);
+    } else {
+        TCGv_i32 t0 = tcg_temp_new_i32();
+        TCGv_i32 t1 = tcg_temp_new_i32();
+        tcg_gen_setcond_i32(cond, t0, c1, c2);
+        tcg_gen_neg_i32(t0, t0);
+        tcg_gen_and_i32(t1, v1, t0);
+        tcg_gen_andc_i32(ret, v2, t0);
+        tcg_gen_or_i32(ret, ret, t1);
+        tcg_temp_free_i32(t0);
+        tcg_temp_free_i32(t1);
+    }
+}
+
+static inline void tcg_gen_movcond_i64(TCGCond cond, TCGv_i64 ret,
+                                       TCGv_i64 c1, TCGv_i64 c2,
+                                       TCGv_i64 v1, TCGv_i64 v2)
+{
+#if TCG_TARGET_REG_BITS == 32
+    TCGv_i32 t0 = tcg_temp_new_i32();
+    TCGv_i32 t1 = tcg_temp_new_i32();
+    tcg_gen_op6i_i32(INDEX_op_setcond2_i32, t0,
+                     TCGV_LOW(c1), TCGV_HIGH(c1),
+                     TCGV_LOW(c2), TCGV_HIGH(c2), cond);
+
+    if (TCG_TARGET_HAS_movcond_i32) {
+        tcg_gen_movi_i32(t1, 0);
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_LOW(ret), t0, t1,
+                            TCGV_LOW(v1), TCGV_LOW(v2));
+        tcg_gen_movcond_i32(TCG_COND_NE, TCGV_HIGH(ret), t0, t1,
+                            TCGV_HIGH(v1), TCGV_HIGH(v2));
+    } else {
+        tcg_gen_neg_i32(t0, t0);
+
+        tcg_gen_and_i32(t1, TCGV_LOW(v1), t0);
+        tcg_gen_andc_i32(TCGV_LOW(ret), TCGV_LOW(v2), t0);
+        tcg_gen_or_i32(TCGV_LOW(ret), TCGV_LOW(ret), t1);
+
+        tcg_gen_and_i32(t1, TCGV_HIGH(v1), t0);
+        tcg_gen_andc_i32(TCGV_HIGH(ret), TCGV_HIGH(v2), t0);
+        tcg_gen_or_i32(TCGV_HIGH(ret), TCGV_HIGH(ret), t1);
+    }
+    tcg_temp_free_i32(t0);
+    tcg_temp_free_i32(t1);
+#else
+    if (TCG_TARGET_HAS_movcond_i64) {
+        tcg_gen_op6i_i64(INDEX_op_movcond_i64, ret, c1, c2, v1, v2, cond);
+    } else {
+        TCGv_i64 t0 = tcg_temp_new_i64();
+        TCGv_i64 t1 = tcg_temp_new_i64();
+        tcg_gen_setcond_i64(cond, t0, c1, c2);
+        tcg_gen_neg_i64(t0, t0);
+        tcg_gen_and_i64(t1, v1, t0);
+        tcg_gen_andc_i64(ret, v2, t0);
+        tcg_gen_or_i64(ret, ret, t1);
+        tcg_temp_free_i64(t0);
+        tcg_temp_free_i64(t1);
+    }
+#endif
+}
+
 /***************************************/
 /* QEMU specific operations. Their type depend on the QEMU CPU
    type. */
@@ -2166,8 +2347,15 @@
     tcg_gen_op1i(INDEX_op_exit_tb, val);
 }
 
-static inline void tcg_gen_goto_tb(int idx)
+static inline void tcg_gen_goto_tb(unsigned idx)
 {
+    /* We only support two chained exits.  */
+    tcg_debug_assert(idx <= 1);
+#ifdef CONFIG_DEBUG_TCG
+    /* Verify that we havn't seen this numbered exit before.  */
+    tcg_debug_assert((tcg_ctx.goto_tb_issue_mask & (1 << idx)) == 0);
+    tcg_ctx.goto_tb_issue_mask |= 1 << idx;
+#endif
     tcg_gen_op1i(INDEX_op_goto_tb, idx);
 }
 
@@ -2434,6 +2622,7 @@
 #define tcg_gen_deposit_tl tcg_gen_deposit_i64
 #define tcg_const_tl tcg_const_i64
 #define tcg_const_local_tl tcg_const_local_i64
+#define tcg_gen_movcond_tl tcg_gen_movcond_i64
 #else
 #define tcg_gen_movi_tl tcg_gen_movi_i32
 #define tcg_gen_mov_tl tcg_gen_mov_i32
@@ -2505,6 +2694,7 @@
 #define tcg_gen_deposit_tl tcg_gen_deposit_i32
 #define tcg_const_tl tcg_const_i32
 #define tcg_const_local_tl tcg_const_local_i32
+#define tcg_gen_movcond_tl tcg_gen_movcond_i32
 #endif
 
 #if TCG_TARGET_REG_BITS == 32
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 8e06d03..04cb7ca 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -36,9 +36,8 @@
 
 DEF(discard, 1, 0, 0, 0)
 
-DEF(set_label, 0, 0, 1, 0)
+DEF(set_label, 0, 0, 1, TCG_OPF_BB_END)
 DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */
-DEF(jmp, 0, 1, 0, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS)
 
 #define IMPL(X) (X ? 0 : TCG_OPF_NOT_PRESENT)
@@ -51,6 +50,7 @@
 DEF(mov_i32, 1, 1, 0, 0)
 DEF(movi_i32, 1, 0, 1, 0)
 DEF(setcond_i32, 1, 2, 1, 0)
+DEF(movcond_i32, 1, 4, 1, IMPL(TCG_TARGET_HAS_movcond_i32))
 /* load/store */
 DEF(ld8u_i32, 1, 1, 1, 0)
 DEF(ld8s_i32, 1, 1, 1, 0)
@@ -107,6 +107,7 @@
 DEF(mov_i64, 1, 1, 0, IMPL64)
 DEF(movi_i64, 1, 0, 1, IMPL64)
 DEF(setcond_i64, 1, 2, 1, IMPL64)
+DEF(movcond_i64, 1, 4, 1, IMPL64 | IMPL(TCG_TARGET_HAS_movcond_i64))
 /* load/store */
 DEF(ld8u_i64, 1, 1, 1, IMPL64)
 DEF(ld8s_i64, 1, 1, 1, IMPL64)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 8386b70..32cd0c6 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -89,7 +89,6 @@
                        tcg_target_long arg2);
 static int tcg_target_const_match(tcg_target_long val,
                                   const TCGArgConstraint *arg_ct);
-static int tcg_target_get_call_iarg_regs_count(int flags);
 
 TCGOpDef tcg_op_defs[] = {
 #define DEF(s, oargs, iargs, cargs, flags) { #s, oargs, iargs, cargs, iargs + oargs + cargs, flags },
@@ -243,7 +242,6 @@
     int *sorted_args;
 
     memset(s, 0, sizeof(*s));
-    s->temps = s->static_temps;
     s->nb_globals = 0;
     
     /* Count total number of arguments and allocate the corresponding
@@ -299,6 +297,10 @@
     s->nb_labels = 0;
     s->current_frame_offset = s->frame_start;
 
+#ifdef CONFIG_DEBUG_TCG
+    s->goto_tb_issue_mask = 0;
+#endif
+
     gen_opc_ptr = gen_opc_buf;
     gen_opparam_ptr = gen_opparam_buf;
 }
@@ -861,6 +863,8 @@
 
 static const char * const cond_name[] =
 {
+    [TCG_COND_NEVER] = "never",
+    [TCG_COND_ALWAYS] = "always",
     [TCG_COND_EQ] = "eq",
     [TCG_COND_NE] = "ne",
     [TCG_COND_LT] = "lt",
@@ -937,11 +941,7 @@
                                                        args[nb_oargs + i]));
                 }
             }
-        } else if (c == INDEX_op_movi_i32 
-#if TCG_TARGET_REG_BITS == 64
-                   || c == INDEX_op_movi_i64
-#endif
-                   ) {
+        } else if (c == INDEX_op_movi_i32 || c == INDEX_op_movi_i64) {
             tcg_target_ulong val;
             TCGHelperInfo *th;
 
@@ -991,17 +991,13 @@
             }
             switch (c) {
             case INDEX_op_brcond_i32:
-#if TCG_TARGET_REG_BITS == 32
-            case INDEX_op_brcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
-            case INDEX_op_brcond_i64:
-#endif
             case INDEX_op_setcond_i32:
-#if TCG_TARGET_REG_BITS == 32
+            case INDEX_op_movcond_i32:
+            case INDEX_op_brcond2_i32:
             case INDEX_op_setcond2_i32:
-#elif TCG_TARGET_REG_BITS == 64
+            case INDEX_op_brcond_i64:
             case INDEX_op_setcond_i64:
-#endif
+            case INDEX_op_movcond_i64:
                 if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
                     qemu_log(",%s", cond_name[args[k++]]);
                 } else {
@@ -1297,11 +1293,6 @@
                 args--;
             }
             break;
-        case INDEX_op_set_label:
-            args--;
-            /* mark end of basic block */
-            tcg_la_bb_end(s, dead_temps);
-            break;
         case INDEX_op_debug_insn_start:
             args -= def->nb_args;
             break;
@@ -1463,7 +1454,8 @@
 {
     TCGTemp *ts;
     ts = &s->temps[temp];
-#ifndef __sparc_v9__ /* Sparc64 stack is accessed with offset of 2047 */
+#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
+    /* Sparc64 stack is accessed with offset of 2047 */
     s->current_frame_offset = (s->current_frame_offset +
                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
         ~(sizeof(tcg_target_long) - 1);
@@ -1866,7 +1858,7 @@
 
     flags = args[nb_oargs + nb_iargs];
 
-    nb_regs = tcg_target_get_call_iarg_regs_count(flags);
+    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
     if (nb_regs > nb_params)
         nb_regs = nb_params;
 
@@ -2059,22 +2051,29 @@
     }
 #endif
 
+#ifdef CONFIG_PROFILER
+    s->opt_time -= profile_getclock();
+#endif
+
 #ifdef USE_TCG_OPTIMIZATIONS
     gen_opparam_ptr =
         tcg_optimize(s, gen_opc_ptr, gen_opparam_buf, tcg_op_defs);
 #endif
 
 #ifdef CONFIG_PROFILER
+    s->opt_time += profile_getclock();
     s->la_time -= profile_getclock();
 #endif
+
     tcg_liveness_analysis(s);
+
 #ifdef CONFIG_PROFILER
     s->la_time += profile_getclock();
 #endif
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
-        qemu_log("OP after liveness analysis:\n");
+        qemu_log("OP after optimization and liveness analysis:\n");
         tcg_dump_ops(s);
         qemu_log("\n");
     }
@@ -2101,16 +2100,12 @@
 #endif
         switch(opc) {
         case INDEX_op_mov_i32:
-#if TCG_TARGET_REG_BITS == 64
         case INDEX_op_mov_i64:
-#endif
             dead_args = s->op_dead_args[op_index];
             tcg_reg_alloc_mov(s, def, args, dead_args);
             break;
         case INDEX_op_movi_i32:
-#if TCG_TARGET_REG_BITS == 64
         case INDEX_op_movi_i64:
-#endif
             tcg_reg_alloc_movi(s, args);
             break;
         case INDEX_op_debug_insn_start:
@@ -2241,6 +2236,9 @@
                 (double)s->interm_time / tot * 100.0);
     cpu_fprintf(f, "  gen_code time     %0.1f%%\n", 
                 (double)s->code_time / tot * 100.0);
+    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
+                (double)s->opt_time / (s->code_time ? s->code_time : 1)
+                * 100.0);
     cpu_fprintf(f, "liveness/code time  %0.1f%%\n", 
                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
diff --git a/tcg/tcg.h b/tcg/tcg.h
index d710694..7bafe0e 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -79,6 +79,7 @@
 #define TCG_TARGET_HAS_nand_i64         0
 #define TCG_TARGET_HAS_nor_i64          0
 #define TCG_TARGET_HAS_deposit_i64      0
+#define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
 #ifndef TCG_TARGET_deposit_i32_valid
@@ -265,18 +266,28 @@
 #define TCG_CALL_DUMMY_TCGV     MAKE_TCGV_I32(-1)
 #define TCG_CALL_DUMMY_ARG      ((TCGArg)(-1))
 
+/* Conditions.  Note that these are layed out for easy manipulation by
+   the the functions below:
+     bit 0 is used for inverting;
+     bit 1 is signed,
+     bit 2 is unsigned,
+     bit 3 is used with bit 0 for swapping signed/unsigned.  */
 typedef enum {
-    TCG_COND_EQ,
-    TCG_COND_NE,
-    TCG_COND_LT,
-    TCG_COND_GE,
-    TCG_COND_LE,
-    TCG_COND_GT,
+    /* non-signed */
+    TCG_COND_NEVER  = 0 | 0 | 0 | 0,
+    TCG_COND_ALWAYS = 0 | 0 | 0 | 1,
+    TCG_COND_EQ     = 8 | 0 | 0 | 0,
+    TCG_COND_NE     = 8 | 0 | 0 | 1,
+    /* signed */
+    TCG_COND_LT     = 0 | 0 | 2 | 0,
+    TCG_COND_GE     = 0 | 0 | 2 | 1,
+    TCG_COND_LE     = 8 | 0 | 2 | 0,
+    TCG_COND_GT     = 8 | 0 | 2 | 1,
     /* unsigned */
-    TCG_COND_LTU,
-    TCG_COND_GEU,
-    TCG_COND_LEU,
-    TCG_COND_GTU,
+    TCG_COND_LTU    = 0 | 4 | 0 | 0,
+    TCG_COND_GEU    = 0 | 4 | 0 | 1,
+    TCG_COND_LEU    = 8 | 4 | 0 | 0,
+    TCG_COND_GTU    = 8 | 4 | 0 | 1,
 } TCGCond;
 
 /* Invert the sense of the comparison.  */
@@ -288,13 +299,34 @@
 /* Swap the operands in a comparison.  */
 static inline TCGCond tcg_swap_cond(TCGCond c)
 {
-    int mask = (c < TCG_COND_LT ? 0 : c < TCG_COND_LTU ? 7 : 15);
-    return (TCGCond)(c ^ mask);
+    return c & 6 ? (TCGCond)(c ^ 9) : c;
 }
 
+/* Create an "unsigned" version of a "signed" comparison.  */
 static inline TCGCond tcg_unsigned_cond(TCGCond c)
 {
-    return (c >= TCG_COND_LT && c <= TCG_COND_GT ? c + 4 : c);
+    return c & 2 ? (TCGCond)(c ^ 6) : c;
+}
+
+/* Must a comparison be considered unsigned?  */
+static inline bool is_unsigned_cond(TCGCond c)
+{
+    return (c & 4) != 0;
+}
+
+/* Create a "high" version of a double-word comparison.
+   This removes equality from a LTE or GTE comparison.  */
+static inline TCGCond tcg_high_cond(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_GE:
+    case TCG_COND_LE:
+    case TCG_COND_GEU:
+    case TCG_COND_LEU:
+        return (TCGCond)(c ^ 8);
+    default:
+        return c;
+    }
 }
 
 #define TEMP_VAL_DEAD  0
@@ -335,7 +367,6 @@
     TCGPool *pool_first, *pool_current, *pool_first_large;
     TCGLabel *labels;
     int nb_labels;
-    TCGTemp *temps; /* globals first, temps after */
     int nb_globals;
     int nb_temps;
     /* index of free temps, -1 if none */
@@ -343,7 +374,7 @@
 
     /* goto_tb support */
     uint8_t *code_buf;
-    unsigned long *tb_next;
+    uintptr_t *tb_next;
     uint16_t *tb_next_offset;
     uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
 
@@ -361,7 +392,7 @@
     int frame_reg;
 
     uint8_t *code_ptr;
-    TCGTemp static_temps[TCG_MAX_TEMPS];
+    TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
 
     TCGHelperInfo *helpers;
     int nb_helpers;
@@ -382,12 +413,14 @@
     int64_t interm_time;
     int64_t code_time;
     int64_t la_time;
+    int64_t opt_time;
     int64_t restore_count;
     int64_t restore_time;
 #endif
 
 #ifdef CONFIG_DEBUG_TCG
     int temps_in_use;
+    int goto_tb_issue_mask;
 #endif
 };
 
@@ -458,11 +491,6 @@
 void tcg_temp_free_i64(TCGv_i64 arg);
 char *tcg_get_arg_str_i64(TCGContext *s, char *buf, int buf_size, TCGv_i64 arg);
 
-static inline bool tcg_arg_is_local(TCGContext *s, TCGArg arg)
-{
-    return s->temps[arg].temp_local;
-}
-
 #if defined(CONFIG_DEBUG_TCG)
 /* If you call tcg_clear_temp_count() at the start of a section of
  * code which is not supposed to leak any TCG temporaries, then
@@ -533,6 +561,15 @@
     abort();\
 } while (0)
 
+#ifdef CONFIG_DEBUG_TCG
+# define tcg_debug_assert(X) do { assert(X); } while (0)
+#elif QEMU_GNUC_PREREQ(4, 5)
+# define tcg_debug_assert(X) \
+    do { if (!(X)) { __builtin_unreachable(); } } while (0)
+#else
+# define tcg_debug_assert(X) do { (void)(X); } while (0)
+#endif
+
 void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
 
 #if TCG_TARGET_REG_BITS == 32
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index ef8580f..e930740 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -69,7 +69,6 @@
     { INDEX_op_exit_tb, { NULL } },
     { INDEX_op_goto_tb, { NULL } },
     { INDEX_op_call, { RI } },
-    { INDEX_op_jmp, { RI } },
     { INDEX_op_br, { NULL } },
 
     { INDEX_op_mov_i32, { R, R } },
@@ -300,7 +299,7 @@
 #endif
 };
 
-#if MAX_OPC_PARAM_IARGS != 4
+#if MAX_OPC_PARAM_IARGS != 5
 # error Fix needed, number of supported input arguments changed!
 #endif
 
@@ -309,16 +308,18 @@
     TCG_REG_R1,
     TCG_REG_R2,
     TCG_REG_R3,
-#if TCG_TARGET_REG_BITS == 32
-    /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
 #if 0 /* used for TCG_REG_CALL_STACK */
     TCG_REG_R4,
 #endif
     TCG_REG_R5,
+#if TCG_TARGET_REG_BITS == 32
+    /* 32 bit hosts need 2 * MAX_OPC_PARAM_IARGS registers. */
     TCG_REG_R6,
     TCG_REG_R7,
 #if TCG_TARGET_NB_REGS >= 16
     TCG_REG_R8,
+    TCG_REG_R9,
+    TCG_REG_R10,
 #else
 # error Too few input registers available
 #endif
@@ -581,9 +582,6 @@
     case INDEX_op_call:
         tcg_out_ri(s, const_args[0], args[0]);
         break;
-    case INDEX_op_jmp:
-        TODO();
-        break;
     case INDEX_op_setcond_i32:
         tcg_out_r(s, args[0]);
         tcg_out_r(s, args[1]);
@@ -798,9 +796,6 @@
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
-#ifdef CONFIG_TCG_PASS_AREG0
-        tcg_out_r(s, TCG_AREG0);
-#endif
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -811,9 +806,6 @@
 #endif
         break;
     case INDEX_op_qemu_st64:
-#ifdef CONFIG_TCG_PASS_AREG0
-        tcg_out_r(s, TCG_AREG0);
-#endif
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);
@@ -867,12 +859,6 @@
     return arg_ct->ct & TCG_CT_CONST;
 }
 
-/* Maximum number of register used for input function arguments. */
-static int tcg_target_get_call_iarg_regs_count(int flags)
-{
-    return ARRAY_SIZE(tcg_target_call_iarg_regs);
-}
-
 static void tcg_target_init(TCGContext *s)
 {
 #if defined(CONFIG_DEBUG_TCG_INTERPRETER)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 30a0f21..6d89495 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -75,6 +75,7 @@
 #define TCG_TARGET_HAS_not_i32          1
 #define TCG_TARGET_HAS_orc_i32          0
 #define TCG_TARGET_HAS_rot_i32          1
+#define TCG_TARGET_HAS_movcond_i32      0
 
 #if TCG_TARGET_REG_BITS == 64
 #define TCG_TARGET_HAS_bswap16_i64      1
@@ -98,6 +99,7 @@
 #define TCG_TARGET_HAS_not_i64          1
 #define TCG_TARGET_HAS_orc_i64          0
 #define TCG_TARGET_HAS_rot_i64          1
+#define TCG_TARGET_HAS_movcond_i64      0
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
 /* Offset to user memory in user mode. */
diff --git a/tci.c b/tci.c
index c79350d..98f5f71 100644
--- a/tci.c
+++ b/tci.c
@@ -25,7 +25,6 @@
 #endif
 
 #include "qemu-common.h"
-#include "dyngen-exec.h"        /* env */
 #include "exec-all.h"           /* MAX_OPC_PARAM_IARGS */
 #include "tcg-op.h"
 
@@ -37,17 +36,19 @@
         tcg_abort(); \
     } while (0)
 
-#if MAX_OPC_PARAM_IARGS != 4
+#if MAX_OPC_PARAM_IARGS != 5
 # error Fix needed, number of supported input arguments changed!
 #endif
 #if TCG_TARGET_REG_BITS == 32
 typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong, tcg_target_ulong,
+                                    tcg_target_ulong, tcg_target_ulong,
                                     tcg_target_ulong, tcg_target_ulong);
 #else
 typedef uint64_t (*helper_function)(tcg_target_ulong, tcg_target_ulong,
-                                    tcg_target_ulong, tcg_target_ulong);
+                                    tcg_target_ulong, tcg_target_ulong,
+                                    tcg_target_ulong);
 #endif
 
 /* TCI can optionally use a global register variable for env. */
@@ -63,17 +64,6 @@
 
 static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS];
 
-#if !defined(CONFIG_TCG_PASS_AREG0)
-# define helper_ldb_mmu(env, addr, mmu_idx) __ldb_mmu(addr, mmu_idx)
-# define helper_ldw_mmu(env, addr, mmu_idx) __ldw_mmu(addr, mmu_idx)
-# define helper_ldl_mmu(env, addr, mmu_idx) __ldl_mmu(addr, mmu_idx)
-# define helper_ldq_mmu(env, addr, mmu_idx) __ldq_mmu(addr, mmu_idx)
-# define helper_stb_mmu(env, addr, val, mmu_idx) __stb_mmu(addr, val, mmu_idx)
-# define helper_stw_mmu(env, addr, val, mmu_idx) __stw_mmu(addr, val, mmu_idx)
-# define helper_stl_mmu(env, addr, val, mmu_idx) __stl_mmu(addr, val, mmu_idx)
-# define helper_stq_mmu(env, addr, val, mmu_idx) __stq_mmu(addr, val, mmu_idx)
-#endif /* !CONFIG_TCG_PASS_AREG0 */
-
 static tcg_target_ulong tci_read_reg(TCGReg index)
 {
     assert(index < ARRAY_SIZE(tci_reg));
@@ -501,18 +491,20 @@
                                           tci_read_reg(TCG_REG_R5),
                                           tci_read_reg(TCG_REG_R6),
                                           tci_read_reg(TCG_REG_R7),
-                                          tci_read_reg(TCG_REG_R8));
+                                          tci_read_reg(TCG_REG_R8),
+                                          tci_read_reg(TCG_REG_R9),
+                                          tci_read_reg(TCG_REG_R10));
             tci_write_reg(TCG_REG_R0, tmp64);
             tci_write_reg(TCG_REG_R1, tmp64 >> 32);
 #else
             tmp64 = ((helper_function)t0)(tci_read_reg(TCG_REG_R0),
                                           tci_read_reg(TCG_REG_R1),
                                           tci_read_reg(TCG_REG_R2),
-                                          tci_read_reg(TCG_REG_R3));
+                                          tci_read_reg(TCG_REG_R3),
+                                          tci_read_reg(TCG_REG_R5));
             tci_write_reg(TCG_REG_R0, tmp64);
 #endif
             break;
-        case INDEX_op_jmp:
         case INDEX_op_br:
             label = tci_read_label(&tb_ptr);
             assert(tb_ptr == old_code_ptr + op_size);
diff --git a/tests/libqtest.c b/tests/libqtest.c
index 02d0392..71b84c1 100644
--- a/tests/libqtest.c
+++ b/tests/libqtest.c
@@ -85,6 +85,22 @@
     return ret;
 }
 
+static pid_t qtest_qemu_pid(QTestState *s)
+{
+    FILE *f;
+    char buffer[1024];
+    pid_t pid = -1;
+
+    f = fopen(s->pid_file, "r");
+    if (f) {
+        if (fgets(buffer, sizeof(buffer), f)) {
+            pid = atoi(buffer);
+        }
+    }
+    fclose(f);
+    return pid;
+}
+
 QTestState *qtest_init(const char *extra_args)
 {
     QTestState *s;
@@ -136,25 +152,21 @@
     qtest_qmp(s, "");
     qtest_qmp(s, "{ 'execute': 'qmp_capabilities' }");
 
+    if (getenv("QTEST_STOP")) {
+        kill(qtest_qemu_pid(s), SIGSTOP);
+    }
+
     return s;
 }
 
 void qtest_quit(QTestState *s)
 {
-    FILE *f;
-    char buffer[1024];
+    int status;
 
-    f = fopen(s->pid_file, "r");
-    if (f) {
-        if (fgets(buffer, sizeof(buffer), f)) {
-            pid_t pid = atoi(buffer);
-            int status = 0;
-
-            kill(pid, SIGTERM);
-            waitpid(pid, &status, 0);
-        }
-
-        fclose(f);
+    pid_t pid = qtest_qemu_pid(s);
+    if (pid != -1) {
+        kill(pid, SIGTERM);
+        waitpid(pid, &status, 0);
     }
 
     unlink(s->pid_file);
diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
index 55b16f8..dd4ef11 100755
--- a/tests/qemu-iotests/030
+++ b/tests/qemu-iotests/030
@@ -18,6 +18,7 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #
 
+import time
 import os
 import iotests
 from iotests import qemu_img, qemu_io
@@ -98,6 +99,43 @@
                          qemu_io('-c', 'map', test_img),
                          'image file map does not match backing file after streaming')
 
+    def test_stream_pause(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        result = self.vm.qmp('block-job-pause', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        offset = self.dictpath(result, 'return[0]/offset')
+
+        time.sleep(1)
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/offset', offset)
+
+        result = self.vm.qmp('block-job-resume', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+        self.assertEqual(qemu_io('-c', 'map', backing_img),
+                         qemu_io('-c', 'map', test_img),
+                         'image file map does not match backing file after streaming')
+
     def test_stream_partial(self):
         self.assert_no_active_streams()
 
@@ -157,6 +195,226 @@
         self.assert_no_active_streams()
         self.vm.shutdown()
 
+class TestErrors(ImageStreamingTestCase):
+    image_len = 2 * 1024 * 1024 # MB
+
+    # this should match STREAM_BUFFER_SIZE/512 in block/stream.c
+    STREAM_BUFFER_SIZE = 512 * 1024
+
+    def create_blkdebug_file(self, name, event, errno):
+        file = open(name, 'w')
+        file.write('''
+[inject-error]
+state = "1"
+event = "%s"
+errno = "%d"
+immediately = "off"
+once = "on"
+sector = "%d"
+
+[set-state]
+state = "1"
+event = "%s"
+new_state = "2"
+
+[set-state]
+state = "2"
+event = "%s"
+new_state = "1"
+''' % (event, errno, self.STREAM_BUFFER_SIZE / 512, event, event))
+        file.close()
+
+class TestEIO(TestErrors):
+    def setUp(self):
+        self.blkdebug_file = backing_img + ".blkdebug"
+        self.create_image(backing_img, TestErrors.image_len)
+        self.create_blkdebug_file(self.blkdebug_file, "read_aio", 5)
+        qemu_img('create', '-f', iotests.imgfmt,
+                 '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
+                       % (self.blkdebug_file, backing_img),
+                 test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(backing_img)
+        os.remove(self.blkdebug_file)
+
+    def test_report(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0')
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        error = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/error', 'Input/output error')
+                    self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+    def test_ignore(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0', on_error='ignore')
+        self.assert_qmp(result, 'return', {})
+
+        error = False
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', False)
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/error', 'Input/output error')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+    def test_stop(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0', on_error='stop')
+        self.assert_qmp(result, 'return', {})
+
+        error = False
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', True)
+                    self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE)
+                    self.assert_qmp(result, 'return[0]/io-status', 'failed')
+
+                    result = self.vm.qmp('block-job-resume', device='drive0')
+                    self.assert_qmp(result, 'return', {})
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', False)
+                    self.assert_qmp(result, 'return[0]/io-status', 'ok')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp_absent(event, 'data/error')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+    def test_enospc(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0', on_error='enospc')
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        error = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/error', 'Input/output error')
+                    self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
+
+class TestENOSPC(TestErrors):
+    def setUp(self):
+        self.blkdebug_file = backing_img + ".blkdebug"
+        self.create_image(backing_img, TestErrors.image_len)
+        self.create_blkdebug_file(self.blkdebug_file, "read_aio", 28)
+        qemu_img('create', '-f', iotests.imgfmt,
+                 '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw'
+                       % (self.blkdebug_file, backing_img),
+                 test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(backing_img)
+        os.remove(self.blkdebug_file)
+
+    def test_enospc(self):
+        self.assert_no_active_streams()
+
+        result = self.vm.qmp('block-stream', device='drive0', on_error='enospc')
+        self.assert_qmp(result, 'return', {})
+
+        error = False
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_ERROR':
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/operation', 'read')
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', True)
+                    self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE)
+                    self.assert_qmp(result, 'return[0]/io-status', 'nospace')
+
+                    result = self.vm.qmp('block-job-resume', device='drive0')
+                    self.assert_qmp(result, 'return', {})
+
+                    result = self.vm.qmp('query-block-jobs')
+                    self.assert_qmp(result, 'return[0]/paused', False)
+                    self.assert_qmp(result, 'return[0]/io-status', 'ok')
+                    error = True
+                elif event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assertTrue(error, 'job completed unexpectedly')
+                    self.assert_qmp(event, 'data/type', 'stream')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp_absent(event, 'data/error')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_streams()
+        self.vm.shutdown()
 
 class TestStreamStop(ImageStreamingTestCase):
     image_len = 8 * 1024 * 1024 * 1024 # GB
@@ -173,8 +431,6 @@
         os.remove(backing_img)
 
     def test_stream_stop(self):
-        import time
-
         self.assert_no_active_streams()
 
         result = self.vm.qmp('block-stream', device='drive0')
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
index 2f7d390..fa16b5c 100644
--- a/tests/qemu-iotests/030.out
+++ b/tests/qemu-iotests/030.out
@@ -1,5 +1,5 @@
-.......
+.............
 ----------------------------------------------------------------------
-Ran 7 tests
+Ran 13 tests
 
 OK
diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040
new file mode 100755
index 0000000..258e7ea
--- /dev/null
+++ b/tests/qemu-iotests/040
@@ -0,0 +1,178 @@
+#!/usr/bin/env python
+#
+# Tests for image block commit.
+#
+# Copyright (C) 2012 IBM, Corp.
+# Copyright (C) 2012 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Test for live block commit
+# Derived from Image Streaming Test 030
+
+import time
+import os
+import iotests
+from iotests import qemu_img, qemu_io
+import struct
+
+backing_img = os.path.join(iotests.test_dir, 'backing.img')
+mid_img = os.path.join(iotests.test_dir, 'mid.img')
+test_img = os.path.join(iotests.test_dir, 'test.img')
+
+class ImageCommitTestCase(iotests.QMPTestCase):
+    '''Abstract base class for image commit test cases'''
+
+    def assert_no_active_commit(self):
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return', [])
+
+    def cancel_and_wait(self, drive='drive0'):
+        '''Cancel a block job and wait for it to finish'''
+        result = self.vm.qmp('block-job-cancel', device=drive)
+        self.assert_qmp(result, 'return', {})
+
+        cancelled = False
+        while not cancelled:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_CANCELLED':
+                    self.assert_qmp(event, 'data/type', 'commit')
+                    self.assert_qmp(event, 'data/device', drive)
+                    cancelled = True
+
+        self.assert_no_active_commit()
+
+    def create_image(self, name, size):
+        file = open(name, 'w')
+        i = 0
+        while i < size:
+            sector = struct.pack('>l504xl', i / 512, i / 512)
+            file.write(sector)
+            i = i + 512
+        file.close()
+
+
+class TestSingleDrive(ImageCommitTestCase):
+    image_len = 1 * 1024 * 1024
+    test_len = 1 * 1024 * 256
+
+    def setUp(self):
+        self.create_image(backing_img, TestSingleDrive.image_len)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
+        qemu_io('-c', 'write -P 0xab 0 524288', backing_img)
+        qemu_io('-c', 'write -P 0xef 524288 524288', mid_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(mid_img)
+        os.remove(backing_img)
+
+    def test_commit(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img)
+        self.assert_qmp(result, 'return', {})
+
+        completed = False
+        while not completed:
+            for event in self.vm.get_qmp_events(wait=True):
+                if event['event'] == 'BLOCK_JOB_COMPLETED':
+                    self.assert_qmp(event, 'data/type', 'commit')
+                    self.assert_qmp(event, 'data/device', 'drive0')
+                    self.assert_qmp(event, 'data/offset', self.image_len)
+                    self.assert_qmp(event, 'data/len', self.image_len)
+                    completed = True
+
+        self.assert_no_active_commit()
+        self.vm.shutdown()
+
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed"))
+        self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed"))
+
+    def test_device_not_found(self):
+        result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img)
+        self.assert_qmp(result, 'error/class', 'DeviceNotFound')
+
+    def test_top_same_base(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same')
+
+    def test_top_invalid(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Top image file badfile not found')
+
+    def test_base_invalid(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img, base='badfile')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found')
+
+    def test_top_is_active(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % test_img, base='%s' % backing_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported')
+
+    def test_top_and_base_reversed(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img)
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img})
+
+    def test_top_omitted(self):
+        self.assert_no_active_commit()
+        result = self.vm.qmp('block-commit', device='drive0')
+        self.assert_qmp(result, 'error/class', 'GenericError')
+        self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing")
+
+
+class TestSetSpeed(ImageCommitTestCase):
+    image_len = 80 * 1024 * 1024 # MB
+
+    def setUp(self):
+        qemu_img('create', backing_img, str(TestSetSpeed.image_len))
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img)
+        qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img)
+        self.vm = iotests.VM().add_drive(test_img)
+        self.vm.launch()
+
+    def tearDown(self):
+        self.vm.shutdown()
+        os.remove(test_img)
+        os.remove(mid_img)
+        os.remove(backing_img)
+
+    def test_set_speed(self):
+        self.assert_no_active_commit()
+
+        result = self.vm.qmp('block-commit', device='drive0', top=mid_img, speed=1024 * 1024)
+        self.assert_qmp(result, 'return', {})
+
+        # Ensure the speed we set was accepted
+        result = self.vm.qmp('query-block-jobs')
+        self.assert_qmp(result, 'return[0]/device', 'drive0')
+        self.assert_qmp(result, 'return[0]/speed', 1024 * 1024)
+
+        self.cancel_and_wait()
+
+
+if __name__ == '__main__':
+    iotests.main(supported_fmts=['qcow2', 'qed'])
diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out
new file mode 100644
index 0000000..dae404e
--- /dev/null
+++ b/tests/qemu-iotests/040.out
@@ -0,0 +1,5 @@
+.........
+----------------------------------------------------------------------
+Ran 9 tests
+
+OK
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index ebb5ca4..66d2ba9 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -36,7 +36,7 @@
 027 rw auto quick
 028 rw backing auto
 029 rw auto quick
-030 rw auto
+030 rw auto backing
 031 rw auto quick
 032 rw auto
 033 rw auto
@@ -46,3 +46,4 @@
 037 rw auto backing
 038 rw auto backing
 039 rw auto
+040 rw auto
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
index e05b1d6..3c60b2d 100644
--- a/tests/qemu-iotests/iotests.py
+++ b/tests/qemu-iotests/iotests.py
@@ -19,6 +19,7 @@
 import os
 import re
 import subprocess
+import string
 import unittest
 import sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'QMP'))
 import qmp
@@ -96,9 +97,14 @@
             os.remove(self._qemu_log_path)
             self._popen = None
 
+    underscore_to_dash = string.maketrans('_', '-')
     def qmp(self, cmd, **args):
         '''Invoke a QMP command and return the result dict'''
-        return self._qmp.cmd(cmd, args=args)
+        qmp_args = dict()
+        for k in args.keys():
+            qmp_args[k.translate(self.underscore_to_dash)] = args[k]
+
+        return self._qmp.cmd(cmd, args=qmp_args)
 
     def get_qmp_events(self, wait=False):
         '''Poll for queued QMP events and return a list of dicts'''
@@ -132,6 +138,13 @@
                     self.fail('invalid index "%s" in path "%s" in "%s"' % (idx, path, str(d)))
         return d
 
+    def assert_qmp_absent(self, d, path):
+        try:
+            result = self.dictpath(d, path)
+        except AssertionError:
+            return
+        self.fail('path "%s" has value "%s"' % (path, str(result)))
+
     def assert_qmp(self, d, path, value):
         '''Assert that the value for a specific path in a QMP dict matches'''
         result = self.dictpath(d, path)
diff --git a/tests/rtc-test.c b/tests/rtc-test.c
index f23ac3a..7fdc94a 100644
--- a/tests/rtc-test.c
+++ b/tests/rtc-test.c
@@ -179,6 +179,77 @@
 
 static int wiggle = 2;
 
+static void set_year_20xx(void)
+{
+    /* Set BCD mode */
+    cmos_write(RTC_REG_B, cmos_read(RTC_REG_B) & ~REG_B_DM);
+    cmos_write(RTC_REG_A, 0x76);
+    cmos_write(RTC_YEAR, 0x11);
+    cmos_write(RTC_CENTURY, 0x20);
+    cmos_write(RTC_MONTH, 0x02);
+    cmos_write(RTC_DAY_OF_MONTH, 0x02);
+    cmos_write(RTC_HOURS, 0x02);
+    cmos_write(RTC_MINUTES, 0x04);
+    cmos_write(RTC_SECONDS, 0x58);
+    cmos_write(RTC_REG_A, 0x26);
+
+    g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04);
+    g_assert_cmpint(cmos_read(RTC_SECONDS), >=, 0x58);
+    g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x11);
+    g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x20);
+
+    /* Set a date in 2080 to ensure there is no year-2038 overflow.  */
+    cmos_write(RTC_REG_A, 0x76);
+    cmos_write(RTC_YEAR, 0x80);
+    cmos_write(RTC_REG_A, 0x26);
+
+    g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04);
+    g_assert_cmpint(cmos_read(RTC_SECONDS), >=, 0x58);
+    g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x80);
+    g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x20);
+
+    cmos_write(RTC_REG_A, 0x76);
+    cmos_write(RTC_YEAR, 0x11);
+    cmos_write(RTC_REG_A, 0x26);
+
+    g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04);
+    g_assert_cmpint(cmos_read(RTC_SECONDS), >=, 0x58);
+    g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x11);
+    g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x20);
+}
+
+static void set_year_1980(void)
+{
+    /* Set BCD mode */
+    cmos_write(RTC_REG_B, cmos_read(RTC_REG_B) & ~REG_B_DM);
+    cmos_write(RTC_REG_A, 0x76);
+    cmos_write(RTC_YEAR, 0x80);
+    cmos_write(RTC_CENTURY, 0x19);
+    cmos_write(RTC_MONTH, 0x02);
+    cmos_write(RTC_DAY_OF_MONTH, 0x02);
+    cmos_write(RTC_HOURS, 0x02);
+    cmos_write(RTC_MINUTES, 0x04);
+    cmos_write(RTC_SECONDS, 0x58);
+    cmos_write(RTC_REG_A, 0x26);
+
+    g_assert_cmpint(cmos_read(RTC_HOURS), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MINUTES), ==, 0x04);
+    g_assert_cmpint(cmos_read(RTC_SECONDS), >=, 0x58);
+    g_assert_cmpint(cmos_read(RTC_DAY_OF_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_MONTH), ==, 0x02);
+    g_assert_cmpint(cmos_read(RTC_YEAR), ==, 0x80);
+    g_assert_cmpint(cmos_read(RTC_CENTURY), ==, 0x19);
+}
+
 static void bcd_check_time(void)
 {
     /* Set BCD mode */
@@ -269,6 +340,8 @@
     qtest_add_func("/rtc/bcd/check-time", bcd_check_time);
     qtest_add_func("/rtc/dec/check-time", dec_check_time);
     qtest_add_func("/rtc/alarm-time", alarm_time);
+    qtest_add_func("/rtc/set-year/20xx", set_year_20xx);
+    qtest_add_func("/rtc/set-year/1980", set_year_1980);
     qtest_add_func("/rtc/fuzz-registers", fuzz_registers);
     ret = g_test_run();
 
diff --git a/trace-events b/trace-events
index 8fcbc50..42b66f1 100644
--- a/trace-events
+++ b/trace-events
@@ -74,10 +74,14 @@
 # block/stream.c
 stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
 stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p"
+commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d"
+commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p"
 
 # blockdev.c
 qmp_block_job_cancel(void *job) "job %p"
-block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
+qmp_block_job_pause(void *job) "job %p"
+qmp_block_job_resume(void *job) "job %p"
+block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d"
 qmp_block_stream(void *bs, void *job) "bs %p job %p"
 
 # hw/virtio-blk.c
@@ -243,9 +247,12 @@
 
 # hw/usb/hcd-ehci.c
 usb_ehci_reset(void) "=== RESET ==="
-usb_ehci_mmio_readl(uint32_t addr, const char *str, uint32_t val) "rd mmio %04x [%s] = %x"
-usb_ehci_mmio_writel(uint32_t addr, const char *str, uint32_t val) "wr mmio %04x [%s] = %x"
-usb_ehci_mmio_change(uint32_t addr, const char *str, uint32_t new, uint32_t old) "ch mmio %04x [%s] = %x (old: %x)"
+usb_ehci_opreg_read(uint32_t addr, const char *str, uint32_t val) "rd mmio %04x [%s] = %x"
+usb_ehci_opreg_write(uint32_t addr, const char *str, uint32_t val) "wr mmio %04x [%s] = %x"
+usb_ehci_opreg_change(uint32_t addr, const char *str, uint32_t new, uint32_t old) "ch mmio %04x [%s] = %x (old: %x)"
+usb_ehci_portsc_read(uint32_t addr, uint32_t port, uint32_t val) "rd mmio %04x [port %d] = %x"
+usb_ehci_portsc_write(uint32_t addr, uint32_t port, uint32_t val) "wr mmio %04x [port %d] = %x"
+usb_ehci_portsc_change(uint32_t addr, uint32_t port, uint32_t new, uint32_t old) "ch mmio %04x [port %d] = %x (old: %x)"
 usb_ehci_usbsts(const char *sts, int state) "usbsts %s %d"
 usb_ehci_state(const char *schedule, const char *state) "%s schedule %s"
 usb_ehci_qh_ptrs(void *q, uint32_t addr, uint32_t nxt, uint32_t c_qtd, uint32_t n_qtd, uint32_t a_qtd) "q %p - QH @ %08x: next %08x qtds %08x,%08x,%08x"
@@ -263,6 +270,9 @@
 usb_ehci_queue_action(void *q, const char *action) "q %p: %s"
 usb_ehci_packet_action(void *q, void *p, const char *action) "q %p p %p: %s"
 usb_ehci_irq(uint32_t level, uint32_t frindex, uint32_t sts, uint32_t mask) "level %d, frindex 0x%04x, sts 0x%x, mask 0x%x"
+usb_ehci_guest_bug(const char *reason) "%s"
+usb_ehci_doorbell_ring(void) ""
+usb_ehci_doorbell_ack(void) ""
 
 # hw/usb/hcd-uhci.c
 usb_uhci_reset(void) "=== RESET ==="
@@ -310,7 +320,10 @@
 usb_xhci_doorbell_write(uint32_t off, uint32_t val) "off 0x%04x, val 0x%08x"
 usb_xhci_irq_intx(uint32_t level) "level %d"
 usb_xhci_irq_msi(uint32_t nr) "nr %d"
-usb_xhci_queue_event(uint32_t idx, const char *name, uint64_t param, uint32_t status, uint32_t control) "idx %d, %s, p %016" PRIx64 ", s %08x, c 0x%08x"
+usb_xhci_irq_msix(uint32_t nr) "nr %d"
+usb_xhci_irq_msix_use(uint32_t nr) "nr %d"
+usb_xhci_irq_msix_unuse(uint32_t nr) "nr %d"
+usb_xhci_queue_event(uint32_t vector, uint32_t idx, const char *trb, const char *evt, uint64_t param, uint32_t status, uint32_t control) "v %d, idx %d, %s, %s, p %016" PRIx64 ", s %08x, c 0x%08x"
 usb_xhci_fetch_trb(uint64_t addr, const char *name, uint64_t param, uint32_t status, uint32_t control) "addr %016" PRIx64 ", %s, p %016" PRIx64 ", s %08x, c 0x%08x"
 usb_xhci_slot_enable(uint32_t slotid) "slotid %d"
 usb_xhci_slot_disable(uint32_t slotid) "slotid %d"
@@ -320,10 +333,11 @@
 usb_xhci_slot_reset(uint32_t slotid) "slotid %d"
 usb_xhci_ep_enable(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
 usb_xhci_ep_disable(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
+usb_xhci_ep_set_dequeue(uint32_t slotid, uint32_t epid, uint64_t param) "slotid %d, epid %d, ptr %016" PRIx64
 usb_xhci_ep_kick(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
 usb_xhci_ep_stop(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
 usb_xhci_ep_reset(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
-usb_xhci_xfer_start(void *xfer, uint32_t slotid, uint32_t epid, uint32_t length) "%p: slotid %d, epid %d, length %d"
+usb_xhci_xfer_start(void *xfer, uint32_t slotid, uint32_t epid) "%p: slotid %d, epid %d"
 usb_xhci_xfer_async(void *xfer) "%p"
 usb_xhci_xfer_nak(void *xfer) "%p"
 usb_xhci_xfer_retry(void *xfer) "%p"
@@ -336,6 +350,7 @@
 usb_desc_config(int addr, int index, int len, int ret) "dev %d query config %d, len %d, ret %d"
 usb_desc_other_speed_config(int addr, int index, int len, int ret) "dev %d query config %d, len %d, ret %d"
 usb_desc_string(int addr, int index, int len, int ret) "dev %d query string %d, len %d, ret %d"
+usb_desc_bos(int addr, int len, int ret) "dev %d bos, len %d, ret %d"
 usb_set_addr(int addr) "dev %d"
 usb_set_config(int addr, int config, int ret) "dev %d, config %d, ret %d"
 usb_set_interface(int addr, int iface, int alt, int ret) "dev %d, interface %d, altsetting %d, ret %d"
@@ -924,8 +939,9 @@
 qxl_interface_update_area_complete_overflow(int qid, int max) "%d max=%d"
 qxl_interface_update_area_complete_schedule_bh(int qid, uint32_t num_dirty) "%d #dirty=%d"
 qxl_io_destroy_primary_ignored(int qid, const char *mode) "%d %s"
+qxl_io_log(int qid, const uint8_t *log_buf) "%d %s"
 qxl_io_read_unexpected(int qid) "%d"
-qxl_io_unexpected_vga_mode(int qid, uint32_t io_port, const char *desc) "%d 0x%x (%s)"
+qxl_io_unexpected_vga_mode(int qid, uint64_t addr, uint64_t val, const char *desc) "%d 0x%"PRIx64"=%"PRIu64" (%s)"
 qxl_io_write(int qid, const char *mode, uint64_t addr, uint64_t val, unsigned size, int async) "%d %s addr=%"PRIu64 " val=%"PRIu64" size=%u async=%d"
 qxl_memslot_add_guest(int qid, uint32_t slot_id, uint64_t guest_start, uint64_t guest_end) "%d %u: guest phys 0x%"PRIx64 " - 0x%" PRIx64
 qxl_post_load(int qid, const char *mode) "%d %s"
@@ -956,7 +972,7 @@
 qxl_spice_destroy_surface_wait_complete(int qid, uint32_t id) "%d sid=%d"
 qxl_spice_destroy_surface_wait(int qid, uint32_t id, int async) "%d sid=%d async=%d"
 qxl_spice_flush_surfaces_async(int qid, uint32_t surface_count, uint32_t num_free_res) "%d s#=%d, res#=%d"
-qxl_spice_monitors_config(int id) "%d"
+qxl_spice_monitors_config(int qid) "%d"
 qxl_spice_loadvm_commands(int qid, void *ext, uint32_t count) "%d ext=%p count=%d"
 qxl_spice_oom(int qid) "%d"
 qxl_spice_reset_cursor(int qid) "%d"
@@ -965,6 +981,12 @@
 qxl_spice_update_area(int qid, uint32_t surface_id, uint32_t left, uint32_t right, uint32_t top, uint32_t bottom) "%d sid=%d [%d,%d,%d,%d]"
 qxl_spice_update_area_rest(int qid, uint32_t num_dirty_rects, uint32_t clear_dirty_region) "%d #d=%d clear=%d"
 qxl_surfaces_dirty(int qid, int surface, int offset, int size) "%d surface=%d offset=%d size=%d"
+qxl_send_events(int qid, uint32_t events) "%d %d"
+qxl_set_guest_bug(int qid) "%d"
+qxl_interrupt_client_monitors_config(int qid, int num_heads, void *heads) "%d %d %p"
+qxl_client_monitors_config_unsupported_by_guest(int qid, uint32_t int_mask, void *client_monitors_config) "%d %X %p"
+qxl_client_monitors_config_capped(int qid, int requested, int limit) "%d %d %d"
+qxl_client_monitors_config_crc(int qid, unsigned size, uint32_t crc32) "%d %u %u"
 
 # hw/qxl-render.c
 qxl_render_blit_guest_primary_initialized(void) ""
diff --git a/ui/spice-display.c b/ui/spice-display.c
index 99bc665..50fbefb 100644
--- a/ui/spice-display.c
+++ b/ui/spice-display.c
@@ -164,34 +164,31 @@
 #endif
 }
 
-static SimpleSpiceUpdate *qemu_spice_create_update(SimpleSpiceDisplay *ssd)
+static void qemu_spice_create_one_update(SimpleSpiceDisplay *ssd,
+                                         QXLRect *rect)
 {
     SimpleSpiceUpdate *update;
     QXLDrawable *drawable;
     QXLImage *image;
     QXLCommand *cmd;
-    uint8_t *src, *dst;
-    int by, bw, bh;
+    uint8_t *src, *mirror, *dst;
+    int by, bw, bh, offset, bytes;
     struct timespec time_space;
 
-    if (qemu_spice_rect_is_empty(&ssd->dirty)) {
-        return NULL;
-    };
-
     trace_qemu_spice_create_update(
-           ssd->dirty.left, ssd->dirty.right,
-           ssd->dirty.top, ssd->dirty.bottom);
+           rect->left, rect->right,
+           rect->top, rect->bottom);
 
     update   = g_malloc0(sizeof(*update));
     drawable = &update->drawable;
     image    = &update->image;
     cmd      = &update->ext.cmd;
 
-    bw       = ssd->dirty.right - ssd->dirty.left;
-    bh       = ssd->dirty.bottom - ssd->dirty.top;
+    bw       = rect->right - rect->left;
+    bh       = rect->bottom - rect->top;
     update->bitmap = g_malloc(bw * bh * 4);
 
-    drawable->bbox            = ssd->dirty;
+    drawable->bbox            = *rect;
     drawable->clip.type       = SPICE_CLIP_TYPE_NONE;
     drawable->effect          = QXL_EFFECT_OPAQUE;
     drawable->release_info.id = (uintptr_t)update;
@@ -219,31 +216,103 @@
     image->bitmap.palette = 0;
     image->bitmap.format = SPICE_BITMAP_FMT_32BIT;
 
-    if (ssd->conv == NULL) {
-        PixelFormat dst = qemu_default_pixelformat(32);
-        ssd->conv = qemu_pf_conv_get(&dst, &ssd->ds->surface->pf);
-        assert(ssd->conv);
-    }
-
-    src = ds_get_data(ssd->ds) +
-        ssd->dirty.top * ds_get_linesize(ssd->ds) +
-        ssd->dirty.left * ds_get_bytes_per_pixel(ssd->ds);
+    offset =
+        rect->top * ds_get_linesize(ssd->ds) +
+        rect->left * ds_get_bytes_per_pixel(ssd->ds);
+    bytes = ds_get_bytes_per_pixel(ssd->ds) * bw;
+    src = ds_get_data(ssd->ds) + offset;
+    mirror = ssd->ds_mirror + offset;
     dst = update->bitmap;
     for (by = 0; by < bh; by++) {
-        qemu_pf_conv_run(ssd->conv, dst, src, bw);
+        memcpy(mirror, src, bytes);
+        qemu_pf_conv_run(ssd->conv, dst, mirror, bw);
         src += ds_get_linesize(ssd->ds);
+        mirror += ds_get_linesize(ssd->ds);
         dst += image->bitmap.stride;
     }
 
     cmd->type = QXL_CMD_DRAW;
     cmd->data = (uintptr_t)drawable;
 
+    QTAILQ_INSERT_TAIL(&ssd->updates, update, next);
+}
+
+static void qemu_spice_create_update(SimpleSpiceDisplay *ssd)
+{
+    static const int blksize = 32;
+    int blocks = (ds_get_width(ssd->ds) + blksize - 1) / blksize;
+    int dirty_top[blocks];
+    int y, yoff, x, xoff, blk, bw;
+    int bpp = ds_get_bytes_per_pixel(ssd->ds);
+    uint8_t *guest, *mirror;
+
+    if (qemu_spice_rect_is_empty(&ssd->dirty)) {
+        return;
+    };
+
+    if (ssd->conv == NULL) {
+        PixelFormat dst = qemu_default_pixelformat(32);
+        ssd->conv = qemu_pf_conv_get(&dst, &ssd->ds->surface->pf);
+        assert(ssd->conv);
+    }
+    if (ssd->ds_mirror == NULL) {
+        int size = ds_get_height(ssd->ds) * ds_get_linesize(ssd->ds);
+        ssd->ds_mirror = g_malloc0(size);
+    }
+
+    for (blk = 0; blk < blocks; blk++) {
+        dirty_top[blk] = -1;
+    }
+
+    guest = ds_get_data(ssd->ds);
+    mirror = ssd->ds_mirror;
+    for (y = ssd->dirty.top; y < ssd->dirty.bottom; y++) {
+        yoff = y * ds_get_linesize(ssd->ds);
+        for (x = ssd->dirty.left; x < ssd->dirty.right; x += blksize) {
+            xoff = x * bpp;
+            blk = x / blksize;
+            bw = MIN(blksize, ssd->dirty.right - x);
+            if (memcmp(guest + yoff + xoff,
+                       mirror + yoff + xoff,
+                       bw * bpp) == 0) {
+                if (dirty_top[blk] != -1) {
+                    QXLRect update = {
+                        .top    = dirty_top[blk],
+                        .bottom = y,
+                        .left   = x,
+                        .right  = x + bw,
+                    };
+                    qemu_spice_create_one_update(ssd, &update);
+                    dirty_top[blk] = -1;
+                }
+            } else {
+                if (dirty_top[blk] == -1) {
+                    dirty_top[blk] = y;
+                }
+            }
+        }
+    }
+
+    for (x = ssd->dirty.left; x < ssd->dirty.right; x += blksize) {
+        blk = x / blksize;
+        bw = MIN(blksize, ssd->dirty.right - x);
+        if (dirty_top[blk] != -1) {
+            QXLRect update = {
+                .top    = dirty_top[blk],
+                .bottom = ssd->dirty.bottom,
+                .left   = x,
+                .right  = x + bw,
+            };
+            qemu_spice_create_one_update(ssd, &update);
+            dirty_top[blk] = -1;
+        }
+    }
+
     memset(&ssd->dirty, 0, sizeof(ssd->dirty));
-    return update;
 }
 
 /*
- * Called from spice server thread context (via interface_release_ressource)
+ * Called from spice server thread context (via interface_release_resource)
  * We do *not* hold the global qemu mutex here, so extra care is needed
  * when calling qemu functions.  QEMU interfaces used:
  *    - g_free (underlying glibc free is re-entrant).
@@ -315,6 +384,7 @@
 {
     ssd->ds = ds;
     qemu_mutex_init(&ssd->lock);
+    QTAILQ_INIT(&ssd->updates);
     ssd->mouse_x = -1;
     ssd->mouse_y = -1;
     if (ssd->num_surfaces == 0) {
@@ -345,16 +415,20 @@
 
 void qemu_spice_display_resize(SimpleSpiceDisplay *ssd)
 {
+    SimpleSpiceUpdate *update;
+
     dprint(1, "%s:\n", __FUNCTION__);
 
     memset(&ssd->dirty, 0, sizeof(ssd->dirty));
     qemu_pf_conv_put(ssd->conv);
     ssd->conv = NULL;
+    g_free(ssd->ds_mirror);
+    ssd->ds_mirror = NULL;
 
     qemu_mutex_lock(&ssd->lock);
-    if (ssd->update != NULL) {
-        qemu_spice_destroy_update(ssd, ssd->update);
-        ssd->update = NULL;
+    while ((update = QTAILQ_FIRST(&ssd->updates)) != NULL) {
+        QTAILQ_REMOVE(&ssd->updates, update, next);
+        qemu_spice_destroy_update(ssd, update);
     }
     qemu_mutex_unlock(&ssd->lock);
     qemu_spice_destroy_host_primary(ssd);
@@ -384,8 +458,8 @@
     vga_hw_update();
 
     qemu_mutex_lock(&ssd->lock);
-    if (ssd->update == NULL) {
-        ssd->update = qemu_spice_create_update(ssd);
+    if (QTAILQ_EMPTY(&ssd->updates)) {
+        qemu_spice_create_update(ssd);
         ssd->notify++;
     }
     qemu_spice_cursor_refresh_unlocked(ssd);
@@ -442,9 +516,9 @@
     dprint(3, "%s:\n", __FUNCTION__);
 
     qemu_mutex_lock(&ssd->lock);
-    if (ssd->update != NULL) {
-        update = ssd->update;
-        ssd->update = NULL;
+    update = QTAILQ_FIRST(&ssd->updates);
+    if (update != NULL) {
+        QTAILQ_REMOVE(&ssd->updates, update, next);
         *ext = update->ext;
         ret = true;
     }
diff --git a/ui/spice-display.h b/ui/spice-display.h
index 512ab78..dea41c1 100644
--- a/ui/spice-display.h
+++ b/ui/spice-display.h
@@ -72,6 +72,7 @@
 
 struct SimpleSpiceDisplay {
     DisplayState *ds;
+    uint8_t *ds_mirror;
     void *buf;
     int bufsize;
     QXLWorker *worker;
@@ -92,7 +93,7 @@
      * to them must be protected by the lock.
      */
     QemuMutex lock;
-    SimpleSpiceUpdate *update;
+    QTAILQ_HEAD(, SimpleSpiceUpdate) updates;
     QEMUCursor *cursor;
     int mouse_x, mouse_y;
 };
@@ -102,6 +103,7 @@
     QXLImage image;
     QXLCommandExt ext;
     uint8_t *bitmap;
+    QTAILQ_ENTRY(SimpleSpiceUpdate) next;
 };
 
 int qemu_spice_rect_is_empty(const QXLRect* r);
diff --git a/ui/vnc-auth-sasl.c b/ui/vnc-auth-sasl.c
index 8fba770..f3ad75d 100644
--- a/ui/vnc-auth-sasl.c
+++ b/ui/vnc-auth-sasl.c
@@ -432,9 +432,7 @@
 
 static int protocol_client_auth_sasl_mechname(VncState *vs, uint8_t *data, size_t len)
 {
-    char *mechname = g_malloc(len + 1);
-    strncpy(mechname, (char*)data, len);
-    mechname[len] = '\0';
+    char *mechname = g_strndup((const char *) data, len);
     VNC_DEBUG("Got client mechname '%s' check against '%s'\n",
               mechname, vs->sasl.mechlist);
 
@@ -619,7 +617,6 @@
 
  authabort:
     vnc_client_error(vs);
-    return;
 }
 
 
diff --git a/ui/vnc-tls.c b/ui/vnc-tls.c
index 3aaa939..a7f7d07 100644
--- a/ui/vnc-tls.c
+++ b/ui/vnc-tls.c
@@ -49,7 +49,7 @@
     if (gnutls_global_init () < 0)
         return 0;
 
-    /* XXX ought to re-generate diffie-hellmen params periodically */
+    /* XXX ought to re-generate diffie-hellman params periodically */
     if (gnutls_dh_params_init (&dh_params) < 0)
         return 0;
     if (gnutls_dh_params_generate2 (dh_params, DH_BITS) < 0)
diff --git a/ui/vnc.c b/ui/vnc.c
index 385e345..01b2daf 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -3061,7 +3061,7 @@
         if (strncmp(display, "unix:", 5) == 0)
             vs->lsock = unix_connect(display+5);
         else
-            vs->lsock = inet_connect(display, true, NULL, NULL);
+            vs->lsock = inet_connect(display, NULL);
         if (-1 == vs->lsock) {
             g_free(vs->display);
             vs->display = NULL;
diff --git a/uri.c b/uri.c
new file mode 100644
index 0000000..dd922de
--- /dev/null
+++ b/uri.c
@@ -0,0 +1,2249 @@
+/**
+ * uri.c: set of generic URI related routines
+ *
+ * Reference: RFCs 3986, 2732 and 2373
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * daniel@veillard.com
+ *
+ **
+ *
+ * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *    Richard W.M. Jones <rjones@redhat.com>
+ *
+ */
+
+#include <glib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "uri.h"
+
+static void uri_clean(URI *uri);
+
+/*
+ * Old rule from 2396 used in legacy handling code
+ * alpha    = lowalpha | upalpha
+ */
+#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
+
+
+/*
+ * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
+ *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
+ *            "u" | "v" | "w" | "x" | "y" | "z"
+ */
+
+#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
+
+/*
+ * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
+ *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
+ *           "U" | "V" | "W" | "X" | "Y" | "Z"
+ */
+#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
+
+#ifdef IS_DIGIT
+#undef IS_DIGIT
+#endif
+/*
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ */
+#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
+
+/*
+ * alphanum = alpha | digit
+ */
+
+#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
+
+/*
+ * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+ */
+
+#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
+    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
+    ((x) == '(') || ((x) == ')'))
+
+/*
+ * unwise = "{" | "}" | "|" | "\" | "^" | "`"
+ */
+
+#define IS_UNWISE(p)                                                    \
+      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
+       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
+       ((*(p) == ']')) || ((*(p) == '`')))
+/*
+ * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
+ *            "[" | "]"
+ */
+
+#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
+        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
+        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
+        ((x) == ']'))
+
+/*
+ * unreserved = alphanum | mark
+ */
+
+#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
+
+/*
+ * Skip to next pointer char, handle escaped sequences
+ */
+
+#define NEXT(p) ((*p == '%')? p += 3 : p++)
+
+/*
+ * Productions from the spec.
+ *
+ *    authority     = server | reg_name
+ *    reg_name      = 1*( unreserved | escaped | "$" | "," |
+ *                        ";" | ":" | "@" | "&" | "=" | "+" )
+ *
+ * path          = [ abs_path | opaque_part ]
+ */
+
+
+/************************************************************************
+ *									*
+ *                         RFC 3986 parser				*
+ *									*
+ ************************************************************************/
+
+#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
+#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
+                      ((*(p) >= 'A') && (*(p) <= 'Z')))
+#define ISA_HEXDIG(p)							\
+       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
+        ((*(p) >= 'A') && (*(p) <= 'F')))
+
+/*
+ *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+ *                     / "*" / "+" / "," / ";" / "="
+ */
+#define ISA_SUB_DELIM(p)						\
+      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
+       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
+       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
+       ((*(p) == '=')) || ((*(p) == '\'')))
+
+/*
+ *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ */
+#define ISA_GEN_DELIM(p)						\
+      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
+       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
+       ((*(p) == '@')))
+
+/*
+ *    reserved      = gen-delims / sub-delims
+ */
+#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
+
+/*
+ *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ */
+#define ISA_UNRESERVED(p)						\
+      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
+       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
+
+/*
+ *    pct-encoded   = "%" HEXDIG HEXDIG
+ */
+#define ISA_PCT_ENCODED(p)						\
+     ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
+
+/*
+ *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+ */
+#define ISA_PCHAR(p)							\
+     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
+      ((*(p) == ':')) || ((*(p) == '@')))
+
+/**
+ * rfc3986_parse_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse an URI scheme
+ *
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_scheme(URI *uri, const char **str) {
+    const char *cur;
+
+    if (str == NULL)
+	return(-1);
+
+    cur = *str;
+    if (!ISA_ALPHA(cur))
+	return(2);
+    cur++;
+    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
+           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
+    if (uri != NULL) {
+	if (uri->scheme != NULL) g_free(uri->scheme);
+	uri->scheme = g_strndup(*str, cur - *str);
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_fragment:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * fragment      = *( pchar / "/" / "?" )
+ * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
+ *       in the fragment identifier but this is used very broadly for
+ *       xpointer scheme selection, so we are allowing it here to not break
+ *       for example all the DocBook processing chains.
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_fragment(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL)
+        return (-1);
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           (*cur == '[') || (*cur == ']') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+        NEXT(cur);
+    if (uri != NULL) {
+        if (uri->fragment != NULL)
+            g_free(uri->fragment);
+	if (uri->cleanup & 2)
+	    uri->fragment = g_strndup(*str, cur - *str);
+	else
+	    uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_query:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * query = *uric
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_query(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL)
+        return (-1);
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+        NEXT(cur);
+    if (uri != NULL) {
+	if (uri->query != NULL)
+	    g_free (uri->query);
+	uri->query = g_strndup (*str, cur - *str);
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_port:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse a port  part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * port          = *DIGIT
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_port(URI *uri, const char **str)
+{
+    const char *cur = *str;
+
+    if (ISA_DIGIT(cur)) {
+	if (uri != NULL)
+	    uri->port = 0;
+	while (ISA_DIGIT(cur)) {
+	    if (uri != NULL)
+		uri->port = uri->port * 10 + (*cur - '0');
+	    cur++;
+	}
+	*str = cur;
+	return(0);
+    }
+    return(1);
+}
+
+/**
+ * rfc3986_parse_user_info:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an user informations part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_user_info(URI *uri, const char **str)
+{
+    const char *cur;
+
+    cur = *str;
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
+           ISA_SUB_DELIM(cur) || (*cur == ':'))
+	NEXT(cur);
+    if (*cur == '@') {
+	if (uri != NULL) {
+	    if (uri->user != NULL) g_free(uri->user);
+	    if (uri->cleanup & 2)
+		uri->user = g_strndup(*str, cur - *str);
+	    else
+		uri->user = uri_string_unescape(*str, cur - *str, NULL);
+	}
+	*str = cur;
+	return(0);
+    }
+    return(1);
+}
+
+/**
+ * rfc3986_parse_dec_octet:
+ * @str:  the string to analyze
+ *
+ *    dec-octet     = DIGIT                 ; 0-9
+ *                  / %x31-39 DIGIT         ; 10-99
+ *                  / "1" 2DIGIT            ; 100-199
+ *                  / "2" %x30-34 DIGIT     ; 200-249
+ *                  / "25" %x30-35          ; 250-255
+ *
+ * Skip a dec-octet.
+ *
+ * Returns 0 if found and skipped, 1 otherwise
+ */
+static int
+rfc3986_parse_dec_octet(const char **str) {
+    const char *cur = *str;
+
+    if (!(ISA_DIGIT(cur)))
+        return(1);
+    if (!ISA_DIGIT(cur+1))
+	cur++;
+    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
+	cur += 2;
+    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
+	cur += 3;
+    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
+	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
+	cur += 3;
+    else if ((*cur == '2') && (*(cur + 1) == '5') &&
+	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
+	cur += 3;
+    else
+        return(1);
+    *str = cur;
+    return(0);
+}
+/**
+ * rfc3986_parse_host:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an host part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * host          = IP-literal / IPv4address / reg-name
+ * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
+ * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ * reg-name      = *( unreserved / pct-encoded / sub-delims )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_host(URI *uri, const char **str)
+{
+    const char *cur = *str;
+    const char *host;
+
+    host = cur;
+    /*
+     * IPv6 and future adressing scheme are enclosed between brackets
+     */
+    if (*cur == '[') {
+        cur++;
+	while ((*cur != ']') && (*cur != 0))
+	    cur++;
+	if (*cur != ']')
+	    return(1);
+	cur++;
+	goto found;
+    }
+    /*
+     * try to parse an IPv4
+     */
+    if (ISA_DIGIT(cur)) {
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+	cur++;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	goto found;
+not_ipv4:
+        cur = *str;
+    }
+    /*
+     * then this should be a hostname which can be empty
+     */
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
+        NEXT(cur);
+found:
+    if (uri != NULL) {
+	if (uri->authority != NULL) g_free(uri->authority);
+	uri->authority = NULL;
+	if (uri->server != NULL) g_free(uri->server);
+	if (cur != host) {
+	    if (uri->cleanup & 2)
+		uri->server = g_strndup(host, cur - host);
+	    else
+		uri->server = uri_string_unescape(host, cur - host, NULL);
+	} else
+	    uri->server = NULL;
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_authority:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an authority part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * authority     = [ userinfo "@" ] host [ ":" port ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_authority(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+    /*
+     * try to parse an userinfo and check for the trailing @
+     */
+    ret = rfc3986_parse_user_info(uri, &cur);
+    if ((ret != 0) || (*cur != '@'))
+        cur = *str;
+    else
+        cur++;
+    ret = rfc3986_parse_host(uri, &cur);
+    if (ret != 0) return(ret);
+    if (*cur == ':') {
+        cur++;
+        ret = rfc3986_parse_port(uri, &cur);
+	if (ret != 0) return(ret);
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_segment:
+ * @str:  the string to analyze
+ * @forbid: an optional forbidden character
+ * @empty: allow an empty segment
+ *
+ * Parse a segment and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * segment       = *pchar
+ * segment-nz    = 1*pchar
+ * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ *               ; non-zero-length segment without any colon ":"
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_segment(const char **str, char forbid, int empty)
+{
+    const char *cur;
+
+    cur = *str;
+    if (!ISA_PCHAR(cur)) {
+        if (empty)
+	    return(0);
+	return(1);
+    }
+    while (ISA_PCHAR(cur) && (*cur != forbid))
+        NEXT(cur);
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_ab_empty:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute or empty and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-abempty  = *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_ab_empty(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (*str != cur) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_absolute:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_absolute(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if (*cur != '/')
+        return(1);
+    cur++;
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret == 0) {
+	while (*cur == '/') {
+	    cur++;
+	    ret = rfc3986_parse_segment(&cur, 0, 1);
+	    if (ret != 0) return(ret);
+	}
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_rootless:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path without root and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-rootless = segment-nz *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_rootless(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret != 0) return(ret);
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_no_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path which is not a scheme and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-noscheme = segment-nz-nc *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_no_scheme(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, ':', 0);
+    if (ret != 0) return(ret);
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+	if (uri->path != NULL) g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_hier_part:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an hierarchical part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * hier-part     = "//" authority path-abempty
+ *                / path-absolute
+ *                / path-rootless
+ *                / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_hier_part(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if ((*cur == '/') && (*(cur + 1) == '/')) {
+        cur += 2;
+	ret = rfc3986_parse_authority(uri, &cur);
+	if (ret != 0) return(ret);
+	ret = rfc3986_parse_path_ab_empty(uri, &cur);
+	if (ret != 0) return(ret);
+	*str = cur;
+	return(0);
+    } else if (*cur == '/') {
+        ret = rfc3986_parse_path_absolute(uri, &cur);
+	if (ret != 0) return(ret);
+    } else if (ISA_PCHAR(cur)) {
+        ret = rfc3986_parse_path_rootless(uri, &cur);
+	if (ret != 0) return(ret);
+    } else {
+	/* path-empty is effectively empty */
+	if (uri != NULL) {
+	    if (uri->path != NULL) g_free(uri->path);
+	    uri->path = NULL;
+	}
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_relative_ref:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
+ * relative-part = "//" authority path-abempty
+ *               / path-absolute
+ *               / path-noscheme
+ *               / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_relative_ref(URI *uri, const char *str) {
+    int ret;
+
+    if ((*str == '/') && (*(str + 1) == '/')) {
+        str += 2;
+	ret = rfc3986_parse_authority(uri, &str);
+	if (ret != 0) return(ret);
+	ret = rfc3986_parse_path_ab_empty(uri, &str);
+	if (ret != 0) return(ret);
+    } else if (*str == '/') {
+	ret = rfc3986_parse_path_absolute(uri, &str);
+	if (ret != 0) return(ret);
+    } else if (ISA_PCHAR(str)) {
+        ret = rfc3986_parse_path_no_scheme(uri, &str);
+	if (ret != 0) return(ret);
+    } else {
+	/* path-empty is effectively empty */
+	if (uri != NULL) {
+	    if (uri->path != NULL) g_free(uri->path);
+	    uri->path = NULL;
+	}
+    }
+
+    if (*str == '?') {
+	str++;
+	ret = rfc3986_parse_query(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str == '#') {
+	str++;
+	ret = rfc3986_parse_fragment(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str != 0) {
+	uri_clean(uri);
+	return(1);
+    }
+    return(0);
+}
+
+
+/**
+ * rfc3986_parse:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse(URI *uri, const char *str) {
+    int ret;
+
+    ret = rfc3986_parse_scheme(uri, &str);
+    if (ret != 0) return(ret);
+    if (*str != ':') {
+	return(1);
+    }
+    str++;
+    ret = rfc3986_parse_hier_part(uri, &str);
+    if (ret != 0) return(ret);
+    if (*str == '?') {
+	str++;
+	ret = rfc3986_parse_query(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str == '#') {
+	str++;
+	ret = rfc3986_parse_fragment(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str != 0) {
+	uri_clean(uri);
+	return(1);
+    }
+    return(0);
+}
+
+/**
+ * rfc3986_parse_uri_reference:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_uri_reference(URI *uri, const char *str) {
+    int ret;
+
+    if (str == NULL)
+	return(-1);
+    uri_clean(uri);
+
+    /*
+     * Try first to parse absolute refs, then fallback to relative if
+     * it fails.
+     */
+    ret = rfc3986_parse(uri, str);
+    if (ret != 0) {
+	uri_clean(uri);
+        ret = rfc3986_parse_relative_ref(uri, str);
+	if (ret != 0) {
+	    uri_clean(uri);
+	    return(ret);
+	}
+    }
+    return(0);
+}
+
+/**
+ * uri_parse:
+ * @str:  the URI string to analyze
+ *
+ * Parse an URI based on RFC 3986
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse(const char *str) {
+    URI *uri;
+    int ret;
+
+    if (str == NULL)
+	return(NULL);
+    uri = uri_new();
+    if (uri != NULL) {
+	ret = rfc3986_parse_uri_reference(uri, str);
+        if (ret) {
+	    uri_free(uri);
+	    return(NULL);
+	}
+    }
+    return(uri);
+}
+
+/**
+ * uri_parse_into:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string based on RFC 3986 and fills in the
+ * appropriate fields of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+int
+uri_parse_into(URI *uri, const char *str) {
+    return(rfc3986_parse_uri_reference(uri, str));
+}
+
+/**
+ * uri_parse_raw:
+ * @str:  the URI string to analyze
+ * @raw:  if 1 unescaping of URI pieces are disabled
+ *
+ * Parse an URI but allows to keep intact the original fragments.
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse_raw(const char *str, int raw) {
+    URI *uri;
+    int ret;
+
+    if (str == NULL)
+	return(NULL);
+    uri = uri_new();
+    if (uri != NULL) {
+        if (raw) {
+	    uri->cleanup |= 2;
+	}
+	ret = uri_parse_into(uri, str);
+        if (ret) {
+	    uri_free(uri);
+	    return(NULL);
+	}
+    }
+    return(uri);
+}
+
+/************************************************************************
+ *									*
+ *			Generic URI structure functions			*
+ *									*
+ ************************************************************************/
+
+/**
+ * uri_new:
+ *
+ * Simply creates an empty URI
+ *
+ * Returns the new structure or NULL in case of error
+ */
+URI *
+uri_new(void) {
+    URI *ret;
+
+    ret = (URI *) g_malloc(sizeof(URI));
+    memset(ret, 0, sizeof(URI));
+    return(ret);
+}
+
+/**
+ * realloc2n:
+ *
+ * Function to handle properly a reallocation when saving an URI
+ * Also imposes some limit on the length of an URI string output
+ */
+static char *
+realloc2n(char *ret, int *max) {
+    char *temp;
+    int tmp;
+
+    tmp = *max * 2;
+    temp = g_realloc(ret, (tmp + 1));
+    *max = tmp;
+    return(temp);
+}
+
+/**
+ * uri_to_string:
+ * @uri:  pointer to an URI
+ *
+ * Save the URI as an escaped string
+ *
+ * Returns a new string (to be deallocated by caller)
+ */
+char *
+uri_to_string(URI *uri) {
+    char *ret = NULL;
+    char *temp;
+    const char *p;
+    int len;
+    int max;
+
+    if (uri == NULL) return(NULL);
+
+
+    max = 80;
+    ret = g_malloc(max + 1);
+    len = 0;
+
+    if (uri->scheme != NULL) {
+	p = uri->scheme;
+	while (*p != 0) {
+	    if (len >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+		ret = temp;
+	    }
+	    ret[len++] = *p++;
+	}
+	if (len >= max) {
+            temp = realloc2n(ret, &max);
+            if (temp == NULL) goto mem_error;
+            ret = temp;
+	}
+	ret[len++] = ':';
+    }
+    if (uri->opaque != NULL) {
+	p = uri->opaque;
+	while (*p != 0) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
+		ret[len++] = *p++;
+	    else {
+		int val = *(unsigned char *)p++;
+		int hi = val / 0x10, lo = val % 0x10;
+		ret[len++] = '%';
+		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+	    }
+	}
+    } else {
+	if (uri->server != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	    if (uri->user != NULL) {
+		p = uri->user;
+		while (*p != 0) {
+		    if (len + 3 >= max) {
+                        temp = realloc2n(ret, &max);
+                        if (temp == NULL) goto mem_error;
+                        ret = temp;
+		    }
+		    if ((IS_UNRESERVED(*(p))) ||
+			((*(p) == ';')) || ((*(p) == ':')) ||
+			((*(p) == '&')) || ((*(p) == '=')) ||
+			((*(p) == '+')) || ((*(p) == '$')) ||
+			((*(p) == ',')))
+			ret[len++] = *p++;
+		    else {
+			int val = *(unsigned char *)p++;
+			int hi = val / 0x10, lo = val % 0x10;
+			ret[len++] = '%';
+			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		    }
+		}
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = '@';
+	    }
+	    p = uri->server;
+	    while (*p != 0) {
+		if (len >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+	    }
+	    if (uri->port > 0) {
+		if (len + 10 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		len += snprintf(&ret[len], max - len, ":%d", uri->port);
+	    }
+	} else if (uri->authority != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	    p = uri->authority;
+	    while (*p != 0) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		if ((IS_UNRESERVED(*(p))) ||
+                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
+                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+                    ((*(p) == '=')) || ((*(p) == '+')))
+		    ret[len++] = *p++;
+		else {
+		    int val = *(unsigned char *)p++;
+		    int hi = val / 0x10, lo = val % 0x10;
+		    ret[len++] = '%';
+		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		}
+	    }
+	} else if (uri->scheme != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	}
+	if (uri->path != NULL) {
+	    p = uri->path;
+	    /*
+	     * the colon in file:///d: should not be escaped or
+	     * Windows accesses fail later.
+	     */
+	    if ((uri->scheme != NULL) &&
+		(p[0] == '/') &&
+		(((p[1] >= 'a') && (p[1] <= 'z')) ||
+		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
+		(p[2] == ':') &&
+	        (!strcmp(uri->scheme, "file"))) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+		ret[len++] = *p++;
+		ret[len++] = *p++;
+	    }
+	    while (*p != 0) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
+                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
+	            ((*(p) == ',')))
+		    ret[len++] = *p++;
+		else {
+		    int val = *(unsigned char *)p++;
+		    int hi = val / 0x10, lo = val % 0x10;
+		    ret[len++] = '%';
+		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		}
+	    }
+	}
+	if (uri->query != NULL) {
+	    if (len + 1 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    ret[len++] = '?';
+	    p = uri->query;
+	    while (*p != 0) {
+		if (len + 1 >= max) {
+                    temp = realloc2n(ret, &max);
+                    if (temp == NULL) goto mem_error;
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+	    }
+	}
+    }
+    if (uri->fragment != NULL) {
+	if (len + 3 >= max) {
+            temp = realloc2n(ret, &max);
+            if (temp == NULL) goto mem_error;
+            ret = temp;
+	}
+	ret[len++] = '#';
+	p = uri->fragment;
+	while (*p != 0) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                if (temp == NULL) goto mem_error;
+                ret = temp;
+	    }
+	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
+		ret[len++] = *p++;
+	    else {
+		int val = *(unsigned char *)p++;
+		int hi = val / 0x10, lo = val % 0x10;
+		ret[len++] = '%';
+		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+	    }
+	}
+    }
+    if (len >= max) {
+        temp = realloc2n(ret, &max);
+        if (temp == NULL) goto mem_error;
+        ret = temp;
+    }
+    ret[len] = 0;
+    return(ret);
+
+mem_error:
+    g_free(ret);
+    return(NULL);
+}
+
+/**
+ * uri_clean:
+ * @uri:  pointer to an URI
+ *
+ * Make sure the URI struct is free of content
+ */
+static void
+uri_clean(URI *uri) {
+    if (uri == NULL) return;
+
+    if (uri->scheme != NULL) g_free(uri->scheme);
+    uri->scheme = NULL;
+    if (uri->server != NULL) g_free(uri->server);
+    uri->server = NULL;
+    if (uri->user != NULL) g_free(uri->user);
+    uri->user = NULL;
+    if (uri->path != NULL) g_free(uri->path);
+    uri->path = NULL;
+    if (uri->fragment != NULL) g_free(uri->fragment);
+    uri->fragment = NULL;
+    if (uri->opaque != NULL) g_free(uri->opaque);
+    uri->opaque = NULL;
+    if (uri->authority != NULL) g_free(uri->authority);
+    uri->authority = NULL;
+    if (uri->query != NULL) g_free(uri->query);
+    uri->query = NULL;
+}
+
+/**
+ * uri_free:
+ * @uri:  pointer to an URI
+ *
+ * Free up the URI struct
+ */
+void
+uri_free(URI *uri) {
+    uri_clean(uri);
+    g_free(uri);
+}
+
+/************************************************************************
+ *									*
+ *			Helper functions				*
+ *									*
+ ************************************************************************/
+
+/**
+ * normalize_uri_path:
+ * @path:  pointer to the path string
+ *
+ * Applies the 5 normalization steps to a path string--that is, RFC 2396
+ * Section 5.2, steps 6.c through 6.g.
+ *
+ * Normalization occurs directly on the string, no new allocation is done
+ *
+ * Returns 0 or an error code
+ */
+static int
+normalize_uri_path(char *path) {
+    char *cur, *out;
+
+    if (path == NULL)
+	return(-1);
+
+    /* Skip all initial "/" chars.  We want to get to the beginning of the
+     * first non-empty segment.
+     */
+    cur = path;
+    while (cur[0] == '/')
+      ++cur;
+    if (cur[0] == '\0')
+      return(0);
+
+    /* Keep everything we've seen so far.  */
+    out = cur;
+
+    /*
+     * Analyze each segment in sequence for cases (c) and (d).
+     */
+    while (cur[0] != '\0') {
+	/*
+	 * c) All occurrences of "./", where "." is a complete path segment,
+	 *    are removed from the buffer string.
+	 */
+	if ((cur[0] == '.') && (cur[1] == '/')) {
+	    cur += 2;
+	    /* '//' normalization should be done at this point too */
+	    while (cur[0] == '/')
+		cur++;
+	    continue;
+	}
+
+	/*
+	 * d) If the buffer string ends with "." as a complete path segment,
+	 *    that "." is removed.
+	 */
+	if ((cur[0] == '.') && (cur[1] == '\0'))
+	    break;
+
+	/* Otherwise keep the segment.  */
+	while (cur[0] != '/') {
+            if (cur[0] == '\0')
+              goto done_cd;
+	    (out++)[0] = (cur++)[0];
+	}
+	/* nomalize // */
+	while ((cur[0] == '/') && (cur[1] == '/'))
+	    cur++;
+
+        (out++)[0] = (cur++)[0];
+    }
+ done_cd:
+    out[0] = '\0';
+
+    /* Reset to the beginning of the first segment for the next sequence.  */
+    cur = path;
+    while (cur[0] == '/')
+      ++cur;
+    if (cur[0] == '\0')
+	return(0);
+
+    /*
+     * Analyze each segment in sequence for cases (e) and (f).
+     *
+     * e) All occurrences of "<segment>/../", where <segment> is a
+     *    complete path segment not equal to "..", are removed from the
+     *    buffer string.  Removal of these path segments is performed
+     *    iteratively, removing the leftmost matching pattern on each
+     *    iteration, until no matching pattern remains.
+     *
+     * f) If the buffer string ends with "<segment>/..", where <segment>
+     *    is a complete path segment not equal to "..", that
+     *    "<segment>/.." is removed.
+     *
+     * To satisfy the "iterative" clause in (e), we need to collapse the
+     * string every time we find something that needs to be removed.  Thus,
+     * we don't need to keep two pointers into the string: we only need a
+     * "current position" pointer.
+     */
+    while (1) {
+        char *segp, *tmp;
+
+        /* At the beginning of each iteration of this loop, "cur" points to
+         * the first character of the segment we want to examine.
+         */
+
+        /* Find the end of the current segment.  */
+        segp = cur;
+        while ((segp[0] != '/') && (segp[0] != '\0'))
+          ++segp;
+
+        /* If this is the last segment, we're done (we need at least two
+         * segments to meet the criteria for the (e) and (f) cases).
+         */
+        if (segp[0] == '\0')
+          break;
+
+        /* If the first segment is "..", or if the next segment _isn't_ "..",
+         * keep this segment and try the next one.
+         */
+        ++segp;
+        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
+            || ((segp[0] != '.') || (segp[1] != '.')
+                || ((segp[2] != '/') && (segp[2] != '\0')))) {
+          cur = segp;
+          continue;
+        }
+
+        /* If we get here, remove this segment and the next one and back up
+         * to the previous segment (if there is one), to implement the
+         * "iteratively" clause.  It's pretty much impossible to back up
+         * while maintaining two pointers into the buffer, so just compact
+         * the whole buffer now.
+         */
+
+        /* If this is the end of the buffer, we're done.  */
+        if (segp[2] == '\0') {
+          cur[0] = '\0';
+          break;
+        }
+        /* Valgrind complained, strcpy(cur, segp + 3); */
+        /* string will overlap, do not use strcpy */
+        tmp = cur;
+        segp += 3;
+        while ((*tmp++ = *segp++) != 0)
+          ;
+
+        /* If there are no previous segments, then keep going from here.  */
+        segp = cur;
+        while ((segp > path) && ((--segp)[0] == '/'))
+          ;
+        if (segp == path)
+          continue;
+
+        /* "segp" is pointing to the end of a previous segment; find it's
+         * start.  We need to back up to the previous segment and start
+         * over with that to handle things like "foo/bar/../..".  If we
+         * don't do this, then on the first pass we'll remove the "bar/..",
+         * but be pointing at the second ".." so we won't realize we can also
+         * remove the "foo/..".
+         */
+        cur = segp;
+        while ((cur > path) && (cur[-1] != '/'))
+          --cur;
+    }
+    out[0] = '\0';
+
+    /*
+     * g) If the resulting buffer string still begins with one or more
+     *    complete path segments of "..", then the reference is
+     *    considered to be in error. Implementations may handle this
+     *    error by retaining these components in the resolved path (i.e.,
+     *    treating them as part of the final URI), by removing them from
+     *    the resolved path (i.e., discarding relative levels above the
+     *    root), or by avoiding traversal of the reference.
+     *
+     * We discard them from the final path.
+     */
+    if (path[0] == '/') {
+      cur = path;
+      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
+             && ((cur[3] == '/') || (cur[3] == '\0')))
+	cur += 3;
+
+      if (cur != path) {
+	out = path;
+	while (cur[0] != '\0')
+          (out++)[0] = (cur++)[0];
+	out[0] = 0;
+      }
+    }
+
+    return(0);
+}
+
+static int is_hex(char c) {
+    if (((c >= '0') && (c <= '9')) ||
+        ((c >= 'a') && (c <= 'f')) ||
+        ((c >= 'A') && (c <= 'F')))
+	return(1);
+    return(0);
+}
+
+
+/**
+ * uri_string_unescape:
+ * @str:  the string to unescape
+ * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
+ * @target:  optional destination buffer
+ *
+ * Unescaping routine, but does not check that the string is an URI. The
+ * output is a direct unsigned char translation of %XX values (no encoding)
+ * Note that the length of the result can only be smaller or same size as
+ * the input string.
+ *
+ * Returns a copy of the string, but unescaped, will return NULL only in case
+ * of error
+ */
+char *
+uri_string_unescape(const char *str, int len, char *target) {
+    char *ret, *out;
+    const char *in;
+
+    if (str == NULL)
+	return(NULL);
+    if (len <= 0) len = strlen(str);
+    if (len < 0) return(NULL);
+
+    if (target == NULL) {
+	ret = g_malloc(len + 1);
+    } else
+	ret = target;
+    in = str;
+    out = ret;
+    while(len > 0) {
+	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
+	    in++;
+	    if ((*in >= '0') && (*in <= '9'))
+	        *out = (*in - '0');
+	    else if ((*in >= 'a') && (*in <= 'f'))
+	        *out = (*in - 'a') + 10;
+	    else if ((*in >= 'A') && (*in <= 'F'))
+	        *out = (*in - 'A') + 10;
+	    in++;
+	    if ((*in >= '0') && (*in <= '9'))
+	        *out = *out * 16 + (*in - '0');
+	    else if ((*in >= 'a') && (*in <= 'f'))
+	        *out = *out * 16 + (*in - 'a') + 10;
+	    else if ((*in >= 'A') && (*in <= 'F'))
+	        *out = *out * 16 + (*in - 'A') + 10;
+	    in++;
+	    len -= 3;
+	    out++;
+	} else {
+	    *out++ = *in++;
+	    len--;
+	}
+    }
+    *out = 0;
+    return(ret);
+}
+
+/**
+ * uri_string_escape:
+ * @str:  string to escape
+ * @list: exception list string of chars not to escape
+ *
+ * This routine escapes a string to hex, ignoring reserved characters (a-z)
+ * and the characters in the exception list.
+ *
+ * Returns a new escaped string or NULL in case of error.
+ */
+char *
+uri_string_escape(const char *str, const char *list) {
+    char *ret, ch;
+    char *temp;
+    const char *in;
+    int len, out;
+
+    if (str == NULL)
+	return(NULL);
+    if (str[0] == 0)
+	return(g_strdup(str));
+    len = strlen(str);
+    if (!(len > 0)) return(NULL);
+
+    len += 20;
+    ret = g_malloc(len);
+    in = str;
+    out = 0;
+    while(*in != 0) {
+	if (len - out <= 3) {
+            temp = realloc2n(ret, &len);
+	    ret = temp;
+	}
+
+	ch = *in;
+
+	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) {
+	    unsigned char val;
+	    ret[out++] = '%';
+	    val = ch >> 4;
+	    if (val <= 9)
+		ret[out++] = '0' + val;
+	    else
+		ret[out++] = 'A' + val - 0xA;
+	    val = ch & 0xF;
+	    if (val <= 9)
+		ret[out++] = '0' + val;
+	    else
+		ret[out++] = 'A' + val - 0xA;
+	    in++;
+	} else {
+	    ret[out++] = *in++;
+	}
+
+    }
+    ret[out] = 0;
+    return(ret);
+}
+
+/************************************************************************
+ *									*
+ *			Public functions				*
+ *									*
+ ************************************************************************/
+
+/**
+ * uri_resolve:
+ * @URI:  the URI instance found in the document
+ * @base:  the base value
+ *
+ * Computes he final URI of the reference done by checking that
+ * the given URI is valid, and building the final URI using the
+ * base URI. This is processed according to section 5.2 of the
+ * RFC 2396
+ *
+ * 5.2. Resolving Relative References to Absolute Form
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ *         of error.
+ */
+char *
+uri_resolve(const char *uri, const char *base) {
+    char *val = NULL;
+    int ret, len, indx, cur, out;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    URI *res = NULL;
+
+    /*
+     * 1) The URI reference is parsed into the potential four components and
+     *    fragment identifier, as described in Section 4.3.
+     *
+     *    NOTE that a completely empty URI is treated by modern browsers
+     *    as a reference to "." rather than as a synonym for the current
+     *    URI.  Should we do that here?
+     */
+    if (uri == NULL)
+	ret = -1;
+    else {
+	if (*uri) {
+	    ref = uri_new();
+	    if (ref == NULL)
+		goto done;
+	    ret = uri_parse_into(ref, uri);
+	}
+	else
+	    ret = 0;
+    }
+    if (ret != 0)
+	goto done;
+    if ((ref != NULL) && (ref->scheme != NULL)) {
+	/*
+	 * The URI is absolute don't modify.
+	 */
+	val = g_strdup(uri);
+	goto done;
+    }
+    if (base == NULL)
+	ret = -1;
+    else {
+	bas = uri_new();
+	if (bas == NULL)
+	    goto done;
+	ret = uri_parse_into(bas, base);
+    }
+    if (ret != 0) {
+	if (ref)
+	    val = uri_to_string(ref);
+	goto done;
+    }
+    if (ref == NULL) {
+	/*
+	 * the base fragment must be ignored
+	 */
+	if (bas->fragment != NULL) {
+	    g_free(bas->fragment);
+	    bas->fragment = NULL;
+	}
+	val = uri_to_string(bas);
+	goto done;
+    }
+
+    /*
+     * 2) If the path component is empty and the scheme, authority, and
+     *    query components are undefined, then it is a reference to the
+     *    current document and we are done.  Otherwise, the reference URI's
+     *    query and fragment components are defined as found (or not found)
+     *    within the URI reference and not inherited from the base URI.
+     *
+     *    NOTE that in modern browsers, the parsing differs from the above
+     *    in the following aspect:  the query component is allowed to be
+     *    defined while still treating this as a reference to the current
+     *    document.
+     */
+    res = uri_new();
+    if (res == NULL)
+	goto done;
+    if ((ref->scheme == NULL) && (ref->path == NULL) &&
+	((ref->authority == NULL) && (ref->server == NULL))) {
+	if (bas->scheme != NULL)
+	    res->scheme = g_strdup(bas->scheme);
+	if (bas->authority != NULL)
+	    res->authority = g_strdup(bas->authority);
+	else if (bas->server != NULL) {
+	    res->server = g_strdup(bas->server);
+	    if (bas->user != NULL)
+		res->user = g_strdup(bas->user);
+	    res->port = bas->port;
+	}
+	if (bas->path != NULL)
+	    res->path = g_strdup(bas->path);
+	if (ref->query != NULL)
+	    res->query = g_strdup (ref->query);
+	else if (bas->query != NULL)
+	    res->query = g_strdup(bas->query);
+	if (ref->fragment != NULL)
+	    res->fragment = g_strdup(ref->fragment);
+	goto step_7;
+    }
+
+    /*
+     * 3) If the scheme component is defined, indicating that the reference
+     *    starts with a scheme name, then the reference is interpreted as an
+     *    absolute URI and we are done.  Otherwise, the reference URI's
+     *    scheme is inherited from the base URI's scheme component.
+     */
+    if (ref->scheme != NULL) {
+	val = uri_to_string(ref);
+	goto done;
+    }
+    if (bas->scheme != NULL)
+	res->scheme = g_strdup(bas->scheme);
+
+    if (ref->query != NULL)
+	res->query = g_strdup(ref->query);
+    if (ref->fragment != NULL)
+	res->fragment = g_strdup(ref->fragment);
+
+    /*
+     * 4) If the authority component is defined, then the reference is a
+     *    network-path and we skip to step 7.  Otherwise, the reference
+     *    URI's authority is inherited from the base URI's authority
+     *    component, which will also be undefined if the URI scheme does not
+     *    use an authority component.
+     */
+    if ((ref->authority != NULL) || (ref->server != NULL)) {
+	if (ref->authority != NULL)
+	    res->authority = g_strdup(ref->authority);
+	else {
+	    res->server = g_strdup(ref->server);
+	    if (ref->user != NULL)
+		res->user = g_strdup(ref->user);
+            res->port = ref->port;
+	}
+	if (ref->path != NULL)
+	    res->path = g_strdup(ref->path);
+	goto step_7;
+    }
+    if (bas->authority != NULL)
+	res->authority = g_strdup(bas->authority);
+    else if (bas->server != NULL) {
+	res->server = g_strdup(bas->server);
+	if (bas->user != NULL)
+	    res->user = g_strdup(bas->user);
+	res->port = bas->port;
+    }
+
+    /*
+     * 5) If the path component begins with a slash character ("/"), then
+     *    the reference is an absolute-path and we skip to step 7.
+     */
+    if ((ref->path != NULL) && (ref->path[0] == '/')) {
+	res->path = g_strdup(ref->path);
+	goto step_7;
+    }
+
+
+    /*
+     * 6) If this step is reached, then we are resolving a relative-path
+     *    reference.  The relative path needs to be merged with the base
+     *    URI's path.  Although there are many ways to do this, we will
+     *    describe a simple method using a separate string buffer.
+     *
+     * Allocate a buffer large enough for the result string.
+     */
+    len = 2; /* extra / and 0 */
+    if (ref->path != NULL)
+	len += strlen(ref->path);
+    if (bas->path != NULL)
+	len += strlen(bas->path);
+    res->path = g_malloc(len);
+    res->path[0] = 0;
+
+    /*
+     * a) All but the last segment of the base URI's path component is
+     *    copied to the buffer.  In other words, any characters after the
+     *    last (right-most) slash character, if any, are excluded.
+     */
+    cur = 0;
+    out = 0;
+    if (bas->path != NULL) {
+	while (bas->path[cur] != 0) {
+	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
+		cur++;
+	    if (bas->path[cur] == 0)
+		break;
+
+	    cur++;
+	    while (out < cur) {
+		res->path[out] = bas->path[out];
+		out++;
+	    }
+	}
+    }
+    res->path[out] = 0;
+
+    /*
+     * b) The reference's path component is appended to the buffer
+     *    string.
+     */
+    if (ref->path != NULL && ref->path[0] != 0) {
+	indx = 0;
+	/*
+	 * Ensure the path includes a '/'
+	 */
+	if ((out == 0) && (bas->server != NULL))
+	    res->path[out++] = '/';
+	while (ref->path[indx] != 0) {
+	    res->path[out++] = ref->path[indx++];
+	}
+    }
+    res->path[out] = 0;
+
+    /*
+     * Steps c) to h) are really path normalization steps
+     */
+    normalize_uri_path(res->path);
+
+step_7:
+
+    /*
+     * 7) The resulting URI components, including any inherited from the
+     *    base URI, are recombined to give the absolute form of the URI
+     *    reference.
+     */
+    val = uri_to_string(res);
+
+done:
+    if (ref != NULL)
+	uri_free(ref);
+    if (bas != NULL)
+	uri_free(bas);
+    if (res != NULL)
+	uri_free(res);
+    return(val);
+}
+
+/**
+ * uri_resolve_relative:
+ * @URI:  the URI reference under consideration
+ * @base:  the base value
+ *
+ * Expresses the URI of the reference in terms relative to the
+ * base.  Some examples of this operation include:
+ *     base = "http://site1.com/docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   pic1.gif
+ *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
+ *
+ *     base = "docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
+ *
+ *
+ * Note: if the URI reference is really wierd or complicated, it may be
+ *       worthwhile to first convert it into a "nice" one by calling
+ *       uri_resolve (using 'base') before calling this routine,
+ *       since this routine (for reasonable efficiency) assumes URI has
+ *       already been through some validation.
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ * error.
+ */
+char *
+uri_resolve_relative (const char *uri, const char * base)
+{
+    char *val = NULL;
+    int ret;
+    int ix;
+    int pos = 0;
+    int nbslash = 0;
+    int len;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    char *bptr, *uptr, *vptr;
+    int remove_path = 0;
+
+    if ((uri == NULL) || (*uri == 0))
+	return NULL;
+
+    /*
+     * First parse URI into a standard form
+     */
+    ref = uri_new ();
+    if (ref == NULL)
+	return NULL;
+    /* If URI not already in "relative" form */
+    if (uri[0] != '.') {
+	ret = uri_parse_into (ref, uri);
+	if (ret != 0)
+	    goto done;		/* Error in URI, return NULL */
+    } else
+	ref->path = g_strdup(uri);
+
+    /*
+     * Next parse base into the same standard form
+     */
+    if ((base == NULL) || (*base == 0)) {
+	val = g_strdup (uri);
+	goto done;
+    }
+    bas = uri_new ();
+    if (bas == NULL)
+	goto done;
+    if (base[0] != '.') {
+	ret = uri_parse_into (bas, base);
+	if (ret != 0)
+	    goto done;		/* Error in base, return NULL */
+    } else
+	bas->path = g_strdup(base);
+
+    /*
+     * If the scheme / server on the URI differs from the base,
+     * just return the URI
+     */
+    if ((ref->scheme != NULL) &&
+	((bas->scheme == NULL) ||
+	 (strcmp (bas->scheme, ref->scheme)) ||
+	 (strcmp (bas->server, ref->server)))) {
+	val = g_strdup (uri);
+	goto done;
+    }
+    if (!strcmp(bas->path, ref->path)) {
+	val = g_strdup("");
+	goto done;
+    }
+    if (bas->path == NULL) {
+	val = g_strdup(ref->path);
+	goto done;
+    }
+    if (ref->path == NULL) {
+        ref->path = (char *) "/";
+	remove_path = 1;
+    }
+
+    /*
+     * At this point (at last!) we can compare the two paths
+     *
+     * First we take care of the special case where either of the
+     * two path components may be missing (bug 316224)
+     */
+    if (bas->path == NULL) {
+	if (ref->path != NULL) {
+	    uptr = ref->path;
+	    if (*uptr == '/')
+		uptr++;
+	    /* exception characters from uri_to_string */
+	    val = uri_string_escape(uptr, "/;&=+$,");
+	}
+	goto done;
+    }
+    bptr = bas->path;
+    if (ref->path == NULL) {
+	for (ix = 0; bptr[ix] != 0; ix++) {
+	    if (bptr[ix] == '/')
+		nbslash++;
+	}
+	uptr = NULL;
+	len = 1;	/* this is for a string terminator only */
+    } else {
+    /*
+     * Next we compare the two strings and find where they first differ
+     */
+	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
+            pos += 2;
+	if ((*bptr == '.') && (bptr[1] == '/'))
+            bptr += 2;
+	else if ((*bptr == '/') && (ref->path[pos] != '/'))
+	    bptr++;
+	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
+	    pos++;
+
+	if (bptr[pos] == ref->path[pos]) {
+	    val = g_strdup("");
+	    goto done;		/* (I can't imagine why anyone would do this) */
+	}
+
+	/*
+	 * In URI, "back up" to the last '/' encountered.  This will be the
+	 * beginning of the "unique" suffix of URI
+	 */
+	ix = pos;
+	if ((ref->path[ix] == '/') && (ix > 0))
+	    ix--;
+	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
+	    ix -= 2;
+	for (; ix > 0; ix--) {
+	    if (ref->path[ix] == '/')
+		break;
+	}
+	if (ix == 0) {
+	    uptr = ref->path;
+	} else {
+	    ix++;
+	    uptr = &ref->path[ix];
+	}
+
+	/*
+	 * In base, count the number of '/' from the differing point
+	 */
+	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
+	    for (; bptr[ix] != 0; ix++) {
+		if (bptr[ix] == '/')
+		    nbslash++;
+	    }
+	}
+	len = strlen (uptr) + 1;
+    }
+
+    if (nbslash == 0) {
+	if (uptr != NULL)
+	    /* exception characters from uri_to_string */
+	    val = uri_string_escape(uptr, "/;&=+$,");
+	goto done;
+    }
+
+    /*
+     * Allocate just enough space for the returned string -
+     * length of the remainder of the URI, plus enough space
+     * for the "../" groups, plus one for the terminator
+     */
+    val = g_malloc (len + 3 * nbslash);
+    vptr = val;
+    /*
+     * Put in as many "../" as needed
+     */
+    for (; nbslash>0; nbslash--) {
+	*vptr++ = '.';
+	*vptr++ = '.';
+	*vptr++ = '/';
+    }
+    /*
+     * Finish up with the end of the URI
+     */
+    if (uptr != NULL) {
+        if ((vptr > val) && (len > 0) &&
+	    (uptr[0] == '/') && (vptr[-1] == '/')) {
+	    memcpy (vptr, uptr + 1, len - 1);
+	    vptr[len - 2] = 0;
+	} else {
+	    memcpy (vptr, uptr, len);
+	    vptr[len - 1] = 0;
+	}
+    } else {
+	vptr[len - 1] = 0;
+    }
+
+    /* escape the freshly-built path */
+    vptr = val;
+	/* exception characters from uri_to_string */
+    val = uri_string_escape(vptr, "/;&=+$,");
+    g_free(vptr);
+
+done:
+    /*
+     * Free the working variables
+     */
+    if (remove_path != 0)
+        ref->path = NULL;
+    if (ref != NULL)
+	uri_free (ref);
+    if (bas != NULL)
+	uri_free (bas);
+
+    return val;
+}
+
+/*
+ * Utility functions to help parse and assemble query strings.
+ */
+
+struct QueryParams *
+query_params_new (int init_alloc)
+{
+    struct QueryParams *ps;
+
+    if (init_alloc <= 0) init_alloc = 1;
+
+    ps = g_new(QueryParams, 1);
+    ps->n = 0;
+    ps->alloc = init_alloc;
+    ps->p = g_new(QueryParam, ps->alloc);
+
+    return ps;
+}
+
+/* Ensure there is space to store at least one more parameter
+ * at the end of the set.
+ */
+static int
+query_params_append (struct QueryParams *ps,
+               const char *name, const char *value)
+{
+    if (ps->n >= ps->alloc) {
+        ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
+        ps->alloc *= 2;
+    }
+
+    ps->p[ps->n].name = g_strdup(name);
+    ps->p[ps->n].value = value ? g_strdup(value) : NULL;
+    ps->p[ps->n].ignore = 0;
+    ps->n++;
+
+    return 0;
+}
+
+void
+query_params_free (struct QueryParams *ps)
+{
+    int i;
+
+    for (i = 0; i < ps->n; ++i) {
+        g_free (ps->p[i].name);
+        g_free (ps->p[i].value);
+    }
+    g_free (ps->p);
+    g_free (ps);
+}
+
+struct QueryParams *
+query_params_parse (const char *query)
+{
+    struct QueryParams *ps;
+    const char *end, *eq;
+
+    ps = query_params_new (0);
+    if (!query || query[0] == '\0') return ps;
+
+    while (*query) {
+        char *name = NULL, *value = NULL;
+
+        /* Find the next separator, or end of the string. */
+        end = strchr (query, '&');
+        if (!end)
+            end = strchr (query, ';');
+        if (!end)
+            end = query + strlen (query);
+
+        /* Find the first '=' character between here and end. */
+        eq = strchr (query, '=');
+        if (eq && eq >= end) eq = NULL;
+
+        /* Empty section (eg. "&&"). */
+        if (end == query)
+            goto next;
+
+        /* If there is no '=' character, then we have just "name"
+         * and consistent with CGI.pm we assume value is "".
+         */
+        else if (!eq) {
+            name = uri_string_unescape (query, end - query, NULL);
+            value = NULL;
+        }
+        /* Or if we have "name=" here (works around annoying
+         * problem when calling uri_string_unescape with len = 0).
+         */
+        else if (eq+1 == end) {
+            name = uri_string_unescape (query, eq - query, NULL);
+            value = g_new0(char, 1);
+        }
+        /* If the '=' character is at the beginning then we have
+         * "=value" and consistent with CGI.pm we _ignore_ this.
+         */
+        else if (query == eq)
+            goto next;
+
+        /* Otherwise it's "name=value". */
+        else {
+            name = uri_string_unescape (query, eq - query, NULL);
+            value = uri_string_unescape (eq+1, end - (eq+1), NULL);
+        }
+
+        /* Append to the parameter set. */
+        query_params_append (ps, name, value);
+        g_free(name);
+        g_free(value);
+
+    next:
+        query = end;
+        if (*query) query ++; /* skip '&' separator */
+    }
+
+    return ps;
+}
diff --git a/uri.h b/uri.h
new file mode 100644
index 0000000..de99b3b
--- /dev/null
+++ b/uri.h
@@ -0,0 +1,113 @@
+/**
+ * Summary: library of generic URI related routines
+ * Description: library of generic URI related routines
+ *              Implements RFC 2396
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * Author: Daniel Veillard
+ **
+ * Copyright (C) 2007 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *    Richard W.M. Jones <rjones@redhat.com>
+ *
+ * Utility functions to help parse and assemble query strings.
+ */
+
+#ifndef QEMU_URI_H
+#define QEMU_URI_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * URI:
+ *
+ * A parsed URI reference. This is a struct containing the various fields
+ * as described in RFC 2396 but separated for further processing.
+ */
+typedef struct URI {
+    char *scheme;	/* the URI scheme */
+    char *opaque;	/* opaque part */
+    char *authority;	/* the authority part */
+    char *server;	/* the server part */
+    char *user;		/* the user part */
+    int port;		/* the port number */
+    char *path;		/* the path string */
+    char *fragment;	/* the fragment identifier */
+    int  cleanup;	/* parsing potentially unclean URI */
+    char *query;	/* the query string (as it appears in the URI) */
+} URI;
+
+URI *uri_new(void);
+char *uri_resolve(const char *URI, const char *base);
+char *uri_resolve_relative(const char *URI, const char *base);
+URI *uri_parse(const char *str);
+URI *uri_parse_raw(const char *str, int raw);
+int uri_parse_into(URI *uri, const char *str);
+char *uri_to_string(URI *uri);
+char *uri_string_escape(const char *str, const char *list);
+char *uri_string_unescape(const char *str, int len, char *target);
+void uri_free(URI *uri);
+
+/* Single web service query parameter 'name=value'. */
+typedef struct QueryParam {
+  char *name;			/* Name (unescaped). */
+  char *value;			/* Value (unescaped). */
+  int ignore;			/* Ignore this field in qparam_get_query */
+} QueryParam;
+
+/* Set of parameters. */
+typedef struct QueryParams {
+  int n;			/* number of parameters used */
+  int alloc;			/* allocated space */
+  QueryParam *p;		/* array of parameters */
+} QueryParams;
+
+struct QueryParams *query_params_new (int init_alloc);
+int query_param_append (QueryParams *ps, const char *name, const char *value);
+extern char *query_param_to_string (const QueryParams *ps);
+extern QueryParams *query_params_parse (const char *query);
+extern void query_params_free (QueryParams *ps);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* QEMU_URI_H */
diff --git a/user-exec.c b/user-exec.c
index b9ea9dd..ef9b172 100644
--- a/user-exec.c
+++ b/user-exec.c
@@ -18,9 +18,6 @@
  */
 #include "config.h"
 #include "cpu.h"
-#ifndef CONFIG_TCG_PASS_AREG0
-#include "dyngen-exec.h"
-#endif
 #include "disas.h"
 #include "tcg.h"
 
@@ -60,12 +57,6 @@
     struct sigcontext *uc = puc;
 #endif
 
-#ifndef CONFIG_TCG_PASS_AREG0
-    env = env1;
-
-    /* XXX: restore cpu registers saved in host registers */
-#endif
-
     if (puc) {
         /* XXX: use siglongjmp ? */
 #ifdef __linux__
@@ -93,11 +84,6 @@
     TranslationBlock *tb;
     int ret;
 
-#ifndef CONFIG_TCG_PASS_AREG0
-    if (cpu_single_env) {
-        env = cpu_single_env; /* XXX: find a correct solution for multithread */
-    }
-#endif
 #if defined(DEBUG_SIGNAL)
     qemu_printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
                 pc, address, is_write, *(unsigned long *)old_set);
diff --git a/vl.c b/vl.c
index c681c33..a1c0aa7 100644
--- a/vl.c
+++ b/vl.c
@@ -1023,7 +1023,6 @@
         }
         nb_numa_nodes++;
     }
-    return;
 }
 
 static void smp_parse(const char *optarg)
@@ -1355,6 +1354,8 @@
 static int debug_requested;
 static int suspend_requested;
 static int wakeup_requested;
+static NotifierList powerdown_notifiers =
+    NOTIFIER_LIST_INITIALIZER(powerdown_notifiers);
 static NotifierList suspend_notifiers =
     NOTIFIER_LIST_INITIALIZER(suspend_notifiers);
 static NotifierList wakeup_notifiers =
@@ -1563,12 +1564,23 @@
     qemu_notify_event();
 }
 
+static void qemu_system_powerdown(void)
+{
+    monitor_protocol_event(QEVENT_POWERDOWN, NULL);
+    notifier_list_notify(&powerdown_notifiers, NULL);
+}
+
 void qemu_system_powerdown_request(void)
 {
     powerdown_requested = 1;
     qemu_notify_event();
 }
 
+void qemu_register_powerdown_notifier(Notifier *notifier)
+{
+    notifier_list_add(&powerdown_notifiers, notifier);
+}
+
 void qemu_system_debug_request(void)
 {
     debug_requested = 1;
@@ -1581,8 +1593,6 @@
     qemu_notify_event();
 }
 
-qemu_irq qemu_system_powerdown;
-
 static bool main_loop_should_exit(void)
 {
     RunState r;
@@ -1619,8 +1629,7 @@
         monitor_protocol_event(QEVENT_WAKEUP, NULL);
     }
     if (qemu_powerdown_requested()) {
-        monitor_protocol_event(QEVENT_POWERDOWN, NULL);
-        qemu_irq_raise(qemu_system_powerdown);
+        qemu_system_powerdown();
     }
     if (qemu_vmstop_requested(&r)) {
         vm_stop(r);
@@ -1690,17 +1699,23 @@
 
 static bool vga_available(void)
 {
-    return qdev_exists("VGA") || qdev_exists("isa-vga");
+    return object_class_by_name("VGA") || object_class_by_name("isa-vga");
 }
 
 static bool cirrus_vga_available(void)
 {
-    return qdev_exists("cirrus-vga") || qdev_exists("isa-cirrus-vga");
+    return object_class_by_name("cirrus-vga")
+           || object_class_by_name("isa-cirrus-vga");
 }
 
 static bool vmware_vga_available(void)
 {
-    return qdev_exists("vmware-svga");
+    return object_class_by_name("vmware-svga");
+}
+
+static bool qxl_vga_available(void)
+{
+    return object_class_by_name("qxl-vga");
 }
 
 static void select_vgahw (const char *p)
@@ -1732,7 +1747,12 @@
     } else if (strstart(p, "xenfb", &opts)) {
         vga_interface_type = VGA_XENFB;
     } else if (strstart(p, "qxl", &opts)) {
-        vga_interface_type = VGA_QXL;
+        if (qxl_vga_available()) {
+            vga_interface_type = VGA_QXL;
+        } else {
+            fprintf(stderr, "Error: QXL VGA not available\n");
+            exit(0);
+        }
     } else if (!strstart(p, "none", &opts)) {
     invalid_vga:
         fprintf(stderr, "Unknown vga type: %s\n", p);
@@ -2622,7 +2642,8 @@
                 {
                     static const char * const params[] = {
                         "order", "once", "menu",
-                        "splash", "splash-time", NULL
+                        "splash", "splash-time",
+                        "reboot-timeout", NULL
                     };
                     char buf[sizeof(boot_devices)];
                     char *standard_boot_devices;
@@ -3630,8 +3651,12 @@
         exit(1);
 
     /* If no default VGA is requested, the default is "none".  */
-    if (default_vga && cirrus_vga_available()) {
-        vga_model = "cirrus";
+    if (default_vga) {
+        if (cirrus_vga_available()) {
+            vga_model = "cirrus";
+        } else if (vga_available()) {
+            vga_model = "std";
+        }
     }
     select_vgahw(vga_model);
 
@@ -3692,7 +3717,9 @@
         break;
 #if defined(CONFIG_CURSES)
     case DT_CURSES:
-        curses_display_init(ds, full_screen);
+        if (!is_daemonized()) {
+            curses_display_init(ds, full_screen);
+        }
         break;
 #endif
 #if defined(CONFIG_SDL)
diff --git a/xen-all.c b/xen-all.c
index f76b051..bcb7ef7 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -14,6 +14,7 @@
 #include "hw/pc.h"
 #include "hw/xen_common.h"
 #include "hw/xen_backend.h"
+#include "qmp-commands.h"
 
 #include "range.h"
 #include "xen-mapcache.h"
@@ -36,6 +37,7 @@
 
 static MemoryRegion ram_memory, ram_640k, ram_lo, ram_hi;
 static MemoryRegion *framebuffer;
+static bool xen_in_migration;
 
 /* Compatibility with older version */
 #if __XEN_LATEST_INTERFACE_VERSION__ < 0x0003020a
@@ -505,7 +507,8 @@
                                  bitmap);
     if (rc < 0) {
         if (rc != -ENODATA) {
-            fprintf(stderr, "xen: track_dirty_vram failed (0x" TARGET_FMT_plx
+            memory_region_set_dirty(framebuffer, 0, size);
+            DPRINTF("xen: track_dirty_vram failed (0x" TARGET_FMT_plx
                     ", 0x" TARGET_FMT_plx "): %s\n",
                     start_addr, start_addr + size, strerror(-rc));
         }
@@ -552,10 +555,14 @@
 
 static void xen_log_global_start(MemoryListener *listener)
 {
+    if (xen_enabled()) {
+        xen_in_migration = true;
+    }
 }
 
 static void xen_log_global_stop(MemoryListener *listener)
 {
+    xen_in_migration = false;
 }
 
 static void xen_eventfd_add(MemoryListener *listener,
@@ -588,6 +595,15 @@
     .priority = 10,
 };
 
+void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
+{
+    if (enable) {
+        memory_global_dirty_log_start();
+    } else {
+        memory_global_dirty_log_stop();
+    }
+}
+
 /* VCPU Operations, MMIO, IO ring ... */
 
 static void xen_reset_vcpu(void *opaque)
@@ -1076,7 +1092,6 @@
         QLIST_INSERT_HEAD(&state->physmap, physmap, list);
     }
     free(entries);
-    return;
 }
 
 int xen_hvm_init(void)
@@ -1213,3 +1228,24 @@
     /* destroy the domain */
     qemu_system_shutdown_request();
 }
+
+void xen_modified_memory(ram_addr_t start, ram_addr_t length)
+{
+    if (unlikely(xen_in_migration)) {
+        int rc;
+        ram_addr_t start_pfn, nb_pages;
+
+        if (length == 0) {
+            length = TARGET_PAGE_SIZE;
+        }
+        start_pfn = start >> TARGET_PAGE_BITS;
+        nb_pages = ((start + length + TARGET_PAGE_SIZE - 1) >> TARGET_PAGE_BITS)
+            - start_pfn;
+        rc = xc_hvm_modified_memory(xen_xc, xen_domid, start_pfn, nb_pages);
+        if (rc) {
+            fprintf(stderr,
+                    "%s failed for "RAM_ADDR_FMT" ("RAM_ADDR_FMT"): %i, %s\n",
+                    __func__, start, nb_pages, rc, strerror(-rc));
+        }
+    }
+}
diff --git a/xen-stub.c b/xen-stub.c
index 8ff2b79..9214392 100644
--- a/xen-stub.c
+++ b/xen-stub.c
@@ -11,6 +11,7 @@
 #include "qemu-common.h"
 #include "hw/xen.h"
 #include "memory.h"
+#include "qmp-commands.h"
 
 void xenstore_store_pv_console_info(int i, CharDriverState *chr)
 {
@@ -54,3 +55,11 @@
 void xen_register_framebuffer(MemoryRegion *mr)
 {
 }
+
+void qmp_xen_set_global_dirty_log(bool enable, Error **errp)
+{
+}
+
+void xen_modified_memory(ram_addr_t start, ram_addr_t length)
+{
+}