Merge commit '121afa9e0c02617c2a774996512e4f85f3e93da8' into upstream-merge

* commit '121afa9e0c02617c2a774996512e4f85f3e93da8': (85 commits)
  Revert "Add ability to disable build of all targets"
  cpu_physical_memory_write_rom() needs to do TB invalidates
  qemu-char: BUGFIX, don't call FD_ISSET with negative fd
  Revert 455aa1e08 and c3767ed0eb
  pc: Drop practically unused BOCHS BIOS debug ports
  add -machine mem-merge=on|off option
  Remove unused CONFIG_TCG_PASS_AREG0 and dead code
  target-mips: switch to AREG0 free mode
  target-sh4: switch to AREG0 free mode
  target-cris: Switch to AREG0 free mode
  target-cris: Avoid AREG0 for helpers
  target-microblaze: switch to AREG0 free mode
  target-arm: final conversion to AREG0 free mode
  target-arm: convert remaining helpers
  target-arm: convert void helpers
  target-unicore32: switch to AREG0 free mode
  target-m68k: avoid using cpu_single_env
  target-m68k: switch to AREG0 free mode
  target-lm32: switch to AREG0 free mode
  target-s390x: avoid cpu_single_env
  ...

Conflicts:
	configure

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
diff --git a/Makefile.target b/Makefile.target
index 7892a8d..d9d54b8 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -80,14 +80,6 @@
 
 tci-dis.o: QEMU_CFLAGS += -I$(SRC_PATH)/tcg -I$(SRC_PATH)/tcg/tci
 
-# HELPER_CFLAGS is used for all the legacy code compiled with static register
-# variables
-user-exec.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
-
-# Note: this is a workaround. The real fix is to avoid compiling
-# cpu_signal_handler() in user-exec.c.
-%/signal.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
-
 #########################################################
 # Linux user emulator target
 
diff --git a/configure b/configure
index b67be2c..57482f3 100755
--- a/configure
+++ b/configure
@@ -118,7 +118,6 @@
 audio_possible_cards="ac97 es1370 sb16 cs4231a adlib gus hda"
 block_drv_whitelist=""
 host_cc="gcc"
-helper_cflags=""
 libs_softmmu=""
 libs_tools=""
 audio_pt_int=""
@@ -911,7 +910,6 @@
            QEMU_CFLAGS="-m32 -ffixed-g2 -ffixed-g3 $QEMU_CFLAGS"
            if test "$solaris" = "no" ; then
              QEMU_CFLAGS="-ffixed-g1 -ffixed-g6 $QEMU_CFLAGS"
-             helper_cflags="-ffixed-i0"
            fi
            ;;
     sparc64)
@@ -936,7 +934,6 @@
            QEMU_CFLAGS="-m32 $QEMU_CFLAGS"
            LDFLAGS="-m32 $LDFLAGS"
            cc_i386='$(CC) -m32'
-           helper_cflags="-fomit-frame-pointer"
            host_guest_base="yes"
            ;;
     x86_64)
@@ -1331,10 +1328,15 @@
   exit 1
 fi
 
-if test "$target_list" = "DEFAULT" ; then
-    target_list=`echo "$default_target_list" | sed -e 's/,/ /g'`
+if test -z "$target_list" ; then
+    target_list="$default_target_list"
+else
+    target_list=`echo "$target_list" | sed -e 's/,/ /g'`
 fi
-
+if test -z "$target_list" ; then
+    echo "No targets enabled"
+    exit 1
+fi
 # see if system emulation was really requested
 case " $target_list " in
   *"-softmmu "*) softmmu=yes
@@ -2680,17 +2682,44 @@
 
 
 ##########################################
+# Do we need libm
+cat > $TMPC << EOF
+#include <math.h>
+int main(void) { return isnan(sin(0.0)); }
+EOF
+if compile_prog "" "" ; then
+  :
+elif compile_prog "" "-lm" ; then
+  LIBS="-lm $LIBS"
+  libs_qga="-lm $libs_qga"
+else
+  echo
+  echo "Error: libm check failed"
+  echo
+  exit 1
+fi
+
+##########################################
 # Do we need librt
+# uClibc provides 2 versions of clock_gettime(), one with realtime
+# support and one without. This means that the clock_gettime() don't
+# need -lrt. We still need it for timer_create() so we check for this
+# function in addition.
 cat > $TMPC <<EOF
 #include <signal.h>
 #include <time.h>
-int main(void) { return clock_gettime(CLOCK_REALTIME, NULL); }
+int main(void) {
+  timer_create(CLOCK_REALTIME, NULL, NULL);
+  return clock_gettime(CLOCK_REALTIME, NULL);
+}
 EOF
 
 if compile_prog "" "" ; then
   :
-elif compile_prog "" "-lrt" ; then
+# we need pthread for static linking. use previous pthread test result
+elif compile_prog "" "-lrt $pthread_lib" ; then
   LIBS="-lrt $LIBS"
+  libs_qga="-lrt $libs_qga"
 fi
 
 if test "$darwin" != "yes" -a "$mingw32" != "yes" -a "$solaris" != yes -a \
@@ -2767,7 +2796,7 @@
 
 # check for usbredirparser for usb network redirection support
 if test "$usb_redir" != "no" ; then
-    if $pkg_config --atleast-version=0.3.4 libusbredirparser >/dev/null 2>&1 ; then
+    if $pkg_config --atleast-version=0.5 libusbredirparser >/dev/null 2>&1 ; then
         usb_redir="yes"
         usb_redir_cflags=$($pkg_config --cflags libusbredirparser 2>/dev/null)
         usb_redir_libs=$($pkg_config --libs libusbredirparser 2>/dev/null)
@@ -3599,7 +3628,6 @@
   echo "HOST_CC      := REAL_CC=\"\$(HOST_CC)\" cgcc"  >> $config_host_mak
   echo "QEMU_CFLAGS  += -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-non-pointer-null" >> $config_host_mak
 fi
-echo "HELPER_CFLAGS=$helper_cflags" >> $config_host_mak
 echo "LDFLAGS=$LDFLAGS" >> $config_host_mak
 echo "ARLIBS_BEGIN=$arlibs_begin" >> $config_host_mak
 echo "ARLIBS_END=$arlibs_end" >> $config_host_mak
@@ -3854,13 +3882,6 @@
 
 symlink "$source_path/Makefile.target" "$target_dir/Makefile"
 
-
-case "$target_arch2" in
-  alpha | i386 | or32 | s390x | sparc* | x86_64 | xtensa* | ppc*)
-    echo "CONFIG_TCG_PASS_AREG0=y" >> $config_target_mak
-  ;;
-esac
-
 upper() {
     echo "$@"| LC_ALL=C tr '[a-z]' '[A-Z]'
 }
diff --git a/cpu-all.h b/cpu-all.h
index 5e07d28..74d3681 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -260,14 +260,6 @@
 #define stfl(p, v) stfl_raw(p, v)
 #define stfq(p, v) stfq_raw(p, v)
 
-#ifndef CONFIG_TCG_PASS_AREG0
-#define ldub_code(p) ldub_raw(p)
-#define ldsb_code(p) ldsb_raw(p)
-#define lduw_code(p) lduw_raw(p)
-#define ldsw_code(p) ldsw_raw(p)
-#define ldl_code(p) ldl_raw(p)
-#define ldq_code(p) ldq_raw(p)
-#else
 #define cpu_ldub_code(env1, p) ldub_raw(p)
 #define cpu_ldsb_code(env1, p) ldsb_raw(p)
 #define cpu_lduw_code(env1, p) lduw_raw(p)
@@ -296,7 +288,6 @@
 #define cpu_stw_kernel(env, addr, data) stw_raw(addr, data)
 #define cpu_stl_kernel(env, addr, data) stl_raw(addr, data)
 #define cpu_stq_kernel(env, addr, data) stq_raw(addr, data)
-#endif
 
 #define ldub_kernel(p) ldub_raw(p)
 #define ldsb_kernel(p) ldsb_raw(p)
@@ -313,7 +304,6 @@
 #define stfl_kernel(p, v) stfl_raw(p, v)
 #define stfq_kernel(p, vt) stfq_raw(p, v)
 
-#ifdef CONFIG_TCG_PASS_AREG0
 #define cpu_ldub_data(env, addr) ldub_raw(addr)
 #define cpu_lduw_data(env, addr) lduw_raw(addr)
 #define cpu_ldl_data(env, addr) ldl_raw(addr)
@@ -321,7 +311,6 @@
 #define cpu_stb_data(env, addr, data) stb_raw(addr, data)
 #define cpu_stw_data(env, addr, data) stw_raw(addr, data)
 #define cpu_stl_data(env, addr, data) stl_raw(addr, data)
-#endif
 #endif /* defined(CONFIG_USER_ONLY) */
 
 /* page related stuff */
diff --git a/cputlb.c b/cputlb.c
index d3e7b25..51b5897 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -325,11 +325,7 @@
     mmu_idx = cpu_mmu_index(env1);
     if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
                  (addr & TARGET_PAGE_MASK))) {
-#ifdef CONFIG_TCG_PASS_AREG0
         cpu_ldub_code(env1, addr);
-#else
-        ldub_code(addr);
-#endif
     }
     pd = env1->iotlb[mmu_idx][page_index] & ~TARGET_PAGE_MASK;
     mr = iotlb_to_region(pd);
@@ -348,7 +344,6 @@
 #define MMUSUFFIX _cmmu
 #undef GETPC
 #define GETPC() ((uintptr_t)0)
-#define env cpu_single_env
 #define SOFTMMU_CODE_ACCESS
 
 #define SHIFT 0
diff --git a/def-helper.h b/def-helper.h
index b98ff69..022a9ce 100644
--- a/def-helper.h
+++ b/def-helper.h
@@ -128,6 +128,8 @@
 #define DEF_HELPER_5(name, ret, t1, t2, t3, t4, t5) \
     DEF_HELPER_FLAGS_5(name, 0, ret, t1, t2, t3, t4, t5)
 
+/* MAX_OPC_PARAM_IARGS must be set to n if last entry is DEF_HELPER_FLAGS_n. */
+
 #endif /* DEF_HELPER_H */
 
 #ifndef GEN_HELPER
diff --git a/dyngen-exec.h b/dyngen-exec.h
deleted file mode 100644
index 083e20b..0000000
--- a/dyngen-exec.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- *  dyngen defines for micro operation code
- *
- *  Copyright (c) 2003 Fabrice Bellard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, see <http://www.gnu.org/licenses/>.
- */
-#if !defined(__DYNGEN_EXEC_H__)
-#define __DYNGEN_EXEC_H__
-
-#if defined(CONFIG_TCG_INTERPRETER)
-/* The TCG interpreter does not need a special register AREG0,
- * but it is possible to use one by defining AREG0.
- * On i386, register edi seems to work. */
-/* Run without special register AREG0 or use a value defined elsewhere. */
-#elif defined(__i386__)
-#define AREG0 "ebp"
-#elif defined(__x86_64__)
-#define AREG0 "r14"
-#elif defined(_ARCH_PPC)
-#define AREG0 "r27"
-#elif defined(__arm__)
-#define AREG0 "r6"
-#elif defined(__hppa__)
-#define AREG0 "r17"
-#elif defined(__mips__)
-#define AREG0 "s0"
-#elif defined(__sparc__)
-#ifdef CONFIG_SOLARIS
-#define AREG0 "g2"
-#else
-#ifdef __sparc_v9__
-#define AREG0 "g5"
-#else
-#define AREG0 "g6"
-#endif
-#endif
-#elif defined(__s390__)
-#define AREG0 "r10"
-#elif defined(__alpha__)
-/* Note $15 is the frame pointer, so anything in op-i386.c that would
-   require a frame pointer, like alloca, would probably loose.  */
-#define AREG0 "$15"
-#elif defined(__mc68000)
-#define AREG0 "%a5"
-#elif defined(__ia64__)
-#define AREG0 "r7"
-#else
-#error unsupported CPU
-#endif
-
-#if defined(AREG0)
-register CPUArchState *env asm(AREG0);
-#else
-/* TODO: Try env = cpu_single_env. */
-extern CPUArchState *env;
-#endif
-
-#endif /* !defined(__DYNGEN_EXEC_H__) */
diff --git a/exec-all.h b/exec-all.h
index c5ec8e1..dba9609 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -51,7 +51,7 @@
 #else
 #define MAX_OPC_PARAM_PER_ARG 1
 #endif
-#define MAX_OPC_PARAM_IARGS 4
+#define MAX_OPC_PARAM_IARGS 5
 #define MAX_OPC_PARAM_OARGS 1
 #define MAX_OPC_PARAM_ARGS (MAX_OPC_PARAM_IARGS + MAX_OPC_PARAM_OARGS)
 
@@ -323,9 +323,6 @@
 
 #define ACCESS_TYPE (NB_MMU_MODES + 1)
 #define MEMSUFFIX _code
-#ifndef CONFIG_TCG_PASS_AREG0
-#define env cpu_single_env
-#endif
 
 #define DATA_SIZE 1
 #include "softmmu_header.h"
@@ -341,7 +338,6 @@
 
 #undef ACCESS_TYPE
 #undef MEMSUFFIX
-#undef env
 
 #endif
 
diff --git a/exec.c b/exec.c
index 5834766..f22e9e6 100644
--- a/exec.c
+++ b/exec.c
@@ -2525,6 +2525,19 @@
     }
 }
 
+static int memory_try_enable_merging(void *addr, size_t len)
+{
+    QemuOpts *opts;
+
+    opts = qemu_opts_find(qemu_find_opts("machine"), 0);
+    if (opts && !qemu_opt_get_bool(opts, "mem-merge", true)) {
+        /* disabled by the user */
+        return 0;
+    }
+
+    return qemu_madvise(addr, len, QEMU_MADV_MERGEABLE);
+}
+
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr)
 {
@@ -2544,7 +2557,7 @@
             new_block->host = file_ram_alloc(new_block, size, mem_path);
             if (!new_block->host) {
                 new_block->host = qemu_vmalloc(size);
-                qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
+                memory_try_enable_merging(new_block->host, size);
             }
 #else
             fprintf(stderr, "-mem-path option unsupported\n");
@@ -2559,7 +2572,7 @@
             } else {
                 new_block->host = qemu_vmalloc(size);
             }
-            qemu_madvise(new_block->host, size, QEMU_MADV_MERGEABLE);
+            memory_try_enable_merging(new_block->host, size);
         }
     }
     new_block->length = size;
@@ -2689,7 +2702,7 @@
                             length, addr);
                     exit(1);
                 }
-                qemu_madvise(vaddr, length, QEMU_MADV_MERGEABLE);
+                memory_try_enable_merging(vaddr, length);
                 qemu_ram_setup_dump(vaddr, length);
             }
             return;
@@ -3523,6 +3536,13 @@
             /* ROM/RAM case */
             ptr = qemu_get_ram_ptr(addr1);
             memcpy(ptr, buf, l);
+            if (!cpu_physical_memory_is_dirty(addr1)) {
+                /* invalidate code */
+                tb_invalidate_phys_page_range(addr1, addr1 + l, 0);
+                /* set dirty bit */
+                cpu_physical_memory_set_dirty_flags(
+                    addr1, (0xff & ~CODE_DIRTY_FLAG));
+            }
             qemu_put_ram_ptr(ptr);
         }
         len -= l;
diff --git a/hw/pc.c b/hw/pc.c
index c32ee8e..52bc461 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -53,9 +53,6 @@
 #include "bitmap.h"
 #include "vga-pci.h"
 
-/* output Bochs bios info messages */
-//#define DEBUG_BIOS
-
 /* debug PC/ISA interrupts */
 //#define DEBUG_IRQ
 
@@ -534,17 +531,6 @@
     static int shutdown_index = 0;
 
     switch(addr) {
-        /* Bochs BIOS messages */
-    case 0x400:
-    case 0x401:
-        /* used to be panic, now unused */
-        break;
-    case 0x402:
-    case 0x403:
-#ifdef DEBUG_BIOS
-        fprintf(stderr, "%c", val);
-#endif
-        break;
     case 0x8900:
         /* same as Bochs power off */
         if (val == shutdown_str[shutdown_index]) {
@@ -558,16 +544,9 @@
         }
         break;
 
-        /* LGPL'ed VGA BIOS messages */
     case 0x501:
     case 0x502:
         exit((val << 1) | 1);
-    case 0x500:
-    case 0x503:
-#ifdef DEBUG_BIOS
-        fprintf(stderr, "%c", val);
-#endif
-        break;
     }
 }
 
@@ -596,17 +575,11 @@
     uint64_t *numa_fw_cfg;
     int i, j;
 
-    register_ioport_write(0x400, 1, 2, bochs_bios_write, NULL);
-    register_ioport_write(0x401, 1, 2, bochs_bios_write, NULL);
-    register_ioport_write(0x402, 1, 1, bochs_bios_write, NULL);
-    register_ioport_write(0x403, 1, 1, bochs_bios_write, NULL);
     register_ioport_write(0x8900, 1, 1, bochs_bios_write, NULL);
 
     register_ioport_write(0x501, 1, 1, bochs_bios_write, NULL);
     register_ioport_write(0x501, 1, 2, bochs_bios_write, NULL);
     register_ioport_write(0x502, 1, 2, bochs_bios_write, NULL);
-    register_ioport_write(0x500, 1, 1, bochs_bios_write, NULL);
-    register_ioport_write(0x503, 1, 1, bochs_bios_write, NULL);
 
     fw_cfg = fw_cfg_init(BIOS_CFG_IOPORT, BIOS_CFG_IOPORT + 1, 0, 0);
 
diff --git a/hw/spapr_hcall.c b/hw/spapr_hcall.c
index a5990a9..abd847f 100644
--- a/hw/spapr_hcall.c
+++ b/hw/spapr_hcall.c
@@ -1,6 +1,5 @@
 #include "sysemu.h"
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "qemu-char.h"
 #include "sysemu.h"
 #include "qemu-char.h"
diff --git a/hw/usb.h b/hw/usb.h
index b8fceec..48c8926 100644
--- a/hw/usb.h
+++ b/hw/usb.h
@@ -135,8 +135,15 @@
 #define USB_DT_OTHER_SPEED_CONFIG       0x07
 #define USB_DT_DEBUG                    0x0A
 #define USB_DT_INTERFACE_ASSOC          0x0B
+#define USB_DT_BOS                      0x0F
+#define USB_DT_DEVICE_CAPABILITY        0x10
 #define USB_DT_CS_INTERFACE             0x24
 #define USB_DT_CS_ENDPOINT              0x25
+#define USB_DT_ENDPOINT_COMPANION       0x30
+
+#define USB_DEV_CAP_WIRELESS            0x01
+#define USB_DEV_CAP_USB2_EXT            0x02
+#define USB_DEV_CAP_SUPERSPEED          0x03
 
 #define USB_ENDPOINT_XFER_CONTROL	0
 #define USB_ENDPOINT_XFER_ISOC		1
@@ -377,6 +384,8 @@
                                 uint16_t raw);
 int usb_ep_get_max_packet_size(USBDevice *dev, int pid, int ep);
 void usb_ep_set_pipeline(USBDevice *dev, int pid, int ep, bool enabled);
+USBPacket *usb_ep_find_packet_by_id(USBDevice *dev, int pid, int ep,
+                                    uint64_t id);
 
 void usb_attach(USBPort *port);
 void usb_detach(USBPort *port);
diff --git a/hw/usb/core.c b/hw/usb/core.c
index 2da38e7..b9f1f7a 100644
--- a/hw/usb/core.c
+++ b/hw/usb/core.c
@@ -398,9 +398,11 @@
              * When pipelining is enabled usb-devices must always return async,
              * otherwise packets can complete out of order!
              */
-            assert(!p->ep->pipeline);
-            p->result = ret;
-            usb_packet_set_state(p, USB_PACKET_COMPLETE);
+            assert(!p->ep->pipeline || QTAILQ_EMPTY(&p->ep->queue));
+            if (ret != USB_RET_NAK) {
+                p->result = ret;
+                usb_packet_set_state(p, USB_PACKET_COMPLETE);
+            }
         }
     } else {
         ret = USB_RET_ASYNC;
@@ -724,3 +726,18 @@
     struct USBEndpoint *uep = usb_ep_get(dev, pid, ep);
     uep->pipeline = enabled;
 }
+
+USBPacket *usb_ep_find_packet_by_id(USBDevice *dev, int pid, int ep,
+                                    uint64_t id)
+{
+    struct USBEndpoint *uep = usb_ep_get(dev, pid, ep);
+    USBPacket *p;
+
+    while ((p = QTAILQ_FIRST(&uep->queue)) != NULL) {
+        if (p->id == id) {
+            return p;
+        }
+    }
+
+    return NULL;
+}
diff --git a/hw/usb/desc.c b/hw/usb/desc.c
index 0a9d3c9..1f12eae 100644
--- a/hw/usb/desc.c
+++ b/hw/usb/desc.c
@@ -76,7 +76,8 @@
     return bLength;
 }
 
-int usb_desc_config(const USBDescConfig *conf, uint8_t *dest, size_t len)
+int usb_desc_config(const USBDescConfig *conf, int flags,
+                    uint8_t *dest, size_t len)
 {
     uint8_t  bLength = 0x09;
     uint16_t wTotalLength = 0;
@@ -99,7 +100,7 @@
 
     /* handle grouped interfaces if any */
     for (i = 0; i < conf->nif_groups; i++) {
-        rc = usb_desc_iface_group(&(conf->if_groups[i]),
+        rc = usb_desc_iface_group(&(conf->if_groups[i]), flags,
                                   dest + wTotalLength,
                                   len - wTotalLength);
         if (rc < 0) {
@@ -110,7 +111,8 @@
 
     /* handle normal (ungrouped / no IAD) interfaces if any */
     for (i = 0; i < conf->nif; i++) {
-        rc = usb_desc_iface(conf->ifs + i, dest + wTotalLength, len - wTotalLength);
+        rc = usb_desc_iface(conf->ifs + i, flags,
+                            dest + wTotalLength, len - wTotalLength);
         if (rc < 0) {
             return rc;
         }
@@ -122,8 +124,8 @@
     return wTotalLength;
 }
 
-int usb_desc_iface_group(const USBDescIfaceAssoc *iad, uint8_t *dest,
-                         size_t len)
+int usb_desc_iface_group(const USBDescIfaceAssoc *iad, int flags,
+                         uint8_t *dest, size_t len)
 {
     int pos = 0;
     int i = 0;
@@ -147,7 +149,7 @@
 
     /* handle associated interfaces in this group */
     for (i = 0; i < iad->nif; i++) {
-        int rc = usb_desc_iface(&(iad->ifs[i]), dest + pos, len - pos);
+        int rc = usb_desc_iface(&(iad->ifs[i]), flags, dest + pos, len - pos);
         if (rc < 0) {
             return rc;
         }
@@ -157,7 +159,8 @@
     return pos;
 }
 
-int usb_desc_iface(const USBDescIface *iface, uint8_t *dest, size_t len)
+int usb_desc_iface(const USBDescIface *iface, int flags,
+                   uint8_t *dest, size_t len)
 {
     uint8_t bLength = 0x09;
     int i, rc, pos = 0;
@@ -188,7 +191,7 @@
     }
 
     for (i = 0; i < iface->bNumEndpoints; i++) {
-        rc = usb_desc_endpoint(iface->eps + i, dest + pos, len - pos);
+        rc = usb_desc_endpoint(iface->eps + i, flags, dest + pos, len - pos);
         if (rc < 0) {
             return rc;
         }
@@ -198,13 +201,15 @@
     return pos;
 }
 
-int usb_desc_endpoint(const USBDescEndpoint *ep, uint8_t *dest, size_t len)
+int usb_desc_endpoint(const USBDescEndpoint *ep, int flags,
+                      uint8_t *dest, size_t len)
 {
     uint8_t bLength = ep->is_audio ? 0x09 : 0x07;
     uint8_t extralen = ep->extra ? ep->extra[0] : 0;
+    uint8_t superlen = (flags & USB_DESC_FLAG_SUPER) ? 0x06 : 0;
     USBDescriptor *d = (void *)dest;
 
-    if (len < bLength + extralen) {
+    if (len < bLength + extralen + superlen) {
         return -1;
     }
 
@@ -224,7 +229,21 @@
         memcpy(dest + bLength, ep->extra, extralen);
     }
 
-    return bLength + extralen;
+    if (superlen) {
+        USBDescriptor *d = (void *)(dest + bLength + extralen);
+
+        d->bLength                       = 0x06;
+        d->bDescriptorType               = USB_DT_ENDPOINT_COMPANION;
+
+        d->u.super_endpoint.bMaxBurst    = ep->bMaxBurst;
+        d->u.super_endpoint.bmAttributes = ep->bmAttributes_super;
+        d->u.super_endpoint.wBytesPerInterval_lo =
+            usb_lo(ep->wBytesPerInterval);
+        d->u.super_endpoint.wBytesPerInterval_hi =
+            usb_hi(ep->wBytesPerInterval);
+    }
+
+    return bLength + extralen + superlen;
 }
 
 int usb_desc_other(const USBDescOther *desc, uint8_t *dest, size_t len)
@@ -239,6 +258,111 @@
     return bLength;
 }
 
+static int usb_desc_cap_usb2_ext(const USBDesc *desc, uint8_t *dest, size_t len)
+{
+    uint8_t  bLength = 0x07;
+    USBDescriptor *d = (void *)dest;
+
+    if (len < bLength) {
+        return -1;
+    }
+
+    d->bLength                          = bLength;
+    d->bDescriptorType                  = USB_DT_DEVICE_CAPABILITY;
+    d->u.cap.bDevCapabilityType         = USB_DEV_CAP_USB2_EXT;
+
+    d->u.cap.u.usb2_ext.bmAttributes_1  = (1 << 1);  /* LPM */
+    d->u.cap.u.usb2_ext.bmAttributes_2  = 0;
+    d->u.cap.u.usb2_ext.bmAttributes_3  = 0;
+    d->u.cap.u.usb2_ext.bmAttributes_4  = 0;
+
+    return bLength;
+}
+
+static int usb_desc_cap_super(const USBDesc *desc, uint8_t *dest, size_t len)
+{
+    uint8_t  bLength = 0x0a;
+    USBDescriptor *d = (void *)dest;
+
+    if (len < bLength) {
+        return -1;
+    }
+
+    d->bLength                           = bLength;
+    d->bDescriptorType                   = USB_DT_DEVICE_CAPABILITY;
+    d->u.cap.bDevCapabilityType          = USB_DEV_CAP_SUPERSPEED;
+
+    d->u.cap.u.super.bmAttributes        = 0;
+    d->u.cap.u.super.wSpeedsSupported_lo = 0;
+    d->u.cap.u.super.wSpeedsSupported_hi = 0;
+    d->u.cap.u.super.bFunctionalitySupport = 0;
+    d->u.cap.u.super.bU1DevExitLat       = 0x0a;
+    d->u.cap.u.super.wU2DevExitLat_lo    = 0x20;
+    d->u.cap.u.super.wU2DevExitLat_hi    = 0;
+
+    if (desc->full) {
+        d->u.cap.u.super.wSpeedsSupported_lo |= (1 << 1);
+        d->u.cap.u.super.bFunctionalitySupport = 1;
+    }
+    if (desc->high) {
+        d->u.cap.u.super.wSpeedsSupported_lo |= (1 << 2);
+        if (!d->u.cap.u.super.bFunctionalitySupport) {
+            d->u.cap.u.super.bFunctionalitySupport = 2;
+        }
+    }
+    if (desc->super) {
+        d->u.cap.u.super.wSpeedsSupported_lo |= (1 << 3);
+        if (!d->u.cap.u.super.bFunctionalitySupport) {
+            d->u.cap.u.super.bFunctionalitySupport = 3;
+        }
+    }
+
+    return bLength;
+}
+
+static int usb_desc_bos(const USBDesc *desc, uint8_t *dest, size_t len)
+{
+    uint8_t  bLength = 0x05;
+    uint16_t wTotalLength = 0;
+    uint8_t  bNumDeviceCaps = 0;
+    USBDescriptor *d = (void *)dest;
+    int rc;
+
+    if (len < bLength) {
+        return -1;
+    }
+
+    d->bLength                      = bLength;
+    d->bDescriptorType              = USB_DT_BOS;
+
+    wTotalLength += bLength;
+
+    if (desc->high != NULL) {
+        rc = usb_desc_cap_usb2_ext(desc, dest + wTotalLength,
+                                   len - wTotalLength);
+        if (rc < 0) {
+            return rc;
+        }
+        wTotalLength += rc;
+        bNumDeviceCaps++;
+    }
+
+    if (desc->super != NULL) {
+        rc = usb_desc_cap_super(desc, dest + wTotalLength,
+                                len - wTotalLength);
+        if (rc < 0) {
+            return rc;
+        }
+        wTotalLength += rc;
+        bNumDeviceCaps++;
+    }
+
+    d->u.bos.wTotalLength_lo = usb_lo(wTotalLength);
+    d->u.bos.wTotalLength_hi = usb_hi(wTotalLength);
+    d->u.bos.bNumDeviceCaps  = bNumDeviceCaps;
+    return wTotalLength;
+}
+
 /* ------------------------------------------------------------------ */
 
 static void usb_desc_ep_init(USBDevice *dev)
@@ -359,6 +483,9 @@
     case USB_SPEED_HIGH:
         dev->device = desc->high;
         break;
+    case USB_SPEED_SUPER:
+        dev->device = desc->super;
+        break;
     }
     usb_desc_set_config(dev, 0);
 }
@@ -376,6 +503,9 @@
     if (desc->high) {
         dev->speedmask |= USB_SPEED_MASK_HIGH;
     }
+    if (desc->super) {
+        dev->speedmask |= USB_SPEED_MASK_SUPER;
+    }
     usb_desc_setdefaults(dev);
 }
 
@@ -384,7 +514,9 @@
     const USBDesc *desc = usb_device_get_usb_desc(dev);
 
     assert(desc != NULL);
-    if (desc->high && (dev->port->speedmask & USB_SPEED_MASK_HIGH)) {
+    if (desc->super && (dev->port->speedmask & USB_SPEED_MASK_SUPER)) {
+        dev->speed = USB_SPEED_SUPER;
+    } else if (desc->high && (dev->port->speedmask & USB_SPEED_MASK_HIGH)) {
         dev->speed = USB_SPEED_HIGH;
     } else if (desc->full && (dev->port->speedmask & USB_SPEED_MASK_FULL)) {
         dev->speed = USB_SPEED_FULL;
@@ -501,7 +633,7 @@
     uint8_t buf[256];
     uint8_t type = value >> 8;
     uint8_t index = value & 0xff;
-    int ret = -1;
+    int flags, ret = -1;
 
     if (dev->speed == USB_SPEED_HIGH) {
         other_dev = usb_device_get_usb_desc(dev)->full;
@@ -509,6 +641,11 @@
         other_dev = usb_device_get_usb_desc(dev)->high;
     }
 
+    flags = 0;
+    if (dev->device->bcdUSB >= 0x0300) {
+        flags |= USB_DESC_FLAG_SUPER;
+    }
+
     switch(type) {
     case USB_DT_DEVICE:
         ret = usb_desc_device(&desc->id, dev->device, buf, sizeof(buf));
@@ -516,7 +653,8 @@
         break;
     case USB_DT_CONFIG:
         if (index < dev->device->bNumConfigurations) {
-            ret = usb_desc_config(dev->device->confs + index, buf, sizeof(buf));
+            ret = usb_desc_config(dev->device->confs + index, flags,
+                                  buf, sizeof(buf));
         }
         trace_usb_desc_config(dev->addr, index, len, ret);
         break;
@@ -524,7 +662,6 @@
         ret = usb_desc_string(dev, index, buf, sizeof(buf));
         trace_usb_desc_string(dev->addr, index, len, ret);
         break;
-
     case USB_DT_DEVICE_QUALIFIER:
         if (other_dev != NULL) {
             ret = usb_desc_device_qualifier(other_dev, buf, sizeof(buf));
@@ -533,11 +670,16 @@
         break;
     case USB_DT_OTHER_SPEED_CONFIG:
         if (other_dev != NULL && index < other_dev->bNumConfigurations) {
-            ret = usb_desc_config(other_dev->confs + index, buf, sizeof(buf));
+            ret = usb_desc_config(other_dev->confs + index, flags,
+                                  buf, sizeof(buf));
             buf[0x01] = USB_DT_OTHER_SPEED_CONFIG;
         }
         trace_usb_desc_other_speed_config(dev->addr, index, len, ret);
         break;
+    case USB_DT_BOS:
+        ret = usb_desc_bos(desc, buf, sizeof(buf));
+        trace_usb_desc_bos(dev->addr, len, ret);
+        break;
 
     case USB_DT_DEBUG:
         /* ignore silently */
diff --git a/hw/usb/desc.h b/hw/usb/desc.h
index 7cf5442..68bb570 100644
--- a/hw/usb/desc.h
+++ b/hw/usb/desc.h
@@ -63,6 +63,37 @@
             uint8_t           bRefresh;        /* only audio ep */
             uint8_t           bSynchAddress;   /* only audio ep */
         } endpoint;
+        struct {
+            uint8_t           bMaxBurst;
+            uint8_t           bmAttributes;
+            uint8_t           wBytesPerInterval_lo;
+            uint8_t           wBytesPerInterval_hi;
+        } super_endpoint;
+        struct {
+            uint8_t           wTotalLength_lo;
+            uint8_t           wTotalLength_hi;
+            uint8_t           bNumDeviceCaps;
+        } bos;
+        struct {
+            uint8_t           bDevCapabilityType;
+            union {
+                struct {
+                    uint8_t   bmAttributes_1;
+                    uint8_t   bmAttributes_2;
+                    uint8_t   bmAttributes_3;
+                    uint8_t   bmAttributes_4;
+                } usb2_ext;
+                struct {
+                    uint8_t   bmAttributes;
+                    uint8_t   wSpeedsSupported_lo;
+                    uint8_t   wSpeedsSupported_hi;
+                    uint8_t   bFunctionalitySupport;
+                    uint8_t   bU1DevExitLat;
+                    uint8_t   wU2DevExitLat_lo;
+                    uint8_t   wU2DevExitLat_hi;
+                } super;
+            } u;
+        } cap;
     } u;
 } QEMU_PACKED USBDescriptor;
 
@@ -139,6 +170,11 @@
 
     uint8_t                   is_audio; /* has bRefresh + bSynchAddress */
     uint8_t                   *extra;
+
+    /* superspeed endpoint companion */
+    uint8_t                   bMaxBurst;
+    uint8_t                   bmAttributes_super;
+    uint16_t                  wBytesPerInterval;
 };
 
 struct USBDescOther {
@@ -152,19 +188,25 @@
     USBDescID                 id;
     const USBDescDevice       *full;
     const USBDescDevice       *high;
+    const USBDescDevice       *super;
     const char* const         *str;
 };
 
+#define USB_DESC_FLAG_SUPER (1 << 1)
+
 /* generate usb packages from structs */
 int usb_desc_device(const USBDescID *id, const USBDescDevice *dev,
                     uint8_t *dest, size_t len);
 int usb_desc_device_qualifier(const USBDescDevice *dev,
                               uint8_t *dest, size_t len);
-int usb_desc_config(const USBDescConfig *conf, uint8_t *dest, size_t len);
-int usb_desc_iface_group(const USBDescIfaceAssoc *iad, uint8_t *dest,
-                         size_t len);
-int usb_desc_iface(const USBDescIface *iface, uint8_t *dest, size_t len);
-int usb_desc_endpoint(const USBDescEndpoint *ep, uint8_t *dest, size_t len);
+int usb_desc_config(const USBDescConfig *conf, int flags,
+                    uint8_t *dest, size_t len);
+int usb_desc_iface_group(const USBDescIfaceAssoc *iad, int flags,
+                         uint8_t *dest, size_t len);
+int usb_desc_iface(const USBDescIface *iface, int flags,
+                   uint8_t *dest, size_t len);
+int usb_desc_endpoint(const USBDescEndpoint *ep, int flags,
+                      uint8_t *dest, size_t len);
 int usb_desc_other(const USBDescOther *desc, uint8_t *dest, size_t len);
 
 /* control message emulation helpers */
diff --git a/hw/usb/dev-audio.c b/hw/usb/dev-audio.c
index 79b75fb..2594c78 100644
--- a/hw/usb/dev-audio.c
+++ b/hw/usb/dev-audio.c
@@ -217,7 +217,7 @@
 };
 
 static const USBDescDevice desc_device = {
-    .bcdUSB                        = 0x0200,
+    .bcdUSB                        = 0x0100,
     .bMaxPacketSize0               = 64,
     .bNumConfigurations            = 1,
     .confs = (USBDescConfig[]) {
diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c
index 8aa6552..69b6e48 100644
--- a/hw/usb/dev-serial.c
+++ b/hw/usb/dev-serial.c
@@ -113,7 +113,7 @@
 static const USBDescStrings desc_strings = {
     [STR_MANUFACTURER]    = "QEMU",
     [STR_PRODUCT_SERIAL]  = "QEMU USB SERIAL",
-    [STR_PRODUCT_BRAILLE] = "QEMU USB BRAILLE",
+    [STR_PRODUCT_BRAILLE] = "QEMU USB BAUM BRAILLE",
     [STR_SERIALNUMBER]    = "1",
 };
 
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
index ff48d91..e732191 100644
--- a/hw/usb/dev-storage.c
+++ b/hw/usb/dev-storage.c
@@ -78,6 +78,7 @@
     STR_SERIALNUMBER,
     STR_CONFIG_FULL,
     STR_CONFIG_HIGH,
+    STR_CONFIG_SUPER,
 };
 
 static const USBDescStrings desc_strings = {
@@ -86,6 +87,7 @@
     [STR_SERIALNUMBER] = "1",
     [STR_CONFIG_FULL]  = "Full speed config (usb 1.1)",
     [STR_CONFIG_HIGH]  = "High speed config (usb 2.0)",
+    [STR_CONFIG_SUPER] = "Super speed config (usb 3.0)",
 };
 
 static const USBDescIface desc_iface_full = {
@@ -158,6 +160,43 @@
     },
 };
 
+static const USBDescIface desc_iface_super = {
+    .bInterfaceNumber              = 0,
+    .bNumEndpoints                 = 2,
+    .bInterfaceClass               = USB_CLASS_MASS_STORAGE,
+    .bInterfaceSubClass            = 0x06, /* SCSI */
+    .bInterfaceProtocol            = 0x50, /* Bulk */
+    .eps = (USBDescEndpoint[]) {
+        {
+            .bEndpointAddress      = USB_DIR_IN | 0x01,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 1024,
+            .bMaxBurst             = 15,
+        },{
+            .bEndpointAddress      = USB_DIR_OUT | 0x02,
+            .bmAttributes          = USB_ENDPOINT_XFER_BULK,
+            .wMaxPacketSize        = 1024,
+            .bMaxBurst             = 15,
+        },
+    }
+};
+
+static const USBDescDevice desc_device_super = {
+    .bcdUSB                        = 0x0300,
+    .bMaxPacketSize0               = 9,
+    .bNumConfigurations            = 1,
+    .confs = (USBDescConfig[]) {
+        {
+            .bNumInterfaces        = 1,
+            .bConfigurationValue   = 1,
+            .iConfiguration        = STR_CONFIG_SUPER,
+            .bmAttributes          = 0xc0,
+            .nif = 1,
+            .ifs = &desc_iface_super,
+        },
+    },
+};
+
 static const USBDesc desc = {
     .id = {
         .idVendor          = 0x46f4, /* CRC16() of "QEMU" */
@@ -167,9 +206,10 @@
         .iProduct          = STR_PRODUCT,
         .iSerialNumber     = STR_SERIALNUMBER,
     },
-    .full = &desc_device_full,
-    .high = &desc_device_high,
-    .str  = desc_strings,
+    .full  = &desc_device_full,
+    .high  = &desc_device_high,
+    .super = &desc_device_super,
+    .str   = desc_strings,
 };
 
 static void usb_msd_copy_data(MSDState *s, USBPacket *p)
diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c
index 017342b..2f3e9c0 100644
--- a/hw/usb/hcd-ehci.c
+++ b/hw/usb/hcd-ehci.c
@@ -2,6 +2,11 @@
  * QEMU USB EHCI Emulation
  *
  * Copyright(c) 2008  Emutex Ltd. (address@hidden)
+ * Copyright(c) 2011-2012 Red Hat, Inc.
+ *
+ * Red Hat Authors:
+ * Gerd Hoffmann <kraxel@redhat.com>
+ * Hans de Goede <hdegoede@redhat.com>
  *
  * EHCI project was started by Mark Burkley, with contributions by
  * Niels de Vos.  David S. Ahern continued working on it.  Kevin Wolf,
@@ -340,6 +345,7 @@
 
 enum async_state {
     EHCI_ASYNC_NONE = 0,
+    EHCI_ASYNC_INITIALIZED,
     EHCI_ASYNC_INFLIGHT,
     EHCI_ASYNC_FINISHED,
 };
@@ -365,7 +371,6 @@
     uint32_t seen;
     uint64_t ts;
     int async;
-    int revalidate;
 
     /* cached data from guest - needs to be flushed
      * when guest removes an entry (doorbell, handshake sequence)
@@ -485,6 +490,9 @@
     [CONFIGFLAG]        = "CONFIGFLAG",
 };
 
+static int ehci_state_executing(EHCIQueue *q);
+static int ehci_state_writeback(EHCIQueue *q);
+
 static const char *nr2str(const char **n, size_t len, uint32_t nr)
 {
     if (nr < len && n[nr] != NULL) {
@@ -709,6 +717,12 @@
                         (bool)(sitd->results & SITD_RESULTS_ACTIVE));
 }
 
+static void ehci_trace_guest_bug(EHCIState *s, const char *message)
+{
+    trace_usb_ehci_guest_bug(message);
+    fprintf(stderr, "ehci warning: %s\n", message);
+}
+
 static inline bool ehci_enabled(EHCIState *s)
 {
     return s->usbcmd & USBCMD_RUNSTOP;
@@ -740,9 +754,25 @@
 
 static void ehci_free_packet(EHCIPacket *p)
 {
+    if (p->async == EHCI_ASYNC_FINISHED) {
+        int state = ehci_get_state(p->queue->ehci, p->queue->async);
+        /* This is a normal, but rare condition (cancel racing completion) */
+        fprintf(stderr, "EHCI: Warning packet completed but not processed\n");
+        ehci_state_executing(p->queue);
+        ehci_state_writeback(p->queue);
+        ehci_set_state(p->queue->ehci, p->queue->async, state);
+        /* state_writeback recurses into us with async == EHCI_ASYNC_NONE!! */
+        return;
+    }
     trace_usb_ehci_packet_action(p->queue, p, "free");
+    if (p->async == EHCI_ASYNC_INITIALIZED) {
+        usb_packet_unmap(&p->packet, &p->sgl);
+        qemu_sglist_destroy(&p->sgl);
+    }
     if (p->async == EHCI_ASYNC_INFLIGHT) {
         usb_cancel_packet(&p->packet);
+        usb_packet_unmap(&p->packet, &p->sgl);
+        qemu_sglist_destroy(&p->sgl);
     }
     QTAILQ_REMOVE(&p->queue->packets, p, next);
     usb_packet_cleanup(&p->packet);
@@ -766,27 +796,45 @@
     return q;
 }
 
-static void ehci_cancel_queue(EHCIQueue *q)
+static int ehci_cancel_queue(EHCIQueue *q)
 {
     EHCIPacket *p;
+    int packets = 0;
 
     p = QTAILQ_FIRST(&q->packets);
     if (p == NULL) {
-        return;
+        return 0;
     }
 
     trace_usb_ehci_queue_action(q, "cancel");
     do {
         ehci_free_packet(p);
+        packets++;
     } while ((p = QTAILQ_FIRST(&q->packets)) != NULL);
+    return packets;
 }
 
-static void ehci_free_queue(EHCIQueue *q)
+static int ehci_reset_queue(EHCIQueue *q)
+{
+    int packets;
+
+    trace_usb_ehci_queue_action(q, "reset");
+    packets = ehci_cancel_queue(q);
+    q->dev = NULL;
+    q->qtdaddr = 0;
+    return packets;
+}
+
+static void ehci_free_queue(EHCIQueue *q, const char *warn)
 {
     EHCIQueueHead *head = q->async ? &q->ehci->aqueues : &q->ehci->pqueues;
+    int cancelled;
 
     trace_usb_ehci_queue_action(q, "free");
-    ehci_cancel_queue(q);
+    cancelled = ehci_cancel_queue(q);
+    if (warn && cancelled > 0) {
+        ehci_trace_guest_bug(q->ehci, warn);
+    }
     QTAILQ_REMOVE(head, q, next);
     g_free(q);
 }
@@ -805,20 +853,10 @@
     return NULL;
 }
 
-static void ehci_queues_tag_unused_async(EHCIState *ehci)
-{
-    EHCIQueue *q;
-
-    QTAILQ_FOREACH(q, &ehci->aqueues, next) {
-        if (!q->seen) {
-            q->revalidate = 1;
-        }
-    }
-}
-
-static void ehci_queues_rip_unused(EHCIState *ehci, int async)
+static void ehci_queues_rip_unused(EHCIState *ehci, int async, int flush)
 {
     EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
+    const char *warn = (async && !flush) ? "guest unlinked busy QH" : NULL;
     uint64_t maxage = FRAME_TIMER_NS * ehci->maxframes * 4;
     EHCIQueue *q, *tmp;
 
@@ -828,10 +866,10 @@
             q->ts = ehci->last_run_ns;
             continue;
         }
-        if (ehci->last_run_ns < q->ts + maxage) {
+        if (!flush && ehci->last_run_ns < q->ts + maxage) {
             continue;
         }
-        ehci_free_queue(q);
+        ehci_free_queue(q, warn);
     }
 }
 
@@ -844,17 +882,18 @@
         if (q->dev != dev) {
             continue;
         }
-        ehci_free_queue(q);
+        ehci_free_queue(q, NULL);
     }
 }
 
 static void ehci_queues_rip_all(EHCIState *ehci, int async)
 {
     EHCIQueueHead *head = async ? &ehci->aqueues : &ehci->pqueues;
+    const char *warn = async ? "guest stopped busy async schedule" : NULL;
     EHCIQueue *q, *tmp;
 
     QTAILQ_FOREACH_SAFE(q, head, next, tmp) {
-        ehci_free_queue(q);
+        ehci_free_queue(q, warn);
     }
 }
 
@@ -1213,6 +1252,7 @@
              */
             s->async_stepdown = 0;
             qemu_bh_schedule(s->async_bh);
+            trace_usb_ehci_doorbell_ring();
         }
 
         if (((USBCMD_RUNSTOP | USBCMD_PSE | USBCMD_ASE) & val) !=
@@ -1450,8 +1490,8 @@
 
     assert(p != NULL);
     assert(p->qtdaddr == q->qtdaddr);
-    assert(p->async != EHCI_ASYNC_INFLIGHT);
-    p->async = EHCI_ASYNC_NONE;
+    assert(p->async == EHCI_ASYNC_INITIALIZED ||
+           p->async == EHCI_ASYNC_FINISHED);
 
     DPRINTF("execute_complete: qhaddr 0x%x, next %x, qtdaddr 0x%x, status %d\n",
             q->qhaddr, q->qh.next, q->qtdaddr, q->usb_status);
@@ -1481,10 +1521,6 @@
             assert(0);
             break;
         }
-    } else if ((p->usb_status > p->tbytes) && (p->pid == USB_TOKEN_IN)) {
-        p->usb_status = USB_RET_BABBLE;
-        q->qh.token |= (QTD_TOKEN_HALT | QTD_TOKEN_BABBLE);
-        ehci_raise_irq(q->ehci, USBSTS_ERRINT);
     } else {
         // TODO check 4.12 for splits
 
@@ -1500,6 +1536,7 @@
     ehci_finish_transfer(q, p->usb_status);
     usb_packet_unmap(&p->packet, &p->sgl);
     qemu_sglist_destroy(&p->sgl);
+    p->async = EHCI_ASYNC_NONE;
 
     q->qh.token ^= QTD_TOKEN_DTOGGLE;
     q->qh.token &= ~QTD_TOKEN_ACTIVE;
@@ -1517,6 +1554,9 @@
     int ret;
     int endp;
 
+    assert(p->async == EHCI_ASYNC_NONE ||
+           p->async == EHCI_ASYNC_INITIALIZED);
+
     if (!(p->qtd.token & QTD_TOKEN_ACTIVE)) {
         fprintf(stderr, "Attempting to execute inactive qtd\n");
         return USB_RET_PROCERR;
@@ -1524,7 +1564,8 @@
 
     p->tbytes = (p->qtd.token & QTD_TOKEN_TBYTES_MASK) >> QTD_TOKEN_TBYTES_SH;
     if (p->tbytes > BUFF_SIZE) {
-        fprintf(stderr, "Request for more bytes than allowed\n");
+        ehci_trace_guest_bug(p->queue->ehci,
+                             "guest requested more bytes than allowed");
         return USB_RET_PROCERR;
     }
 
@@ -1544,15 +1585,18 @@
         break;
     }
 
-    if (ehci_init_transfer(p) != 0) {
-        return USB_RET_PROCERR;
-    }
-
     endp = get_field(p->queue->qh.epchar, QH_EPCHAR_EP);
     ep = usb_ep_get(p->queue->dev, p->pid, endp);
 
-    usb_packet_setup(&p->packet, p->pid, ep, p->qtdaddr);
-    usb_packet_map(&p->packet, &p->sgl);
+    if (p->async == EHCI_ASYNC_NONE) {
+        if (ehci_init_transfer(p) != 0) {
+            return USB_RET_PROCERR;
+        }
+
+        usb_packet_setup(&p->packet, p->pid, ep, p->qtdaddr);
+        usb_packet_map(&p->packet, &p->sgl);
+        p->async = EHCI_ASYNC_INITIALIZED;
+    }
 
     trace_usb_ehci_packet_action(p->queue, p, action);
     ret = usb_handle_packet(p->queue->dev, &p->packet);
@@ -1688,7 +1732,7 @@
         ehci_set_usbsts(ehci, USBSTS_REC);
     }
 
-    ehci_queues_rip_unused(ehci, async);
+    ehci_queues_rip_unused(ehci, async, 0);
 
     /*  Find the head of the list (4.9.1.1) */
     for(i = 0; i < MAX_QH; i++) {
@@ -1771,7 +1815,7 @@
 static EHCIQueue *ehci_state_fetchqh(EHCIState *ehci, int async)
 {
     EHCIPacket *p;
-    uint32_t entry, devaddr;
+    uint32_t entry, devaddr, endp;
     EHCIQueue *q;
     EHCIqh qh;
 
@@ -1792,26 +1836,26 @@
 
     get_dwords(ehci, NLPTR_GET(q->qhaddr),
                (uint32_t *) &qh, sizeof(EHCIqh) >> 2);
-    if (q->revalidate && (q->qh.epchar      != qh.epchar ||
-                          q->qh.epcap       != qh.epcap  ||
-                          q->qh.current_qtd != qh.current_qtd)) {
-        ehci_free_queue(q);
-        q = ehci_alloc_queue(ehci, entry, async);
-        q->seen++;
+    ehci_trace_qh(q, NLPTR_GET(q->qhaddr), &qh);
+
+    /*
+     * The overlay area of the qh should never be changed by the guest,
+     * except when idle, in which case the reset is a nop.
+     */
+    devaddr = get_field(qh.epchar, QH_EPCHAR_DEVADDR);
+    endp    = get_field(qh.epchar, QH_EPCHAR_EP);
+    if ((devaddr != get_field(q->qh.epchar, QH_EPCHAR_DEVADDR)) ||
+        (endp    != get_field(q->qh.epchar, QH_EPCHAR_EP)) ||
+        (memcmp(&qh.current_qtd, &q->qh.current_qtd,
+                                 9 * sizeof(uint32_t)) != 0) ||
+        (q->dev != NULL && q->dev->addr != devaddr)) {
+        if (ehci_reset_queue(q) > 0) {
+            ehci_trace_guest_bug(ehci, "guest updated active QH");
+        }
         p = NULL;
     }
     q->qh = qh;
-    q->revalidate = 0;
-    ehci_trace_qh(q, NLPTR_GET(q->qhaddr), &q->qh);
 
-    devaddr = get_field(q->qh.epchar, QH_EPCHAR_DEVADDR);
-    if (q->dev != NULL && q->dev->addr != devaddr) {
-        if (!QTAILQ_EMPTY(&q->packets)) {
-            /* should not happen (guest bug) */
-            ehci_cancel_queue(q);
-        }
-        q->dev = NULL;
-    }
     if (q->dev == NULL) {
         q->dev = ehci_find_device(q->ehci, devaddr);
     }
@@ -1969,8 +2013,8 @@
             (!NLPTR_TBIT(p->qtd.next) && (p->qtd.next != qtd.next)) ||
             (!NLPTR_TBIT(p->qtd.altnext) && (p->qtd.altnext != qtd.altnext)) ||
             p->qtd.bufptr[0] != qtd.bufptr[0]) {
-            /* guest bug: guest updated active QH or qTD underneath us */
             ehci_cancel_queue(q);
+            ehci_trace_guest_bug(q->ehci, "guest updated active QH or qTD");
             p = NULL;
         } else {
             p->qtd = qtd;
@@ -1989,15 +2033,22 @@
     } else if (p != NULL) {
         switch (p->async) {
         case EHCI_ASYNC_NONE:
+            /* Should never happen packet should at least be initialized */
+            assert(0);
+            break;
+        case EHCI_ASYNC_INITIALIZED:
             /* Previously nacked packet (likely interrupt ep) */
-           ehci_set_state(q->ehci, q->async, EST_EXECUTE);
-           break;
+            ehci_set_state(q->ehci, q->async, EST_EXECUTE);
+            break;
         case EHCI_ASYNC_INFLIGHT:
-            /* Unfinyshed async handled packet, go horizontal */
+            /* Unfinished async handled packet, go horizontal */
             ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
             break;
         case EHCI_ASYNC_FINISHED:
-            /* Should never happen, as this case is caught by fetchqh */
+            /*
+             * We get here when advqueue moves to a packet which is already
+             * finished, which can happen with packets queued up by fill_queue
+             */
             ehci_set_state(q->ehci, q->async, EST_EXECUTING);
             break;
         }
@@ -2028,7 +2079,7 @@
     return again;
 }
 
-static void ehci_fill_queue(EHCIPacket *p)
+static int ehci_fill_queue(EHCIPacket *p)
 {
     EHCIQueue *q = p->queue;
     EHCIqtd qtd = p->qtd;
@@ -2052,9 +2103,13 @@
         p->qtdaddr = qtdaddr;
         p->qtd = qtd;
         p->usb_status = ehci_execute(p, "queue");
+        if (p->usb_status == USB_RET_PROCERR) {
+            break;
+        }
         assert(p->usb_status == USB_RET_ASYNC);
         p->async = EHCI_ASYNC_INFLIGHT;
     }
+    return p->usb_status;
 }
 
 static int ehci_state_execute(EHCIQueue *q)
@@ -2096,8 +2151,7 @@
         trace_usb_ehci_packet_action(p->queue, p, "async");
         p->async = EHCI_ASYNC_INFLIGHT;
         ehci_set_state(q->ehci, q->async, EST_HORIZONTALQH);
-        again = 1;
-        ehci_fill_queue(p);
+        again = (ehci_fill_queue(p) == USB_RET_PROCERR) ? -1 : 1;
         goto out;
     }
 
@@ -2310,8 +2364,8 @@
          */
         if (ehci->usbcmd & USBCMD_IAAD) {
             /* Remove all unseen qhs from the async qhs queue */
-            ehci_queues_tag_unused_async(ehci);
-            DPRINTF("ASYNC: doorbell request acknowledged\n");
+            ehci_queues_rip_unused(ehci, async, 1);
+            trace_usb_ehci_doorbell_ack();
             ehci->usbcmd &= ~USBCMD_IAAD;
             ehci_raise_irq(ehci, USBSTS_IAA);
         }
@@ -2363,7 +2417,7 @@
         ehci_set_fetch_addr(ehci, async,entry);
         ehci_set_state(ehci, async, EST_FETCHENTRY);
         ehci_advance_state(ehci, async);
-        ehci_queues_rip_unused(ehci, async);
+        ehci_queues_rip_unused(ehci, async, 0);
         break;
 
     default:
diff --git a/hw/usb/hcd-uhci.c b/hw/usb/hcd-uhci.c
index b0db921..c7c8786 100644
--- a/hw/usb/hcd-uhci.c
+++ b/hw/usb/hcd-uhci.c
@@ -729,11 +729,6 @@
         *int_mask |= 0x01;
 
     if (pid == USB_TOKEN_IN) {
-        if (len > max_len) {
-            ret = USB_RET_BABBLE;
-            goto out;
-        }
-
         if ((td->ctrl & TD_CTRL_SPD) && len < max_len) {
             *int_mask |= 0x02;
             /* short packet: do not update QH */
diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c
index 3eb27fa..e0ca690 100644
--- a/hw/usb/hcd-xhci.c
+++ b/hw/usb/hcd-xhci.c
@@ -23,6 +23,7 @@
 #include "hw/usb.h"
 #include "hw/pci.h"
 #include "hw/msi.h"
+#include "hw/msix.h"
 #include "trace.h"
 
 //#define DEBUG_XHCI
@@ -36,17 +37,14 @@
 #define FIXME() do { fprintf(stderr, "FIXME %s:%d\n", \
                              __func__, __LINE__); abort(); } while (0)
 
-#define MAXSLOTS 8
-#define MAXINTRS 1
+#define MAXPORTS_2 8
+#define MAXPORTS_3 8
 
-#define USB2_PORTS 4
-#define USB3_PORTS 4
-
-#define MAXPORTS (USB2_PORTS+USB3_PORTS)
+#define MAXPORTS (MAXPORTS_2+MAXPORTS_3)
+#define MAXSLOTS MAXPORTS
+#define MAXINTRS MAXPORTS
 
 #define TD_QUEUE 24
-#define BG_XFERS 8
-#define BG_PKTS 8
 
 /* Very pessimistic, let's hope it's enough for all cases */
 #define EV_QUEUE (((3*TD_QUEUE)+16)*MAXSLOTS)
@@ -55,24 +53,28 @@
 #define ER_FULL_HACK
 
 #define LEN_CAP         0x40
-#define OFF_OPER        LEN_CAP
 #define LEN_OPER        (0x400 + 0x10 * MAXPORTS)
-#define OFF_RUNTIME     ((OFF_OPER + LEN_OPER + 0x20) & ~0x1f)
-#define LEN_RUNTIME     (0x20 + MAXINTRS * 0x20)
-#define OFF_DOORBELL    (OFF_RUNTIME + LEN_RUNTIME)
+#define LEN_RUNTIME     ((MAXINTRS + 1) * 0x20)
 #define LEN_DOORBELL    ((MAXSLOTS + 1) * 0x20)
 
+#define OFF_OPER        LEN_CAP
+#define OFF_RUNTIME     0x1000
+#define OFF_DOORBELL    0x2000
+#define OFF_MSIX_TABLE  0x3000
+#define OFF_MSIX_PBA    0x3800
 /* must be power of 2 */
-#define LEN_REGS        0x2000
+#define LEN_REGS        0x4000
 
+#if (OFF_OPER + LEN_OPER) > OFF_RUNTIME
+#error Increase OFF_RUNTIME
+#endif
+#if (OFF_RUNTIME + LEN_RUNTIME) > OFF_DOORBELL
+#error Increase OFF_DOORBELL
+#endif
 #if (OFF_DOORBELL + LEN_DOORBELL) > LEN_REGS
 # error Increase LEN_REGS
 #endif
 
-#if MAXINTRS > 1
-# error TODO: only one interrupter supported
-#endif
-
 /* bit definitions */
 #define USBCMD_RS       (1<<0)
 #define USBCMD_HCRST    (1<<1)
@@ -258,6 +260,10 @@
 
 #define TRB_LK_TC           (1<<1)
 
+#define TRB_INTR_SHIFT          22
+#define TRB_INTR_MASK       0x3ff
+#define TRB_INTR(t)         (((t).status >> TRB_INTR_SHIFT) & TRB_INTR_MASK)
+
 #define EP_TYPE_MASK        0x7
 #define EP_TYPE_SHIFT           3
 
@@ -297,8 +303,10 @@
 } XHCIRing;
 
 typedef struct XHCIPort {
-    USBPort port;
     uint32_t portsc;
+    uint32_t portnr;
+    USBPort  *uport;
+    uint32_t speedmask;
 } XHCIPort;
 
 struct XHCIState;
@@ -307,48 +315,49 @@
 typedef struct XHCITransfer {
     XHCIState *xhci;
     USBPacket packet;
+    QEMUSGList sgl;
     bool running_async;
     bool running_retry;
     bool cancelled;
     bool complete;
-    bool backgrounded;
     unsigned int iso_pkts;
     unsigned int slotid;
     unsigned int epid;
     bool in_xfer;
     bool iso_xfer;
-    bool bg_xfer;
 
     unsigned int trb_count;
     unsigned int trb_alloced;
     XHCITRB *trbs;
 
-    unsigned int data_length;
-    unsigned int data_alloced;
-    uint8_t *data;
-
     TRBCCode status;
 
     unsigned int pkts;
     unsigned int pktsize;
     unsigned int cur_pkt;
+
+    uint64_t mfindex_kick;
 } XHCITransfer;
 
 typedef struct XHCIEPContext {
+    XHCIState *xhci;
+    unsigned int slotid;
+    unsigned int epid;
+
     XHCIRing ring;
     unsigned int next_xfer;
     unsigned int comp_xfer;
     XHCITransfer transfers[TD_QUEUE];
     XHCITransfer *retry;
-    bool bg_running;
-    bool bg_updating;
-    unsigned int next_bg;
-    XHCITransfer bg_transfers[BG_XFERS];
     EPType type;
     dma_addr_t pctx;
     unsigned int max_psize;
-    bool has_bg;
     uint32_t state;
+
+    /* iso xfer scheduling */
+    unsigned int interval;
+    int64_t mfindex_last;
+    QEMUTimer *kick_timer;
 } XHCIEPContext;
 
 typedef struct XHCISlot {
@@ -369,15 +378,44 @@
     uint8_t epid;
 } XHCIEvent;
 
+typedef struct XHCIInterrupter {
+    uint32_t iman;
+    uint32_t imod;
+    uint32_t erstsz;
+    uint32_t erstba_low;
+    uint32_t erstba_high;
+    uint32_t erdp_low;
+    uint32_t erdp_high;
+
+    bool msix_used, er_pcs, er_full;
+
+    dma_addr_t er_start;
+    uint32_t er_size;
+    unsigned int er_ep_idx;
+
+    XHCIEvent ev_buffer[EV_QUEUE];
+    unsigned int ev_buffer_put;
+    unsigned int ev_buffer_get;
+
+} XHCIInterrupter;
+
 struct XHCIState {
     PCIDevice pci_dev;
     USBBus bus;
     qemu_irq irq;
     MemoryRegion mem;
+    MemoryRegion mem_cap;
+    MemoryRegion mem_oper;
+    MemoryRegion mem_runtime;
+    MemoryRegion mem_doorbell;
     const char *name;
-    uint32_t msi;
     unsigned int devaddr;
 
+    /* properties */
+    uint32_t numports_2;
+    uint32_t numports_3;
+    uint32_t flags;
+
     /* Operational Registers */
     uint32_t usbcmd;
     uint32_t usbsts;
@@ -388,29 +426,15 @@
     uint32_t dcbaap_high;
     uint32_t config;
 
+    USBPort  uports[MAX(MAXPORTS_2, MAXPORTS_3)];
     XHCIPort ports[MAXPORTS];
     XHCISlot slots[MAXSLOTS];
+    uint32_t numports;
 
     /* Runtime Registers */
-    uint32_t mfindex;
-    /* note: we only support one interrupter */
-    uint32_t iman;
-    uint32_t imod;
-    uint32_t erstsz;
-    uint32_t erstba_low;
-    uint32_t erstba_high;
-    uint32_t erdp_low;
-    uint32_t erdp_high;
-
-    dma_addr_t er_start;
-    uint32_t er_size;
-    bool er_pcs;
-    unsigned int er_ep_idx;
-    bool er_full;
-
-    XHCIEvent ev_buffer[EV_QUEUE];
-    unsigned int ev_buffer_put;
-    unsigned int ev_buffer_get;
+    int64_t mfindex_start;
+    QEMUTimer *mfwrap_timer;
+    XHCIInterrupter intr[MAXINTRS];
 
     XHCIRing cmd_ring;
 };
@@ -422,6 +446,16 @@
     uint32_t rsvd;
 } XHCIEvRingSeg;
 
+enum xhci_flags {
+    XHCI_FLAG_USE_MSI = 1,
+    XHCI_FLAG_USE_MSI_X,
+};
+
+static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid,
+                         unsigned int epid);
+static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v);
+static void xhci_write_event(XHCIState *xhci, XHCIEvent *event, int v);
+
 static const char *TRBType_names[] = {
     [TRB_RESERVED]                     = "TRB_RESERVED",
     [TR_NORMAL]                        = "TR_NORMAL",
@@ -460,6 +494,45 @@
     [CR_VENDOR_NEC_CHALLENGE_RESPONSE] = "CR_VENDOR_NEC_CHALLENGE_RESPONSE",
 };
 
+static const char *TRBCCode_names[] = {
+    [CC_INVALID]                       = "CC_INVALID",
+    [CC_SUCCESS]                       = "CC_SUCCESS",
+    [CC_DATA_BUFFER_ERROR]             = "CC_DATA_BUFFER_ERROR",
+    [CC_BABBLE_DETECTED]               = "CC_BABBLE_DETECTED",
+    [CC_USB_TRANSACTION_ERROR]         = "CC_USB_TRANSACTION_ERROR",
+    [CC_TRB_ERROR]                     = "CC_TRB_ERROR",
+    [CC_STALL_ERROR]                   = "CC_STALL_ERROR",
+    [CC_RESOURCE_ERROR]                = "CC_RESOURCE_ERROR",
+    [CC_BANDWIDTH_ERROR]               = "CC_BANDWIDTH_ERROR",
+    [CC_NO_SLOTS_ERROR]                = "CC_NO_SLOTS_ERROR",
+    [CC_INVALID_STREAM_TYPE_ERROR]     = "CC_INVALID_STREAM_TYPE_ERROR",
+    [CC_SLOT_NOT_ENABLED_ERROR]        = "CC_SLOT_NOT_ENABLED_ERROR",
+    [CC_EP_NOT_ENABLED_ERROR]          = "CC_EP_NOT_ENABLED_ERROR",
+    [CC_SHORT_PACKET]                  = "CC_SHORT_PACKET",
+    [CC_RING_UNDERRUN]                 = "CC_RING_UNDERRUN",
+    [CC_RING_OVERRUN]                  = "CC_RING_OVERRUN",
+    [CC_VF_ER_FULL]                    = "CC_VF_ER_FULL",
+    [CC_PARAMETER_ERROR]               = "CC_PARAMETER_ERROR",
+    [CC_BANDWIDTH_OVERRUN]             = "CC_BANDWIDTH_OVERRUN",
+    [CC_CONTEXT_STATE_ERROR]           = "CC_CONTEXT_STATE_ERROR",
+    [CC_NO_PING_RESPONSE_ERROR]        = "CC_NO_PING_RESPONSE_ERROR",
+    [CC_EVENT_RING_FULL_ERROR]         = "CC_EVENT_RING_FULL_ERROR",
+    [CC_INCOMPATIBLE_DEVICE_ERROR]     = "CC_INCOMPATIBLE_DEVICE_ERROR",
+    [CC_MISSED_SERVICE_ERROR]          = "CC_MISSED_SERVICE_ERROR",
+    [CC_COMMAND_RING_STOPPED]          = "CC_COMMAND_RING_STOPPED",
+    [CC_COMMAND_ABORTED]               = "CC_COMMAND_ABORTED",
+    [CC_STOPPED]                       = "CC_STOPPED",
+    [CC_STOPPED_LENGTH_INVALID]        = "CC_STOPPED_LENGTH_INVALID",
+    [CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR]
+    = "CC_MAX_EXIT_LATENCY_TOO_LARGE_ERROR",
+    [CC_ISOCH_BUFFER_OVERRUN]          = "CC_ISOCH_BUFFER_OVERRUN",
+    [CC_EVENT_LOST_ERROR]              = "CC_EVENT_LOST_ERROR",
+    [CC_UNDEFINED_ERROR]               = "CC_UNDEFINED_ERROR",
+    [CC_INVALID_STREAM_ID_ERROR]       = "CC_INVALID_STREAM_ID_ERROR",
+    [CC_SECONDARY_BANDWIDTH_ERROR]     = "CC_SECONDARY_BANDWIDTH_ERROR",
+    [CC_SPLIT_TRANSACTION_ERROR]       = "CC_SPLIT_TRANSACTION_ERROR",
+};
+
 static const char *lookup_name(uint32_t index, const char **list, uint32_t llen)
 {
     if (index >= llen || list[index] == NULL) {
@@ -474,8 +547,42 @@
                        ARRAY_SIZE(TRBType_names));
 }
 
-static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid,
-                         unsigned int epid);
+static const char *event_name(XHCIEvent *event)
+{
+    return lookup_name(event->ccode, TRBCCode_names,
+                       ARRAY_SIZE(TRBCCode_names));
+}
+
+static uint64_t xhci_mfindex_get(XHCIState *xhci)
+{
+    int64_t now = qemu_get_clock_ns(vm_clock);
+    return (now - xhci->mfindex_start) / 125000;
+}
+
+static void xhci_mfwrap_update(XHCIState *xhci)
+{
+    const uint32_t bits = USBCMD_RS | USBCMD_EWE;
+    uint32_t mfindex, left;
+    int64_t now;
+
+    if ((xhci->usbcmd & bits) == bits) {
+        now = qemu_get_clock_ns(vm_clock);
+        mfindex = ((now - xhci->mfindex_start) / 125000) & 0x3fff;
+        left = 0x4000 - mfindex;
+        qemu_mod_timer(xhci->mfwrap_timer, now + left * 125000);
+    } else {
+        qemu_del_timer(xhci->mfwrap_timer);
+    }
+}
+
+static void xhci_mfwrap_timer(void *opaque)
+{
+    XHCIState *xhci = opaque;
+    XHCIEvent wrap = { ER_MFINDEX_WRAP, CC_SUCCESS };
+
+    xhci_event(xhci, &wrap, 0);
+    xhci_mfwrap_update(xhci);
+}
 
 static inline dma_addr_t xhci_addr64(uint32_t low, uint32_t high)
 {
@@ -495,29 +602,106 @@
     }
 }
 
-static void xhci_irq_update(XHCIState *xhci)
+static XHCIPort *xhci_lookup_port(XHCIState *xhci, struct USBPort *uport)
+{
+    int index;
+
+    if (!uport->dev) {
+        return NULL;
+    }
+    switch (uport->dev->speed) {
+    case USB_SPEED_LOW:
+    case USB_SPEED_FULL:
+    case USB_SPEED_HIGH:
+        index = uport->index;
+        break;
+    case USB_SPEED_SUPER:
+        index = uport->index + xhci->numports_2;
+        break;
+    default:
+        return NULL;
+    }
+    return &xhci->ports[index];
+}
+
+static void xhci_intx_update(XHCIState *xhci)
 {
     int level = 0;
 
-    if (xhci->iman & IMAN_IP && xhci->iman & IMAN_IE &&
+    if (msix_enabled(&xhci->pci_dev) ||
+        msi_enabled(&xhci->pci_dev)) {
+        return;
+    }
+
+    if (xhci->intr[0].iman & IMAN_IP &&
+        xhci->intr[0].iman & IMAN_IE &&
         xhci->usbcmd & USBCMD_INTE) {
         level = 1;
     }
 
-    if (xhci->msi && msi_enabled(&xhci->pci_dev)) {
-        if (level) {
-            trace_usb_xhci_irq_msi(0);
-            msi_notify(&xhci->pci_dev, 0);
-        }
+    trace_usb_xhci_irq_intx(level);
+    qemu_set_irq(xhci->irq, level);
+}
+
+static void xhci_msix_update(XHCIState *xhci, int v)
+{
+    bool enabled;
+
+    if (!msix_enabled(&xhci->pci_dev)) {
+        return;
+    }
+
+    enabled = xhci->intr[v].iman & IMAN_IE;
+    if (enabled == xhci->intr[v].msix_used) {
+        return;
+    }
+
+    if (enabled) {
+        trace_usb_xhci_irq_msix_use(v);
+        msix_vector_use(&xhci->pci_dev, v);
+        xhci->intr[v].msix_used = true;
     } else {
-        trace_usb_xhci_irq_intx(level);
-        qemu_set_irq(xhci->irq, level);
+        trace_usb_xhci_irq_msix_unuse(v);
+        msix_vector_unuse(&xhci->pci_dev, v);
+        xhci->intr[v].msix_used = false;
+    }
+}
+
+static void xhci_intr_raise(XHCIState *xhci, int v)
+{
+    xhci->intr[v].erdp_low |= ERDP_EHB;
+    xhci->intr[v].iman |= IMAN_IP;
+    xhci->usbsts |= USBSTS_EINT;
+
+    if (!(xhci->intr[v].iman & IMAN_IE)) {
+        return;
+    }
+
+    if (!(xhci->usbcmd & USBCMD_INTE)) {
+        return;
+    }
+
+    if (msix_enabled(&xhci->pci_dev)) {
+        trace_usb_xhci_irq_msix(v);
+        msix_notify(&xhci->pci_dev, v);
+        return;
+    }
+
+    if (msi_enabled(&xhci->pci_dev)) {
+        trace_usb_xhci_irq_msi(v);
+        msi_notify(&xhci->pci_dev, v);
+        return;
+    }
+
+    if (v == 0) {
+        trace_usb_xhci_irq_intx(1);
+        qemu_set_irq(xhci->irq, 1);
     }
 }
 
 static inline int xhci_running(XHCIState *xhci)
 {
-    return !(xhci->usbsts & USBSTS_HCH) && !xhci->er_full;
+    return !(xhci->usbsts & USBSTS_HCH) && !xhci->intr[0].er_full;
 }
 
 static void xhci_die(XHCIState *xhci)
@@ -526,8 +710,9 @@
     fprintf(stderr, "xhci: asserted controller error\n");
 }
 
-static void xhci_write_event(XHCIState *xhci, XHCIEvent *event)
+static void xhci_write_event(XHCIState *xhci, XHCIEvent *event, int v)
 {
+    XHCIInterrupter *intr = &xhci->intr[v];
     XHCITRB ev_trb;
     dma_addr_t addr;
 
@@ -535,26 +720,28 @@
     ev_trb.status = cpu_to_le32(event->length | (event->ccode << 24));
     ev_trb.control = (event->slotid << 24) | (event->epid << 16) |
                      event->flags | (event->type << TRB_TYPE_SHIFT);
-    if (xhci->er_pcs) {
+    if (intr->er_pcs) {
         ev_trb.control |= TRB_C;
     }
     ev_trb.control = cpu_to_le32(ev_trb.control);
 
-    trace_usb_xhci_queue_event(xhci->er_ep_idx, trb_name(&ev_trb),
-                               ev_trb.parameter, ev_trb.status, ev_trb.control);
+    trace_usb_xhci_queue_event(v, intr->er_ep_idx, trb_name(&ev_trb),
+                               event_name(event), ev_trb.parameter,
+                               ev_trb.status, ev_trb.control);
 
-    addr = xhci->er_start + TRB_SIZE*xhci->er_ep_idx;
+    addr = intr->er_start + TRB_SIZE*intr->er_ep_idx;
     pci_dma_write(&xhci->pci_dev, addr, &ev_trb, TRB_SIZE);
 
-    xhci->er_ep_idx++;
-    if (xhci->er_ep_idx >= xhci->er_size) {
-        xhci->er_ep_idx = 0;
-        xhci->er_pcs = !xhci->er_pcs;
+    intr->er_ep_idx++;
+    if (intr->er_ep_idx >= intr->er_size) {
+        intr->er_ep_idx = 0;
+        intr->er_pcs = !intr->er_pcs;
     }
 }
 
-static void xhci_events_update(XHCIState *xhci)
+static void xhci_events_update(XHCIState *xhci, int v)
 {
+    XHCIInterrupter *intr = &xhci->intr[v];
     dma_addr_t erdp;
     unsigned int dp_idx;
     bool do_irq = 0;
@@ -563,122 +750,122 @@
         return;
     }
 
-    erdp = xhci_addr64(xhci->erdp_low, xhci->erdp_high);
-    if (erdp < xhci->er_start ||
-        erdp >= (xhci->er_start + TRB_SIZE*xhci->er_size)) {
+    erdp = xhci_addr64(intr->erdp_low, intr->erdp_high);
+    if (erdp < intr->er_start ||
+        erdp >= (intr->er_start + TRB_SIZE*intr->er_size)) {
         fprintf(stderr, "xhci: ERDP out of bounds: "DMA_ADDR_FMT"\n", erdp);
-        fprintf(stderr, "xhci: ER at "DMA_ADDR_FMT" len %d\n",
-                xhci->er_start, xhci->er_size);
+        fprintf(stderr, "xhci: ER[%d] at "DMA_ADDR_FMT" len %d\n",
+                v, intr->er_start, intr->er_size);
         xhci_die(xhci);
         return;
     }
-    dp_idx = (erdp - xhci->er_start) / TRB_SIZE;
-    assert(dp_idx < xhci->er_size);
+    dp_idx = (erdp - intr->er_start) / TRB_SIZE;
+    assert(dp_idx < intr->er_size);
 
     /* NEC didn't read section 4.9.4 of the spec (v1.0 p139 top Note) and thus
      * deadlocks when the ER is full. Hack it by holding off events until
      * the driver decides to free at least half of the ring */
-    if (xhci->er_full) {
-        int er_free = dp_idx - xhci->er_ep_idx;
+    if (intr->er_full) {
+        int er_free = dp_idx - intr->er_ep_idx;
         if (er_free <= 0) {
-            er_free += xhci->er_size;
+            er_free += intr->er_size;
         }
-        if (er_free < (xhci->er_size/2)) {
+        if (er_free < (intr->er_size/2)) {
             DPRINTF("xhci_events_update(): event ring still "
                     "more than half full (hack)\n");
             return;
         }
     }
 
-    while (xhci->ev_buffer_put != xhci->ev_buffer_get) {
-        assert(xhci->er_full);
-        if (((xhci->er_ep_idx+1) % xhci->er_size) == dp_idx) {
+    while (intr->ev_buffer_put != intr->ev_buffer_get) {
+        assert(intr->er_full);
+        if (((intr->er_ep_idx+1) % intr->er_size) == dp_idx) {
             DPRINTF("xhci_events_update(): event ring full again\n");
 #ifndef ER_FULL_HACK
             XHCIEvent full = {ER_HOST_CONTROLLER, CC_EVENT_RING_FULL_ERROR};
-            xhci_write_event(xhci, &full);
+            xhci_write_event(xhci, &full, v);
 #endif
             do_irq = 1;
             break;
         }
-        XHCIEvent *event = &xhci->ev_buffer[xhci->ev_buffer_get];
-        xhci_write_event(xhci, event);
-        xhci->ev_buffer_get++;
+        XHCIEvent *event = &intr->ev_buffer[intr->ev_buffer_get];
+        xhci_write_event(xhci, event, v);
+        intr->ev_buffer_get++;
         do_irq = 1;
-        if (xhci->ev_buffer_get == EV_QUEUE) {
-            xhci->ev_buffer_get = 0;
+        if (intr->ev_buffer_get == EV_QUEUE) {
+            intr->ev_buffer_get = 0;
         }
     }
 
     if (do_irq) {
-        xhci->erdp_low |= ERDP_EHB;
-        xhci->iman |= IMAN_IP;
-        xhci->usbsts |= USBSTS_EINT;
-        xhci_irq_update(xhci);
+        xhci_intr_raise(xhci, v);
     }
 
-    if (xhci->er_full && xhci->ev_buffer_put == xhci->ev_buffer_get) {
+    if (intr->er_full && intr->ev_buffer_put == intr->ev_buffer_get) {
         DPRINTF("xhci_events_update(): event ring no longer full\n");
-        xhci->er_full = 0;
+        intr->er_full = 0;
     }
     return;
 }
 
-static void xhci_event(XHCIState *xhci, XHCIEvent *event)
+static void xhci_event(XHCIState *xhci, XHCIEvent *event, int v)
 {
+    XHCIInterrupter *intr;
     dma_addr_t erdp;
     unsigned int dp_idx;
 
-    if (xhci->er_full) {
+    if (v >= MAXINTRS) {
+        DPRINTF("intr nr out of range (%d >= %d)\n", v, MAXINTRS);
+        return;
+    }
+    intr = &xhci->intr[v];
+
+    if (intr->er_full) {
         DPRINTF("xhci_event(): ER full, queueing\n");
-        if (((xhci->ev_buffer_put+1) % EV_QUEUE) == xhci->ev_buffer_get) {
+        if (((intr->ev_buffer_put+1) % EV_QUEUE) == intr->ev_buffer_get) {
             fprintf(stderr, "xhci: event queue full, dropping event!\n");
             return;
         }
-        xhci->ev_buffer[xhci->ev_buffer_put++] = *event;
-        if (xhci->ev_buffer_put == EV_QUEUE) {
-            xhci->ev_buffer_put = 0;
+        intr->ev_buffer[intr->ev_buffer_put++] = *event;
+        if (intr->ev_buffer_put == EV_QUEUE) {
+            intr->ev_buffer_put = 0;
         }
         return;
     }
 
-    erdp = xhci_addr64(xhci->erdp_low, xhci->erdp_high);
-    if (erdp < xhci->er_start ||
-        erdp >= (xhci->er_start + TRB_SIZE*xhci->er_size)) {
+    erdp = xhci_addr64(intr->erdp_low, intr->erdp_high);
+    if (erdp < intr->er_start ||
+        erdp >= (intr->er_start + TRB_SIZE*intr->er_size)) {
         fprintf(stderr, "xhci: ERDP out of bounds: "DMA_ADDR_FMT"\n", erdp);
-        fprintf(stderr, "xhci: ER at "DMA_ADDR_FMT" len %d\n",
-                xhci->er_start, xhci->er_size);
+        fprintf(stderr, "xhci: ER[%d] at "DMA_ADDR_FMT" len %d\n",
+                v, intr->er_start, intr->er_size);
         xhci_die(xhci);
         return;
     }
 
-    dp_idx = (erdp - xhci->er_start) / TRB_SIZE;
-    assert(dp_idx < xhci->er_size);
+    dp_idx = (erdp - intr->er_start) / TRB_SIZE;
+    assert(dp_idx < intr->er_size);
 
-    if ((xhci->er_ep_idx+1) % xhci->er_size == dp_idx) {
+    if ((intr->er_ep_idx+1) % intr->er_size == dp_idx) {
         DPRINTF("xhci_event(): ER full, queueing\n");
 #ifndef ER_FULL_HACK
         XHCIEvent full = {ER_HOST_CONTROLLER, CC_EVENT_RING_FULL_ERROR};
         xhci_write_event(xhci, &full);
 #endif
-        xhci->er_full = 1;
-        if (((xhci->ev_buffer_put+1) % EV_QUEUE) == xhci->ev_buffer_get) {
+        intr->er_full = 1;
+        if (((intr->ev_buffer_put+1) % EV_QUEUE) == intr->ev_buffer_get) {
             fprintf(stderr, "xhci: event queue full, dropping event!\n");
             return;
         }
-        xhci->ev_buffer[xhci->ev_buffer_put++] = *event;
-        if (xhci->ev_buffer_put == EV_QUEUE) {
-            xhci->ev_buffer_put = 0;
+        intr->ev_buffer[intr->ev_buffer_put++] = *event;
+        if (intr->ev_buffer_put == EV_QUEUE) {
+            intr->ev_buffer_put = 0;
         }
     } else {
-        xhci_write_event(xhci, event);
+        xhci_write_event(xhci, event, v);
     }
 
-    xhci->erdp_low |= ERDP_EHB;
-    xhci->iman |= IMAN_IP;
-    xhci->usbsts |= USBSTS_EINT;
-
-    xhci_irq_update(xhci);
+    xhci_intr_raise(xhci, v);
 }
 
 static void xhci_ring_init(XHCIState *xhci, XHCIRing *ring,
@@ -770,17 +957,18 @@
     }
 }
 
-static void xhci_er_reset(XHCIState *xhci)
+static void xhci_er_reset(XHCIState *xhci, int v)
 {
+    XHCIInterrupter *intr = &xhci->intr[v];
     XHCIEvRingSeg seg;
 
     /* cache the (sole) event ring segment location */
-    if (xhci->erstsz != 1) {
-        fprintf(stderr, "xhci: invalid value for ERSTSZ: %d\n", xhci->erstsz);
+    if (intr->erstsz != 1) {
+        fprintf(stderr, "xhci: invalid value for ERSTSZ: %d\n", intr->erstsz);
         xhci_die(xhci);
         return;
     }
-    dma_addr_t erstba = xhci_addr64(xhci->erstba_low, xhci->erstba_high);
+    dma_addr_t erstba = xhci_addr64(intr->erstba_low, intr->erstba_high);
     pci_dma_read(&xhci->pci_dev, erstba, &seg, sizeof(seg));
     le32_to_cpus(&seg.addr_low);
     le32_to_cpus(&seg.addr_high);
@@ -790,21 +978,22 @@
         xhci_die(xhci);
         return;
     }
-    xhci->er_start = xhci_addr64(seg.addr_low, seg.addr_high);
-    xhci->er_size = seg.size;
+    intr->er_start = xhci_addr64(seg.addr_low, seg.addr_high);
+    intr->er_size = seg.size;
 
-    xhci->er_ep_idx = 0;
-    xhci->er_pcs = 1;
-    xhci->er_full = 0;
+    intr->er_ep_idx = 0;
+    intr->er_pcs = 1;
+    intr->er_full = 0;
 
-    DPRINTF("xhci: event ring:" DMA_ADDR_FMT " [%d]\n",
-            xhci->er_start, xhci->er_size);
+    DPRINTF("xhci: event ring[%d]:" DMA_ADDR_FMT " [%d]\n",
+            v, intr->er_start, intr->er_size);
 }
 
 static void xhci_run(XHCIState *xhci)
 {
     trace_usb_xhci_run();
     xhci->usbsts &= ~USBSTS_HCH;
+    xhci->mfindex_start = qemu_get_clock_ns(vm_clock);
 }
 
 static void xhci_stop(XHCIState *xhci)
@@ -833,6 +1022,12 @@
     epctx->state = state;
 }
 
+static void xhci_ep_kick_timer(void *opaque)
+{
+    XHCIEPContext *epctx = opaque;
+    xhci_kick_ep(epctx->xhci, epctx->slotid, epctx->epid);
+}
+
 static TRBCCode xhci_enable_ep(XHCIState *xhci, unsigned int slotid,
                                unsigned int epid, dma_addr_t pctx,
                                uint32_t *ctx)
@@ -854,6 +1049,9 @@
 
     epctx = g_malloc(sizeof(XHCIEPContext));
     memset(epctx, 0, sizeof(XHCIEPContext));
+    epctx->xhci = xhci;
+    epctx->slotid = slotid;
+    epctx->epid = epid;
 
     slot->eps[epid-1] = epctx;
 
@@ -866,16 +1064,16 @@
     epctx->pctx = pctx;
     epctx->max_psize = ctx[1]>>16;
     epctx->max_psize *= 1+((ctx[1]>>8)&0xff);
-    epctx->has_bg = false;
-    if (epctx->type == ET_ISO_IN) {
-        epctx->has_bg = true;
-    }
     DPRINTF("xhci: endpoint %d.%d max transaction (burst) size is %d\n",
             epid/2, epid%2, epctx->max_psize);
     for (i = 0; i < ARRAY_SIZE(epctx->transfers); i++) {
         usb_packet_init(&epctx->transfers[i].packet);
     }
 
+    epctx->interval = 1 << (ctx[0] >> 16) & 0xff;
+    epctx->mfindex_last = 0;
+    epctx->kick_timer = qemu_new_timer_ns(vm_clock, xhci_ep_kick_timer, epctx);
+
     epctx->state = EP_RUNNING;
     ctx[0] &= ~EP_STATE_MASK;
     ctx[0] |= EP_RUNNING;
@@ -915,42 +1113,16 @@
         if (t->running_retry) {
             t->running_retry = 0;
             epctx->retry = NULL;
-        }
-        if (t->backgrounded) {
-            t->backgrounded = 0;
+            qemu_del_timer(epctx->kick_timer);
         }
         if (t->trbs) {
             g_free(t->trbs);
         }
-        if (t->data) {
-            g_free(t->data);
-        }
 
         t->trbs = NULL;
-        t->data = NULL;
         t->trb_count = t->trb_alloced = 0;
-        t->data_length = t->data_alloced = 0;
         xferi = (xferi + 1) % TD_QUEUE;
     }
-    if (epctx->has_bg) {
-        xferi = epctx->next_bg;
-        for (i = 0; i < BG_XFERS; i++) {
-            XHCITransfer *t = &epctx->bg_transfers[xferi];
-            if (t->running_async) {
-                usb_cancel_packet(&t->packet);
-                t->running_async = 0;
-                t->cancelled = 1;
-                DPRINTF("xhci: cancelling bg transfer %d, waiting for it to complete...\n", i);
-                killed++;
-            }
-            if (t->data) {
-                g_free(t->data);
-            }
-
-            t->data = NULL;
-            xferi = (xferi + 1) % BG_XFERS;
-        }
-    }
     return killed;
 }
 
@@ -977,6 +1149,7 @@
 
     xhci_set_ep_state(xhci, epctx, EP_DISABLED);
 
+    qemu_free_timer(epctx->kick_timer);
     g_free(epctx);
     slot->eps[epid-1] = NULL;
 
@@ -1057,7 +1230,7 @@
         ep |= 0x80;
     }
 
-    dev = xhci->ports[xhci->slots[slotid-1].port-1].port.dev;
+    dev = xhci->ports[xhci->slots[slotid-1].port-1].uport->dev;
     if (!dev) {
         return CC_USB_TRANSACTION_ERROR;
     }
@@ -1081,7 +1254,7 @@
         return CC_TRB_ERROR;
     }
 
-    DPRINTF("xhci_set_ep_dequeue(%d, %d, %016"PRIx64")\n", slotid, epid, pdequeue);
+    trace_usb_xhci_ep_set_dequeue(slotid, epid, pdequeue);
     dequeue = xhci_mask64(pdequeue);
 
     slot = &xhci->slots[slotid-1];
@@ -1107,24 +1280,13 @@
     return CC_SUCCESS;
 }
 
-static int xhci_xfer_data(XHCITransfer *xfer, uint8_t *data,
-                          unsigned int length, bool in_xfer, bool out_xfer,
-                          bool report)
+static int xhci_xfer_map(XHCITransfer *xfer)
 {
-    int i;
-    uint32_t edtla = 0;
-    unsigned int transferred = 0;
-    unsigned int left = length;
-    bool reported = 0;
-    bool shortpkt = 0;
-    XHCIEvent event = {ER_TRANSFER, CC_SUCCESS};
+    int in_xfer = (xfer->packet.pid == USB_TOKEN_IN);
     XHCIState *xhci = xfer->xhci;
+    int i;
 
-    DPRINTF("xhci_xfer_data(len=%d, in_xfer=%d, out_xfer=%d, report=%d)\n",
-            length, in_xfer, out_xfer, report);
-
-    assert(!(in_xfer && out_xfer));
-
+    pci_dma_sglist_init(&xfer->sgl, &xhci->pci_dev, xfer->trb_count);
     for (i = 0; i < xfer->trb_count; i++) {
         XHCITRB *trb = &xfer->trbs[i];
         dma_addr_t addr;
@@ -1134,54 +1296,70 @@
         case TR_DATA:
             if ((!(trb->control & TRB_TR_DIR)) != (!in_xfer)) {
                 fprintf(stderr, "xhci: data direction mismatch for TR_DATA\n");
-                xhci_die(xhci);
-                return transferred;
+                goto err;
             }
             /* fallthrough */
         case TR_NORMAL:
         case TR_ISOCH:
             addr = xhci_mask64(trb->parameter);
             chunk = trb->status & 0x1ffff;
+            if (trb->control & TRB_TR_IDT) {
+                if (chunk > 8 || in_xfer) {
+                    fprintf(stderr, "xhci: invalid immediate data TRB\n");
+                    goto err;
+                }
+                qemu_sglist_add(&xfer->sgl, trb->addr, chunk);
+            } else {
+                qemu_sglist_add(&xfer->sgl, addr, chunk);
+            }
+            break;
+        }
+    }
+
+    usb_packet_map(&xfer->packet, &xfer->sgl);
+    return 0;
+
+err:
+    qemu_sglist_destroy(&xfer->sgl);
+    xhci_die(xhci);
+    return -1;
+}
+
+static void xhci_xfer_unmap(XHCITransfer *xfer)
+{
+    usb_packet_unmap(&xfer->packet, &xfer->sgl);
+    qemu_sglist_destroy(&xfer->sgl);
+}
+
+static void xhci_xfer_report(XHCITransfer *xfer)
+{
+    uint32_t edtla = 0;
+    unsigned int left;
+    bool reported = 0;
+    bool shortpkt = 0;
+    XHCIEvent event = {ER_TRANSFER, CC_SUCCESS};
+    XHCIState *xhci = xfer->xhci;
+    int i;
+
+    left = xfer->packet.result < 0 ? 0 : xfer->packet.result;
+
+    for (i = 0; i < xfer->trb_count; i++) {
+        XHCITRB *trb = &xfer->trbs[i];
+        unsigned int chunk = 0;
+
+        switch (TRB_TYPE(*trb)) {
+        case TR_DATA:
+        case TR_NORMAL:
+        case TR_ISOCH:
+            chunk = trb->status & 0x1ffff;
             if (chunk > left) {
                 chunk = left;
-                shortpkt = 1;
-            }
-            if (in_xfer || out_xfer) {
-                if (trb->control & TRB_TR_IDT) {
-                    uint64_t idata;
-                    if (chunk > 8 || in_xfer) {
-                        fprintf(stderr, "xhci: invalid immediate data TRB\n");
-                        xhci_die(xhci);
-                        return transferred;
-                    }
-                    idata = le64_to_cpu(trb->parameter);
-                    memcpy(data, &idata, chunk);
-                } else {
-                    DPRINTF("xhci_xfer_data: r/w(%d) %d bytes at "
-                            DMA_ADDR_FMT "\n", in_xfer, chunk, addr);
-                    if (in_xfer) {
-                        pci_dma_write(&xhci->pci_dev, addr, data, chunk);
-                    } else {
-                        pci_dma_read(&xhci->pci_dev, addr, data, chunk);
-                    }
-#ifdef DEBUG_DATA
-                    unsigned int count = chunk;
-                    int i;
-                    if (count > 16) {
-                        count = 16;
-                    }
-                    DPRINTF(" ::");
-                    for (i = 0; i < count; i++) {
-                        DPRINTF(" %02x", data[i]);
-                    }
-                    DPRINTF("\n");
-#endif
+                if (xfer->status == CC_SUCCESS) {
+                    shortpkt = 1;
                 }
             }
             left -= chunk;
-            data += chunk;
             edtla += chunk;
-            transferred += chunk;
             break;
         case TR_STATUS:
             reported = 0;
@@ -1189,8 +1367,9 @@
             break;
         }
 
-        if (report && !reported && (trb->control & TRB_TR_IOC ||
-            (shortpkt && (trb->control & TRB_TR_ISP)))) {
+        if (!reported && ((trb->control & TRB_TR_IOC) ||
+                          (shortpkt && (trb->control & TRB_TR_ISP)) ||
+                          (xfer->status != CC_SUCCESS))) {
             event.slotid = xfer->slotid;
             event.epid = xfer->epid;
             event.length = (trb->status & 0x1ffff) - chunk;
@@ -1208,11 +1387,13 @@
                 DPRINTF("xhci_xfer_data: EDTLA=%d\n", event.length);
                 edtla = 0;
             }
-            xhci_event(xhci, &event);
+            xhci_event(xhci, &event, TRB_INTR(*trb));
             reported = 1;
+            if (xfer->status != CC_SUCCESS) {
+                return;
+            }
         }
     }
-    return transferred;
 }
 
 static void xhci_stall_ep(XHCITransfer *xfer)
@@ -1231,169 +1412,40 @@
 static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer,
                        XHCIEPContext *epctx);
 
-static void xhci_bg_update(XHCIState *xhci, XHCIEPContext *epctx)
+static USBDevice *xhci_find_device(XHCIPort *port, uint8_t addr)
 {
-    if (epctx->bg_updating) {
-        return;
+    if (!(port->portsc & PORTSC_PED)) {
+        return NULL;
     }
-    DPRINTF("xhci_bg_update(%p, %p)\n", xhci, epctx);
-    assert(epctx->has_bg);
-    DPRINTF("xhci: fg=%d bg=%d\n", epctx->comp_xfer, epctx->next_bg);
-    epctx->bg_updating = 1;
-    while (epctx->transfers[epctx->comp_xfer].backgrounded &&
-           epctx->bg_transfers[epctx->next_bg].complete) {
-        XHCITransfer *fg = &epctx->transfers[epctx->comp_xfer];
-        XHCITransfer *bg = &epctx->bg_transfers[epctx->next_bg];
-#if 0
-        DPRINTF("xhci: completing fg %d from bg %d.%d (stat: %d)\n",
-                epctx->comp_xfer, epctx->next_bg, bg->cur_pkt,
-                bg->usbxfer->iso_packet_desc[bg->cur_pkt].status
-               );
-#endif
-        assert(epctx->type == ET_ISO_IN);
-        assert(bg->iso_xfer);
-        assert(bg->in_xfer);
-        uint8_t *p = bg->data + bg->cur_pkt * bg->pktsize;
-#if 0
-        int len = bg->usbxfer->iso_packet_desc[bg->cur_pkt].actual_length;
-        fg->status = libusb_to_ccode(bg->usbxfer->iso_packet_desc[bg->cur_pkt].status);
-#else
-        int len = 0;
-        FIXME();
-#endif
-        fg->complete = 1;
-        fg->backgrounded = 0;
-
-        if (fg->status == CC_STALL_ERROR) {
-            xhci_stall_ep(fg);
-        }
-
-        xhci_xfer_data(fg, p, len, 1, 0, 1);
-
-        epctx->comp_xfer++;
-        if (epctx->comp_xfer == TD_QUEUE) {
-            epctx->comp_xfer = 0;
-        }
-        DPRINTF("next fg xfer: %d\n", epctx->comp_xfer);
-        bg->cur_pkt++;
-        if (bg->cur_pkt == bg->pkts) {
-            bg->complete = 0;
-            if (xhci_submit(xhci, bg, epctx) < 0) {
-                fprintf(stderr, "xhci: bg resubmit failed\n");
-            }
-            epctx->next_bg++;
-            if (epctx->next_bg == BG_XFERS) {
-                epctx->next_bg = 0;
-            }
-            DPRINTF("next bg xfer: %d\n", epctx->next_bg);
-
-        xhci_kick_ep(xhci, fg->slotid, fg->epid);
-        }
-    }
-    epctx->bg_updating = 0;
+    return usb_find_device(port->uport, addr);
 }
 
-#if 0
-static void xhci_xfer_cb(struct libusb_transfer *transfer)
+static int xhci_setup_packet(XHCITransfer *xfer)
 {
-    XHCIState *xhci;
-    XHCITransfer *xfer;
-
-    xfer = (XHCITransfer *)transfer->user_data;
-    xhci = xfer->xhci;
-
-    DPRINTF("xhci_xfer_cb(slot=%d, ep=%d, status=%d)\n", xfer->slotid,
-            xfer->epid, transfer->status);
-
-    assert(xfer->slotid >= 1 && xfer->slotid <= MAXSLOTS);
-    assert(xfer->epid >= 1 && xfer->epid <= 31);
-
-    if (xfer->cancelled) {
-        DPRINTF("xhci: transfer cancelled, not reporting anything\n");
-        xfer->running = 0;
-        return;
-    }
-
-    XHCIEPContext *epctx;
-    XHCISlot *slot;
-    slot = &xhci->slots[xfer->slotid-1];
-    assert(slot->eps[xfer->epid-1]);
-    epctx = slot->eps[xfer->epid-1];
-
-    if (xfer->bg_xfer) {
-        DPRINTF("xhci: background transfer, updating\n");
-        xfer->complete = 1;
-        xfer->running = 0;
-        xhci_bg_update(xhci, epctx);
-        return;
-    }
-
-    if (xfer->iso_xfer) {
-        transfer->status = transfer->iso_packet_desc[0].status;
-        transfer->actual_length = transfer->iso_packet_desc[0].actual_length;
-    }
-
-    xfer->status = libusb_to_ccode(transfer->status);
-
-    xfer->complete = 1;
-    xfer->running = 0;
-
-    if (transfer->status == LIBUSB_TRANSFER_STALL)
-        xhci_stall_ep(xhci, epctx, xfer);
-
-    DPRINTF("xhci: transfer actual length = %d\n", transfer->actual_length);
-
-    if (xfer->in_xfer) {
-        if (xfer->epid == 1) {
-            xhci_xfer_data(xhci, xfer, xfer->data + 8,
-                           transfer->actual_length, 1, 0, 1);
-        } else {
-            xhci_xfer_data(xhci, xfer, xfer->data,
-                           transfer->actual_length, 1, 0, 1);
-        }
-    } else {
-        xhci_xfer_data(xhci, xfer, NULL, transfer->actual_length, 0, 0, 1);
-    }
-
-    xhci_kick_ep(xhci, xfer->slotid, xfer->epid);
-}
-
-static int xhci_hle_control(XHCIState *xhci, XHCITransfer *xfer,
-                            uint8_t bmRequestType, uint8_t bRequest,
-                            uint16_t wValue, uint16_t wIndex, uint16_t wLength)
-{
-    uint16_t type_req = (bmRequestType << 8) | bRequest;
-
-    switch (type_req) {
-        case 0x0000 | USB_REQ_SET_CONFIGURATION:
-            DPRINTF("xhci: HLE switch configuration\n");
-            return xhci_switch_config(xhci, xfer->slotid, wValue) == 0;
-        case 0x0100 | USB_REQ_SET_INTERFACE:
-            DPRINTF("xhci: HLE set interface altsetting\n");
-            return xhci_set_iface_alt(xhci, xfer->slotid, wIndex, wValue) == 0;
-        case 0x0200 | USB_REQ_CLEAR_FEATURE:
-            if (wValue == 0) { // endpoint halt
-                DPRINTF("xhci: HLE clear halt\n");
-                return xhci_clear_halt(xhci, xfer->slotid, wIndex);
-            }
-        case 0x0000 | USB_REQ_SET_ADDRESS:
-            fprintf(stderr, "xhci: warn: illegal SET_ADDRESS request\n");
-            return 0;
-        default:
-            return 0;
-    }
-}
-#endif
-
-static int xhci_setup_packet(XHCITransfer *xfer, USBDevice *dev)
-{
+    XHCIState *xhci = xfer->xhci;
+    XHCIPort *port;
+    USBDevice *dev;
     USBEndpoint *ep;
     int dir;
 
     dir = xfer->in_xfer ? USB_TOKEN_IN : USB_TOKEN_OUT;
-    ep = usb_ep_get(dev, dir, xfer->epid >> 1);
+
+    if (xfer->packet.ep) {
+        ep = xfer->packet.ep;
+        dev = ep->dev;
+    } else {
+        port = &xhci->ports[xhci->slots[xfer->slotid-1].port-1];
+        dev = xhci_find_device(port, xhci->slots[xfer->slotid-1].devaddr);
+        if (!dev) {
+            fprintf(stderr, "xhci: slot %d port %d has no device\n",
+                    xfer->slotid, xhci->slots[xfer->slotid-1].port);
+            return -1;
+        }
+        ep = usb_ep_get(dev, dir, xfer->epid >> 1);
+    }
+
     usb_packet_setup(&xfer->packet, dir, ep, xfer->trbs[0].addr);
-    usb_packet_addbuf(&xfer->packet, xfer->data, xfer->data_length);
+    xhci_xfer_map(xfer);
     DPRINTF("xhci: setup packet pid 0x%x addr %d ep %d\n",
             xfer->packet.pid, dev->addr, ep->nr);
     return 0;
@@ -1419,12 +1471,13 @@
         xfer->running_async = 0;
         xfer->running_retry = 0;
         xfer->complete = 1;
+        xhci_xfer_unmap(xfer);
     }
 
     if (ret >= 0) {
-        xfer->status = CC_SUCCESS;
-        xhci_xfer_data(xfer, xfer->data, ret, xfer->in_xfer, 0, 1);
         trace_usb_xhci_xfer_success(xfer, ret);
+        xfer->status = CC_SUCCESS;
+        xhci_xfer_report(xfer);
         return 0;
     }
 
@@ -1433,12 +1486,12 @@
     switch (ret) {
     case USB_RET_NODEV:
         xfer->status = CC_USB_TRANSACTION_ERROR;
-        xhci_xfer_data(xfer, xfer->data, 0, xfer->in_xfer, 0, 1);
+        xhci_xfer_report(xfer);
         xhci_stall_ep(xfer);
         break;
     case USB_RET_STALL:
         xfer->status = CC_STALL_ERROR;
-        xhci_xfer_data(xfer, xfer->data, 0, xfer->in_xfer, 0, 1);
+        xhci_xfer_report(xfer);
         xhci_stall_ep(xfer);
         break;
     default:
@@ -1448,28 +1501,16 @@
     return 0;
 }
 
-static USBDevice *xhci_find_device(XHCIPort *port, uint8_t addr)
-{
-    if (!(port->portsc & PORTSC_PED)) {
-        return NULL;
-    }
-    return usb_find_device(&port->port, addr);
-}
-
 static int xhci_fire_ctl_transfer(XHCIState *xhci, XHCITransfer *xfer)
 {
     XHCITRB *trb_setup, *trb_status;
     uint8_t bmRequestType;
-    uint16_t wLength;
-    XHCIPort *port;
-    USBDevice *dev;
     int ret;
 
     trb_setup = &xfer->trbs[0];
     trb_status = &xfer->trbs[xfer->trb_count-1];
 
-    trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid,
-                              trb_setup->parameter >> 48);
+    trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid);
 
     /* at most one Event Data TRB allowed after STATUS */
     if (TRB_TYPE(*trb_status) == TR_EVDATA && xfer->trb_count > 2) {
@@ -1498,38 +1539,16 @@
     }
 
     bmRequestType = trb_setup->parameter;
-    wLength = trb_setup->parameter >> 48;
-
-    if (xfer->data && xfer->data_alloced < wLength) {
-        xfer->data_alloced = 0;
-        g_free(xfer->data);
-        xfer->data = NULL;
-    }
-    if (!xfer->data) {
-        DPRINTF("xhci: alloc %d bytes data\n", wLength);
-        xfer->data = g_malloc(wLength+1);
-        xfer->data_alloced = wLength;
-    }
-    xfer->data_length = wLength;
-
-    port = &xhci->ports[xhci->slots[xfer->slotid-1].port-1];
-    dev = xhci_find_device(port, xhci->slots[xfer->slotid-1].devaddr);
-    if (!dev) {
-        fprintf(stderr, "xhci: slot %d port %d has no device\n", xfer->slotid,
-                xhci->slots[xfer->slotid-1].port);
-        return -1;
-    }
 
     xfer->in_xfer = bmRequestType & USB_DIR_IN;
     xfer->iso_xfer = false;
 
-    xhci_setup_packet(xfer, dev);
-    xfer->packet.parameter = trb_setup->parameter;
-    if (!xfer->in_xfer) {
-        xhci_xfer_data(xfer, xfer->data, wLength, 0, 1, 0);
+    if (xhci_setup_packet(xfer) < 0) {
+        return -1;
     }
+    xfer->packet.parameter = trb_setup->parameter;
 
-    ret = usb_handle_packet(dev, &xfer->packet);
+    ret = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
 
     xhci_complete_packet(xfer, ret);
     if (!xfer->running_async && !xfer->running_retry) {
@@ -1538,53 +1557,70 @@
     return 0;
 }
 
+static void xhci_calc_iso_kick(XHCIState *xhci, XHCITransfer *xfer,
+                               XHCIEPContext *epctx, uint64_t mfindex)
+{
+    if (xfer->trbs[0].control & TRB_TR_SIA) {
+        uint64_t asap = ((mfindex + epctx->interval - 1) &
+                         ~(epctx->interval-1));
+        if (asap >= epctx->mfindex_last &&
+            asap <= epctx->mfindex_last + epctx->interval * 4) {
+            xfer->mfindex_kick = epctx->mfindex_last + epctx->interval;
+        } else {
+            xfer->mfindex_kick = asap;
+        }
+    } else {
+        xfer->mfindex_kick = (xfer->trbs[0].control >> TRB_TR_FRAMEID_SHIFT)
+            & TRB_TR_FRAMEID_MASK;
+        xfer->mfindex_kick |= mfindex & ~0x3fff;
+        if (xfer->mfindex_kick < mfindex) {
+            xfer->mfindex_kick += 0x4000;
+        }
+    }
+}
+
+static void xhci_check_iso_kick(XHCIState *xhci, XHCITransfer *xfer,
+                                XHCIEPContext *epctx, uint64_t mfindex)
+{
+    if (xfer->mfindex_kick > mfindex) {
+        qemu_mod_timer(epctx->kick_timer, qemu_get_clock_ns(vm_clock) +
+                       (xfer->mfindex_kick - mfindex) * 125000);
+        xfer->running_retry = 1;
+    } else {
+        epctx->mfindex_last = xfer->mfindex_kick;
+        qemu_del_timer(epctx->kick_timer);
+        xfer->running_retry = 0;
+    }
+}
+
+
 static int xhci_submit(XHCIState *xhci, XHCITransfer *xfer, XHCIEPContext *epctx)
 {
-    XHCIPort *port;
-    USBDevice *dev;
+    uint64_t mfindex;
     int ret;
 
     DPRINTF("xhci_submit(slotid=%d,epid=%d)\n", xfer->slotid, xfer->epid);
 
     xfer->in_xfer = epctx->type>>2;
 
-    if (xfer->data && xfer->data_alloced < xfer->data_length) {
-        xfer->data_alloced = 0;
-        g_free(xfer->data);
-        xfer->data = NULL;
-    }
-    if (!xfer->data && xfer->data_length) {
-        DPRINTF("xhci: alloc %d bytes data\n", xfer->data_length);
-        xfer->data = g_malloc(xfer->data_length);
-        xfer->data_alloced = xfer->data_length;
-    }
-    if (epctx->type == ET_ISO_IN || epctx->type == ET_ISO_OUT) {
-        if (!xfer->bg_xfer) {
-            xfer->pkts = 1;
-        }
-    } else {
-        xfer->pkts = 0;
-    }
-
-    port = &xhci->ports[xhci->slots[xfer->slotid-1].port-1];
-    dev = xhci_find_device(port, xhci->slots[xfer->slotid-1].devaddr);
-    if (!dev) {
-        fprintf(stderr, "xhci: slot %d port %d has no device\n", xfer->slotid,
-                xhci->slots[xfer->slotid-1].port);
-        return -1;
-    }
-
-    xhci_setup_packet(xfer, dev);
-
     switch(epctx->type) {
     case ET_INTR_OUT:
     case ET_INTR_IN:
     case ET_BULK_OUT:
     case ET_BULK_IN:
+        xfer->pkts = 0;
+        xfer->iso_xfer = false;
         break;
     case ET_ISO_OUT:
     case ET_ISO_IN:
-        FIXME();
+        xfer->pkts = 1;
+        xfer->iso_xfer = true;
+        mfindex = xhci_mfindex_get(xhci);
+        xhci_calc_iso_kick(xhci, xfer, epctx, mfindex);
+        xhci_check_iso_kick(xhci, xfer, epctx, mfindex);
+        if (xfer->running_retry) {
+            return -1;
+        }
         break;
     default:
         fprintf(stderr, "xhci: unknown or unhandled EP "
@@ -1593,10 +1629,10 @@
         return -1;
     }
 
-    if (!xfer->in_xfer) {
-        xhci_xfer_data(xfer, xfer->data, xfer->data_length, 0, 1, 0);
+    if (xhci_setup_packet(xfer) < 0) {
+        return -1;
     }
-    ret = usb_handle_packet(dev, &xfer->packet);
+    ret = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
 
     xhci_complete_packet(xfer, ret);
     if (!xfer->running_async && !xfer->running_retry) {
@@ -1607,50 +1643,14 @@
 
 static int xhci_fire_transfer(XHCIState *xhci, XHCITransfer *xfer, XHCIEPContext *epctx)
 {
-    int i;
-    unsigned int length = 0;
-    XHCITRB *trb;
-
-    for (i = 0; i < xfer->trb_count; i++) {
-        trb = &xfer->trbs[i];
-        if (TRB_TYPE(*trb) == TR_NORMAL || TRB_TYPE(*trb) == TR_ISOCH) {
-            length += trb->status & 0x1ffff;
-        }
-    }
-
-    trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid, length);
-
-    if (!epctx->has_bg) {
-        xfer->data_length = length;
-        xfer->backgrounded = 0;
-        return xhci_submit(xhci, xfer, epctx);
-    } else {
-        if (!epctx->bg_running) {
-            for (i = 0; i < BG_XFERS; i++) {
-                XHCITransfer *t = &epctx->bg_transfers[i];
-                t->xhci = xhci;
-                t->epid = xfer->epid;
-                t->slotid = xfer->slotid;
-                t->pkts = BG_PKTS;
-                t->pktsize = epctx->max_psize;
-                t->data_length = t->pkts * t->pktsize;
-                t->bg_xfer = 1;
-                if (xhci_submit(xhci, t, epctx) < 0) {
-                    fprintf(stderr, "xhci: bg submit failed\n");
-                    return -1;
-                }
-            }
-            epctx->bg_running = 1;
-        }
-        xfer->backgrounded = 1;
-        xhci_bg_update(xhci, epctx);
-        return 0;
-    }
+    trace_usb_xhci_xfer_start(xfer, xfer->slotid, xfer->epid);
+    return xhci_submit(xhci, xfer, epctx);
 }
 
 static void xhci_kick_ep(XHCIState *xhci, unsigned int slotid, unsigned int epid)
 {
     XHCIEPContext *epctx;
+    uint64_t mfindex;
     int length;
     int i;
 
@@ -1670,18 +1670,35 @@
     }
 
     if (epctx->retry) {
-        /* retry nak'ed transfer */
         XHCITransfer *xfer = epctx->retry;
         int result;
 
         trace_usb_xhci_xfer_retry(xfer);
         assert(xfer->running_retry);
-        xhci_setup_packet(xfer, xfer->packet.ep->dev);
-        result = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
-        if (result == USB_RET_NAK) {
-            return;
+        if (xfer->iso_xfer) {
+            /* retry delayed iso transfer */
+            mfindex = xhci_mfindex_get(xhci);
+            xhci_check_iso_kick(xhci, xfer, epctx, mfindex);
+            if (xfer->running_retry) {
+                return;
+            }
+            if (xhci_setup_packet(xfer) < 0) {
+                return;
+            }
+            result = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
+            assert(result != USB_RET_NAK);
+            xhci_complete_packet(xfer, result);
+        } else {
+            /* retry nak'ed transfer */
+            if (xhci_setup_packet(xfer) < 0) {
+                return;
+            }
+            result = usb_handle_packet(xfer->packet.ep->dev, &xfer->packet);
+            if (result == USB_RET_NAK) {
+                return;
+            }
+            xhci_complete_packet(xfer, result);
         }
-        xhci_complete_packet(xfer, result);
         assert(!xfer->running_retry);
         epctx->retry = NULL;
     }
@@ -1695,7 +1712,7 @@
 
     while (1) {
         XHCITransfer *xfer = &epctx->transfers[epctx->next_xfer];
-        if (xfer->running_async || xfer->running_retry || xfer->backgrounded) {
+        if (xfer->running_async || xfer->running_retry) {
             break;
         }
         length = xhci_ring_chain_length(xhci, &epctx->ring);
@@ -1733,7 +1750,9 @@
             if (xhci_fire_transfer(xhci, xfer, epctx) >= 0) {
                 epctx->next_xfer = (epctx->next_xfer + 1) % TD_QUEUE;
             } else {
-                fprintf(stderr, "xhci: error firing data transfer\n");
+                if (!xfer->iso_xfer) {
+                    fprintf(stderr, "xhci: error firing data transfer\n");
+                }
             }
         }
 
@@ -1819,9 +1838,9 @@
             ep0_ctx[0], ep0_ctx[1], ep0_ctx[2], ep0_ctx[3], ep0_ctx[4]);
 
     port = (slot_ctx[1]>>16) & 0xFF;
-    dev = xhci->ports[port-1].port.dev;
+    dev = xhci->ports[port-1].uport->dev;
 
-    if (port < 1 || port > MAXPORTS) {
+    if (port < 1 || port > xhci->numports) {
         fprintf(stderr, "xhci: bad port %d\n", port);
         return CC_TRB_ERROR;
     } else if (!dev) {
@@ -2070,7 +2089,7 @@
 static TRBCCode xhci_get_port_bandwidth(XHCIState *xhci, uint64_t pctx)
 {
     dma_addr_t ctx;
-    uint8_t bw_ctx[MAXPORTS+1];
+    uint8_t bw_ctx[xhci->numports+1];
 
     DPRINTF("xhci_get_port_bandwidth()\n");
 
@@ -2080,7 +2099,7 @@
 
     /* TODO: actually implement real values here */
     bw_ctx[0] = 0;
-    memset(&bw_ctx[1], 80, MAXPORTS); /* 80% */
+    memset(&bw_ctx[1], 80, xhci->numports); /* 80% */
     pci_dma_write(&xhci->pci_dev, ctx, bw_ctx, sizeof(bw_ctx));
 
     return CC_SUCCESS;
@@ -2244,18 +2263,17 @@
             break;
         }
         event.slotid = slotid;
-        xhci_event(xhci, &event);
+        xhci_event(xhci, &event, 0);
     }
 }
 
 static void xhci_update_port(XHCIState *xhci, XHCIPort *port, int is_detach)
 {
-    int nr = port->port.index + 1;
-
     port->portsc = PORTSC_PP;
-    if (port->port.dev && port->port.dev->attached && !is_detach) {
+    if (port->uport->dev && port->uport->dev->attached && !is_detach &&
+        (1 << port->uport->dev->speed) & port->speedmask) {
         port->portsc |= PORTSC_CCS;
-        switch (port->port.dev->speed) {
+        switch (port->uport->dev->speed) {
         case USB_SPEED_LOW:
             port->portsc |= PORTSC_SPEED_LOW;
             break;
@@ -2265,14 +2283,18 @@
         case USB_SPEED_HIGH:
             port->portsc |= PORTSC_SPEED_HIGH;
             break;
+        case USB_SPEED_SUPER:
+            port->portsc |= PORTSC_SPEED_SUPER;
+            break;
         }
     }
 
     if (xhci_running(xhci)) {
         port->portsc |= PORTSC_CSC;
-        XHCIEvent ev = { ER_PORT_STATUS_CHANGE, CC_SUCCESS, nr << 24};
-        xhci_event(xhci, &ev);
-        DPRINTF("xhci: port change event for port %d\n", nr);
+        XHCIEvent ev = { ER_PORT_STATUS_CHANGE, CC_SUCCESS,
+                         port->portnr << 24};
+        xhci_event(xhci, &ev, 0);
+        DPRINTF("xhci: port change event for port %d\n", port->portnr);
     }
 }
 
@@ -2300,28 +2322,34 @@
         xhci_disable_slot(xhci, i+1);
     }
 
-    for (i = 0; i < MAXPORTS; i++) {
+    for (i = 0; i < xhci->numports; i++) {
         xhci_update_port(xhci, xhci->ports + i, 0);
     }
 
-    xhci->mfindex = 0;
-    xhci->iman = 0;
-    xhci->imod = 0;
-    xhci->erstsz = 0;
-    xhci->erstba_low = 0;
-    xhci->erstba_high = 0;
-    xhci->erdp_low = 0;
-    xhci->erdp_high = 0;
+    for (i = 0; i < MAXINTRS; i++) {
+        xhci->intr[i].iman = 0;
+        xhci->intr[i].imod = 0;
+        xhci->intr[i].erstsz = 0;
+        xhci->intr[i].erstba_low = 0;
+        xhci->intr[i].erstba_high = 0;
+        xhci->intr[i].erdp_low = 0;
+        xhci->intr[i].erdp_high = 0;
+        xhci->intr[i].msix_used = 0;
 
-    xhci->er_ep_idx = 0;
-    xhci->er_pcs = 1;
-    xhci->er_full = 0;
-    xhci->ev_buffer_put = 0;
-    xhci->ev_buffer_get = 0;
+        xhci->intr[i].er_ep_idx = 0;
+        xhci->intr[i].er_pcs = 1;
+        xhci->intr[i].er_full = 0;
+        xhci->intr[i].ev_buffer_put = 0;
+        xhci->intr[i].ev_buffer_get = 0;
+    }
+
+    xhci->mfindex_start = qemu_get_clock_ns(vm_clock);
+    xhci_mfwrap_update(xhci);
 }
 
-static uint32_t xhci_cap_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_cap_read(void *ptr, target_phys_addr_t reg, unsigned size)
 {
+    XHCIState *xhci = ptr;
     uint32_t ret;
 
     switch (reg) {
@@ -2329,7 +2357,8 @@
         ret = 0x01000000 | LEN_CAP;
         break;
     case 0x04: /* HCSPARAMS 1 */
-        ret = (MAXPORTS<<24) | (MAXINTRS<<8) | MAXSLOTS;
+        ret = ((xhci->numports_2+xhci->numports_3)<<24)
+            | (MAXINTRS<<8) | MAXSLOTS;
         break;
     case 0x08: /* HCSPARAMS 2 */
         ret = 0x0000000f;
@@ -2359,7 +2388,7 @@
         ret = 0x20425455; /* "USB " */
         break;
     case 0x28: /* Supported Protocol:08 */
-        ret = 0x00000001 | (USB2_PORTS<<8);
+        ret = 0x00000001 | (xhci->numports_2<<8);
         break;
     case 0x2c: /* Supported Protocol:0c */
         ret = 0x00000000; /* reserved */
@@ -2371,13 +2400,13 @@
         ret = 0x20425455; /* "USB " */
         break;
     case 0x38: /* Supported Protocol:08 */
-        ret = 0x00000000 | (USB2_PORTS+1) | (USB3_PORTS<<8);
+        ret = 0x00000000 | (xhci->numports_2+1) | (xhci->numports_3<<8);
         break;
     case 0x3c: /* Supported Protocol:0c */
         ret = 0x00000000; /* reserved */
         break;
     default:
-        fprintf(stderr, "xhci_cap_read: reg %d unimplemented\n", reg);
+        fprintf(stderr, "xhci_cap_read: reg %d unimplemented\n", (int)reg);
         ret = 0;
     }
 
@@ -2390,7 +2419,7 @@
     uint32_t port = reg >> 4;
     uint32_t ret;
 
-    if (port >= MAXPORTS) {
+    if (port >= xhci->numports) {
         fprintf(stderr, "xhci_port_read: port %d out of bounds\n", port);
         ret = 0;
         goto out;
@@ -2423,7 +2452,7 @@
 
     trace_usb_xhci_port_write(port, reg & 0x0f, val);
 
-    if (port >= MAXPORTS) {
+    if (port >= xhci->numports) {
         fprintf(stderr, "xhci_port_read: port %d out of bounds\n", port);
         return;
     }
@@ -2445,7 +2474,7 @@
         /* write-1-to-start bits */
         if (val & PORTSC_PR) {
             DPRINTF("xhci: port %d reset\n", port);
-            usb_device_reset(xhci->ports[port].port.dev);
+            usb_device_reset(xhci->ports[port].uport->dev);
             portsc |= PORTSC_PRC | PORTSC_PED;
         }
         xhci->ports[port].portsc = portsc;
@@ -2458,8 +2487,9 @@
     }
 }
 
-static uint32_t xhci_oper_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_oper_read(void *ptr, target_phys_addr_t reg, unsigned size)
 {
+    XHCIState *xhci = ptr;
     uint32_t ret;
 
     if (reg >= 0x400) {
@@ -2495,7 +2525,7 @@
         ret = xhci->config;
         break;
     default:
-        fprintf(stderr, "xhci_oper_read: reg 0x%x unimplemented\n", reg);
+        fprintf(stderr, "xhci_oper_read: reg 0x%x unimplemented\n", (int)reg);
         ret = 0;
     }
 
@@ -2503,8 +2533,11 @@
     return ret;
 }
 
-static void xhci_oper_write(XHCIState *xhci, uint32_t reg, uint32_t val)
+static void xhci_oper_write(void *ptr, target_phys_addr_t reg,
+                            uint64_t val, unsigned size)
 {
+    XHCIState *xhci = ptr;
+
     if (reg >= 0x400) {
         xhci_port_write(xhci, reg - 0x400, val);
         return;
@@ -2520,16 +2553,17 @@
             xhci_stop(xhci);
         }
         xhci->usbcmd = val & 0xc0f;
+        xhci_mfwrap_update(xhci);
         if (val & USBCMD_HCRST) {
             xhci_reset(&xhci->pci_dev.qdev);
         }
-        xhci_irq_update(xhci);
+        xhci_intx_update(xhci);
         break;
 
     case 0x04: /* USBSTS */
         /* these bits are write-1-to-clear */
         xhci->usbsts &= ~(val & (USBSTS_HSE|USBSTS_EINT|USBSTS_PCD|USBSTS_SRE));
-        xhci_irq_update(xhci);
+        xhci_intx_update(xhci);
         break;
 
     case 0x14: /* DNCTRL */
@@ -2543,7 +2577,7 @@
         if (xhci->crcr_low & (CRCR_CA|CRCR_CS) && (xhci->crcr_low & CRCR_CRR)) {
             XHCIEvent event = {ER_COMMAND_COMPLETE, CC_COMMAND_RING_STOPPED};
             xhci->crcr_low &= ~CRCR_CRR;
-            xhci_event(xhci, &event);
+            xhci_event(xhci, &event, 0);
             DPRINTF("xhci: command ring stopped (CRCR=%08x)\n", xhci->crcr_low);
         } else {
             dma_addr_t base = xhci_addr64(xhci->crcr_low & ~0x3f, val);
@@ -2561,101 +2595,127 @@
         xhci->config = val & 0xff;
         break;
     default:
-        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n", reg);
+        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n", (int)reg);
     }
 }
 
-static uint32_t xhci_runtime_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_runtime_read(void *ptr, target_phys_addr_t reg,
+                                  unsigned size)
 {
-    uint32_t ret;
+    XHCIState *xhci = ptr;
+    uint32_t ret = 0;
 
-    switch (reg) {
-    case 0x00: /* MFINDEX */
-        fprintf(stderr, "xhci_runtime_read: MFINDEX not yet implemented\n");
-        ret = xhci->mfindex;
-        break;
-    case 0x20: /* IMAN */
-        ret = xhci->iman;
-        break;
-    case 0x24: /* IMOD */
-        ret = xhci->imod;
-        break;
-    case 0x28: /* ERSTSZ */
-        ret = xhci->erstsz;
-        break;
-    case 0x30: /* ERSTBA low */
-        ret = xhci->erstba_low;
-        break;
-    case 0x34: /* ERSTBA high */
-        ret = xhci->erstba_high;
-        break;
-    case 0x38: /* ERDP low */
-        ret = xhci->erdp_low;
-        break;
-    case 0x3c: /* ERDP high */
-        ret = xhci->erdp_high;
-        break;
-    default:
-        fprintf(stderr, "xhci_runtime_read: reg 0x%x unimplemented\n", reg);
-        ret = 0;
+    if (reg < 0x20) {
+        switch (reg) {
+        case 0x00: /* MFINDEX */
+            ret = xhci_mfindex_get(xhci) & 0x3fff;
+            break;
+        default:
+            fprintf(stderr, "xhci_runtime_read: reg 0x%x unimplemented\n",
+                    (int)reg);
+            break;
+        }
+    } else {
+        int v = (reg - 0x20) / 0x20;
+        XHCIInterrupter *intr = &xhci->intr[v];
+        switch (reg & 0x1f) {
+        case 0x00: /* IMAN */
+            ret = intr->iman;
+            break;
+        case 0x04: /* IMOD */
+            ret = intr->imod;
+            break;
+        case 0x08: /* ERSTSZ */
+            ret = intr->erstsz;
+            break;
+        case 0x10: /* ERSTBA low */
+            ret = intr->erstba_low;
+            break;
+        case 0x14: /* ERSTBA high */
+            ret = intr->erstba_high;
+            break;
+        case 0x18: /* ERDP low */
+            ret = intr->erdp_low;
+            break;
+        case 0x1c: /* ERDP high */
+            ret = intr->erdp_high;
+            break;
+        }
     }
 
     trace_usb_xhci_runtime_read(reg, ret);
     return ret;
 }
 
-static void xhci_runtime_write(XHCIState *xhci, uint32_t reg, uint32_t val)
+static void xhci_runtime_write(void *ptr, target_phys_addr_t reg,
+                               uint64_t val, unsigned size)
 {
-    trace_usb_xhci_runtime_read(reg, val);
+    XHCIState *xhci = ptr;
+    int v = (reg - 0x20) / 0x20;
+    XHCIInterrupter *intr = &xhci->intr[v];
+    trace_usb_xhci_runtime_write(reg, val);
 
-    switch (reg) {
-    case 0x20: /* IMAN */
+    if (reg < 0x20) {
+        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n", (int)reg);
+        return;
+    }
+
+    switch (reg & 0x1f) {
+    case 0x00: /* IMAN */
         if (val & IMAN_IP) {
-            xhci->iman &= ~IMAN_IP;
+            intr->iman &= ~IMAN_IP;
         }
-        xhci->iman &= ~IMAN_IE;
-        xhci->iman |= val & IMAN_IE;
-        xhci_irq_update(xhci);
+        intr->iman &= ~IMAN_IE;
+        intr->iman |= val & IMAN_IE;
+        if (v == 0) {
+            xhci_intx_update(xhci);
+        }
+        xhci_msix_update(xhci, v);
         break;
-    case 0x24: /* IMOD */
-        xhci->imod = val;
+    case 0x04: /* IMOD */
+        intr->imod = val;
         break;
-    case 0x28: /* ERSTSZ */
-        xhci->erstsz = val & 0xffff;
+    case 0x08: /* ERSTSZ */
+        intr->erstsz = val & 0xffff;
         break;
-    case 0x30: /* ERSTBA low */
+    case 0x10: /* ERSTBA low */
         /* XXX NEC driver bug: it doesn't align this to 64 bytes
-        xhci->erstba_low = val & 0xffffffc0; */
-        xhci->erstba_low = val & 0xfffffff0;
+        intr->erstba_low = val & 0xffffffc0; */
+        intr->erstba_low = val & 0xfffffff0;
         break;
-    case 0x34: /* ERSTBA high */
-        xhci->erstba_high = val;
-        xhci_er_reset(xhci);
+    case 0x14: /* ERSTBA high */
+        intr->erstba_high = val;
+        xhci_er_reset(xhci, v);
         break;
-    case 0x38: /* ERDP low */
+    case 0x18: /* ERDP low */
         if (val & ERDP_EHB) {
-            xhci->erdp_low &= ~ERDP_EHB;
+            intr->erdp_low &= ~ERDP_EHB;
         }
-        xhci->erdp_low = (val & ~ERDP_EHB) | (xhci->erdp_low & ERDP_EHB);
+        intr->erdp_low = (val & ~ERDP_EHB) | (intr->erdp_low & ERDP_EHB);
         break;
-    case 0x3c: /* ERDP high */
-        xhci->erdp_high = val;
-        xhci_events_update(xhci);
+    case 0x1c: /* ERDP high */
+        intr->erdp_high = val;
+        xhci_events_update(xhci, v);
         break;
     default:
-        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n", reg);
+        fprintf(stderr, "xhci_oper_write: reg 0x%x unimplemented\n",
+                (int)reg);
     }
 }
 
-static uint32_t xhci_doorbell_read(XHCIState *xhci, uint32_t reg)
+static uint64_t xhci_doorbell_read(void *ptr, target_phys_addr_t reg,
+                                   unsigned size)
 {
     /* doorbells always read as 0 */
     trace_usb_xhci_doorbell_read(reg, 0);
     return 0;
 }
 
-static void xhci_doorbell_write(XHCIState *xhci, uint32_t reg, uint32_t val)
+static void xhci_doorbell_write(void *ptr, target_phys_addr_t reg,
+                                uint64_t val, unsigned size)
 {
+    XHCIState *xhci = ptr;
+
     trace_usb_xhci_doorbell_write(reg, val);
 
     if (!xhci_running(xhci)) {
@@ -2669,69 +2729,49 @@
         if (val == 0) {
             xhci_process_commands(xhci);
         } else {
-            fprintf(stderr, "xhci: bad doorbell 0 write: 0x%x\n", val);
+            fprintf(stderr, "xhci: bad doorbell 0 write: 0x%x\n",
+                    (uint32_t)val);
         }
     } else {
         if (reg > MAXSLOTS) {
-            fprintf(stderr, "xhci: bad doorbell %d\n", reg);
+            fprintf(stderr, "xhci: bad doorbell %d\n", (int)reg);
         } else if (val > 31) {
-            fprintf(stderr, "xhci: bad doorbell %d write: 0x%x\n", reg, val);
+            fprintf(stderr, "xhci: bad doorbell %d write: 0x%x\n",
+                    (int)reg, (uint32_t)val);
         } else {
             xhci_kick_ep(xhci, reg, val);
         }
     }
 }
 
-static uint64_t xhci_mem_read(void *ptr, target_phys_addr_t addr,
-                              unsigned size)
-{
-    XHCIState *xhci = ptr;
+static const MemoryRegionOps xhci_cap_ops = {
+    .read = xhci_cap_read,
+    .valid.min_access_size = 1,
+    .valid.max_access_size = 4,
+    .impl.min_access_size = 4,
+    .impl.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
 
-    /* Only aligned reads are allowed on xHCI */
-    if (addr & 3) {
-        fprintf(stderr, "xhci_mem_read: Mis-aligned read\n");
-        return 0;
-    }
+static const MemoryRegionOps xhci_oper_ops = {
+    .read = xhci_oper_read,
+    .write = xhci_oper_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
 
-    if (addr < LEN_CAP) {
-        return xhci_cap_read(xhci, addr);
-    } else if (addr >= OFF_OPER && addr < (OFF_OPER + LEN_OPER)) {
-        return xhci_oper_read(xhci, addr - OFF_OPER);
-    } else if (addr >= OFF_RUNTIME && addr < (OFF_RUNTIME + LEN_RUNTIME)) {
-        return xhci_runtime_read(xhci, addr - OFF_RUNTIME);
-    } else if (addr >= OFF_DOORBELL && addr < (OFF_DOORBELL + LEN_DOORBELL)) {
-        return xhci_doorbell_read(xhci, addr - OFF_DOORBELL);
-    } else {
-        fprintf(stderr, "xhci_mem_read: Bad offset %x\n", (int)addr);
-        return 0;
-    }
-}
+static const MemoryRegionOps xhci_runtime_ops = {
+    .read = xhci_runtime_read,
+    .write = xhci_runtime_write,
+    .valid.min_access_size = 4,
+    .valid.max_access_size = 4,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
 
-static void xhci_mem_write(void *ptr, target_phys_addr_t addr,
-                           uint64_t val, unsigned size)
-{
-    XHCIState *xhci = ptr;
-
-    /* Only aligned writes are allowed on xHCI */
-    if (addr & 3) {
-        fprintf(stderr, "xhci_mem_write: Mis-aligned write\n");
-        return;
-    }
-
-    if (addr >= OFF_OPER && addr < (OFF_OPER + LEN_OPER)) {
-        xhci_oper_write(xhci, addr - OFF_OPER, val);
-    } else if (addr >= OFF_RUNTIME && addr < (OFF_RUNTIME + LEN_RUNTIME)) {
-        xhci_runtime_write(xhci, addr - OFF_RUNTIME, val);
-    } else if (addr >= OFF_DOORBELL && addr < (OFF_DOORBELL + LEN_DOORBELL)) {
-        xhci_doorbell_write(xhci, addr - OFF_DOORBELL, val);
-    } else {
-        fprintf(stderr, "xhci_mem_write: Bad offset %x\n", (int)addr);
-    }
-}
-
-static const MemoryRegionOps xhci_mem_ops = {
-    .read = xhci_mem_read,
-    .write = xhci_mem_write,
+static const MemoryRegionOps xhci_doorbell_ops = {
+    .read = xhci_doorbell_read,
+    .write = xhci_doorbell_write,
     .valid.min_access_size = 4,
     .valid.max_access_size = 4,
     .endianness = DEVICE_LITTLE_ENDIAN,
@@ -2740,7 +2780,7 @@
 static void xhci_attach(USBPort *usbport)
 {
     XHCIState *xhci = usbport->opaque;
-    XHCIPort *port = &xhci->ports[usbport->index];
+    XHCIPort *port = xhci_lookup_port(xhci, usbport);
 
     xhci_update_port(xhci, port, 0);
 }
@@ -2748,7 +2788,7 @@
 static void xhci_detach(USBPort *usbport)
 {
     XHCIState *xhci = usbport->opaque;
-    XHCIPort *port = &xhci->ports[usbport->index];
+    XHCIPort *port = xhci_lookup_port(xhci, usbport);
 
     xhci_update_port(xhci, port, 1);
 }
@@ -2756,9 +2796,9 @@
 static void xhci_wakeup(USBPort *usbport)
 {
     XHCIState *xhci = usbport->opaque;
-    XHCIPort *port = &xhci->ports[usbport->index];
-    int nr = port->port.index + 1;
-    XHCIEvent ev = { ER_PORT_STATUS_CHANGE, CC_SUCCESS, nr << 24};
+    XHCIPort *port = xhci_lookup_port(xhci, usbport);
+    XHCIEvent ev = { ER_PORT_STATUS_CHANGE, CC_SUCCESS,
+                     port->portnr << 24};
     uint32_t pls;
 
     pls = (port->portsc >> PORTSC_PLS_SHIFT) & PORTSC_PLS_MASK;
@@ -2770,7 +2810,7 @@
         return;
     }
     port->portsc |= PORTSC_PLC;
-    xhci_event(xhci, &ev);
+    xhci_event(xhci, &ev, 0);
 }
 
 static void xhci_complete(USBPort *port, USBPacket *packet)
@@ -2840,22 +2880,43 @@
 
 static void usb_xhci_init(XHCIState *xhci, DeviceState *dev)
 {
-    int i;
+    XHCIPort *port;
+    int i, usbports, speedmask;
 
     xhci->usbsts = USBSTS_HCH;
 
+    if (xhci->numports_2 > MAXPORTS_2) {
+        xhci->numports_2 = MAXPORTS_2;
+    }
+    if (xhci->numports_3 > MAXPORTS_3) {
+        xhci->numports_3 = MAXPORTS_3;
+    }
+    usbports = MAX(xhci->numports_2, xhci->numports_3);
+    xhci->numports = xhci->numports_2 + xhci->numports_3;
+
     usb_bus_new(&xhci->bus, &xhci_bus_ops, &xhci->pci_dev.qdev);
 
-    for (i = 0; i < MAXPORTS; i++) {
-        memset(&xhci->ports[i], 0, sizeof(xhci->ports[i]));
-        usb_register_port(&xhci->bus, &xhci->ports[i].port, xhci, i,
-                          &xhci_port_ops,
-                          USB_SPEED_MASK_LOW  |
-                          USB_SPEED_MASK_FULL |
-                          USB_SPEED_MASK_HIGH);
-    }
-    for (i = 0; i < MAXSLOTS; i++) {
-        xhci->slots[i].enabled = 0;
+    for (i = 0; i < usbports; i++) {
+        speedmask = 0;
+        if (i < xhci->numports_2) {
+            port = &xhci->ports[i];
+            port->portnr = i + 1;
+            port->uport = &xhci->uports[i];
+            port->speedmask =
+                USB_SPEED_MASK_LOW  |
+                USB_SPEED_MASK_FULL |
+                USB_SPEED_MASK_HIGH;
+            speedmask |= port->speedmask;
+        }
+        if (i < xhci->numports_3) {
+            port = &xhci->ports[i + xhci->numports_2];
+            port->portnr = i + 1 + xhci->numports_2;
+            port->uport = &xhci->uports[i];
+            port->speedmask = USB_SPEED_MASK_SUPER;
+            speedmask |= port->speedmask;
+        }
+        usb_register_port(&xhci->bus, &xhci->uports[i], xhci, i,
+                          &xhci_port_ops, speedmask);
     }
 }
 
@@ -2872,10 +2933,25 @@
 
     usb_xhci_init(xhci, &dev->qdev);
 
+    xhci->mfwrap_timer = qemu_new_timer_ns(vm_clock, xhci_mfwrap_timer, xhci);
+
     xhci->irq = xhci->pci_dev.irq[0];
 
-    memory_region_init_io(&xhci->mem, &xhci_mem_ops, xhci,
-                          "xhci", LEN_REGS);
+    memory_region_init(&xhci->mem, "xhci", LEN_REGS);
+    memory_region_init_io(&xhci->mem_cap, &xhci_cap_ops, xhci,
+                          "capabilities", LEN_CAP);
+    memory_region_init_io(&xhci->mem_oper, &xhci_oper_ops, xhci,
+                          "operational", 0x400 + 0x10 * xhci->numports);
+    memory_region_init_io(&xhci->mem_runtime, &xhci_runtime_ops, xhci,
+                          "runtime", LEN_RUNTIME);
+    memory_region_init_io(&xhci->mem_doorbell, &xhci_doorbell_ops, xhci,
+                          "doorbell", LEN_DOORBELL);
+
+    memory_region_add_subregion(&xhci->mem, 0,            &xhci->mem_cap);
+    memory_region_add_subregion(&xhci->mem, OFF_OPER,     &xhci->mem_oper);
+    memory_region_add_subregion(&xhci->mem, OFF_RUNTIME,  &xhci->mem_runtime);
+    memory_region_add_subregion(&xhci->mem, OFF_DOORBELL, &xhci->mem_doorbell);
+
     pci_register_bar(&xhci->pci_dev, 0,
                      PCI_BASE_ADDRESS_SPACE_MEMORY|PCI_BASE_ADDRESS_MEM_TYPE_64,
                      &xhci->mem);
@@ -2883,32 +2959,29 @@
     ret = pcie_cap_init(&xhci->pci_dev, 0xa0, PCI_EXP_TYPE_ENDPOINT, 0);
     assert(ret >= 0);
 
-    if (xhci->msi) {
-        ret = msi_init(&xhci->pci_dev, 0x70, 1, true, false);
-        assert(ret >= 0);
+    if (xhci->flags & (1 << XHCI_FLAG_USE_MSI)) {
+        msi_init(&xhci->pci_dev, 0x70, MAXINTRS, true, false);
+    }
+    if (xhci->flags & (1 << XHCI_FLAG_USE_MSI_X)) {
+        msix_init(&xhci->pci_dev, MAXINTRS,
+                  &xhci->mem, 0, OFF_MSIX_TABLE,
+                  &xhci->mem, 0, OFF_MSIX_PBA,
+                  0x90);
     }
 
     return 0;
 }
 
-static void xhci_write_config(PCIDevice *dev, uint32_t addr, uint32_t val,
-                              int len)
-{
-    XHCIState *xhci = DO_UPCAST(XHCIState, pci_dev, dev);
-
-    pci_default_write_config(dev, addr, val, len);
-    if (xhci->msi) {
-        msi_write_config(dev, addr, val, len);
-    }
-}
-
 static const VMStateDescription vmstate_xhci = {
     .name = "xhci",
     .unmigratable = 1,
 };
 
 static Property xhci_properties[] = {
-    DEFINE_PROP_UINT32("msi", XHCIState, msi, 0),
+    DEFINE_PROP_BIT("msi",    XHCIState, flags, XHCI_FLAG_USE_MSI, true),
+    DEFINE_PROP_BIT("msix",   XHCIState, flags, XHCI_FLAG_USE_MSI_X, true),
+    DEFINE_PROP_UINT32("p2",  XHCIState, numports_2, 4),
+    DEFINE_PROP_UINT32("p3",  XHCIState, numports_3, 4),
     DEFINE_PROP_END_OF_LIST(),
 };
 
@@ -2926,7 +2999,6 @@
     k->class_id     = PCI_CLASS_SERIAL_USB;
     k->revision     = 0x03;
     k->is_express   = 1;
-    k->config_write = xhci_write_config;
 }
 
 static TypeInfo xhci_info = {
diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c
index 10b4fbb..5301a69 100644
--- a/hw/usb/redirect.c
+++ b/hw/usb/redirect.c
@@ -1,7 +1,7 @@
 /*
  * USB redirector usb-guest
  *
- * Copyright (c) 2011 Red Hat, Inc.
+ * Copyright (c) 2011-2012 Red Hat, Inc.
  *
  * Red Hat Authors:
  * Hans de Goede <hdegoede@redhat.com>
@@ -43,7 +43,7 @@
 #define EP2I(ep_address) (((ep_address & 0x80) >> 3) | (ep_address & 0x0f))
 #define I2EP(i) (((i & 0x10) << 3) | (i & 0x0f))
 
-typedef struct AsyncURB AsyncURB;
+typedef struct Cancelled Cancelled;
 typedef struct USBRedirDevice USBRedirDevice;
 
 /* Struct to hold buffered packets (iso or int input packets) */
@@ -79,15 +79,14 @@
     /* Data passed from chardev the fd_read cb to the usbredirparser read cb */
     const uint8_t *read_buf;
     int read_buf_size;
-    /* For async handling of open/close */
-    QEMUBH *open_close_bh;
+    /* For async handling of close */
+    QEMUBH *chardev_close_bh;
     /* To delay the usb attach in case of quick chardev close + open */
     QEMUTimer *attach_timer;
     int64_t next_attach_time;
     struct usbredirparser *parser;
     struct endp_data endpoint[MAX_ENDPOINTS];
-    uint32_t packet_id;
-    QTAILQ_HEAD(, AsyncURB) asyncq;
+    QTAILQ_HEAD(, Cancelled) cancelled;
     /* Data for device filtering */
     struct usb_redir_device_connect_header device_info;
     struct usb_redir_interface_info_header interface_info;
@@ -95,17 +94,9 @@
     int filter_rules_count;
 };
 
-struct AsyncURB {
-    USBRedirDevice *dev;
-    USBPacket *packet;
-    uint32_t packet_id;
-    int get;
-    union {
-        struct usb_redir_control_packet_header control_packet;
-        struct usb_redir_bulk_packet_header bulk_packet;
-        struct usb_redir_interrupt_packet_header interrupt_packet;
-    };
-    QTAILQ_ENTRY(AsyncURB)next;
+struct Cancelled {
+    uint64_t id;
+    QTAILQ_ENTRY(Cancelled)next;
 };
 
 static void usbredir_hello(void *priv, struct usb_redir_hello_header *h);
@@ -116,27 +107,27 @@
     struct usb_redir_interface_info_header *interface_info);
 static void usbredir_ep_info(void *priv,
     struct usb_redir_ep_info_header *ep_info);
-static void usbredir_configuration_status(void *priv, uint32_t id,
+static void usbredir_configuration_status(void *priv, uint64_t id,
     struct usb_redir_configuration_status_header *configuration_status);
-static void usbredir_alt_setting_status(void *priv, uint32_t id,
+static void usbredir_alt_setting_status(void *priv, uint64_t id,
     struct usb_redir_alt_setting_status_header *alt_setting_status);
-static void usbredir_iso_stream_status(void *priv, uint32_t id,
+static void usbredir_iso_stream_status(void *priv, uint64_t id,
     struct usb_redir_iso_stream_status_header *iso_stream_status);
-static void usbredir_interrupt_receiving_status(void *priv, uint32_t id,
+static void usbredir_interrupt_receiving_status(void *priv, uint64_t id,
     struct usb_redir_interrupt_receiving_status_header
     *interrupt_receiving_status);
-static void usbredir_bulk_streams_status(void *priv, uint32_t id,
+static void usbredir_bulk_streams_status(void *priv, uint64_t id,
     struct usb_redir_bulk_streams_status_header *bulk_streams_status);
-static void usbredir_control_packet(void *priv, uint32_t id,
+static void usbredir_control_packet(void *priv, uint64_t id,
     struct usb_redir_control_packet_header *control_packet,
     uint8_t *data, int data_len);
-static void usbredir_bulk_packet(void *priv, uint32_t id,
+static void usbredir_bulk_packet(void *priv, uint64_t id,
     struct usb_redir_bulk_packet_header *bulk_packet,
     uint8_t *data, int data_len);
-static void usbredir_iso_packet(void *priv, uint32_t id,
+static void usbredir_iso_packet(void *priv, uint64_t id,
     struct usb_redir_iso_packet_header *iso_packet,
     uint8_t *data, int data_len);
-static void usbredir_interrupt_packet(void *priv, uint32_t id,
+static void usbredir_interrupt_packet(void *priv, uint64_t id,
     struct usb_redir_interrupt_packet_header *interrupt_header,
     uint8_t *data, int data_len);
 
@@ -245,58 +236,58 @@
 }
 
 /*
- * Async and buffered packets helpers
+ * Cancelled and buffered packets helpers
  */
 
-static AsyncURB *async_alloc(USBRedirDevice *dev, USBPacket *p)
-{
-    AsyncURB *aurb = (AsyncURB *) g_malloc0(sizeof(AsyncURB));
-    aurb->dev = dev;
-    aurb->packet = p;
-    aurb->packet_id = dev->packet_id;
-    QTAILQ_INSERT_TAIL(&dev->asyncq, aurb, next);
-    dev->packet_id++;
-
-    return aurb;
-}
-
-static void async_free(USBRedirDevice *dev, AsyncURB *aurb)
-{
-    QTAILQ_REMOVE(&dev->asyncq, aurb, next);
-    g_free(aurb);
-}
-
-static AsyncURB *async_find(USBRedirDevice *dev, uint32_t packet_id)
-{
-    AsyncURB *aurb;
-
-    QTAILQ_FOREACH(aurb, &dev->asyncq, next) {
-        if (aurb->packet_id == packet_id) {
-            return aurb;
-        }
-    }
-    DPRINTF("could not find async urb for packet_id %u\n", packet_id);
-    return NULL;
-}
-
 static void usbredir_cancel_packet(USBDevice *udev, USBPacket *p)
 {
     USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev);
-    AsyncURB *aurb;
+    Cancelled *c;
 
-    QTAILQ_FOREACH(aurb, &dev->asyncq, next) {
-        if (p != aurb->packet) {
-            continue;
-        }
+    DPRINTF("cancel packet id %"PRIu64"\n", p->id);
 
-        DPRINTF("async cancel id %u\n", aurb->packet_id);
-        usbredirparser_send_cancel_data_packet(dev->parser, aurb->packet_id);
-        usbredirparser_do_write(dev->parser);
+    c = g_malloc0(sizeof(Cancelled));
+    c->id = p->id;
+    QTAILQ_INSERT_TAIL(&dev->cancelled, c, next);
 
-        /* Mark it as dead */
-        aurb->packet = NULL;
-        break;
+    usbredirparser_send_cancel_data_packet(dev->parser, p->id);
+    usbredirparser_do_write(dev->parser);
+}
+
+static int usbredir_is_cancelled(USBRedirDevice *dev, uint64_t id)
+{
+    Cancelled *c;
+
+    if (!dev->dev.attached) {
+        return 1; /* Treat everything as cancelled after a disconnect */
     }
+
+    QTAILQ_FOREACH(c, &dev->cancelled, next) {
+        if (c->id == id) {
+            QTAILQ_REMOVE(&dev->cancelled, c, next);
+            g_free(c);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+static USBPacket *usbredir_find_packet_by_id(USBRedirDevice *dev,
+    uint8_t ep, uint64_t id)
+{
+    USBPacket *p;
+
+    if (usbredir_is_cancelled(dev, id)) {
+        return NULL;
+    }
+
+    p = usb_ep_find_packet_by_id(&dev->dev,
+                            (ep & USB_DIR_IN) ? USB_TOKEN_IN : USB_TOKEN_OUT,
+                            ep & 0x0f, id);
+    if (p == NULL) {
+        ERROR("could not find packet with id %"PRIu64"\n", id);
+    }
+    return p;
 }
 
 static void bufp_alloc(USBRedirDevice *dev,
@@ -492,25 +483,22 @@
 static int usbredir_handle_bulk_data(USBRedirDevice *dev, USBPacket *p,
                                       uint8_t ep)
 {
-    AsyncURB *aurb = async_alloc(dev, p);
     struct usb_redir_bulk_packet_header bulk_packet;
 
-    DPRINTF("bulk-out ep %02X len %zd id %u\n", ep,
-            p->iov.size, aurb->packet_id);
+    DPRINTF("bulk-out ep %02X len %zd id %"PRIu64"\n", ep, p->iov.size, p->id);
 
     bulk_packet.endpoint  = ep;
     bulk_packet.length    = p->iov.size;
     bulk_packet.stream_id = 0;
-    aurb->bulk_packet = bulk_packet;
 
     if (ep & USB_DIR_IN) {
-        usbredirparser_send_bulk_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_bulk_packet(dev->parser, p->id,
                                         &bulk_packet, NULL, 0);
     } else {
         uint8_t buf[p->iov.size];
         usb_packet_copy(p, buf, p->iov.size);
         usbredir_log_data(dev, "bulk data out:", buf, p->iov.size);
-        usbredirparser_send_bulk_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_bulk_packet(dev->parser, p->id,
                                         &bulk_packet, buf, p->iov.size);
     }
     usbredirparser_do_write(dev->parser);
@@ -573,20 +561,18 @@
         return len;
     } else {
         /* Output interrupt endpoint, normal async operation */
-        AsyncURB *aurb = async_alloc(dev, p);
         struct usb_redir_interrupt_packet_header interrupt_packet;
         uint8_t buf[p->iov.size];
 
-        DPRINTF("interrupt-out ep %02X len %zd id %u\n", ep, p->iov.size,
-                aurb->packet_id);
+        DPRINTF("interrupt-out ep %02X len %zd id %"PRIu64"\n", ep,
+                p->iov.size, p->id);
 
         interrupt_packet.endpoint  = ep;
         interrupt_packet.length    = p->iov.size;
-        aurb->interrupt_packet     = interrupt_packet;
 
         usb_packet_copy(p, buf, p->iov.size);
         usbredir_log_data(dev, "interrupt data out:", buf, p->iov.size);
-        usbredirparser_send_interrupt_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_interrupt_packet(dev->parser, p->id,
                                         &interrupt_packet, buf, p->iov.size);
         usbredirparser_do_write(dev->parser);
         return USB_RET_ASYNC;
@@ -640,10 +626,9 @@
                                 int config)
 {
     struct usb_redir_set_configuration_header set_config;
-    AsyncURB *aurb = async_alloc(dev, p);
     int i;
 
-    DPRINTF("set config %d id %u\n", config, aurb->packet_id);
+    DPRINTF("set config %d id %"PRIu64"\n", config, p->id);
 
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         switch (dev->endpoint[i].type) {
@@ -660,20 +645,16 @@
     }
 
     set_config.configuration = config;
-    usbredirparser_send_set_configuration(dev->parser, aurb->packet_id,
-                                          &set_config);
+    usbredirparser_send_set_configuration(dev->parser, p->id, &set_config);
     usbredirparser_do_write(dev->parser);
     return USB_RET_ASYNC;
 }
 
 static int usbredir_get_config(USBRedirDevice *dev, USBPacket *p)
 {
-    AsyncURB *aurb = async_alloc(dev, p);
+    DPRINTF("get config id %"PRIu64"\n", p->id);
 
-    DPRINTF("get config id %u\n", aurb->packet_id);
-
-    aurb->get = 1;
-    usbredirparser_send_get_configuration(dev->parser, aurb->packet_id);
+    usbredirparser_send_get_configuration(dev->parser, p->id);
     usbredirparser_do_write(dev->parser);
     return USB_RET_ASYNC;
 }
@@ -682,11 +663,9 @@
                                    int interface, int alt)
 {
     struct usb_redir_set_alt_setting_header set_alt;
-    AsyncURB *aurb = async_alloc(dev, p);
     int i;
 
-    DPRINTF("set interface %d alt %d id %u\n", interface, alt,
-            aurb->packet_id);
+    DPRINTF("set interface %d alt %d id %"PRIu64"\n", interface, alt, p->id);
 
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         if (dev->endpoint[i].interface == interface) {
@@ -706,8 +685,7 @@
 
     set_alt.interface = interface;
     set_alt.alt = alt;
-    usbredirparser_send_set_alt_setting(dev->parser, aurb->packet_id,
-                                        &set_alt);
+    usbredirparser_send_set_alt_setting(dev->parser, p->id, &set_alt);
     usbredirparser_do_write(dev->parser);
     return USB_RET_ASYNC;
 }
@@ -716,14 +694,11 @@
                                    int interface)
 {
     struct usb_redir_get_alt_setting_header get_alt;
-    AsyncURB *aurb = async_alloc(dev, p);
 
-    DPRINTF("get interface %d id %u\n", interface, aurb->packet_id);
+    DPRINTF("get interface %d id %"PRIu64"\n", interface, p->id);
 
     get_alt.interface = interface;
-    aurb->get = 1;
-    usbredirparser_send_get_alt_setting(dev->parser, aurb->packet_id,
-                                        &get_alt);
+    usbredirparser_send_get_alt_setting(dev->parser, p->id, &get_alt);
     usbredirparser_do_write(dev->parser);
     return USB_RET_ASYNC;
 }
@@ -733,7 +708,6 @@
 {
     USBRedirDevice *dev = DO_UPCAST(USBRedirDevice, dev, udev);
     struct usb_redir_control_packet_header control_packet;
-    AsyncURB *aurb;
 
     /* Special cases for certain standard device requests */
     switch (request) {
@@ -751,13 +725,10 @@
         return usbredir_get_interface(dev, p, index);
     }
 
-    /* "Normal" ctrl requests */
-    aurb = async_alloc(dev, p);
-
-    /* Note request is (bRequestType << 8) | bRequest */
-    DPRINTF("ctrl-out type 0x%x req 0x%x val 0x%x index %d len %d id %u\n",
-            request >> 8, request & 0xff, value, index, length,
-            aurb->packet_id);
+    /* Normal ctrl requests, note request is (bRequestType << 8) | bRequest */
+    DPRINTF(
+        "ctrl-out type 0x%x req 0x%x val 0x%x index %d len %d id %"PRIu64"\n",
+        request >> 8, request & 0xff, value, index, length, p->id);
 
     control_packet.request     = request & 0xFF;
     control_packet.requesttype = request >> 8;
@@ -765,14 +736,13 @@
     control_packet.value       = value;
     control_packet.index       = index;
     control_packet.length      = length;
-    aurb->control_packet       = control_packet;
 
     if (control_packet.requesttype & USB_DIR_IN) {
-        usbredirparser_send_control_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_control_packet(dev->parser, p->id,
                                            &control_packet, NULL, 0);
     } else {
         usbredir_log_data(dev, "ctrl data out:", data, length);
-        usbredirparser_send_control_packet(dev->parser, aurb->packet_id,
+        usbredirparser_send_control_packet(dev->parser, p->id,
                                            &control_packet, data, length);
     }
     usbredirparser_do_write(dev->parser);
@@ -784,18 +754,11 @@
  * from within the USBDevice data / control packet callbacks and doing a
  * usb_detach from within these callbacks is not a good idea.
  *
- * So we use a bh handler to take care of close events. We also handle
- * open events from this callback to make sure that a close directly followed
- * by an open gets handled in the right order.
+ * So we use a bh handler to take care of close events.
  */
-static void usbredir_open_close_bh(void *opaque)
+static void usbredir_chardev_close_bh(void *opaque)
 {
     USBRedirDevice *dev = opaque;
-    uint32_t caps[USB_REDIR_CAPS_SIZE] = { 0, };
-    char version[32];
-
-    strcpy(version, "qemu usb-redir guest ");
-    pstrcat(version, sizeof(version), qemu_get_version());
 
     usbredir_device_disconnect(dev);
 
@@ -803,34 +766,56 @@
         usbredirparser_destroy(dev->parser);
         dev->parser = NULL;
     }
+}
 
-    if (dev->cs->opened) {
-        dev->parser = qemu_oom_check(usbredirparser_create());
-        dev->parser->priv = dev;
-        dev->parser->log_func = usbredir_log;
-        dev->parser->read_func = usbredir_read;
-        dev->parser->write_func = usbredir_write;
-        dev->parser->hello_func = usbredir_hello;
-        dev->parser->device_connect_func = usbredir_device_connect;
-        dev->parser->device_disconnect_func = usbredir_device_disconnect;
-        dev->parser->interface_info_func = usbredir_interface_info;
-        dev->parser->ep_info_func = usbredir_ep_info;
-        dev->parser->configuration_status_func = usbredir_configuration_status;
-        dev->parser->alt_setting_status_func = usbredir_alt_setting_status;
-        dev->parser->iso_stream_status_func = usbredir_iso_stream_status;
-        dev->parser->interrupt_receiving_status_func =
-            usbredir_interrupt_receiving_status;
-        dev->parser->bulk_streams_status_func = usbredir_bulk_streams_status;
-        dev->parser->control_packet_func = usbredir_control_packet;
-        dev->parser->bulk_packet_func = usbredir_bulk_packet;
-        dev->parser->iso_packet_func = usbredir_iso_packet;
-        dev->parser->interrupt_packet_func = usbredir_interrupt_packet;
-        dev->read_buf = NULL;
-        dev->read_buf_size = 0;
+static void usbredir_chardev_open(USBRedirDevice *dev)
+{
+    uint32_t caps[USB_REDIR_CAPS_SIZE] = { 0, };
+    char version[32];
 
-        usbredirparser_caps_set_cap(caps, usb_redir_cap_connect_device_version);
-        usbredirparser_caps_set_cap(caps, usb_redir_cap_filter);
-        usbredirparser_init(dev->parser, version, caps, USB_REDIR_CAPS_SIZE, 0);
+    /* Make sure any pending closes are handled (no-op if none pending) */
+    usbredir_chardev_close_bh(dev);
+    qemu_bh_cancel(dev->chardev_close_bh);
+
+    strcpy(version, "qemu usb-redir guest ");
+    pstrcat(version, sizeof(version), qemu_get_version());
+
+    dev->parser = qemu_oom_check(usbredirparser_create());
+    dev->parser->priv = dev;
+    dev->parser->log_func = usbredir_log;
+    dev->parser->read_func = usbredir_read;
+    dev->parser->write_func = usbredir_write;
+    dev->parser->hello_func = usbredir_hello;
+    dev->parser->device_connect_func = usbredir_device_connect;
+    dev->parser->device_disconnect_func = usbredir_device_disconnect;
+    dev->parser->interface_info_func = usbredir_interface_info;
+    dev->parser->ep_info_func = usbredir_ep_info;
+    dev->parser->configuration_status_func = usbredir_configuration_status;
+    dev->parser->alt_setting_status_func = usbredir_alt_setting_status;
+    dev->parser->iso_stream_status_func = usbredir_iso_stream_status;
+    dev->parser->interrupt_receiving_status_func =
+        usbredir_interrupt_receiving_status;
+    dev->parser->bulk_streams_status_func = usbredir_bulk_streams_status;
+    dev->parser->control_packet_func = usbredir_control_packet;
+    dev->parser->bulk_packet_func = usbredir_bulk_packet;
+    dev->parser->iso_packet_func = usbredir_iso_packet;
+    dev->parser->interrupt_packet_func = usbredir_interrupt_packet;
+    dev->read_buf = NULL;
+    dev->read_buf_size = 0;
+
+    usbredirparser_caps_set_cap(caps, usb_redir_cap_connect_device_version);
+    usbredirparser_caps_set_cap(caps, usb_redir_cap_filter);
+    usbredirparser_caps_set_cap(caps, usb_redir_cap_ep_info_max_packet_size);
+    usbredirparser_caps_set_cap(caps, usb_redir_cap_64bits_ids);
+    usbredirparser_init(dev->parser, version, caps, USB_REDIR_CAPS_SIZE, 0);
+    usbredirparser_do_write(dev->parser);
+}
+
+static void usbredir_reject_device(USBRedirDevice *dev)
+{
+    usbredir_device_disconnect(dev);
+    if (usbredirparser_peer_has_cap(dev->parser, usb_redir_cap_filter)) {
+        usbredirparser_send_filter_reject(dev->parser);
         usbredirparser_do_write(dev->parser);
     }
 }
@@ -839,12 +824,19 @@
 {
     USBRedirDevice *dev = opaque;
 
+    /* In order to work properly with XHCI controllers we need these caps */
+    if ((dev->dev.port->speedmask & USB_SPEED_MASK_SUPER) && !(
+        usbredirparser_peer_has_cap(dev->parser,
+                                    usb_redir_cap_ep_info_max_packet_size) &&
+        usbredirparser_peer_has_cap(dev->parser,
+                                    usb_redir_cap_64bits_ids))) {
+        ERROR("usb-redir-host lacks capabilities needed for use with XHCI\n");
+        usbredir_reject_device(dev);
+        return;
+    }
+
     if (usb_device_attach(&dev->dev) != 0) {
-        usbredir_device_disconnect(dev);
-        if (usbredirparser_peer_has_cap(dev->parser, usb_redir_cap_filter)) {
-            usbredirparser_send_filter_reject(dev->parser);
-            usbredirparser_do_write(dev->parser);
-        }
+        usbredir_reject_device(dev);
     }
 }
 
@@ -856,13 +848,13 @@
 {
     USBRedirDevice *dev = opaque;
 
-    if (dev->parser) {
-        /* usbredir_parser_do_read will consume *all* data we give it */
-        return 1024 * 1024;
-    } else {
-        /* usbredir_open_close_bh hasn't handled the open event yet */
+    if (!dev->parser) {
+        WARNING("chardev_can_read called on non open chardev!\n");
         return 0;
     }
+
+    /* usbredir_parser_do_read will consume *all* data we give it */
+    return 1024 * 1024;
 }
 
 static void usbredir_chardev_read(void *opaque, const uint8_t *buf, int size)
@@ -886,8 +878,10 @@
 
     switch (event) {
     case CHR_EVENT_OPENED:
+        usbredir_chardev_open(dev);
+        break;
     case CHR_EVENT_CLOSED:
-        qemu_bh_schedule(dev->open_close_bh);
+        qemu_bh_schedule(dev->chardev_close_bh);
         break;
     }
 }
@@ -917,10 +911,10 @@
         }
     }
 
-    dev->open_close_bh = qemu_bh_new(usbredir_open_close_bh, dev);
+    dev->chardev_close_bh = qemu_bh_new(usbredir_chardev_close_bh, dev);
     dev->attach_timer = qemu_new_timer_ms(vm_clock, usbredir_do_attach, dev);
 
-    QTAILQ_INIT(&dev->asyncq);
+    QTAILQ_INIT(&dev->cancelled);
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         QTAILQ_INIT(&dev->endpoint[i].bufpq);
     }
@@ -939,11 +933,12 @@
 
 static void usbredir_cleanup_device_queues(USBRedirDevice *dev)
 {
-    AsyncURB *aurb, *next_aurb;
+    Cancelled *c, *next_c;
     int i;
 
-    QTAILQ_FOREACH_SAFE(aurb, &dev->asyncq, next, next_aurb) {
-        async_free(dev, aurb);
+    QTAILQ_FOREACH_SAFE(c, &dev->cancelled, next, next_c) {
+        QTAILQ_REMOVE(&dev->cancelled, c, next);
+        g_free(c);
     }
     for (i = 0; i < MAX_ENDPOINTS; i++) {
         usbredir_free_bufpq(dev, I2EP(i));
@@ -957,7 +952,7 @@
     qemu_chr_fe_close(dev->cs);
     qemu_chr_delete(dev->cs);
     /* Note must be done after qemu_chr_close, as that causes a close event */
-    qemu_bh_delete(dev->open_close_bh);
+    qemu_bh_delete(dev->chardev_close_bh);
 
     qemu_del_timer(dev->attach_timer);
     qemu_free_timer(dev->attach_timer);
@@ -1007,11 +1002,7 @@
     return 0;
 
 error:
-    usbredir_device_disconnect(dev);
-    if (usbredirparser_peer_has_cap(dev->parser, usb_redir_cap_filter)) {
-        usbredirparser_send_filter_reject(dev->parser);
-        usbredirparser_do_write(dev->parser);
-    }
+    usbredir_reject_device(dev);
     return -1;
 }
 
@@ -1028,11 +1019,14 @@
     case usb_redir_stall:
         return USB_RET_STALL;
     case usb_redir_cancelled:
-        WARNING("returning cancelled packet to HC?\n");
-        return USB_RET_NAK;
+        /*
+         * When the usbredir-host unredirects a device, it will report a status
+         * of cancelled for all pending packets, followed by a disconnect msg.
+         */
+        return USB_RET_IOERROR;
     case usb_redir_inval:
         WARNING("got invalid param error from usb-host?\n");
-        return USB_RET_NAK;
+        return USB_RET_IOERROR;
     case usb_redir_babble:
         return USB_RET_BABBLE;
     case usb_redir_ioerror:
@@ -1199,70 +1193,67 @@
                             i & 0x0f);
         usb_ep->type = dev->endpoint[i].type;
         usb_ep->ifnum = dev->endpoint[i].interface;
+        if (usbredirparser_peer_has_cap(dev->parser,
+                                     usb_redir_cap_ep_info_max_packet_size)) {
+            usb_ep->max_packet_size = ep_info->max_packet_size[i];
+        }
+        if (ep_info->type[i] == usb_redir_type_bulk) {
+            usb_ep->pipeline = true;
+        }
     }
 }
 
-static void usbredir_configuration_status(void *priv, uint32_t id,
+static void usbredir_configuration_status(void *priv, uint64_t id,
     struct usb_redir_configuration_status_header *config_status)
 {
     USBRedirDevice *dev = priv;
-    AsyncURB *aurb;
+    USBPacket *p;
     int len = 0;
 
-    DPRINTF("set config status %d config %d id %u\n", config_status->status,
-            config_status->configuration, id);
+    DPRINTF("set config status %d config %d id %"PRIu64"\n",
+            config_status->status, config_status->configuration, id);
 
-    aurb = async_find(dev, id);
-    if (!aurb) {
-        return;
-    }
-    if (aurb->packet) {
-        if (aurb->get) {
+    p = usbredir_find_packet_by_id(dev, 0, id);
+    if (p) {
+        if (dev->dev.setup_buf[0] & USB_DIR_IN) {
             dev->dev.data_buf[0] = config_status->configuration;
             len = 1;
         }
-        aurb->packet->result =
-            usbredir_handle_status(dev, config_status->status, len);
-        usb_generic_async_ctrl_complete(&dev->dev, aurb->packet);
+        p->result = usbredir_handle_status(dev, config_status->status, len);
+        usb_generic_async_ctrl_complete(&dev->dev, p);
     }
-    async_free(dev, aurb);
 }
 
-static void usbredir_alt_setting_status(void *priv, uint32_t id,
+static void usbredir_alt_setting_status(void *priv, uint64_t id,
     struct usb_redir_alt_setting_status_header *alt_setting_status)
 {
     USBRedirDevice *dev = priv;
-    AsyncURB *aurb;
+    USBPacket *p;
     int len = 0;
 
-    DPRINTF("alt status %d intf %d alt %d id: %u\n",
-            alt_setting_status->status,
-            alt_setting_status->interface,
+    DPRINTF("alt status %d intf %d alt %d id: %"PRIu64"\n",
+            alt_setting_status->status, alt_setting_status->interface,
             alt_setting_status->alt, id);
 
-    aurb = async_find(dev, id);
-    if (!aurb) {
-        return;
-    }
-    if (aurb->packet) {
-        if (aurb->get) {
+    p = usbredir_find_packet_by_id(dev, 0, id);
+    if (p) {
+        if (dev->dev.setup_buf[0] & USB_DIR_IN) {
             dev->dev.data_buf[0] = alt_setting_status->alt;
             len = 1;
         }
-        aurb->packet->result =
+        p->result =
             usbredir_handle_status(dev, alt_setting_status->status, len);
-        usb_generic_async_ctrl_complete(&dev->dev, aurb->packet);
+        usb_generic_async_ctrl_complete(&dev->dev, p);
     }
-    async_free(dev, aurb);
 }
 
-static void usbredir_iso_stream_status(void *priv, uint32_t id,
+static void usbredir_iso_stream_status(void *priv, uint64_t id,
     struct usb_redir_iso_stream_status_header *iso_stream_status)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = iso_stream_status->endpoint;
 
-    DPRINTF("iso status %d ep %02X id %u\n", iso_stream_status->status,
+    DPRINTF("iso status %d ep %02X id %"PRIu64"\n", iso_stream_status->status,
             ep, id);
 
     if (!dev->dev.attached || !dev->endpoint[EP2I(ep)].iso_started) {
@@ -1276,14 +1267,14 @@
     }
 }
 
-static void usbredir_interrupt_receiving_status(void *priv, uint32_t id,
+static void usbredir_interrupt_receiving_status(void *priv, uint64_t id,
     struct usb_redir_interrupt_receiving_status_header
     *interrupt_receiving_status)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = interrupt_receiving_status->endpoint;
 
-    DPRINTF("interrupt recv status %d ep %02X id %u\n",
+    DPRINTF("interrupt recv status %d ep %02X id %"PRIu64"\n",
             interrupt_receiving_status->status, ep, id);
 
     if (!dev->dev.attached || !dev->endpoint[EP2I(ep)].interrupt_started) {
@@ -1298,37 +1289,24 @@
     }
 }
 
-static void usbredir_bulk_streams_status(void *priv, uint32_t id,
+static void usbredir_bulk_streams_status(void *priv, uint64_t id,
     struct usb_redir_bulk_streams_status_header *bulk_streams_status)
 {
 }
 
-static void usbredir_control_packet(void *priv, uint32_t id,
+static void usbredir_control_packet(void *priv, uint64_t id,
     struct usb_redir_control_packet_header *control_packet,
     uint8_t *data, int data_len)
 {
     USBRedirDevice *dev = priv;
+    USBPacket *p;
     int len = control_packet->length;
-    AsyncURB *aurb;
 
-    DPRINTF("ctrl-in status %d len %d id %u\n", control_packet->status,
+    DPRINTF("ctrl-in status %d len %d id %"PRIu64"\n", control_packet->status,
             len, id);
 
-    aurb = async_find(dev, id);
-    if (!aurb) {
-        free(data);
-        return;
-    }
-
-    aurb->control_packet.status = control_packet->status;
-    aurb->control_packet.length = control_packet->length;
-    if (memcmp(&aurb->control_packet, control_packet,
-               sizeof(*control_packet))) {
-        ERROR("return control packet mismatch, please report this!\n");
-        len = USB_RET_NAK;
-    }
-
-    if (aurb->packet) {
+    p = usbredir_find_packet_by_id(dev, 0, id);
+    if (p) {
         len = usbredir_handle_status(dev, control_packet->status, len);
         if (len > 0) {
             usbredir_log_data(dev, "ctrl data in:", data, data_len);
@@ -1340,65 +1318,52 @@
                 len = USB_RET_STALL;
             }
         }
-        aurb->packet->result = len;
-        usb_generic_async_ctrl_complete(&dev->dev, aurb->packet);
+        p->result = len;
+        usb_generic_async_ctrl_complete(&dev->dev, p);
     }
-    async_free(dev, aurb);
     free(data);
 }
 
-static void usbredir_bulk_packet(void *priv, uint32_t id,
+static void usbredir_bulk_packet(void *priv, uint64_t id,
     struct usb_redir_bulk_packet_header *bulk_packet,
     uint8_t *data, int data_len)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = bulk_packet->endpoint;
     int len = bulk_packet->length;
-    AsyncURB *aurb;
+    USBPacket *p;
 
-    DPRINTF("bulk-in status %d ep %02X len %d id %u\n", bulk_packet->status,
-            ep, len, id);
+    DPRINTF("bulk-in status %d ep %02X len %d id %"PRIu64"\n",
+            bulk_packet->status, ep, len, id);
 
-    aurb = async_find(dev, id);
-    if (!aurb) {
-        free(data);
-        return;
-    }
-
-    if (aurb->bulk_packet.endpoint != bulk_packet->endpoint ||
-            aurb->bulk_packet.stream_id != bulk_packet->stream_id) {
-        ERROR("return bulk packet mismatch, please report this!\n");
-        len = USB_RET_NAK;
-    }
-
-    if (aurb->packet) {
+    p = usbredir_find_packet_by_id(dev, ep, id);
+    if (p) {
         len = usbredir_handle_status(dev, bulk_packet->status, len);
         if (len > 0) {
             usbredir_log_data(dev, "bulk data in:", data, data_len);
-            if (data_len <= aurb->packet->iov.size) {
-                usb_packet_copy(aurb->packet, data, data_len);
+            if (data_len <= p->iov.size) {
+                usb_packet_copy(p, data, data_len);
             } else {
-                ERROR("bulk buffer too small (%d > %zd)\n", data_len,
-                      aurb->packet->iov.size);
-                len = USB_RET_STALL;
+                ERROR("bulk got more data then requested (%d > %zd)\n",
+                      data_len, p->iov.size);
+                len = USB_RET_BABBLE;
             }
         }
-        aurb->packet->result = len;
-        usb_packet_complete(&dev->dev, aurb->packet);
+        p->result = len;
+        usb_packet_complete(&dev->dev, p);
     }
-    async_free(dev, aurb);
     free(data);
 }
 
-static void usbredir_iso_packet(void *priv, uint32_t id,
+static void usbredir_iso_packet(void *priv, uint64_t id,
     struct usb_redir_iso_packet_header *iso_packet,
     uint8_t *data, int data_len)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = iso_packet->endpoint;
 
-    DPRINTF2("iso-in status %d ep %02X len %d id %u\n", iso_packet->status, ep,
-             data_len, id);
+    DPRINTF2("iso-in status %d ep %02X len %d id %"PRIu64"\n",
+             iso_packet->status, ep, data_len, id);
 
     if (dev->endpoint[EP2I(ep)].type != USB_ENDPOINT_XFER_ISOC) {
         ERROR("received iso packet for non iso endpoint %02X\n", ep);
@@ -1416,14 +1381,14 @@
     bufp_alloc(dev, data, data_len, iso_packet->status, ep);
 }
 
-static void usbredir_interrupt_packet(void *priv, uint32_t id,
+static void usbredir_interrupt_packet(void *priv, uint64_t id,
     struct usb_redir_interrupt_packet_header *interrupt_packet,
     uint8_t *data, int data_len)
 {
     USBRedirDevice *dev = priv;
     uint8_t ep = interrupt_packet->endpoint;
 
-    DPRINTF("interrupt-in status %d ep %02X len %d id %u\n",
+    DPRINTF("interrupt-in status %d ep %02X len %d id %"PRIu64"\n",
             interrupt_packet->status, ep, data_len, id);
 
     if (dev->endpoint[EP2I(ep)].type != USB_ENDPOINT_XFER_INT) {
@@ -1444,22 +1409,12 @@
     } else {
         int len = interrupt_packet->length;
 
-        AsyncURB *aurb = async_find(dev, id);
-        if (!aurb) {
-            return;
-        }
-
-        if (aurb->interrupt_packet.endpoint != interrupt_packet->endpoint) {
-            ERROR("return int packet mismatch, please report this!\n");
-            len = USB_RET_NAK;
-        }
-
-        if (aurb->packet) {
-            aurb->packet->result = usbredir_handle_status(dev,
+        USBPacket *p = usbredir_find_packet_by_id(dev, ep, id);
+        if (p) {
+            p->result = usbredir_handle_status(dev,
                                                interrupt_packet->status, len);
-            usb_packet_complete(&dev->dev, aurb->packet);
+            usb_packet_complete(&dev->dev, p);
         }
-        async_free(dev, aurb);
     }
 }
 
diff --git a/iohandler.c b/iohandler.c
index dea4355..a2d871b 100644
--- a/iohandler.c
+++ b/iohandler.c
@@ -56,6 +56,8 @@
 {
     IOHandlerRecord *ioh;
 
+    assert(fd >= 0);
+
     if (!fd_read && !fd_write) {
         QLIST_FOREACH(ioh, &io_handlers, next) {
             if (ioh->fd == fd) {
diff --git a/qemu-char.c b/qemu-char.c
index 767da93..7f0f895 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -2141,18 +2141,13 @@
 
 static void tcp_chr_accept(void *opaque);
 
-static void tcp_chr_connect(void *opaque);
-
 static int tcp_chr_write(CharDriverState *chr, const uint8_t *buf, int len)
 {
     TCPCharDriver *s = chr->opaque;
     if (s->connected) {
         return send_all(s->fd, buf, len);
-    } else if (s->listen_fd == -1) {
-        /* (Re-)connect for unconnected writing */
-        tcp_chr_connect(chr);
-        return 0;
     } else {
+        /* XXX: indicate an error ? */
         return len;
     }
 }
@@ -2334,8 +2329,10 @@
     TCPCharDriver *s = chr->opaque;
 
     s->connected = 1;
-    qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
-                         tcp_chr_read, NULL, chr);
+    if (s->fd >= 0) {
+        qemu_set_fd_handler2(s->fd, tcp_chr_read_poll,
+                             tcp_chr_read, NULL, chr);
+    }
     qemu_chr_generic_open(chr);
 }
 
diff --git a/qemu-config.c b/qemu-config.c
index 3eaee48..deaf648 100644
--- a/qemu-config.c
+++ b/qemu-config.c
@@ -619,6 +619,10 @@
             .name = "dump-guest-core",
             .type = QEMU_OPT_BOOL,
             .help = "Include guest memory in  a core dump",
+        }, {
+            .name = "mem-merge",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable memory merge support",
         },
         { /* End of list */ }
     },
diff --git a/qemu-options.hx b/qemu-options.hx
index 1af4fec..8029716 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -38,7 +38,8 @@
     "                supported accelerators are kvm, xen, tcg (default: tcg)\n"
     "                kernel_irqchip=on|off controls accelerated irqchip support\n"
     "                kvm_shadow_mem=size of KVM shadow MMU\n"
-    "                dump-guest-core=on|off include guest memory in a core dump (default=on)\n",
+    "                dump-guest-core=on|off include guest memory in a core dump (default=on)\n"
+    "                mem-merge=on|off controls memory merge support (default: on)\n",
     QEMU_ARCH_ALL)
 STEXI
 @item -machine [type=]@var{name}[,prop=@var{value}[,...]]
@@ -57,6 +58,10 @@
 Defines the size of the KVM shadow MMU.
 @item dump-guest-core=on|off
 Include guest memory in a core dump. The default is on.
+@item mem-merge=on|off
+Enables or disables memory merge support. This feature, when supported by
+the host, de-duplicates identical memory pages among VMs instances
+(enabled by default).
 @end table
 ETEXI
 
diff --git a/softmmu_defs.h b/softmmu_defs.h
index 8d59f9d..1f25e33 100644
--- a/softmmu_defs.h
+++ b/softmmu_defs.h
@@ -9,25 +9,6 @@
 #ifndef SOFTMMU_DEFS_H
 #define SOFTMMU_DEFS_H
 
-#ifndef CONFIG_TCG_PASS_AREG0
-uint8_t __ldb_mmu(target_ulong addr, int mmu_idx);
-void __stb_mmu(target_ulong addr, uint8_t val, int mmu_idx);
-uint16_t __ldw_mmu(target_ulong addr, int mmu_idx);
-void __stw_mmu(target_ulong addr, uint16_t val, int mmu_idx);
-uint32_t __ldl_mmu(target_ulong addr, int mmu_idx);
-void __stl_mmu(target_ulong addr, uint32_t val, int mmu_idx);
-uint64_t __ldq_mmu(target_ulong addr, int mmu_idx);
-void __stq_mmu(target_ulong addr, uint64_t val, int mmu_idx);
-
-uint8_t __ldb_cmmu(target_ulong addr, int mmu_idx);
-void __stb_cmmu(target_ulong addr, uint8_t val, int mmu_idx);
-uint16_t __ldw_cmmu(target_ulong addr, int mmu_idx);
-void __stw_cmmu(target_ulong addr, uint16_t val, int mmu_idx);
-uint32_t __ldl_cmmu(target_ulong addr, int mmu_idx);
-void __stl_cmmu(target_ulong addr, uint32_t val, int mmu_idx);
-uint64_t __ldq_cmmu(target_ulong addr, int mmu_idx);
-void __stq_cmmu(target_ulong addr, uint64_t val, int mmu_idx);
-#else
 uint8_t helper_ldb_mmu(CPUArchState *env, target_ulong addr, int mmu_idx);
 void helper_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val,
                     int mmu_idx);
@@ -54,5 +35,3 @@
 void helper_stq_cmmu(CPUArchState *env, target_ulong addr, uint64_t val,
                      int mmu_idx);
 #endif
-
-#endif
diff --git a/softmmu_header.h b/softmmu_header.h
index cf1aa38..d8d9c81 100644
--- a/softmmu_header.h
+++ b/softmmu_header.h
@@ -78,23 +78,10 @@
 #define ADDR_READ addr_read
 #endif
 
-#ifndef CONFIG_TCG_PASS_AREG0
-#define ENV_PARAM
-#define ENV_VAR
-#define CPU_PREFIX
-#define HELPER_PREFIX __
-#else
-#define ENV_PARAM CPUArchState *env,
-#define ENV_VAR env,
-#define CPU_PREFIX cpu_
-#define HELPER_PREFIX helper_
-#endif
-
 /* generic load/store macros */
 
 static inline RES_TYPE
-glue(glue(glue(CPU_PREFIX, ld), USUFFIX), MEMSUFFIX)(ENV_PARAM
-                                                     target_ulong ptr)
+glue(glue(cpu_ld, USUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
 {
     int page_index;
     RES_TYPE res;
@@ -106,9 +93,7 @@
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_VAR
-                                                                     addr,
-                                                                     mmu_idx);
+        res = glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(env, addr, mmu_idx);
     } else {
         uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(ld, USUFFIX), _raw)(hostaddr);
@@ -118,8 +103,7 @@
 
 #if DATA_SIZE <= 2
 static inline int
-glue(glue(glue(CPU_PREFIX, lds), SUFFIX), MEMSUFFIX)(ENV_PARAM
-                                                     target_ulong ptr)
+glue(glue(cpu_lds, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr)
 {
     int res, page_index;
     target_ulong addr;
@@ -130,8 +114,8 @@
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        res = (DATA_STYPE)glue(glue(glue(HELPER_PREFIX, ld), SUFFIX),
-                               MMUSUFFIX)(ENV_VAR addr, mmu_idx);
+        res = (DATA_STYPE)glue(glue(helper_ld, SUFFIX),
+                               MMUSUFFIX)(env, addr, mmu_idx);
     } else {
         uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         res = glue(glue(lds, SUFFIX), _raw)(hostaddr);
@@ -145,8 +129,8 @@
 /* generic store macro */
 
 static inline void
-glue(glue(glue(CPU_PREFIX, st), SUFFIX), MEMSUFFIX)(ENV_PARAM target_ulong ptr,
-                                                    RES_TYPE v)
+glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr,
+                                      RES_TYPE v)
 {
     int page_index;
     target_ulong addr;
@@ -157,8 +141,7 @@
     mmu_idx = CPU_MMU_INDEX;
     if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
                  (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
-        glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_VAR addr, v,
-                                                               mmu_idx);
+        glue(glue(helper_st, SUFFIX), MMUSUFFIX)(env, addr, v, mmu_idx);
     } else {
         uintptr_t hostaddr = addr + env->tlb_table[mmu_idx][page_index].addend;
         glue(glue(st, SUFFIX), _raw)(hostaddr, v);
@@ -170,52 +153,50 @@
 #if ACCESS_TYPE != (NB_MMU_MODES + 1)
 
 #if DATA_SIZE == 8
-static inline float64 glue(glue(CPU_PREFIX, ldfq), MEMSUFFIX)(ENV_PARAM
-                                                              target_ulong ptr)
+static inline float64 glue(cpu_ldfq, MEMSUFFIX)(CPUArchState *env,
+                                                target_ulong ptr)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
-    u.i = glue(glue(CPU_PREFIX, ldq), MEMSUFFIX)(ENV_VAR ptr);
+    u.i = glue(cpu_ldq, MEMSUFFIX)(env, ptr);
     return u.d;
 }
 
-static inline void glue(glue(CPU_PREFIX, stfq), MEMSUFFIX)(ENV_PARAM
-                                                           target_ulong ptr,
-                                                           float64 v)
+static inline void glue(cpu_stfq, MEMSUFFIX)(CPUArchState *env,
+                                             target_ulong ptr, float64 v)
 {
     union {
         float64 d;
         uint64_t i;
     } u;
     u.d = v;
-    glue(glue(CPU_PREFIX, stq), MEMSUFFIX)(ENV_VAR ptr, u.i);
+    glue(cpu_stq, MEMSUFFIX)(env, ptr, u.i);
 }
 #endif /* DATA_SIZE == 8 */
 
 #if DATA_SIZE == 4
-static inline float32 glue(glue(CPU_PREFIX, ldfl), MEMSUFFIX)(ENV_PARAM
-                                                              target_ulong ptr)
+static inline float32 glue(cpu_ldfl, MEMSUFFIX)(CPUArchState *env,
+                                                target_ulong ptr)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
-    u.i = glue(glue(CPU_PREFIX, ldl), MEMSUFFIX)(ENV_VAR ptr);
+    u.i = glue(cpu_ldl, MEMSUFFIX)(env, ptr);
     return u.f;
 }
 
-static inline void glue(glue(CPU_PREFIX, stfl), MEMSUFFIX)(ENV_PARAM
-                                                           target_ulong ptr,
-                                                           float32 v)
+static inline void glue(cpu_stfl, MEMSUFFIX)(CPUArchState *env,
+                                             target_ulong ptr, float32 v)
 {
     union {
         float32 f;
         uint32_t i;
     } u;
     u.f = v;
-    glue(glue(CPU_PREFIX, stl), MEMSUFFIX)(ENV_VAR ptr, u.i);
+    glue(cpu_stl, MEMSUFFIX)(env, ptr, u.i);
 }
 #endif /* DATA_SIZE == 4 */
 
@@ -230,7 +211,3 @@
 #undef CPU_MMU_INDEX
 #undef MMUSUFFIX
 #undef ADDR_READ
-#undef ENV_PARAM
-#undef ENV_VAR
-#undef CPU_PREFIX
-#undef HELPER_PREFIX
diff --git a/softmmu_template.h b/softmmu_template.h
index b8bd700..e2490f0 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -54,23 +54,11 @@
 #define ADDR_READ addr_read
 #endif
 
-#ifndef CONFIG_TCG_PASS_AREG0
-#define ENV_PARAM
-#define ENV_VAR
-#define CPU_PREFIX
-#define HELPER_PREFIX __
-#else
-#define ENV_PARAM CPUArchState *env,
-#define ENV_VAR env,
-#define CPU_PREFIX cpu_
-#define HELPER_PREFIX helper_
-#endif
-
-static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+static DATA_TYPE glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
                                                         target_ulong addr,
                                                         int mmu_idx,
                                                         uintptr_t retaddr);
-static inline DATA_TYPE glue(io_read, SUFFIX)(ENV_PARAM
+static inline DATA_TYPE glue(io_read, SUFFIX)(CPUArchState *env,
                                               target_phys_addr_t physaddr,
                                               target_ulong addr,
                                               uintptr_t retaddr)
@@ -104,9 +92,8 @@
 
 /* handle all cases except unaligned access which span two pages */
 DATA_TYPE
-glue(glue(glue(HELPER_PREFIX, ld), SUFFIX), MMUSUFFIX)(ENV_PARAM
-                                                       target_ulong addr,
-                                                       int mmu_idx)
+glue(glue(helper_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env, target_ulong addr,
+                                         int mmu_idx)
 {
     DATA_TYPE res;
     int index;
@@ -126,15 +113,15 @@
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
             /* slow unaligned access (it spans two pages or IO) */
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
-            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr,
+            res = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr,
                                                          mmu_idx, retaddr);
         } else {
             /* unaligned/aligned access in the same page */
@@ -142,7 +129,7 @@
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+                do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -154,7 +141,7 @@
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(ENV_VAR addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
+            do_unaligned_access(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, READ_ACCESS_TYPE, mmu_idx, retaddr);
         goto redo;
@@ -164,7 +151,7 @@
 
 /* handle all unaligned cases */
 static DATA_TYPE
-glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_PARAM
+glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(CPUArchState *env,
                                        target_ulong addr,
                                        int mmu_idx,
                                        uintptr_t retaddr)
@@ -183,15 +170,15 @@
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            res = glue(io_read, SUFFIX)(ENV_VAR ioaddr, addr, retaddr);
+            res = glue(io_read, SUFFIX)(env, ioaddr, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* slow unaligned access (it spans two pages) */
             addr1 = addr & ~(DATA_SIZE - 1);
             addr2 = addr1 + DATA_SIZE;
-            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr1,
+            res1 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr1,
                                                           mmu_idx, retaddr);
-            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(ENV_VAR addr2,
+            res2 = glue(glue(slow_ld, SUFFIX), MMUSUFFIX)(env, addr2,
                                                           mmu_idx, retaddr);
             shift = (addr & (DATA_SIZE - 1)) * 8;
 #ifdef TARGET_WORDS_BIGENDIAN
@@ -216,13 +203,13 @@
 
 #ifndef SOFTMMU_CODE_ACCESS
 
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
                                                    target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
                                                    uintptr_t retaddr);
 
-static inline void glue(io_write, SUFFIX)(ENV_PARAM
+static inline void glue(io_write, SUFFIX)(CPUArchState *env,
                                           target_phys_addr_t physaddr,
                                           DATA_TYPE val,
                                           target_ulong addr,
@@ -253,10 +240,9 @@
 #endif /* SHIFT > 2 */
 }
 
-void glue(glue(glue(HELPER_PREFIX, st), SUFFIX), MMUSUFFIX)(ENV_PARAM
-                                                            target_ulong addr,
-                                                            DATA_TYPE val,
-                                                            int mmu_idx)
+void glue(glue(helper_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
+                                              target_ulong addr, DATA_TYPE val,
+                                              int mmu_idx)
 {
     target_phys_addr_t ioaddr;
     target_ulong tlb_addr;
@@ -273,14 +259,14 @@
                 goto do_unaligned_access;
             retaddr = GETPC();
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             retaddr = GETPC();
 #ifdef ALIGNED_ONLY
-            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
 #endif
-            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_VAR addr, val,
+            glue(glue(slow_st, SUFFIX), MMUSUFFIX)(env, addr, val,
                                                    mmu_idx, retaddr);
         } else {
             /* aligned/unaligned access in the same page */
@@ -288,7 +274,7 @@
 #ifdef ALIGNED_ONLY
             if ((addr & (DATA_SIZE - 1)) != 0) {
                 retaddr = GETPC();
-                do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
+                do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
             }
 #endif
             addend = env->tlb_table[mmu_idx][index].addend;
@@ -300,7 +286,7 @@
         retaddr = GETPC();
 #ifdef ALIGNED_ONLY
         if ((addr & (DATA_SIZE - 1)) != 0)
-            do_unaligned_access(ENV_VAR addr, 1, mmu_idx, retaddr);
+            do_unaligned_access(env, addr, 1, mmu_idx, retaddr);
 #endif
         tlb_fill(env, addr, 1, mmu_idx, retaddr);
         goto redo;
@@ -308,7 +294,7 @@
 }
 
 /* handles all unaligned cases */
-static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(ENV_PARAM
+static void glue(glue(slow_st, SUFFIX), MMUSUFFIX)(CPUArchState *env,
                                                    target_ulong addr,
                                                    DATA_TYPE val,
                                                    int mmu_idx,
@@ -327,7 +313,7 @@
             if ((addr & (DATA_SIZE - 1)) != 0)
                 goto do_unaligned_access;
             ioaddr = env->iotlb[mmu_idx][index];
-            glue(io_write, SUFFIX)(ENV_VAR ioaddr, val, addr, retaddr);
+            glue(io_write, SUFFIX)(env, ioaddr, val, addr, retaddr);
         } else if (((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1) >= TARGET_PAGE_SIZE) {
         do_unaligned_access:
             /* XXX: not efficient, but simple */
@@ -335,11 +321,11 @@
              * previous page from the TLB cache.  */
             for(i = DATA_SIZE - 1; i >= 0; i--) {
 #ifdef TARGET_WORDS_BIGENDIAN
-                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                glue(slow_stb, MMUSUFFIX)(env, addr + i,
                                           val >> (((DATA_SIZE - 1) * 8) - (i * 8)),
                                           mmu_idx, retaddr);
 #else
-                glue(slow_stb, MMUSUFFIX)(ENV_VAR addr + i,
+                glue(slow_stb, MMUSUFFIX)(env, addr + i,
                                           val >> (i * 8),
                                           mmu_idx, retaddr);
 #endif
@@ -366,7 +352,3 @@
 #undef USUFFIX
 #undef DATA_SIZE
 #undef ADDR_READ
-#undef ENV_PARAM
-#undef ENV_VAR
-#undef CPU_PREFIX
-#undef HELPER_PREFIX
diff --git a/target-arm/Makefile.objs b/target-arm/Makefile.objs
index f447c4f..b6f1a9e 100644
--- a/target-arm/Makefile.objs
+++ b/target-arm/Makefile.objs
@@ -2,5 +2,3 @@
 obj-$(CONFIG_SOFTMMU) += machine.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-y += neon_helper.o iwmmxt_helper.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index d7f93d9..7fac94f 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -734,9 +734,10 @@
 }
 
 /* Load an instruction and return it in the standard little-endian order */
-static inline uint32_t arm_ldl_code(uint32_t addr, bool do_swap)
+static inline uint32_t arm_ldl_code(CPUARMState *env, uint32_t addr,
+                                    bool do_swap)
 {
-    uint32_t insn = ldl_code(addr);
+    uint32_t insn = cpu_ldl_code(env, addr);
     if (do_swap) {
         return bswap32(insn);
     }
@@ -744,9 +745,10 @@
 }
 
 /* Ditto, for a halfword (Thumb) instruction */
-static inline uint16_t arm_lduw_code(uint32_t addr, bool do_swap)
+static inline uint16_t arm_lduw_code(CPUARMState *env, uint32_t addr,
+                                     bool do_swap)
 {
-    uint16_t insn = lduw_code(addr);
+    uint16_t insn = cpu_lduw_code(env, addr);
     if (do_swap) {
         return bswap16(insn);
     }
diff --git a/target-arm/helper.c b/target-arm/helper.c
index e27df96..58340bd 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -1756,7 +1756,7 @@
     case EXCP_BKPT:
         if (semihosting_enabled) {
             int nr;
-            nr = arm_lduw_code(env->regs[15], env->bswap_code) & 0xff;
+            nr = arm_lduw_code(env, env->regs[15], env->bswap_code) & 0xff;
             if (nr == 0xab) {
                 env->regs[15] += 2;
                 env->regs[0] = do_arm_semihosting(env);
@@ -1828,9 +1828,10 @@
         if (semihosting_enabled) {
             /* Check for semihosting interrupt.  */
             if (env->thumb) {
-                mask = arm_lduw_code(env->regs[15] - 2, env->bswap_code) & 0xff;
+                mask = arm_lduw_code(env, env->regs[15] - 2, env->bswap_code)
+                    & 0xff;
             } else {
-                mask = arm_ldl_code(env->regs[15] - 4, env->bswap_code)
+                mask = arm_ldl_code(env, env->regs[15] - 4, env->bswap_code)
                     & 0xffffff;
             }
             /* Only intercept calls from privileged modes, to provide some
@@ -1851,7 +1852,7 @@
     case EXCP_BKPT:
         /* See if this is a semihosting syscall.  */
         if (env->thumb && semihosting_enabled) {
-            mask = arm_lduw_code(env->regs[15], env->bswap_code) & 0xff;
+            mask = arm_lduw_code(env, env->regs[15], env->bswap_code) & 0xff;
             if (mask == 0xab
                   && (env->uncached_cpsr & CPSR_M) != ARM_CPU_MODE_USR) {
                 env->regs[15] += 2;
diff --git a/target-arm/helper.h b/target-arm/helper.h
index 21e9cfe..afdb2b5 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -4,12 +4,12 @@
 DEF_HELPER_1(sxtb16, i32, i32)
 DEF_HELPER_1(uxtb16, i32, i32)
 
-DEF_HELPER_2(add_setq, i32, i32, i32)
-DEF_HELPER_2(add_saturate, i32, i32, i32)
-DEF_HELPER_2(sub_saturate, i32, i32, i32)
-DEF_HELPER_2(add_usaturate, i32, i32, i32)
-DEF_HELPER_2(sub_usaturate, i32, i32, i32)
-DEF_HELPER_1(double_saturate, i32, s32)
+DEF_HELPER_3(add_setq, i32, env, i32, i32)
+DEF_HELPER_3(add_saturate, i32, env, i32, i32)
+DEF_HELPER_3(sub_saturate, i32, env, i32, i32)
+DEF_HELPER_3(add_usaturate, i32, env, i32, i32)
+DEF_HELPER_3(sub_usaturate, i32, env, i32, i32)
+DEF_HELPER_2(double_saturate, i32, env, s32)
 DEF_HELPER_2(sdiv, s32, s32, s32)
 DEF_HELPER_2(udiv, i32, i32, i32)
 DEF_HELPER_1(rbit, i32, i32)
@@ -40,21 +40,21 @@
 PAS_OP(uh)
 #undef PAS_OP
 
-DEF_HELPER_2(ssat, i32, i32, i32)
-DEF_HELPER_2(usat, i32, i32, i32)
-DEF_HELPER_2(ssat16, i32, i32, i32)
-DEF_HELPER_2(usat16, i32, i32, i32)
+DEF_HELPER_3(ssat, i32, env, i32, i32)
+DEF_HELPER_3(usat, i32, env, i32, i32)
+DEF_HELPER_3(ssat16, i32, env, i32, i32)
+DEF_HELPER_3(usat16, i32, env, i32, i32)
 
 DEF_HELPER_2(usad8, i32, i32, i32)
 
 DEF_HELPER_1(logicq_cc, i32, i64)
 
 DEF_HELPER_3(sel_flags, i32, i32, i32, i32)
-DEF_HELPER_1(exception, void, i32)
-DEF_HELPER_0(wfi, void)
+DEF_HELPER_2(exception, void, env, i32)
+DEF_HELPER_1(wfi, void, env)
 
-DEF_HELPER_2(cpsr_write, void, i32, i32)
-DEF_HELPER_0(cpsr_read, i32)
+DEF_HELPER_3(cpsr_write, void, env, i32, i32)
+DEF_HELPER_1(cpsr_read, i32, env)
 
 DEF_HELPER_3(v7m_msr, void, env, i32, i32)
 DEF_HELPER_2(v7m_mrs, i32, env, i32)
@@ -67,8 +67,8 @@
 DEF_HELPER_2(get_r13_banked, i32, env, i32)
 DEF_HELPER_3(set_r13_banked, void, env, i32, i32)
 
-DEF_HELPER_1(get_user_reg, i32, i32)
-DEF_HELPER_2(set_user_reg, void, i32, i32)
+DEF_HELPER_2(get_user_reg, i32, env, i32)
+DEF_HELPER_3(set_user_reg, void, env, i32, i32)
 
 DEF_HELPER_1(vfp_get_fpscr, i32, env)
 DEF_HELPER_2(vfp_set_fpscr, void, env, i32)
@@ -140,20 +140,20 @@
 DEF_HELPER_2(rsqrte_f32, f32, f32, env)
 DEF_HELPER_2(recpe_u32, i32, i32, env)
 DEF_HELPER_2(rsqrte_u32, i32, i32, env)
-DEF_HELPER_4(neon_tbl, i32, i32, i32, i32, i32)
+DEF_HELPER_5(neon_tbl, i32, env, i32, i32, i32, i32)
 
-DEF_HELPER_2(add_cc, i32, i32, i32)
-DEF_HELPER_2(adc_cc, i32, i32, i32)
-DEF_HELPER_2(sub_cc, i32, i32, i32)
-DEF_HELPER_2(sbc_cc, i32, i32, i32)
+DEF_HELPER_3(add_cc, i32, env, i32, i32)
+DEF_HELPER_3(adc_cc, i32, env, i32, i32)
+DEF_HELPER_3(sub_cc, i32, env, i32, i32)
+DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
-DEF_HELPER_2(shl, i32, i32, i32)
-DEF_HELPER_2(shr, i32, i32, i32)
-DEF_HELPER_2(sar, i32, i32, i32)
-DEF_HELPER_2(shl_cc, i32, i32, i32)
-DEF_HELPER_2(shr_cc, i32, i32, i32)
-DEF_HELPER_2(sar_cc, i32, i32, i32)
-DEF_HELPER_2(ror_cc, i32, i32, i32)
+DEF_HELPER_3(shl, i32, env, i32, i32)
+DEF_HELPER_3(shr, i32, env, i32, i32)
+DEF_HELPER_3(sar, i32, env, i32, i32)
+DEF_HELPER_3(shl_cc, i32, env, i32, i32)
+DEF_HELPER_3(shr_cc, i32, env, i32, i32)
+DEF_HELPER_3(sar_cc, i32, env, i32, i32)
+DEF_HELPER_3(ror_cc, i32, env, i32, i32)
 
 /* neon_helper.c */
 DEF_HELPER_3(neon_qadd_u8, i32, env, i32, i32)
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index d77bfab..f13fc3a 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -17,19 +17,18 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
-static void raise_exception(int tt)
+static void raise_exception(CPUARMState *env, int tt)
 {
     env->exception_index = tt;
     cpu_loop_exit(env);
 }
 
-uint32_t HELPER(neon_tbl)(uint32_t ireg, uint32_t def,
+uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
                           uint32_t rn, uint32_t maxindex)
 {
     uint32_t val;
@@ -72,16 +71,12 @@
 /* try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPUARMState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUARMState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUARMState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = cpu_arm_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -93,15 +88,14 @@
                 cpu_restore_state(tb, env, retaddr);
             }
         }
-        raise_exception(env->exception_index);
+        raise_exception(env, env->exception_index);
     }
-    env = saved_env;
 }
 #endif
 
 /* FIXME: Pass an explicit pointer to QF to CPUARMState, and move saturating
    instructions into helper.c  */
-uint32_t HELPER(add_setq)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_setq)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT))
@@ -109,7 +103,7 @@
     return res;
 }
 
-uint32_t HELPER(add_saturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (((res ^ a) & SIGNBIT) && !((a ^ b) & SIGNBIT)) {
@@ -119,7 +113,7 @@
     return res;
 }
 
-uint32_t HELPER(sub_saturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_saturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a - b;
     if (((res ^ a) & SIGNBIT) && ((a ^ b) & SIGNBIT)) {
@@ -129,7 +123,7 @@
     return res;
 }
 
-uint32_t HELPER(double_saturate)(int32_t val)
+uint32_t HELPER(double_saturate)(CPUARMState *env, int32_t val)
 {
     uint32_t res;
     if (val >= 0x40000000) {
@@ -144,7 +138,7 @@
     return res;
 }
 
-uint32_t HELPER(add_usaturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a + b;
     if (res < a) {
@@ -154,7 +148,7 @@
     return res;
 }
 
-uint32_t HELPER(sub_usaturate)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_usaturate)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t res = a - b;
     if (res > a) {
@@ -165,7 +159,7 @@
 }
 
 /* Signed saturation.  */
-static inline uint32_t do_ssat(int32_t val, int shift)
+static inline uint32_t do_ssat(CPUARMState *env, int32_t val, int shift)
 {
     int32_t top;
     uint32_t mask;
@@ -183,7 +177,7 @@
 }
 
 /* Unsigned saturation.  */
-static inline uint32_t do_usat(int32_t val, int shift)
+static inline uint32_t do_usat(CPUARMState *env, int32_t val, int shift)
 {
     uint32_t max;
 
@@ -199,62 +193,62 @@
 }
 
 /* Signed saturate.  */
-uint32_t HELPER(ssat)(uint32_t x, uint32_t shift)
+uint32_t HELPER(ssat)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
-    return do_ssat(x, shift);
+    return do_ssat(env, x, shift);
 }
 
 /* Dual halfword signed saturate.  */
-uint32_t HELPER(ssat16)(uint32_t x, uint32_t shift)
+uint32_t HELPER(ssat16)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
     uint32_t res;
 
-    res = (uint16_t)do_ssat((int16_t)x, shift);
-    res |= do_ssat(((int32_t)x) >> 16, shift) << 16;
+    res = (uint16_t)do_ssat(env, (int16_t)x, shift);
+    res |= do_ssat(env, ((int32_t)x) >> 16, shift) << 16;
     return res;
 }
 
 /* Unsigned saturate.  */
-uint32_t HELPER(usat)(uint32_t x, uint32_t shift)
+uint32_t HELPER(usat)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
-    return do_usat(x, shift);
+    return do_usat(env, x, shift);
 }
 
 /* Dual halfword unsigned saturate.  */
-uint32_t HELPER(usat16)(uint32_t x, uint32_t shift)
+uint32_t HELPER(usat16)(CPUARMState *env, uint32_t x, uint32_t shift)
 {
     uint32_t res;
 
-    res = (uint16_t)do_usat((int16_t)x, shift);
-    res |= do_usat(((int32_t)x) >> 16, shift) << 16;
+    res = (uint16_t)do_usat(env, (int16_t)x, shift);
+    res |= do_usat(env, ((int32_t)x) >> 16, shift) << 16;
     return res;
 }
 
-void HELPER(wfi)(void)
+void HELPER(wfi)(CPUARMState *env)
 {
     env->exception_index = EXCP_HLT;
     env->halted = 1;
     cpu_loop_exit(env);
 }
 
-void HELPER(exception)(uint32_t excp)
+void HELPER(exception)(CPUARMState *env, uint32_t excp)
 {
     env->exception_index = excp;
     cpu_loop_exit(env);
 }
 
-uint32_t HELPER(cpsr_read)(void)
+uint32_t HELPER(cpsr_read)(CPUARMState *env)
 {
     return cpsr_read(env) & ~CPSR_EXEC;
 }
 
-void HELPER(cpsr_write)(uint32_t val, uint32_t mask)
+void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask)
 {
     cpsr_write(env, val, mask);
 }
 
 /* Access to user mode registers from privileged modes.  */
-uint32_t HELPER(get_user_reg)(uint32_t regno)
+uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno)
 {
     uint32_t val;
 
@@ -271,7 +265,7 @@
     return val;
 }
 
-void HELPER(set_user_reg)(uint32_t regno, uint32_t val)
+void HELPER(set_user_reg)(CPUARMState *env, uint32_t regno, uint32_t val)
 {
     if (regno == 13) {
         env->banked_r13[0] = val;
@@ -290,7 +284,7 @@
     const ARMCPRegInfo *ri = rip;
     int excp = ri->writefn(env, ri, value);
     if (excp) {
-        raise_exception(excp);
+        raise_exception(env, excp);
     }
 }
 
@@ -300,7 +294,7 @@
     uint64_t value;
     int excp = ri->readfn(env, ri, &value);
     if (excp) {
-        raise_exception(excp);
+        raise_exception(env, excp);
     }
     return value;
 }
@@ -310,7 +304,7 @@
     const ARMCPRegInfo *ri = rip;
     int excp = ri->writefn(env, ri, value);
     if (excp) {
-        raise_exception(excp);
+        raise_exception(env, excp);
     }
 }
 
@@ -320,7 +314,7 @@
     uint64_t value;
     int excp = ri->readfn(env, ri, &value);
     if (excp) {
-        raise_exception(excp);
+        raise_exception(env, excp);
     }
     return value;
 }
@@ -329,7 +323,7 @@
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
 
-uint32_t HELPER (add_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER (add_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     result = a + b;
@@ -339,7 +333,7 @@
     return result;
 }
 
-uint32_t HELPER(adc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(adc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -354,7 +348,7 @@
     return result;
 }
 
-uint32_t HELPER(sub_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     result = a - b;
@@ -364,7 +358,7 @@
     return result;
 }
 
-uint32_t HELPER(sbc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(sbc_cc)(CPUARMState *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -381,7 +375,7 @@
 
 /* Similarly for variable shift instructions.  */
 
-uint32_t HELPER(shl)(uint32_t x, uint32_t i)
+uint32_t HELPER(shl)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32)
@@ -389,7 +383,7 @@
     return x << shift;
 }
 
-uint32_t HELPER(shr)(uint32_t x, uint32_t i)
+uint32_t HELPER(shr)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32)
@@ -397,7 +391,7 @@
     return (uint32_t)x >> shift;
 }
 
-uint32_t HELPER(sar)(uint32_t x, uint32_t i)
+uint32_t HELPER(sar)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32)
@@ -405,7 +399,7 @@
     return (int32_t)x >> shift;
 }
 
-uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shl_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -421,7 +415,7 @@
     return x;
 }
 
-uint32_t HELPER(shr_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shr_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -437,7 +431,7 @@
     return x;
 }
 
-uint32_t HELPER(sar_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(sar_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -450,7 +444,7 @@
     return x;
 }
 
-uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
 {
     int shift1, shift;
     shift1 = i & 0xff;
diff --git a/target-arm/translate.c b/target-arm/translate.c
index edef79a..f4b447a 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -199,7 +199,7 @@
 static inline void gen_set_cpsr(TCGv var, uint32_t mask)
 {
     TCGv tmp_mask = tcg_const_i32(mask);
-    gen_helper_cpsr_write(var, tmp_mask);
+    gen_helper_cpsr_write(cpu_env, var, tmp_mask);
     tcg_temp_free_i32(tmp_mask);
 }
 /* Set NZCV flags from the high 4 bits of var.  */
@@ -209,7 +209,7 @@
 {
     TCGv tmp = tcg_temp_new_i32();
     tcg_gen_movi_i32(tmp, excp);
-    gen_helper_exception(tmp);
+    gen_helper_exception(cpu_env, tmp);
     tcg_temp_free_i32(tmp);
 }
 
@@ -490,16 +490,16 @@
 {
     if (flags) {
         switch (shiftop) {
-        case 0: gen_helper_shl_cc(var, var, shift); break;
-        case 1: gen_helper_shr_cc(var, var, shift); break;
-        case 2: gen_helper_sar_cc(var, var, shift); break;
-        case 3: gen_helper_ror_cc(var, var, shift); break;
+        case 0: gen_helper_shl_cc(var, cpu_env, var, shift); break;
+        case 1: gen_helper_shr_cc(var, cpu_env, var, shift); break;
+        case 2: gen_helper_sar_cc(var, cpu_env, var, shift); break;
+        case 3: gen_helper_ror_cc(var, cpu_env, var, shift); break;
         }
     } else {
         switch (shiftop) {
-        case 0: gen_helper_shl(var, var, shift); break;
-        case 1: gen_helper_shr(var, var, shift); break;
-        case 2: gen_helper_sar(var, var, shift); break;
+        case 0: gen_helper_shl(var, cpu_env, var, shift); break;
+        case 1: gen_helper_shr(var, cpu_env, var, shift); break;
+        case 2: gen_helper_sar(var, cpu_env, var, shift); break;
         case 3: tcg_gen_andi_i32(shift, shift, 0x1f);
                 tcg_gen_rotr_i32(var, var, shift); break;
         }
@@ -6121,7 +6121,7 @@
                 tmp2 = neon_load_reg(rm, 0);
                 tmp4 = tcg_const_i32(rn);
                 tmp5 = tcg_const_i32(n);
-                gen_helper_neon_tbl(tmp2, tmp2, tmp, tmp4, tmp5);
+                gen_helper_neon_tbl(tmp2, cpu_env, tmp2, tmp, tmp4, tmp5);
                 tcg_temp_free_i32(tmp);
                 if (insn & (1 << 6)) {
                     tmp = neon_load_reg(rd, 1);
@@ -6130,7 +6130,7 @@
                     tcg_gen_movi_i32(tmp, 0);
                 }
                 tmp3 = neon_load_reg(rm, 1);
-                gen_helper_neon_tbl(tmp3, tmp3, tmp, tmp4, tmp5);
+                gen_helper_neon_tbl(tmp3, cpu_env, tmp3, tmp, tmp4, tmp5);
                 tcg_temp_free_i32(tmp5);
                 tcg_temp_free_i32(tmp4);
                 neon_store_reg(rd, 0, tmp2);
@@ -6534,7 +6534,7 @@
     TCGv addr;
     TCGv_i64 tmp64;
 
-    insn = arm_ldl_code(s->pc, s->bswap_code);
+    insn = arm_ldl_code(env, s->pc, s->bswap_code);
     s->pc += 4;
 
     /* M variants do not implement ARM mode.  */
@@ -6818,7 +6818,7 @@
                     tmp = load_cpu_field(spsr);
                 } else {
                     tmp = tcg_temp_new_i32();
-                    gen_helper_cpsr_read(tmp);
+                    gen_helper_cpsr_read(tmp, cpu_env);
                 }
                 store_reg(s, rd, tmp);
             }
@@ -6869,11 +6869,11 @@
             tmp = load_reg(s, rm);
             tmp2 = load_reg(s, rn);
             if (op1 & 2)
-                gen_helper_double_saturate(tmp2, tmp2);
+                gen_helper_double_saturate(tmp2, cpu_env, tmp2);
             if (op1 & 1)
-                gen_helper_sub_saturate(tmp, tmp, tmp2);
+                gen_helper_sub_saturate(tmp, cpu_env, tmp, tmp2);
             else
-                gen_helper_add_saturate(tmp, tmp, tmp2);
+                gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
             tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
             break;
@@ -6911,7 +6911,7 @@
                 tcg_temp_free_i64(tmp64);
                 if ((sh & 2) == 0) {
                     tmp2 = load_reg(s, rn);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                 }
                 store_reg(s, rd, tmp);
@@ -6931,7 +6931,7 @@
                 } else {
                     if (op1 == 0) {
                         tmp2 = load_reg(s, rn);
-                        gen_helper_add_setq(tmp, tmp, tmp2);
+                        gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                         tcg_temp_free_i32(tmp2);
                     }
                     store_reg(s, rd, tmp);
@@ -7005,11 +7005,11 @@
                 if (IS_USER(s)) {
                     goto illegal_op;
                 }
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 gen_exception_return(s, tmp);
             } else {
                 if (set_cc) {
-                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 } else {
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 }
@@ -7018,7 +7018,7 @@
             break;
         case 0x03:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, tmp2, tmp);
+                gen_helper_sub_cc(tmp, cpu_env, tmp2, tmp);
             } else {
                 tcg_gen_sub_i32(tmp, tmp2, tmp);
             }
@@ -7026,7 +7026,7 @@
             break;
         case 0x04:
             if (set_cc) {
-                gen_helper_add_cc(tmp, tmp, tmp2);
+                gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 tcg_gen_add_i32(tmp, tmp, tmp2);
             }
@@ -7034,7 +7034,7 @@
             break;
         case 0x05:
             if (set_cc) {
-                gen_helper_adc_cc(tmp, tmp, tmp2);
+                gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 gen_add_carry(tmp, tmp, tmp2);
             }
@@ -7042,7 +7042,7 @@
             break;
         case 0x06:
             if (set_cc) {
-                gen_helper_sbc_cc(tmp, tmp, tmp2);
+                gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 gen_sub_carry(tmp, tmp, tmp2);
             }
@@ -7050,7 +7050,7 @@
             break;
         case 0x07:
             if (set_cc) {
-                gen_helper_sbc_cc(tmp, tmp2, tmp);
+                gen_helper_sbc_cc(tmp, cpu_env, tmp2, tmp);
             } else {
                 gen_sub_carry(tmp, tmp2, tmp);
             }
@@ -7072,13 +7072,13 @@
             break;
         case 0x0a:
             if (set_cc) {
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
         case 0x0b:
             if (set_cc) {
-                gen_helper_add_cc(tmp, tmp, tmp2);
+                gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp);
             break;
@@ -7395,9 +7395,9 @@
                         sh = (insn >> 16) & 0x1f;
                         tmp2 = tcg_const_i32(sh);
                         if (insn & (1 << 22))
-                          gen_helper_usat(tmp, tmp, tmp2);
+                          gen_helper_usat(tmp, cpu_env, tmp, tmp2);
                         else
-                          gen_helper_ssat(tmp, tmp, tmp2);
+                          gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
                         tcg_temp_free_i32(tmp2);
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00300fe0) == 0x00200f20) {
@@ -7406,9 +7406,9 @@
                         sh = (insn >> 16) & 0x1f;
                         tmp2 = tcg_const_i32(sh);
                         if (insn & (1 << 22))
-                          gen_helper_usat16(tmp, tmp, tmp2);
+                          gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
                         else
-                          gen_helper_ssat16(tmp, tmp, tmp2);
+                          gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
                         tcg_temp_free_i32(tmp2);
                         store_reg(s, rd, tmp);
                     } else if ((insn & 0x00700fe0) == 0x00000fa0) {
@@ -7518,7 +7518,7 @@
                              * however it may overflow considered as a signed
                              * operation, in which case we must set the Q flag.
                              */
-                            gen_helper_add_setq(tmp, tmp, tmp2);
+                            gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                         }
                         tcg_temp_free_i32(tmp2);
                         if (insn & (1 << 22)) {
@@ -7534,7 +7534,7 @@
                             if (rd != 15)
                               {
                                 tmp2 = load_reg(s, rd);
-                                gen_helper_add_setq(tmp, tmp, tmp2);
+                                gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                                 tcg_temp_free_i32(tmp2);
                               }
                             store_reg(s, rn, tmp);
@@ -7719,7 +7719,7 @@
                             tmp = gen_ld32(addr, IS_USER(s));
                             if (user) {
                                 tmp2 = tcg_const_i32(i);
-                                gen_helper_set_user_reg(tmp2, tmp);
+                                gen_helper_set_user_reg(cpu_env, tmp2, tmp);
                                 tcg_temp_free_i32(tmp2);
                                 tcg_temp_free_i32(tmp);
                             } else if (i == rn) {
@@ -7738,7 +7738,7 @@
                             } else if (user) {
                                 tmp = tcg_temp_new_i32();
                                 tmp2 = tcg_const_i32(i);
-                                gen_helper_get_user_reg(tmp, tmp2);
+                                gen_helper_get_user_reg(tmp, cpu_env, tmp2);
                                 tcg_temp_free_i32(tmp2);
                             } else {
                                 tmp = load_reg(s, i);
@@ -7865,31 +7865,31 @@
         break;
     case 8: /* add */
         if (conds)
-            gen_helper_add_cc(t0, t0, t1);
+            gen_helper_add_cc(t0, cpu_env, t0, t1);
         else
             tcg_gen_add_i32(t0, t0, t1);
         break;
     case 10: /* adc */
         if (conds)
-            gen_helper_adc_cc(t0, t0, t1);
+            gen_helper_adc_cc(t0, cpu_env, t0, t1);
         else
             gen_adc(t0, t1);
         break;
     case 11: /* sbc */
         if (conds)
-            gen_helper_sbc_cc(t0, t0, t1);
+            gen_helper_sbc_cc(t0, cpu_env, t0, t1);
         else
             gen_sub_carry(t0, t0, t1);
         break;
     case 13: /* sub */
         if (conds)
-            gen_helper_sub_cc(t0, t0, t1);
+            gen_helper_sub_cc(t0, cpu_env, t0, t1);
         else
             tcg_gen_sub_i32(t0, t0, t1);
         break;
     case 14: /* rsb */
         if (conds)
-            gen_helper_sub_cc(t0, t1, t0);
+            gen_helper_sub_cc(t0, cpu_env, t1, t0);
         else
             tcg_gen_sub_i32(t0, t1, t0);
         break;
@@ -7962,7 +7962,7 @@
         /* Fall through to 32-bit decode.  */
     }
 
-    insn = arm_lduw_code(s->pc, s->bswap_code);
+    insn = arm_lduw_code(env, s->pc, s->bswap_code);
     s->pc += 2;
     insn |= (uint32_t)insn_hw1 << 16;
 
@@ -8111,7 +8111,7 @@
                     gen_st32(tmp, addr, 0);
                     tcg_gen_addi_i32(addr, addr, 4);
                     tmp = tcg_temp_new_i32();
-                    gen_helper_cpsr_read(tmp);
+                    gen_helper_cpsr_read(tmp, cpu_env);
                     gen_st32(tmp, addr, 0);
                     if (insn & (1 << 21)) {
                         if ((insn & (1 << 24)) == 0) {
@@ -8293,11 +8293,11 @@
                 tmp = load_reg(s, rn);
                 tmp2 = load_reg(s, rm);
                 if (op & 1)
-                    gen_helper_double_saturate(tmp, tmp);
+                    gen_helper_double_saturate(tmp, cpu_env, tmp);
                 if (op & 2)
-                    gen_helper_sub_saturate(tmp, tmp2, tmp);
+                    gen_helper_sub_saturate(tmp, cpu_env, tmp2, tmp);
                 else
-                    gen_helper_add_saturate(tmp, tmp, tmp2);
+                    gen_helper_add_saturate(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
             } else {
                 tmp = load_reg(s, rn);
@@ -8353,7 +8353,7 @@
                 tcg_temp_free_i32(tmp2);
                 if (rs != 15) {
                     tmp2 = load_reg(s, rs);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                 }
                 break;
@@ -8370,13 +8370,13 @@
                      * however it may overflow considered as a signed
                      * operation, in which case we must set the Q flag.
                      */
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                 }
                 tcg_temp_free_i32(tmp2);
                 if (rs != 15)
                   {
                     tmp2 = load_reg(s, rs);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                   }
                 break;
@@ -8393,7 +8393,7 @@
                 if (rs != 15)
                   {
                     tmp2 = load_reg(s, rs);
-                    gen_helper_add_setq(tmp, tmp, tmp2);
+                    gen_helper_add_setq(tmp, cpu_env, tmp, tmp2);
                     tcg_temp_free_i32(tmp2);
                   }
                 break;
@@ -8632,7 +8632,7 @@
                             gen_helper_v7m_mrs(tmp, cpu_env, addr);
                             tcg_temp_free_i32(addr);
                         } else {
-                            gen_helper_cpsr_read(tmp);
+                            gen_helper_cpsr_read(tmp, cpu_env);
                         }
                         store_reg(s, rd, tmp);
                         break;
@@ -8721,15 +8721,15 @@
                         if (op & 4) {
                             /* Unsigned.  */
                             if ((op & 1) && shift == 0)
-                                gen_helper_usat16(tmp, tmp, tmp2);
+                                gen_helper_usat16(tmp, cpu_env, tmp, tmp2);
                             else
-                                gen_helper_usat(tmp, tmp, tmp2);
+                                gen_helper_usat(tmp, cpu_env, tmp, tmp2);
                         } else {
                             /* Signed.  */
                             if ((op & 1) && shift == 0)
-                                gen_helper_ssat16(tmp, tmp, tmp2);
+                                gen_helper_ssat16(tmp, cpu_env, tmp, tmp2);
                             else
-                                gen_helper_ssat(tmp, tmp, tmp2);
+                                gen_helper_ssat(tmp, cpu_env, tmp, tmp2);
                         }
                         tcg_temp_free_i32(tmp2);
                         break;
@@ -8992,7 +8992,7 @@
         }
     }
 
-    insn = arm_lduw_code(s->pc, s->bswap_code);
+    insn = arm_lduw_code(env, s->pc, s->bswap_code);
     s->pc += 2;
 
     switch (insn >> 12) {
@@ -9017,12 +9017,12 @@
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, tmp, tmp2);
+                    gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
             }
             tcg_temp_free_i32(tmp2);
             store_reg(s, rd, tmp);
@@ -9053,7 +9053,7 @@
             tcg_gen_movi_i32(tmp2, insn & 0xff);
             switch (op) {
             case 1: /* cmp */
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp);
                 tcg_temp_free_i32(tmp2);
                 break;
@@ -9061,7 +9061,7 @@
                 if (s->condexec_mask)
                     tcg_gen_add_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_add_cc(tmp, tmp, tmp2);
+                    gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -9069,7 +9069,7 @@
                 if (s->condexec_mask)
                     tcg_gen_sub_i32(tmp, tmp, tmp2);
                 else
-                    gen_helper_sub_cc(tmp, tmp, tmp2);
+                    gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 store_reg(s, rd, tmp);
                 break;
@@ -9105,7 +9105,7 @@
             case 1: /* cmp */
                 tmp = load_reg(s, rd);
                 tmp2 = load_reg(s, rm);
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
                 tcg_temp_free_i32(tmp2);
                 tcg_temp_free_i32(tmp);
                 break;
@@ -9166,25 +9166,25 @@
             break;
         case 0x2: /* lsl */
             if (s->condexec_mask) {
-                gen_helper_shl(tmp2, tmp2, tmp);
+                gen_helper_shl(tmp2, cpu_env, tmp2, tmp);
             } else {
-                gen_helper_shl_cc(tmp2, tmp2, tmp);
+                gen_helper_shl_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
         case 0x3: /* lsr */
             if (s->condexec_mask) {
-                gen_helper_shr(tmp2, tmp2, tmp);
+                gen_helper_shr(tmp2, cpu_env, tmp2, tmp);
             } else {
-                gen_helper_shr_cc(tmp2, tmp2, tmp);
+                gen_helper_shr_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
         case 0x4: /* asr */
             if (s->condexec_mask) {
-                gen_helper_sar(tmp2, tmp2, tmp);
+                gen_helper_sar(tmp2, cpu_env, tmp2, tmp);
             } else {
-                gen_helper_sar_cc(tmp2, tmp2, tmp);
+                gen_helper_sar_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
@@ -9192,20 +9192,20 @@
             if (s->condexec_mask)
                 gen_adc(tmp, tmp2);
             else
-                gen_helper_adc_cc(tmp, tmp, tmp2);
+                gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
             break;
         case 0x6: /* sbc */
             if (s->condexec_mask)
                 gen_sub_carry(tmp, tmp, tmp2);
             else
-                gen_helper_sbc_cc(tmp, tmp, tmp2);
+                gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
             break;
         case 0x7: /* ror */
             if (s->condexec_mask) {
                 tcg_gen_andi_i32(tmp, tmp, 0x1f);
                 tcg_gen_rotr_i32(tmp2, tmp2, tmp);
             } else {
-                gen_helper_ror_cc(tmp2, tmp2, tmp);
+                gen_helper_ror_cc(tmp2, cpu_env, tmp2, tmp);
                 gen_logic_CC(tmp2);
             }
             break;
@@ -9218,14 +9218,14 @@
             if (s->condexec_mask)
                 tcg_gen_neg_i32(tmp, tmp2);
             else
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             break;
         case 0xa: /* cmp */
-            gen_helper_sub_cc(tmp, tmp, tmp2);
+            gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             rd = 16;
             break;
         case 0xb: /* cmn */
-            gen_helper_add_cc(tmp, tmp, tmp2);
+            gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
             rd = 16;
             break;
         case 0xc: /* orr */
@@ -9913,7 +9913,7 @@
             /* nothing more to generate */
             break;
         case DISAS_WFI:
-            gen_helper_wfi();
+            gen_helper_wfi(cpu_env);
             break;
         case DISAS_SWI:
             gen_exception(EXCP_SWI);
diff --git a/target-cris/Makefile.objs b/target-cris/Makefile.objs
index 4b09e8c..afb87bc 100644
--- a/target-cris/Makefile.objs
+++ b/target-cris/Makefile.objs
@@ -1,4 +1,2 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += mmu.o machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-cris/helper.c b/target-cris/helper.c
index bfbc29e..1bdb7e2 100644
--- a/target-cris/helper.c
+++ b/target-cris/helper.c
@@ -151,7 +151,7 @@
 	}
 
 	/* Now that we are in kernel mode, load the handlers address.  */
-	env->pc = ldl_code(env->pregs[PR_EBP] + ex_vec * 4);
+        env->pc = cpu_ldl_code(env, env->pregs[PR_EBP] + ex_vec * 4);
 	env->locked_irq = 1;
 	env->pregs[PR_CCS] |= F_FLAG_V10; /* set F.  */
 
@@ -233,7 +233,7 @@
 	/* Now that we are in kernel mode, load the handlers address.
 	   This load may not fault, real hw leaves that behaviour as
 	   undefined.  */
-	env->pc = ldl_code(env->pregs[PR_EBP] + ex_vec * 4);
+        env->pc = cpu_ldl_code(env, env->pregs[PR_EBP] + ex_vec * 4);
 
 	/* Clear the excption_index to avoid spurios hw_aborts for recursive
 	   bus faults.  */
diff --git a/target-cris/helper.h b/target-cris/helper.h
index 093063a..99fb326 100644
--- a/target-cris/helper.h
+++ b/target-cris/helper.h
@@ -1,26 +1,29 @@
 #include "def-helper.h"
 
-DEF_HELPER_1(raise_exception, void, i32)
-DEF_HELPER_1(tlb_flush_pid, void, i32)
-DEF_HELPER_1(spc_write, void, i32)
+DEF_HELPER_2(raise_exception, void, env, i32)
+DEF_HELPER_2(tlb_flush_pid, void, env, i32)
+DEF_HELPER_2(spc_write, void, env, i32)
 DEF_HELPER_3(dump, void, i32, i32, i32)
-DEF_HELPER_0(rfe, void);
-DEF_HELPER_0(rfn, void);
+DEF_HELPER_1(rfe, void, env);
+DEF_HELPER_1(rfn, void, env);
 
-DEF_HELPER_2(movl_sreg_reg, void, i32, i32)
-DEF_HELPER_2(movl_reg_sreg, void, i32, i32)
+DEF_HELPER_3(movl_sreg_reg, void, env, i32, i32)
+DEF_HELPER_3(movl_reg_sreg, void, env, i32, i32)
 
 DEF_HELPER_FLAGS_1(lz, TCG_CALL_PURE, i32, i32);
-DEF_HELPER_FLAGS_3(btst, TCG_CALL_PURE, i32, i32, i32, i32);
+DEF_HELPER_FLAGS_4(btst, TCG_CALL_PURE, i32, env, i32, i32, i32);
 
-DEF_HELPER_FLAGS_3(evaluate_flags_muls, TCG_CALL_PURE, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_3(evaluate_flags_mulu, TCG_CALL_PURE, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_4(evaluate_flags_mcp, TCG_CALL_PURE, i32, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_4(evaluate_flags_alu_4, TCG_CALL_PURE, i32, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_4(evaluate_flags_sub_4, TCG_CALL_PURE, i32, i32, i32, i32, i32)
-DEF_HELPER_FLAGS_2(evaluate_flags_move_4, TCG_CALL_PURE, i32, i32, i32)
-DEF_HELPER_FLAGS_2(evaluate_flags_move_2, TCG_CALL_PURE, i32, i32, i32)
-DEF_HELPER_0(evaluate_flags, void)
-DEF_HELPER_0(top_evaluate_flags, void)
+DEF_HELPER_FLAGS_4(evaluate_flags_muls, TCG_CALL_PURE, i32, env, i32, i32, i32)
+DEF_HELPER_FLAGS_4(evaluate_flags_mulu, TCG_CALL_PURE, i32, env, i32, i32, i32)
+DEF_HELPER_FLAGS_5(evaluate_flags_mcp, TCG_CALL_PURE, i32, env,
+                                                      i32, i32, i32, i32)
+DEF_HELPER_FLAGS_5(evaluate_flags_alu_4, TCG_CALL_PURE, i32, env,
+                                                        i32, i32, i32, i32)
+DEF_HELPER_FLAGS_5(evaluate_flags_sub_4, TCG_CALL_PURE, i32, env,
+                                                        i32, i32, i32, i32)
+DEF_HELPER_FLAGS_3(evaluate_flags_move_4, TCG_CALL_PURE, i32, env, i32, i32)
+DEF_HELPER_FLAGS_3(evaluate_flags_move_2, TCG_CALL_PURE, i32, env, i32, i32)
+DEF_HELPER_1(evaluate_flags, void, env)
+DEF_HELPER_1(top_evaluate_flags, void, env)
 
 #include "def-helper.h"
diff --git a/target-cris/op_helper.c b/target-cris/op_helper.c
index ac7c98c..a7468d4 100644
--- a/target-cris/op_helper.c
+++ b/target-cris/op_helper.c
@@ -19,7 +19,6 @@
  */
 
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "mmu.h"
 #include "helper.h"
 #include "host-utils.h"
@@ -55,17 +54,12 @@
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPUCRISState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUCRISState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUCRISState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
-
     D_LOG("%s pc=%x tpc=%x ra=%p\n", __func__,
           env->pc, env->debug1, (void *)retaddr);
     ret = cpu_cris_handle_mmu_fault(env, addr, is_write, mmu_idx);
@@ -79,23 +73,22 @@
                 cpu_restore_state(tb, env, retaddr);
 
 		/* Evaluate flags after retranslation.  */
-                helper_top_evaluate_flags();
+                helper_top_evaluate_flags(env);
             }
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 
 #endif
 
-void helper_raise_exception(uint32_t index)
+void helper_raise_exception(CPUCRISState *env, uint32_t index)
 {
 	env->exception_index = index;
         cpu_loop_exit(env);
 }
 
-void helper_tlb_flush_pid(uint32_t pid)
+void helper_tlb_flush_pid(CPUCRISState *env, uint32_t pid)
 {
 #if !defined(CONFIG_USER_ONLY)
 	pid &= 0xff;
@@ -104,7 +97,7 @@
 #endif
 }
 
-void helper_spc_write(uint32_t new_spc)
+void helper_spc_write(CPUCRISState *env, uint32_t new_spc)
 {
 #if !defined(CONFIG_USER_ONLY)
 	tlb_flush_page(env, env->pregs[PR_SPC]);
@@ -121,7 +114,7 @@
 #define EXTRACT_FIELD(src, start, end) \
 	    (((src) >> start) & ((1 << (end - start + 1)) - 1))
 
-void helper_movl_sreg_reg (uint32_t sreg, uint32_t reg)
+void helper_movl_sreg_reg(CPUCRISState *env, uint32_t sreg, uint32_t reg)
 {
 	uint32_t srs;
 	srs = env->pregs[PR_SRS];
@@ -171,7 +164,7 @@
 #endif
 }
 
-void helper_movl_reg_sreg (uint32_t reg, uint32_t sreg)
+void helper_movl_reg_sreg(CPUCRISState *env, uint32_t reg, uint32_t sreg)
 {
 	uint32_t srs;
 	env->pregs[PR_SRS] &= 3;
@@ -216,7 +209,7 @@
 	env->pregs[PR_CCS] = ccs;
 }
 
-void helper_rfe(void)
+void helper_rfe(CPUCRISState *env)
 {
 	int rflag = env->pregs[PR_CCS] & R_FLAG;
 
@@ -232,7 +225,7 @@
 		env->pregs[PR_CCS] |= P_FLAG;
 }
 
-void helper_rfn(void)
+void helper_rfn(CPUCRISState *env)
 {
 	int rflag = env->pregs[PR_CCS] & R_FLAG;
 
@@ -256,7 +249,7 @@
 	return clz32(t0);
 }
 
-uint32_t helper_btst(uint32_t t0, uint32_t t1, uint32_t ccs)
+uint32_t helper_btst(CPUCRISState *env, uint32_t t0, uint32_t t1, uint32_t ccs)
 {
 	/* FIXME: clean this up.  */
 
@@ -284,7 +277,8 @@
 	return ccs;
 }
 
-static inline uint32_t evaluate_flags_writeback(uint32_t flags, uint32_t ccs)
+static inline uint32_t evaluate_flags_writeback(CPUCRISState *env,
+                                                uint32_t flags, uint32_t ccs)
 {
 	unsigned int x, z, mask;
 
@@ -303,7 +297,8 @@
 	return ccs;
 }
 
-uint32_t helper_evaluate_flags_muls(uint32_t ccs, uint32_t res, uint32_t mof)
+uint32_t helper_evaluate_flags_muls(CPUCRISState *env,
+                                    uint32_t ccs, uint32_t res, uint32_t mof)
 {
 	uint32_t flags = 0;
 	int64_t tmp;
@@ -321,10 +316,11 @@
 	if ((dneg && mof != -1)
 	    || (!dneg && mof != 0))
 		flags |= V_FLAG;
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_mulu(uint32_t ccs, uint32_t res, uint32_t mof)
+uint32_t helper_evaluate_flags_mulu(CPUCRISState *env,
+                                    uint32_t ccs, uint32_t res, uint32_t mof)
 {
 	uint32_t flags = 0;
 	uint64_t tmp;
@@ -339,10 +335,10 @@
 	if (mof)
 		flags |= V_FLAG;
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_mcp(uint32_t ccs,
+uint32_t helper_evaluate_flags_mcp(CPUCRISState *env, uint32_t ccs,
 				   uint32_t src, uint32_t dst, uint32_t res)
 {
 	uint32_t flags = 0;
@@ -368,10 +364,10 @@
 			flags |= R_FLAG;
 	}
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_alu_4(uint32_t ccs,
+uint32_t helper_evaluate_flags_alu_4(CPUCRISState *env, uint32_t ccs,
 				     uint32_t src, uint32_t dst, uint32_t res)
 {
 	uint32_t flags = 0;
@@ -397,10 +393,10 @@
 			flags |= C_FLAG;
 	}
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_sub_4(uint32_t ccs,
+uint32_t helper_evaluate_flags_sub_4(CPUCRISState *env, uint32_t ccs,
 				     uint32_t src, uint32_t dst, uint32_t res)
 {
 	uint32_t flags = 0;
@@ -427,10 +423,11 @@
 	}
 
 	flags ^= C_FLAG;
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
-uint32_t helper_evaluate_flags_move_4(uint32_t ccs, uint32_t res)
+uint32_t helper_evaluate_flags_move_4(CPUCRISState *env,
+                                      uint32_t ccs, uint32_t res)
 {
 	uint32_t flags = 0;
 
@@ -439,9 +436,10 @@
 	else if (res == 0L)
 		flags |= Z_FLAG;
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
-uint32_t helper_evaluate_flags_move_2(uint32_t ccs, uint32_t res)
+uint32_t helper_evaluate_flags_move_2(CPUCRISState *env,
+                                      uint32_t ccs, uint32_t res)
 {
 	uint32_t flags = 0;
 
@@ -450,12 +448,12 @@
 	else if (res == 0)
 		flags |= Z_FLAG;
 
-	return evaluate_flags_writeback(flags, ccs);
+        return evaluate_flags_writeback(env, flags, ccs);
 }
 
 /* TODO: This is expensive. We could split things up and only evaluate part of
    CCR on a need to know basis. For now, we simply re-evaluate everything.  */
-void  helper_evaluate_flags(void)
+void helper_evaluate_flags(CPUCRISState *env)
 {
 	uint32_t src, dst, res;
 	uint32_t flags = 0;
@@ -571,25 +569,26 @@
 	if (env->cc_op == CC_OP_SUB || env->cc_op == CC_OP_CMP)
 		flags ^= C_FLAG;
 
-	env->pregs[PR_CCS] = evaluate_flags_writeback(flags, env->pregs[PR_CCS]);
+        env->pregs[PR_CCS] = evaluate_flags_writeback(env, flags,
+                                                      env->pregs[PR_CCS]);
 }
 
-void helper_top_evaluate_flags(void)
+void helper_top_evaluate_flags(CPUCRISState *env)
 {
 	switch (env->cc_op)
 	{
 		case CC_OP_MCP:
-			env->pregs[PR_CCS] = helper_evaluate_flags_mcp(
+                        env->pregs[PR_CCS] = helper_evaluate_flags_mcp(env,
 					env->pregs[PR_CCS], env->cc_src,
 					env->cc_dest, env->cc_result);
 			break;
 		case CC_OP_MULS:
-			env->pregs[PR_CCS] = helper_evaluate_flags_muls(
+                        env->pregs[PR_CCS] = helper_evaluate_flags_muls(env,
 					env->pregs[PR_CCS], env->cc_result,
 					env->pregs[PR_MOF]);
 			break;
 		case CC_OP_MULU:
-			env->pregs[PR_CCS] = helper_evaluate_flags_mulu(
+                        env->pregs[PR_CCS] = helper_evaluate_flags_mulu(env,
 					env->pregs[PR_CCS], env->cc_result,
 					env->pregs[PR_MOF]);
 			break;
@@ -604,18 +603,18 @@
 		{
 			case 4:
 				env->pregs[PR_CCS] =
-					helper_evaluate_flags_move_4(
+                                        helper_evaluate_flags_move_4(env,
 							env->pregs[PR_CCS],
 							env->cc_result);
 				break;
 			case 2:
 				env->pregs[PR_CCS] =
-					helper_evaluate_flags_move_2(
+                                        helper_evaluate_flags_move_2(env,
 							env->pregs[PR_CCS],
 							env->cc_result);
 				break;
 			default:
-				helper_evaluate_flags();
+                                helper_evaluate_flags(env);
 				break;
 		}
 		break;
@@ -626,12 +625,12 @@
 		case CC_OP_CMP:
 			if (env->cc_size == 4)
 				env->pregs[PR_CCS] =
-					helper_evaluate_flags_sub_4(
+                                        helper_evaluate_flags_sub_4(env,
 						env->pregs[PR_CCS],
 						env->cc_src, env->cc_dest,
 						env->cc_result);
 			else
-				helper_evaluate_flags();
+                                helper_evaluate_flags(env);
 			break;
 		default:
 		{
@@ -639,13 +638,13 @@
 			{
 			case 4:
 				env->pregs[PR_CCS] =
-					helper_evaluate_flags_alu_4(
+                                        helper_evaluate_flags_alu_4(env,
 						env->pregs[PR_CCS],
 						env->cc_src, env->cc_dest,
 						env->cc_result);
 				break;
 			default:
-				helper_evaluate_flags();
+                                helper_evaluate_flags(env);
 				break;
 			}
 		}
diff --git a/target-cris/translate.c b/target-cris/translate.c
index ad31877..19144b5 100644
--- a/target-cris/translate.c
+++ b/target-cris/translate.c
@@ -78,7 +78,7 @@
 	target_ulong pc, ppc;
 
 	/* Decoder.  */
-	unsigned int (*decoder)(struct DisasContext *dc);
+        unsigned int (*decoder)(CPUCRISState *env, struct DisasContext *dc);
 	uint32_t ir;
 	uint32_t opcode;
 	unsigned int op1;
@@ -211,9 +211,9 @@
 		tcg_gen_andi_tl(cpu_PR[r], tn, 3);
 	else {
 		if (r == PR_PID) 
-			gen_helper_tlb_flush_pid(tn);
+                        gen_helper_tlb_flush_pid(cpu_env, tn);
 		if (dc->tb_flags & S_FLAG && r == PR_SPC) 
-			gen_helper_spc_write(tn);
+                        gen_helper_spc_write(cpu_env, tn);
 		else if (r == PR_CCS)
 			dc->cpustate_changed = 1;
 		tcg_gen_mov_tl(cpu_PR[r], tn);
@@ -233,7 +233,7 @@
 	return sval;
 }
 
-static int cris_fetch(DisasContext *dc, uint32_t addr,
+static int cris_fetch(CPUCRISState *env, DisasContext *dc, uint32_t addr,
 		      unsigned int size, unsigned int sign)
 {
 	int r;
@@ -241,24 +241,24 @@
 	switch (size) {
 		case 4:
 		{
-			r = ldl_code(addr);
+                        r = cpu_ldl_code(env, addr);
 			break;
 		}
 		case 2:
 		{
 			if (sign) {
-				r = ldsw_code(addr);
+                                r = cpu_ldsw_code(env, addr);
 			} else {
-				r = lduw_code(addr);
+                                r = cpu_lduw_code(env, addr);
 			}
 			break;
 		}
 		case 1:
 		{
 			if (sign) {
-				r = ldsb_code(addr);
+                                r = cpu_ldsb_code(env, addr);
 			} else {
-				r = ldub_code(addr);
+                                r = cpu_ldub_code(env, addr);
 			}
 			break;
 		}
@@ -278,7 +278,7 @@
 static inline void t_gen_raise_exception(uint32_t index)
 {
         TCGv_i32 tmp = tcg_const_i32(index);
-	gen_helper_raise_exception(tmp);
+        gen_helper_raise_exception(cpu_env, tmp);
         tcg_temp_free_i32(tmp);
 }
 
@@ -624,17 +624,17 @@
 	switch (dc->cc_op)
 	{
 	case CC_OP_MCP:
-		gen_helper_evaluate_flags_mcp(cpu_PR[PR_CCS],
+                gen_helper_evaluate_flags_mcp(cpu_PR[PR_CCS], cpu_env,
 					cpu_PR[PR_CCS], cc_src,
 					cc_dest, cc_result);
 		break;
 	case CC_OP_MULS:
-		gen_helper_evaluate_flags_muls(cpu_PR[PR_CCS],
+                gen_helper_evaluate_flags_muls(cpu_PR[PR_CCS], cpu_env,
 					cpu_PR[PR_CCS], cc_result,
 					cpu_PR[PR_MOF]);
 		break;
 	case CC_OP_MULU:
-		gen_helper_evaluate_flags_mulu(cpu_PR[PR_CCS],
+                gen_helper_evaluate_flags_mulu(cpu_PR[PR_CCS], cpu_env,
 					cpu_PR[PR_CCS], cc_result,
 					cpu_PR[PR_MOF]);
 		break;
@@ -648,15 +648,15 @@
 		switch (dc->cc_size)
 		{
 		case 4:
-			gen_helper_evaluate_flags_move_4(cpu_PR[PR_CCS],
-						cpu_PR[PR_CCS], cc_result);
+                        gen_helper_evaluate_flags_move_4(cpu_PR[PR_CCS],
+                                           cpu_env, cpu_PR[PR_CCS], cc_result);
 			break;
 		case 2:
-			gen_helper_evaluate_flags_move_2(cpu_PR[PR_CCS],
-						cpu_PR[PR_CCS], cc_result);
+                        gen_helper_evaluate_flags_move_2(cpu_PR[PR_CCS],
+                                           cpu_env, cpu_PR[PR_CCS], cc_result);
 			break;
 		default:
-			gen_helper_evaluate_flags();
+                        gen_helper_evaluate_flags(cpu_env);
 			break;
 		}
 		break;
@@ -666,21 +666,21 @@
 	case CC_OP_SUB:
 	case CC_OP_CMP:
 		if (dc->cc_size == 4)
-			gen_helper_evaluate_flags_sub_4(cpu_PR[PR_CCS],
+                        gen_helper_evaluate_flags_sub_4(cpu_PR[PR_CCS], cpu_env,
 				cpu_PR[PR_CCS], cc_src, cc_dest, cc_result);
 		else
-			gen_helper_evaluate_flags();
+                        gen_helper_evaluate_flags(cpu_env);
 
 		break;
 	default:
 		switch (dc->cc_size)
 		{
 			case 4:
-			gen_helper_evaluate_flags_alu_4(cpu_PR[PR_CCS],
+                        gen_helper_evaluate_flags_alu_4(cpu_PR[PR_CCS], cpu_env,
 				cpu_PR[PR_CCS], cc_src, cc_dest, cc_result);
 				break;
 			default:
-				gen_helper_evaluate_flags();
+                                gen_helper_evaluate_flags(cpu_env);
 				break;
 		}
 		break;
@@ -1304,8 +1304,8 @@
 		t_gen_zext(dst, cpu_R[rd], size);
 }
 
-static int dec_prep_move_m(DisasContext *dc, int s_ext, int memsize,
-			   TCGv dst)
+static int dec_prep_move_m(CPUCRISState *env, DisasContext *dc,
+                           int s_ext, int memsize, TCGv dst)
 {
 	unsigned int rs;
 	uint32_t imm;
@@ -1321,7 +1321,7 @@
 		if (memsize == 1)
 			insn_len++;
 
-		imm = cris_fetch(dc, dc->pc + 2, memsize, s_ext);
+                imm = cris_fetch(env, dc, dc->pc + 2, memsize, s_ext);
 		tcg_gen_movi_tl(dst, imm);
 		dc->postinc = 0;
 	} else {
@@ -1338,12 +1338,12 @@
 /* Prepare T0 and T1 for a memory + alu operation.
    s_ext decides if the operand1 should be sign-extended or zero-extended when
    needed.  */
-static int dec_prep_alu_m(DisasContext *dc, int s_ext, int memsize,
-			  TCGv dst, TCGv src)
+static int dec_prep_alu_m(CPUCRISState *env, DisasContext *dc,
+                          int s_ext, int memsize, TCGv dst, TCGv src)
 {
 	int insn_len;
 
-	insn_len = dec_prep_move_m(dc, s_ext, memsize, src);
+        insn_len = dec_prep_move_m(env, dc, s_ext, memsize, src);
 	tcg_gen_mov_tl(dst, cpu_R[dc->op2]);
 	return insn_len;
 }
@@ -1362,7 +1362,7 @@
 
 /* Start of insn decoders.  */
 
-static int dec_bccq(DisasContext *dc)
+static int dec_bccq(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t offset;
 	int sign;
@@ -1382,7 +1382,7 @@
 	cris_prepare_cc_branch (dc, offset, cond);
 	return 2;
 }
-static int dec_addoq(DisasContext *dc)
+static int dec_addoq(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t imm;
 
@@ -1396,7 +1396,7 @@
 
 	return 2;
 }
-static int dec_addq(DisasContext *dc)
+static int dec_addq(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("addq %u, $r%u\n", dc->op1, dc->op2);
 
@@ -1408,7 +1408,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(dc->op1), 4);
 	return 2;
 }
-static int dec_moveq(DisasContext *dc)
+static int dec_moveq(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 
@@ -1419,7 +1419,7 @@
 	tcg_gen_movi_tl(cpu_R[dc->op2], imm);
 	return 2;
 }
-static int dec_subq(DisasContext *dc)
+static int dec_subq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 5);
 
@@ -1430,7 +1430,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(dc->op1), 4);
 	return 2;
 }
-static int dec_cmpq(DisasContext *dc)
+static int dec_cmpq(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 5);
@@ -1443,7 +1443,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(imm), 4);
 	return 2;
 }
-static int dec_andq(DisasContext *dc)
+static int dec_andq(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 5);
@@ -1456,7 +1456,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(imm), 4);
 	return 2;
 }
-static int dec_orq(DisasContext *dc)
+static int dec_orq(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 5);
@@ -1468,14 +1468,14 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], tcg_const_tl(imm), 4);
 	return 2;
 }
-static int dec_btstq(DisasContext *dc)
+static int dec_btstq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 4);
 	LOG_DIS("btstq %u, $r%d\n", dc->op1, dc->op2);
 
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_evaluate_flags(dc);
-	gen_helper_btst(cpu_PR[PR_CCS], cpu_R[dc->op2],
+        gen_helper_btst(cpu_PR[PR_CCS], cpu_env, cpu_R[dc->op2],
 			tcg_const_tl(dc->op1), cpu_PR[PR_CCS]);
 	cris_alu(dc, CC_OP_MOVE,
 		 cpu_R[dc->op2], cpu_R[dc->op2], cpu_R[dc->op2], 4);
@@ -1483,7 +1483,7 @@
 	dc->flags_uptodate = 1;
 	return 2;
 }
-static int dec_asrq(DisasContext *dc)
+static int dec_asrq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 4);
 	LOG_DIS("asrq %u, $r%d\n", dc->op1, dc->op2);
@@ -1495,7 +1495,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], 4);
 	return 2;
 }
-static int dec_lslq(DisasContext *dc)
+static int dec_lslq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 4);
 	LOG_DIS("lslq %u, $r%d\n", dc->op1, dc->op2);
@@ -1509,7 +1509,7 @@
 		    cpu_R[dc->op2], cpu_R[dc->op2], 4);
 	return 2;
 }
-static int dec_lsrq(DisasContext *dc)
+static int dec_lsrq(CPUCRISState *env, DisasContext *dc)
 {
 	dc->op1 = EXTRACT_FIELD(dc->ir, 0, 4);
 	LOG_DIS("lsrq %u, $r%d\n", dc->op1, dc->op2);
@@ -1523,7 +1523,7 @@
 	return 2;
 }
 
-static int dec_move_r(DisasContext *dc)
+static int dec_move_r(CPUCRISState *env, DisasContext *dc)
 {
 	int size = memsize_zz(dc);
 
@@ -1551,7 +1551,7 @@
 	return 2;
 }
 
-static int dec_scc_r(DisasContext *dc)
+static int dec_scc_r(CPUCRISState *env, DisasContext *dc)
 {
 	int cond = dc->op2;
 
@@ -1594,7 +1594,7 @@
 	}
 }
 
-static int dec_and_r(DisasContext *dc)
+static int dec_and_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1611,7 +1611,7 @@
 	return 2;
 }
 
-static int dec_lz_r(DisasContext *dc)
+static int dec_lz_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	LOG_DIS("lz $r%u, $r%u\n",
@@ -1624,7 +1624,7 @@
 	return 2;
 }
 
-static int dec_lsl_r(DisasContext *dc)
+static int dec_lsl_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1641,7 +1641,7 @@
 	return 2;
 }
 
-static int dec_lsr_r(DisasContext *dc)
+static int dec_lsr_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1658,7 +1658,7 @@
 	return 2;
 }
 
-static int dec_asr_r(DisasContext *dc)
+static int dec_asr_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1675,7 +1675,7 @@
 	return 2;
 }
 
-static int dec_muls_r(DisasContext *dc)
+static int dec_muls_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1691,7 +1691,7 @@
 	return 2;
 }
 
-static int dec_mulu_r(DisasContext *dc)
+static int dec_mulu_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1708,7 +1708,7 @@
 }
 
 
-static int dec_dstep_r(DisasContext *dc)
+static int dec_dstep_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("dstep $r%u, $r%u\n", dc->op1, dc->op2);
 	cris_cc_mask(dc, CC_MASK_NZ);
@@ -1717,7 +1717,7 @@
 	return 2;
 }
 
-static int dec_xor_r(DisasContext *dc)
+static int dec_xor_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1733,7 +1733,7 @@
 	return 2;
 }
 
-static int dec_bound_r(DisasContext *dc)
+static int dec_bound_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv l0;
 	int size = memsize_zz(dc);
@@ -1747,7 +1747,7 @@
 	return 2;
 }
 
-static int dec_cmp_r(DisasContext *dc)
+static int dec_cmp_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1762,7 +1762,7 @@
 	return 2;
 }
 
-static int dec_abs_r(DisasContext *dc)
+static int dec_abs_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 
@@ -1781,7 +1781,7 @@
 	return 2;
 }
 
-static int dec_add_r(DisasContext *dc)
+static int dec_add_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1796,7 +1796,7 @@
 	return 2;
 }
 
-static int dec_addc_r(DisasContext *dc)
+static int dec_addc_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("addc $r%u, $r%u\n",
 		    dc->op1, dc->op2);
@@ -1811,7 +1811,7 @@
 	return 2;
 }
 
-static int dec_mcp_r(DisasContext *dc)
+static int dec_mcp_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("mcp $p%u, $r%u\n",
 		     dc->op2, dc->op1);
@@ -1838,7 +1838,7 @@
 }
 #endif
 
-static int dec_swap_r(DisasContext *dc)
+static int dec_swap_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 #if DISAS_CRIS
@@ -1864,7 +1864,7 @@
 	return 2;
 }
 
-static int dec_or_r(DisasContext *dc)
+static int dec_or_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1878,7 +1878,7 @@
 	return 2;
 }
 
-static int dec_addi_r(DisasContext *dc)
+static int dec_addi_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	LOG_DIS("addi.%c $r%u, $r%u\n",
@@ -1891,7 +1891,7 @@
 	return 2;
 }
 
-static int dec_addi_acr(DisasContext *dc)
+static int dec_addi_acr(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	LOG_DIS("addi.%c $r%u, $r%u, $acr\n",
@@ -1904,7 +1904,7 @@
 	return 2;
 }
 
-static int dec_neg_r(DisasContext *dc)
+static int dec_neg_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1919,13 +1919,13 @@
 	return 2;
 }
 
-static int dec_btst_r(DisasContext *dc)
+static int dec_btst_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("btst $r%u, $r%u\n",
 		    dc->op1, dc->op2);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_evaluate_flags(dc);
-	gen_helper_btst(cpu_PR[PR_CCS], cpu_R[dc->op2],
+        gen_helper_btst(cpu_PR[PR_CCS], cpu_env, cpu_R[dc->op2],
 			cpu_R[dc->op1], cpu_PR[PR_CCS]);
 	cris_alu(dc, CC_OP_MOVE, cpu_R[dc->op2],
 		 cpu_R[dc->op2], cpu_R[dc->op2], 4);
@@ -1934,7 +1934,7 @@
 	return 2;
 }
 
-static int dec_sub_r(DisasContext *dc)
+static int dec_sub_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int size = memsize_zz(dc);
@@ -1949,7 +1949,7 @@
 }
 
 /* Zero extension. From size to dword.  */
-static int dec_movu_r(DisasContext *dc)
+static int dec_movu_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -1966,7 +1966,7 @@
 }
 
 /* Sign extension. From size to dword.  */
-static int dec_movs_r(DisasContext *dc)
+static int dec_movs_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -1985,7 +1985,7 @@
 }
 
 /* zero extension. From size to dword.  */
-static int dec_addu_r(DisasContext *dc)
+static int dec_addu_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -2004,7 +2004,7 @@
 }
 
 /* Sign extension. From size to dword.  */
-static int dec_adds_r(DisasContext *dc)
+static int dec_adds_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -2023,7 +2023,7 @@
 }
 
 /* Zero extension. From size to dword.  */
-static int dec_subu_r(DisasContext *dc)
+static int dec_subu_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -2042,7 +2042,7 @@
 }
 
 /* Sign extension. From size to dword.  */
-static int dec_subs_r(DisasContext *dc)
+static int dec_subs_r(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int size = memsize_z(dc);
@@ -2060,7 +2060,7 @@
 	return 2;
 }
 
-static int dec_setclrf(DisasContext *dc)
+static int dec_setclrf(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t flags;
 	int set = (~dc->opcode >> 2) & 1;
@@ -2131,22 +2131,24 @@
 	return 2;
 }
 
-static int dec_move_rs(DisasContext *dc)
+static int dec_move_rs(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("move $r%u, $s%u\n", dc->op1, dc->op2);
 	cris_cc_mask(dc, 0);
-	gen_helper_movl_sreg_reg(tcg_const_tl(dc->op2), tcg_const_tl(dc->op1));
+        gen_helper_movl_sreg_reg(cpu_env, tcg_const_tl(dc->op2),
+                                 tcg_const_tl(dc->op1));
 	return 2;
 }
-static int dec_move_sr(DisasContext *dc)
+static int dec_move_sr(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("move $s%u, $r%u\n", dc->op2, dc->op1);
 	cris_cc_mask(dc, 0);
-	gen_helper_movl_reg_sreg(tcg_const_tl(dc->op1), tcg_const_tl(dc->op2));
+        gen_helper_movl_reg_sreg(cpu_env, tcg_const_tl(dc->op1),
+                                 tcg_const_tl(dc->op2));
 	return 2;
 }
 
-static int dec_move_rp(DisasContext *dc)
+static int dec_move_rp(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	LOG_DIS("move $r%u, $p%u\n", dc->op1, dc->op2);
@@ -2176,7 +2178,7 @@
 	tcg_temp_free(t[0]);
 	return 2;
 }
-static int dec_move_pr(DisasContext *dc)
+static int dec_move_pr(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	LOG_DIS("move $p%u, $r%u\n", dc->op2, dc->op1);
@@ -2198,7 +2200,7 @@
 	return 2;
 }
 
-static int dec_move_mr(DisasContext *dc)
+static int dec_move_mr(CPUCRISState *env, DisasContext *dc)
 {
 	int memsize = memsize_zz(dc);
 	int insn_len;
@@ -2208,7 +2210,7 @@
 		    dc->op2);
 
 	if (memsize == 4) {
-		insn_len = dec_prep_move_m(dc, 0, 4, cpu_R[dc->op2]);
+                insn_len = dec_prep_move_m(env, dc, 0, 4, cpu_R[dc->op2]);
 		cris_cc_mask(dc, CC_MASK_NZ);
 		cris_update_cc_op(dc, CC_OP_MOVE, 4);
 		cris_update_cc_x(dc);
@@ -2218,7 +2220,7 @@
 		TCGv t0;
 
 		t0 = tcg_temp_new();
-		insn_len = dec_prep_move_m(dc, 0, memsize, t0);
+                insn_len = dec_prep_move_m(env, dc, 0, memsize, t0);
 		cris_cc_mask(dc, CC_MASK_NZ);
 		cris_alu(dc, CC_OP_MOVE,
 			    cpu_R[dc->op2], cpu_R[dc->op2], t0, memsize);
@@ -2240,7 +2242,7 @@
 	tcg_temp_free(t[1]);
 }
 
-static int dec_movs_m(DisasContext *dc)
+static int dec_movs_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2252,7 +2254,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_MOVE,
 		    cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
@@ -2261,7 +2263,7 @@
 	return insn_len;
 }
 
-static int dec_addu_m(DisasContext *dc)
+static int dec_addu_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2273,7 +2275,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_ADD,
 		    cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
@@ -2282,7 +2284,7 @@
 	return insn_len;
 }
 
-static int dec_adds_m(DisasContext *dc)
+static int dec_adds_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2294,7 +2296,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_ADD, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2302,7 +2304,7 @@
 	return insn_len;
 }
 
-static int dec_subu_m(DisasContext *dc)
+static int dec_subu_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2314,7 +2316,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_SUB, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2322,7 +2324,7 @@
 	return insn_len;
 }
 
-static int dec_subs_m(DisasContext *dc)
+static int dec_subs_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2334,7 +2336,7 @@
 
 	cris_alu_m_alloc_temps(t);
 	/* sign extend.  */
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_SUB, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2342,7 +2344,7 @@
 	return insn_len;
 }
 
-static int dec_movu_m(DisasContext *dc)
+static int dec_movu_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2354,7 +2356,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_MOVE, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2362,7 +2364,7 @@
 	return insn_len;
 }
 
-static int dec_cmpu_m(DisasContext *dc)
+static int dec_cmpu_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2373,7 +2375,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_CMP, cpu_R[dc->op2], cpu_R[dc->op2], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2381,7 +2383,7 @@
 	return insn_len;
 }
 
-static int dec_cmps_m(DisasContext *dc)
+static int dec_cmps_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_z(dc);
@@ -2392,7 +2394,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_CMP,
 		    cpu_R[dc->op2], cpu_R[dc->op2], t[1],
@@ -2402,7 +2404,7 @@
 	return insn_len;
 }
 
-static int dec_cmp_m(DisasContext *dc)
+static int dec_cmp_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2413,7 +2415,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_CMP,
 		    cpu_R[dc->op2], cpu_R[dc->op2], t[1],
@@ -2423,7 +2425,7 @@
 	return insn_len;
 }
 
-static int dec_test_m(DisasContext *dc)
+static int dec_test_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2436,7 +2438,7 @@
 	cris_evaluate_flags(dc);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~3);
 
@@ -2447,7 +2449,7 @@
 	return insn_len;
 }
 
-static int dec_and_m(DisasContext *dc)
+static int dec_and_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2458,7 +2460,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_AND, cpu_R[dc->op2], t[0], t[1], memsize_zz(dc));
 	do_postinc(dc, memsize);
@@ -2466,7 +2468,7 @@
 	return insn_len;
 }
 
-static int dec_add_m(DisasContext *dc)
+static int dec_add_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2477,7 +2479,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_ADD,
 		 cpu_R[dc->op2], t[0], t[1], memsize_zz(dc));
@@ -2486,7 +2488,7 @@
 	return insn_len;
 }
 
-static int dec_addo_m(DisasContext *dc)
+static int dec_addo_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2497,7 +2499,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 1, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 1, memsize, t[0], t[1]);
 	cris_cc_mask(dc, 0);
 	cris_alu(dc, CC_OP_ADD, cpu_R[R_ACR], t[0], t[1], 4);
 	do_postinc(dc, memsize);
@@ -2505,7 +2507,7 @@
 	return insn_len;
 }
 
-static int dec_bound_m(DisasContext *dc)
+static int dec_bound_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv l[2];
 	int memsize = memsize_zz(dc);
@@ -2517,7 +2519,7 @@
 
 	l[0] = tcg_temp_local_new();
 	l[1] = tcg_temp_local_new();
-	insn_len = dec_prep_alu_m(dc, 0, memsize, l[0], l[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, l[0], l[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_BOUND, cpu_R[dc->op2], l[0], l[1], 4);
 	do_postinc(dc, memsize);
@@ -2526,7 +2528,7 @@
 	return insn_len;
 }
 
-static int dec_addc_mr(DisasContext *dc)
+static int dec_addc_mr(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int insn_len = 2;
@@ -2541,7 +2543,7 @@
 	dc->flags_x = X_FLAG;
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, 4, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, 4, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_ADDC, cpu_R[dc->op2], t[0], t[1], 4);
 	do_postinc(dc, 4);
@@ -2549,7 +2551,7 @@
 	return insn_len;
 }
 
-static int dec_sub_m(DisasContext *dc)
+static int dec_sub_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2560,7 +2562,7 @@
 		    dc->op2, dc->ir, dc->zzsize);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZVC);
 	cris_alu(dc, CC_OP_SUB, cpu_R[dc->op2], t[0], t[1], memsize);
 	do_postinc(dc, memsize);
@@ -2568,7 +2570,7 @@
 	return insn_len;
 }
 
-static int dec_or_m(DisasContext *dc)
+static int dec_or_m(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2579,7 +2581,7 @@
 		    dc->op2, dc->pc);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, CC_MASK_NZ);
 	cris_alu(dc, CC_OP_OR,
 		    cpu_R[dc->op2], t[0], t[1], memsize_zz(dc));
@@ -2588,7 +2590,7 @@
 	return insn_len;
 }
 
-static int dec_move_mp(DisasContext *dc)
+static int dec_move_mp(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t[2];
 	int memsize = memsize_zz(dc);
@@ -2601,7 +2603,7 @@
 		    dc->op2);
 
 	cris_alu_m_alloc_temps(t);
-	insn_len = dec_prep_alu_m(dc, 0, memsize, t[0], t[1]);
+        insn_len = dec_prep_alu_m(env, dc, 0, memsize, t[0], t[1]);
 	cris_cc_mask(dc, 0);
 	if (dc->op2 == PR_CCS) {
 		cris_evaluate_flags(dc);
@@ -2620,7 +2622,7 @@
 	return insn_len;
 }
 
-static int dec_move_pm(DisasContext *dc)
+static int dec_move_pm(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv t0;
 	int memsize;
@@ -2646,7 +2648,7 @@
 	return 2;
 }
 
-static int dec_movem_mr(DisasContext *dc)
+static int dec_movem_mr(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv_i64 tmp[16];
         TCGv tmp32;
@@ -2693,7 +2695,7 @@
 	return 2;
 }
 
-static int dec_movem_rm(DisasContext *dc)
+static int dec_movem_rm(CPUCRISState *env, DisasContext *dc)
 {
 	TCGv tmp;
 	TCGv addr;
@@ -2722,7 +2724,7 @@
 	return 2;
 }
 
-static int dec_move_rm(DisasContext *dc)
+static int dec_move_rm(CPUCRISState *env, DisasContext *dc)
 {
 	int memsize;
 
@@ -2741,7 +2743,7 @@
 	return 2;
 }
 
-static int dec_lapcq(DisasContext *dc)
+static int dec_lapcq(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("lapcq %x, $r%u\n",
 		    dc->pc + dc->op1*2, dc->op2);
@@ -2750,7 +2752,7 @@
 	return 2;
 }
 
-static int dec_lapc_im(DisasContext *dc)
+static int dec_lapc_im(CPUCRISState *env, DisasContext *dc)
 {
 	unsigned int rd;
 	int32_t imm;
@@ -2759,7 +2761,7 @@
 	rd = dc->op2;
 
 	cris_cc_mask(dc, 0);
-	imm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        imm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 	LOG_DIS("lapc 0x%x, $r%u\n", imm + dc->pc, dc->op2);
 
 	pc = dc->pc;
@@ -2769,7 +2771,7 @@
 }
 
 /* Jump to special reg.  */
-static int dec_jump_p(DisasContext *dc)
+static int dec_jump_p(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("jump $p%u\n", dc->op2);
 
@@ -2784,7 +2786,7 @@
 }
 
 /* Jump and save.  */
-static int dec_jas_r(DisasContext *dc)
+static int dec_jas_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("jas $r%u, $p%u\n", dc->op1, dc->op2);
 	cris_cc_mask(dc, 0);
@@ -2798,11 +2800,11 @@
 	return 2;
 }
 
-static int dec_jas_im(DisasContext *dc)
+static int dec_jas_im(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 
-	imm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        imm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 
 	LOG_DIS("jas 0x%x\n", imm);
 	cris_cc_mask(dc, 0);
@@ -2814,11 +2816,11 @@
 	return 6;
 }
 
-static int dec_jasc_im(DisasContext *dc)
+static int dec_jasc_im(CPUCRISState *env, DisasContext *dc)
 {
 	uint32_t imm;
 
-	imm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        imm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 
 	LOG_DIS("jasc 0x%x\n", imm);
 	cris_cc_mask(dc, 0);
@@ -2830,7 +2832,7 @@
 	return 6;
 }
 
-static int dec_jasc_r(DisasContext *dc)
+static int dec_jasc_r(CPUCRISState *env, DisasContext *dc)
 {
 	LOG_DIS("jasc_r $r%u, $p%u\n", dc->op1, dc->op2);
 	cris_cc_mask(dc, 0);
@@ -2841,12 +2843,12 @@
 	return 2;
 }
 
-static int dec_bcc_im(DisasContext *dc)
+static int dec_bcc_im(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t offset;
 	uint32_t cond = dc->op2;
 
-	offset = cris_fetch(dc, dc->pc + 2, 2, 1);
+        offset = cris_fetch(env, dc, dc->pc + 2, 2, 1);
 
 	LOG_DIS("b%s %d pc=%x dst=%x\n",
 		    cc_name(cond), offset,
@@ -2858,12 +2860,12 @@
 	return 4;
 }
 
-static int dec_bas_im(DisasContext *dc)
+static int dec_bas_im(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t simm;
 
 
-	simm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        simm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 
 	LOG_DIS("bas 0x%x, $p%u\n", dc->pc + simm, dc->op2);
 	cris_cc_mask(dc, 0);
@@ -2875,10 +2877,10 @@
 	return 6;
 }
 
-static int dec_basc_im(DisasContext *dc)
+static int dec_basc_im(CPUCRISState *env, DisasContext *dc)
 {
 	int32_t simm;
-	simm = cris_fetch(dc, dc->pc + 2, 4, 0);
+        simm = cris_fetch(env, dc, dc->pc + 2, 4, 0);
 
 	LOG_DIS("basc 0x%x, $p%u\n", dc->pc + simm, dc->op2);
 	cris_cc_mask(dc, 0);
@@ -2890,7 +2892,7 @@
 	return 6;
 }
 
-static int dec_rfe_etc(DisasContext *dc)
+static int dec_rfe_etc(CPUCRISState *env, DisasContext *dc)
 {
 	cris_cc_mask(dc, 0);
 
@@ -2906,14 +2908,14 @@
 			/* rfe.  */
 			LOG_DIS("rfe\n");
 			cris_evaluate_flags(dc);
-			gen_helper_rfe();
+                        gen_helper_rfe(cpu_env);
 			dc->is_jmp = DISAS_UPDATE;
 			break;
 		case 5:
 			/* rfn.  */
 			LOG_DIS("rfn\n");
 			cris_evaluate_flags(dc);
-			gen_helper_rfn();
+                        gen_helper_rfn(cpu_env);
 			dc->is_jmp = DISAS_UPDATE;
 			break;
 		case 6:
@@ -2937,17 +2939,17 @@
 	return 2;
 }
 
-static int dec_ftag_fidx_d_m(DisasContext *dc)
+static int dec_ftag_fidx_d_m(CPUCRISState *env, DisasContext *dc)
 {
 	return 2;
 }
 
-static int dec_ftag_fidx_i_m(DisasContext *dc)
+static int dec_ftag_fidx_i_m(CPUCRISState *env, DisasContext *dc)
 {
 	return 2;
 }
 
-static int dec_null(DisasContext *dc)
+static int dec_null(CPUCRISState *env, DisasContext *dc)
 {
 	printf ("unknown insn pc=%x opc=%x op1=%x op2=%x\n",
 		dc->pc, dc->opcode, dc->op1, dc->op2);
@@ -2961,7 +2963,7 @@
 		uint32_t bits;
 		uint32_t mask;
 	};
-	int (*dec)(DisasContext *dc);
+        int (*dec)(CPUCRISState *env, DisasContext *dc);
 } decinfo[] = {
 	/* Order matters here.  */
 	{DEC_MOVEQ, dec_moveq},
@@ -3067,7 +3069,7 @@
 	{{0, 0}, dec_null}
 };
 
-static unsigned int crisv32_decoder(DisasContext *dc)
+static unsigned int crisv32_decoder(CPUCRISState *env, DisasContext *dc)
 {
 	int insn_len = 2;
 	int i;
@@ -3076,7 +3078,7 @@
 		tcg_gen_debug_insn_start(dc->pc);
 
 	/* Load a halfword onto the instruction register.  */
-	dc->ir = cris_fetch(dc, dc->pc, 2, 0);
+        dc->ir = cris_fetch(env, dc, dc->pc, 2, 0);
 
 	/* Now decode it.  */
 	dc->opcode   = EXTRACT_FIELD(dc->ir, 4, 11);
@@ -3090,7 +3092,7 @@
 	for (i = 0; i < ARRAY_SIZE(decinfo); i++) {
 		if ((dc->opcode & decinfo[i].mask) == decinfo[i].bits)
 		{
-			insn_len = decinfo[i].dec(dc);
+                        insn_len = decinfo[i].dec(env, dc);
 			break;
 		}
 	}
@@ -3284,7 +3286,7 @@
                     gen_io_start();
 		dc->clear_x = 1;
 
-		insn_len = dc->decoder(dc);
+                insn_len = dc->decoder(env, dc);
 		dc->ppc = dc->pc;
 		dc->pc += insn_len;
 		if (dc->clear_x)
diff --git a/target-cris/translate_v10.c b/target-cris/translate_v10.c
index 3629629..d2cca89 100644
--- a/target-cris/translate_v10.c
+++ b/target-cris/translate_v10.c
@@ -164,8 +164,8 @@
     return insn_len;
 }
 
-static int dec10_prep_move_m(DisasContext *dc, int s_ext, int memsize,
-                           TCGv dst)
+static int dec10_prep_move_m(CPUCRISState *env, DisasContext *dc,
+                             int s_ext, int memsize, TCGv dst)
 {
     unsigned int rs;
     uint32_t imm;
@@ -182,17 +182,17 @@
         if (memsize != 4) {
             if (s_ext) {
                 if (memsize == 1)
-                    imm = ldsb_code(dc->pc + 2);
+                    imm = cpu_ldsb_code(env, dc->pc + 2);
                 else
-                    imm = ldsw_code(dc->pc + 2);
+                    imm = cpu_ldsw_code(env, dc->pc + 2);
             } else {
                 if (memsize == 1)
-                    imm = ldub_code(dc->pc + 2);
+                    imm = cpu_ldub_code(env, dc->pc + 2);
                 else
-                    imm = lduw_code(dc->pc + 2);
+                    imm = cpu_lduw_code(env, dc->pc + 2);
             }
         } else
-            imm = ldl_code(dc->pc + 2);
+            imm = cpu_ldl_code(env, dc->pc + 2);
 
         tcg_gen_movi_tl(dst, imm);
 
@@ -289,7 +289,7 @@
             } else {
                 /* BTST */
                 cris_update_cc_op(dc, CC_OP_FLAGS, 4);
-                gen_helper_btst(cpu_PR[PR_CCS], cpu_R[dc->dst],
+                gen_helper_btst(cpu_PR[PR_CCS], cpu_env, cpu_R[dc->dst],
                            tcg_const_tl(imm), cpu_PR[PR_CCS]);
             }
             break;
@@ -723,7 +723,7 @@
                 LOG_DIS("btst $r%d, $r%d sz=%d\n", dc->src, dc->dst, size);
                 cris_cc_mask(dc, CC_MASK_NZVC);
                 cris_update_cc_op(dc, CC_OP_FLAGS, 4);
-                gen_helper_btst(cpu_PR[PR_CCS], cpu_R[dc->dst],
+                gen_helper_btst(cpu_PR[PR_CCS], cpu_env, cpu_R[dc->dst],
                            cpu_R[dc->src], cpu_PR[PR_CCS]);
                 break;
             case CRISV10_REG_DSTEP:
@@ -752,7 +752,8 @@
     return insn_len;
 }
 
-static unsigned int dec10_ind_move_m_r(DisasContext *dc, unsigned int size)
+static unsigned int dec10_ind_move_m_r(CPUCRISState *env, DisasContext *dc,
+                                       unsigned int size)
 {
     unsigned int insn_len = 2;
     TCGv t;
@@ -762,7 +763,7 @@
 
     cris_cc_mask(dc, CC_MASK_NZVC);
     t = tcg_temp_new();
-    insn_len += dec10_prep_move_m(dc, 0, size, t);
+    insn_len += dec10_prep_move_m(env, dc, 0, size, t);
     cris_alu(dc, CC_OP_MOVE, cpu_R[dc->dst], cpu_R[dc->dst], t, size);
     if (dc->dst == 15) {
         tcg_gen_mov_tl(env_btarget, cpu_R[dc->dst]);
@@ -789,7 +790,7 @@
     return insn_len;
 }
 
-static unsigned int dec10_ind_move_m_pr(DisasContext *dc)
+static unsigned int dec10_ind_move_m_pr(CPUCRISState *env, DisasContext *dc)
 {
     unsigned int insn_len = 2, rd = dc->dst;
     TCGv t, addr;
@@ -799,7 +800,7 @@
 
     addr = tcg_temp_new();
     t = tcg_temp_new();
-    insn_len += dec10_prep_move_m(dc, 0, 4, t);
+    insn_len += dec10_prep_move_m(env, dc, 0, 4, t);
     if (rd == 15) {
         tcg_gen_mov_tl(env_btarget, t);
         cris_prepare_jmp(dc, JMP_INDIRECT);
@@ -899,14 +900,15 @@
     tcg_temp_free(t0);
 }
 
-static int dec10_ind_alu(DisasContext *dc, int op, unsigned int size)
+static int dec10_ind_alu(CPUCRISState *env, DisasContext *dc,
+                         int op, unsigned int size)
 {
     int insn_len = 0;
     int rd = dc->dst;
     TCGv t[2];
 
     cris_alu_m_alloc_temps(t);
-    insn_len += dec10_prep_move_m(dc, 0, size, t[0]);
+    insn_len += dec10_prep_move_m(env, dc, 0, size, t[0]);
     cris_alu(dc, op, cpu_R[dc->dst], cpu_R[rd], t[0], size);
     if (dc->dst == 15) {
         tcg_gen_mov_tl(env_btarget, cpu_R[dc->dst]);
@@ -920,14 +922,15 @@
     return insn_len;
 }
 
-static int dec10_ind_bound(DisasContext *dc, unsigned int size)
+static int dec10_ind_bound(CPUCRISState *env, DisasContext *dc,
+                           unsigned int size)
 {
     int insn_len = 0;
     int rd = dc->dst;
     TCGv t;
 
     t = tcg_temp_local_new();
-    insn_len += dec10_prep_move_m(dc, 0, size, t);
+    insn_len += dec10_prep_move_m(env, dc, 0, size, t);
     cris_alu(dc, CC_OP_BOUND, cpu_R[dc->dst], cpu_R[rd], t, 4);
     if (dc->dst == 15) {
         tcg_gen_mov_tl(env_btarget, cpu_R[dc->dst]);
@@ -940,7 +943,7 @@
     return insn_len;
 }
 
-static int dec10_alux_m(DisasContext *dc, int op)
+static int dec10_alux_m(CPUCRISState *env, DisasContext *dc, int op)
 {
     unsigned int size = (dc->size & 1) ? 2 : 1;
     unsigned int sx = !!(dc->size & 2);
@@ -953,7 +956,7 @@
     t = tcg_temp_new();
 
     cris_cc_mask(dc, CC_MASK_NZVC);
-    insn_len += dec10_prep_move_m(dc, sx, size, t);
+    insn_len += dec10_prep_move_m(env, dc, sx, size, t);
     cris_alu(dc, op, cpu_R[dc->dst], cpu_R[rd], t, 4);
     if (dc->dst == 15) {
         tcg_gen_mov_tl(env_btarget, cpu_R[dc->dst]);
@@ -966,7 +969,7 @@
     return insn_len;
 }
 
-static int dec10_dip(DisasContext *dc)
+static int dec10_dip(CPUCRISState *env, DisasContext *dc)
 {
     int insn_len = 2;
     uint32_t imm;
@@ -974,7 +977,7 @@
     LOG_DIS("dip pc=%x opcode=%d r%d r%d\n",
               dc->pc, dc->opcode, dc->src, dc->dst);
     if (dc->src == 15) {
-        imm = ldl_code(dc->pc + 2);
+        imm = cpu_ldl_code(env, dc->pc + 2);
         tcg_gen_movi_tl(cpu_PR[PR_PREFIX], imm);
         if (dc->postinc)
             insn_len += 4;
@@ -989,7 +992,7 @@
     return insn_len;
 }
 
-static int dec10_bdap_m(DisasContext *dc, int size)
+static int dec10_bdap_m(CPUCRISState *env, DisasContext *dc, int size)
 {
     int insn_len = 2;
     int rd = dc->dst;
@@ -1014,13 +1017,13 @@
     }
 #endif
     /* Now the rest of the modes are truly indirect.  */
-    insn_len += dec10_prep_move_m(dc, 1, size, cpu_PR[PR_PREFIX]);
+    insn_len += dec10_prep_move_m(env, dc, 1, size, cpu_PR[PR_PREFIX]);
     tcg_gen_add_tl(cpu_PR[PR_PREFIX], cpu_PR[PR_PREFIX], cpu_R[rd]);
     cris_set_prefix(dc);
     return insn_len;
 }
 
-static unsigned int dec10_ind(DisasContext *dc)
+static unsigned int dec10_ind(CPUCRISState *env, DisasContext *dc)
 {
     unsigned int insn_len = 2;
     unsigned int size = dec10_size(dc->size);
@@ -1031,7 +1034,7 @@
     if (dc->size != 3) {
         switch (dc->opcode) {
             case CRISV10_IND_MOVE_M_R:
-                return dec10_ind_move_m_r(dc, size);
+                return dec10_ind_move_m_r(env, dc, size);
                 break;
             case CRISV10_IND_MOVE_R_M:
                 return dec10_ind_move_r_m(dc, size);
@@ -1039,7 +1042,7 @@
             case CRISV10_IND_CMP:
                 LOG_DIS("cmp size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_CMP, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_CMP, size);
                 break;
             case CRISV10_IND_TEST:
                 LOG_DIS("test size=%d op=%d %d\n",  size, dc->src, dc->dst);
@@ -1047,7 +1050,7 @@
                 cris_evaluate_flags(dc);
                 cris_cc_mask(dc, CC_MASK_NZVC);
                 cris_alu_m_alloc_temps(t);
-                insn_len += dec10_prep_move_m(dc, 0, size, t[0]);
+                insn_len += dec10_prep_move_m(env, dc, 0, size, t[0]);
                 tcg_gen_andi_tl(cpu_PR[PR_CCS], cpu_PR[PR_CCS], ~3);
                 cris_alu(dc, CC_OP_CMP, cpu_R[dc->dst],
                          t[0], tcg_const_tl(0), size);
@@ -1056,39 +1059,39 @@
             case CRISV10_IND_ADD:
                 LOG_DIS("add size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_ADD, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_ADD, size);
                 break;
             case CRISV10_IND_SUB:
                 LOG_DIS("sub size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_SUB, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_SUB, size);
                 break;
             case CRISV10_IND_BOUND:
                 LOG_DIS("bound size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_bound(dc, size);
+                insn_len += dec10_ind_bound(env, dc, size);
                 break;
             case CRISV10_IND_AND:
                 LOG_DIS("and size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_AND, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_AND, size);
                 break;
             case CRISV10_IND_OR:
                 LOG_DIS("or size=%d op=%d %d\n",  size, dc->src, dc->dst);
                 cris_cc_mask(dc, CC_MASK_NZVC);
-                insn_len += dec10_ind_alu(dc, CC_OP_OR, size);
+                insn_len += dec10_ind_alu(env, dc, CC_OP_OR, size);
                 break;
             case CRISV10_IND_MOVX:
-                insn_len = dec10_alux_m(dc, CC_OP_MOVE);
+                insn_len = dec10_alux_m(env, dc, CC_OP_MOVE);
                 break;
             case CRISV10_IND_ADDX:
-                insn_len = dec10_alux_m(dc, CC_OP_ADD);
+                insn_len = dec10_alux_m(env, dc, CC_OP_ADD);
                 break;
             case CRISV10_IND_SUBX:
-                insn_len = dec10_alux_m(dc, CC_OP_SUB);
+                insn_len = dec10_alux_m(env, dc, CC_OP_SUB);
                 break;
             case CRISV10_IND_CMPX:
-                insn_len = dec10_alux_m(dc, CC_OP_CMP);
+                insn_len = dec10_alux_m(env, dc, CC_OP_CMP);
                 break;
             case CRISV10_IND_MUL:
                 /* This is a reg insn coded in the mem indir space.  */
@@ -1097,7 +1100,7 @@
                 dec10_reg_mul(dc, size, dc->ir & (1 << 10));
                 break;
             case CRISV10_IND_BDAP_M:
-                insn_len = dec10_bdap_m(dc, size);
+                insn_len = dec10_bdap_m(env, dc, size);
                 break;
             default:
                 LOG_DIS("pc=%x var-ind.%d %d r%d r%d\n",
@@ -1110,7 +1113,7 @@
 
     switch (dc->opcode) {
         case CRISV10_IND_MOVE_M_SPR:
-            insn_len = dec10_ind_move_m_pr(dc);
+            insn_len = dec10_ind_move_m_pr(env, dc);
             break;
         case CRISV10_IND_MOVE_SPR_M:
             insn_len = dec10_ind_move_pr_m(dc);
@@ -1119,7 +1122,7 @@
             if (dc->src == 15) {
                 LOG_DIS("jump.%d %d r%d r%d direct\n", size,
                          dc->opcode, dc->src, dc->dst);
-                imm = ldl_code(dc->pc + 2);
+                imm = cpu_ldl_code(env, dc->pc + 2);
                 if (dc->mode == CRISV10_MODE_AUTOINC)
                     insn_len += size;
 
@@ -1168,24 +1171,24 @@
             dc->delayed_branch--; /* v10 has no dslot here.  */
             break;
         case CRISV10_IND_MOVX:
-            insn_len = dec10_alux_m(dc, CC_OP_MOVE);
+            insn_len = dec10_alux_m(env, dc, CC_OP_MOVE);
             break;
         case CRISV10_IND_ADDX:
-            insn_len = dec10_alux_m(dc, CC_OP_ADD);
+            insn_len = dec10_alux_m(env, dc, CC_OP_ADD);
             break;
         case CRISV10_IND_SUBX:
-            insn_len = dec10_alux_m(dc, CC_OP_SUB);
+            insn_len = dec10_alux_m(env, dc, CC_OP_SUB);
             break;
         case CRISV10_IND_CMPX:
-            insn_len = dec10_alux_m(dc, CC_OP_CMP);
+            insn_len = dec10_alux_m(env, dc, CC_OP_CMP);
             break;
         case CRISV10_IND_DIP:
-            insn_len = dec10_dip(dc);
+            insn_len = dec10_dip(env, dc);
             break;
         case CRISV10_IND_BCC_M:
 
             cris_cc_mask(dc, 0);
-            imm = ldsw_code(dc->pc + 2);
+            imm = cpu_ldsw_code(env, dc->pc + 2);
             simm = (int16_t)imm;
             simm += 4;
 
@@ -1202,7 +1205,7 @@
     return insn_len;
 }
 
-static unsigned int crisv10_decoder(DisasContext *dc)
+static unsigned int crisv10_decoder(CPUCRISState *env, DisasContext *dc)
 {
     unsigned int insn_len = 2;
 
@@ -1210,7 +1213,7 @@
         tcg_gen_debug_insn_start(dc->pc);
 
     /* Load a halfword onto the instruction register.  */
-    dc->ir = lduw_code(dc->pc);
+    dc->ir = cpu_lduw_code(env, dc->pc);
 
     /* Now decode it.  */
     dc->opcode   = EXTRACT_FIELD(dc->ir, 6, 9);
@@ -1235,7 +1238,7 @@
             break;
         case CRISV10_MODE_AUTOINC:
         case CRISV10_MODE_INDIRECT:
-            insn_len = dec10_ind(dc);
+            insn_len = dec10_ind(env, dc);
             break;
     }
 
diff --git a/target-lm32/Makefile.objs b/target-lm32/Makefile.objs
index 2e0e093..ca20f21 100644
--- a/target-lm32/Makefile.objs
+++ b/target-lm32/Makefile.objs
@@ -1,4 +1,2 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-lm32/helper.h b/target-lm32/helper.h
index 9d335ef..07f5670 100644
--- a/target-lm32/helper.h
+++ b/target-lm32/helper.h
@@ -1,14 +1,14 @@
 #include "def-helper.h"
 
-DEF_HELPER_1(raise_exception, void, i32)
-DEF_HELPER_0(hlt, void)
-DEF_HELPER_1(wcsr_im, void, i32)
-DEF_HELPER_1(wcsr_ip, void, i32)
-DEF_HELPER_1(wcsr_jtx, void, i32)
-DEF_HELPER_1(wcsr_jrx, void, i32)
-DEF_HELPER_0(rcsr_im, i32)
-DEF_HELPER_0(rcsr_ip, i32)
-DEF_HELPER_0(rcsr_jtx, i32)
-DEF_HELPER_0(rcsr_jrx, i32)
+DEF_HELPER_2(raise_exception, void, env, i32)
+DEF_HELPER_1(hlt, void, env)
+DEF_HELPER_2(wcsr_im, void, env, i32)
+DEF_HELPER_2(wcsr_ip, void, env, i32)
+DEF_HELPER_2(wcsr_jtx, void, env, i32)
+DEF_HELPER_2(wcsr_jrx, void, env, i32)
+DEF_HELPER_1(rcsr_im, i32, env)
+DEF_HELPER_1(rcsr_ip, i32, env)
+DEF_HELPER_1(rcsr_jtx, i32, env)
+DEF_HELPER_1(rcsr_jrx, i32, env)
 
 #include "def-helper.h"
diff --git a/target-lm32/op_helper.c b/target-lm32/op_helper.c
index 51edc1a..7b91d8c 100644
--- a/target-lm32/op_helper.c
+++ b/target-lm32/op_helper.c
@@ -1,6 +1,5 @@
 #include <assert.h>
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 #include "host-utils.h"
 
@@ -18,55 +17,55 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
-void helper_raise_exception(uint32_t index)
+void helper_raise_exception(CPULM32State *env, uint32_t index)
 {
     env->exception_index = index;
     cpu_loop_exit(env);
 }
 
-void helper_hlt(void)
+void helper_hlt(CPULM32State *env)
 {
     env->halted = 1;
     env->exception_index = EXCP_HLT;
     cpu_loop_exit(env);
 }
 
-void helper_wcsr_im(uint32_t im)
+void helper_wcsr_im(CPULM32State *env, uint32_t im)
 {
     lm32_pic_set_im(env->pic_state, im);
 }
 
-void helper_wcsr_ip(uint32_t im)
+void helper_wcsr_ip(CPULM32State *env, uint32_t im)
 {
     lm32_pic_set_ip(env->pic_state, im);
 }
 
-void helper_wcsr_jtx(uint32_t jtx)
+void helper_wcsr_jtx(CPULM32State *env, uint32_t jtx)
 {
     lm32_juart_set_jtx(env->juart_state, jtx);
 }
 
-void helper_wcsr_jrx(uint32_t jrx)
+void helper_wcsr_jrx(CPULM32State *env, uint32_t jrx)
 {
     lm32_juart_set_jrx(env->juart_state, jrx);
 }
 
-uint32_t helper_rcsr_im(void)
+uint32_t helper_rcsr_im(CPULM32State *env)
 {
     return lm32_pic_get_im(env->pic_state);
 }
 
-uint32_t helper_rcsr_ip(void)
+uint32_t helper_rcsr_ip(CPULM32State *env)
 {
     return lm32_pic_get_ip(env->pic_state);
 }
 
-uint32_t helper_rcsr_jtx(void)
+uint32_t helper_rcsr_jtx(CPULM32State *env)
 {
     return lm32_juart_get_jtx(env->juart_state);
 }
 
-uint32_t helper_rcsr_jrx(void)
+uint32_t helper_rcsr_jrx(CPULM32State *env)
 {
     return lm32_juart_get_jrx(env->juart_state);
 }
@@ -74,17 +73,12 @@
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPULM32State *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPULM32State *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPULM32State *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
-
     ret = cpu_lm32_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -98,7 +92,6 @@
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 #endif
 
diff --git a/target-lm32/translate.c b/target-lm32/translate.c
index 872a2ba..5f6dcba 100644
--- a/target-lm32/translate.c
+++ b/target-lm32/translate.c
@@ -116,7 +116,7 @@
 {
     TCGv_i32 tmp = tcg_const_i32(index);
 
-    gen_helper_raise_exception(tmp);
+    gen_helper_raise_exception(cpu_env, tmp);
     tcg_temp_free_i32(tmp);
 }
 
@@ -179,7 +179,7 @@
     } else  {
         if (dc->r0 == 0 && dc->r1 == 0 && dc->r2 == 0) {
             tcg_gen_movi_tl(cpu_pc, dc->pc + 4);
-            gen_helper_hlt();
+            gen_helper_hlt(cpu_env);
         } else {
             tcg_gen_and_tl(cpu_R[dc->r2], cpu_R[dc->r0], cpu_R[dc->r1]);
         }
@@ -601,10 +601,10 @@
         tcg_gen_mov_tl(cpu_R[dc->r2], cpu_ie);
         break;
     case CSR_IM:
-        gen_helper_rcsr_im(cpu_R[dc->r2]);
+        gen_helper_rcsr_im(cpu_R[dc->r2], cpu_env);
         break;
     case CSR_IP:
-        gen_helper_rcsr_ip(cpu_R[dc->r2]);
+        gen_helper_rcsr_ip(cpu_R[dc->r2], cpu_env);
         break;
     case CSR_CC:
         tcg_gen_mov_tl(cpu_R[dc->r2], cpu_cc);
@@ -622,10 +622,10 @@
         tcg_gen_mov_tl(cpu_R[dc->r2], cpu_deba);
         break;
     case CSR_JTX:
-        gen_helper_rcsr_jtx(cpu_R[dc->r2]);
+        gen_helper_rcsr_jtx(cpu_R[dc->r2], cpu_env);
         break;
     case CSR_JRX:
-        gen_helper_rcsr_jrx(cpu_R[dc->r2]);
+        gen_helper_rcsr_jrx(cpu_R[dc->r2], cpu_env);
         break;
     case CSR_ICC:
     case CSR_DCC:
@@ -812,7 +812,7 @@
         if (use_icount) {
             gen_io_start();
         }
-        gen_helper_wcsr_im(cpu_R[dc->r1]);
+        gen_helper_wcsr_im(cpu_env, cpu_R[dc->r1]);
         tcg_gen_movi_tl(cpu_pc, dc->pc + 4);
         if (use_icount) {
             gen_io_end();
@@ -824,7 +824,7 @@
         if (use_icount) {
             gen_io_start();
         }
-        gen_helper_wcsr_ip(cpu_R[dc->r1]);
+        gen_helper_wcsr_ip(cpu_env, cpu_R[dc->r1]);
         tcg_gen_movi_tl(cpu_pc, dc->pc + 4);
         if (use_icount) {
             gen_io_end();
@@ -844,10 +844,10 @@
         tcg_gen_mov_tl(cpu_deba, cpu_R[dc->r1]);
         break;
     case CSR_JTX:
-        gen_helper_wcsr_jtx(cpu_R[dc->r1]);
+        gen_helper_wcsr_jtx(cpu_env, cpu_R[dc->r1]);
         break;
     case CSR_JRX:
-        gen_helper_wcsr_jrx(cpu_R[dc->r1]);
+        gen_helper_wcsr_jrx(cpu_env, cpu_R[dc->r1]);
         break;
     case CSR_DC:
         tcg_gen_mov_tl(cpu_dc, cpu_R[dc->r1]);
@@ -940,15 +940,13 @@
     dec_cmpne
 };
 
-static inline void decode(DisasContext *dc)
+static inline void decode(DisasContext *dc, uint32_t ir)
 {
-    uint32_t ir;
-
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
         tcg_gen_debug_insn_start(dc->pc);
     }
 
-    dc->ir = ir = ldl_code(dc->pc);
+    dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
 
     /* try guessing 'empty' instruction memory, although it may be a valid
@@ -1068,7 +1066,7 @@
             gen_io_start();
         }
 
-        decode(dc);
+        decode(dc, cpu_ldl_code(env, dc->pc));
         dc->pc += 4;
         num_insns++;
 
diff --git a/target-m68k/Makefile.objs b/target-m68k/Makefile.objs
index cda6015..7eccfab 100644
--- a/target-m68k/Makefile.objs
+++ b/target-m68k/Makefile.objs
@@ -1,5 +1,3 @@
 obj-y += m68k-semi.o
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-m68k/helpers.h b/target-m68k/helpers.h
index cb8a0c7..8112b44 100644
--- a/target-m68k/helpers.h
+++ b/target-m68k/helpers.h
@@ -49,6 +49,6 @@
 DEF_HELPER_3(set_mac_extu, void, env, i32, i32)
 
 DEF_HELPER_2(flush_flags, void, env, i32)
-DEF_HELPER_1(raise_exception, void, i32)
+DEF_HELPER_2(raise_exception, void, env, i32)
 
 #include "def-helper.h"
diff --git a/target-m68k/op_helper.c b/target-m68k/op_helper.c
index 1971a57..aa00504 100644
--- a/target-m68k/op_helper.c
+++ b/target-m68k/op_helper.c
@@ -17,17 +17,16 @@
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helpers.h"
 
 #if defined(CONFIG_USER_ONLY)
 
-void do_interrupt(CPUM68KState *env1)
+void do_interrupt(CPUM68KState *env)
 {
-    env1->exception_index = -1;
+    env->exception_index = -1;
 }
 
-void do_interrupt_m68k_hardirq(CPUM68KState *env1)
+void do_interrupt_m68k_hardirq(CPUM68KState *env)
 {
 }
 
@@ -54,16 +53,12 @@
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPUM68KState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUM68KState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUM68KState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = cpu_m68k_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -77,24 +72,23 @@
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 
-static void do_rte(void)
+static void do_rte(CPUM68KState *env)
 {
     uint32_t sp;
     uint32_t fmt;
 
     sp = env->aregs[7];
-    fmt = ldl_kernel(sp);
-    env->pc = ldl_kernel(sp + 4);
+    fmt = cpu_ldl_kernel(env, sp);
+    env->pc = cpu_ldl_kernel(env, sp + 4);
     sp |= (fmt >> 28) & 3;
     env->sr = fmt & 0xffff;
     m68k_switch_sp(env);
     env->aregs[7] = sp + 8;
 }
 
-static void do_interrupt_all(int is_hw)
+static void do_interrupt_all(CPUM68KState *env, int is_hw)
 {
     uint32_t sp;
     uint32_t fmt;
@@ -108,14 +102,14 @@
         switch (env->exception_index) {
         case EXCP_RTE:
             /* Return from an exception.  */
-            do_rte();
+            do_rte(env);
             return;
         case EXCP_HALT_INSN:
             if (semihosting_enabled
                     && (env->sr & SR_S) != 0
                     && (env->pc & 3) == 0
-                    && lduw_code(env->pc - 4) == 0x4e71
-                    && ldl_code(env->pc) == 0x4e7bf000) {
+                    && cpu_lduw_code(env, env->pc - 4) == 0x4e71
+                    && cpu_ldl_code(env, env->pc) == 0x4e7bf000) {
                 env->pc += 4;
                 do_m68k_semihosting(env, env->dregs[0]);
                 return;
@@ -151,44 +145,34 @@
     /* ??? This could cause MMU faults.  */
     sp &= ~3;
     sp -= 4;
-    stl_kernel(sp, retaddr);
+    cpu_stl_kernel(env, sp, retaddr);
     sp -= 4;
-    stl_kernel(sp, fmt);
+    cpu_stl_kernel(env, sp, fmt);
     env->aregs[7] = sp;
     /* Jump to vector.  */
-    env->pc = ldl_kernel(env->vbr + vector);
+    env->pc = cpu_ldl_kernel(env, env->vbr + vector);
 }
 
-void do_interrupt(CPUM68KState *env1)
+void do_interrupt(CPUM68KState *env)
 {
-    CPUM68KState *saved_env;
-
-    saved_env = env;
-    env = env1;
-    do_interrupt_all(0);
-    env = saved_env;
+    do_interrupt_all(env, 0);
 }
 
-void do_interrupt_m68k_hardirq(CPUM68KState *env1)
+void do_interrupt_m68k_hardirq(CPUM68KState *env)
 {
-    CPUM68KState *saved_env;
-
-    saved_env = env;
-    env = env1;
-    do_interrupt_all(1);
-    env = saved_env;
+    do_interrupt_all(env, 1);
 }
 #endif
 
-static void raise_exception(int tt)
+static void raise_exception(CPUM68KState *env, int tt)
 {
     env->exception_index = tt;
     cpu_loop_exit(env);
 }
 
-void HELPER(raise_exception)(uint32_t tt)
+void HELPER(raise_exception)(CPUM68KState *env, uint32_t tt)
 {
-    raise_exception(tt);
+    raise_exception(env, tt);
 }
 
 void HELPER(divu)(CPUM68KState *env, uint32_t word)
@@ -202,14 +186,12 @@
     num = env->div1;
     den = env->div2;
     /* ??? This needs to make sure the throwing location is accurate.  */
-    if (den == 0)
-        raise_exception(EXCP_DIV0);
+    if (den == 0) {
+        raise_exception(env, EXCP_DIV0);
+    }
     quot = num / den;
     rem = num % den;
     flags = 0;
-    /* Avoid using a PARAM1 of zero.  This breaks dyngen because it uses
-       the address of a symbol, and gcc knows symbols can't have address
-       zero.  */
     if (word && quot > 0xffff)
         flags |= CCF_V;
     if (quot == 0)
@@ -231,8 +213,9 @@
 
     num = env->div1;
     den = env->div2;
-    if (den == 0)
-        raise_exception(EXCP_DIV0);
+    if (den == 0) {
+        raise_exception(env, EXCP_DIV0);
+    }
     quot = num / den;
     rem = num % den;
     flags = 0;
diff --git a/target-m68k/translate.c b/target-m68k/translate.c
index 9fc1e31..fb707f2 100644
--- a/target-m68k/translate.c
+++ b/target-m68k/translate.c
@@ -150,18 +150,24 @@
 #define OS_SINGLE 4
 #define OS_DOUBLE 5
 
-typedef void (*disas_proc)(DisasContext *, uint16_t);
+typedef void (*disas_proc)(CPUM68KState *env, DisasContext *s, uint16_t insn);
 
 #ifdef DEBUG_DISPATCH
-#define DISAS_INSN(name) \
-  static void real_disas_##name (DisasContext *s, uint16_t insn); \
-  static void disas_##name (DisasContext *s, uint16_t insn) { \
-    qemu_log("Dispatch " #name "\n"); \
-    real_disas_##name(s, insn); } \
-  static void real_disas_##name (DisasContext *s, uint16_t insn)
+#define DISAS_INSN(name)                                                \
+    static void real_disas_##name(CPUM68KState *env, DisasContext *s,   \
+                                  uint16_t insn);                       \
+    static void disas_##name(CPUM68KState *env, DisasContext *s,        \
+                             uint16_t insn)                             \
+    {                                                                   \
+        qemu_log("Dispatch " #name "\n");                               \
+        real_disas_##name(s, env, insn);                                \
+    }                                                                   \
+    static void real_disas_##name(CPUM68KState *env, DisasContext *s,   \
+                                  uint16_t insn)
 #else
-#define DISAS_INSN(name) \
-  static void disas_##name (DisasContext *s, uint16_t insn)
+#define DISAS_INSN(name)                                                \
+    static void disas_##name(CPUM68KState *env, DisasContext *s,        \
+                             uint16_t insn)
 #endif
 
 /* Generate a load from the specified address.  Narrow values are
@@ -257,12 +263,12 @@
 }
 
 /* Read a 32-bit immediate constant.  */
-static inline uint32_t read_im32(DisasContext *s)
+static inline uint32_t read_im32(CPUM68KState *env, DisasContext *s)
 {
     uint32_t im;
-    im = ((uint32_t)lduw_code(s->pc)) << 16;
+    im = ((uint32_t)cpu_lduw_code(env, s->pc)) << 16;
     s->pc += 2;
-    im |= lduw_code(s->pc);
+    im |= cpu_lduw_code(env, s->pc);
     s->pc += 2;
     return im;
 }
@@ -288,7 +294,8 @@
 
 /* Handle a base + index + displacement effective addresss.
    A NULL_QREG base means pc-relative.  */
-static TCGv gen_lea_indexed(DisasContext *s, int opsize, TCGv base)
+static TCGv gen_lea_indexed(CPUM68KState *env, DisasContext *s, int opsize,
+                            TCGv base)
 {
     uint32_t offset;
     uint16_t ext;
@@ -297,7 +304,7 @@
     uint32_t bd, od;
 
     offset = s->pc;
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
     if ((ext & 0x800) == 0 && !m68k_feature(s->env, M68K_FEATURE_WORD_INDEX))
@@ -311,10 +318,10 @@
         if ((ext & 0x30) > 0x10) {
             /* base displacement */
             if ((ext & 0x30) == 0x20) {
-                bd = (int16_t)lduw_code(s->pc);
+                bd = (int16_t)cpu_lduw_code(env, s->pc);
                 s->pc += 2;
             } else {
-                bd = read_im32(s);
+                bd = read_im32(env, s);
             }
         } else {
             bd = 0;
@@ -360,10 +367,10 @@
             if ((ext & 3) > 1) {
                 /* outer displacement */
                 if ((ext & 3) == 2) {
-                    od = (int16_t)lduw_code(s->pc);
+                    od = (int16_t)cpu_lduw_code(env, s->pc);
                     s->pc += 2;
                 } else {
-                    od = read_im32(s);
+                    od = read_im32(env, s);
                 }
             } else {
                 od = 0;
@@ -492,7 +499,8 @@
 
 /* Generate code for an "effective address".  Does not adjust the base
    register for autoincrement addressing modes.  */
-static TCGv gen_lea(DisasContext *s, uint16_t insn, int opsize)
+static TCGv gen_lea(CPUM68KState *env, DisasContext *s, uint16_t insn,
+                    int opsize)
 {
     TCGv reg;
     TCGv tmp;
@@ -514,29 +522,29 @@
     case 5: /* Indirect displacement.  */
         reg = AREG(insn, 0);
         tmp = tcg_temp_new();
-        ext = lduw_code(s->pc);
+        ext = cpu_lduw_code(env, s->pc);
         s->pc += 2;
         tcg_gen_addi_i32(tmp, reg, (int16_t)ext);
         return tmp;
     case 6: /* Indirect index + displacement.  */
         reg = AREG(insn, 0);
-        return gen_lea_indexed(s, opsize, reg);
+        return gen_lea_indexed(env, s, opsize, reg);
     case 7: /* Other */
         switch (insn & 7) {
         case 0: /* Absolute short.  */
-            offset = ldsw_code(s->pc);
+            offset = cpu_ldsw_code(env, s->pc);
             s->pc += 2;
             return tcg_const_i32(offset);
         case 1: /* Absolute long.  */
-            offset = read_im32(s);
+            offset = read_im32(env, s);
             return tcg_const_i32(offset);
         case 2: /* pc displacement  */
             offset = s->pc;
-            offset += ldsw_code(s->pc);
+            offset += cpu_ldsw_code(env, s->pc);
             s->pc += 2;
             return tcg_const_i32(offset);
         case 3: /* pc index+displacement.  */
-            return gen_lea_indexed(s, opsize, NULL_QREG);
+            return gen_lea_indexed(env, s, opsize, NULL_QREG);
         case 4: /* Immediate.  */
         default:
             return NULL_QREG;
@@ -548,15 +556,16 @@
 
 /* Helper function for gen_ea. Reuse the computed address between the
    for read/write operands.  */
-static inline TCGv gen_ea_once(DisasContext *s, uint16_t insn, int opsize,
-                              TCGv val, TCGv *addrp, ea_what what)
+static inline TCGv gen_ea_once(CPUM68KState *env, DisasContext *s,
+                               uint16_t insn, int opsize, TCGv val,
+                               TCGv *addrp, ea_what what)
 {
     TCGv tmp;
 
     if (addrp && what == EA_STORE) {
         tmp = *addrp;
     } else {
-        tmp = gen_lea(s, insn, opsize);
+        tmp = gen_lea(env, s, insn, opsize);
         if (IS_NULL_QREG(tmp))
             return tmp;
         if (addrp)
@@ -568,8 +577,8 @@
 /* Generate code to load/store a value ito/from an EA.  If VAL > 0 this is
    a write otherwise it is a read (0 == sign extend, -1 == zero extend).
    ADDRP is non-null for readwrite operands.  */
-static TCGv gen_ea(DisasContext *s, uint16_t insn, int opsize, TCGv val,
-                   TCGv *addrp, ea_what what)
+static TCGv gen_ea(CPUM68KState *env, DisasContext *s, uint16_t insn,
+                   int opsize, TCGv val, TCGv *addrp, ea_what what)
 {
     TCGv reg;
     TCGv result;
@@ -609,7 +618,7 @@
             if (addrp && what == EA_STORE) {
                 tmp = *addrp;
             } else {
-                tmp = gen_lea(s, insn, opsize);
+                tmp = gen_lea(env, s, insn, opsize);
                 if (IS_NULL_QREG(tmp))
                     return tmp;
                 if (addrp)
@@ -626,33 +635,35 @@
         return result;
     case 5: /* Indirect displacement.  */
     case 6: /* Indirect index + displacement.  */
-        return gen_ea_once(s, insn, opsize, val, addrp, what);
+        return gen_ea_once(env, s, insn, opsize, val, addrp, what);
     case 7: /* Other */
         switch (insn & 7) {
         case 0: /* Absolute short.  */
         case 1: /* Absolute long.  */
         case 2: /* pc displacement  */
         case 3: /* pc index+displacement.  */
-            return gen_ea_once(s, insn, opsize, val, addrp, what);
+            return gen_ea_once(env, s, insn, opsize, val, addrp, what);
         case 4: /* Immediate.  */
             /* Sign extend values for consistency.  */
             switch (opsize) {
             case OS_BYTE:
-                if (what == EA_LOADS)
-                    offset = ldsb_code(s->pc + 1);
-                else
-                    offset = ldub_code(s->pc + 1);
+                if (what == EA_LOADS) {
+                    offset = cpu_ldsb_code(env, s->pc + 1);
+                } else {
+                    offset = cpu_ldub_code(env, s->pc + 1);
+                }
                 s->pc += 2;
                 break;
             case OS_WORD:
-                if (what == EA_LOADS)
-                    offset = ldsw_code(s->pc);
-                else
-                    offset = lduw_code(s->pc);
+                if (what == EA_LOADS) {
+                    offset = cpu_ldsw_code(env, s->pc);
+                } else {
+                    offset = cpu_lduw_code(env, s->pc);
+                }
                 s->pc += 2;
                 break;
             case OS_LONG:
-                offset = read_im32(s);
+                offset = read_im32(env, s);
                 break;
             default:
                 qemu_assert(0, "Bad immediate operand");
@@ -815,7 +826,7 @@
 {
     gen_flush_cc_op(s);
     gen_jmp_im(s, where);
-    gen_helper_raise_exception(tcg_const_i32(nr));
+    gen_helper_raise_exception(cpu_env, tcg_const_i32(nr));
 }
 
 static inline void gen_addr_fault(DisasContext *s)
@@ -823,20 +834,21 @@
     gen_exception(s, s->insn_pc, EXCP_ADDRESS);
 }
 
-#define SRC_EA(result, opsize, op_sign, addrp) do { \
-    result = gen_ea(s, insn, opsize, NULL_QREG, addrp, op_sign ? EA_LOADS : EA_LOADU); \
-    if (IS_NULL_QREG(result)) { \
-        gen_addr_fault(s); \
-        return; \
-    } \
+#define SRC_EA(env, result, opsize, op_sign, addrp) do {                \
+        result = gen_ea(env, s, insn, opsize, NULL_QREG, addrp,         \
+                        op_sign ? EA_LOADS : EA_LOADU);                 \
+        if (IS_NULL_QREG(result)) {                                     \
+            gen_addr_fault(s);                                          \
+            return;                                                     \
+        }                                                               \
     } while (0)
 
-#define DEST_EA(insn, opsize, val, addrp) do { \
-    TCGv ea_result = gen_ea(s, insn, opsize, val, addrp, EA_STORE); \
-    if (IS_NULL_QREG(ea_result)) { \
-        gen_addr_fault(s); \
-        return; \
-    } \
+#define DEST_EA(env, insn, opsize, val, addrp) do {                     \
+        TCGv ea_result = gen_ea(env, s, insn, opsize, val, addrp, EA_STORE); \
+        if (IS_NULL_QREG(ea_result)) {                                  \
+            gen_addr_fault(s);                                          \
+            return;                                                     \
+        }                                                               \
     } while (0)
 
 /* Generate a jump to an immediate address.  */
@@ -872,8 +884,7 @@
 DISAS_INSN(undef)
 {
     gen_exception(s, s->pc - 2, EXCP_UNSUPPORTED);
-    cpu_abort(cpu_single_env, "Illegal instruction: %04x @ %08x",
-              insn, s->pc - 2);
+    cpu_abort(env, "Illegal instruction: %04x @ %08x", insn, s->pc - 2);
 }
 
 DISAS_INSN(mulw)
@@ -890,7 +901,7 @@
         tcg_gen_ext16s_i32(tmp, reg);
     else
         tcg_gen_ext16u_i32(tmp, reg);
-    SRC_EA(src, OS_WORD, sign, NULL);
+    SRC_EA(env, src, OS_WORD, sign, NULL);
     tcg_gen_mul_i32(tmp, tmp, src);
     tcg_gen_mov_i32(reg, tmp);
     /* Unlike m68k, coldfire always clears the overflow bit.  */
@@ -911,7 +922,7 @@
     } else {
         tcg_gen_ext16u_i32(QREG_DIV1, reg);
     }
-    SRC_EA(src, OS_WORD, sign, NULL);
+    SRC_EA(env, src, OS_WORD, sign, NULL);
     tcg_gen_mov_i32(QREG_DIV2, src);
     if (sign) {
         gen_helper_divs(cpu_env, tcg_const_i32(1));
@@ -934,7 +945,7 @@
     TCGv reg;
     uint16_t ext;
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (ext & 0x87f8) {
         gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
@@ -943,7 +954,7 @@
     num = DREG(ext, 12);
     reg = DREG(ext, 0);
     tcg_gen_mov_i32(QREG_DIV1, num);
-    SRC_EA(den, OS_LONG, 0, NULL);
+    SRC_EA(env, den, OS_LONG, 0, NULL);
     tcg_gen_mov_i32(QREG_DIV2, den);
     if (ext & 0x0800) {
         gen_helper_divs(cpu_env, tcg_const_i32(0));
@@ -973,11 +984,11 @@
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     if (insn & 0x100) {
-        SRC_EA(tmp, OS_LONG, 0, &addr);
+        SRC_EA(env, tmp, OS_LONG, 0, &addr);
         src = reg;
     } else {
         tmp = reg;
-        SRC_EA(src, OS_LONG, 0, NULL);
+        SRC_EA(env, src, OS_LONG, 0, NULL);
     }
     if (add) {
         tcg_gen_add_i32(dest, tmp, src);
@@ -990,7 +1001,7 @@
     }
     gen_update_cc_add(dest, src);
     if (insn & 0x100) {
-        DEST_EA(insn, OS_LONG, dest, &addr);
+        DEST_EA(env, insn, OS_LONG, dest, &addr);
     } else {
         tcg_gen_mov_i32(reg, dest);
     }
@@ -1020,7 +1031,7 @@
     else
         opsize = OS_LONG;
     op = (insn >> 6) & 3;
-    SRC_EA(src1, opsize, 0, op ? &addr: NULL);
+    SRC_EA(env, src1, opsize, 0, op ? &addr: NULL);
     src2 = DREG(insn, 9);
     dest = tcg_temp_new();
 
@@ -1055,7 +1066,7 @@
         break;
     }
     if (op)
-        DEST_EA(insn, opsize, dest, &addr);
+        DEST_EA(env, insn, opsize, dest, &addr);
 }
 
 DISAS_INSN(sats)
@@ -1086,9 +1097,9 @@
     TCGv tmp;
     int is_load;
 
-    mask = lduw_code(s->pc);
+    mask = cpu_lduw_code(env, s->pc);
     s->pc += 2;
-    tmp = gen_lea(s, insn, OS_LONG);
+    tmp = gen_lea(env, s, insn, OS_LONG);
     if (IS_NULL_QREG(tmp)) {
         gen_addr_fault(s);
         return;
@@ -1130,14 +1141,14 @@
         opsize = OS_LONG;
     op = (insn >> 6) & 3;
 
-    bitnum = lduw_code(s->pc);
+    bitnum = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (bitnum & 0xff00) {
-        disas_undef(s, insn);
+        disas_undef(env, s, insn);
         return;
     }
 
-    SRC_EA(src1, opsize, 0, op ? &addr: NULL);
+    SRC_EA(env, src1, opsize, 0, op ? &addr: NULL);
 
     gen_flush_flags(s);
     if (opsize == OS_BYTE)
@@ -1172,7 +1183,7 @@
         default: /* btst */
             break;
         }
-        DEST_EA(insn, opsize, tmp, &addr);
+        DEST_EA(env, insn, opsize, tmp, &addr);
     }
 }
 
@@ -1185,8 +1196,8 @@
     TCGv addr;
 
     op = (insn >> 9) & 7;
-    SRC_EA(src1, OS_LONG, 0, (op == 6) ? NULL : &addr);
-    im = read_im32(s);
+    SRC_EA(env, src1, OS_LONG, 0, (op == 6) ? NULL : &addr);
+    im = read_im32(env, s);
     dest = tcg_temp_new();
     switch (op) {
     case 0: /* ori */
@@ -1225,7 +1236,7 @@
         abort();
     }
     if (op != 6) {
-        DEST_EA(insn, OS_LONG, dest, &addr);
+        DEST_EA(env, insn, OS_LONG, dest, &addr);
     }
 }
 
@@ -1257,7 +1268,7 @@
     default:
         abort();
     }
-    SRC_EA(src, opsize, 1, NULL);
+    SRC_EA(env, src, opsize, 1, NULL);
     op = (insn >> 6) & 7;
     if (op == 1) {
         /* movea */
@@ -1268,7 +1279,7 @@
         /* normal move */
         uint16_t dest_ea;
         dest_ea = ((insn >> 9) & 7) | (op << 3);
-        DEST_EA(dest_ea, opsize, src, NULL);
+        DEST_EA(env, dest_ea, opsize, src, NULL);
         /* This will be correct because loads sign extend.  */
         gen_logic_cc(s, src);
     }
@@ -1289,7 +1300,7 @@
     TCGv tmp;
 
     reg = AREG(insn, 9);
-    tmp = gen_lea(s, insn, OS_LONG);
+    tmp = gen_lea(env, s, insn, OS_LONG);
     if (IS_NULL_QREG(tmp)) {
         gen_addr_fault(s);
         return;
@@ -1314,7 +1325,7 @@
     default:
         abort();
     }
-    DEST_EA(insn, opsize, tcg_const_i32(0), NULL);
+    DEST_EA(env, insn, opsize, tcg_const_i32(0), NULL);
     gen_logic_cc(s, tcg_const_i32(0));
 }
 
@@ -1363,7 +1374,8 @@
     }
 }
 
-static void gen_set_sr(DisasContext *s, uint16_t insn, int ccr_only)
+static void gen_set_sr(CPUM68KState *env, DisasContext *s, uint16_t insn,
+                       int ccr_only)
 {
     TCGv tmp;
     TCGv reg;
@@ -1383,17 +1395,17 @@
     else if ((insn & 0x3f) == 0x3c)
       {
         uint16_t val;
-        val = lduw_code(s->pc);
+        val = cpu_lduw_code(env, s->pc);
         s->pc += 2;
         gen_set_sr_im(s, val, ccr_only);
       }
     else
-        disas_undef(s, insn);
+        disas_undef(env, s, insn);
 }
 
 DISAS_INSN(move_to_ccr)
 {
-    gen_set_sr(s, insn, 1);
+    gen_set_sr(env, s, insn, 1);
 }
 
 DISAS_INSN(not)
@@ -1424,7 +1436,7 @@
 {
     TCGv tmp;
 
-    tmp = gen_lea(s, insn, OS_LONG);
+    tmp = gen_lea(env, s, insn, OS_LONG);
     if (IS_NULL_QREG(tmp)) {
         gen_addr_fault(s);
         return;
@@ -1470,7 +1482,7 @@
     default:
         abort();
     }
-    SRC_EA(tmp, opsize, 1, NULL);
+    SRC_EA(env, tmp, opsize, 1, NULL);
     gen_logic_cc(s, tmp);
 }
 
@@ -1492,10 +1504,10 @@
     TCGv addr;
 
     dest = tcg_temp_new();
-    SRC_EA(src1, OS_BYTE, 1, &addr);
+    SRC_EA(env, src1, OS_BYTE, 1, &addr);
     gen_logic_cc(s, src1);
     tcg_gen_ori_i32(dest, src1, 0x80);
-    DEST_EA(insn, OS_BYTE, dest, &addr);
+    DEST_EA(env, insn, OS_BYTE, dest, &addr);
 }
 
 DISAS_INSN(mull)
@@ -1507,14 +1519,14 @@
 
     /* The upper 32 bits of the product are discarded, so
        muls.l and mulu.l are functionally equivalent.  */
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (ext & 0x87ff) {
         gen_exception(s, s->pc - 4, EXCP_UNSUPPORTED);
         return;
     }
     reg = DREG(ext, 12);
-    SRC_EA(src1, OS_LONG, 0, NULL);
+    SRC_EA(env, src1, OS_LONG, 0, NULL);
     dest = tcg_temp_new();
     tcg_gen_mul_i32(dest, src1, reg);
     tcg_gen_mov_i32(reg, dest);
@@ -1528,7 +1540,7 @@
     TCGv reg;
     TCGv tmp;
 
-    offset = ldsw_code(s->pc);
+    offset = cpu_ldsw_code(env, s->pc);
     s->pc += 2;
     reg = AREG(insn, 0);
     tmp = tcg_temp_new();
@@ -1572,7 +1584,7 @@
 
     /* Load the target address first to ensure correct exception
        behavior.  */
-    tmp = gen_lea(s, insn, OS_LONG);
+    tmp = gen_lea(env, s, insn, OS_LONG);
     if (IS_NULL_QREG(tmp)) {
         gen_addr_fault(s);
         return;
@@ -1592,7 +1604,7 @@
     int val;
     TCGv addr;
 
-    SRC_EA(src1, OS_LONG, 0, &addr);
+    SRC_EA(env, src1, OS_LONG, 0, &addr);
     val = (insn >> 9) & 7;
     if (val == 0)
         val = 8;
@@ -1619,7 +1631,7 @@
         }
         gen_update_cc_add(dest, src2);
     }
-    DEST_EA(insn, OS_LONG, dest, &addr);
+    DEST_EA(env, insn, OS_LONG, dest, &addr);
 }
 
 DISAS_INSN(tpf)
@@ -1634,7 +1646,7 @@
     case 4: /* No extension words.  */
         break;
     default:
-        disas_undef(s, insn);
+        disas_undef(env, s, insn);
     }
 }
 
@@ -1649,10 +1661,10 @@
     op = (insn >> 8) & 0xf;
     offset = (int8_t)insn;
     if (offset == 0) {
-        offset = ldsw_code(s->pc);
+        offset = cpu_ldsw_code(env, s->pc);
         s->pc += 2;
     } else if (offset == -1) {
-        offset = read_im32(s);
+        offset = read_im32(env, s);
     }
     if (op == 1) {
         /* bsr */
@@ -1691,7 +1703,7 @@
         opsize = OS_WORD;
     else
         opsize = OS_BYTE;
-    SRC_EA(src, opsize, (insn & 0x80) == 0, NULL);
+    SRC_EA(env, src, opsize, (insn & 0x80) == 0, NULL);
     reg = DREG(insn, 9);
     tcg_gen_mov_i32(reg, src);
     gen_logic_cc(s, src);
@@ -1707,11 +1719,11 @@
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     if (insn & 0x100) {
-        SRC_EA(src, OS_LONG, 0, &addr);
+        SRC_EA(env, src, OS_LONG, 0, &addr);
         tcg_gen_or_i32(dest, src, reg);
-        DEST_EA(insn, OS_LONG, dest, &addr);
+        DEST_EA(env, insn, OS_LONG, dest, &addr);
     } else {
-        SRC_EA(src, OS_LONG, 0, NULL);
+        SRC_EA(env, src, OS_LONG, 0, NULL);
         tcg_gen_or_i32(dest, src, reg);
         tcg_gen_mov_i32(reg, dest);
     }
@@ -1723,7 +1735,7 @@
     TCGv src;
     TCGv reg;
 
-    SRC_EA(src, OS_LONG, 0, NULL);
+    SRC_EA(env, src, OS_LONG, 0, NULL);
     reg = AREG(insn, 9);
     tcg_gen_sub_i32(reg, reg, src);
 }
@@ -1749,7 +1761,7 @@
         val = -1;
     src = tcg_const_i32(val);
     gen_logic_cc(s, src);
-    DEST_EA(insn, OS_LONG, src, NULL);
+    DEST_EA(env, insn, OS_LONG, src, NULL);
 }
 
 DISAS_INSN(cmp)
@@ -1777,7 +1789,7 @@
     default:
         abort();
     }
-    SRC_EA(src, opsize, 1, NULL);
+    SRC_EA(env, src, opsize, 1, NULL);
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     tcg_gen_sub_i32(dest, reg, src);
@@ -1796,7 +1808,7 @@
     } else {
         opsize = OS_WORD;
     }
-    SRC_EA(src, opsize, 1, NULL);
+    SRC_EA(env, src, opsize, 1, NULL);
     reg = AREG(insn, 9);
     dest = tcg_temp_new();
     tcg_gen_sub_i32(dest, reg, src);
@@ -1811,12 +1823,12 @@
     TCGv dest;
     TCGv addr;
 
-    SRC_EA(src, OS_LONG, 0, &addr);
+    SRC_EA(env, src, OS_LONG, 0, &addr);
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     tcg_gen_xor_i32(dest, src, reg);
     gen_logic_cc(s, dest);
-    DEST_EA(insn, OS_LONG, dest, &addr);
+    DEST_EA(env, insn, OS_LONG, dest, &addr);
 }
 
 DISAS_INSN(and)
@@ -1829,11 +1841,11 @@
     reg = DREG(insn, 9);
     dest = tcg_temp_new();
     if (insn & 0x100) {
-        SRC_EA(src, OS_LONG, 0, &addr);
+        SRC_EA(env, src, OS_LONG, 0, &addr);
         tcg_gen_and_i32(dest, src, reg);
-        DEST_EA(insn, OS_LONG, dest, &addr);
+        DEST_EA(env, insn, OS_LONG, dest, &addr);
     } else {
-        SRC_EA(src, OS_LONG, 0, NULL);
+        SRC_EA(env, src, OS_LONG, 0, NULL);
         tcg_gen_and_i32(dest, src, reg);
         tcg_gen_mov_i32(reg, dest);
     }
@@ -1845,7 +1857,7 @@
     TCGv src;
     TCGv reg;
 
-    SRC_EA(src, OS_LONG, 0, NULL);
+    SRC_EA(env, src, OS_LONG, 0, NULL);
     reg = AREG(insn, 9);
     tcg_gen_add_i32(reg, reg, src);
 }
@@ -1934,13 +1946,13 @@
     uint32_t addr;
 
     addr = s->pc - 2;
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (ext != 0x46FC) {
         gen_exception(s, addr, EXCP_UNSUPPORTED);
         return;
     }
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     if (IS_USER(s) || (ext & SR_S) == 0) {
         gen_exception(s, addr, EXCP_PRIVILEGE);
@@ -1970,7 +1982,7 @@
         gen_exception(s, s->pc - 2, EXCP_PRIVILEGE);
         return;
     }
-    gen_set_sr(s, insn, 0);
+    gen_set_sr(env, s, insn, 0);
     gen_lookup_tb(s);
 }
 
@@ -2008,7 +2020,7 @@
         return;
     }
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
     gen_set_sr_im(s, ext, 0);
@@ -2035,7 +2047,7 @@
         return;
     }
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
     if (ext & 0x8000) {
@@ -2100,7 +2112,7 @@
     int set_dest;
     int opsize;
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
     opmode = ext & 0x7f;
     switch ((ext >> 13) & 7) {
@@ -2136,7 +2148,7 @@
                 tcg_gen_addi_i32(tmp32, tmp32, -8);
                 break;
             case 5:
-                offset = ldsw_code(s->pc);
+                offset = cpu_ldsw_code(env, s->pc);
                 s->pc += 2;
                 tcg_gen_addi_i32(tmp32, tmp32, offset);
                 break;
@@ -2162,7 +2174,7 @@
         default:
             goto undef;
         }
-        DEST_EA(insn, opsize, tmp32, NULL);
+        DEST_EA(env, insn, opsize, tmp32, NULL);
         tcg_temp_free_i32(tmp32);
         return;
     case 4: /* fmove to control register.  */
@@ -2190,7 +2202,7 @@
                       (ext >> 10) & 7);
             goto undef;
         }
-        DEST_EA(insn, OS_LONG, tmp32, NULL);
+        DEST_EA(env, insn, OS_LONG, tmp32, NULL);
         break;
     case 6: /* fmovem */
     case 7:
@@ -2200,7 +2212,7 @@
             int i;
             if ((ext & 0x1f00) != 0x1000 || (ext & 0xff) == 0)
                 goto undef;
-            tmp32 = gen_lea(s, insn, OS_LONG);
+            tmp32 = gen_lea(env, s, insn, OS_LONG);
             if (IS_NULL_QREG(tmp32)) {
                 gen_addr_fault(s);
                 return;
@@ -2250,12 +2262,12 @@
                 tcg_gen_addi_i32(tmp32, tmp32, -8);
                 break;
             case 5:
-                offset = ldsw_code(s->pc);
+                offset = cpu_ldsw_code(env, s->pc);
                 s->pc += 2;
                 tcg_gen_addi_i32(tmp32, tmp32, offset);
                 break;
             case 7:
-                offset = ldsw_code(s->pc);
+                offset = cpu_ldsw_code(env, s->pc);
                 offset += s->pc - 2;
                 s->pc += 2;
                 tcg_gen_addi_i32(tmp32, tmp32, offset);
@@ -2275,7 +2287,7 @@
             }
             tcg_temp_free_i32(tmp32);
         } else {
-            SRC_EA(tmp32, opsize, 1, NULL);
+            SRC_EA(env, tmp32, opsize, 1, NULL);
             src = tcg_temp_new_i64();
             switch (opsize) {
             case OS_LONG:
@@ -2370,7 +2382,7 @@
 undef:
     /* FIXME: Is this right for offset addressing modes?  */
     s->pc -= 2;
-    disas_undef_fpu(s, insn);
+    disas_undef_fpu(env, s, insn);
 }
 
 DISAS_INSN(fbcc)
@@ -2381,10 +2393,10 @@
     int l1;
 
     addr = s->pc;
-    offset = ldsw_code(s->pc);
+    offset = cpu_ldsw_code(env, s->pc);
     s->pc += 2;
     if (insn & (1 << 6)) {
-        offset = (offset << 16) | lduw_code(s->pc);
+        offset = (offset << 16) | cpu_lduw_code(env, s->pc);
         s->pc += 2;
     }
 
@@ -2506,18 +2518,18 @@
         s->done_mac = 1;
     }
 
-    ext = lduw_code(s->pc);
+    ext = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
     acc = ((insn >> 7) & 1) | ((ext >> 3) & 2);
     dual = ((insn & 0x30) != 0 && (ext & 3) != 0);
     if (dual && !m68k_feature(s->env, M68K_FEATURE_CF_EMAC_B)) {
-        disas_undef(s, insn);
+        disas_undef(env, s, insn);
         return;
     }
     if (insn & 0x30) {
         /* MAC with load.  */
-        tmp = gen_lea(s, insn, OS_LONG);
+        tmp = gen_lea(env, s, insn, OS_LONG);
         addr = tcg_temp_new();
         tcg_gen_and_i32(addr, tmp, QREG_MAC_MASK);
         /* Load the value now to ensure correct exception behavior.
@@ -2731,7 +2743,7 @@
     int accnum;
     accnum = (insn >> 9) & 3;
     acc = MACREG(accnum);
-    SRC_EA(val, OS_LONG, 0, NULL);
+    SRC_EA(env, val, OS_LONG, 0, NULL);
     if (s->env->macsr & MACSR_FI) {
         tcg_gen_ext_i32_i64(acc, val);
         tcg_gen_shli_i64(acc, acc, 8);
@@ -2748,7 +2760,7 @@
 DISAS_INSN(to_macsr)
 {
     TCGv val;
-    SRC_EA(val, OS_LONG, 0, NULL);
+    SRC_EA(env, val, OS_LONG, 0, NULL);
     gen_helper_set_macsr(cpu_env, val);
     gen_lookup_tb(s);
 }
@@ -2756,7 +2768,7 @@
 DISAS_INSN(to_mask)
 {
     TCGv val;
-    SRC_EA(val, OS_LONG, 0, NULL);
+    SRC_EA(env, val, OS_LONG, 0, NULL);
     tcg_gen_ori_i32(QREG_MAC_MASK, val, 0xffff0000);
 }
 
@@ -2764,7 +2776,7 @@
 {
     TCGv val;
     TCGv acc;
-    SRC_EA(val, OS_LONG, 0, NULL);
+    SRC_EA(env, val, OS_LONG, 0, NULL);
     acc = tcg_const_i32((insn & 0x400) ? 2 : 0);
     if (s->env->macsr & MACSR_FI)
         gen_helper_set_mac_extf(cpu_env, val, acc);
@@ -2941,10 +2953,10 @@
 {
     uint16_t insn;
 
-    insn = lduw_code(s->pc);
+    insn = cpu_lduw_code(env, s->pc);
     s->pc += 2;
 
-    opcode_table[insn](s, insn);
+    opcode_table[insn](env, s, insn);
 }
 
 /* generate intermediate code for basic block 'tb'.  */
@@ -3028,7 +3040,7 @@
             gen_flush_cc_op(dc);
             tcg_gen_movi_i32(QREG_PC, dc->pc);
         }
-        gen_helper_raise_exception(tcg_const_i32(EXCP_DEBUG));
+        gen_helper_raise_exception(cpu_env, tcg_const_i32(EXCP_DEBUG));
     } else {
         switch(dc->is_jmp) {
         case DISAS_NEXT:
diff --git a/target-microblaze/Makefile.objs b/target-microblaze/Makefile.objs
index 4b09e8c..afb87bc 100644
--- a/target-microblaze/Makefile.objs
+++ b/target-microblaze/Makefile.objs
@@ -1,4 +1,2 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += mmu.o machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-microblaze/helper.h b/target-microblaze/helper.h
index 9dcfb0f..a1a732c 100644
--- a/target-microblaze/helper.h
+++ b/target-microblaze/helper.h
@@ -1,39 +1,39 @@
 #include "def-helper.h"
 
-DEF_HELPER_1(raise_exception, void, i32)
-DEF_HELPER_0(debug, void)
+DEF_HELPER_2(raise_exception, void, env, i32)
+DEF_HELPER_1(debug, void, env)
 DEF_HELPER_FLAGS_3(carry, TCG_CALL_PURE | TCG_CALL_CONST, i32, i32, i32, i32)
 DEF_HELPER_2(cmp, i32, i32, i32)
 DEF_HELPER_2(cmpu, i32, i32, i32)
 DEF_HELPER_FLAGS_1(clz, TCG_CALL_PURE | TCG_CALL_CONST, i32, i32)
 
-DEF_HELPER_2(divs, i32, i32, i32)
-DEF_HELPER_2(divu, i32, i32, i32)
+DEF_HELPER_3(divs, i32, env, i32, i32)
+DEF_HELPER_3(divu, i32, env, i32, i32)
 
-DEF_HELPER_2(fadd, i32, i32, i32)
-DEF_HELPER_2(frsub, i32, i32, i32)
-DEF_HELPER_2(fmul, i32, i32, i32)
-DEF_HELPER_2(fdiv, i32, i32, i32)
-DEF_HELPER_1(flt, i32, i32)
-DEF_HELPER_1(fint, i32, i32)
-DEF_HELPER_1(fsqrt, i32, i32)
+DEF_HELPER_3(fadd, i32, env, i32, i32)
+DEF_HELPER_3(frsub, i32, env, i32, i32)
+DEF_HELPER_3(fmul, i32, env, i32, i32)
+DEF_HELPER_3(fdiv, i32, env, i32, i32)
+DEF_HELPER_2(flt, i32, env, i32)
+DEF_HELPER_2(fint, i32, env, i32)
+DEF_HELPER_2(fsqrt, i32, env, i32)
 
-DEF_HELPER_2(fcmp_un, i32, i32, i32)
-DEF_HELPER_2(fcmp_lt, i32, i32, i32)
-DEF_HELPER_2(fcmp_eq, i32, i32, i32)
-DEF_HELPER_2(fcmp_le, i32, i32, i32)
-DEF_HELPER_2(fcmp_gt, i32, i32, i32)
-DEF_HELPER_2(fcmp_ne, i32, i32, i32)
-DEF_HELPER_2(fcmp_ge, i32, i32, i32)
+DEF_HELPER_3(fcmp_un, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_lt, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_eq, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_le, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_gt, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_ne, i32, env, i32, i32)
+DEF_HELPER_3(fcmp_ge, i32, env, i32, i32)
 
 DEF_HELPER_FLAGS_2(pcmpbf, TCG_CALL_PURE | TCG_CALL_CONST, i32, i32, i32)
 #if !defined(CONFIG_USER_ONLY)
-DEF_HELPER_1(mmu_read, i32, i32)
-DEF_HELPER_2(mmu_write, void, i32, i32)
+DEF_HELPER_2(mmu_read, i32, env, i32)
+DEF_HELPER_3(mmu_write, void, env, i32, i32)
 #endif
 
-DEF_HELPER_4(memalign, void, i32, i32, i32, i32)
-DEF_HELPER_1(stackprot, void, i32)
+DEF_HELPER_5(memalign, void, env, i32, i32, i32, i32)
+DEF_HELPER_2(stackprot, void, env, i32)
 
 DEF_HELPER_2(get, i32, i32, i32)
 DEF_HELPER_3(put, void, i32, i32, i32)
diff --git a/target-microblaze/op_helper.c b/target-microblaze/op_helper.c
index 3b1f072..c9789f4 100644
--- a/target-microblaze/op_helper.c
+++ b/target-microblaze/op_helper.c
@@ -20,7 +20,6 @@
 
 #include <assert.h>
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 #include "host-utils.h"
 
@@ -42,17 +41,12 @@
 /* Try to fill the TLB and return an exception if error. If retaddr is
    NULL, it means that the function was called in C code (i.e. not
    from generated code or from helper.c) */
-/* XXX: fix it to restore all registers */
-void tlb_fill(CPUMBState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUMBState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUMBState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
-
     ret = cpu_mb_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -66,7 +60,6 @@
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 #endif
 
@@ -105,13 +98,13 @@
     return 0xdead0000 | id;
 }
 
-void helper_raise_exception(uint32_t index)
+void helper_raise_exception(CPUMBState *env, uint32_t index)
 {
     env->exception_index = index;
     cpu_loop_exit(env);
 }
 
-void helper_debug(void)
+void helper_debug(CPUMBState *env)
 {
     int i;
 
@@ -176,7 +169,7 @@
     return ncf;
 }
 
-static inline int div_prepare(uint32_t a, uint32_t b)
+static inline int div_prepare(CPUMBState *env, uint32_t a, uint32_t b)
 {
     if (b == 0) {
         env->sregs[SR_MSR] |= MSR_DZ;
@@ -184,7 +177,7 @@
         if ((env->sregs[SR_MSR] & MSR_EE)
             && !(env->pvr.regs[2] & PVR2_DIV_ZERO_EXC_MASK)) {
             env->sregs[SR_ESR] = ESR_EC_DIVZERO;
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
         }
         return 0;
     }
@@ -192,28 +185,30 @@
     return 1;
 }
 
-uint32_t helper_divs(uint32_t a, uint32_t b)
+uint32_t helper_divs(CPUMBState *env, uint32_t a, uint32_t b)
 {
-    if (!div_prepare(a, b))
+    if (!div_prepare(env, a, b)) {
         return 0;
+    }
     return (int32_t)a / (int32_t)b;
 }
 
-uint32_t helper_divu(uint32_t a, uint32_t b)
+uint32_t helper_divu(CPUMBState *env, uint32_t a, uint32_t b)
 {
-    if (!div_prepare(a, b))
+    if (!div_prepare(env, a, b)) {
         return 0;
+    }
     return a / b;
 }
 
 /* raise FPU exception.  */
-static void raise_fpu_exception(void)
+static void raise_fpu_exception(CPUMBState *env)
 {
     env->sregs[SR_ESR] = ESR_EC_FPU;
-    helper_raise_exception(EXCP_HW_EXCP);
+    helper_raise_exception(env, EXCP_HW_EXCP);
 }
 
-static void update_fpu_flags(int flags)
+static void update_fpu_flags(CPUMBState *env, int flags)
 {
     int raise = 0;
 
@@ -236,11 +231,11 @@
     if (raise
         && (env->pvr.regs[2] & PVR2_FPU_EXC_MASK)
         && (env->sregs[SR_MSR] & MSR_EE)) {
-        raise_fpu_exception();
+        raise_fpu_exception(env);
     }
 }
 
-uint32_t helper_fadd(uint32_t a, uint32_t b)
+uint32_t helper_fadd(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fd, fa, fb;
     int flags;
@@ -251,11 +246,11 @@
     fd.f = float32_add(fa.f, fb.f, &env->fp_status);
 
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
     return fd.l;
 }
 
-uint32_t helper_frsub(uint32_t a, uint32_t b)
+uint32_t helper_frsub(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fd, fa, fb;
     int flags;
@@ -265,11 +260,11 @@
     fb.l = b;
     fd.f = float32_sub(fb.f, fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
     return fd.l;
 }
 
-uint32_t helper_fmul(uint32_t a, uint32_t b)
+uint32_t helper_fmul(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fd, fa, fb;
     int flags;
@@ -279,12 +274,12 @@
     fb.l = b;
     fd.f = float32_mul(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
 
     return fd.l;
 }
 
-uint32_t helper_fdiv(uint32_t a, uint32_t b)
+uint32_t helper_fdiv(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fd, fa, fb;
     int flags;
@@ -294,12 +289,12 @@
     fb.l = b;
     fd.f = float32_div(fb.f, fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
 
     return fd.l;
 }
 
-uint32_t helper_fcmp_un(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_un(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     uint32_t r = 0;
@@ -308,7 +303,7 @@
     fb.l = b;
 
     if (float32_is_signaling_nan(fa.f) || float32_is_signaling_nan(fb.f)) {
-        update_fpu_flags(float_flag_invalid);
+        update_fpu_flags(env, float_flag_invalid);
         r = 1;
     }
 
@@ -319,7 +314,7 @@
     return r;
 }
 
-uint32_t helper_fcmp_lt(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_lt(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int r;
@@ -330,12 +325,12 @@
     fb.l = b;
     r = float32_lt(fb.f, fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
     return r;
 }
 
-uint32_t helper_fcmp_eq(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_eq(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags;
@@ -346,12 +341,12 @@
     fb.l = b;
     r = float32_eq_quiet(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
     return r;
 }
 
-uint32_t helper_fcmp_le(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_le(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags;
@@ -362,13 +357,13 @@
     set_float_exception_flags(0, &env->fp_status);
     r = float32_le(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
 
     return r;
 }
 
-uint32_t helper_fcmp_gt(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_gt(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags, r;
@@ -378,11 +373,11 @@
     set_float_exception_flags(0, &env->fp_status);
     r = float32_lt(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
     return r;
 }
 
-uint32_t helper_fcmp_ne(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_ne(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags, r;
@@ -392,12 +387,12 @@
     set_float_exception_flags(0, &env->fp_status);
     r = !float32_eq_quiet(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
     return r;
 }
 
-uint32_t helper_fcmp_ge(uint32_t a, uint32_t b)
+uint32_t helper_fcmp_ge(CPUMBState *env, uint32_t a, uint32_t b)
 {
     CPU_FloatU fa, fb;
     int flags, r;
@@ -407,12 +402,12 @@
     set_float_exception_flags(0, &env->fp_status);
     r = !float32_lt(fa.f, fb.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags & float_flag_invalid);
+    update_fpu_flags(env, flags & float_flag_invalid);
 
     return r;
 }
 
-uint32_t helper_flt(uint32_t a)
+uint32_t helper_flt(CPUMBState *env, uint32_t a)
 {
     CPU_FloatU fd, fa;
 
@@ -421,7 +416,7 @@
     return fd.l;
 }
 
-uint32_t helper_fint(uint32_t a)
+uint32_t helper_fint(CPUMBState *env, uint32_t a)
 {
     CPU_FloatU fa;
     uint32_t r;
@@ -431,12 +426,12 @@
     fa.l = a;
     r = float32_to_int32(fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
 
     return r;
 }
 
-uint32_t helper_fsqrt(uint32_t a)
+uint32_t helper_fsqrt(CPUMBState *env, uint32_t a)
 {
     CPU_FloatU fd, fa;
     int flags;
@@ -445,7 +440,7 @@
     fa.l = a;
     fd.l = float32_sqrt(fa.f, &env->fp_status);
     flags = get_float_exception_flags(&env->fp_status);
-    update_fpu_flags(flags);
+    update_fpu_flags(env, flags);
 
     return fd.l;
 }
@@ -463,7 +458,8 @@
     return 0;
 }
 
-void helper_memalign(uint32_t addr, uint32_t dr, uint32_t wr, uint32_t mask)
+void helper_memalign(CPUMBState *env, uint32_t addr, uint32_t dr, uint32_t wr,
+                     uint32_t mask)
 {
     if (addr & mask) {
             qemu_log_mask(CPU_LOG_INT,
@@ -478,45 +474,39 @@
             if (!(env->sregs[SR_MSR] & MSR_EE)) {
                 return;
             }
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
     }
 }
 
-void helper_stackprot(uint32_t addr)
+void helper_stackprot(CPUMBState *env, uint32_t addr)
 {
     if (addr < env->slr || addr > env->shr) {
             qemu_log("Stack protector violation at %x %x %x\n",
                      addr, env->slr, env->shr);
             env->sregs[SR_EAR] = addr;
             env->sregs[SR_ESR] = ESR_EC_STACKPROT;
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
     }
 }
 
 #if !defined(CONFIG_USER_ONLY)
 /* Writes/reads to the MMU's special regs end up here.  */
-uint32_t helper_mmu_read(uint32_t rn)
+uint32_t helper_mmu_read(CPUMBState *env, uint32_t rn)
 {
     return mmu_read(env, rn);
 }
 
-void helper_mmu_write(uint32_t rn, uint32_t v)
+void helper_mmu_write(CPUMBState *env, uint32_t rn, uint32_t v)
 {
     mmu_write(env, rn, v);
 }
 
-void cpu_unassigned_access(CPUMBState *env1, target_phys_addr_t addr,
+void cpu_unassigned_access(CPUMBState *env, target_phys_addr_t addr,
                            int is_write, int is_exec, int is_asi, int size)
 {
-    CPUMBState *saved_env;
-
-    saved_env = env;
-    env = env1;
-
     qemu_log_mask(CPU_LOG_INT, "Unassigned " TARGET_FMT_plx " wr=%d exe=%d\n",
              addr, is_write, is_exec);
     if (!(env->sregs[SR_MSR] & MSR_EE)) {
-        env = saved_env;
         return;
     }
 
@@ -524,14 +514,13 @@
     if (is_exec) {
         if ((env->pvr.regs[2] & PVR2_IOPB_BUS_EXC_MASK)) {
             env->sregs[SR_ESR] = ESR_EC_INSN_BUS;
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
         }
     } else {
         if ((env->pvr.regs[2] & PVR2_DOPB_BUS_EXC_MASK)) {
             env->sregs[SR_ESR] = ESR_EC_DATA_BUS;
-            helper_raise_exception(EXCP_HW_EXCP);
+            helper_raise_exception(env, EXCP_HW_EXCP);
         }
     }
-    env = saved_env;
 }
 #endif
diff --git a/target-microblaze/translate.c b/target-microblaze/translate.c
index 7470149..9c7d77f 100644
--- a/target-microblaze/translate.c
+++ b/target-microblaze/translate.c
@@ -126,7 +126,7 @@
 
     t_sync_flags(dc);
     tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
-    gen_helper_raise_exception(tmp);
+    gen_helper_raise_exception(cpu_env, tmp);
     tcg_temp_free_i32(tmp);
     dc->is_jmp = DISAS_UPDATE;
 }
@@ -503,9 +503,9 @@
         sr &= 7;
         LOG_DIS("m%ss sr%d r%d imm=%x\n", to ? "t" : "f", sr, dc->ra, dc->imm);
         if (to)
-            gen_helper_mmu_write(tcg_const_tl(sr), cpu_R[dc->ra]);
+            gen_helper_mmu_write(cpu_env, tcg_const_tl(sr), cpu_R[dc->ra]);
         else
-            gen_helper_mmu_read(cpu_R[dc->rd], tcg_const_tl(sr));
+            gen_helper_mmu_read(cpu_R[dc->rd], cpu_env, tcg_const_tl(sr));
         return;
     }
 #endif
@@ -704,9 +704,11 @@
     }
 
     if (u)
-        gen_helper_divu(cpu_R[dc->rd], *(dec_alu_op_b(dc)), cpu_R[dc->ra]);
+        gen_helper_divu(cpu_R[dc->rd], cpu_env, *(dec_alu_op_b(dc)),
+                        cpu_R[dc->ra]);
     else
-        gen_helper_divs(cpu_R[dc->rd], *(dec_alu_op_b(dc)), cpu_R[dc->ra]);
+        gen_helper_divs(cpu_R[dc->rd], cpu_env, *(dec_alu_op_b(dc)),
+                        cpu_R[dc->ra]);
     if (!dc->rd)
         tcg_gen_movi_tl(cpu_R[dc->rd], 0);
 }
@@ -912,7 +914,7 @@
         tcg_gen_add_tl(*t, cpu_R[dc->ra], cpu_R[dc->rb]);
 
         if (stackprot) {
-            gen_helper_stackprot(*t);
+            gen_helper_stackprot(cpu_env, *t);
         }
         return t;
     }
@@ -930,7 +932,7 @@
     }
 
     if (stackprot) {
-        gen_helper_stackprot(*t);
+        gen_helper_stackprot(cpu_env, *t);
     }
     return t;
 }
@@ -1056,7 +1058,7 @@
         gen_load(dc, v, *addr, size);
 
         tcg_gen_movi_tl(cpu_SR[SR_PC], dc->pc);
-        gen_helper_memalign(*addr, tcg_const_tl(dc->rd),
+        gen_helper_memalign(cpu_env, *addr, tcg_const_tl(dc->rd),
                             tcg_const_tl(0), tcg_const_tl(size - 1));
         if (dc->rd) {
             if (rev) {
@@ -1218,7 +1220,7 @@
          *        the alignment checks in between the probe and the mem
          *        access.
          */
-        gen_helper_memalign(*addr, tcg_const_tl(dc->rd),
+        gen_helper_memalign(cpu_env, *addr, tcg_const_tl(dc->rd),
                             tcg_const_tl(1), tcg_const_tl(size - 1));
     }
 
@@ -1493,49 +1495,53 @@
 
     switch (fpu_insn) {
         case 0:
-            gen_helper_fadd(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            gen_helper_fadd(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
+                            cpu_R[dc->rb]);
             break;
 
         case 1:
-            gen_helper_frsub(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            gen_helper_frsub(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
+                             cpu_R[dc->rb]);
             break;
 
         case 2:
-            gen_helper_fmul(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            gen_helper_fmul(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
+                            cpu_R[dc->rb]);
             break;
 
         case 3:
-            gen_helper_fdiv(cpu_R[dc->rd], cpu_R[dc->ra], cpu_R[dc->rb]);
+            gen_helper_fdiv(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra],
+                            cpu_R[dc->rb]);
             break;
 
         case 4:
             switch ((dc->ir >> 4) & 7) {
                 case 0:
-                    gen_helper_fcmp_un(cpu_R[dc->rd],
+                    gen_helper_fcmp_un(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 1:
-                    gen_helper_fcmp_lt(cpu_R[dc->rd],
+                    gen_helper_fcmp_lt(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 2:
-                    gen_helper_fcmp_eq(cpu_R[dc->rd],
+                    gen_helper_fcmp_eq(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 3:
-                    gen_helper_fcmp_le(cpu_R[dc->rd],
+                    gen_helper_fcmp_le(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 4:
-                    gen_helper_fcmp_gt(cpu_R[dc->rd],
+                    gen_helper_fcmp_gt(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 5:
-                    gen_helper_fcmp_ne(cpu_R[dc->rd],
+                    gen_helper_fcmp_ne(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 case 6:
-                    gen_helper_fcmp_ge(cpu_R[dc->rd],
+                    gen_helper_fcmp_ge(cpu_R[dc->rd], cpu_env,
                                        cpu_R[dc->ra], cpu_R[dc->rb]);
                     break;
                 default:
@@ -1552,21 +1558,21 @@
             if (!dec_check_fpuv2(dc)) {
                 return;
             }
-            gen_helper_flt(cpu_R[dc->rd], cpu_R[dc->ra]);
+            gen_helper_flt(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]);
             break;
 
         case 6:
             if (!dec_check_fpuv2(dc)) {
                 return;
             }
-            gen_helper_fint(cpu_R[dc->rd], cpu_R[dc->ra]);
+            gen_helper_fint(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]);
             break;
 
         case 7:
             if (!dec_check_fpuv2(dc)) {
                 return;
             }
-            gen_helper_fsqrt(cpu_R[dc->rd], cpu_R[dc->ra]);
+            gen_helper_fsqrt(cpu_R[dc->rd], cpu_env, cpu_R[dc->ra]);
             break;
 
         default:
@@ -1654,15 +1660,14 @@
     {{0, 0}, dec_null}
 };
 
-static inline void decode(DisasContext *dc)
+static inline void decode(DisasContext *dc, uint32_t ir)
 {
-    uint32_t ir;
     int i;
 
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)))
         tcg_gen_debug_insn_start(dc->pc);
 
-    dc->ir = ir = ldl_code(dc->pc);
+    dc->ir = ir;
     LOG_DIS("%8.8x\t", dc->ir);
 
     if (dc->ir)
@@ -1796,7 +1801,7 @@
             gen_io_start();
 
         dc->clear_imm = 1;
-	decode(dc);
+        decode(dc, cpu_ldl_code(env, dc->pc));
         if (dc->clear_imm)
             dc->tb_flags &= ~IMM_FLAG;
         dc->pc += 4;
@@ -1871,7 +1876,7 @@
         if (dc->is_jmp != DISAS_JUMP) {
             tcg_gen_movi_tl(cpu_SR[SR_PC], npc);
         }
-        gen_helper_raise_exception(tmp);
+        gen_helper_raise_exception(cpu_env, tmp);
         tcg_temp_free_i32(tmp);
     } else {
         switch(dc->is_jmp) {
diff --git a/target-mips/Makefile.objs b/target-mips/Makefile.objs
index 2e0e093..ca20f21 100644
--- a/target-mips/Makefile.objs
+++ b/target-mips/Makefile.objs
@@ -1,4 +1,2 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-mips/cpu.h b/target-mips/cpu.h
index 88d92f1..b7a5112b 100644
--- a/target-mips/cpu.h
+++ b/target-mips/cpu.h
@@ -38,10 +38,10 @@
     uint32_t nb_tlb;
     uint32_t tlb_in_use;
     int (*map_address) (struct CPUMIPSState *env, target_phys_addr_t *physical, int *prot, target_ulong address, int rw, int access_type);
-    void (*helper_tlbwi) (void);
-    void (*helper_tlbwr) (void);
-    void (*helper_tlbp) (void);
-    void (*helper_tlbr) (void);
+    void (*helper_tlbwi)(struct CPUMIPSState *env);
+    void (*helper_tlbwr)(struct CPUMIPSState *env);
+    void (*helper_tlbp)(struct CPUMIPSState *env);
+    void (*helper_tlbr)(struct CPUMIPSState *env);
     union {
         struct {
             r4k_tlb_t tlb[MIPS_TLB_MAX];
@@ -485,10 +485,10 @@
                            target_ulong address, int rw, int access_type);
 int r4k_map_address (CPUMIPSState *env, target_phys_addr_t *physical, int *prot,
                      target_ulong address, int rw, int access_type);
-void r4k_helper_tlbwi (void);
-void r4k_helper_tlbwr (void);
-void r4k_helper_tlbp (void);
-void r4k_helper_tlbr (void);
+void r4k_helper_tlbwi(CPUMIPSState *env);
+void r4k_helper_tlbwr(CPUMIPSState *env);
+void r4k_helper_tlbp(CPUMIPSState *env);
+void r4k_helper_tlbr(CPUMIPSState *env);
 
 void cpu_unassigned_access(CPUMIPSState *env, target_phys_addr_t addr,
                            int is_write, int is_exec, int unused, int size);
diff --git a/target-mips/helper.h b/target-mips/helper.h
index 76fb451..109ac37 100644
--- a/target-mips/helper.h
+++ b/target-mips/helper.h
@@ -1,25 +1,25 @@
 #include "def-helper.h"
 
-DEF_HELPER_2(raise_exception_err, noreturn, i32, int)
-DEF_HELPER_1(raise_exception, noreturn, i32)
+DEF_HELPER_3(raise_exception_err, noreturn, env, i32, int)
+DEF_HELPER_2(raise_exception, noreturn, env, i32)
 
 #ifdef TARGET_MIPS64
-DEF_HELPER_3(ldl, tl, tl, tl, int)
-DEF_HELPER_3(ldr, tl, tl, tl, int)
-DEF_HELPER_3(sdl, void, tl, tl, int)
-DEF_HELPER_3(sdr, void, tl, tl, int)
+DEF_HELPER_4(ldl, tl, env, tl, tl, int)
+DEF_HELPER_4(ldr, tl, env, tl, tl, int)
+DEF_HELPER_4(sdl, void, env, tl, tl, int)
+DEF_HELPER_4(sdr, void, env, tl, tl, int)
 #endif
-DEF_HELPER_3(lwl, tl, tl, tl, int)
-DEF_HELPER_3(lwr, tl, tl, tl, int)
-DEF_HELPER_3(swl, void, tl, tl, int)
-DEF_HELPER_3(swr, void, tl, tl, int)
+DEF_HELPER_4(lwl, tl, env, tl, tl, int)
+DEF_HELPER_4(lwr, tl, env, tl, tl, int)
+DEF_HELPER_4(swl, void, env, tl, tl, int)
+DEF_HELPER_4(swr, void, env, tl, tl, int)
 
 #ifndef CONFIG_USER_ONLY
-DEF_HELPER_2(ll, tl, tl, int)
-DEF_HELPER_3(sc, tl, tl, tl, int)
+DEF_HELPER_3(ll, tl, env, tl, int)
+DEF_HELPER_4(sc, tl, env, tl, tl, int)
 #ifdef TARGET_MIPS64
-DEF_HELPER_2(lld, tl, tl, int)
-DEF_HELPER_3(scd, tl, tl, tl, int)
+DEF_HELPER_3(lld, tl, env, tl, int)
+DEF_HELPER_4(scd, tl, env, tl, tl, int)
 #endif
 #endif
 
@@ -28,195 +28,195 @@
 #ifdef TARGET_MIPS64
 DEF_HELPER_FLAGS_1(dclo, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
 DEF_HELPER_FLAGS_1(dclz, TCG_CALL_CONST | TCG_CALL_PURE, tl, tl)
-DEF_HELPER_2(dmult, void, tl, tl)
-DEF_HELPER_2(dmultu, void, tl, tl)
+DEF_HELPER_3(dmult, void, env, tl, tl)
+DEF_HELPER_3(dmultu, void, env, tl, tl)
 #endif
 
-DEF_HELPER_2(muls, tl, tl, tl)
-DEF_HELPER_2(mulsu, tl, tl, tl)
-DEF_HELPER_2(macc, tl, tl, tl)
-DEF_HELPER_2(maccu, tl, tl, tl)
-DEF_HELPER_2(msac, tl, tl, tl)
-DEF_HELPER_2(msacu, tl, tl, tl)
-DEF_HELPER_2(mulhi, tl, tl, tl)
-DEF_HELPER_2(mulhiu, tl, tl, tl)
-DEF_HELPER_2(mulshi, tl, tl, tl)
-DEF_HELPER_2(mulshiu, tl, tl, tl)
-DEF_HELPER_2(macchi, tl, tl, tl)
-DEF_HELPER_2(macchiu, tl, tl, tl)
-DEF_HELPER_2(msachi, tl, tl, tl)
-DEF_HELPER_2(msachiu, tl, tl, tl)
+DEF_HELPER_3(muls, tl, env, tl, tl)
+DEF_HELPER_3(mulsu, tl, env, tl, tl)
+DEF_HELPER_3(macc, tl, env, tl, tl)
+DEF_HELPER_3(maccu, tl, env, tl, tl)
+DEF_HELPER_3(msac, tl, env, tl, tl)
+DEF_HELPER_3(msacu, tl, env, tl, tl)
+DEF_HELPER_3(mulhi, tl, env, tl, tl)
+DEF_HELPER_3(mulhiu, tl, env, tl, tl)
+DEF_HELPER_3(mulshi, tl, env, tl, tl)
+DEF_HELPER_3(mulshiu, tl, env, tl, tl)
+DEF_HELPER_3(macchi, tl, env, tl, tl)
+DEF_HELPER_3(macchiu, tl, env, tl, tl)
+DEF_HELPER_3(msachi, tl, env, tl, tl)
+DEF_HELPER_3(msachiu, tl, env, tl, tl)
 
 #ifndef CONFIG_USER_ONLY
 /* CP0 helpers */
-DEF_HELPER_0(mfc0_mvpcontrol, tl)
-DEF_HELPER_0(mfc0_mvpconf0, tl)
-DEF_HELPER_0(mfc0_mvpconf1, tl)
-DEF_HELPER_0(mftc0_vpecontrol, tl)
-DEF_HELPER_0(mftc0_vpeconf0, tl)
-DEF_HELPER_0(mfc0_random, tl)
-DEF_HELPER_0(mfc0_tcstatus, tl)
-DEF_HELPER_0(mftc0_tcstatus, tl)
-DEF_HELPER_0(mfc0_tcbind, tl)
-DEF_HELPER_0(mftc0_tcbind, tl)
-DEF_HELPER_0(mfc0_tcrestart, tl)
-DEF_HELPER_0(mftc0_tcrestart, tl)
-DEF_HELPER_0(mfc0_tchalt, tl)
-DEF_HELPER_0(mftc0_tchalt, tl)
-DEF_HELPER_0(mfc0_tccontext, tl)
-DEF_HELPER_0(mftc0_tccontext, tl)
-DEF_HELPER_0(mfc0_tcschedule, tl)
-DEF_HELPER_0(mftc0_tcschedule, tl)
-DEF_HELPER_0(mfc0_tcschefback, tl)
-DEF_HELPER_0(mftc0_tcschefback, tl)
-DEF_HELPER_0(mfc0_count, tl)
-DEF_HELPER_0(mftc0_entryhi, tl)
-DEF_HELPER_0(mftc0_status, tl)
-DEF_HELPER_0(mftc0_cause, tl)
-DEF_HELPER_0(mftc0_epc, tl)
-DEF_HELPER_0(mftc0_ebase, tl)
-DEF_HELPER_1(mftc0_configx, tl, tl)
-DEF_HELPER_0(mfc0_lladdr, tl)
-DEF_HELPER_1(mfc0_watchlo, tl, i32)
-DEF_HELPER_1(mfc0_watchhi, tl, i32)
-DEF_HELPER_0(mfc0_debug, tl)
-DEF_HELPER_0(mftc0_debug, tl)
+DEF_HELPER_1(mfc0_mvpcontrol, tl, env)
+DEF_HELPER_1(mfc0_mvpconf0, tl, env)
+DEF_HELPER_1(mfc0_mvpconf1, tl, env)
+DEF_HELPER_1(mftc0_vpecontrol, tl, env)
+DEF_HELPER_1(mftc0_vpeconf0, tl, env)
+DEF_HELPER_1(mfc0_random, tl, env)
+DEF_HELPER_1(mfc0_tcstatus, tl, env)
+DEF_HELPER_1(mftc0_tcstatus, tl, env)
+DEF_HELPER_1(mfc0_tcbind, tl, env)
+DEF_HELPER_1(mftc0_tcbind, tl, env)
+DEF_HELPER_1(mfc0_tcrestart, tl, env)
+DEF_HELPER_1(mftc0_tcrestart, tl, env)
+DEF_HELPER_1(mfc0_tchalt, tl, env)
+DEF_HELPER_1(mftc0_tchalt, tl, env)
+DEF_HELPER_1(mfc0_tccontext, tl, env)
+DEF_HELPER_1(mftc0_tccontext, tl, env)
+DEF_HELPER_1(mfc0_tcschedule, tl, env)
+DEF_HELPER_1(mftc0_tcschedule, tl, env)
+DEF_HELPER_1(mfc0_tcschefback, tl, env)
+DEF_HELPER_1(mftc0_tcschefback, tl, env)
+DEF_HELPER_1(mfc0_count, tl, env)
+DEF_HELPER_1(mftc0_entryhi, tl, env)
+DEF_HELPER_1(mftc0_status, tl, env)
+DEF_HELPER_1(mftc0_cause, tl, env)
+DEF_HELPER_1(mftc0_epc, tl, env)
+DEF_HELPER_1(mftc0_ebase, tl, env)
+DEF_HELPER_2(mftc0_configx, tl, env, tl)
+DEF_HELPER_1(mfc0_lladdr, tl, env)
+DEF_HELPER_2(mfc0_watchlo, tl, env, i32)
+DEF_HELPER_2(mfc0_watchhi, tl, env, i32)
+DEF_HELPER_1(mfc0_debug, tl, env)
+DEF_HELPER_1(mftc0_debug, tl, env)
 #ifdef TARGET_MIPS64
-DEF_HELPER_0(dmfc0_tcrestart, tl)
-DEF_HELPER_0(dmfc0_tchalt, tl)
-DEF_HELPER_0(dmfc0_tccontext, tl)
-DEF_HELPER_0(dmfc0_tcschedule, tl)
-DEF_HELPER_0(dmfc0_tcschefback, tl)
-DEF_HELPER_0(dmfc0_lladdr, tl)
-DEF_HELPER_1(dmfc0_watchlo, tl, i32)
+DEF_HELPER_1(dmfc0_tcrestart, tl, env)
+DEF_HELPER_1(dmfc0_tchalt, tl, env)
+DEF_HELPER_1(dmfc0_tccontext, tl, env)
+DEF_HELPER_1(dmfc0_tcschedule, tl, env)
+DEF_HELPER_1(dmfc0_tcschefback, tl, env)
+DEF_HELPER_1(dmfc0_lladdr, tl, env)
+DEF_HELPER_2(dmfc0_watchlo, tl, env, i32)
 #endif /* TARGET_MIPS64 */
 
-DEF_HELPER_1(mtc0_index, void, tl)
-DEF_HELPER_1(mtc0_mvpcontrol, void, tl)
-DEF_HELPER_1(mtc0_vpecontrol, void, tl)
-DEF_HELPER_1(mttc0_vpecontrol, void, tl)
-DEF_HELPER_1(mtc0_vpeconf0, void, tl)
-DEF_HELPER_1(mttc0_vpeconf0, void, tl)
-DEF_HELPER_1(mtc0_vpeconf1, void, tl)
-DEF_HELPER_1(mtc0_yqmask, void, tl)
-DEF_HELPER_1(mtc0_vpeopt, void, tl)
-DEF_HELPER_1(mtc0_entrylo0, void, tl)
-DEF_HELPER_1(mtc0_tcstatus, void, tl)
-DEF_HELPER_1(mttc0_tcstatus, void, tl)
-DEF_HELPER_1(mtc0_tcbind, void, tl)
-DEF_HELPER_1(mttc0_tcbind, void, tl)
-DEF_HELPER_1(mtc0_tcrestart, void, tl)
-DEF_HELPER_1(mttc0_tcrestart, void, tl)
-DEF_HELPER_1(mtc0_tchalt, void, tl)
-DEF_HELPER_1(mttc0_tchalt, void, tl)
-DEF_HELPER_1(mtc0_tccontext, void, tl)
-DEF_HELPER_1(mttc0_tccontext, void, tl)
-DEF_HELPER_1(mtc0_tcschedule, void, tl)
-DEF_HELPER_1(mttc0_tcschedule, void, tl)
-DEF_HELPER_1(mtc0_tcschefback, void, tl)
-DEF_HELPER_1(mttc0_tcschefback, void, tl)
-DEF_HELPER_1(mtc0_entrylo1, void, tl)
-DEF_HELPER_1(mtc0_context, void, tl)
-DEF_HELPER_1(mtc0_pagemask, void, tl)
-DEF_HELPER_1(mtc0_pagegrain, void, tl)
-DEF_HELPER_1(mtc0_wired, void, tl)
-DEF_HELPER_1(mtc0_srsconf0, void, tl)
-DEF_HELPER_1(mtc0_srsconf1, void, tl)
-DEF_HELPER_1(mtc0_srsconf2, void, tl)
-DEF_HELPER_1(mtc0_srsconf3, void, tl)
-DEF_HELPER_1(mtc0_srsconf4, void, tl)
-DEF_HELPER_1(mtc0_hwrena, void, tl)
-DEF_HELPER_1(mtc0_count, void, tl)
-DEF_HELPER_1(mtc0_entryhi, void, tl)
-DEF_HELPER_1(mttc0_entryhi, void, tl)
-DEF_HELPER_1(mtc0_compare, void, tl)
-DEF_HELPER_1(mtc0_status, void, tl)
-DEF_HELPER_1(mttc0_status, void, tl)
-DEF_HELPER_1(mtc0_intctl, void, tl)
-DEF_HELPER_1(mtc0_srsctl, void, tl)
-DEF_HELPER_1(mtc0_cause, void, tl)
-DEF_HELPER_1(mttc0_cause, void, tl)
-DEF_HELPER_1(mtc0_ebase, void, tl)
-DEF_HELPER_1(mttc0_ebase, void, tl)
-DEF_HELPER_1(mtc0_config0, void, tl)
-DEF_HELPER_1(mtc0_config2, void, tl)
-DEF_HELPER_1(mtc0_lladdr, void, tl)
-DEF_HELPER_2(mtc0_watchlo, void, tl, i32)
-DEF_HELPER_2(mtc0_watchhi, void, tl, i32)
-DEF_HELPER_1(mtc0_xcontext, void, tl)
-DEF_HELPER_1(mtc0_framemask, void, tl)
-DEF_HELPER_1(mtc0_debug, void, tl)
-DEF_HELPER_1(mttc0_debug, void, tl)
-DEF_HELPER_1(mtc0_performance0, void, tl)
-DEF_HELPER_1(mtc0_taglo, void, tl)
-DEF_HELPER_1(mtc0_datalo, void, tl)
-DEF_HELPER_1(mtc0_taghi, void, tl)
-DEF_HELPER_1(mtc0_datahi, void, tl)
+DEF_HELPER_2(mtc0_index, void, env, tl)
+DEF_HELPER_2(mtc0_mvpcontrol, void, env, tl)
+DEF_HELPER_2(mtc0_vpecontrol, void, env, tl)
+DEF_HELPER_2(mttc0_vpecontrol, void, env, tl)
+DEF_HELPER_2(mtc0_vpeconf0, void, env, tl)
+DEF_HELPER_2(mttc0_vpeconf0, void, env, tl)
+DEF_HELPER_2(mtc0_vpeconf1, void, env, tl)
+DEF_HELPER_2(mtc0_yqmask, void, env, tl)
+DEF_HELPER_2(mtc0_vpeopt, void, env, tl)
+DEF_HELPER_2(mtc0_entrylo0, void, env, tl)
+DEF_HELPER_2(mtc0_tcstatus, void, env, tl)
+DEF_HELPER_2(mttc0_tcstatus, void, env, tl)
+DEF_HELPER_2(mtc0_tcbind, void, env, tl)
+DEF_HELPER_2(mttc0_tcbind, void, env, tl)
+DEF_HELPER_2(mtc0_tcrestart, void, env, tl)
+DEF_HELPER_2(mttc0_tcrestart, void, env, tl)
+DEF_HELPER_2(mtc0_tchalt, void, env, tl)
+DEF_HELPER_2(mttc0_tchalt, void, env, tl)
+DEF_HELPER_2(mtc0_tccontext, void, env, tl)
+DEF_HELPER_2(mttc0_tccontext, void, env, tl)
+DEF_HELPER_2(mtc0_tcschedule, void, env, tl)
+DEF_HELPER_2(mttc0_tcschedule, void, env, tl)
+DEF_HELPER_2(mtc0_tcschefback, void, env, tl)
+DEF_HELPER_2(mttc0_tcschefback, void, env, tl)
+DEF_HELPER_2(mtc0_entrylo1, void, env, tl)
+DEF_HELPER_2(mtc0_context, void, env, tl)
+DEF_HELPER_2(mtc0_pagemask, void, env, tl)
+DEF_HELPER_2(mtc0_pagegrain, void, env, tl)
+DEF_HELPER_2(mtc0_wired, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf0, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf1, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf2, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf3, void, env, tl)
+DEF_HELPER_2(mtc0_srsconf4, void, env, tl)
+DEF_HELPER_2(mtc0_hwrena, void, env, tl)
+DEF_HELPER_2(mtc0_count, void, env, tl)
+DEF_HELPER_2(mtc0_entryhi, void, env, tl)
+DEF_HELPER_2(mttc0_entryhi, void, env, tl)
+DEF_HELPER_2(mtc0_compare, void, env, tl)
+DEF_HELPER_2(mtc0_status, void, env, tl)
+DEF_HELPER_2(mttc0_status, void, env, tl)
+DEF_HELPER_2(mtc0_intctl, void, env, tl)
+DEF_HELPER_2(mtc0_srsctl, void, env, tl)
+DEF_HELPER_2(mtc0_cause, void, env, tl)
+DEF_HELPER_2(mttc0_cause, void, env, tl)
+DEF_HELPER_2(mtc0_ebase, void, env, tl)
+DEF_HELPER_2(mttc0_ebase, void, env, tl)
+DEF_HELPER_2(mtc0_config0, void, env, tl)
+DEF_HELPER_2(mtc0_config2, void, env, tl)
+DEF_HELPER_2(mtc0_lladdr, void, env, tl)
+DEF_HELPER_3(mtc0_watchlo, void, env, tl, i32)
+DEF_HELPER_3(mtc0_watchhi, void, env, tl, i32)
+DEF_HELPER_2(mtc0_xcontext, void, env, tl)
+DEF_HELPER_2(mtc0_framemask, void, env, tl)
+DEF_HELPER_2(mtc0_debug, void, env, tl)
+DEF_HELPER_2(mttc0_debug, void, env, tl)
+DEF_HELPER_2(mtc0_performance0, void, env, tl)
+DEF_HELPER_2(mtc0_taglo, void, env, tl)
+DEF_HELPER_2(mtc0_datalo, void, env, tl)
+DEF_HELPER_2(mtc0_taghi, void, env, tl)
+DEF_HELPER_2(mtc0_datahi, void, env, tl)
 
 /* MIPS MT functions */
-DEF_HELPER_1(mftgpr, tl, i32);
-DEF_HELPER_1(mftlo, tl, i32)
-DEF_HELPER_1(mfthi, tl, i32)
-DEF_HELPER_1(mftacx, tl, i32)
-DEF_HELPER_0(mftdsp, tl)
-DEF_HELPER_2(mttgpr, void, tl, i32)
-DEF_HELPER_2(mttlo, void, tl, i32)
-DEF_HELPER_2(mtthi, void, tl, i32)
-DEF_HELPER_2(mttacx, void, tl, i32)
-DEF_HELPER_1(mttdsp, void, tl)
+DEF_HELPER_2(mftgpr, tl, env, i32);
+DEF_HELPER_2(mftlo, tl, env, i32)
+DEF_HELPER_2(mfthi, tl, env, i32)
+DEF_HELPER_2(mftacx, tl, env, i32)
+DEF_HELPER_1(mftdsp, tl, env)
+DEF_HELPER_3(mttgpr, void, env, tl, i32)
+DEF_HELPER_3(mttlo, void, env, tl, i32)
+DEF_HELPER_3(mtthi, void, env, tl, i32)
+DEF_HELPER_3(mttacx, void, env, tl, i32)
+DEF_HELPER_2(mttdsp, void, env, tl)
 DEF_HELPER_0(dmt, tl)
 DEF_HELPER_0(emt, tl)
-DEF_HELPER_0(dvpe, tl)
-DEF_HELPER_0(evpe, tl)
+DEF_HELPER_1(dvpe, tl, env)
+DEF_HELPER_1(evpe, tl, env)
 #endif /* !CONFIG_USER_ONLY */
 
 /* microMIPS functions */
-DEF_HELPER_3(lwm, void, tl, tl, i32);
-DEF_HELPER_3(swm, void, tl, tl, i32);
+DEF_HELPER_4(lwm, void, env, tl, tl, i32);
+DEF_HELPER_4(swm, void, env, tl, tl, i32);
 #ifdef TARGET_MIPS64
-DEF_HELPER_3(ldm, void, tl, tl, i32);
-DEF_HELPER_3(sdm, void, tl, tl, i32);
+DEF_HELPER_4(ldm, void, env, tl, tl, i32);
+DEF_HELPER_4(sdm, void, env, tl, tl, i32);
 #endif
 
 DEF_HELPER_2(fork, void, tl, tl)
-DEF_HELPER_1(yield, tl, tl)
+DEF_HELPER_2(yield, tl, env, tl)
 
 /* CP1 functions */
-DEF_HELPER_1(cfc1, tl, i32)
-DEF_HELPER_2(ctc1, void, tl, i32)
+DEF_HELPER_2(cfc1, tl, env, i32)
+DEF_HELPER_3(ctc1, void, env, tl, i32)
 
-DEF_HELPER_1(float_cvtd_s, i64, i32)
-DEF_HELPER_1(float_cvtd_w, i64, i32)
-DEF_HELPER_1(float_cvtd_l, i64, i64)
-DEF_HELPER_1(float_cvtl_d, i64, i64)
-DEF_HELPER_1(float_cvtl_s, i64, i32)
-DEF_HELPER_1(float_cvtps_pw, i64, i64)
-DEF_HELPER_1(float_cvtpw_ps, i64, i64)
-DEF_HELPER_1(float_cvts_d, i32, i64)
-DEF_HELPER_1(float_cvts_w, i32, i32)
-DEF_HELPER_1(float_cvts_l, i32, i64)
-DEF_HELPER_1(float_cvts_pl, i32, i32)
-DEF_HELPER_1(float_cvts_pu, i32, i32)
-DEF_HELPER_1(float_cvtw_s, i32, i32)
-DEF_HELPER_1(float_cvtw_d, i32, i64)
+DEF_HELPER_2(float_cvtd_s, i64, env, i32)
+DEF_HELPER_2(float_cvtd_w, i64, env, i32)
+DEF_HELPER_2(float_cvtd_l, i64, env, i64)
+DEF_HELPER_2(float_cvtl_d, i64, env, i64)
+DEF_HELPER_2(float_cvtl_s, i64, env, i32)
+DEF_HELPER_2(float_cvtps_pw, i64, env, i64)
+DEF_HELPER_2(float_cvtpw_ps, i64, env, i64)
+DEF_HELPER_2(float_cvts_d, i32, env, i64)
+DEF_HELPER_2(float_cvts_w, i32, env, i32)
+DEF_HELPER_2(float_cvts_l, i32, env, i64)
+DEF_HELPER_2(float_cvts_pl, i32, env, i32)
+DEF_HELPER_2(float_cvts_pu, i32, env, i32)
+DEF_HELPER_2(float_cvtw_s, i32, env, i32)
+DEF_HELPER_2(float_cvtw_d, i32, env, i64)
 
-DEF_HELPER_2(float_addr_ps, i64, i64, i64)
-DEF_HELPER_2(float_mulr_ps, i64, i64, i64)
+DEF_HELPER_3(float_addr_ps, i64, env, i64, i64)
+DEF_HELPER_3(float_mulr_ps, i64, env, i64, i64)
 
-#define FOP_PROTO(op)                       \
-DEF_HELPER_1(float_ ## op ## l_s, i64, i32) \
-DEF_HELPER_1(float_ ## op ## l_d, i64, i64) \
-DEF_HELPER_1(float_ ## op ## w_s, i32, i32) \
-DEF_HELPER_1(float_ ## op ## w_d, i32, i64)
+#define FOP_PROTO(op)                            \
+DEF_HELPER_2(float_ ## op ## l_s, i64, env, i32) \
+DEF_HELPER_2(float_ ## op ## l_d, i64, env, i64) \
+DEF_HELPER_2(float_ ## op ## w_s, i32, env, i32) \
+DEF_HELPER_2(float_ ## op ## w_d, i32, env, i64)
 FOP_PROTO(round)
 FOP_PROTO(trunc)
 FOP_PROTO(ceil)
 FOP_PROTO(floor)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                       \
-DEF_HELPER_1(float_ ## op ## _s, i32, i32)  \
-DEF_HELPER_1(float_ ## op ## _d, i64, i64)
+#define FOP_PROTO(op)                            \
+DEF_HELPER_2(float_ ## op ## _s, i32, env, i32)  \
+DEF_HELPER_2(float_ ## op ## _d, i64, env, i64)
 FOP_PROTO(sqrt)
 FOP_PROTO(rsqrt)
 FOP_PROTO(recip)
@@ -228,14 +228,20 @@
 DEF_HELPER_1(float_ ## op ## _ps, i64, i64)
 FOP_PROTO(abs)
 FOP_PROTO(chs)
+#undef FOP_PROTO
+
+#define FOP_PROTO(op)                            \
+DEF_HELPER_2(float_ ## op ## _s, i32, env, i32)  \
+DEF_HELPER_2(float_ ## op ## _d, i64, env, i64)  \
+DEF_HELPER_2(float_ ## op ## _ps, i64, env, i64)
 FOP_PROTO(recip1)
 FOP_PROTO(rsqrt1)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                             \
-DEF_HELPER_2(float_ ## op ## _s, i32, i32, i32)   \
-DEF_HELPER_2(float_ ## op ## _d, i64, i64, i64)   \
-DEF_HELPER_2(float_ ## op ## _ps, i64, i64, i64)
+#define FOP_PROTO(op)                                  \
+DEF_HELPER_3(float_ ## op ## _s, i32, env, i32, i32)   \
+DEF_HELPER_3(float_ ## op ## _d, i64, env, i64, i64)   \
+DEF_HELPER_3(float_ ## op ## _ps, i64, env, i64, i64)
 FOP_PROTO(add)
 FOP_PROTO(sub)
 FOP_PROTO(mul)
@@ -244,23 +250,23 @@
 FOP_PROTO(rsqrt2)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                                 \
-DEF_HELPER_3(float_ ## op ## _s, i32, i32, i32, i32)  \
-DEF_HELPER_3(float_ ## op ## _d, i64, i64, i64, i64)  \
-DEF_HELPER_3(float_ ## op ## _ps, i64, i64, i64, i64)
+#define FOP_PROTO(op)                                      \
+DEF_HELPER_4(float_ ## op ## _s, i32, env, i32, i32, i32)  \
+DEF_HELPER_4(float_ ## op ## _d, i64, env, i64, i64, i64)  \
+DEF_HELPER_4(float_ ## op ## _ps, i64, env, i64, i64, i64)
 FOP_PROTO(muladd)
 FOP_PROTO(mulsub)
 FOP_PROTO(nmuladd)
 FOP_PROTO(nmulsub)
 #undef FOP_PROTO
 
-#define FOP_PROTO(op)                               \
-DEF_HELPER_3(cmp_d_ ## op, void, i64, i64, int)     \
-DEF_HELPER_3(cmpabs_d_ ## op, void, i64, i64, int)  \
-DEF_HELPER_3(cmp_s_ ## op, void, i32, i32, int)     \
-DEF_HELPER_3(cmpabs_s_ ## op, void, i32, i32, int)  \
-DEF_HELPER_3(cmp_ps_ ## op, void, i64, i64, int)    \
-DEF_HELPER_3(cmpabs_ps_ ## op, void, i64, i64, int)
+#define FOP_PROTO(op)                                    \
+DEF_HELPER_4(cmp_d_ ## op, void, env, i64, i64, int)     \
+DEF_HELPER_4(cmpabs_d_ ## op, void, env, i64, i64, int)  \
+DEF_HELPER_4(cmp_s_ ## op, void, env, i32, i32, int)     \
+DEF_HELPER_4(cmpabs_s_ ## op, void, env, i32, i32, int)  \
+DEF_HELPER_4(cmp_ps_ ## op, void, env, i64, i64, int)    \
+DEF_HELPER_4(cmpabs_ps_ ## op, void, env, i64, i64, int)
 FOP_PROTO(f)
 FOP_PROTO(un)
 FOP_PROTO(eq)
@@ -281,20 +287,20 @@
 
 /* Special functions */
 #ifndef CONFIG_USER_ONLY
-DEF_HELPER_0(tlbwi, void)
-DEF_HELPER_0(tlbwr, void)
-DEF_HELPER_0(tlbp, void)
-DEF_HELPER_0(tlbr, void)
-DEF_HELPER_0(di, tl)
-DEF_HELPER_0(ei, tl)
-DEF_HELPER_0(eret, void)
-DEF_HELPER_0(deret, void)
+DEF_HELPER_1(tlbwi, void, env)
+DEF_HELPER_1(tlbwr, void, env)
+DEF_HELPER_1(tlbp, void, env)
+DEF_HELPER_1(tlbr, void, env)
+DEF_HELPER_1(di, tl, env)
+DEF_HELPER_1(ei, tl, env)
+DEF_HELPER_1(eret, void, env)
+DEF_HELPER_1(deret, void, env)
 #endif /* !CONFIG_USER_ONLY */
-DEF_HELPER_0(rdhwr_cpunum, tl)
-DEF_HELPER_0(rdhwr_synci_step, tl)
-DEF_HELPER_0(rdhwr_cc, tl)
-DEF_HELPER_0(rdhwr_ccres, tl)
-DEF_HELPER_1(pmon, void, int)
-DEF_HELPER_0(wait, void)
+DEF_HELPER_1(rdhwr_cpunum, tl, env)
+DEF_HELPER_1(rdhwr_synci_step, tl, env)
+DEF_HELPER_1(rdhwr_cc, tl, env)
+DEF_HELPER_1(rdhwr_ccres, tl, env)
+DEF_HELPER_2(pmon, void, env, int)
+DEF_HELPER_1(wait, void, env)
 
 #include "def-helper.h"
diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
index 3d242aa..ce5ddaf 100644
--- a/target-mips/op_helper.c
+++ b/target-mips/op_helper.c
@@ -18,8 +18,6 @@
  */
 #include <stdlib.h>
 #include "cpu.h"
-#include "dyngen-exec.h"
-
 #include "host-utils.h"
 
 #include "helper.h"
@@ -35,7 +33,8 @@
 /*****************************************************************************/
 /* Exceptions processing helpers */
 
-void helper_raise_exception_err (uint32_t exception, int error_code)
+void helper_raise_exception_err(CPUMIPSState *env, uint32_t exception,
+                                int error_code)
 {
 #if 1
     if (exception < 0x100)
@@ -46,13 +45,13 @@
     cpu_loop_exit(env);
 }
 
-void helper_raise_exception (uint32_t exception)
+void helper_raise_exception(CPUMIPSState *env, uint32_t exception)
 {
-    helper_raise_exception_err(exception, 0);
+    helper_raise_exception_err(env, exception, 0);
 }
 
 #if !defined(CONFIG_USER_ONLY)
-static void do_restore_state(uintptr_t pc)
+static void do_restore_state(CPUMIPSState *env, uintptr_t pc)
 {
     TranslationBlock *tb;
 
@@ -65,20 +64,22 @@
 
 #if defined(CONFIG_USER_ONLY)
 #define HELPER_LD(name, insn, type)                                     \
-static inline type do_##name(target_ulong addr, int mem_idx)            \
+static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             int mem_idx)                               \
 {                                                                       \
     return (type) insn##_raw(addr);                                     \
 }
 #else
 #define HELPER_LD(name, insn, type)                                     \
-static inline type do_##name(target_ulong addr, int mem_idx)            \
+static inline type do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             int mem_idx)                               \
 {                                                                       \
     switch (mem_idx)                                                    \
     {                                                                   \
-    case 0: return (type) insn##_kernel(addr); break;                   \
-    case 1: return (type) insn##_super(addr); break;                    \
+    case 0: return (type) cpu_##insn##_kernel(env, addr); break;        \
+    case 1: return (type) cpu_##insn##_super(env, addr); break;         \
     default:                                                            \
-    case 2: return (type) insn##_user(addr); break;                     \
+    case 2: return (type) cpu_##insn##_user(env, addr); break;          \
     }                                                                   \
 }
 #endif
@@ -91,20 +92,22 @@
 
 #if defined(CONFIG_USER_ONLY)
 #define HELPER_ST(name, insn, type)                                     \
-static inline void do_##name(target_ulong addr, type val, int mem_idx)  \
+static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             type val, int mem_idx)                     \
 {                                                                       \
     insn##_raw(addr, val);                                              \
 }
 #else
 #define HELPER_ST(name, insn, type)                                     \
-static inline void do_##name(target_ulong addr, type val, int mem_idx)  \
+static inline void do_##name(CPUMIPSState *env, target_ulong addr,      \
+                             type val, int mem_idx)                     \
 {                                                                       \
     switch (mem_idx)                                                    \
     {                                                                   \
-    case 0: insn##_kernel(addr, val); break;                            \
-    case 1: insn##_super(addr, val); break;                             \
+    case 0: cpu_##insn##_kernel(env, addr, val); break;                 \
+    case 1: cpu_##insn##_super(env, addr, val); break;                  \
     default:                                                            \
-    case 2: insn##_user(addr, val); break;                              \
+    case 2: cpu_##insn##_user(env, addr, val); break;                   \
     }                                                                   \
 }
 #endif
@@ -138,12 +141,12 @@
 #endif /* TARGET_MIPS64 */
 
 /* 64 bits arithmetic for 32 bits hosts */
-static inline uint64_t get_HILO (void)
+static inline uint64_t get_HILO(CPUMIPSState *env)
 {
     return ((uint64_t)(env->active_tc.HI[0]) << 32) | (uint32_t)env->active_tc.LO[0];
 }
 
-static inline target_ulong set_HIT0_LO(uint64_t HILO)
+static inline target_ulong set_HIT0_LO(CPUMIPSState *env, uint64_t HILO)
 {
     target_ulong tmp;
     env->active_tc.LO[0] = (int32_t)(HILO & 0xFFFFFFFF);
@@ -151,7 +154,7 @@
     return tmp;
 }
 
-static inline target_ulong set_HI_LOT0(uint64_t HILO)
+static inline target_ulong set_HI_LOT0(CPUMIPSState *env, uint64_t HILO)
 {
     target_ulong tmp = env->active_tc.LO[0] = (int32_t)(HILO & 0xFFFFFFFF);
     env->active_tc.HI[0] = (int32_t)(HILO >> 32);
@@ -159,91 +162,110 @@
 }
 
 /* Multiplication variants of the vr54xx. */
-target_ulong helper_muls (target_ulong arg1, target_ulong arg2)
+target_ulong helper_muls(CPUMIPSState *env, target_ulong arg1,
+                         target_ulong arg2)
 {
-    return set_HI_LOT0(0 - ((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2));
+    return set_HI_LOT0(env, 0 - ((int64_t)(int32_t)arg1 *
+                                 (int64_t)(int32_t)arg2));
 }
 
-target_ulong helper_mulsu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulsu(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    return set_HI_LOT0(0 - (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
+    return set_HI_LOT0(env, 0 - (uint64_t)(uint32_t)arg1 *
+                       (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_macc (target_ulong arg1, target_ulong arg2)
+target_ulong helper_macc(CPUMIPSState *env, target_ulong arg1,
+                         target_ulong arg2)
 {
-    return set_HI_LOT0((int64_t)get_HILO() + (int64_t)(int32_t)arg1 *
-                                             (int64_t)(int32_t)arg2);
+    return set_HI_LOT0(env, (int64_t)get_HILO(env) + (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_macchi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_macchi(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    return set_HIT0_LO((int64_t)get_HILO() + (int64_t)(int32_t)arg1 *
-                                             (int64_t)(int32_t)arg2);
+    return set_HIT0_LO(env, (int64_t)get_HILO(env) + (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_maccu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_maccu(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    return set_HI_LOT0((uint64_t)get_HILO() + (uint64_t)(uint32_t)arg1 *
-                                              (uint64_t)(uint32_t)arg2);
+    return set_HI_LOT0(env, (uint64_t)get_HILO(env) +
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_macchiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_macchiu(CPUMIPSState *env, target_ulong arg1,
+                            target_ulong arg2)
 {
-    return set_HIT0_LO((uint64_t)get_HILO() + (uint64_t)(uint32_t)arg1 *
-                                              (uint64_t)(uint32_t)arg2);
+    return set_HIT0_LO(env, (uint64_t)get_HILO(env) +
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_msac (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msac(CPUMIPSState *env, target_ulong arg1,
+                         target_ulong arg2)
 {
-    return set_HI_LOT0((int64_t)get_HILO() - (int64_t)(int32_t)arg1 *
-                                             (int64_t)(int32_t)arg2);
+    return set_HI_LOT0(env, (int64_t)get_HILO(env) - (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_msachi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msachi(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    return set_HIT0_LO((int64_t)get_HILO() - (int64_t)(int32_t)arg1 *
-                                             (int64_t)(int32_t)arg2);
+    return set_HIT0_LO(env, (int64_t)get_HILO(env) - (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_msacu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msacu(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    return set_HI_LOT0((uint64_t)get_HILO() - (uint64_t)(uint32_t)arg1 *
-                                              (uint64_t)(uint32_t)arg2);
+    return set_HI_LOT0(env, (uint64_t)get_HILO(env) -
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_msachiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_msachiu(CPUMIPSState *env, target_ulong arg1,
+                            target_ulong arg2)
 {
-    return set_HIT0_LO((uint64_t)get_HILO() - (uint64_t)(uint32_t)arg1 *
-                                              (uint64_t)(uint32_t)arg2);
+    return set_HIT0_LO(env, (uint64_t)get_HILO(env) -
+                       (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_mulhi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulhi(CPUMIPSState *env, target_ulong arg1,
+                          target_ulong arg2)
 {
-    return set_HIT0_LO((int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
+    return set_HIT0_LO(env, (int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_mulhiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulhiu(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    return set_HIT0_LO((uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
+    return set_HIT0_LO(env, (uint64_t)(uint32_t)arg1 *
+                       (uint64_t)(uint32_t)arg2);
 }
 
-target_ulong helper_mulshi (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulshi(CPUMIPSState *env, target_ulong arg1,
+                           target_ulong arg2)
 {
-    return set_HIT0_LO(0 - (int64_t)(int32_t)arg1 * (int64_t)(int32_t)arg2);
+    return set_HIT0_LO(env, 0 - (int64_t)(int32_t)arg1 *
+                       (int64_t)(int32_t)arg2);
 }
 
-target_ulong helper_mulshiu (target_ulong arg1, target_ulong arg2)
+target_ulong helper_mulshiu(CPUMIPSState *env, target_ulong arg1,
+                            target_ulong arg2)
 {
-    return set_HIT0_LO(0 - (uint64_t)(uint32_t)arg1 * (uint64_t)(uint32_t)arg2);
+    return set_HIT0_LO(env, 0 - (uint64_t)(uint32_t)arg1 *
+                       (uint64_t)(uint32_t)arg2);
 }
 
 #ifdef TARGET_MIPS64
-void helper_dmult (target_ulong arg1, target_ulong arg2)
+void helper_dmult(CPUMIPSState *env, target_ulong arg1, target_ulong arg2)
 {
     muls64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
 }
 
-void helper_dmultu (target_ulong arg1, target_ulong arg2)
+void helper_dmultu(CPUMIPSState *env, target_ulong arg1, target_ulong arg2)
 {
     mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
 }
@@ -251,7 +273,9 @@
 
 #ifndef CONFIG_USER_ONLY
 
-static inline target_phys_addr_t do_translate_address(target_ulong address, int rw)
+static inline target_phys_addr_t do_translate_address(CPUMIPSState *env,
+                                                      target_ulong address,
+                                                      int rw)
 {
     target_phys_addr_t lladdr;
 
@@ -265,10 +289,10 @@
 }
 
 #define HELPER_LD_ATOMIC(name, insn)                                          \
-target_ulong helper_##name(target_ulong arg, int mem_idx)                     \
+target_ulong helper_##name(CPUMIPSState *env, target_ulong arg, int mem_idx)  \
 {                                                                             \
-    env->lladdr = do_translate_address(arg, 0);                               \
-    env->llval = do_##insn(arg, mem_idx);                                     \
+    env->lladdr = do_translate_address(env, arg, 0);                          \
+    env->llval = do_##insn(env, arg, mem_idx);                                \
     return env->llval;                                                        \
 }
 HELPER_LD_ATOMIC(ll, lw)
@@ -278,18 +302,19 @@
 #undef HELPER_LD_ATOMIC
 
 #define HELPER_ST_ATOMIC(name, ld_insn, st_insn, almask)                      \
-target_ulong helper_##name(target_ulong arg1, target_ulong arg2, int mem_idx) \
+target_ulong helper_##name(CPUMIPSState *env, target_ulong arg1,              \
+                           target_ulong arg2, int mem_idx)                    \
 {                                                                             \
     target_long tmp;                                                          \
                                                                               \
     if (arg2 & almask) {                                                      \
         env->CP0_BadVAddr = arg2;                                             \
-        helper_raise_exception(EXCP_AdES);                                    \
+        helper_raise_exception(env, EXCP_AdES);                               \
     }                                                                         \
-    if (do_translate_address(arg2, 1) == env->lladdr) {                       \
-        tmp = do_##ld_insn(arg2, mem_idx);                                    \
+    if (do_translate_address(env, arg2, 1) == env->lladdr) {                  \
+        tmp = do_##ld_insn(env, arg2, mem_idx);                               \
         if (tmp == env->llval) {                                              \
-            do_##st_insn(arg2, arg1, mem_idx);                                \
+            do_##st_insn(env, arg2, arg1, mem_idx);                           \
             return 1;                                                         \
         }                                                                     \
     }                                                                         \
@@ -310,80 +335,84 @@
 #define GET_OFFSET(addr, offset) (addr - (offset))
 #endif
 
-target_ulong helper_lwl(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_lwl(CPUMIPSState *env, target_ulong arg1,
+                        target_ulong arg2, int mem_idx)
 {
     target_ulong tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
 
     if (GET_LMASK(arg2) <= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, 1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
         arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
     }
 
     if (GET_LMASK(arg2) <= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, 2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
         arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
     }
 
     if (GET_LMASK(arg2) == 0) {
-        tmp = do_lbu(GET_OFFSET(arg2, 3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
         arg1 = (arg1 & 0xFFFFFF00) | tmp;
     }
     return (int32_t)arg1;
 }
 
-target_ulong helper_lwr(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_lwr(CPUMIPSState *env, target_ulong arg1,
+                        target_ulong arg2, int mem_idx)
 {
     target_ulong tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0xFFFFFF00) | tmp;
 
     if (GET_LMASK(arg2) >= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, -1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
         arg1 = (arg1 & 0xFFFF00FF) | (tmp << 8);
     }
 
     if (GET_LMASK(arg2) >= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, -2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
         arg1 = (arg1 & 0xFF00FFFF) | (tmp << 16);
     }
 
     if (GET_LMASK(arg2) == 3) {
-        tmp = do_lbu(GET_OFFSET(arg2, -3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
         arg1 = (arg1 & 0x00FFFFFF) | (tmp << 24);
     }
     return (int32_t)arg1;
 }
 
-void helper_swl(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_swl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)(arg1 >> 24), mem_idx);
+    do_sb(env, arg2, (uint8_t)(arg1 >> 24), mem_idx);
 
     if (GET_LMASK(arg2) <= 2)
-        do_sb(GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK(arg2) <= 1)
-        do_sb(GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK(arg2) == 0)
-        do_sb(GET_OFFSET(arg2, 3), (uint8_t)arg1, mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 3), (uint8_t)arg1, mem_idx);
 }
 
-void helper_swr(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_swr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)arg1, mem_idx);
+    do_sb(env, arg2, (uint8_t)arg1, mem_idx);
 
     if (GET_LMASK(arg2) >= 1)
-        do_sb(GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK(arg2) >= 2)
-        do_sb(GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK(arg2) == 3)
-        do_sb(GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
 }
 
 #if defined(TARGET_MIPS64)
@@ -396,167 +425,172 @@
 #define GET_LMASK64(v) (((v) & 7) ^ 7)
 #endif
 
-target_ulong helper_ldl(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_ldl(CPUMIPSState *env, target_ulong arg1,
+                        target_ulong arg2, int mem_idx)
 {
     uint64_t tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
 
     if (GET_LMASK64(arg2) <= 6) {
-        tmp = do_lbu(GET_OFFSET(arg2, 1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 1), mem_idx);
         arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
     }
 
     if (GET_LMASK64(arg2) <= 5) {
-        tmp = do_lbu(GET_OFFSET(arg2, 2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 2), mem_idx);
         arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
     }
 
     if (GET_LMASK64(arg2) <= 4) {
-        tmp = do_lbu(GET_OFFSET(arg2, 3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 3), mem_idx);
         arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
     }
 
     if (GET_LMASK64(arg2) <= 3) {
-        tmp = do_lbu(GET_OFFSET(arg2, 4), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 4), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
     }
 
     if (GET_LMASK64(arg2) <= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, 5), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 5), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
     }
 
     if (GET_LMASK64(arg2) <= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, 6), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 6), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp << 8);
     }
 
     if (GET_LMASK64(arg2) == 0) {
-        tmp = do_lbu(GET_OFFSET(arg2, 7), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, 7), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
     }
 
     return arg1;
 }
 
-target_ulong helper_ldr(target_ulong arg1, target_ulong arg2, int mem_idx)
+target_ulong helper_ldr(CPUMIPSState *env, target_ulong arg1,
+                        target_ulong arg2, int mem_idx)
 {
     uint64_t tmp;
 
-    tmp = do_lbu(arg2, mem_idx);
+    tmp = do_lbu(env, arg2, mem_idx);
     arg1 = (arg1 & 0xFFFFFFFFFFFFFF00ULL) | tmp;
 
     if (GET_LMASK64(arg2) >= 1) {
-        tmp = do_lbu(GET_OFFSET(arg2, -1), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -1), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFFFF00FFULL) | (tmp  << 8);
     }
 
     if (GET_LMASK64(arg2) >= 2) {
-        tmp = do_lbu(GET_OFFSET(arg2, -2), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -2), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFFFF00FFFFULL) | (tmp << 16);
     }
 
     if (GET_LMASK64(arg2) >= 3) {
-        tmp = do_lbu(GET_OFFSET(arg2, -3), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -3), mem_idx);
         arg1 = (arg1 & 0xFFFFFFFF00FFFFFFULL) | (tmp << 24);
     }
 
     if (GET_LMASK64(arg2) >= 4) {
-        tmp = do_lbu(GET_OFFSET(arg2, -4), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -4), mem_idx);
         arg1 = (arg1 & 0xFFFFFF00FFFFFFFFULL) | (tmp << 32);
     }
 
     if (GET_LMASK64(arg2) >= 5) {
-        tmp = do_lbu(GET_OFFSET(arg2, -5), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -5), mem_idx);
         arg1 = (arg1 & 0xFFFF00FFFFFFFFFFULL) | (tmp << 40);
     }
 
     if (GET_LMASK64(arg2) >= 6) {
-        tmp = do_lbu(GET_OFFSET(arg2, -6), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -6), mem_idx);
         arg1 = (arg1 & 0xFF00FFFFFFFFFFFFULL) | (tmp << 48);
     }
 
     if (GET_LMASK64(arg2) == 7) {
-        tmp = do_lbu(GET_OFFSET(arg2, -7), mem_idx);
+        tmp = do_lbu(env, GET_OFFSET(arg2, -7), mem_idx);
         arg1 = (arg1 & 0x00FFFFFFFFFFFFFFULL) | (tmp << 56);
     }
 
     return arg1;
 }
 
-void helper_sdl(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_sdl(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)(arg1 >> 56), mem_idx);
+    do_sb(env, arg2, (uint8_t)(arg1 >> 56), mem_idx);
 
     if (GET_LMASK64(arg2) <= 6)
-        do_sb(GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 48), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 1), (uint8_t)(arg1 >> 48), mem_idx);
 
     if (GET_LMASK64(arg2) <= 5)
-        do_sb(GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 40), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 2), (uint8_t)(arg1 >> 40), mem_idx);
 
     if (GET_LMASK64(arg2) <= 4)
-        do_sb(GET_OFFSET(arg2, 3), (uint8_t)(arg1 >> 32), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 3), (uint8_t)(arg1 >> 32), mem_idx);
 
     if (GET_LMASK64(arg2) <= 3)
-        do_sb(GET_OFFSET(arg2, 4), (uint8_t)(arg1 >> 24), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 4), (uint8_t)(arg1 >> 24), mem_idx);
 
     if (GET_LMASK64(arg2) <= 2)
-        do_sb(GET_OFFSET(arg2, 5), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 5), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK64(arg2) <= 1)
-        do_sb(GET_OFFSET(arg2, 6), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 6), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK64(arg2) <= 0)
-        do_sb(GET_OFFSET(arg2, 7), (uint8_t)arg1, mem_idx);
+        do_sb(env, GET_OFFSET(arg2, 7), (uint8_t)arg1, mem_idx);
 }
 
-void helper_sdr(target_ulong arg1, target_ulong arg2, int mem_idx)
+void helper_sdr(CPUMIPSState *env, target_ulong arg1, target_ulong arg2,
+                int mem_idx)
 {
-    do_sb(arg2, (uint8_t)arg1, mem_idx);
+    do_sb(env, arg2, (uint8_t)arg1, mem_idx);
 
     if (GET_LMASK64(arg2) >= 1)
-        do_sb(GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -1), (uint8_t)(arg1 >> 8), mem_idx);
 
     if (GET_LMASK64(arg2) >= 2)
-        do_sb(GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -2), (uint8_t)(arg1 >> 16), mem_idx);
 
     if (GET_LMASK64(arg2) >= 3)
-        do_sb(GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -3), (uint8_t)(arg1 >> 24), mem_idx);
 
     if (GET_LMASK64(arg2) >= 4)
-        do_sb(GET_OFFSET(arg2, -4), (uint8_t)(arg1 >> 32), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -4), (uint8_t)(arg1 >> 32), mem_idx);
 
     if (GET_LMASK64(arg2) >= 5)
-        do_sb(GET_OFFSET(arg2, -5), (uint8_t)(arg1 >> 40), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -5), (uint8_t)(arg1 >> 40), mem_idx);
 
     if (GET_LMASK64(arg2) >= 6)
-        do_sb(GET_OFFSET(arg2, -6), (uint8_t)(arg1 >> 48), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -6), (uint8_t)(arg1 >> 48), mem_idx);
 
     if (GET_LMASK64(arg2) == 7)
-        do_sb(GET_OFFSET(arg2, -7), (uint8_t)(arg1 >> 56), mem_idx);
+        do_sb(env, GET_OFFSET(arg2, -7), (uint8_t)(arg1 >> 56), mem_idx);
 }
 #endif /* TARGET_MIPS64 */
 
 static const int multiple_regs[] = { 16, 17, 18, 19, 20, 21, 22, 23, 30 };
 
-void helper_lwm (target_ulong addr, target_ulong reglist, uint32_t mem_idx)
+void helper_lwm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
 #ifdef CONFIG_USER_ONLY
 #undef ldfun
-#define ldfun ldl_raw
+#define ldfun(env, addr) ldl_raw(addr)
 #else
-    uint32_t (*ldfun)(target_ulong);
+    uint32_t (*ldfun)(CPUMIPSState *env, target_ulong);
 
     switch (mem_idx)
     {
-    case 0: ldfun = ldl_kernel; break;
-    case 1: ldfun = ldl_super; break;
+    case 0: ldfun = cpu_ldl_kernel; break;
+    case 1: ldfun = cpu_ldl_super; break;
     default:
-    case 2: ldfun = ldl_user; break;
+    case 2: ldfun = cpu_ldl_user; break;
     }
 #endif
 
@@ -564,32 +598,33 @@
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            env->active_tc.gpr[multiple_regs[i]] = (target_long) ldfun(addr);
+            env->active_tc.gpr[multiple_regs[i]] = (target_long)ldfun(env, addr);
             addr += 4;
         }
     }
 
     if (do_r31) {
-        env->active_tc.gpr[31] = (target_long) ldfun(addr);
+        env->active_tc.gpr[31] = (target_long)ldfun(env, addr);
     }
 }
 
-void helper_swm (target_ulong addr, target_ulong reglist, uint32_t mem_idx)
+void helper_swm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
 #ifdef CONFIG_USER_ONLY
 #undef stfun
-#define stfun stl_raw
+#define stfun(env, addr, val) stl_raw(addr, val)
 #else
-    void (*stfun)(target_ulong, uint32_t);
+    void (*stfun)(CPUMIPSState *env, target_ulong, uint32_t);
 
     switch (mem_idx)
     {
-    case 0: stfun = stl_kernel; break;
-    case 1: stfun = stl_super; break;
+    case 0: stfun = cpu_stl_kernel; break;
+    case 1: stfun = cpu_stl_super; break;
      default:
-    case 2: stfun = stl_user; break;
+    case 2: stfun = cpu_stl_user; break;
     }
 #endif
 
@@ -597,33 +632,34 @@
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            stfun(addr, env->active_tc.gpr[multiple_regs[i]]);
+            stfun(env, addr, env->active_tc.gpr[multiple_regs[i]]);
             addr += 4;
         }
     }
 
     if (do_r31) {
-        stfun(addr, env->active_tc.gpr[31]);
+        stfun(env, addr, env->active_tc.gpr[31]);
     }
 }
 
 #if defined(TARGET_MIPS64)
-void helper_ldm (target_ulong addr, target_ulong reglist, uint32_t mem_idx)
+void helper_ldm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
 #ifdef CONFIG_USER_ONLY
 #undef ldfun
-#define ldfun ldq_raw
+#define ldfun(env, addr) ldq_raw(addr)
 #else
-    uint64_t (*ldfun)(target_ulong);
+    uint64_t (*ldfun)(CPUMIPSState *env, target_ulong);
 
     switch (mem_idx)
     {
-    case 0: ldfun = ldq_kernel; break;
-    case 1: ldfun = ldq_super; break;
+    case 0: ldfun = cpu_ldq_kernel; break;
+    case 1: ldfun = cpu_ldq_super; break;
     default:
-    case 2: ldfun = ldq_user; break;
+    case 2: ldfun = cpu_ldq_user; break;
     }
 #endif
 
@@ -631,32 +667,33 @@
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            env->active_tc.gpr[multiple_regs[i]] = ldfun(addr);
+            env->active_tc.gpr[multiple_regs[i]] = ldfun(env, addr);
             addr += 8;
         }
     }
 
     if (do_r31) {
-        env->active_tc.gpr[31] = ldfun(addr);
+        env->active_tc.gpr[31] = ldfun(env, addr);
     }
 }
 
-void helper_sdm (target_ulong addr, target_ulong reglist, uint32_t mem_idx)
+void helper_sdm(CPUMIPSState *env, target_ulong addr, target_ulong reglist,
+                uint32_t mem_idx)
 {
     target_ulong base_reglist = reglist & 0xf;
     target_ulong do_r31 = reglist & 0x10;
 #ifdef CONFIG_USER_ONLY
 #undef stfun
-#define stfun stq_raw
+#define stfun(env, addr, val) stq_raw(addr, val)
 #else
-    void (*stfun)(target_ulong, uint64_t);
+    void (*stfun)(CPUMIPSState *env, target_ulong, uint64_t);
 
     switch (mem_idx)
     {
-    case 0: stfun = stq_kernel; break;
-    case 1: stfun = stq_super; break;
+    case 0: stfun = cpu_stq_kernel; break;
+    case 1: stfun = cpu_stq_super; break;
      default:
-    case 2: stfun = stq_user; break;
+    case 2: stfun = cpu_stq_user; break;
     }
 #endif
 
@@ -664,13 +701,13 @@
         target_ulong i;
 
         for (i = 0; i < base_reglist; i++) {
-            stfun(addr, env->active_tc.gpr[multiple_regs[i]]);
+            stfun(env, addr, env->active_tc.gpr[multiple_regs[i]]);
             addr += 8;
         }
     }
 
     if (do_r31) {
-        stfun(addr, env->active_tc.gpr[31]);
+        stfun(env, addr, env->active_tc.gpr[31]);
     }
 }
 #endif
@@ -723,7 +760,7 @@
    FIXME: This code assumes that all VPEs have the same number of TCs,
           which depends on runtime setup. Can probably be fixed by
           walking the list of CPUMIPSStates.  */
-static CPUMIPSState *mips_cpu_map_tc(int *tc)
+static CPUMIPSState *mips_cpu_map_tc(CPUMIPSState *env, int *tc)
 {
     CPUMIPSState *other;
     int vpe_idx, nr_threads = env->nr_threads;
@@ -750,7 +787,7 @@
    These helper call synchronizes the regs for a given cpu.  */
 
 /* Called for updates to CP0_Status.  */
-static void sync_c0_status(CPUMIPSState *cpu, int tc)
+static void sync_c0_status(CPUMIPSState *env, CPUMIPSState *cpu, int tc)
 {
     int32_t tcstatus, *tcst;
     uint32_t v = cpu->CP0_Status;
@@ -785,7 +822,8 @@
 }
 
 /* Called for updates to CP0_TCStatus.  */
-static void sync_c0_tcstatus(CPUMIPSState *cpu, int tc, target_ulong v)
+static void sync_c0_tcstatus(CPUMIPSState *cpu, int tc,
+                             target_ulong v)
 {
     uint32_t status;
     uint32_t tcu, tmx, tasid, tksu;
@@ -834,35 +872,35 @@
 }
 
 /* CP0 helpers */
-target_ulong helper_mfc0_mvpcontrol (void)
+target_ulong helper_mfc0_mvpcontrol(CPUMIPSState *env)
 {
     return env->mvp->CP0_MVPControl;
 }
 
-target_ulong helper_mfc0_mvpconf0 (void)
+target_ulong helper_mfc0_mvpconf0(CPUMIPSState *env)
 {
     return env->mvp->CP0_MVPConf0;
 }
 
-target_ulong helper_mfc0_mvpconf1 (void)
+target_ulong helper_mfc0_mvpconf1(CPUMIPSState *env)
 {
     return env->mvp->CP0_MVPConf1;
 }
 
-target_ulong helper_mfc0_random (void)
+target_ulong helper_mfc0_random(CPUMIPSState *env)
 {
     return (int32_t)cpu_mips_get_random(env);
 }
 
-target_ulong helper_mfc0_tcstatus (void)
+target_ulong helper_mfc0_tcstatus(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCStatus;
 }
 
-target_ulong helper_mftc0_tcstatus(void)
+target_ulong helper_mftc0_tcstatus(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCStatus;
@@ -870,15 +908,15 @@
         return other->tcs[other_tc].CP0_TCStatus;
 }
 
-target_ulong helper_mfc0_tcbind (void)
+target_ulong helper_mfc0_tcbind(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCBind;
 }
 
-target_ulong helper_mftc0_tcbind(void)
+target_ulong helper_mftc0_tcbind(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCBind;
@@ -886,15 +924,15 @@
         return other->tcs[other_tc].CP0_TCBind;
 }
 
-target_ulong helper_mfc0_tcrestart (void)
+target_ulong helper_mfc0_tcrestart(CPUMIPSState *env)
 {
     return env->active_tc.PC;
 }
 
-target_ulong helper_mftc0_tcrestart(void)
+target_ulong helper_mftc0_tcrestart(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.PC;
@@ -902,15 +940,15 @@
         return other->tcs[other_tc].PC;
 }
 
-target_ulong helper_mfc0_tchalt (void)
+target_ulong helper_mfc0_tchalt(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCHalt;
 }
 
-target_ulong helper_mftc0_tchalt(void)
+target_ulong helper_mftc0_tchalt(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCHalt;
@@ -918,15 +956,15 @@
         return other->tcs[other_tc].CP0_TCHalt;
 }
 
-target_ulong helper_mfc0_tccontext (void)
+target_ulong helper_mfc0_tccontext(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCContext;
 }
 
-target_ulong helper_mftc0_tccontext(void)
+target_ulong helper_mftc0_tccontext(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCContext;
@@ -934,15 +972,15 @@
         return other->tcs[other_tc].CP0_TCContext;
 }
 
-target_ulong helper_mfc0_tcschedule (void)
+target_ulong helper_mfc0_tcschedule(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCSchedule;
 }
 
-target_ulong helper_mftc0_tcschedule(void)
+target_ulong helper_mftc0_tcschedule(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCSchedule;
@@ -950,15 +988,15 @@
         return other->tcs[other_tc].CP0_TCSchedule;
 }
 
-target_ulong helper_mfc0_tcschefback (void)
+target_ulong helper_mfc0_tcschefback(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCScheFBack;
 }
 
-target_ulong helper_mftc0_tcschefback(void)
+target_ulong helper_mftc0_tcschefback(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.CP0_TCScheFBack;
@@ -966,24 +1004,24 @@
         return other->tcs[other_tc].CP0_TCScheFBack;
 }
 
-target_ulong helper_mfc0_count (void)
+target_ulong helper_mfc0_count(CPUMIPSState *env)
 {
     return (int32_t)cpu_mips_get_count(env);
 }
 
-target_ulong helper_mftc0_entryhi(void)
+target_ulong helper_mftc0_entryhi(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_EntryHi;
 }
 
-target_ulong helper_mftc0_cause(void)
+target_ulong helper_mftc0_cause(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     int32_t tccause;
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc) {
         tccause = other->CP0_Cause;
@@ -994,30 +1032,30 @@
     return tccause;
 }
 
-target_ulong helper_mftc0_status(void)
+target_ulong helper_mftc0_status(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_Status;
 }
 
-target_ulong helper_mfc0_lladdr (void)
+target_ulong helper_mfc0_lladdr(CPUMIPSState *env)
 {
     return (int32_t)(env->lladdr >> env->CP0_LLAddr_shift);
 }
 
-target_ulong helper_mfc0_watchlo (uint32_t sel)
+target_ulong helper_mfc0_watchlo(CPUMIPSState *env, uint32_t sel)
 {
     return (int32_t)env->CP0_WatchLo[sel];
 }
 
-target_ulong helper_mfc0_watchhi (uint32_t sel)
+target_ulong helper_mfc0_watchhi(CPUMIPSState *env, uint32_t sel)
 {
     return env->CP0_WatchHi[sel];
 }
 
-target_ulong helper_mfc0_debug (void)
+target_ulong helper_mfc0_debug(CPUMIPSState *env)
 {
     target_ulong t0 = env->CP0_Debug;
     if (env->hflags & MIPS_HFLAG_DM)
@@ -1026,11 +1064,11 @@
     return t0;
 }
 
-target_ulong helper_mftc0_debug(void)
+target_ulong helper_mftc0_debug(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     int32_t tcstatus;
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         tcstatus = other->active_tc.CP0_Debug_tcstatus;
@@ -1043,43 +1081,43 @@
 }
 
 #if defined(TARGET_MIPS64)
-target_ulong helper_dmfc0_tcrestart (void)
+target_ulong helper_dmfc0_tcrestart(CPUMIPSState *env)
 {
     return env->active_tc.PC;
 }
 
-target_ulong helper_dmfc0_tchalt (void)
+target_ulong helper_dmfc0_tchalt(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCHalt;
 }
 
-target_ulong helper_dmfc0_tccontext (void)
+target_ulong helper_dmfc0_tccontext(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCContext;
 }
 
-target_ulong helper_dmfc0_tcschedule (void)
+target_ulong helper_dmfc0_tcschedule(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCSchedule;
 }
 
-target_ulong helper_dmfc0_tcschefback (void)
+target_ulong helper_dmfc0_tcschefback(CPUMIPSState *env)
 {
     return env->active_tc.CP0_TCScheFBack;
 }
 
-target_ulong helper_dmfc0_lladdr (void)
+target_ulong helper_dmfc0_lladdr(CPUMIPSState *env)
 {
     return env->lladdr >> env->CP0_LLAddr_shift;
 }
 
-target_ulong helper_dmfc0_watchlo (uint32_t sel)
+target_ulong helper_dmfc0_watchlo(CPUMIPSState *env, uint32_t sel)
 {
     return env->CP0_WatchLo[sel];
 }
 #endif /* TARGET_MIPS64 */
 
-void helper_mtc0_index (target_ulong arg1)
+void helper_mtc0_index(CPUMIPSState *env, target_ulong arg1)
 {
     int num = 1;
     unsigned int tmp = env->tlb->nb_tlb;
@@ -1091,7 +1129,7 @@
     env->CP0_Index = (env->CP0_Index & 0x80000000) | (arg1 & (num - 1));
 }
 
-void helper_mtc0_mvpcontrol (target_ulong arg1)
+void helper_mtc0_mvpcontrol(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
     uint32_t newval;
@@ -1108,7 +1146,7 @@
     env->mvp->CP0_MVPControl = newval;
 }
 
-void helper_mtc0_vpecontrol (target_ulong arg1)
+void helper_mtc0_vpecontrol(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask;
     uint32_t newval;
@@ -1125,10 +1163,10 @@
     env->CP0_VPEControl = newval;
 }
 
-void helper_mttc0_vpecontrol(target_ulong arg1)
+void helper_mttc0_vpecontrol(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
     uint32_t mask;
     uint32_t newval;
 
@@ -1141,23 +1179,23 @@
     other->CP0_VPEControl = newval;
 }
 
-target_ulong helper_mftc0_vpecontrol(void)
+target_ulong helper_mftc0_vpecontrol(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
     /* FIXME: Mask away return zero on read bits.  */
     return other->CP0_VPEControl;
 }
 
-target_ulong helper_mftc0_vpeconf0(void)
+target_ulong helper_mftc0_vpeconf0(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_VPEConf0;
 }
 
-void helper_mtc0_vpeconf0 (target_ulong arg1)
+void helper_mtc0_vpeconf0(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
     uint32_t newval;
@@ -1174,10 +1212,10 @@
     env->CP0_VPEConf0 = newval;
 }
 
-void helper_mttc0_vpeconf0(target_ulong arg1)
+void helper_mttc0_vpeconf0(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
     uint32_t mask = 0;
     uint32_t newval;
 
@@ -1188,7 +1226,7 @@
     other->CP0_VPEConf0 = newval;
 }
 
-void helper_mtc0_vpeconf1 (target_ulong arg1)
+void helper_mtc0_vpeconf1(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = 0;
     uint32_t newval;
@@ -1206,25 +1244,25 @@
     env->CP0_VPEConf1 = newval;
 }
 
-void helper_mtc0_yqmask (target_ulong arg1)
+void helper_mtc0_yqmask(CPUMIPSState *env, target_ulong arg1)
 {
     /* Yield qualifier inputs not implemented. */
     env->CP0_YQMask = 0x00000000;
 }
 
-void helper_mtc0_vpeopt (target_ulong arg1)
+void helper_mtc0_vpeopt(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_VPEOpt = arg1 & 0x0000ffff;
 }
 
-void helper_mtc0_entrylo0 (target_ulong arg1)
+void helper_mtc0_entrylo0(CPUMIPSState *env, target_ulong arg1)
 {
     /* Large physaddr (PABITS) not implemented */
     /* 1k pages not implemented */
     env->CP0_EntryLo0 = arg1 & 0x3FFFFFFF;
 }
 
-void helper_mtc0_tcstatus (target_ulong arg1)
+void helper_mtc0_tcstatus(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = env->CP0_TCStatus_rw_bitmask;
     uint32_t newval;
@@ -1235,10 +1273,10 @@
     sync_c0_tcstatus(env, env->current_tc, newval);
 }
 
-void helper_mttc0_tcstatus (target_ulong arg1)
+void helper_mttc0_tcstatus(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.CP0_TCStatus = arg1;
@@ -1247,7 +1285,7 @@
     sync_c0_tcstatus(other, other_tc, arg1);
 }
 
-void helper_mtc0_tcbind (target_ulong arg1)
+void helper_mtc0_tcbind(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = (1 << CP0TCBd_TBE);
     uint32_t newval;
@@ -1258,12 +1296,12 @@
     env->active_tc.CP0_TCBind = newval;
 }
 
-void helper_mttc0_tcbind (target_ulong arg1)
+void helper_mttc0_tcbind(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     uint32_t mask = (1 << CP0TCBd_TBE);
     uint32_t newval;
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other->mvp->CP0_MVPControl & (1 << CP0MVPCo_VPC))
         mask |= (1 << CP0TCBd_CurVPE);
@@ -1276,7 +1314,7 @@
     }
 }
 
-void helper_mtc0_tcrestart (target_ulong arg1)
+void helper_mtc0_tcrestart(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.PC = arg1;
     env->active_tc.CP0_TCStatus &= ~(1 << CP0TCSt_TDS);
@@ -1284,10 +1322,10 @@
     /* MIPS16 not implemented. */
 }
 
-void helper_mttc0_tcrestart (target_ulong arg1)
+void helper_mttc0_tcrestart(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc) {
         other->active_tc.PC = arg1;
@@ -1302,7 +1340,7 @@
     }
 }
 
-void helper_mtc0_tchalt (target_ulong arg1)
+void helper_mtc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCHalt = arg1 & 0x1;
 
@@ -1314,10 +1352,10 @@
     }
 }
 
-void helper_mttc0_tchalt (target_ulong arg1)
+void helper_mttc0_tchalt(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     // TODO: Halt TC / Restart (if allocated+active) TC.
 
@@ -1333,15 +1371,15 @@
     }
 }
 
-void helper_mtc0_tccontext (target_ulong arg1)
+void helper_mtc0_tccontext(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCContext = arg1;
 }
 
-void helper_mttc0_tccontext (target_ulong arg1)
+void helper_mttc0_tccontext(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.CP0_TCContext = arg1;
@@ -1349,15 +1387,15 @@
         other->tcs[other_tc].CP0_TCContext = arg1;
 }
 
-void helper_mtc0_tcschedule (target_ulong arg1)
+void helper_mtc0_tcschedule(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCSchedule = arg1;
 }
 
-void helper_mttc0_tcschedule (target_ulong arg1)
+void helper_mttc0_tcschedule(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.CP0_TCSchedule = arg1;
@@ -1365,15 +1403,15 @@
         other->tcs[other_tc].CP0_TCSchedule = arg1;
 }
 
-void helper_mtc0_tcschefback (target_ulong arg1)
+void helper_mtc0_tcschefback(CPUMIPSState *env, target_ulong arg1)
 {
     env->active_tc.CP0_TCScheFBack = arg1;
 }
 
-void helper_mttc0_tcschefback (target_ulong arg1)
+void helper_mttc0_tcschefback(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.CP0_TCScheFBack = arg1;
@@ -1381,25 +1419,25 @@
         other->tcs[other_tc].CP0_TCScheFBack = arg1;
 }
 
-void helper_mtc0_entrylo1 (target_ulong arg1)
+void helper_mtc0_entrylo1(CPUMIPSState *env, target_ulong arg1)
 {
     /* Large physaddr (PABITS) not implemented */
     /* 1k pages not implemented */
     env->CP0_EntryLo1 = arg1 & 0x3FFFFFFF;
 }
 
-void helper_mtc0_context (target_ulong arg1)
+void helper_mtc0_context(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Context = (env->CP0_Context & 0x007FFFFF) | (arg1 & ~0x007FFFFF);
 }
 
-void helper_mtc0_pagemask (target_ulong arg1)
+void helper_mtc0_pagemask(CPUMIPSState *env, target_ulong arg1)
 {
     /* 1k pages not implemented */
     env->CP0_PageMask = arg1 & (0x1FFFFFFF & (TARGET_PAGE_MASK << 1));
 }
 
-void helper_mtc0_pagegrain (target_ulong arg1)
+void helper_mtc0_pagegrain(CPUMIPSState *env, target_ulong arg1)
 {
     /* SmartMIPS not implemented */
     /* Large physaddr (PABITS) not implemented */
@@ -1407,47 +1445,47 @@
     env->CP0_PageGrain = 0;
 }
 
-void helper_mtc0_wired (target_ulong arg1)
+void helper_mtc0_wired(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Wired = arg1 % env->tlb->nb_tlb;
 }
 
-void helper_mtc0_srsconf0 (target_ulong arg1)
+void helper_mtc0_srsconf0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf0 |= arg1 & env->CP0_SRSConf0_rw_bitmask;
 }
 
-void helper_mtc0_srsconf1 (target_ulong arg1)
+void helper_mtc0_srsconf1(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf1 |= arg1 & env->CP0_SRSConf1_rw_bitmask;
 }
 
-void helper_mtc0_srsconf2 (target_ulong arg1)
+void helper_mtc0_srsconf2(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf2 |= arg1 & env->CP0_SRSConf2_rw_bitmask;
 }
 
-void helper_mtc0_srsconf3 (target_ulong arg1)
+void helper_mtc0_srsconf3(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf3 |= arg1 & env->CP0_SRSConf3_rw_bitmask;
 }
 
-void helper_mtc0_srsconf4 (target_ulong arg1)
+void helper_mtc0_srsconf4(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_SRSConf4 |= arg1 & env->CP0_SRSConf4_rw_bitmask;
 }
 
-void helper_mtc0_hwrena (target_ulong arg1)
+void helper_mtc0_hwrena(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_HWREna = arg1 & 0x0000000F;
 }
 
-void helper_mtc0_count (target_ulong arg1)
+void helper_mtc0_count(CPUMIPSState *env, target_ulong arg1)
 {
     cpu_mips_store_count(env, arg1);
 }
 
-void helper_mtc0_entryhi (target_ulong arg1)
+void helper_mtc0_entryhi(CPUMIPSState *env, target_ulong arg1)
 {
     target_ulong old, val;
 
@@ -1466,21 +1504,21 @@
         cpu_mips_tlb_flush(env, 1);
 }
 
-void helper_mttc0_entryhi(target_ulong arg1)
+void helper_mttc0_entryhi(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     other->CP0_EntryHi = arg1;
     sync_c0_entryhi(other, other_tc);
 }
 
-void helper_mtc0_compare (target_ulong arg1)
+void helper_mtc0_compare(CPUMIPSState *env, target_ulong arg1)
 {
     cpu_mips_store_compare(env, arg1);
 }
 
-void helper_mtc0_status (target_ulong arg1)
+void helper_mtc0_status(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t val, old;
     uint32_t mask = env->CP0_Status_rw_bitmask;
@@ -1489,7 +1527,7 @@
     old = env->CP0_Status;
     env->CP0_Status = (env->CP0_Status & ~mask) | val;
     if (env->CP0_Config3 & (1 << CP0C3_MT)) {
-        sync_c0_status(env, env->current_tc);
+        sync_c0_status(env, env, env->current_tc);
     } else {
         compute_hflags(env);
     }
@@ -1508,22 +1546,22 @@
     }
 }
 
-void helper_mttc0_status(target_ulong arg1)
+void helper_mttc0_status(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     other->CP0_Status = arg1 & ~0xf1000018;
-    sync_c0_status(other, other_tc);
+    sync_c0_status(env, other, other_tc);
 }
 
-void helper_mtc0_intctl (target_ulong arg1)
+void helper_mtc0_intctl(CPUMIPSState *env, target_ulong arg1)
 {
     /* vectored interrupts not implemented, no performance counters. */
     env->CP0_IntCtl = (env->CP0_IntCtl & ~0x000003e0) | (arg1 & 0x000003e0);
 }
 
-void helper_mtc0_srsctl (target_ulong arg1)
+void helper_mtc0_srsctl(CPUMIPSState *env, target_ulong arg1)
 {
     uint32_t mask = (0xf << CP0SRSCtl_ESS) | (0xf << CP0SRSCtl_PSS);
     env->CP0_SRSCtl = (env->CP0_SRSCtl & ~mask) | (arg1 & mask);
@@ -1557,52 +1595,52 @@
     }
 }
 
-void helper_mtc0_cause(target_ulong arg1)
+void helper_mtc0_cause(CPUMIPSState *env, target_ulong arg1)
 {
     mtc0_cause(env, arg1);
 }
 
-void helper_mttc0_cause(target_ulong arg1)
+void helper_mttc0_cause(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     mtc0_cause(other, arg1);
 }
 
-target_ulong helper_mftc0_epc(void)
+target_ulong helper_mftc0_epc(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_EPC;
 }
 
-target_ulong helper_mftc0_ebase(void)
+target_ulong helper_mftc0_ebase(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     return other->CP0_EBase;
 }
 
-void helper_mtc0_ebase (target_ulong arg1)
+void helper_mtc0_ebase(CPUMIPSState *env, target_ulong arg1)
 {
     /* vectored interrupts not implemented */
     env->CP0_EBase = (env->CP0_EBase & ~0x3FFFF000) | (arg1 & 0x3FFFF000);
 }
 
-void helper_mttc0_ebase(target_ulong arg1)
+void helper_mttc0_ebase(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
     other->CP0_EBase = (other->CP0_EBase & ~0x3FFFF000) | (arg1 & 0x3FFFF000);
 }
 
-target_ulong helper_mftc0_configx(target_ulong idx)
+target_ulong helper_mftc0_configx(CPUMIPSState *env, target_ulong idx)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     switch (idx) {
     case 0: return other->CP0_Config0;
@@ -1618,49 +1656,49 @@
     return 0;
 }
 
-void helper_mtc0_config0 (target_ulong arg1)
+void helper_mtc0_config0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Config0 = (env->CP0_Config0 & 0x81FFFFF8) | (arg1 & 0x00000007);
 }
 
-void helper_mtc0_config2 (target_ulong arg1)
+void helper_mtc0_config2(CPUMIPSState *env, target_ulong arg1)
 {
     /* tertiary/secondary caches not implemented */
     env->CP0_Config2 = (env->CP0_Config2 & 0x8FFF0FFF);
 }
 
-void helper_mtc0_lladdr (target_ulong arg1)
+void helper_mtc0_lladdr(CPUMIPSState *env, target_ulong arg1)
 {
     target_long mask = env->CP0_LLAddr_rw_bitmask;
     arg1 = arg1 << env->CP0_LLAddr_shift;
     env->lladdr = (env->lladdr & ~mask) | (arg1 & mask);
 }
 
-void helper_mtc0_watchlo (target_ulong arg1, uint32_t sel)
+void helper_mtc0_watchlo(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     /* Watch exceptions for instructions, data loads, data stores
        not implemented. */
     env->CP0_WatchLo[sel] = (arg1 & ~0x7);
 }
 
-void helper_mtc0_watchhi (target_ulong arg1, uint32_t sel)
+void helper_mtc0_watchhi(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     env->CP0_WatchHi[sel] = (arg1 & 0x40FF0FF8);
     env->CP0_WatchHi[sel] &= ~(env->CP0_WatchHi[sel] & arg1 & 0x7);
 }
 
-void helper_mtc0_xcontext (target_ulong arg1)
+void helper_mtc0_xcontext(CPUMIPSState *env, target_ulong arg1)
 {
     target_ulong mask = (1ULL << (env->SEGBITS - 7)) - 1;
     env->CP0_XContext = (env->CP0_XContext & mask) | (arg1 & ~mask);
 }
 
-void helper_mtc0_framemask (target_ulong arg1)
+void helper_mtc0_framemask(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Framemask = arg1; /* XXX */
 }
 
-void helper_mtc0_debug (target_ulong arg1)
+void helper_mtc0_debug(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Debug = (env->CP0_Debug & 0x8C03FC1F) | (arg1 & 0x13300120);
     if (arg1 & (1 << CP0DB_DM))
@@ -1669,11 +1707,11 @@
         env->hflags &= ~MIPS_HFLAG_DM;
 }
 
-void helper_mttc0_debug(target_ulong arg1)
+void helper_mttc0_debug(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
     uint32_t val = arg1 & ((1 << CP0DB_SSt) | (1 << CP0DB_Halt));
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     /* XXX: Might be wrong, check with EJTAG spec. */
     if (other_tc == other->current_tc)
@@ -1685,36 +1723,36 @@
                      (arg1 & ~((1 << CP0DB_SSt) | (1 << CP0DB_Halt)));
 }
 
-void helper_mtc0_performance0 (target_ulong arg1)
+void helper_mtc0_performance0(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_Performance0 = arg1 & 0x000007ff;
 }
 
-void helper_mtc0_taglo (target_ulong arg1)
+void helper_mtc0_taglo(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_TagLo = arg1 & 0xFFFFFCF6;
 }
 
-void helper_mtc0_datalo (target_ulong arg1)
+void helper_mtc0_datalo(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_DataLo = arg1; /* XXX */
 }
 
-void helper_mtc0_taghi (target_ulong arg1)
+void helper_mtc0_taghi(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_TagHi = arg1; /* XXX */
 }
 
-void helper_mtc0_datahi (target_ulong arg1)
+void helper_mtc0_datahi(CPUMIPSState *env, target_ulong arg1)
 {
     env->CP0_DataHi = arg1; /* XXX */
 }
 
 /* MIPS MT functions */
-target_ulong helper_mftgpr(uint32_t sel)
+target_ulong helper_mftgpr(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.gpr[sel];
@@ -1722,10 +1760,10 @@
         return other->tcs[other_tc].gpr[sel];
 }
 
-target_ulong helper_mftlo(uint32_t sel)
+target_ulong helper_mftlo(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.LO[sel];
@@ -1733,10 +1771,10 @@
         return other->tcs[other_tc].LO[sel];
 }
 
-target_ulong helper_mfthi(uint32_t sel)
+target_ulong helper_mfthi(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.HI[sel];
@@ -1744,10 +1782,10 @@
         return other->tcs[other_tc].HI[sel];
 }
 
-target_ulong helper_mftacx(uint32_t sel)
+target_ulong helper_mftacx(CPUMIPSState *env, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.ACX[sel];
@@ -1755,10 +1793,10 @@
         return other->tcs[other_tc].ACX[sel];
 }
 
-target_ulong helper_mftdsp(void)
+target_ulong helper_mftdsp(CPUMIPSState *env)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         return other->active_tc.DSPControl;
@@ -1766,10 +1804,10 @@
         return other->tcs[other_tc].DSPControl;
 }
 
-void helper_mttgpr(target_ulong arg1, uint32_t sel)
+void helper_mttgpr(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.gpr[sel] = arg1;
@@ -1777,10 +1815,10 @@
         other->tcs[other_tc].gpr[sel] = arg1;
 }
 
-void helper_mttlo(target_ulong arg1, uint32_t sel)
+void helper_mttlo(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.LO[sel] = arg1;
@@ -1788,10 +1826,10 @@
         other->tcs[other_tc].LO[sel] = arg1;
 }
 
-void helper_mtthi(target_ulong arg1, uint32_t sel)
+void helper_mtthi(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.HI[sel] = arg1;
@@ -1799,10 +1837,10 @@
         other->tcs[other_tc].HI[sel] = arg1;
 }
 
-void helper_mttacx(target_ulong arg1, uint32_t sel)
+void helper_mttacx(CPUMIPSState *env, target_ulong arg1, uint32_t sel)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.ACX[sel] = arg1;
@@ -1810,10 +1848,10 @@
         other->tcs[other_tc].ACX[sel] = arg1;
 }
 
-void helper_mttdsp(target_ulong arg1)
+void helper_mttdsp(CPUMIPSState *env, target_ulong arg1)
 {
     int other_tc = env->CP0_VPEControl & (0xff << CP0VPECo_TargTC);
-    CPUMIPSState *other = mips_cpu_map_tc(&other_tc);
+    CPUMIPSState *other = mips_cpu_map_tc(env, &other_tc);
 
     if (other_tc == other->current_tc)
         other->active_tc.DSPControl = arg1;
@@ -1834,7 +1872,7 @@
     return 0;
 }
 
-target_ulong helper_dvpe(void)
+target_ulong helper_dvpe(CPUMIPSState *env)
 {
     CPUMIPSState *other_cpu = first_cpu;
     target_ulong prev = env->mvp->CP0_MVPControl;
@@ -1850,7 +1888,7 @@
     return prev;
 }
 
-target_ulong helper_evpe(void)
+target_ulong helper_evpe(CPUMIPSState *env)
 {
     CPUMIPSState *other_cpu = first_cpu;
     target_ulong prev = env->mvp->CP0_MVPControl;
@@ -1876,7 +1914,7 @@
     // TODO: store to TC register
 }
 
-target_ulong helper_yield(target_ulong arg)
+target_ulong helper_yield(CPUMIPSState *env, target_ulong arg)
 {
     target_long arg1 = arg;
 
@@ -1887,13 +1925,13 @@
                 env->active_tc.CP0_TCStatus & (1 << CP0TCSt_DT)) {
                 env->CP0_VPEControl &= ~(0x7 << CP0VPECo_EXCPT);
                 env->CP0_VPEControl |= 4 << CP0VPECo_EXCPT;
-                helper_raise_exception(EXCP_THREAD);
+                helper_raise_exception(env, EXCP_THREAD);
             }
         }
     } else if (arg1 == 0) {
         if (0 /* TODO: TC underflow */) {
             env->CP0_VPEControl &= ~(0x7 << CP0VPECo_EXCPT);
-            helper_raise_exception(EXCP_THREAD);
+            helper_raise_exception(env, EXCP_THREAD);
         } else {
             // TODO: Deallocate TC
         }
@@ -1901,7 +1939,7 @@
         /* Yield qualifier inputs not implemented. */
         env->CP0_VPEControl &= ~(0x7 << CP0VPECo_EXCPT);
         env->CP0_VPEControl |= 2 << CP0VPECo_EXCPT;
-        helper_raise_exception(EXCP_THREAD);
+        helper_raise_exception(env, EXCP_THREAD);
     }
     return env->CP0_YQMask;
 }
@@ -1923,7 +1961,7 @@
     }
 }
 
-static void r4k_fill_tlb (int idx)
+static void r4k_fill_tlb(CPUMIPSState *env, int idx)
 {
     r4k_tlb_t *tlb;
 
@@ -1946,7 +1984,7 @@
     tlb->PFN[1] = (env->CP0_EntryLo1 >> 6) << 12;
 }
 
-void r4k_helper_tlbwi (void)
+void r4k_helper_tlbwi(CPUMIPSState *env)
 {
     int idx;
 
@@ -1958,18 +1996,18 @@
     r4k_mips_tlb_flush_extra (env, env->tlb->nb_tlb);
 
     r4k_invalidate_tlb(env, idx, 0);
-    r4k_fill_tlb(idx);
+    r4k_fill_tlb(env, idx);
 }
 
-void r4k_helper_tlbwr (void)
+void r4k_helper_tlbwr(CPUMIPSState *env)
 {
     int r = cpu_mips_get_random(env);
 
     r4k_invalidate_tlb(env, r, 1);
-    r4k_fill_tlb(r);
+    r4k_fill_tlb(env, r);
 }
 
-void r4k_helper_tlbp (void)
+void r4k_helper_tlbp(CPUMIPSState *env)
 {
     r4k_tlb_t *tlb;
     target_ulong mask;
@@ -2011,7 +2049,7 @@
     }
 }
 
-void r4k_helper_tlbr (void)
+void r4k_helper_tlbr(CPUMIPSState *env)
 {
     r4k_tlb_t *tlb;
     uint8_t ASID;
@@ -2035,28 +2073,28 @@
                         (tlb->C1 << 3) | (tlb->PFN[1] >> 6);
 }
 
-void helper_tlbwi(void)
+void helper_tlbwi(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbwi();
+    env->tlb->helper_tlbwi(env);
 }
 
-void helper_tlbwr(void)
+void helper_tlbwr(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbwr();
+    env->tlb->helper_tlbwr(env);
 }
 
-void helper_tlbp(void)
+void helper_tlbp(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbp();
+    env->tlb->helper_tlbp(env);
 }
 
-void helper_tlbr(void)
+void helper_tlbr(CPUMIPSState *env)
 {
-    env->tlb->helper_tlbr();
+    env->tlb->helper_tlbr(env);
 }
 
 /* Specials */
-target_ulong helper_di (void)
+target_ulong helper_di(CPUMIPSState *env)
 {
     target_ulong t0 = env->CP0_Status;
 
@@ -2064,7 +2102,7 @@
     return t0;
 }
 
-target_ulong helper_ei (void)
+target_ulong helper_ei(CPUMIPSState *env)
 {
     target_ulong t0 = env->CP0_Status;
 
@@ -2072,7 +2110,7 @@
     return t0;
 }
 
-static void debug_pre_eret (void)
+static void debug_pre_eret(CPUMIPSState *env)
 {
     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
         qemu_log("ERET: PC " TARGET_FMT_lx " EPC " TARGET_FMT_lx,
@@ -2085,7 +2123,7 @@
     }
 }
 
-static void debug_post_eret (void)
+static void debug_post_eret(CPUMIPSState *env)
 {
     if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
         qemu_log("  =>  PC " TARGET_FMT_lx " EPC " TARGET_FMT_lx,
@@ -2103,7 +2141,7 @@
     }
 }
 
-static void set_pc (target_ulong error_pc)
+static void set_pc(CPUMIPSState *env, target_ulong error_pc)
 {
     env->active_tc.PC = error_pc & ~(target_ulong)1;
     if (error_pc & 1) {
@@ -2113,78 +2151,78 @@
     }
 }
 
-void helper_eret (void)
+void helper_eret(CPUMIPSState *env)
 {
-    debug_pre_eret();
+    debug_pre_eret(env);
     if (env->CP0_Status & (1 << CP0St_ERL)) {
-        set_pc(env->CP0_ErrorEPC);
+        set_pc(env, env->CP0_ErrorEPC);
         env->CP0_Status &= ~(1 << CP0St_ERL);
     } else {
-        set_pc(env->CP0_EPC);
+        set_pc(env, env->CP0_EPC);
         env->CP0_Status &= ~(1 << CP0St_EXL);
     }
     compute_hflags(env);
-    debug_post_eret();
+    debug_post_eret(env);
     env->lladdr = 1;
 }
 
-void helper_deret (void)
+void helper_deret(CPUMIPSState *env)
 {
-    debug_pre_eret();
-    set_pc(env->CP0_DEPC);
+    debug_pre_eret(env);
+    set_pc(env, env->CP0_DEPC);
 
     env->hflags &= MIPS_HFLAG_DM;
     compute_hflags(env);
-    debug_post_eret();
+    debug_post_eret(env);
     env->lladdr = 1;
 }
 #endif /* !CONFIG_USER_ONLY */
 
-target_ulong helper_rdhwr_cpunum(void)
+target_ulong helper_rdhwr_cpunum(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 0)))
         return env->CP0_EBase & 0x3ff;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-target_ulong helper_rdhwr_synci_step(void)
+target_ulong helper_rdhwr_synci_step(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 1)))
         return env->SYNCI_Step;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-target_ulong helper_rdhwr_cc(void)
+target_ulong helper_rdhwr_cc(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 2)))
         return env->CP0_Count;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-target_ulong helper_rdhwr_ccres(void)
+target_ulong helper_rdhwr_ccres(CPUMIPSState *env)
 {
     if ((env->hflags & MIPS_HFLAG_CP0) ||
         (env->CP0_HWREna & (1 << 3)))
         return env->CCRes;
     else
-        helper_raise_exception(EXCP_RI);
+        helper_raise_exception(env, EXCP_RI);
 
     return 0;
 }
 
-void helper_pmon (int function)
+void helper_pmon(CPUMIPSState *env, int function)
 {
     function /= 2;
     switch (function) {
@@ -2210,16 +2248,17 @@
     }
 }
 
-void helper_wait (void)
+void helper_wait(CPUMIPSState *env)
 {
     env->halted = 1;
     cpu_reset_interrupt(env, CPU_INTERRUPT_WAKE);
-    helper_raise_exception(EXCP_HLT);
+    helper_raise_exception(env, EXCP_HLT);
 }
 
 #if !defined(CONFIG_USER_ONLY)
 
-static void QEMU_NORETURN do_unaligned_access(target_ulong addr, int is_write,
+static void QEMU_NORETURN do_unaligned_access(CPUMIPSState *env,
+                                              target_ulong addr, int is_write,
                                               int is_user, uintptr_t retaddr);
 
 #define MMUSUFFIX _mmu
@@ -2237,23 +2276,20 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
-static void do_unaligned_access(target_ulong addr, int is_write,
-                                int is_user, uintptr_t retaddr)
+static void do_unaligned_access(CPUMIPSState *env, target_ulong addr,
+                                int is_write, int is_user, uintptr_t retaddr)
 {
     env->CP0_BadVAddr = addr;
-    do_restore_state (retaddr);
-    helper_raise_exception ((is_write == 1) ? EXCP_AdES : EXCP_AdEL);
+    do_restore_state(env, retaddr);
+    helper_raise_exception(env, (is_write == 1) ? EXCP_AdES : EXCP_AdEL);
 }
 
-void tlb_fill(CPUMIPSState *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUMIPSState *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUMIPSState *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = cpu_mips_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
         if (retaddr) {
@@ -2265,20 +2301,17 @@
                 cpu_restore_state(tb, env, retaddr);
             }
         }
-        helper_raise_exception_err(env->exception_index, env->error_code);
+        helper_raise_exception_err(env, env->exception_index, env->error_code);
     }
-    env = saved_env;
 }
 
-void cpu_unassigned_access(CPUMIPSState *env1, target_phys_addr_t addr,
+void cpu_unassigned_access(CPUMIPSState *env, target_phys_addr_t addr,
                            int is_write, int is_exec, int unused, int size)
 {
-    env = env1;
-
     if (is_exec)
-        helper_raise_exception(EXCP_IBE);
+        helper_raise_exception(env, EXCP_IBE);
     else
-        helper_raise_exception(EXCP_DBE);
+        helper_raise_exception(env, EXCP_DBE);
 }
 #endif /* !CONFIG_USER_ONLY */
 
@@ -2307,7 +2340,7 @@
 #define RESTORE_FLUSH_MODE \
     set_flush_to_zero((env->active_fpu.fcr31 & (1 << 24)) != 0, &env->active_fpu.fp_status);
 
-target_ulong helper_cfc1 (uint32_t reg)
+target_ulong helper_cfc1(CPUMIPSState *env, uint32_t reg)
 {
     target_ulong arg1;
 
@@ -2332,7 +2365,7 @@
     return arg1;
 }
 
-void helper_ctc1 (target_ulong arg1, uint32_t reg)
+void helper_ctc1(CPUMIPSState *env, target_ulong arg1, uint32_t reg)
 {
     switch(reg) {
     case 25:
@@ -2366,7 +2399,7 @@
     RESTORE_FLUSH_MODE;
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     if ((GET_FP_ENABLE(env->active_fpu.fcr31) | 0x20) & GET_FP_CAUSE(env->active_fpu.fcr31))
-        helper_raise_exception(EXCP_FPE);
+        helper_raise_exception(env, EXCP_FPE);
 }
 
 static inline int ieee_ex_to_mips(int xcpt)
@@ -2392,13 +2425,13 @@
     return ret;
 }
 
-static inline void update_fcr31(void)
+static inline void update_fcr31(CPUMIPSState *env)
 {
     int tmp = ieee_ex_to_mips(get_float_exception_flags(&env->active_fpu.fp_status));
 
     SET_FP_CAUSE(env->active_fpu.fcr31, tmp);
     if (GET_FP_ENABLE(env->active_fpu.fcr31) & tmp)
-        helper_raise_exception(EXCP_FPE);
+        helper_raise_exception(env, EXCP_FPE);
     else
         UPDATE_FP_FLAGS(env->active_fpu.fcr31, tmp);
 }
@@ -2409,71 +2442,71 @@
    paired single lower "pl", paired single upper "pu".  */
 
 /* unary operations, modifying fp status  */
-uint64_t helper_float_sqrt_d(uint64_t fdt0)
+uint64_t helper_float_sqrt_d(CPUMIPSState *env, uint64_t fdt0)
 {
     return float64_sqrt(fdt0, &env->active_fpu.fp_status);
 }
 
-uint32_t helper_float_sqrt_s(uint32_t fst0)
+uint32_t helper_float_sqrt_s(CPUMIPSState *env, uint32_t fst0)
 {
     return float32_sqrt(fst0, &env->active_fpu.fp_status);
 }
 
-uint64_t helper_float_cvtd_s(uint32_t fst0)
+uint64_t helper_float_cvtd_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float32_to_float64(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint64_t helper_float_cvtd_w(uint32_t wt0)
+uint64_t helper_float_cvtd_w(CPUMIPSState *env, uint32_t wt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = int32_to_float64(wt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint64_t helper_float_cvtd_l(uint64_t dt0)
+uint64_t helper_float_cvtd_l(CPUMIPSState *env, uint64_t dt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = int64_to_float64(dt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint64_t helper_float_cvtl_d(uint64_t fdt0)
+uint64_t helper_float_cvtl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_cvtl_s(uint32_t fst0)
+uint64_t helper_float_cvtl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_cvtps_pw(uint64_t dt0)
+uint64_t helper_float_cvtps_pw(CPUMIPSState *env, uint64_t dt0)
 {
     uint32_t fst2;
     uint32_t fsth2;
@@ -2481,11 +2514,11 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int32_to_float32(dt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     fsth2 = int32_to_float32(dt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_cvtpw_ps(uint64_t fdt0)
+uint64_t helper_float_cvtpw_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
     uint32_t wth2;
@@ -2493,7 +2526,7 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     wth2 = float32_to_int32(fdt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID)) {
         wt2 = FLOAT_SNAN32;
         wth2 = FLOAT_SNAN32;
@@ -2501,81 +2534,81 @@
     return ((uint64_t)wth2 << 32) | wt2;
 }
 
-uint32_t helper_float_cvts_d(uint64_t fdt0)
+uint32_t helper_float_cvts_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float64_to_float32(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint32_t helper_float_cvts_w(uint32_t wt0)
+uint32_t helper_float_cvts_w(CPUMIPSState *env, uint32_t wt0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int32_to_float32(wt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint32_t helper_float_cvts_l(uint64_t dt0)
+uint32_t helper_float_cvts_l(CPUMIPSState *env, uint64_t dt0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = int64_to_float32(dt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint32_t helper_float_cvts_pl(uint32_t wt0)
+uint32_t helper_float_cvts_pl(CPUMIPSState *env, uint32_t wt0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = wt0;
-    update_fcr31();
+    update_fcr31(env);
     return wt2;
 }
 
-uint32_t helper_float_cvts_pu(uint32_t wth0)
+uint32_t helper_float_cvts_pu(CPUMIPSState *env, uint32_t wth0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = wth0;
-    update_fcr31();
+    update_fcr31(env);
     return wt2;
 }
 
-uint32_t helper_float_cvtw_s(uint32_t fst0)
+uint32_t helper_float_cvtw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_cvtw_d(uint64_t fdt0)
+uint32_t helper_float_cvtw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_roundl_d(uint64_t fdt0)
+uint64_t helper_float_roundl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
@@ -2583,13 +2616,13 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_roundl_s(uint32_t fst0)
+uint64_t helper_float_roundl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
@@ -2597,13 +2630,13 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_roundw_d(uint64_t fdt0)
+uint32_t helper_float_roundw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
@@ -2611,13 +2644,13 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_roundw_s(uint32_t fst0)
+uint32_t helper_float_roundw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
@@ -2625,61 +2658,61 @@
     set_float_rounding_mode(float_round_nearest_even, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_truncl_d(uint64_t fdt0)
+uint64_t helper_float_truncl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float64_to_int64_round_to_zero(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_truncl_s(uint32_t fst0)
+uint64_t helper_float_truncl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     dt2 = float32_to_int64_round_to_zero(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_truncw_d(uint64_t fdt0)
+uint32_t helper_float_truncw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float64_to_int32_round_to_zero(fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_truncw_s(uint32_t fst0)
+uint32_t helper_float_truncw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     wt2 = float32_to_int32_round_to_zero(fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_ceill_d(uint64_t fdt0)
+uint64_t helper_float_ceill_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
@@ -2687,13 +2720,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_ceill_s(uint32_t fst0)
+uint64_t helper_float_ceill_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
@@ -2701,13 +2734,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_ceilw_d(uint64_t fdt0)
+uint32_t helper_float_ceilw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
@@ -2715,13 +2748,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_ceilw_s(uint32_t fst0)
+uint32_t helper_float_ceilw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
@@ -2729,13 +2762,13 @@
     set_float_rounding_mode(float_round_up, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint64_t helper_float_floorl_d(uint64_t fdt0)
+uint64_t helper_float_floorl_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t dt2;
 
@@ -2743,13 +2776,13 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     dt2 = float64_to_int64(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint64_t helper_float_floorl_s(uint32_t fst0)
+uint64_t helper_float_floorl_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint64_t dt2;
 
@@ -2757,13 +2790,13 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     dt2 = float32_to_int64(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         dt2 = FLOAT_SNAN64;
     return dt2;
 }
 
-uint32_t helper_float_floorw_d(uint64_t fdt0)
+uint32_t helper_float_floorw_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t wt2;
 
@@ -2771,13 +2804,13 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     wt2 = float64_to_int32(fdt0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
 }
 
-uint32_t helper_float_floorw_s(uint32_t fst0)
+uint32_t helper_float_floorw_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t wt2;
 
@@ -2785,7 +2818,7 @@
     set_float_rounding_mode(float_round_down, &env->active_fpu.fp_status);
     wt2 = float32_to_int32(fst0, &env->active_fpu.fp_status);
     RESTORE_ROUNDING_MODE;
-    update_fcr31();
+    update_fcr31(env);
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & (FP_OVERFLOW | FP_INVALID))
         wt2 = FLOAT_SNAN32;
     return wt2;
@@ -2815,69 +2848,69 @@
 #undef FLOAT_UNOP
 
 /* MIPS specific unary operations */
-uint64_t helper_float_recip_d(uint64_t fdt0)
+uint64_t helper_float_recip_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_recip_s(uint32_t fst0)
+uint32_t helper_float_recip_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_rsqrt_d(uint64_t fdt0)
+uint64_t helper_float_rsqrt_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_rsqrt_s(uint32_t fst0)
+uint32_t helper_float_rsqrt_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_recip1_d(uint64_t fdt0)
+uint64_t helper_float_recip1_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_recip1_s(uint32_t fst0)
+uint32_t helper_float_recip1_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst0, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_recip1_ps(uint64_t fdt0)
+uint64_t helper_float_recip1_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
     uint32_t fsth2;
@@ -2885,33 +2918,33 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fdt0 & 0XFFFFFFFF, &env->active_fpu.fp_status);
     fsth2 = float32_div(FLOAT_ONE32, fdt0 >> 32, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_rsqrt1_d(uint64_t fdt0)
+uint64_t helper_float_rsqrt1_d(CPUMIPSState *env, uint64_t fdt0)
 {
     uint64_t fdt2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_sqrt(fdt0, &env->active_fpu.fp_status);
     fdt2 = float64_div(FLOAT_ONE64, fdt2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_rsqrt1_s(uint32_t fst0)
+uint32_t helper_float_rsqrt1_s(CPUMIPSState *env, uint32_t fst0)
 {
     uint32_t fst2;
 
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_sqrt(fst0, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_rsqrt1_ps(uint64_t fdt0)
+uint64_t helper_float_rsqrt1_ps(CPUMIPSState *env, uint64_t fdt0)
 {
     uint32_t fst2;
     uint32_t fsth2;
@@ -2921,39 +2954,43 @@
     fsth2 = float32_sqrt(fdt0 >> 32, &env->active_fpu.fp_status);
     fst2 = float32_div(FLOAT_ONE32, fst2, &env->active_fpu.fp_status);
     fsth2 = float32_div(FLOAT_ONE32, fsth2, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-#define FLOAT_OP(name, p) void helper_float_##name##_##p(void)
+#define FLOAT_OP(name, p) void helper_float_##name##_##p(CPUMIPSState *env)
 
 /* binary operations */
 #define FLOAT_BINOP(name)                                          \
-uint64_t helper_float_ ## name ## _d(uint64_t fdt0, uint64_t fdt1)     \
+uint64_t helper_float_ ## name ## _d(CPUMIPSState *env,            \
+                                     uint64_t fdt0, uint64_t fdt1) \
 {                                                                  \
     uint64_t dt2;                                                  \
                                                                    \
     set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     dt2 = float64_ ## name (fdt0, fdt1, &env->active_fpu.fp_status);     \
-    update_fcr31();                                                \
+    update_fcr31(env);                                             \
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID)                \
         dt2 = FLOAT_QNAN64;                                        \
     return dt2;                                                    \
 }                                                                  \
                                                                    \
-uint32_t helper_float_ ## name ## _s(uint32_t fst0, uint32_t fst1)     \
+uint32_t helper_float_ ## name ## _s(CPUMIPSState *env,            \
+                                     uint32_t fst0, uint32_t fst1) \
 {                                                                  \
     uint32_t wt2;                                                  \
                                                                    \
     set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
-    update_fcr31();                                                \
+    update_fcr31(env);                                             \
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID)                \
         wt2 = FLOAT_QNAN32;                                        \
     return wt2;                                                    \
 }                                                                  \
                                                                    \
-uint64_t helper_float_ ## name ## _ps(uint64_t fdt0, uint64_t fdt1)    \
+uint64_t helper_float_ ## name ## _ps(CPUMIPSState *env,           \
+                                      uint64_t fdt0,               \
+                                      uint64_t fdt1)               \
 {                                                                  \
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;                             \
     uint32_t fsth0 = fdt0 >> 32;                                   \
@@ -2965,7 +3002,7 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);            \
     wt2 = float32_ ## name (fst0, fst1, &env->active_fpu.fp_status);     \
     wth2 = float32_ ## name (fsth0, fsth1, &env->active_fpu.fp_status);  \
-    update_fcr31();                                                \
+    update_fcr31(env);                                             \
     if (GET_FP_CAUSE(env->active_fpu.fcr31) & FP_INVALID) {              \
         wt2 = FLOAT_QNAN32;                                        \
         wth2 = FLOAT_QNAN32;                                       \
@@ -2981,22 +3018,28 @@
 
 /* ternary operations */
 #define FLOAT_TERNOP(name1, name2)                                        \
-uint64_t helper_float_ ## name1 ## name2 ## _d(uint64_t fdt0, uint64_t fdt1,  \
-                                           uint64_t fdt2)                 \
+uint64_t helper_float_ ## name1 ## name2 ## _d(CPUMIPSState *env,         \
+                                               uint64_t fdt0,             \
+                                               uint64_t fdt1,             \
+                                               uint64_t fdt2)             \
 {                                                                         \
     fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status);          \
     return float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status);          \
 }                                                                         \
                                                                           \
-uint32_t helper_float_ ## name1 ## name2 ## _s(uint32_t fst0, uint32_t fst1,  \
-                                           uint32_t fst2)                 \
+uint32_t helper_float_ ## name1 ## name2 ## _s(CPUMIPSState *env,         \
+                                               uint32_t fst0,             \
+                                               uint32_t fst1,             \
+                                               uint32_t fst2)             \
 {                                                                         \
     fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
     return float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
 }                                                                         \
                                                                           \
-uint64_t helper_float_ ## name1 ## name2 ## _ps(uint64_t fdt0, uint64_t fdt1, \
-                                            uint64_t fdt2)                \
+uint64_t helper_float_ ## name1 ## name2 ## _ps(CPUMIPSState *env,        \
+                                                uint64_t fdt0,            \
+                                                uint64_t fdt1,            \
+                                                uint64_t fdt2)            \
 {                                                                         \
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
     uint32_t fsth0 = fdt0 >> 32;                                          \
@@ -3018,24 +3061,30 @@
 
 /* negated ternary operations */
 #define FLOAT_NTERNOP(name1, name2)                                       \
-uint64_t helper_float_n ## name1 ## name2 ## _d(uint64_t fdt0, uint64_t fdt1, \
-                                           uint64_t fdt2)                 \
+uint64_t helper_float_n ## name1 ## name2 ## _d(CPUMIPSState *env,        \
+                                                uint64_t fdt0,            \
+                                                uint64_t fdt1,            \
+                                                uint64_t fdt2)            \
 {                                                                         \
     fdt0 = float64_ ## name1 (fdt0, fdt1, &env->active_fpu.fp_status);          \
     fdt2 = float64_ ## name2 (fdt0, fdt2, &env->active_fpu.fp_status);          \
     return float64_chs(fdt2);                                             \
 }                                                                         \
                                                                           \
-uint32_t helper_float_n ## name1 ## name2 ## _s(uint32_t fst0, uint32_t fst1, \
-                                           uint32_t fst2)                 \
+uint32_t helper_float_n ## name1 ## name2 ## _s(CPUMIPSState *env,        \
+                                                uint32_t fst0,            \
+                                                uint32_t fst1,            \
+                                                uint32_t fst2)            \
 {                                                                         \
     fst0 = float32_ ## name1 (fst0, fst1, &env->active_fpu.fp_status);          \
     fst2 = float32_ ## name2 (fst0, fst2, &env->active_fpu.fp_status);          \
     return float32_chs(fst2);                                             \
 }                                                                         \
                                                                           \
-uint64_t helper_float_n ## name1 ## name2 ## _ps(uint64_t fdt0, uint64_t fdt1,\
-                                           uint64_t fdt2)                 \
+uint64_t helper_float_n ## name1 ## name2 ## _ps(CPUMIPSState *env,       \
+                                                 uint64_t fdt0,           \
+                                                 uint64_t fdt1,           \
+                                                 uint64_t fdt2)           \
 {                                                                         \
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;                                    \
     uint32_t fsth0 = fdt0 >> 32;                                          \
@@ -3058,25 +3107,25 @@
 #undef FLOAT_NTERNOP
 
 /* MIPS specific binary operations */
-uint64_t helper_float_recip2_d(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_recip2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_mul(fdt0, fdt2, &env->active_fpu.fp_status);
     fdt2 = float64_chs(float64_sub(fdt2, FLOAT_ONE64, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_recip2_s(uint32_t fst0, uint32_t fst2)
+uint32_t helper_float_recip2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_recip2_ps(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_recip2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -3088,31 +3137,31 @@
     fsth2 = float32_mul(fsth0, fsth2, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status));
     fsth2 = float32_chs(float32_sub(fsth2, FLOAT_ONE32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_rsqrt2_d(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_rsqrt2_d(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fdt2 = float64_mul(fdt0, fdt2, &env->active_fpu.fp_status);
     fdt2 = float64_sub(fdt2, FLOAT_ONE64, &env->active_fpu.fp_status);
     fdt2 = float64_chs(float64_div(fdt2, FLOAT_TWO64, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fdt2;
 }
 
-uint32_t helper_float_rsqrt2_s(uint32_t fst0, uint32_t fst2)
+uint32_t helper_float_rsqrt2_s(CPUMIPSState *env, uint32_t fst0, uint32_t fst2)
 {
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul(fst0, fst2, &env->active_fpu.fp_status);
     fst2 = float32_sub(fst2, FLOAT_ONE32, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return fst2;
 }
 
-uint64_t helper_float_rsqrt2_ps(uint64_t fdt0, uint64_t fdt2)
+uint64_t helper_float_rsqrt2_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt2)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -3126,11 +3175,11 @@
     fsth2 = float32_sub(fsth2, FLOAT_ONE32, &env->active_fpu.fp_status);
     fst2 = float32_chs(float32_div(fst2, FLOAT_TWO32, &env->active_fpu.fp_status));
     fsth2 = float32_chs(float32_div(fsth2, FLOAT_TWO32, &env->active_fpu.fp_status));
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_addr_ps(uint64_t fdt0, uint64_t fdt1)
+uint64_t helper_float_addr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -3142,11 +3191,11 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_add (fst0, fsth0, &env->active_fpu.fp_status);
     fsth2 = float32_add (fst1, fsth1, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
-uint64_t helper_float_mulr_ps(uint64_t fdt0, uint64_t fdt1)
+uint64_t helper_float_mulr_ps(CPUMIPSState *env, uint64_t fdt0, uint64_t fdt1)
 {
     uint32_t fst0 = fdt0 & 0XFFFFFFFF;
     uint32_t fsth0 = fdt0 >> 32;
@@ -3158,31 +3207,33 @@
     set_float_exception_flags(0, &env->active_fpu.fp_status);
     fst2 = float32_mul (fst0, fsth0, &env->active_fpu.fp_status);
     fsth2 = float32_mul (fst1, fsth1, &env->active_fpu.fp_status);
-    update_fcr31();
+    update_fcr31(env);
     return ((uint64_t)fsth2 << 32) | fst2;
 }
 
 /* compare operations */
 #define FOP_COND_D(op, cond)                                   \
-void helper_cmp_d_ ## op (uint64_t fdt0, uint64_t fdt1, int cc)    \
+void helper_cmp_d_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
+                         uint64_t fdt1, int cc)                \
 {                                                              \
     int c;                                                     \
     set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                         \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
         CLEAR_FP_COND(cc, env->active_fpu);                    \
 }                                                              \
-void helper_cmpabs_d_ ## op (uint64_t fdt0, uint64_t fdt1, int cc) \
+void helper_cmpabs_d_ ## op(CPUMIPSState *env, uint64_t fdt0,  \
+                            uint64_t fdt1, int cc)             \
 {                                                              \
     int c;                                                     \
     set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     fdt0 = float64_abs(fdt0);                                  \
     fdt1 = float64_abs(fdt1);                                  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                         \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -3211,25 +3262,27 @@
 FOP_COND_D(ngt, float64_unordered(fdt1, fdt0, &env->active_fpu.fp_status)  || float64_le(fdt0, fdt1, &env->active_fpu.fp_status))
 
 #define FOP_COND_S(op, cond)                                   \
-void helper_cmp_s_ ## op (uint32_t fst0, uint32_t fst1, int cc)    \
+void helper_cmp_s_ ## op(CPUMIPSState *env, uint32_t fst0,     \
+                         uint32_t fst1, int cc)                \
 {                                                              \
     int c;                                                     \
     set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                         \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
         CLEAR_FP_COND(cc, env->active_fpu);                    \
 }                                                              \
-void helper_cmpabs_s_ ## op (uint32_t fst0, uint32_t fst1, int cc) \
+void helper_cmpabs_s_ ## op(CPUMIPSState *env, uint32_t fst0,  \
+                            uint32_t fst1, int cc)             \
 {                                                              \
     int c;                                                     \
     set_float_exception_flags(0, &env->active_fpu.fp_status);  \
     fst0 = float32_abs(fst0);                                  \
     fst1 = float32_abs(fst1);                                  \
     c = cond;                                                  \
-    update_fcr31();                                            \
+    update_fcr31(env);                                         \
     if (c)                                                     \
         SET_FP_COND(cc, env->active_fpu);                      \
     else                                                       \
@@ -3258,7 +3311,8 @@
 FOP_COND_S(ngt, float32_unordered(fst1, fst0, &env->active_fpu.fp_status)  || float32_le(fst0, fst1, &env->active_fpu.fp_status))
 
 #define FOP_COND_PS(op, condl, condh)                           \
-void helper_cmp_ps_ ## op (uint64_t fdt0, uint64_t fdt1, int cc)    \
+void helper_cmp_ps_ ## op(CPUMIPSState *env, uint64_t fdt0,     \
+                          uint64_t fdt1, int cc)                \
 {                                                               \
     uint32_t fst0, fsth0, fst1, fsth1;                          \
     int ch, cl;                                                 \
@@ -3269,7 +3323,7 @@
     fsth1 = fdt1 >> 32;                                         \
     cl = condl;                                                 \
     ch = condh;                                                 \
-    update_fcr31();                                             \
+    update_fcr31(env);                                          \
     if (cl)                                                     \
         SET_FP_COND(cc, env->active_fpu);                       \
     else                                                        \
@@ -3279,7 +3333,8 @@
     else                                                        \
         CLEAR_FP_COND(cc + 1, env->active_fpu);                 \
 }                                                               \
-void helper_cmpabs_ps_ ## op (uint64_t fdt0, uint64_t fdt1, int cc) \
+void helper_cmpabs_ps_ ## op(CPUMIPSState *env, uint64_t fdt0,  \
+                             uint64_t fdt1, int cc)             \
 {                                                               \
     uint32_t fst0, fsth0, fst1, fsth1;                          \
     int ch, cl;                                                 \
@@ -3289,7 +3344,7 @@
     fsth1 = float32_abs(fdt1 >> 32);                            \
     cl = condl;                                                 \
     ch = condh;                                                 \
-    update_fcr31();                                             \
+    update_fcr31(env);                                          \
     if (cl)                                                     \
         SET_FP_COND(cc, env->active_fpu);                       \
     else                                                        \
diff --git a/target-mips/translate.c b/target-mips/translate.c
index a884f75..52eeb2b 100644
--- a/target-mips/translate.c
+++ b/target-mips/translate.c
@@ -483,27 +483,45 @@
 
 #include "gen-icount.h"
 
-#define gen_helper_0i(name, arg) do {                             \
+#define gen_helper_0e0i(name, arg) do {                           \
     TCGv_i32 helper_tmp = tcg_const_i32(arg);                     \
-    gen_helper_##name(helper_tmp);                                \
+    gen_helper_##name(cpu_env, helper_tmp);                       \
     tcg_temp_free_i32(helper_tmp);                                \
     } while(0)
 
-#define gen_helper_1i(name, arg1, arg2) do {                      \
+#define gen_helper_0e1i(name, arg1, arg2) do {                    \
     TCGv_i32 helper_tmp = tcg_const_i32(arg2);                    \
-    gen_helper_##name(arg1, helper_tmp);                          \
+    gen_helper_##name(cpu_env, arg1, helper_tmp);                 \
     tcg_temp_free_i32(helper_tmp);                                \
     } while(0)
 
-#define gen_helper_2i(name, arg1, arg2, arg3) do {                \
+#define gen_helper_1e0i(name, ret, arg1) do {                     \
+    TCGv_i32 helper_tmp = tcg_const_i32(arg1);                    \
+    gen_helper_##name(ret, cpu_env, helper_tmp);                  \
+    tcg_temp_free_i32(helper_tmp);                                \
+    } while(0)
+
+#define gen_helper_1e1i(name, ret, arg1, arg2) do {               \
+    TCGv_i32 helper_tmp = tcg_const_i32(arg2);                    \
+    gen_helper_##name(ret, cpu_env, arg1, helper_tmp);            \
+    tcg_temp_free_i32(helper_tmp);                                \
+    } while(0)
+
+#define gen_helper_0e2i(name, arg1, arg2, arg3) do {              \
     TCGv_i32 helper_tmp = tcg_const_i32(arg3);                    \
-    gen_helper_##name(arg1, arg2, helper_tmp);                    \
+    gen_helper_##name(cpu_env, arg1, arg2, helper_tmp);           \
     tcg_temp_free_i32(helper_tmp);                                \
     } while(0)
 
-#define gen_helper_3i(name, arg1, arg2, arg3, arg4) do {          \
+#define gen_helper_1e2i(name, ret, arg1, arg2, arg3) do {         \
+    TCGv_i32 helper_tmp = tcg_const_i32(arg3);                    \
+    gen_helper_##name(ret, cpu_env, arg1, arg2, helper_tmp);      \
+    tcg_temp_free_i32(helper_tmp);                                \
+    } while(0)
+
+#define gen_helper_0e3i(name, arg1, arg2, arg3, arg4) do {        \
     TCGv_i32 helper_tmp = tcg_const_i32(arg4);                    \
-    gen_helper_##name(arg1, arg2, arg3, helper_tmp);              \
+    gen_helper_##name(cpu_env, arg1, arg2, arg3, helper_tmp);     \
     tcg_temp_free_i32(helper_tmp);                                \
     } while(0)
 
@@ -748,7 +766,7 @@
     TCGv_i32 texcp = tcg_const_i32(excp);
     TCGv_i32 terr = tcg_const_i32(err);
     save_cpu_state(ctx, 1);
-    gen_helper_raise_exception_err(texcp, terr);
+    gen_helper_raise_exception_err(cpu_env, texcp, terr);
     tcg_temp_free_i32(terr);
     tcg_temp_free_i32(texcp);
 }
@@ -757,7 +775,7 @@
 generate_exception (DisasContext *ctx, int excp)
 {
     save_cpu_state(ctx, 1);
-    gen_helper_0i(raise_exception, excp);
+    gen_helper_0e0i(raise_exception, excp);
 }
 
 /* Addresses computation */
@@ -871,22 +889,22 @@
     gen_ldcmp_fpr##bits (ctx, fp0, fs);                                       \
     gen_ldcmp_fpr##bits (ctx, fp1, ft);                                       \
     switch (n) {                                                              \
-    case  0: gen_helper_2i(cmp ## type ## _ ## fmt ## _f, fp0, fp1, cc);    break;\
-    case  1: gen_helper_2i(cmp ## type ## _ ## fmt ## _un, fp0, fp1, cc);   break;\
-    case  2: gen_helper_2i(cmp ## type ## _ ## fmt ## _eq, fp0, fp1, cc);   break;\
-    case  3: gen_helper_2i(cmp ## type ## _ ## fmt ## _ueq, fp0, fp1, cc);  break;\
-    case  4: gen_helper_2i(cmp ## type ## _ ## fmt ## _olt, fp0, fp1, cc);  break;\
-    case  5: gen_helper_2i(cmp ## type ## _ ## fmt ## _ult, fp0, fp1, cc);  break;\
-    case  6: gen_helper_2i(cmp ## type ## _ ## fmt ## _ole, fp0, fp1, cc);  break;\
-    case  7: gen_helper_2i(cmp ## type ## _ ## fmt ## _ule, fp0, fp1, cc);  break;\
-    case  8: gen_helper_2i(cmp ## type ## _ ## fmt ## _sf, fp0, fp1, cc);   break;\
-    case  9: gen_helper_2i(cmp ## type ## _ ## fmt ## _ngle, fp0, fp1, cc); break;\
-    case 10: gen_helper_2i(cmp ## type ## _ ## fmt ## _seq, fp0, fp1, cc);  break;\
-    case 11: gen_helper_2i(cmp ## type ## _ ## fmt ## _ngl, fp0, fp1, cc);  break;\
-    case 12: gen_helper_2i(cmp ## type ## _ ## fmt ## _lt, fp0, fp1, cc);   break;\
-    case 13: gen_helper_2i(cmp ## type ## _ ## fmt ## _nge, fp0, fp1, cc);  break;\
-    case 14: gen_helper_2i(cmp ## type ## _ ## fmt ## _le, fp0, fp1, cc);   break;\
-    case 15: gen_helper_2i(cmp ## type ## _ ## fmt ## _ngt, fp0, fp1, cc);  break;\
+    case  0: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _f, fp0, fp1, cc);    break;\
+    case  1: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _un, fp0, fp1, cc);   break;\
+    case  2: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _eq, fp0, fp1, cc);   break;\
+    case  3: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ueq, fp0, fp1, cc);  break;\
+    case  4: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _olt, fp0, fp1, cc);  break;\
+    case  5: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ult, fp0, fp1, cc);  break;\
+    case  6: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ole, fp0, fp1, cc);  break;\
+    case  7: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ule, fp0, fp1, cc);  break;\
+    case  8: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _sf, fp0, fp1, cc);   break;\
+    case  9: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ngle, fp0, fp1, cc); break;\
+    case 10: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _seq, fp0, fp1, cc);  break;\
+    case 11: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ngl, fp0, fp1, cc);  break;\
+    case 12: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _lt, fp0, fp1, cc);   break;\
+    case 13: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _nge, fp0, fp1, cc);  break;\
+    case 14: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _le, fp0, fp1, cc);   break;\
+    case 15: gen_helper_0e2i(cmp ## type ## _ ## fmt ## _ngt, fp0, fp1, cc);  break;\
     default: abort();                                                         \
     }                                                                         \
     tcg_temp_free_i##bits (fp0);                                              \
@@ -948,7 +966,7 @@
 #define OP_LD_ATOMIC(insn,fname)                                           \
 static inline void op_ld_##insn(TCGv ret, TCGv arg1, DisasContext *ctx)    \
 {                                                                          \
-    gen_helper_2i(insn, ret, arg1, ctx->mem_idx);                          \
+    gen_helper_1e1i(insn, ret, arg1, ctx->mem_idx);                        \
 }
 #endif
 OP_LD_ATOMIC(ll,ld32s);
@@ -975,7 +993,7 @@
     tcg_gen_movi_tl(t0, rt | ((almask << 3) & 0x20));                        \
     tcg_gen_st_tl(t0, cpu_env, offsetof(CPUMIPSState, llreg));                   \
     tcg_gen_st_tl(arg1, cpu_env, offsetof(CPUMIPSState, llnewval));              \
-    gen_helper_0i(raise_exception, EXCP_SC);                                 \
+    gen_helper_0e0i(raise_exception, EXCP_SC);                               \
     gen_set_label(l2);                                                       \
     tcg_gen_movi_tl(t0, 0);                                                  \
     gen_store_gpr(t0, rt);                                                   \
@@ -986,7 +1004,7 @@
 static inline void op_st_##insn(TCGv arg1, TCGv arg2, int rt, DisasContext *ctx) \
 {                                                                            \
     TCGv t0 = tcg_temp_new();                                                \
-    gen_helper_3i(insn, t0, arg1, arg2, ctx->mem_idx);                       \
+    gen_helper_1e2i(insn, t0, arg1, arg2, ctx->mem_idx);                     \
     gen_store_gpr(t0, rt);                                                   \
     tcg_temp_free(t0);                                                       \
 }
@@ -1066,14 +1084,14 @@
     case OPC_LDL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(ldl, t1, t1, t0, ctx->mem_idx);
+        gen_helper_1e2i(ldl, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "ldl";
         break;
     case OPC_LDR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(ldr, t1, t1, t0, ctx->mem_idx);
+        gen_helper_1e2i(ldr, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "ldr";
         break;
@@ -1127,14 +1145,14 @@
     case OPC_LWL:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(lwl, t1, t1, t0, ctx->mem_idx);
+        gen_helper_1e2i(lwl, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "lwl";
         break;
     case OPC_LWR:
         save_cpu_state(ctx, 1);
         gen_load_gpr(t1, rt);
-        gen_helper_3i(lwr, t1, t1, t0, ctx->mem_idx);
+        gen_helper_1e2i(lwr, t1, t1, t0, ctx->mem_idx);
         gen_store_gpr(t1, rt);
         opn = "lwr";
         break;
@@ -1170,12 +1188,12 @@
         break;
     case OPC_SDL:
         save_cpu_state(ctx, 1);
-        gen_helper_2i(sdl, t1, t0, ctx->mem_idx);
+        gen_helper_0e2i(sdl, t1, t0, ctx->mem_idx);
         opn = "sdl";
         break;
     case OPC_SDR:
         save_cpu_state(ctx, 1);
-        gen_helper_2i(sdr, t1, t0, ctx->mem_idx);
+        gen_helper_0e2i(sdr, t1, t0, ctx->mem_idx);
         opn = "sdr";
         break;
 #endif
@@ -1196,12 +1214,12 @@
         break;
     case OPC_SWL:
         save_cpu_state(ctx, 1);
-        gen_helper_2i(swl, t1, t0, ctx->mem_idx);
+        gen_helper_0e2i(swl, t1, t0, ctx->mem_idx);
         opn = "swl";
         break;
     case OPC_SWR:
         save_cpu_state(ctx, 1);
-        gen_helper_2i(swr, t1, t0, ctx->mem_idx);
+        gen_helper_0e2i(swr, t1, t0, ctx->mem_idx);
         opn = "swr";
         break;
     }
@@ -2138,11 +2156,11 @@
         opn = "ddivu";
         break;
     case OPC_DMULT:
-        gen_helper_dmult(t0, t1);
+        gen_helper_dmult(cpu_env, t0, t1);
         opn = "dmult";
         break;
     case OPC_DMULTU:
-        gen_helper_dmultu(t0, t1);
+        gen_helper_dmultu(cpu_env, t0, t1);
         opn = "dmultu";
         break;
 #endif
@@ -2254,59 +2272,59 @@
 
     switch (opc) {
     case OPC_VR54XX_MULS:
-        gen_helper_muls(t0, t0, t1);
+        gen_helper_muls(t0, cpu_env, t0, t1);
         opn = "muls";
         break;
     case OPC_VR54XX_MULSU:
-        gen_helper_mulsu(t0, t0, t1);
+        gen_helper_mulsu(t0, cpu_env, t0, t1);
         opn = "mulsu";
         break;
     case OPC_VR54XX_MACC:
-        gen_helper_macc(t0, t0, t1);
+        gen_helper_macc(t0, cpu_env, t0, t1);
         opn = "macc";
         break;
     case OPC_VR54XX_MACCU:
-        gen_helper_maccu(t0, t0, t1);
+        gen_helper_maccu(t0, cpu_env, t0, t1);
         opn = "maccu";
         break;
     case OPC_VR54XX_MSAC:
-        gen_helper_msac(t0, t0, t1);
+        gen_helper_msac(t0, cpu_env, t0, t1);
         opn = "msac";
         break;
     case OPC_VR54XX_MSACU:
-        gen_helper_msacu(t0, t0, t1);
+        gen_helper_msacu(t0, cpu_env, t0, t1);
         opn = "msacu";
         break;
     case OPC_VR54XX_MULHI:
-        gen_helper_mulhi(t0, t0, t1);
+        gen_helper_mulhi(t0, cpu_env, t0, t1);
         opn = "mulhi";
         break;
     case OPC_VR54XX_MULHIU:
-        gen_helper_mulhiu(t0, t0, t1);
+        gen_helper_mulhiu(t0, cpu_env, t0, t1);
         opn = "mulhiu";
         break;
     case OPC_VR54XX_MULSHI:
-        gen_helper_mulshi(t0, t0, t1);
+        gen_helper_mulshi(t0, cpu_env, t0, t1);
         opn = "mulshi";
         break;
     case OPC_VR54XX_MULSHIU:
-        gen_helper_mulshiu(t0, t0, t1);
+        gen_helper_mulshiu(t0, cpu_env, t0, t1);
         opn = "mulshiu";
         break;
     case OPC_VR54XX_MACCHI:
-        gen_helper_macchi(t0, t0, t1);
+        gen_helper_macchi(t0, cpu_env, t0, t1);
         opn = "macchi";
         break;
     case OPC_VR54XX_MACCHIU:
-        gen_helper_macchiu(t0, t0, t1);
+        gen_helper_macchiu(t0, cpu_env, t0, t1);
         opn = "macchiu";
         break;
     case OPC_VR54XX_MSACHI:
-        gen_helper_msachi(t0, t0, t1);
+        gen_helper_msachi(t0, cpu_env, t0, t1);
         opn = "msachi";
         break;
     case OPC_VR54XX_MSACHIU:
-        gen_helper_msachiu(t0, t0, t1);
+        gen_helper_msachiu(t0, cpu_env, t0, t1);
         opn = "msachiu";
         break;
     default:
@@ -2683,7 +2701,7 @@
         gen_save_pc(dest);
         if (ctx->singlestep_enabled) {
             save_cpu_state(ctx, 0);
-            gen_helper_0i(raise_exception, EXCP_DEBUG);
+            gen_helper_0e0i(raise_exception, EXCP_DEBUG);
         }
         tcg_gen_exit_tb(0);
     }
@@ -3187,17 +3205,17 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpcontrol(arg);
+            gen_helper_mfc0_mvpcontrol(arg, cpu_env);
             rn = "MVPControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf0(arg);
+            gen_helper_mfc0_mvpconf0(arg, cpu_env);
             rn = "MVPConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf1(arg);
+            gen_helper_mfc0_mvpconf1(arg, cpu_env);
             rn = "MVPConf1";
             break;
         default:
@@ -3207,7 +3225,7 @@
     case 1:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_random(arg);
+            gen_helper_mfc0_random(arg, cpu_env);
             rn = "Random";
             break;
         case 1:
@@ -3258,37 +3276,37 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcstatus(arg);
+            gen_helper_mfc0_tcstatus(arg, cpu_env);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcbind(arg);
+            gen_helper_mfc0_tcbind(arg, cpu_env);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcrestart(arg);
+            gen_helper_mfc0_tcrestart(arg, cpu_env);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tchalt(arg);
+            gen_helper_mfc0_tchalt(arg, cpu_env);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tccontext(arg);
+            gen_helper_mfc0_tccontext(arg, cpu_env);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcschedule(arg);
+            gen_helper_mfc0_tcschedule(arg, cpu_env);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcschefback(arg);
+            gen_helper_mfc0_tcschefback(arg, cpu_env);
             rn = "TCScheFBack";
             break;
         default:
@@ -3399,7 +3417,7 @@
             /* Mark as an IO operation because we read the time.  */
             if (use_icount)
                 gen_io_start();
-            gen_helper_mfc0_count(arg);
+            gen_helper_mfc0_count(arg, cpu_env);
             if (use_icount) {
                 gen_io_end();
             }
@@ -3531,7 +3549,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_lladdr(arg);
+            gen_helper_mfc0_lladdr(arg, cpu_env);
             rn = "LLAddr";
             break;
         default:
@@ -3541,7 +3559,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mfc0_watchlo, arg, sel);
+            gen_helper_1e0i(mfc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -3551,7 +3569,7 @@
     case 19:
         switch (sel) {
         case 0 ...7:
-            gen_helper_1i(mfc0_watchhi, arg, sel);
+            gen_helper_1e0i(mfc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -3590,7 +3608,7 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_debug(arg); /* EJTAG support */
+            gen_helper_mfc0_debug(arg, cpu_env); /* EJTAG support */
             rn = "Debug";
             break;
         case 1:
@@ -3765,12 +3783,12 @@
     case 0:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_index(arg);
+            gen_helper_mtc0_index(cpu_env, arg);
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_mvpcontrol(arg);
+            gen_helper_mtc0_mvpcontrol(cpu_env, arg);
             rn = "MVPControl";
             break;
         case 2:
@@ -3795,22 +3813,22 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpecontrol(arg);
+            gen_helper_mtc0_vpecontrol(cpu_env, arg);
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf0(arg);
+            gen_helper_mtc0_vpeconf0(cpu_env, arg);
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf1(arg);
+            gen_helper_mtc0_vpeconf1(cpu_env, arg);
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_yqmask(arg);
+            gen_helper_mtc0_yqmask(cpu_env, arg);
             rn = "YQMask";
             break;
         case 5:
@@ -3825,7 +3843,7 @@
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeopt(arg);
+            gen_helper_mtc0_vpeopt(cpu_env, arg);
             rn = "VPEOpt";
             break;
         default:
@@ -3835,42 +3853,42 @@
     case 2:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo0(arg);
+            gen_helper_mtc0_entrylo0(cpu_env, arg);
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcstatus(arg);
+            gen_helper_mtc0_tcstatus(cpu_env, arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcbind(arg);
+            gen_helper_mtc0_tcbind(cpu_env, arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcrestart(arg);
+            gen_helper_mtc0_tcrestart(cpu_env, arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tchalt(arg);
+            gen_helper_mtc0_tchalt(cpu_env, arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tccontext(arg);
+            gen_helper_mtc0_tccontext(cpu_env, arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschedule(arg);
+            gen_helper_mtc0_tcschedule(cpu_env, arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschefback(arg);
+            gen_helper_mtc0_tcschefback(cpu_env, arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -3880,7 +3898,7 @@
     case 3:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo1(arg);
+            gen_helper_mtc0_entrylo1(cpu_env, arg);
             rn = "EntryLo1";
             break;
         default:
@@ -3890,11 +3908,11 @@
     case 4:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_context(arg);
+            gen_helper_mtc0_context(cpu_env, arg);
             rn = "Context";
             break;
         case 1:
-//            gen_helper_mtc0_contextconfig(arg); /* SmartMIPS ASE */
+//            gen_helper_mtc0_contextconfig(cpu_env, arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //            break;
         default:
@@ -3904,12 +3922,12 @@
     case 5:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_pagemask(arg);
+            gen_helper_mtc0_pagemask(cpu_env, arg);
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_pagegrain(arg);
+            gen_helper_mtc0_pagegrain(cpu_env, arg);
             rn = "PageGrain";
             break;
         default:
@@ -3919,32 +3937,32 @@
     case 6:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_wired(arg);
+            gen_helper_mtc0_wired(cpu_env, arg);
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf0(arg);
+            gen_helper_mtc0_srsconf0(cpu_env, arg);
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf1(arg);
+            gen_helper_mtc0_srsconf1(cpu_env, arg);
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf2(arg);
+            gen_helper_mtc0_srsconf2(cpu_env, arg);
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf3(arg);
+            gen_helper_mtc0_srsconf3(cpu_env, arg);
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf4(arg);
+            gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
         default:
@@ -3955,7 +3973,7 @@
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_hwrena(arg);
+            gen_helper_mtc0_hwrena(cpu_env, arg);
             rn = "HWREna";
             break;
         default:
@@ -3969,7 +3987,7 @@
     case 9:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_count(arg);
+            gen_helper_mtc0_count(cpu_env, arg);
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -3980,7 +3998,7 @@
     case 10:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entryhi(arg);
+            gen_helper_mtc0_entryhi(cpu_env, arg);
             rn = "EntryHi";
             break;
         default:
@@ -3990,7 +4008,7 @@
     case 11:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_compare(arg);
+            gen_helper_mtc0_compare(cpu_env, arg);
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
@@ -4002,7 +4020,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_status(arg);
+            gen_helper_mtc0_status(cpu_env, arg);
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -4010,14 +4028,14 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_intctl(arg);
+            gen_helper_mtc0_intctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsctl(arg);
+            gen_helper_mtc0_srsctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
@@ -4037,7 +4055,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_cause(arg);
+            gen_helper_mtc0_cause(cpu_env, arg);
             rn = "Cause";
             break;
         default:
@@ -4062,7 +4080,7 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_ebase(arg);
+            gen_helper_mtc0_ebase(cpu_env, arg);
             rn = "EBase";
             break;
         default:
@@ -4072,7 +4090,7 @@
     case 16:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_config0(arg);
+            gen_helper_mtc0_config0(cpu_env, arg);
             rn = "Config";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4082,7 +4100,7 @@
             rn = "Config1";
             break;
         case 2:
-            gen_helper_mtc0_config2(arg);
+            gen_helper_mtc0_config2(cpu_env, arg);
             rn = "Config2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4109,7 +4127,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_lladdr(arg);
+            gen_helper_mtc0_lladdr(cpu_env, arg);
             rn = "LLAddr";
             break;
         default:
@@ -4119,7 +4137,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchlo, arg, sel);
+            gen_helper_0e1i(mtc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -4129,7 +4147,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchhi, arg, sel);
+            gen_helper_0e1i(mtc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -4141,7 +4159,7 @@
         case 0:
 #if defined(TARGET_MIPS64)
             check_insn(env, ctx, ISA_MIPS3);
-            gen_helper_mtc0_xcontext(arg);
+            gen_helper_mtc0_xcontext(cpu_env, arg);
             rn = "XContext";
             break;
 #endif
@@ -4153,7 +4171,7 @@
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_helper_mtc0_framemask(arg);
+            gen_helper_mtc0_framemask(cpu_env, arg);
             rn = "Framemask";
             break;
         default:
@@ -4167,20 +4185,20 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_debug(arg); /* EJTAG support */
+            gen_helper_mtc0_debug(cpu_env, arg); /* EJTAG support */
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_mtc0_tracecontrol(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol(cpu_env, arg); /* PDtrace support */
             rn = "TraceControl";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
 //            break;
         case 2:
-//            gen_helper_mtc0_tracecontrol2(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol2(cpu_env, arg); /* PDtrace support */
             rn = "TraceControl2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -4188,13 +4206,13 @@
         case 3:
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
-//            gen_helper_mtc0_usertracedata(arg); /* PDtrace support */
+//            gen_helper_mtc0_usertracedata(cpu_env, arg); /* PDtrace support */
             rn = "UserTraceData";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
 //            break;
         case 4:
-//            gen_helper_mtc0_tracebpc(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracebpc(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceBPC";
@@ -4217,7 +4235,7 @@
     case 25:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_performance0(arg);
+            gen_helper_mtc0_performance0(cpu_env, arg);
             rn = "Performance0";
             break;
         case 1:
@@ -4272,14 +4290,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taglo(arg);
+            gen_helper_mtc0_taglo(cpu_env, arg);
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datalo(arg);
+            gen_helper_mtc0_datalo(cpu_env, arg);
             rn = "DataLo";
             break;
         default:
@@ -4292,14 +4310,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taghi(arg);
+            gen_helper_mtc0_taghi(cpu_env, arg);
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datahi(arg);
+            gen_helper_mtc0_datahi(cpu_env, arg);
             rn = "DataHi";
             break;
         default:
@@ -4364,17 +4382,17 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpcontrol(arg);
+            gen_helper_mfc0_mvpcontrol(arg, cpu_env);
             rn = "MVPControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf0(arg);
+            gen_helper_mfc0_mvpconf0(arg, cpu_env);
             rn = "MVPConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_mvpconf1(arg);
+            gen_helper_mfc0_mvpconf1(arg, cpu_env);
             rn = "MVPConf1";
             break;
         default:
@@ -4384,7 +4402,7 @@
     case 1:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_random(arg);
+            gen_helper_mfc0_random(arg, cpu_env);
             rn = "Random";
             break;
         case 1:
@@ -4434,37 +4452,37 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcstatus(arg);
+            gen_helper_mfc0_tcstatus(arg, cpu_env);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mfc0_tcbind(arg);
+            gen_helper_mfc0_tcbind(arg, cpu_env);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcrestart(arg);
+            gen_helper_dmfc0_tcrestart(arg, cpu_env);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tchalt(arg);
+            gen_helper_dmfc0_tchalt(arg, cpu_env);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tccontext(arg);
+            gen_helper_dmfc0_tccontext(arg, cpu_env);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcschedule(arg);
+            gen_helper_dmfc0_tcschedule(arg, cpu_env);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_dmfc0_tcschefback(arg);
+            gen_helper_dmfc0_tcschefback(arg, cpu_env);
             rn = "TCScheFBack";
             break;
         default:
@@ -4572,7 +4590,7 @@
             /* Mark as an IO operation because we read the time.  */
             if (use_icount)
                 gen_io_start();
-            gen_helper_mfc0_count(arg);
+            gen_helper_mfc0_count(arg, cpu_env);
             if (use_icount) {
                 gen_io_end();
             }
@@ -4701,7 +4719,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_dmfc0_lladdr(arg);
+            gen_helper_dmfc0_lladdr(arg, cpu_env);
             rn = "LLAddr";
             break;
         default:
@@ -4711,7 +4729,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(dmfc0_watchlo, arg, sel);
+            gen_helper_1e0i(dmfc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -4721,7 +4739,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mfc0_watchhi, arg, sel);
+            gen_helper_1e0i(mfc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -4757,23 +4775,23 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mfc0_debug(arg); /* EJTAG support */
+            gen_helper_mfc0_debug(arg, cpu_env); /* EJTAG support */
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_dmfc0_tracecontrol(arg); /* PDtrace support */
+//            gen_helper_dmfc0_tracecontrol(arg, cpu_env); /* PDtrace support */
             rn = "TraceControl";
 //            break;
         case 2:
-//            gen_helper_dmfc0_tracecontrol2(arg); /* PDtrace support */
+//            gen_helper_dmfc0_tracecontrol2(arg, cpu_env); /* PDtrace support */
             rn = "TraceControl2";
 //            break;
         case 3:
-//            gen_helper_dmfc0_usertracedata(arg); /* PDtrace support */
+//            gen_helper_dmfc0_usertracedata(arg, cpu_env); /* PDtrace support */
             rn = "UserTraceData";
 //            break;
         case 4:
-//            gen_helper_dmfc0_tracebpc(arg); /* PDtrace support */
+//            gen_helper_dmfc0_tracebpc(arg, cpu_env); /* PDtrace support */
             rn = "TraceBPC";
 //            break;
         default:
@@ -4931,12 +4949,12 @@
     case 0:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_index(arg);
+            gen_helper_mtc0_index(cpu_env, arg);
             rn = "Index";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_mvpcontrol(arg);
+            gen_helper_mtc0_mvpcontrol(cpu_env, arg);
             rn = "MVPControl";
             break;
         case 2:
@@ -4961,22 +4979,22 @@
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpecontrol(arg);
+            gen_helper_mtc0_vpecontrol(cpu_env, arg);
             rn = "VPEControl";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf0(arg);
+            gen_helper_mtc0_vpeconf0(cpu_env, arg);
             rn = "VPEConf0";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeconf1(arg);
+            gen_helper_mtc0_vpeconf1(cpu_env, arg);
             rn = "VPEConf1";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_yqmask(arg);
+            gen_helper_mtc0_yqmask(cpu_env, arg);
             rn = "YQMask";
             break;
         case 5:
@@ -4991,7 +5009,7 @@
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_vpeopt(arg);
+            gen_helper_mtc0_vpeopt(cpu_env, arg);
             rn = "VPEOpt";
             break;
         default:
@@ -5001,42 +5019,42 @@
     case 2:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo0(arg);
+            gen_helper_mtc0_entrylo0(cpu_env, arg);
             rn = "EntryLo0";
             break;
         case 1:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcstatus(arg);
+            gen_helper_mtc0_tcstatus(cpu_env, arg);
             rn = "TCStatus";
             break;
         case 2:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcbind(arg);
+            gen_helper_mtc0_tcbind(cpu_env, arg);
             rn = "TCBind";
             break;
         case 3:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcrestart(arg);
+            gen_helper_mtc0_tcrestart(cpu_env, arg);
             rn = "TCRestart";
             break;
         case 4:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tchalt(arg);
+            gen_helper_mtc0_tchalt(cpu_env, arg);
             rn = "TCHalt";
             break;
         case 5:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tccontext(arg);
+            gen_helper_mtc0_tccontext(cpu_env, arg);
             rn = "TCContext";
             break;
         case 6:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschedule(arg);
+            gen_helper_mtc0_tcschedule(cpu_env, arg);
             rn = "TCSchedule";
             break;
         case 7:
             check_insn(env, ctx, ASE_MT);
-            gen_helper_mtc0_tcschefback(arg);
+            gen_helper_mtc0_tcschefback(cpu_env, arg);
             rn = "TCScheFBack";
             break;
         default:
@@ -5046,7 +5064,7 @@
     case 3:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entrylo1(arg);
+            gen_helper_mtc0_entrylo1(cpu_env, arg);
             rn = "EntryLo1";
             break;
         default:
@@ -5056,11 +5074,11 @@
     case 4:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_context(arg);
+            gen_helper_mtc0_context(cpu_env, arg);
             rn = "Context";
             break;
         case 1:
-//           gen_helper_mtc0_contextconfig(arg); /* SmartMIPS ASE */
+//           gen_helper_mtc0_contextconfig(cpu_env, arg); /* SmartMIPS ASE */
             rn = "ContextConfig";
 //           break;
         default:
@@ -5070,12 +5088,12 @@
     case 5:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_pagemask(arg);
+            gen_helper_mtc0_pagemask(cpu_env, arg);
             rn = "PageMask";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_pagegrain(arg);
+            gen_helper_mtc0_pagegrain(cpu_env, arg);
             rn = "PageGrain";
             break;
         default:
@@ -5085,32 +5103,32 @@
     case 6:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_wired(arg);
+            gen_helper_mtc0_wired(cpu_env, arg);
             rn = "Wired";
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf0(arg);
+            gen_helper_mtc0_srsconf0(cpu_env, arg);
             rn = "SRSConf0";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf1(arg);
+            gen_helper_mtc0_srsconf1(cpu_env, arg);
             rn = "SRSConf1";
             break;
         case 3:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf2(arg);
+            gen_helper_mtc0_srsconf2(cpu_env, arg);
             rn = "SRSConf2";
             break;
         case 4:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf3(arg);
+            gen_helper_mtc0_srsconf3(cpu_env, arg);
             rn = "SRSConf3";
             break;
         case 5:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsconf4(arg);
+            gen_helper_mtc0_srsconf4(cpu_env, arg);
             rn = "SRSConf4";
             break;
         default:
@@ -5121,7 +5139,7 @@
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_hwrena(arg);
+            gen_helper_mtc0_hwrena(cpu_env, arg);
             rn = "HWREna";
             break;
         default:
@@ -5135,7 +5153,7 @@
     case 9:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_count(arg);
+            gen_helper_mtc0_count(cpu_env, arg);
             rn = "Count";
             break;
         /* 6,7 are implementation dependent */
@@ -5148,7 +5166,7 @@
     case 10:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_entryhi(arg);
+            gen_helper_mtc0_entryhi(cpu_env, arg);
             rn = "EntryHi";
             break;
         default:
@@ -5158,7 +5176,7 @@
     case 11:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_compare(arg);
+            gen_helper_mtc0_compare(cpu_env, arg);
             rn = "Compare";
             break;
         /* 6,7 are implementation dependent */
@@ -5172,7 +5190,7 @@
         switch (sel) {
         case 0:
             save_cpu_state(ctx, 1);
-            gen_helper_mtc0_status(arg);
+            gen_helper_mtc0_status(cpu_env, arg);
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
@@ -5180,14 +5198,14 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_intctl(arg);
+            gen_helper_mtc0_intctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "IntCtl";
             break;
         case 2:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_srsctl(arg);
+            gen_helper_mtc0_srsctl(cpu_env, arg);
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "SRSCtl";
@@ -5212,7 +5230,7 @@
             if (use_icount) {
                 gen_io_start();
             }
-            gen_helper_mtc0_cause(arg);
+            gen_helper_mtc0_cause(cpu_env, arg);
             if (use_icount) {
                 gen_io_end();
             }
@@ -5242,7 +5260,7 @@
             break;
         case 1:
             check_insn(env, ctx, ISA_MIPS32R2);
-            gen_helper_mtc0_ebase(arg);
+            gen_helper_mtc0_ebase(cpu_env, arg);
             rn = "EBase";
             break;
         default:
@@ -5252,7 +5270,7 @@
     case 16:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_config0(arg);
+            gen_helper_mtc0_config0(cpu_env, arg);
             rn = "Config";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -5262,7 +5280,7 @@
             rn = "Config1";
             break;
         case 2:
-            gen_helper_mtc0_config2(arg);
+            gen_helper_mtc0_config2(cpu_env, arg);
             rn = "Config2";
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
@@ -5280,7 +5298,7 @@
     case 17:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_lladdr(arg);
+            gen_helper_mtc0_lladdr(cpu_env, arg);
             rn = "LLAddr";
             break;
         default:
@@ -5290,7 +5308,7 @@
     case 18:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchlo, arg, sel);
+            gen_helper_0e1i(mtc0_watchlo, arg, sel);
             rn = "WatchLo";
             break;
         default:
@@ -5300,7 +5318,7 @@
     case 19:
         switch (sel) {
         case 0 ... 7:
-            gen_helper_1i(mtc0_watchhi, arg, sel);
+            gen_helper_0e1i(mtc0_watchhi, arg, sel);
             rn = "WatchHi";
             break;
         default:
@@ -5311,7 +5329,7 @@
         switch (sel) {
         case 0:
             check_insn(env, ctx, ISA_MIPS3);
-            gen_helper_mtc0_xcontext(arg);
+            gen_helper_mtc0_xcontext(cpu_env, arg);
             rn = "XContext";
             break;
         default:
@@ -5322,7 +5340,7 @@
        /* Officially reserved, but sel 0 is used for R1x000 framemask */
         switch (sel) {
         case 0:
-            gen_helper_mtc0_framemask(arg);
+            gen_helper_mtc0_framemask(cpu_env, arg);
             rn = "Framemask";
             break;
         default:
@@ -5336,32 +5354,32 @@
     case 23:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_debug(arg); /* EJTAG support */
+            gen_helper_mtc0_debug(cpu_env, arg); /* EJTAG support */
             /* BS_STOP isn't good enough here, hflags may have changed. */
             gen_save_pc(ctx->pc + 4);
             ctx->bstate = BS_EXCP;
             rn = "Debug";
             break;
         case 1:
-//            gen_helper_mtc0_tracecontrol(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceControl";
 //            break;
         case 2:
-//            gen_helper_mtc0_tracecontrol2(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracecontrol2(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceControl2";
 //            break;
         case 3:
-//            gen_helper_mtc0_usertracedata(arg); /* PDtrace support */
+//            gen_helper_mtc0_usertracedata(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "UserTraceData";
 //            break;
         case 4:
-//            gen_helper_mtc0_tracebpc(arg); /* PDtrace support */
+//            gen_helper_mtc0_tracebpc(cpu_env, arg); /* PDtrace support */
             /* Stop translation as we may have switched the execution mode */
             ctx->bstate = BS_STOP;
             rn = "TraceBPC";
@@ -5384,35 +5402,35 @@
     case 25:
         switch (sel) {
         case 0:
-            gen_helper_mtc0_performance0(arg);
+            gen_helper_mtc0_performance0(cpu_env, arg);
             rn = "Performance0";
             break;
         case 1:
-//            gen_helper_mtc0_performance1(arg);
+//            gen_helper_mtc0_performance1(cpu_env, arg);
             rn = "Performance1";
 //            break;
         case 2:
-//            gen_helper_mtc0_performance2(arg);
+//            gen_helper_mtc0_performance2(cpu_env, arg);
             rn = "Performance2";
 //            break;
         case 3:
-//            gen_helper_mtc0_performance3(arg);
+//            gen_helper_mtc0_performance3(cpu_env, arg);
             rn = "Performance3";
 //            break;
         case 4:
-//            gen_helper_mtc0_performance4(arg);
+//            gen_helper_mtc0_performance4(cpu_env, arg);
             rn = "Performance4";
 //            break;
         case 5:
-//            gen_helper_mtc0_performance5(arg);
+//            gen_helper_mtc0_performance5(cpu_env, arg);
             rn = "Performance5";
 //            break;
         case 6:
-//            gen_helper_mtc0_performance6(arg);
+//            gen_helper_mtc0_performance6(cpu_env, arg);
             rn = "Performance6";
 //            break;
         case 7:
-//            gen_helper_mtc0_performance7(arg);
+//            gen_helper_mtc0_performance7(cpu_env, arg);
             rn = "Performance7";
 //            break;
         default:
@@ -5439,14 +5457,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taglo(arg);
+            gen_helper_mtc0_taglo(cpu_env, arg);
             rn = "TagLo";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datalo(arg);
+            gen_helper_mtc0_datalo(cpu_env, arg);
             rn = "DataLo";
             break;
         default:
@@ -5459,14 +5477,14 @@
         case 2:
         case 4:
         case 6:
-            gen_helper_mtc0_taghi(arg);
+            gen_helper_mtc0_taghi(cpu_env, arg);
             rn = "TagHi";
             break;
         case 1:
         case 3:
         case 5:
         case 7:
-            gen_helper_mtc0_datahi(arg);
+            gen_helper_mtc0_datahi(cpu_env, arg);
             rn = "DataHi";
             break;
         default:
@@ -5533,10 +5551,10 @@
         case 1:
             switch (sel) {
             case 1:
-                gen_helper_mftc0_vpecontrol(t0);
+                gen_helper_mftc0_vpecontrol(t0, cpu_env);
                 break;
             case 2:
-                gen_helper_mftc0_vpeconf0(t0);
+                gen_helper_mftc0_vpeconf0(t0, cpu_env);
                 break;
             default:
                 goto die;
@@ -5546,25 +5564,25 @@
         case 2:
             switch (sel) {
             case 1:
-                gen_helper_mftc0_tcstatus(t0);
+                gen_helper_mftc0_tcstatus(t0, cpu_env);
                 break;
             case 2:
-                gen_helper_mftc0_tcbind(t0);
+                gen_helper_mftc0_tcbind(t0, cpu_env);
                 break;
             case 3:
-                gen_helper_mftc0_tcrestart(t0);
+                gen_helper_mftc0_tcrestart(t0, cpu_env);
                 break;
             case 4:
-                gen_helper_mftc0_tchalt(t0);
+                gen_helper_mftc0_tchalt(t0, cpu_env);
                 break;
             case 5:
-                gen_helper_mftc0_tccontext(t0);
+                gen_helper_mftc0_tccontext(t0, cpu_env);
                 break;
             case 6:
-                gen_helper_mftc0_tcschedule(t0);
+                gen_helper_mftc0_tcschedule(t0, cpu_env);
                 break;
             case 7:
-                gen_helper_mftc0_tcschefback(t0);
+                gen_helper_mftc0_tcschefback(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5574,7 +5592,7 @@
         case 10:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_entryhi(t0);
+                gen_helper_mftc0_entryhi(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5583,7 +5601,7 @@
         case 12:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_status(t0);
+                gen_helper_mftc0_status(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5592,7 +5610,7 @@
         case 13:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_cause(t0);
+                gen_helper_mftc0_cause(t0, cpu_env);
                 break;
             default:
                 goto die;
@@ -5602,7 +5620,7 @@
         case 14:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_epc(t0);
+                gen_helper_mftc0_epc(t0, cpu_env);
                 break;
             default:
                 goto die;
@@ -5612,7 +5630,7 @@
         case 15:
             switch (sel) {
             case 1:
-                gen_helper_mftc0_ebase(t0);
+                gen_helper_mftc0_ebase(t0, cpu_env);
                 break;
             default:
                 goto die;
@@ -5622,7 +5640,7 @@
         case 16:
             switch (sel) {
             case 0 ... 7:
-                gen_helper_mftc0_configx(t0, tcg_const_tl(sel));
+                gen_helper_mftc0_configx(t0, cpu_env, tcg_const_tl(sel));
                 break;
             default:
                 goto die;
@@ -5632,7 +5650,7 @@
         case 23:
             switch (sel) {
             case 0:
-                gen_helper_mftc0_debug(t0);
+                gen_helper_mftc0_debug(t0, cpu_env);
                 break;
             default:
                 gen_mfc0(env, ctx, t0, rt, sel);
@@ -5645,49 +5663,49 @@
     } else switch (sel) {
     /* GPR registers. */
     case 0:
-        gen_helper_1i(mftgpr, t0, rt);
+        gen_helper_1e0i(mftgpr, t0, rt);
         break;
     /* Auxiliary CPU registers */
     case 1:
         switch (rt) {
         case 0:
-            gen_helper_1i(mftlo, t0, 0);
+            gen_helper_1e0i(mftlo, t0, 0);
             break;
         case 1:
-            gen_helper_1i(mfthi, t0, 0);
+            gen_helper_1e0i(mfthi, t0, 0);
             break;
         case 2:
-            gen_helper_1i(mftacx, t0, 0);
+            gen_helper_1e0i(mftacx, t0, 0);
             break;
         case 4:
-            gen_helper_1i(mftlo, t0, 1);
+            gen_helper_1e0i(mftlo, t0, 1);
             break;
         case 5:
-            gen_helper_1i(mfthi, t0, 1);
+            gen_helper_1e0i(mfthi, t0, 1);
             break;
         case 6:
-            gen_helper_1i(mftacx, t0, 1);
+            gen_helper_1e0i(mftacx, t0, 1);
             break;
         case 8:
-            gen_helper_1i(mftlo, t0, 2);
+            gen_helper_1e0i(mftlo, t0, 2);
             break;
         case 9:
-            gen_helper_1i(mfthi, t0, 2);
+            gen_helper_1e0i(mfthi, t0, 2);
             break;
         case 10:
-            gen_helper_1i(mftacx, t0, 2);
+            gen_helper_1e0i(mftacx, t0, 2);
             break;
         case 12:
-            gen_helper_1i(mftlo, t0, 3);
+            gen_helper_1e0i(mftlo, t0, 3);
             break;
         case 13:
-            gen_helper_1i(mfthi, t0, 3);
+            gen_helper_1e0i(mfthi, t0, 3);
             break;
         case 14:
-            gen_helper_1i(mftacx, t0, 3);
+            gen_helper_1e0i(mftacx, t0, 3);
             break;
         case 16:
-            gen_helper_mftdsp(t0);
+            gen_helper_mftdsp(t0, cpu_env);
             break;
         default:
             goto die;
@@ -5712,7 +5730,7 @@
         break;
     case 3:
         /* XXX: For now we support only a single FPU context. */
-        gen_helper_1i(cfc1, t0, rt);
+        gen_helper_1e0i(cfc1, t0, rt);
         break;
     /* COP2: Not implemented. */
     case 4:
@@ -5751,10 +5769,10 @@
         case 1:
             switch (sel) {
             case 1:
-                gen_helper_mttc0_vpecontrol(t0);
+                gen_helper_mttc0_vpecontrol(cpu_env, t0);
                 break;
             case 2:
-                gen_helper_mttc0_vpeconf0(t0);
+                gen_helper_mttc0_vpeconf0(cpu_env, t0);
                 break;
             default:
                 goto die;
@@ -5764,25 +5782,25 @@
         case 2:
             switch (sel) {
             case 1:
-                gen_helper_mttc0_tcstatus(t0);
+                gen_helper_mttc0_tcstatus(cpu_env, t0);
                 break;
             case 2:
-                gen_helper_mttc0_tcbind(t0);
+                gen_helper_mttc0_tcbind(cpu_env, t0);
                 break;
             case 3:
-                gen_helper_mttc0_tcrestart(t0);
+                gen_helper_mttc0_tcrestart(cpu_env, t0);
                 break;
             case 4:
-                gen_helper_mttc0_tchalt(t0);
+                gen_helper_mttc0_tchalt(cpu_env, t0);
                 break;
             case 5:
-                gen_helper_mttc0_tccontext(t0);
+                gen_helper_mttc0_tccontext(cpu_env, t0);
                 break;
             case 6:
-                gen_helper_mttc0_tcschedule(t0);
+                gen_helper_mttc0_tcschedule(cpu_env, t0);
                 break;
             case 7:
-                gen_helper_mttc0_tcschefback(t0);
+                gen_helper_mttc0_tcschefback(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5792,7 +5810,7 @@
         case 10:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_entryhi(t0);
+                gen_helper_mttc0_entryhi(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5801,7 +5819,7 @@
         case 12:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_status(t0);
+                gen_helper_mttc0_status(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5810,7 +5828,7 @@
         case 13:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_cause(t0);
+                gen_helper_mttc0_cause(cpu_env, t0);
                 break;
             default:
                 goto die;
@@ -5820,7 +5838,7 @@
         case 15:
             switch (sel) {
             case 1:
-                gen_helper_mttc0_ebase(t0);
+                gen_helper_mttc0_ebase(cpu_env, t0);
                 break;
             default:
                 goto die;
@@ -5830,7 +5848,7 @@
         case 23:
             switch (sel) {
             case 0:
-                gen_helper_mttc0_debug(t0);
+                gen_helper_mttc0_debug(cpu_env, t0);
                 break;
             default:
                 gen_mtc0(env, ctx, t0, rd, sel);
@@ -5843,49 +5861,49 @@
     } else switch (sel) {
     /* GPR registers. */
     case 0:
-        gen_helper_1i(mttgpr, t0, rd);
+        gen_helper_0e1i(mttgpr, t0, rd);
         break;
     /* Auxiliary CPU registers */
     case 1:
         switch (rd) {
         case 0:
-            gen_helper_1i(mttlo, t0, 0);
+            gen_helper_0e1i(mttlo, t0, 0);
             break;
         case 1:
-            gen_helper_1i(mtthi, t0, 0);
+            gen_helper_0e1i(mtthi, t0, 0);
             break;
         case 2:
-            gen_helper_1i(mttacx, t0, 0);
+            gen_helper_0e1i(mttacx, t0, 0);
             break;
         case 4:
-            gen_helper_1i(mttlo, t0, 1);
+            gen_helper_0e1i(mttlo, t0, 1);
             break;
         case 5:
-            gen_helper_1i(mtthi, t0, 1);
+            gen_helper_0e1i(mtthi, t0, 1);
             break;
         case 6:
-            gen_helper_1i(mttacx, t0, 1);
+            gen_helper_0e1i(mttacx, t0, 1);
             break;
         case 8:
-            gen_helper_1i(mttlo, t0, 2);
+            gen_helper_0e1i(mttlo, t0, 2);
             break;
         case 9:
-            gen_helper_1i(mtthi, t0, 2);
+            gen_helper_0e1i(mtthi, t0, 2);
             break;
         case 10:
-            gen_helper_1i(mttacx, t0, 2);
+            gen_helper_0e1i(mttacx, t0, 2);
             break;
         case 12:
-            gen_helper_1i(mttlo, t0, 3);
+            gen_helper_0e1i(mttlo, t0, 3);
             break;
         case 13:
-            gen_helper_1i(mtthi, t0, 3);
+            gen_helper_0e1i(mtthi, t0, 3);
             break;
         case 14:
-            gen_helper_1i(mttacx, t0, 3);
+            gen_helper_0e1i(mttacx, t0, 3);
             break;
         case 16:
-            gen_helper_mttdsp(t0);
+            gen_helper_mttdsp(cpu_env, t0);
             break;
         default:
             goto die;
@@ -5910,7 +5928,7 @@
         break;
     case 3:
         /* XXX: For now we support only a single FPU context. */
-        gen_helper_1i(ctc1, t0, rd);
+        gen_helper_0e1i(ctc1, t0, rd);
         break;
     /* COP2: Not implemented. */
     case 4:
@@ -5995,30 +6013,30 @@
         opn = "tlbwi";
         if (!env->tlb->helper_tlbwi)
             goto die;
-        gen_helper_tlbwi();
+        gen_helper_tlbwi(cpu_env);
         break;
     case OPC_TLBWR:
         opn = "tlbwr";
         if (!env->tlb->helper_tlbwr)
             goto die;
-        gen_helper_tlbwr();
+        gen_helper_tlbwr(cpu_env);
         break;
     case OPC_TLBP:
         opn = "tlbp";
         if (!env->tlb->helper_tlbp)
             goto die;
-        gen_helper_tlbp();
+        gen_helper_tlbp(cpu_env);
         break;
     case OPC_TLBR:
         opn = "tlbr";
         if (!env->tlb->helper_tlbr)
             goto die;
-        gen_helper_tlbr();
+        gen_helper_tlbr(cpu_env);
         break;
     case OPC_ERET:
         opn = "eret";
         check_insn(env, ctx, ISA_MIPS2);
-        gen_helper_eret();
+        gen_helper_eret(cpu_env);
         ctx->bstate = BS_EXCP;
         break;
     case OPC_DERET:
@@ -6028,7 +6046,7 @@
             MIPS_INVAL(opn);
             generate_exception(ctx, EXCP_RI);
         } else {
-            gen_helper_deret();
+            gen_helper_deret(cpu_env);
             ctx->bstate = BS_EXCP;
         }
         break;
@@ -6039,7 +6057,7 @@
         ctx->pc += 4;
         save_cpu_state(ctx, 1);
         ctx->pc -= 4;
-        gen_helper_wait();
+        gen_helper_wait(cpu_env);
         ctx->bstate = BS_EXCP;
         break;
     default:
@@ -6340,13 +6358,13 @@
         opn = "mtc1";
         break;
     case OPC_CFC1:
-        gen_helper_1i(cfc1, t0, fs);
+        gen_helper_1e0i(cfc1, t0, fs);
         gen_store_gpr(t0, rt);
         opn = "cfc1";
         break;
     case OPC_CTC1:
         gen_load_gpr(t0, rt);
-        gen_helper_1i(ctc1, t0, fs);
+        gen_helper_0e1i(ctc1, t0, fs);
         opn = "ctc1";
         break;
 #if defined(TARGET_MIPS64)
@@ -6543,7 +6561,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_add_s(fp0, fp0, fp1);
+            gen_helper_float_add_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6558,7 +6576,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_sub_s(fp0, fp0, fp1);
+            gen_helper_float_sub_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6573,7 +6591,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_mul_s(fp0, fp0, fp1);
+            gen_helper_float_mul_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6588,7 +6606,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_div_s(fp0, fp0, fp1);
+            gen_helper_float_div_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6601,7 +6619,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_sqrt_s(fp0, fp0);
+            gen_helper_float_sqrt_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6646,7 +6664,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_roundl_s(fp64, fp32);
+            gen_helper_float_roundl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6660,7 +6678,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_truncl_s(fp64, fp32);
+            gen_helper_float_truncl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6674,7 +6692,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_ceill_s(fp64, fp32);
+            gen_helper_float_ceill_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6688,7 +6706,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_floorl_s(fp64, fp32);
+            gen_helper_float_floorl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6700,7 +6718,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_roundw_s(fp0, fp0);
+            gen_helper_float_roundw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6711,7 +6729,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_truncw_s(fp0, fp0);
+            gen_helper_float_truncw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6722,7 +6740,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_ceilw_s(fp0, fp0);
+            gen_helper_float_ceilw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6733,7 +6751,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_floorw_s(fp0, fp0);
+            gen_helper_float_floorw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6781,7 +6799,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_recip_s(fp0, fp0);
+            gen_helper_float_recip_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6793,7 +6811,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_rsqrt_s(fp0, fp0);
+            gen_helper_float_rsqrt_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6807,7 +6825,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_recip2_s(fp0, fp0, fp1);
+            gen_helper_float_recip2_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6820,7 +6838,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_recip1_s(fp0, fp0);
+            gen_helper_float_recip1_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6832,7 +6850,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_rsqrt1_s(fp0, fp0);
+            gen_helper_float_rsqrt1_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6846,7 +6864,7 @@
 
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
-            gen_helper_float_rsqrt2_s(fp0, fp0, fp1);
+            gen_helper_float_rsqrt2_s(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
@@ -6860,7 +6878,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_cvtd_s(fp64, fp32);
+            gen_helper_float_cvtd_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6872,7 +6890,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_cvtw_s(fp0, fp0);
+            gen_helper_float_cvtw_s(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -6885,7 +6903,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_cvtl_s(fp64, fp32);
+            gen_helper_float_cvtl_s(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -6941,7 +6959,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_add_d(fp0, fp0, fp1);
+            gen_helper_float_add_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6957,7 +6975,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_sub_d(fp0, fp0, fp1);
+            gen_helper_float_sub_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6973,7 +6991,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_mul_d(fp0, fp0, fp1);
+            gen_helper_float_mul_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -6989,7 +7007,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_div_d(fp0, fp0, fp1);
+            gen_helper_float_div_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7003,7 +7021,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_sqrt_d(fp0, fp0);
+            gen_helper_float_sqrt_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7050,7 +7068,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_roundl_d(fp0, fp0);
+            gen_helper_float_roundl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7062,7 +7080,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_truncl_d(fp0, fp0);
+            gen_helper_float_truncl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7074,7 +7092,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_ceill_d(fp0, fp0);
+            gen_helper_float_ceill_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7086,7 +7104,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_floorl_d(fp0, fp0);
+            gen_helper_float_floorl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7099,7 +7117,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_roundw_d(fp32, fp64);
+            gen_helper_float_roundw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7113,7 +7131,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_truncw_d(fp32, fp64);
+            gen_helper_float_truncw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7127,7 +7145,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_ceilw_d(fp32, fp64);
+            gen_helper_float_ceilw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7141,7 +7159,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_floorw_d(fp32, fp64);
+            gen_helper_float_floorw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7190,7 +7208,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_recip_d(fp0, fp0);
+            gen_helper_float_recip_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7202,7 +7220,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_rsqrt_d(fp0, fp0);
+            gen_helper_float_rsqrt_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7216,7 +7234,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_recip2_d(fp0, fp0, fp1);
+            gen_helper_float_recip2_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7229,7 +7247,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_recip1_d(fp0, fp0);
+            gen_helper_float_recip1_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7241,7 +7259,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_rsqrt1_d(fp0, fp0);
+            gen_helper_float_rsqrt1_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7255,7 +7273,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_rsqrt2_d(fp0, fp0, fp1);
+            gen_helper_float_rsqrt2_d(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7293,7 +7311,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_cvts_d(fp32, fp64);
+            gen_helper_float_cvts_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7307,7 +7325,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_cvtw_d(fp32, fp64);
+            gen_helper_float_cvtw_d(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7320,7 +7338,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtl_d(fp0, fp0);
+            gen_helper_float_cvtl_d(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7331,7 +7349,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_cvts_w(fp0, fp0);
+            gen_helper_float_cvts_w(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7344,7 +7362,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr32(fp32, fs);
-            gen_helper_float_cvtd_w(fp64, fp32);
+            gen_helper_float_cvtd_w(fp64, cpu_env, fp32);
             tcg_temp_free_i32(fp32);
             gen_store_fpr64(ctx, fp64, fd);
             tcg_temp_free_i64(fp64);
@@ -7358,7 +7376,7 @@
             TCGv_i64 fp64 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp64, fs);
-            gen_helper_float_cvts_l(fp32, fp64);
+            gen_helper_float_cvts_l(fp32, cpu_env, fp64);
             tcg_temp_free_i64(fp64);
             gen_store_fpr32(fp32, fd);
             tcg_temp_free_i32(fp32);
@@ -7371,7 +7389,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtd_l(fp0, fp0);
+            gen_helper_float_cvtd_l(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7383,7 +7401,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtps_pw(fp0, fp0);
+            gen_helper_float_cvtps_pw(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7397,7 +7415,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_add_ps(fp0, fp0, fp1);
+            gen_helper_float_add_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7412,7 +7430,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_sub_ps(fp0, fp0, fp1);
+            gen_helper_float_sub_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7427,7 +7445,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_mul_ps(fp0, fp0, fp1);
+            gen_helper_float_mul_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7515,7 +7533,7 @@
 
             gen_load_fpr64(ctx, fp0, ft);
             gen_load_fpr64(ctx, fp1, fs);
-            gen_helper_float_addr_ps(fp0, fp0, fp1);
+            gen_helper_float_addr_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7530,7 +7548,7 @@
 
             gen_load_fpr64(ctx, fp0, ft);
             gen_load_fpr64(ctx, fp1, fs);
-            gen_helper_float_mulr_ps(fp0, fp0, fp1);
+            gen_helper_float_mulr_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7545,7 +7563,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_recip2_ps(fp0, fp0, fp1);
+            gen_helper_float_recip2_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7558,7 +7576,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_recip1_ps(fp0, fp0);
+            gen_helper_float_recip1_ps(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7570,7 +7588,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_rsqrt1_ps(fp0, fp0);
+            gen_helper_float_rsqrt1_ps(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7584,7 +7602,7 @@
 
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
-            gen_helper_float_rsqrt2_ps(fp0, fp0, fp1);
+            gen_helper_float_rsqrt2_ps(fp0, cpu_env, fp0, fp1);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
@@ -7597,7 +7615,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32h(fp0, fs);
-            gen_helper_float_cvts_pu(fp0, fp0);
+            gen_helper_float_cvts_pu(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7609,7 +7627,7 @@
             TCGv_i64 fp0 = tcg_temp_new_i64();
 
             gen_load_fpr64(ctx, fp0, fs);
-            gen_helper_float_cvtpw_ps(fp0, fp0);
+            gen_helper_float_cvtpw_ps(fp0, cpu_env, fp0);
             gen_store_fpr64(ctx, fp0, fd);
             tcg_temp_free_i64(fp0);
         }
@@ -7621,7 +7639,7 @@
             TCGv_i32 fp0 = tcg_temp_new_i32();
 
             gen_load_fpr32(fp0, fs);
-            gen_helper_float_cvts_pl(fp0, fp0);
+            gen_helper_float_cvts_pl(fp0, cpu_env, fp0);
             gen_store_fpr32(fp0, fd);
             tcg_temp_free_i32(fp0);
         }
@@ -7887,7 +7905,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_muladd_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_muladd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -7906,7 +7924,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_muladd_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_muladd_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7924,7 +7942,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_muladd_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_muladd_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7942,7 +7960,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_mulsub_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_mulsub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -7961,7 +7979,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_mulsub_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_mulsub_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7979,7 +7997,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_mulsub_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_mulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -7997,7 +8015,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_nmuladd_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmuladd_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -8016,7 +8034,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmuladd_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmuladd_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8034,7 +8052,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmuladd_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmuladd_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8052,7 +8070,7 @@
             gen_load_fpr32(fp0, fs);
             gen_load_fpr32(fp1, ft);
             gen_load_fpr32(fp2, fr);
-            gen_helper_float_nmulsub_s(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmulsub_s(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i32(fp0);
             tcg_temp_free_i32(fp1);
             gen_store_fpr32(fp2, fd);
@@ -8071,7 +8089,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmulsub_d(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmulsub_d(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8089,7 +8107,7 @@
             gen_load_fpr64(ctx, fp0, fs);
             gen_load_fpr64(ctx, fp1, ft);
             gen_load_fpr64(ctx, fp2, fr);
-            gen_helper_float_nmulsub_ps(fp2, fp0, fp1, fp2);
+            gen_helper_float_nmulsub_ps(fp2, cpu_env, fp0, fp1, fp2);
             tcg_temp_free_i64(fp0);
             tcg_temp_free_i64(fp1);
             gen_store_fpr64(ctx, fp2, fd);
@@ -8122,22 +8140,22 @@
     switch (rd) {
     case 0:
         save_cpu_state(ctx, 1);
-        gen_helper_rdhwr_cpunum(t0);
+        gen_helper_rdhwr_cpunum(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
     case 1:
         save_cpu_state(ctx, 1);
-        gen_helper_rdhwr_synci_step(t0);
+        gen_helper_rdhwr_synci_step(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
     case 2:
         save_cpu_state(ctx, 1);
-        gen_helper_rdhwr_cc(t0);
+        gen_helper_rdhwr_cc(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
     case 3:
         save_cpu_state(ctx, 1);
-        gen_helper_rdhwr_ccres(t0);
+        gen_helper_rdhwr_ccres(t0, cpu_env);
         gen_store_gpr(t0, rt);
         break;
     case 29:
@@ -8214,7 +8232,7 @@
             }
             if (ctx->singlestep_enabled) {
                 save_cpu_state(ctx, 0);
-                gen_helper_0i(raise_exception, EXCP_DEBUG);
+                gen_helper_0e0i(raise_exception, EXCP_DEBUG);
             }
             tcg_gen_exit_tb(0);
             break;
@@ -8678,7 +8696,7 @@
 static int decode_extended_mips16_opc (CPUMIPSState *env, DisasContext *ctx,
                                        int *is_branch)
 {
-    int extend = lduw_code(ctx->pc + 2);
+    int extend = cpu_lduw_code(env, ctx->pc + 2);
     int op, rx, ry, funct, sa;
     int16_t imm, offset;
 
@@ -8904,7 +8922,7 @@
         /* No delay slot, so just process as a normal instruction */
         break;
     case M16_OPC_JAL:
-        offset = lduw_code(ctx->pc + 2);
+        offset = cpu_lduw_code(env, ctx->pc + 2);
         offset = (((ctx->opcode & 0x1f) << 21)
                   | ((ctx->opcode >> 5) & 0x1f) << 16
                   | offset) << 2;
@@ -9855,17 +9873,17 @@
     save_cpu_state(ctx, 1);
     switch (opc) {
     case LWM32:
-        gen_helper_lwm(t0, t1, t2);
+        gen_helper_lwm(cpu_env, t0, t1, t2);
         break;
     case SWM32:
-        gen_helper_swm(t0, t1, t2);
+        gen_helper_swm(cpu_env, t0, t1, t2);
         break;
 #ifdef TARGET_MIPS64
     case LDM:
-        gen_helper_ldm(t0, t1, t2);
+        gen_helper_ldm(cpu_env, t0, t1, t2);
         break;
     case SDM:
-        gen_helper_sdm(t0, t1, t2);
+        gen_helper_sdm(cpu_env, t0, t1, t2);
         break;
 #endif
     }
@@ -10287,7 +10305,7 @@
                 TCGv t0 = tcg_temp_new();
 
                 save_cpu_state(ctx, 1);
-                gen_helper_di(t0);
+                gen_helper_di(t0, cpu_env);
                 gen_store_gpr(t0, rs);
                 /* Stop translation as we may have switched the execution mode */
                 ctx->bstate = BS_STOP;
@@ -10300,7 +10318,7 @@
                 TCGv t0 = tcg_temp_new();
 
                 save_cpu_state(ctx, 1);
-                gen_helper_ei(t0);
+                gen_helper_ei(t0, cpu_env);
                 gen_store_gpr(t0, rs);
                 /* Stop translation as we may have switched the execution mode */
                 ctx->bstate = BS_STOP;
@@ -10635,7 +10653,7 @@
     uint32_t op, minor, mips32_op;
     uint32_t cond, fmt, cc;
 
-    insn = lduw_code(ctx->pc + 2);
+    insn = cpu_lduw_code(env, ctx->pc + 2);
     ctx->opcode = (ctx->opcode << 16) | insn;
 
     rt = (ctx->opcode >> 21) & 0x1f;
@@ -11827,7 +11845,7 @@
             MIPS_INVAL("PMON / selsl");
             generate_exception(ctx, EXCP_RI);
 #else
-            gen_helper_0i(pmon, sa);
+            gen_helper_0e0i(pmon, sa);
 #endif
             break;
         case OPC_SYSCALL:
@@ -12045,7 +12063,7 @@
 
                 save_cpu_state(ctx, 1);
                 gen_load_gpr(t0, rs);
-                gen_helper_yield(t0, t0);
+                gen_helper_yield(t0, cpu_env, t0);
                 gen_store_gpr(t0, rd);
                 tcg_temp_free(t0);
             }
@@ -12144,18 +12162,18 @@
                     break;
                 case OPC_DVPE:
                     check_insn(env, ctx, ASE_MT);
-                    gen_helper_dvpe(t0);
+                    gen_helper_dvpe(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_EVPE:
                     check_insn(env, ctx, ASE_MT);
-                    gen_helper_evpe(t0);
+                    gen_helper_evpe(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     break;
                 case OPC_DI:
                     check_insn(env, ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
-                    gen_helper_di(t0);
+                    gen_helper_di(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     /* Stop translation as we may have switched the execution mode */
                     ctx->bstate = BS_STOP;
@@ -12163,7 +12181,7 @@
                 case OPC_EI:
                     check_insn(env, ctx, ISA_MIPS32R2);
                     save_cpu_state(ctx, 1);
-                    gen_helper_ei(t0);
+                    gen_helper_ei(t0, cpu_env);
                     gen_store_gpr(t0, rt);
                     /* Stop translation as we may have switched the execution mode */
                     ctx->bstate = BS_STOP;
@@ -12432,7 +12450,7 @@
                 if (bp->pc == ctx.pc) {
                     save_cpu_state(&ctx, 1);
                     ctx.bstate = BS_BRANCH;
-                    gen_helper_0i(raise_exception, EXCP_DEBUG);
+                    gen_helper_0e0i(raise_exception, EXCP_DEBUG);
                     /* Include the breakpoint location or the tb won't
                      * be flushed when it must be.  */
                     ctx.pc += 4;
@@ -12458,14 +12476,14 @@
 
         is_branch = 0;
         if (!(ctx.hflags & MIPS_HFLAG_M16)) {
-            ctx.opcode = ldl_code(ctx.pc);
+            ctx.opcode = cpu_ldl_code(env, ctx.pc);
             insn_bytes = 4;
             decode_opc(env, &ctx, &is_branch);
         } else if (env->insn_flags & ASE_MICROMIPS) {
-            ctx.opcode = lduw_code(ctx.pc);
+            ctx.opcode = cpu_lduw_code(env, ctx.pc);
             insn_bytes = decode_micromips_opc(env, &ctx, &is_branch);
         } else if (env->insn_flags & ASE_MIPS16) {
-            ctx.opcode = lduw_code(ctx.pc);
+            ctx.opcode = cpu_lduw_code(env, ctx.pc);
             insn_bytes = decode_mips16_opc(env, &ctx, &is_branch);
         } else {
             generate_exception(&ctx, EXCP_RI);
@@ -12502,7 +12520,7 @@
         gen_io_end();
     if (env->singlestep_enabled && ctx.bstate != BS_BRANCH) {
         save_cpu_state(&ctx, ctx.bstate == BS_NONE);
-        gen_helper_0i(raise_exception, EXCP_DEBUG);
+        gen_helper_0e0i(raise_exception, EXCP_DEBUG);
     } else {
         switch (ctx.bstate) {
         case BS_STOP:
diff --git a/target-s390x/translate.c b/target-s390x/translate.c
index 66119cd..3214783 100644
--- a/target-s390x/translate.c
+++ b/target-s390x/translate.c
@@ -274,21 +274,21 @@
 #endif
 }
 
-static inline uint64_t ld_code2(uint64_t pc)
+static inline uint64_t ld_code2(CPUS390XState *env, uint64_t pc)
 {
-    return (uint64_t)cpu_lduw_code(cpu_single_env, pc);
+    return (uint64_t)cpu_lduw_code(env, pc);
 }
 
-static inline uint64_t ld_code4(uint64_t pc)
+static inline uint64_t ld_code4(CPUS390XState *env, uint64_t pc)
 {
-    return (uint64_t)cpu_ldl_code(cpu_single_env, pc);
+    return (uint64_t)cpu_ldl_code(env, pc);
 }
 
-static inline uint64_t ld_code6(uint64_t pc)
+static inline uint64_t ld_code6(CPUS390XState *env, uint64_t pc)
 {
     uint64_t opc;
-    opc = (uint64_t)cpu_lduw_code(cpu_single_env, pc) << 32;
-    opc |= (uint64_t)(uint32_t)cpu_ldl_code(cpu_single_env, pc + 2);
+    opc = (uint64_t)cpu_lduw_code(env, pc) << 32;
+    opc |= (uint64_t)(uint32_t)cpu_ldl_code(env, pc + 2);
     return opc;
 }
 
@@ -319,7 +319,7 @@
 
 #ifdef CONFIG_USER_ONLY
 
-static void gen_illegal_opcode(DisasContext *s, int ilc)
+static void gen_illegal_opcode(CPUS390XState *env, DisasContext *s, int ilc)
 {
     TCGv_i32 tmp = tcg_const_i32(EXCP_SPEC);
     update_psw_addr(s);
@@ -331,20 +331,20 @@
 
 #else /* CONFIG_USER_ONLY */
 
-static void debug_print_inst(DisasContext *s, int ilc)
+static void debug_print_inst(CPUS390XState *env, DisasContext *s, int ilc)
 {
 #ifdef DEBUG_ILLEGAL_INSTRUCTIONS
     uint64_t inst = 0;
 
     switch (ilc & 3) {
     case 1:
-        inst = ld_code2(s->pc);
+        inst = ld_code2(env, s->pc);
         break;
     case 2:
-        inst = ld_code4(s->pc);
+        inst = ld_code4(env, s->pc);
         break;
     case 3:
-        inst = ld_code6(s->pc);
+        inst = ld_code6(env, s->pc);
         break;
     }
 
@@ -353,11 +353,12 @@
 #endif
 }
 
-static void gen_program_exception(DisasContext *s, int ilc, int code)
+static void gen_program_exception(CPUS390XState *env, DisasContext *s, int ilc,
+                                  int code)
 {
     TCGv_i32 tmp;
 
-    debug_print_inst(s, ilc);
+    debug_print_inst(env, s, ilc);
 
     /* remember what pgm exeption this was */
     tmp = tcg_const_i32(code);
@@ -385,20 +386,21 @@
 }
 
 
-static void gen_illegal_opcode(DisasContext *s, int ilc)
+static void gen_illegal_opcode(CPUS390XState *env, DisasContext *s, int ilc)
 {
-    gen_program_exception(s, ilc, PGM_SPECIFICATION);
+    gen_program_exception(env, s, ilc, PGM_SPECIFICATION);
 }
 
-static void gen_privileged_exception(DisasContext *s, int ilc)
+static void gen_privileged_exception(CPUS390XState *env, DisasContext *s,
+                                     int ilc)
 {
-    gen_program_exception(s, ilc, PGM_PRIVILEGED);
+    gen_program_exception(env, s, ilc, PGM_PRIVILEGED);
 }
 
-static void check_privileged(DisasContext *s, int ilc)
+static void check_privileged(CPUS390XState *env, DisasContext *s, int ilc)
 {
     if (s->tb->flags & (PSW_MASK_PSTATE >> 32)) {
-        gen_privileged_exception(s, ilc);
+        gen_privileged_exception(env, s, ilc);
     }
 }
 
@@ -1460,7 +1462,8 @@
     set_cc_static(s);
 }
 
-static void disas_e3(DisasContext* s, int op, int r1, int x2, int b2, int d2)
+static void disas_e3(CPUS390XState *env, DisasContext* s, int op, int r1,
+                     int x2, int b2, int d2)
 {
     TCGv_i64 addr, tmp, tmp2, tmp3, tmp4;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -1925,14 +1928,14 @@
         break;
     default:
         LOG_DISAS("illegal e3 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         break;
     }
     tcg_temp_free_i64(addr);
 }
 
 #ifndef CONFIG_USER_ONLY
-static void disas_e5(DisasContext* s, uint64_t insn)
+static void disas_e5(CPUS390XState *env, DisasContext* s, uint64_t insn)
 {
     TCGv_i64 tmp, tmp2;
     int op = (insn >> 32) & 0xff;
@@ -1950,7 +1953,7 @@
         break;
     default:
         LOG_DISAS("illegal e5 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         break;
     }
 
@@ -1959,7 +1962,8 @@
 }
 #endif
 
-static void disas_eb(DisasContext *s, int op, int r1, int r3, int b2, int d2)
+static void disas_eb(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int r3, int b2, int d2)
 {
     TCGv_i64 tmp, tmp2, tmp3, tmp4;
     TCGv_i32 tmp32_1, tmp32_2;
@@ -2102,7 +2106,7 @@
 #ifndef CONFIG_USER_ONLY
     case 0x2f: /* LCTLG     R1,R3,D2(B2)     [RSE] */
         /* Load Control */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
         tmp32_2 = tcg_const_i32(r3);
@@ -2114,7 +2118,7 @@
         break;
     case 0x25: /* STCTG     R1,R3,D2(B2)     [RSE] */
         /* Store Control */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
         tmp32_2 = tcg_const_i32(r3);
@@ -2191,13 +2195,13 @@
         break;
     default:
         LOG_DISAS("illegal eb operation 0x%x\n", op);
-        gen_illegal_opcode(s, ilc);
+        gen_illegal_opcode(env, s, ilc);
         break;
     }
 }
 
-static void disas_ed(DisasContext *s, int op, int r1, int x2, int b2, int d2,
-                     int r1b)
+static void disas_ed(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int x2, int b2, int d2, int r1b)
 {
     TCGv_i32 tmp_r1, tmp32;
     TCGv_i64 addr, tmp;
@@ -2311,14 +2315,15 @@
         break;
     default:
         LOG_DISAS("illegal ed operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         return;
     }
     tcg_temp_free_i32(tmp_r1);
     tcg_temp_free_i64(addr);
 }
 
-static void disas_a5(DisasContext *s, int op, int r1, int i2)
+static void disas_a5(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int i2)
 {
     TCGv_i64 tmp, tmp2;
     TCGv_i32 tmp32;
@@ -2467,12 +2472,13 @@
         break;
     default:
         LOG_DISAS("illegal a5 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 2);
+        gen_illegal_opcode(env, s, 2);
         return;
     }
 }
 
-static void disas_a7(DisasContext *s, int op, int r1, int i2)
+static void disas_a7(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int i2)
 {
     TCGv_i64 tmp, tmp2;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -2604,12 +2610,13 @@
         break;
     default:
         LOG_DISAS("illegal a7 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 2);
+        gen_illegal_opcode(env, s, 2);
         return;
     }
 }
 
-static void disas_b2(DisasContext *s, int op, uint32_t insn)
+static void disas_b2(CPUS390XState *env, DisasContext *s, int op,
+                     uint32_t insn)
 {
     TCGv_i64 tmp, tmp2, tmp3;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -2708,7 +2715,7 @@
 #ifndef CONFIG_USER_ONLY
     case 0x02: /* STIDP     D2(B2)     [S] */
         /* Store CPU ID */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2717,7 +2724,7 @@
         break;
     case 0x04: /* SCK       D2(B2)     [S] */
         /* Set Clock */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2736,7 +2743,7 @@
         break;
     case 0x06: /* SCKC     D2(B2)     [S] */
         /* Set Clock Comparator */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2745,7 +2752,7 @@
         break;
     case 0x07: /* STCKC    D2(B2)     [S] */
         /* Store Clock Comparator */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2754,7 +2761,7 @@
         break;
     case 0x08: /* SPT      D2(B2)     [S] */
         /* Set CPU Timer */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2763,7 +2770,7 @@
         break;
     case 0x09: /* STPT     D2(B2)     [S] */
         /* Store CPU Timer */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2772,7 +2779,7 @@
         break;
     case 0x0a: /* SPKA     D2(B2)     [S] */
         /* Set PSW Key from Address */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -2784,12 +2791,12 @@
         break;
     case 0x0d: /* PTLB                [S] */
         /* Purge TLB */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         gen_helper_ptlb(cpu_env);
         break;
     case 0x10: /* SPX      D2(B2)     [S] */
         /* Set Prefix Register */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2798,7 +2805,7 @@
         break;
     case 0x11: /* STPX     D2(B2)     [S] */
         /* Store Prefix */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -2809,7 +2816,7 @@
         break;
     case 0x12: /* STAP     D2(B2)     [S] */
         /* Store CPU Address */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -2823,7 +2830,7 @@
         break;
     case 0x21: /* IPTE     R1,R2      [RRE] */
         /* Invalidate PTE */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp = load_reg(r1);
@@ -2834,7 +2841,7 @@
         break;
     case 0x29: /* ISKE     R1,R2      [RRE] */
         /* Insert Storage Key Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp = load_reg(r2);
@@ -2846,7 +2853,7 @@
         break;
     case 0x2a: /* RRBE     R1,R2      [RRE] */
         /* Set Storage Key Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp32_1 = load_reg32(r1);
@@ -2858,7 +2865,7 @@
         break;
     case 0x2b: /* SSKE     R1,R2      [RRE] */
         /* Set Storage Key Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp32_1 = load_reg32(r1);
@@ -2869,12 +2876,12 @@
         break;
     case 0x34: /* STCH ? */
         /* Store Subchannel */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         gen_op_movi_cc(s, 3);
         break;
     case 0x46: /* STURA    R1,R2      [RRE] */
         /* Store Using Real Address */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp32_1 = load_reg32(r1);
@@ -2886,7 +2893,7 @@
         break;
     case 0x50: /* CSP      R1,R2      [RRE] */
         /* Compare And Swap And Purge */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         tmp32_1 = tcg_const_i32(r1);
@@ -2898,7 +2905,7 @@
         break;
     case 0x5f: /* CHSC ? */
         /* Channel Subsystem Call */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         gen_op_movi_cc(s, 3);
         break;
     case 0x78: /* STCKE    D2(B2)     [S] */
@@ -2912,7 +2919,7 @@
         break;
     case 0x79: /* SACF    D2(B2)     [S] */
         /* Store Clock Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         potential_page_fault(s);
@@ -2924,7 +2931,7 @@
         s->is_jmp = DISAS_EXCP;
         break;
     case 0x7d: /* STSI     D2,(B2)     [S] */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = load_reg32(0);
@@ -2950,7 +2957,7 @@
         break;
     case 0xb1: /* STFL     D2(B2)     [S] */
         /* Store Facility List (CPU features) at 200 */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         tmp2 = tcg_const_i64(0xc0000000);
         tmp = tcg_const_i64(200);
         tcg_gen_qemu_st32(tmp2, tmp, get_mem_index(s));
@@ -2959,7 +2966,7 @@
         break;
     case 0xb2: /* LPSWE    D2(B2)     [S] */
         /* Load PSW Extended */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -2976,7 +2983,7 @@
         break;
     case 0x20: /* SERVC     R1,R2     [RRE] */
         /* SCLP Service call (PV hypercall) */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         potential_page_fault(s);
         tmp32_1 = load_reg32(r2);
         tmp = load_reg(r1);
@@ -2988,12 +2995,13 @@
 #endif
     default:
         LOG_DISAS("illegal b2 operation 0x%x\n", op);
-        gen_illegal_opcode(s, ilc);
+        gen_illegal_opcode(env, s, ilc);
         break;
     }
 }
 
-static void disas_b3(DisasContext *s, int op, int m3, int r1, int r2)
+static void disas_b3(CPUS390XState *env, DisasContext *s, int op, int m3,
+                     int r1, int r2)
 {
     TCGv_i64 tmp;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -3263,7 +3271,7 @@
         break;
     default:
         LOG_DISAS("illegal b3 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 2);
+        gen_illegal_opcode(env, s, 2);
         break;
     }
 
@@ -3271,7 +3279,8 @@
 #undef FP_HELPER
 }
 
-static void disas_b9(DisasContext *s, int op, int r1, int r2)
+static void disas_b9(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int r2)
 {
     TCGv_i64 tmp, tmp2, tmp3;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -3654,12 +3663,12 @@
         break;
     default:
         LOG_DISAS("illegal b9 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 2);
+        gen_illegal_opcode(env, s, 2);
         break;
     }
 }
 
-static void disas_c0(DisasContext *s, int op, int r1, int i2)
+static void disas_c0(CPUS390XState *env, DisasContext *s, int op, int r1, int i2)
 {
     TCGv_i64 tmp;
     TCGv_i32 tmp32_1, tmp32_2;
@@ -3755,12 +3764,13 @@
         break;
     default:
         LOG_DISAS("illegal c0 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         break;
     }
 }
 
-static void disas_c2(DisasContext *s, int op, int r1, int i2)
+static void disas_c2(CPUS390XState *env, DisasContext *s, int op, int r1,
+                     int i2)
 {
     TCGv_i64 tmp, tmp2, tmp3;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3;
@@ -3832,7 +3842,7 @@
         break;
     default:
         LOG_DISAS("illegal c2 operation 0x%x\n", op);
-        gen_illegal_opcode(s, 3);
+        gen_illegal_opcode(env, s, 3);
         break;
     }
 }
@@ -3854,7 +3864,7 @@
     }
 }
 
-static void disas_s390_insn(DisasContext *s)
+static void disas_s390_insn(CPUS390XState *env, DisasContext *s)
 {
     TCGv_i64 tmp, tmp2, tmp3, tmp4;
     TCGv_i32 tmp32_1, tmp32_2, tmp32_3, tmp32_4;
@@ -3865,7 +3875,7 @@
     int ilc;
     int l1;
 
-    opc = cpu_ldub_code(cpu_single_env, s->pc);
+    opc = cpu_ldub_code(env, s->pc);
     LOG_DISAS("opc 0x%x\n", opc);
 
     ilc = get_ilc(opc);
@@ -3873,12 +3883,12 @@
     switch (opc) {
 #ifndef CONFIG_USER_ONLY
     case 0x01: /* SAM */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         /* set addressing mode, but we only do 64bit anyways */
         break;
 #endif
     case 0x6: /* BCTR     R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tcg_gen_subi_i32(tmp32_1, tmp32_1, 1);
@@ -3904,7 +3914,7 @@
         }
         break;
     case 0x7: /* BCR    M1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         if (r2) {
             tmp = load_reg(r2);
@@ -3916,7 +3926,7 @@
         }
         break;
     case 0xa: /* SVC    I         [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         debug_insn(insn);
         i = insn & 0xff;
         update_psw_addr(s);
@@ -3933,7 +3943,7 @@
         tcg_temp_free_i32(tmp32_3);
         break;
     case 0xd: /* BASR   R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp = tcg_const_i64(pc_to_link_info(s, s->pc + 2));
         store_reg(r1, tmp);
@@ -3946,7 +3956,7 @@
         tcg_temp_free_i64(tmp);
         break;
     case 0xe: /* MVCL   R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = tcg_const_i32(r1);
         tmp32_2 = tcg_const_i32(r2);
@@ -3957,7 +3967,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x10: /* LPR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         set_cc_abs32(s, tmp32_1);
@@ -3966,7 +3976,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x11: /* LNR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         set_cc_nabs32(s, tmp32_1);
@@ -3975,7 +3985,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x12: /* LTR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         if (r1 != r2) {
@@ -3985,7 +3995,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x13: /* LCR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         tcg_gen_neg_i32(tmp32_1, tmp32_1);
@@ -3996,7 +4006,7 @@
     case 0x14: /* NR     R1,R2     [RR] */
     case 0x16: /* OR     R1,R2     [RR] */
     case 0x17: /* XR     R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_2 = load_reg32(r2);
         tmp32_1 = load_reg32(r1);
@@ -4007,7 +4017,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x18: /* LR     R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r2);
         store_reg32(r1, tmp32_1);
@@ -4015,7 +4025,7 @@
         break;
     case 0x15: /* CLR    R1,R2     [RR] */
     case 0x19: /* CR     R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r2);
@@ -4029,7 +4039,7 @@
         break;
     case 0x1a: /* AR     R1,R2     [RR] */
     case 0x1e: /* ALR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r2);
@@ -4047,7 +4057,7 @@
         break;
     case 0x1b: /* SR     R1,R2     [RR] */
     case 0x1f: /* SLR    R1,R2     [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r2);
@@ -4065,7 +4075,7 @@
         break;
     case 0x1c: /* MR     R1,R2     [RR] */
         /* reg(r1, r1+1) = reg(r1+1) * reg(r2) */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp2 = load_reg(r2);
         tmp3 = load_reg((r1 + 1) & 15);
@@ -4079,7 +4089,7 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x1d: /* DR     R1,R2               [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r1 + 1);
@@ -4114,21 +4124,21 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x28: /* LDR    R1,R2               [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp = load_freg(r2);
         store_freg(r1, tmp);
         tcg_temp_free_i64(tmp);
         break;
     case 0x38: /* LER    R1,R2               [RR] */
-        insn = ld_code2(s->pc);
+        insn = ld_code2(env, s->pc);
         decode_rr(s, insn, &r1, &r2);
         tmp32_1 = load_freg32(r2);
         store_freg32(r1, tmp32_1);
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x40: /* STH    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_reg(r1);
         tcg_gen_qemu_st16(tmp2, tmp, get_mem_index(s));
@@ -4136,13 +4146,13 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x41:        /* la */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         store_reg(r1, tmp); /* FIXME: 31/24-bit addressing */
         tcg_temp_free_i64(tmp);
         break;
     case 0x42: /* STC    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_reg(r1);
         tcg_gen_qemu_st8(tmp2, tmp, get_mem_index(s));
@@ -4150,7 +4160,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x43: /* IC     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld8u(tmp2, tmp, get_mem_index(s));
@@ -4159,7 +4169,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x44: /* EX     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_reg(r1);
         tmp3 = tcg_const_i64(s->pc + 4);
@@ -4172,7 +4182,7 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x46: /* BCT    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tcg_temp_free_i64(tmp);
 
@@ -4196,14 +4206,14 @@
         tcg_temp_free_i64(tmp);
         break;
     case 0x47: /* BC     M1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         gen_bcr(s, r1, tmp, s->pc + 4);
         tcg_temp_free_i64(tmp);
         s->is_jmp = DISAS_TB_JUMP;
         break;
     case 0x48: /* LH     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld16s(tmp2, tmp, get_mem_index(s));
@@ -4212,7 +4222,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x49: /* CH     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = tcg_temp_new_i32();
@@ -4228,7 +4238,7 @@
     case 0x4a: /* AH     R1,D2(X2,B2)     [RX] */
     case 0x4b: /* SH     R1,D2(X2,B2)     [RX] */
     case 0x4c: /* MH     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = load_reg32(r1);
@@ -4261,7 +4271,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x4d: /* BAS    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_const_i64(pc_to_link_info(s, s->pc + 4));
         store_reg(r1, tmp2);
@@ -4271,7 +4281,7 @@
         s->is_jmp = DISAS_JUMP;
         break;
     case 0x4e: /* CVD    R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4283,7 +4293,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x50: /* st r1, d2(x2, b2) */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_reg(r1);
         tcg_gen_qemu_st32(tmp2, tmp, get_mem_index(s));
@@ -4291,7 +4301,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x55: /* CL     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4307,7 +4317,7 @@
     case 0x54: /* N      R1,D2(X2,B2)     [RX] */
     case 0x56: /* O      R1,D2(X2,B2)     [RX] */
     case 0x57: /* X      R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = load_reg32(r1);
@@ -4323,7 +4333,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x58: /* l r1, d2(x2, b2) */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4335,7 +4345,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x59: /* C      R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4352,7 +4362,7 @@
     case 0x5b: /* S      R1,D2(X2,B2)     [RX] */
     case 0x5e: /* AL     R1,D2(X2,B2)     [RX] */
     case 0x5f: /* SL     R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = tcg_temp_new_i32();
@@ -4395,7 +4405,7 @@
         break;
     case 0x5c: /* M      R1,D2(X2,B2)        [RX] */
         /* reg(r1, r1+1) = reg(r1+1) * *(s32*)addr */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld32s(tmp2, tmp, get_mem_index(s));
@@ -4411,7 +4421,7 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x5d: /* D      R1,D2(X2,B2)        [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp3 = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp32_1 = load_reg32(r1);
         tmp32_2 = load_reg32(r1 + 1);
@@ -4445,7 +4455,7 @@
         tcg_temp_free_i64(tmp3);
         break;
     case 0x60: /* STD    R1,D2(X2,B2)        [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = load_freg(r1);
         tcg_gen_qemu_st64(tmp2, tmp, get_mem_index(s));
@@ -4453,7 +4463,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x68: /* LD    R1,D2(X2,B2)        [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld64(tmp2, tmp, get_mem_index(s));
@@ -4462,7 +4472,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x70: /* STE R1,D2(X2,B2) [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = load_freg32(r1);
@@ -4473,7 +4483,7 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0x71: /* MS      R1,D2(X2,B2)     [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = load_reg32(r1);
@@ -4488,7 +4498,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x78: /* LE     R1,D2(X2,B2)        [RX] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp2 = tcg_temp_new_i64();
         tmp32_1 = tcg_temp_new_i32();
@@ -4502,8 +4512,8 @@
 #ifndef CONFIG_USER_ONLY
     case 0x80: /* SSM      D2(B2)       [S] */
         /* Set System Mask */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -4518,8 +4528,8 @@
         break;
     case 0x82: /* LPSW     D2(B2)       [S] */
         /* Load PSW */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = tcg_temp_new_i64();
@@ -4536,9 +4546,9 @@
         break;
     case 0x83: /* DIAG     R1,R3,D2     [RS] */
         /* Diagnose call (KVM hypercall) */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         potential_page_fault(s);
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp32_1 = tcg_const_i32(insn & 0xfff);
         tmp2 = load_reg(2);
@@ -4553,7 +4563,7 @@
     case 0x88: /* SRL    R1,D2(B2)        [RS] */
     case 0x89: /* SLL    R1,D2(B2)        [RS] */
     case 0x8a: /* SRA    R1,D2(B2)        [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = load_reg32(r1);
@@ -4582,7 +4592,7 @@
     case 0x8c: /* SRDL   R1,D2(B2)        [RS] */
     case 0x8d: /* SLDL   R1,D2(B2)        [RS] */
     case 0x8e: /* SRDA   R1,D2(B2)        [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2); /* shift */
         tmp2 = tcg_temp_new_i64();
@@ -4611,7 +4621,7 @@
         break;
     case 0x98: /* LM     R1,R3,D2(B2)     [RS] */
     case 0x90: /* STM    R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
 
         tmp = get_address(s, 0, b2, d2);
@@ -4637,7 +4647,7 @@
         tcg_temp_free_i64(tmp4);
         break;
     case 0x91: /* TM     D1(B1),I2        [SI] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_const_i64(i2);
         tcg_gen_qemu_ld8u(tmp, tmp, get_mem_index(s));
@@ -4646,7 +4656,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x92: /* MVI    D1(B1),I2        [SI] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_const_i64(i2);
         tcg_gen_qemu_st8(tmp2, tmp, get_mem_index(s));
@@ -4656,7 +4666,7 @@
     case 0x94: /* NI     D1(B1),I2        [SI] */
     case 0x96: /* OI     D1(B1),I2        [SI] */
     case 0x97: /* XI     D1(B1),I2        [SI] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld8u(tmp2, tmp, get_mem_index(s));
@@ -4679,7 +4689,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x95: /* CLI    D1(B1),I2        [SI] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_qemu_ld8u(tmp2, tmp, get_mem_index(s));
@@ -4688,7 +4698,7 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0x9a: /* LAM      R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4700,7 +4710,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0x9b: /* STAM     R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4712,21 +4722,21 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xa5:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         r1 = (insn >> 20) & 0xf;
         op = (insn >> 16) & 0xf;
         i2 = insn & 0xffff;
-        disas_a5(s, op, r1, i2);
+        disas_a5(env, s, op, r1, i2);
         break;
     case 0xa7:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         r1 = (insn >> 20) & 0xf;
         op = (insn >> 16) & 0xf;
         i2 = (short)insn;
-        disas_a7(s, op, r1, i2);
+        disas_a7(env, s, op, r1, i2);
         break;
     case 0xa8: /* MVCLE   R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4739,7 +4749,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xa9: /* CLCLE   R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4754,8 +4764,8 @@
 #ifndef CONFIG_USER_ONLY
     case 0xac: /* STNSM   D1(B1),I2     [SI] */
     case 0xad: /* STOSM   D1(B1),I2     [SI] */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_si(s, insn, &i2, &b1, &d1);
         tmp2 = tcg_temp_new_i64();
         tcg_gen_shri_i64(tmp2, psw_mask, 56);
@@ -4770,8 +4780,8 @@
         tcg_temp_free_i64(tmp2);
         break;
     case 0xae: /* SIGP   R1,R3,D2(B2)     [RS] */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp2 = load_reg(r3);
@@ -4784,8 +4794,8 @@
         tcg_temp_free_i32(tmp32_1);
         break;
     case 0xb1: /* LRA    R1,D2(X2, B2)     [RX] */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         tmp = decode_rx(s, insn, &r1, &x2, &b2, &d2);
         tmp32_1 = tcg_const_i32(r1);
         potential_page_fault(s);
@@ -4796,7 +4806,7 @@
         break;
 #endif
     case 0xb2:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         op = (insn >> 16) & 0xff;
         switch (op) {
         case 0x9c: /* STFPC    D2(B2) [S] */
@@ -4813,23 +4823,23 @@
             tcg_temp_free_i64(tmp2);
             break;
         default:
-            disas_b2(s, op, insn);
+            disas_b2(env, s, op, insn);
             break;
         }
         break;
     case 0xb3:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         op = (insn >> 16) & 0xff;
         r3 = (insn >> 12) & 0xf; /* aka m3 */
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
-        disas_b3(s, op, r3, r1, r2);
+        disas_b3(env, s, op, r3, r1, r2);
         break;
 #ifndef CONFIG_USER_ONLY
     case 0xb6: /* STCTL     R1,R3,D2(B2)     [RS] */
         /* Store Control */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4842,8 +4852,8 @@
         break;
     case 0xb7: /* LCTL      R1,R3,D2(B2)     [RS] */
         /* Load Control */
-        check_privileged(s, ilc);
-        insn = ld_code4(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4856,14 +4866,14 @@
         break;
 #endif
     case 0xb9:
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         r1 = (insn >> 4) & 0xf;
         r2 = insn & 0xf;
         op = (insn >> 16) & 0xff;
-        disas_b9(s, op, r1, r2);
+        disas_b9(env, s, op, r1, r2);
         break;
     case 0xba: /* CS     R1,R3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = tcg_const_i32(r1);
@@ -4876,7 +4886,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xbd: /* CLM    R1,M3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = load_reg32(r1);
@@ -4889,7 +4899,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xbe: /* STCM R1,M3,D2(B2) [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         tmp = get_address(s, 0, b2, d2);
         tmp32_1 = load_reg32(r1);
@@ -4901,7 +4911,7 @@
         tcg_temp_free_i32(tmp32_2);
         break;
     case 0xbf: /* ICM    R1,M3,D2(B2)     [RS] */
-        insn = ld_code4(s->pc);
+        insn = ld_code4(env, s->pc);
         decode_rs(s, insn, &r1, &r3, &b2, &d2);
         if (r3 == 15) {
             /* effectively a 32-bit load */
@@ -4956,16 +4966,16 @@
         break;
     case 0xc0:
     case 0xc2:
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         r1 = (insn >> 36) & 0xf;
         op = (insn >> 32) & 0xf;
         i2 = (int)insn;
         switch (opc) {
         case 0xc0:
-            disas_c0(s, op, r1, i2);
+            disas_c0(env, s, op, r1, i2);
             break;
         case 0xc2:
-            disas_c2(s, op, r1, i2);
+            disas_c2(env, s, op, r1, i2);
             break;
         default:
             tcg_abort();
@@ -4978,7 +4988,7 @@
     case 0xd7: /* XC     D1(L,B1),D2(B2)         [SS] */
     case 0xdc: /* TR     D1(L,B1),D2(B2)         [SS] */
     case 0xf3: /* UNPK   D1(L1,B1),D2(L2,B2)     [SS] */
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         vl = tcg_const_i32((insn >> 32) & 0xff);
         b1 = (insn >> 28) & 0xf;
         b2 = (insn >> 12) & 0xf;
@@ -5026,9 +5036,9 @@
 #ifndef CONFIG_USER_ONLY
     case 0xda: /* MVCP     D1(R1,B1),D2(B2),R3   [SS] */
     case 0xdb: /* MVCS     D1(R1,B1),D2(B2),R3   [SS] */
-        check_privileged(s, ilc);
+        check_privileged(env, s, ilc);
         potential_page_fault(s);
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         r1 = (insn >> 36) & 0xf;
         r3 = (insn >> 32) & 0xf;
         b1 = (insn >> 28) & 0xf;
@@ -5051,7 +5061,7 @@
         break;
 #endif
     case 0xe3:
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         debug_insn(insn);
         op = insn & 0xff;
         r1 = (insn >> 36) & 0xf;
@@ -5059,19 +5069,19 @@
         b2 = (insn >> 28) & 0xf;
         d2 = ((int)((((insn >> 16) & 0xfff)
            | ((insn << 4) & 0xff000)) << 12)) >> 12;
-        disas_e3(s, op,  r1, x2, b2, d2 );
+        disas_e3(env, s, op,  r1, x2, b2, d2 );
         break;
 #ifndef CONFIG_USER_ONLY
     case 0xe5:
         /* Test Protection */
-        check_privileged(s, ilc);
-        insn = ld_code6(s->pc);
+        check_privileged(env, s, ilc);
+        insn = ld_code6(env, s->pc);
         debug_insn(insn);
-        disas_e5(s, insn);
+        disas_e5(env, s, insn);
         break;
 #endif
     case 0xeb:
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         debug_insn(insn);
         op = insn & 0xff;
         r1 = (insn >> 36) & 0xf;
@@ -5079,10 +5089,10 @@
         b2 = (insn >> 28) & 0xf;
         d2 = ((int)((((insn >> 16) & 0xfff)
            | ((insn << 4) & 0xff000)) << 12)) >> 12;
-        disas_eb(s, op, r1, r3, b2, d2);
+        disas_eb(env, s, op, r1, r3, b2, d2);
         break;
     case 0xed:
-        insn = ld_code6(s->pc);
+        insn = ld_code6(env, s->pc);
         debug_insn(insn);
         op = insn & 0xff;
         r1 = (insn >> 36) & 0xf;
@@ -5090,11 +5100,11 @@
         b2 = (insn >> 28) & 0xf;
         d2 = (short)((insn >> 16) & 0xfff);
         r1b = (insn >> 12) & 0xf;
-        disas_ed(s, op, r1, x2, b2, d2, r1b);
+        disas_ed(env, s, op, r1, x2, b2, d2, r1b);
         break;
     default:
         qemu_log_mask(LOG_UNIMP, "unimplemented opcode 0x%x\n", opc);
-        gen_illegal_opcode(s, ilc);
+        gen_illegal_opcode(env, s, ilc);
         break;
     }
 
@@ -5167,7 +5177,7 @@
         LOG_DISAS("pc " TARGET_FMT_lx "\n",
                   dc.pc);
 #endif
-        disas_s390_insn(&dc);
+        disas_s390_insn(env, &dc);
 
         num_insns++;
         if (env->singlestep_enabled) {
diff --git a/target-sh4/Makefile.objs b/target-sh4/Makefile.objs
index 2e0e093..ca20f21 100644
--- a/target-sh4/Makefile.objs
+++ b/target-sh4/Makefile.objs
@@ -1,4 +1,2 @@
 obj-y += translate.o op_helper.o helper.o cpu.o
 obj-$(CONFIG_SOFTMMU) += machine.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-sh4/helper.h b/target-sh4/helper.h
index 95e3c7c..6e4f108 100644
--- a/target-sh4/helper.h
+++ b/target-sh4/helper.h
@@ -1,54 +1,54 @@
 #include "def-helper.h"
 
-DEF_HELPER_0(ldtlb, void)
-DEF_HELPER_0(raise_illegal_instruction, void)
-DEF_HELPER_0(raise_slot_illegal_instruction, void)
-DEF_HELPER_0(raise_fpu_disable, void)
-DEF_HELPER_0(raise_slot_fpu_disable, void)
-DEF_HELPER_0(debug, void)
-DEF_HELPER_1(sleep, void, i32)
-DEF_HELPER_1(trapa, void, i32)
+DEF_HELPER_1(ldtlb, void, env)
+DEF_HELPER_1(raise_illegal_instruction, void, env)
+DEF_HELPER_1(raise_slot_illegal_instruction, void, env)
+DEF_HELPER_1(raise_fpu_disable, void, env)
+DEF_HELPER_1(raise_slot_fpu_disable, void, env)
+DEF_HELPER_1(debug, void, env)
+DEF_HELPER_2(sleep, void, env, i32)
+DEF_HELPER_2(trapa, void, env, i32)
 
-DEF_HELPER_2(movcal, void, i32, i32)
-DEF_HELPER_0(discard_movcal_backup, void)
-DEF_HELPER_1(ocbi, void, i32)
+DEF_HELPER_3(movcal, void, env, i32, i32)
+DEF_HELPER_1(discard_movcal_backup, void, env)
+DEF_HELPER_2(ocbi, void, env, i32)
 
-DEF_HELPER_2(addv, i32, i32, i32)
-DEF_HELPER_2(addc, i32, i32, i32)
-DEF_HELPER_2(subv, i32, i32, i32)
-DEF_HELPER_2(subc, i32, i32, i32)
-DEF_HELPER_2(div1, i32, i32, i32)
-DEF_HELPER_2(macl, void, i32, i32)
-DEF_HELPER_2(macw, void, i32, i32)
+DEF_HELPER_3(addv, i32, env, i32, i32)
+DEF_HELPER_3(addc, i32, env, i32, i32)
+DEF_HELPER_3(subv, i32, env, i32, i32)
+DEF_HELPER_3(subc, i32, env, i32, i32)
+DEF_HELPER_3(div1, i32, env, i32, i32)
+DEF_HELPER_3(macl, void, env, i32, i32)
+DEF_HELPER_3(macw, void, env, i32, i32)
 
-DEF_HELPER_1(ld_fpscr, void, i32)
+DEF_HELPER_2(ld_fpscr, void, env, i32)
 
 DEF_HELPER_1(fabs_FT, f32, f32)
 DEF_HELPER_1(fabs_DT, f64, f64)
-DEF_HELPER_2(fadd_FT, f32, f32, f32)
-DEF_HELPER_2(fadd_DT, f64, f64, f64)
-DEF_HELPER_1(fcnvsd_FT_DT, f64, f32)
-DEF_HELPER_1(fcnvds_DT_FT, f32, f64)
+DEF_HELPER_3(fadd_FT, f32, env, f32, f32)
+DEF_HELPER_3(fadd_DT, f64, env, f64, f64)
+DEF_HELPER_2(fcnvsd_FT_DT, f64, env, f32)
+DEF_HELPER_2(fcnvds_DT_FT, f32, env, f64)
 
-DEF_HELPER_2(fcmp_eq_FT, void, f32, f32)
-DEF_HELPER_2(fcmp_eq_DT, void, f64, f64)
-DEF_HELPER_2(fcmp_gt_FT, void, f32, f32)
-DEF_HELPER_2(fcmp_gt_DT, void, f64, f64)
-DEF_HELPER_2(fdiv_FT, f32, f32, f32)
-DEF_HELPER_2(fdiv_DT, f64, f64, f64)
-DEF_HELPER_1(float_FT, f32, i32)
-DEF_HELPER_1(float_DT, f64, i32)
-DEF_HELPER_3(fmac_FT, f32, f32, f32, f32)
-DEF_HELPER_2(fmul_FT, f32, f32, f32)
-DEF_HELPER_2(fmul_DT, f64, f64, f64)
+DEF_HELPER_3(fcmp_eq_FT, void, env, f32, f32)
+DEF_HELPER_3(fcmp_eq_DT, void, env, f64, f64)
+DEF_HELPER_3(fcmp_gt_FT, void, env, f32, f32)
+DEF_HELPER_3(fcmp_gt_DT, void, env, f64, f64)
+DEF_HELPER_3(fdiv_FT, f32, env, f32, f32)
+DEF_HELPER_3(fdiv_DT, f64, env, f64, f64)
+DEF_HELPER_2(float_FT, f32, env, i32)
+DEF_HELPER_2(float_DT, f64, env, i32)
+DEF_HELPER_4(fmac_FT, f32, env, f32, f32, f32)
+DEF_HELPER_3(fmul_FT, f32, env, f32, f32)
+DEF_HELPER_3(fmul_DT, f64, env, f64, f64)
 DEF_HELPER_1(fneg_T, f32, f32)
-DEF_HELPER_2(fsub_FT, f32, f32, f32)
-DEF_HELPER_2(fsub_DT, f64, f64, f64)
-DEF_HELPER_1(fsqrt_FT, f32, f32)
-DEF_HELPER_1(fsqrt_DT, f64, f64)
-DEF_HELPER_1(ftrc_FT, i32, f32)
-DEF_HELPER_1(ftrc_DT, i32, f64)
-DEF_HELPER_2(fipr, void, i32, i32)
-DEF_HELPER_1(ftrv, void, i32)
+DEF_HELPER_3(fsub_FT, f32, env, f32, f32)
+DEF_HELPER_3(fsub_DT, f64, env, f64, f64)
+DEF_HELPER_2(fsqrt_FT, f32, env, f32)
+DEF_HELPER_2(fsqrt_DT, f64, env, f64)
+DEF_HELPER_2(ftrc_FT, i32, env, f32)
+DEF_HELPER_2(ftrc_DT, i32, env, f64)
+DEF_HELPER_3(fipr, void, env, i32, i32)
+DEF_HELPER_2(ftrv, void, env, i32)
 
 #include "def-helper.h"
diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c
index 4054791..9b4328d 100644
--- a/target-sh4/op_helper.c
+++ b/target-sh4/op_helper.c
@@ -19,10 +19,9 @@
 #include <assert.h>
 #include <stdlib.h>
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 
-static void cpu_restore_state_from_retaddr(uintptr_t retaddr)
+static void cpu_restore_state_from_retaddr(CPUSH4State *env, uintptr_t retaddr)
 {
     TranslationBlock *tb;
 
@@ -53,26 +52,22 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
-void tlb_fill(CPUSH4State *env1, target_ulong addr, int is_write, int mmu_idx,
+void tlb_fill(CPUSH4State *env, target_ulong addr, int is_write, int mmu_idx,
               uintptr_t retaddr)
 {
-    CPUSH4State *saved_env;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = cpu_sh4_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (ret) {
         /* now we have a real cpu fault */
-        cpu_restore_state_from_retaddr(retaddr);
+        cpu_restore_state_from_retaddr(env, retaddr);
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 
 #endif
 
-void helper_ldtlb(void)
+void helper_ldtlb(CPUSH4State *env)
 {
 #ifdef CONFIG_USER_ONLY
     /* XXXXX */
@@ -82,40 +77,41 @@
 #endif
 }
 
-static inline void raise_exception(int index, uintptr_t retaddr)
+static inline void raise_exception(CPUSH4State *env, int index,
+                                   uintptr_t retaddr)
 {
     env->exception_index = index;
-    cpu_restore_state_from_retaddr(retaddr);
+    cpu_restore_state_from_retaddr(env, retaddr);
     cpu_loop_exit(env);
 }
 
-void helper_raise_illegal_instruction(void)
+void helper_raise_illegal_instruction(CPUSH4State *env)
 {
-    raise_exception(0x180, GETPC());
+    raise_exception(env, 0x180, GETPC());
 }
 
-void helper_raise_slot_illegal_instruction(void)
+void helper_raise_slot_illegal_instruction(CPUSH4State *env)
 {
-    raise_exception(0x1a0, GETPC());
+    raise_exception(env, 0x1a0, GETPC());
 }
 
-void helper_raise_fpu_disable(void)
+void helper_raise_fpu_disable(CPUSH4State *env)
 {
-    raise_exception(0x800, GETPC());
+    raise_exception(env, 0x800, GETPC());
 }
 
-void helper_raise_slot_fpu_disable(void)
+void helper_raise_slot_fpu_disable(CPUSH4State *env)
 {
-    raise_exception(0x820, GETPC());
+    raise_exception(env, 0x820, GETPC());
 }
 
-void helper_debug(void)
+void helper_debug(CPUSH4State *env)
 {
     env->exception_index = EXCP_DEBUG;
     cpu_loop_exit(env);
 }
 
-void helper_sleep(uint32_t next_pc)
+void helper_sleep(CPUSH4State *env, uint32_t next_pc)
 {
     env->halted = 1;
     env->in_sleep = 1;
@@ -124,13 +120,13 @@
     cpu_loop_exit(env);
 }
 
-void helper_trapa(uint32_t tra)
+void helper_trapa(CPUSH4State *env, uint32_t tra)
 {
     env->tra = tra << 2;
-    raise_exception(0x160, GETPC());
+    raise_exception(env, 0x160, GETPC());
 }
 
-void helper_movcal(uint32_t address, uint32_t value)
+void helper_movcal(CPUSH4State *env, uint32_t address, uint32_t value)
 {
     if (cpu_sh4_is_cached (env, address))
     {
@@ -144,7 +140,7 @@
     }
 }
 
-void helper_discard_movcal_backup(void)
+void helper_discard_movcal_backup(CPUSH4State *env)
 {
     memory_content *current = env->movcal_backup;
 
@@ -158,7 +154,7 @@
     } 
 }
 
-void helper_ocbi(uint32_t address)
+void helper_ocbi(CPUSH4State *env, uint32_t address)
 {
     memory_content **current = &(env->movcal_backup);
     while (*current)
@@ -167,7 +163,7 @@
 	if ((a & ~0x1F) == (address & ~0x1F))
 	{
 	    memory_content *next = (*current)->next;
-	    stl(a, (*current)->value);
+            cpu_stl_data(env, a, (*current)->value);
 	    
 	    if (next == NULL)
 	    {
@@ -181,7 +177,7 @@
     }
 }
 
-uint32_t helper_addc(uint32_t arg0, uint32_t arg1)
+uint32_t helper_addc(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     uint32_t tmp0, tmp1;
 
@@ -197,7 +193,7 @@
     return arg1;
 }
 
-uint32_t helper_addv(uint32_t arg0, uint32_t arg1)
+uint32_t helper_addv(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     uint32_t dest, src, ans;
 
@@ -236,7 +232,7 @@
 #define SETM env->sr |= SR_M
 #define CLRM env->sr &= ~SR_M
 
-uint32_t helper_div1(uint32_t arg0, uint32_t arg1)
+uint32_t helper_div1(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     uint32_t tmp0, tmp2;
     uint8_t old_q, tmp1 = 0xff;
@@ -344,7 +340,7 @@
     return arg1;
 }
 
-void helper_macl(uint32_t arg0, uint32_t arg1)
+void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     int64_t res;
 
@@ -360,7 +356,7 @@
     }
 }
 
-void helper_macw(uint32_t arg0, uint32_t arg1)
+void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     int64_t res;
 
@@ -379,7 +375,7 @@
     }
 }
 
-uint32_t helper_subc(uint32_t arg0, uint32_t arg1)
+uint32_t helper_subc(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     uint32_t tmp0, tmp1;
 
@@ -395,7 +391,7 @@
     return arg1;
 }
 
-uint32_t helper_subv(uint32_t arg0, uint32_t arg1)
+uint32_t helper_subv(CPUSH4State *env, uint32_t arg0, uint32_t arg1)
 {
     int32_t dest, src, ans;
 
@@ -424,17 +420,17 @@
     return arg1;
 }
 
-static inline void set_t(void)
+static inline void set_t(CPUSH4State *env)
 {
     env->sr |= SR_T;
 }
 
-static inline void clr_t(void)
+static inline void clr_t(CPUSH4State *env)
 {
     env->sr &= ~SR_T;
 }
 
-void helper_ld_fpscr(uint32_t val)
+void helper_ld_fpscr(CPUSH4State *env, uint32_t val)
 {
     env->fpscr = val & FPSCR_MASK;
     if ((val & FPSCR_RM_MASK) == FPSCR_RM_ZERO) {
@@ -445,7 +441,7 @@
     set_flush_to_zero((val & FPSCR_DN) != 0, &env->fp_status);
 }
 
-static void update_fpscr(uintptr_t retaddr)
+static void update_fpscr(CPUSH4State *env, uintptr_t retaddr)
 {
     int xcpt, cause, enable;
 
@@ -479,7 +475,7 @@
         cause = (env->fpscr & FPSCR_CAUSE_MASK) >> FPSCR_CAUSE_SHIFT;
         enable = (env->fpscr & FPSCR_ENABLE_MASK) >> FPSCR_ENABLE_SHIFT;
         if (cause & enable) {
-            cpu_restore_state_from_retaddr(retaddr);
+            cpu_restore_state_from_retaddr(env, retaddr);
             env->exception_index = 0x120;
             cpu_loop_exit(env);
         }
@@ -496,156 +492,156 @@
     return float64_abs(t0);
 }
 
-float32 helper_fadd_FT(float32 t0, float32 t1)
+float32 helper_fadd_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_add(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fadd_DT(float64 t0, float64 t1)
+float64 helper_fadd_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_add(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-void helper_fcmp_eq_FT(float32 t0, float32 t1)
+void helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     int relation;
 
     set_float_exception_flags(0, &env->fp_status);
     relation = float32_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
-        update_fpscr(GETPC());
+        update_fpscr(env, GETPC());
     } else if (relation == float_relation_equal) {
-	set_t();
+        set_t(env);
     } else {
-	clr_t();
+        clr_t(env);
     }
 }
 
-void helper_fcmp_eq_DT(float64 t0, float64 t1)
+void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     int relation;
 
     set_float_exception_flags(0, &env->fp_status);
     relation = float64_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
-        update_fpscr(GETPC());
+        update_fpscr(env, GETPC());
     } else if (relation == float_relation_equal) {
-	set_t();
+        set_t(env);
     } else {
-	clr_t();
+        clr_t(env);
     }
 }
 
-void helper_fcmp_gt_FT(float32 t0, float32 t1)
+void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     int relation;
 
     set_float_exception_flags(0, &env->fp_status);
     relation = float32_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
-        update_fpscr(GETPC());
+        update_fpscr(env, GETPC());
     } else if (relation == float_relation_greater) {
-	set_t();
+        set_t(env);
     } else {
-	clr_t();
+        clr_t(env);
     }
 }
 
-void helper_fcmp_gt_DT(float64 t0, float64 t1)
+void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     int relation;
 
     set_float_exception_flags(0, &env->fp_status);
     relation = float64_compare(t0, t1, &env->fp_status);
     if (unlikely(relation == float_relation_unordered)) {
-        update_fpscr(GETPC());
+        update_fpscr(env, GETPC());
     } else if (relation == float_relation_greater) {
-	set_t();
+        set_t(env);
     } else {
-	clr_t();
+        clr_t(env);
     }
 }
 
-float64 helper_fcnvsd_FT_DT(float32 t0)
+float64 helper_fcnvsd_FT_DT(CPUSH4State *env, float32 t0)
 {
     float64 ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = float32_to_float64(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-float32 helper_fcnvds_DT_FT(float64 t0)
+float32 helper_fcnvds_DT_FT(CPUSH4State *env, float64 t0)
 {
     float32 ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = float64_to_float32(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-float32 helper_fdiv_FT(float32 t0, float32 t1)
+float32 helper_fdiv_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_div(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fdiv_DT(float64 t0, float64 t1)
+float64 helper_fdiv_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_div(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float32 helper_float_FT(uint32_t t0)
+float32 helper_float_FT(CPUSH4State *env, uint32_t t0)
 {
     float32 ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = int32_to_float32(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-float64 helper_float_DT(uint32_t t0)
+float64 helper_float_DT(CPUSH4State *env, uint32_t t0)
 {
     float64 ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = int32_to_float64(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-float32 helper_fmac_FT(float32 t0, float32 t1, float32 t2)
+float32 helper_fmac_FT(CPUSH4State *env, float32 t0, float32 t1, float32 t2)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_mul(t0, t1, &env->fp_status);
     t0 = float32_add(t0, t2, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float32 helper_fmul_FT(float32 t0, float32 t1)
+float32 helper_fmul_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_mul(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fmul_DT(float64 t0, float64 t1)
+float64 helper_fmul_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_mul(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
@@ -654,57 +650,57 @@
     return float32_chs(t0);
 }
 
-float32 helper_fsqrt_FT(float32 t0)
+float32 helper_fsqrt_FT(CPUSH4State *env, float32 t0)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_sqrt(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fsqrt_DT(float64 t0)
+float64 helper_fsqrt_DT(CPUSH4State *env, float64 t0)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_sqrt(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float32 helper_fsub_FT(float32 t0, float32 t1)
+float32 helper_fsub_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float32_sub(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-float64 helper_fsub_DT(float64 t0, float64 t1)
+float64 helper_fsub_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
     set_float_exception_flags(0, &env->fp_status);
     t0 = float64_sub(t0, t1, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return t0;
 }
 
-uint32_t helper_ftrc_FT(float32 t0)
+uint32_t helper_ftrc_FT(CPUSH4State *env, float32 t0)
 {
     uint32_t ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = float32_to_int32_round_to_zero(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-uint32_t helper_ftrc_DT(float64 t0)
+uint32_t helper_ftrc_DT(CPUSH4State *env, float64 t0)
 {
     uint32_t ret;
     set_float_exception_flags(0, &env->fp_status);
     ret = float64_to_int32_round_to_zero(t0, &env->fp_status);
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
     return ret;
 }
 
-void helper_fipr(uint32_t m, uint32_t n)
+void helper_fipr(CPUSH4State *env, uint32_t m, uint32_t n)
 {
     int bank, i;
     float32 r, p;
@@ -719,12 +715,12 @@
                         &env->fp_status);
         r = float32_add(r, p, &env->fp_status);
     }
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
 
     env->fregs[bank + n + 3] = r;
 }
 
-void helper_ftrv(uint32_t n)
+void helper_ftrv(CPUSH4State *env, uint32_t n)
 {
     int bank_matrix, bank_vector;
     int i, j;
@@ -743,7 +739,7 @@
             r[i] = float32_add(r[i], p, &env->fp_status);
         }
     }
-    update_fpscr(GETPC());
+    update_fpscr(env, GETPC());
 
     for (i = 0 ; i < 4 ; i++) {
         env->fregs[bank_vector + i] = r[i];
diff --git a/target-sh4/translate.c b/target-sh4/translate.c
index 6532ad2..d05c74c 100644
--- a/target-sh4/translate.c
+++ b/target-sh4/translate.c
@@ -276,7 +276,7 @@
     } else {
         tcg_gen_movi_i32(cpu_pc, dest);
         if (ctx->singlestep_enabled)
-            gen_helper_debug();
+            gen_helper_debug(cpu_env);
         tcg_gen_exit_tb(0);
     }
 }
@@ -288,7 +288,7 @@
 	   delayed jump as immediate jump are conditinal jumps */
 	tcg_gen_mov_i32(cpu_pc, cpu_delayed_pc);
 	if (ctx->singlestep_enabled)
-	    gen_helper_debug();
+            gen_helper_debug(cpu_env);
 	tcg_gen_exit_tb(0);
     } else {
 	gen_goto_tb(ctx, 0, ctx->delayed_pc);
@@ -437,7 +437,7 @@
 #define CHECK_NOT_DELAY_SLOT \
   if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL))     \
   {                                                           \
-      gen_helper_raise_slot_illegal_instruction();            \
+      gen_helper_raise_slot_illegal_instruction(cpu_env);     \
       ctx->bstate = BS_EXCP;                                  \
       return;                                                 \
   }
@@ -445,9 +445,9 @@
 #define CHECK_PRIVILEGED                                        \
   if (IS_USER(ctx)) {                                           \
       if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
-         gen_helper_raise_slot_illegal_instruction();           \
+          gen_helper_raise_slot_illegal_instruction(cpu_env);   \
       } else {                                                  \
-         gen_helper_raise_illegal_instruction();                \
+          gen_helper_raise_illegal_instruction(cpu_env);        \
       }                                                         \
       ctx->bstate = BS_EXCP;                                    \
       return;                                                   \
@@ -456,9 +456,9 @@
 #define CHECK_FPU_ENABLED                                       \
   if (ctx->flags & SR_FD) {                                     \
       if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \
-          gen_helper_raise_slot_fpu_disable();                  \
+          gen_helper_raise_slot_fpu_disable(cpu_env);           \
       } else {                                                  \
-          gen_helper_raise_fpu_disable();                       \
+          gen_helper_raise_fpu_disable(cpu_env);                \
       }                                                         \
       ctx->bstate = BS_EXCP;                                    \
       return;                                                   \
@@ -492,7 +492,7 @@
 	  if (opcode != 0x0093 /* ocbi */
 	      && opcode != 0x00c3 /* movca.l */)
 	      {
-		  gen_helper_discard_movcal_backup ();
+                  gen_helper_discard_movcal_backup(cpu_env);
 		  ctx->has_movcal = 0;
 	      }
 	}
@@ -523,7 +523,7 @@
 	return;
     case 0x0038:		/* ldtlb */
 	CHECK_PRIVILEGED
-	gen_helper_ldtlb();
+        gen_helper_ldtlb(cpu_env);
 	return;
     case 0x002b:		/* rte */
 	CHECK_PRIVILEGED
@@ -551,7 +551,7 @@
 	return;
     case 0x001b:		/* sleep */
 	CHECK_PRIVILEGED
-	gen_helper_sleep(tcg_const_i32(ctx->pc + 2));
+        gen_helper_sleep(cpu_env, tcg_const_i32(ctx->pc + 2));
 	return;
     }
 
@@ -761,10 +761,10 @@
 	tcg_gen_add_i32(REG(B11_8), REG(B11_8), REG(B7_4));
 	return;
     case 0x300e:		/* addc Rm,Rn */
-	gen_helper_addc(REG(B11_8), REG(B7_4), REG(B11_8));
+        gen_helper_addc(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
 	return;
     case 0x300f:		/* addv Rm,Rn */
-	gen_helper_addv(REG(B11_8), REG(B7_4), REG(B11_8));
+        gen_helper_addv(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
 	return;
     case 0x2009:		/* and Rm,Rn */
 	tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4));
@@ -817,7 +817,7 @@
 	}
 	return;
     case 0x3004:		/* div1 Rm,Rn */
-	gen_helper_div1(REG(B11_8), REG(B7_4), REG(B11_8));
+        gen_helper_div1(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
 	return;
     case 0x300d:		/* dmuls.l Rm,Rn */
 	{
@@ -870,7 +870,7 @@
 	    tcg_gen_qemu_ld32s(arg0, REG(B7_4), ctx->memidx);
 	    arg1 = tcg_temp_new();
 	    tcg_gen_qemu_ld32s(arg1, REG(B11_8), ctx->memidx);
-	    gen_helper_macl(arg0, arg1);
+            gen_helper_macl(cpu_env, arg0, arg1);
 	    tcg_temp_free(arg1);
 	    tcg_temp_free(arg0);
 	    tcg_gen_addi_i32(REG(B7_4), REG(B7_4), 4);
@@ -884,7 +884,7 @@
 	    tcg_gen_qemu_ld32s(arg0, REG(B7_4), ctx->memidx);
 	    arg1 = tcg_temp_new();
 	    tcg_gen_qemu_ld32s(arg1, REG(B11_8), ctx->memidx);
-	    gen_helper_macw(arg0, arg1);
+            gen_helper_macw(cpu_env, arg0, arg1);
 	    tcg_temp_free(arg1);
 	    tcg_temp_free(arg0);
 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 2);
@@ -1013,10 +1013,10 @@
 	tcg_gen_sub_i32(REG(B11_8), REG(B11_8), REG(B7_4));
 	return;
     case 0x300a:		/* subc Rm,Rn */
-	gen_helper_subc(REG(B11_8), REG(B7_4), REG(B11_8));
+        gen_helper_subc(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
 	return;
     case 0x300b:		/* subv Rm,Rn */
-	gen_helper_subv(REG(B11_8), REG(B7_4), REG(B11_8));
+        gen_helper_subv(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8));
 	return;
     case 0x2008:		/* tst Rm,Rn */
 	{
@@ -1152,22 +1152,22 @@
 		gen_load_fpr64(fp1, DREG(B7_4));
                 switch (ctx->opcode & 0xf00f) {
                 case 0xf000:		/* fadd Rm,Rn */
-                    gen_helper_fadd_DT(fp0, fp0, fp1);
+                    gen_helper_fadd_DT(fp0, cpu_env, fp0, fp1);
                     break;
                 case 0xf001:		/* fsub Rm,Rn */
-                    gen_helper_fsub_DT(fp0, fp0, fp1);
+                    gen_helper_fsub_DT(fp0, cpu_env, fp0, fp1);
                     break;
                 case 0xf002:		/* fmul Rm,Rn */
-                    gen_helper_fmul_DT(fp0, fp0, fp1);
+                    gen_helper_fmul_DT(fp0, cpu_env, fp0, fp1);
                     break;
                 case 0xf003:		/* fdiv Rm,Rn */
-                    gen_helper_fdiv_DT(fp0, fp0, fp1);
+                    gen_helper_fdiv_DT(fp0, cpu_env, fp0, fp1);
                     break;
                 case 0xf004:		/* fcmp/eq Rm,Rn */
-                    gen_helper_fcmp_eq_DT(fp0, fp1);
+                    gen_helper_fcmp_eq_DT(cpu_env, fp0, fp1);
                     return;
                 case 0xf005:		/* fcmp/gt Rm,Rn */
-                    gen_helper_fcmp_gt_DT(fp0, fp1);
+                    gen_helper_fcmp_gt_DT(cpu_env, fp0, fp1);
                     return;
                 }
 		gen_store_fpr64(fp0, DREG(B11_8));
@@ -1176,22 +1176,32 @@
 	    } else {
                 switch (ctx->opcode & 0xf00f) {
                 case 0xf000:		/* fadd Rm,Rn */
-                    gen_helper_fadd_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fadd_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                       cpu_fregs[FREG(B11_8)],
+                                       cpu_fregs[FREG(B7_4)]);
                     break;
                 case 0xf001:		/* fsub Rm,Rn */
-                    gen_helper_fsub_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fsub_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                       cpu_fregs[FREG(B11_8)],
+                                       cpu_fregs[FREG(B7_4)]);
                     break;
                 case 0xf002:		/* fmul Rm,Rn */
-                    gen_helper_fmul_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fmul_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                       cpu_fregs[FREG(B11_8)],
+                                       cpu_fregs[FREG(B7_4)]);
                     break;
                 case 0xf003:		/* fdiv Rm,Rn */
-                    gen_helper_fdiv_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fdiv_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                       cpu_fregs[FREG(B11_8)],
+                                       cpu_fregs[FREG(B7_4)]);
                     break;
                 case 0xf004:		/* fcmp/eq Rm,Rn */
-                    gen_helper_fcmp_eq_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fcmp_eq_FT(cpu_env, cpu_fregs[FREG(B11_8)],
+                                          cpu_fregs[FREG(B7_4)]);
                     return;
                 case 0xf005:		/* fcmp/gt Rm,Rn */
-                    gen_helper_fcmp_gt_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B7_4)]);
+                    gen_helper_fcmp_gt_FT(cpu_env, cpu_fregs[FREG(B11_8)],
+                                          cpu_fregs[FREG(B7_4)]);
                     return;
                 }
 	    }
@@ -1203,8 +1213,9 @@
             if (ctx->fpscr & FPSCR_PR) {
                 break; /* illegal instruction */
             } else {
-                gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)],
-                                   cpu_fregs[FREG(0)], cpu_fregs[FREG(B7_4)], cpu_fregs[FREG(B11_8)]);
+                gen_helper_fmac_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                   cpu_fregs[FREG(0)], cpu_fregs[FREG(B7_4)],
+                                   cpu_fregs[FREG(B11_8)]);
                 return;
             }
         }
@@ -1356,7 +1367,7 @@
 	    TCGv imm;
 	    CHECK_NOT_DELAY_SLOT
 	    imm = tcg_const_i32(B7_0);
-	    gen_helper_trapa(imm);
+            gen_helper_trapa(cpu_env, imm);
 	    tcg_temp_free(imm);
 	    ctx->bstate = BS_BRANCH;
 	}
@@ -1531,7 +1542,7 @@
 	LDST(fpul, 0x405a, 0x4056, 0x005a, 0x4052, {CHECK_FPU_ENABLED})
     case 0x406a:		/* lds Rm,FPSCR */
 	CHECK_FPU_ENABLED
-	gen_helper_ld_fpscr(REG(B11_8));
+        gen_helper_ld_fpscr(cpu_env, REG(B11_8));
 	ctx->bstate = BS_STOP;
 	return;
     case 0x4066:		/* lds.l @Rm+,FPSCR */
@@ -1540,7 +1551,7 @@
 	    TCGv addr = tcg_temp_new();
 	    tcg_gen_qemu_ld32s(addr, REG(B11_8), ctx->memidx);
 	    tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4);
-	    gen_helper_ld_fpscr(addr);
+            gen_helper_ld_fpscr(cpu_env, addr);
 	    tcg_temp_free(addr);
 	    ctx->bstate = BS_STOP;
 	}
@@ -1567,7 +1578,7 @@
         {
             TCGv val = tcg_temp_new();
             tcg_gen_qemu_ld32u(val, REG(B11_8), ctx->memidx);
-            gen_helper_movcal (REG(B11_8), val);            
+            gen_helper_movcal(cpu_env, REG(B11_8), val);
             tcg_gen_qemu_st32(REG(0), REG(B11_8), ctx->memidx);
         }
         ctx->has_movcal = 1;
@@ -1619,7 +1630,7 @@
 	    break;
     case 0x0093:		/* ocbi @Rn */
 	{
-	    gen_helper_ocbi (REG(B11_8));
+            gen_helper_ocbi(cpu_env, REG(B11_8));
 	}
 	return;
     case 0x00a3:		/* ocbp @Rn */
@@ -1733,12 +1744,12 @@
 	    if (ctx->opcode & 0x0100)
 		break; /* illegal instruction */
 	    fp = tcg_temp_new_i64();
-	    gen_helper_float_DT(fp, cpu_fpul);
+            gen_helper_float_DT(fp, cpu_env, cpu_fpul);
 	    gen_store_fpr64(fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	}
 	else {
-	    gen_helper_float_FT(cpu_fregs[FREG(B11_8)], cpu_fpul);
+            gen_helper_float_FT(cpu_fregs[FREG(B11_8)], cpu_env, cpu_fpul);
 	}
 	return;
     case 0xf03d: /* ftrc FRm/DRm,FPUL - FPSCR: R[PR,Enable.V]/W[Cause,Flag] */
@@ -1749,11 +1760,11 @@
 		break; /* illegal instruction */
 	    fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, DREG(B11_8));
-	    gen_helper_ftrc_DT(cpu_fpul, fp);
+            gen_helper_ftrc_DT(cpu_fpul, cpu_env, fp);
 	    tcg_temp_free_i64(fp);
 	}
 	else {
-	    gen_helper_ftrc_FT(cpu_fpul, cpu_fregs[FREG(B11_8)]);
+            gen_helper_ftrc_FT(cpu_fpul, cpu_env, cpu_fregs[FREG(B11_8)]);
 	}
 	return;
     case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */
@@ -1783,11 +1794,12 @@
 		break; /* illegal instruction */
 	    TCGv_i64 fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, DREG(B11_8));
-	    gen_helper_fsqrt_DT(fp, fp);
+            gen_helper_fsqrt_DT(fp, cpu_env, fp);
 	    gen_store_fpr64(fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	} else {
-	    gen_helper_fsqrt_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
+            gen_helper_fsqrt_FT(cpu_fregs[FREG(B11_8)], cpu_env,
+                                cpu_fregs[FREG(B11_8)]);
 	}
 	return;
     case 0xf07d: /* fsrra FRn */
@@ -1809,7 +1821,7 @@
 	CHECK_FPU_ENABLED
 	{
 	    TCGv_i64 fp = tcg_temp_new_i64();
-	    gen_helper_fcnvsd_FT_DT(fp, cpu_fpul);
+            gen_helper_fcnvsd_FT_DT(fp, cpu_env, cpu_fpul);
 	    gen_store_fpr64(fp, DREG(B11_8));
 	    tcg_temp_free_i64(fp);
 	}
@@ -1819,7 +1831,7 @@
 	{
 	    TCGv_i64 fp = tcg_temp_new_i64();
 	    gen_load_fpr64(fp, DREG(B11_8));
-	    gen_helper_fcnvds_DT_FT(cpu_fpul, fp);
+            gen_helper_fcnvds_DT_FT(cpu_fpul, cpu_env, fp);
 	    tcg_temp_free_i64(fp);
 	}
 	return;
@@ -1829,7 +1841,7 @@
             TCGv m, n;
             m = tcg_const_i32((ctx->opcode >> 8) & 3);
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
-            gen_helper_fipr(m, n);
+            gen_helper_fipr(cpu_env, m, n);
             tcg_temp_free(m);
             tcg_temp_free(n);
             return;
@@ -1841,7 +1853,7 @@
             (ctx->fpscr & FPSCR_PR) == 0) {
             TCGv n;
             n = tcg_const_i32((ctx->opcode >> 10) & 3);
-            gen_helper_ftrv(n);
+            gen_helper_ftrv(cpu_env, n);
             tcg_temp_free(n);
             return;
         }
@@ -1853,9 +1865,9 @@
     fflush(stderr);
 #endif
     if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) {
-       gen_helper_raise_slot_illegal_instruction();
+        gen_helper_raise_slot_illegal_instruction(cpu_env);
     } else {
-       gen_helper_raise_illegal_instruction();
+        gen_helper_raise_illegal_instruction(cpu_env);
     }
     ctx->bstate = BS_EXCP;
 }
@@ -1934,7 +1946,7 @@
                 if (ctx.pc == bp->pc) {
 		    /* We have hit a breakpoint - make sure PC is up-to-date */
 		    tcg_gen_movi_i32(cpu_pc, ctx.pc);
-		    gen_helper_debug();
+                    gen_helper_debug(cpu_env);
 		    ctx.bstate = BS_EXCP;
 		    break;
 		}
@@ -1958,7 +1970,7 @@
 	fprintf(stderr, "Loading opcode at address 0x%08x\n", ctx.pc);
 	fflush(stderr);
 #endif
-	ctx.opcode = lduw_code(ctx.pc);
+        ctx.opcode = cpu_lduw_code(env, ctx.pc);
 	decode_opc(&ctx);
         num_insns++;
 	ctx.pc += 2;
@@ -1975,7 +1987,7 @@
         gen_io_end();
     if (env->singlestep_enabled) {
         tcg_gen_movi_i32(cpu_pc, ctx.pc);
-        gen_helper_debug();
+        gen_helper_debug(cpu_env);
     } else {
 	switch (ctx.bstate) {
         case BS_STOP:
diff --git a/target-sparc/Makefile.objs b/target-sparc/Makefile.objs
index a93e07d..9fc42ea 100644
--- a/target-sparc/Makefile.objs
+++ b/target-sparc/Makefile.objs
@@ -4,5 +4,3 @@
 obj-$(TARGET_SPARC) += int32_helper.o
 obj-$(TARGET_SPARC64) += int64_helper.o
 obj-$(TARGET_SPARC64) += vis_helper.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-unicore32/Makefile.objs b/target-unicore32/Makefile.objs
index 777f01f..8e143da 100644
--- a/target-unicore32/Makefile.objs
+++ b/target-unicore32/Makefile.objs
@@ -2,5 +2,3 @@
 obj-y += ucf64_helper.o
 
 obj-$(CONFIG_SOFTMMU) += machine.o softmmu.o
-
-$(obj)/op_helper.o: QEMU_CFLAGS += $(HELPER_CFLAGS)
diff --git a/target-unicore32/helper.h b/target-unicore32/helper.h
index 305318a..a4b8149 100644
--- a/target-unicore32/helper.h
+++ b/target-unicore32/helper.h
@@ -17,26 +17,26 @@
 DEF_HELPER_1(clz, i32, i32)
 DEF_HELPER_1(clo, i32, i32)
 
-DEF_HELPER_1(exception, void, i32)
+DEF_HELPER_2(exception, void, env, i32)
 
-DEF_HELPER_2(asr_write, void, i32, i32)
-DEF_HELPER_0(asr_read, i32)
+DEF_HELPER_3(asr_write, void, env, i32, i32)
+DEF_HELPER_1(asr_read, i32, env)
 
-DEF_HELPER_1(get_user_reg, i32, i32)
-DEF_HELPER_2(set_user_reg, void, i32, i32)
+DEF_HELPER_2(get_user_reg, i32, env, i32)
+DEF_HELPER_3(set_user_reg, void, env, i32, i32)
 
-DEF_HELPER_2(add_cc, i32, i32, i32)
-DEF_HELPER_2(adc_cc, i32, i32, i32)
-DEF_HELPER_2(sub_cc, i32, i32, i32)
-DEF_HELPER_2(sbc_cc, i32, i32, i32)
+DEF_HELPER_3(add_cc, i32, env, i32, i32)
+DEF_HELPER_3(adc_cc, i32, env, i32, i32)
+DEF_HELPER_3(sub_cc, i32, env, i32, i32)
+DEF_HELPER_3(sbc_cc, i32, env, i32, i32)
 
 DEF_HELPER_2(shl, i32, i32, i32)
 DEF_HELPER_2(shr, i32, i32, i32)
 DEF_HELPER_2(sar, i32, i32, i32)
-DEF_HELPER_2(shl_cc, i32, i32, i32)
-DEF_HELPER_2(shr_cc, i32, i32, i32)
-DEF_HELPER_2(sar_cc, i32, i32, i32)
-DEF_HELPER_2(ror_cc, i32, i32, i32)
+DEF_HELPER_3(shl_cc, i32, env, i32, i32)
+DEF_HELPER_3(shr_cc, i32, env, i32, i32)
+DEF_HELPER_3(sar_cc, i32, env, i32, i32)
+DEF_HELPER_3(ror_cc, i32, env, i32, i32)
 
 DEF_HELPER_1(ucf64_get_fpscr, i32, env)
 DEF_HELPER_2(ucf64_set_fpscr, void, env, i32)
diff --git a/target-unicore32/op_helper.c b/target-unicore32/op_helper.c
index c63789d..f474d1b 100644
--- a/target-unicore32/op_helper.c
+++ b/target-unicore32/op_helper.c
@@ -9,19 +9,18 @@
  * later version. See the COPYING file in the top-level directory.
  */
 #include "cpu.h"
-#include "dyngen-exec.h"
 #include "helper.h"
 
 #define SIGNBIT (uint32_t)0x80000000
 #define SIGNBIT64 ((uint64_t)1 << 63)
 
-void HELPER(exception)(uint32_t excp)
+void HELPER(exception)(CPUUniCore32State *env, uint32_t excp)
 {
     env->exception_index = excp;
     cpu_loop_exit(env);
 }
 
-static target_ulong asr_read(void)
+static target_ulong asr_read(CPUUniCore32State *env)
 {
     int ZF;
     ZF = (env->ZF == 0);
@@ -29,24 +28,18 @@
         (env->CF << 29) | ((env->VF & 0x80000000) >> 3);
 }
 
-target_ulong cpu_asr_read(CPUUniCore32State *env1)
+target_ulong cpu_asr_read(CPUUniCore32State *env)
 {
-    CPUUniCore32State *saved_env;
-    target_ulong ret;
-
-    saved_env = env;
-    env = env1;
-    ret = asr_read();
-    env = saved_env;
-    return ret;
+    return asr_read(env);
 }
 
-target_ulong HELPER(asr_read)(void)
+target_ulong HELPER(asr_read)(CPUUniCore32State *env)
 {
-    return asr_read();
+    return asr_read(env);
 }
 
-static void asr_write(target_ulong val, target_ulong mask)
+static void asr_write(CPUUniCore32State *env, target_ulong val,
+                      target_ulong mask)
 {
     if (mask & ASR_NZCV) {
         env->ZF = (~val) & ASR_Z;
@@ -62,23 +55,19 @@
     env->uncached_asr = (env->uncached_asr & ~mask) | (val & mask);
 }
 
-void cpu_asr_write(CPUUniCore32State *env1, target_ulong val, target_ulong mask)
+void cpu_asr_write(CPUUniCore32State *env, target_ulong val, target_ulong mask)
 {
-    CPUUniCore32State *saved_env;
-
-    saved_env = env;
-    env = env1;
-    asr_write(val, mask);
-    env = saved_env;
+    asr_write(env, val, mask);
 }
 
-void HELPER(asr_write)(target_ulong val, target_ulong mask)
+void HELPER(asr_write)(CPUUniCore32State *env, target_ulong val,
+                       target_ulong mask)
 {
-    asr_write(val, mask);
+    asr_write(env, val, mask);
 }
 
 /* Access to user mode registers from privileged modes.  */
-uint32_t HELPER(get_user_reg)(uint32_t regno)
+uint32_t HELPER(get_user_reg)(CPUUniCore32State *env, uint32_t regno)
 {
     uint32_t val;
 
@@ -92,7 +81,7 @@
     return val;
 }
 
-void HELPER(set_user_reg)(uint32_t regno, uint32_t val)
+void HELPER(set_user_reg)(CPUUniCore32State *env, uint32_t regno, uint32_t val)
 {
     if (regno == 29) {
         env->banked_r29[0] = val;
@@ -107,7 +96,7 @@
    The only way to do that in TCG is a conditional branch, which clobbers
    all our temporaries.  For now implement these as helper functions.  */
 
-uint32_t HELPER(add_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(add_cc)(CPUUniCore32State *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     result = a + b;
@@ -117,7 +106,7 @@
     return result;
 }
 
-uint32_t HELPER(adc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(adc_cc)(CPUUniCore32State *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -132,7 +121,7 @@
     return result;
 }
 
-uint32_t HELPER(sub_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(sub_cc)(CPUUniCore32State *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     result = a - b;
@@ -142,7 +131,7 @@
     return result;
 }
 
-uint32_t HELPER(sbc_cc)(uint32_t a, uint32_t b)
+uint32_t HELPER(sbc_cc)(CPUUniCore32State *env, uint32_t a, uint32_t b)
 {
     uint32_t result;
     if (!env->CF) {
@@ -186,7 +175,7 @@
     return (int32_t)x >> shift;
 }
 
-uint32_t HELPER(shl_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shl_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -203,7 +192,7 @@
     return x;
 }
 
-uint32_t HELPER(shr_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(shr_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -220,7 +209,7 @@
     return x;
 }
 
-uint32_t HELPER(sar_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(sar_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 {
     int shift = i & 0xff;
     if (shift >= 32) {
@@ -233,7 +222,7 @@
     return x;
 }
 
-uint32_t HELPER(ror_cc)(uint32_t x, uint32_t i)
+uint32_t HELPER(ror_cc)(CPUUniCore32State *env, uint32_t x, uint32_t i)
 {
     int shift1, shift;
     shift1 = i & 0xff;
@@ -264,16 +253,13 @@
 #define SHIFT 3
 #include "softmmu_template.h"
 
-void tlb_fill(CPUUniCore32State *env1, target_ulong addr, int is_write,
-        int mmu_idx, uintptr_t retaddr)
+void tlb_fill(CPUUniCore32State *env, target_ulong addr, int is_write,
+              int mmu_idx, uintptr_t retaddr)
 {
     TranslationBlock *tb;
-    CPUUniCore32State *saved_env;
     unsigned long pc;
     int ret;
 
-    saved_env = env;
-    env = env1;
     ret = uc32_cpu_handle_mmu_fault(env, addr, is_write, mmu_idx);
     if (unlikely(ret)) {
         if (retaddr) {
@@ -287,6 +273,5 @@
         }
         cpu_loop_exit(env);
     }
-    env = saved_env;
 }
 #endif
diff --git a/target-unicore32/translate.c b/target-unicore32/translate.c
index 188bf8c..b786a6b 100644
--- a/target-unicore32/translate.c
+++ b/target-unicore32/translate.c
@@ -253,7 +253,7 @@
 static inline void gen_set_asr(TCGv var, uint32_t mask)
 {
     TCGv tmp_mask = tcg_const_i32(mask);
-    gen_helper_asr_write(var, tmp_mask);
+    gen_helper_asr_write(cpu_env, var, tmp_mask);
     tcg_temp_free_i32(tmp_mask);
 }
 /* Set NZCV flags from the high 4 bits of var.  */
@@ -263,7 +263,7 @@
 {
     TCGv tmp = new_tmp();
     tcg_gen_movi_i32(tmp, excp);
-    gen_helper_exception(tmp);
+    gen_helper_exception(cpu_env, tmp);
     dead_tmp(tmp);
 }
 
@@ -416,16 +416,16 @@
     if (flags) {
         switch (shiftop) {
         case 0:
-            gen_helper_shl_cc(var, var, shift);
+            gen_helper_shl_cc(var, cpu_env, var, shift);
             break;
         case 1:
-            gen_helper_shr_cc(var, var, shift);
+            gen_helper_shr_cc(var, cpu_env, var, shift);
             break;
         case 2:
-            gen_helper_sar_cc(var, var, shift);
+            gen_helper_sar_cc(var, cpu_env, var, shift);
             break;
         case 3:
-            gen_helper_ror_cc(var, var, shift);
+            gen_helper_ror_cc(var, cpu_env, var, shift);
             break;
         }
     } else {
@@ -1323,11 +1323,11 @@
             if (IS_USER(s)) {
                 ILLEGAL;
             }
-            gen_helper_sub_cc(tmp, tmp, tmp2);
+            gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             gen_exception_return(s, tmp);
         } else {
             if (UCOP_SET_S) {
-                gen_helper_sub_cc(tmp, tmp, tmp2);
+                gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
             } else {
                 tcg_gen_sub_i32(tmp, tmp, tmp2);
             }
@@ -1336,7 +1336,7 @@
         break;
     case 0x03:
         if (UCOP_SET_S) {
-            gen_helper_sub_cc(tmp, tmp2, tmp);
+            gen_helper_sub_cc(tmp, cpu_env, tmp2, tmp);
         } else {
             tcg_gen_sub_i32(tmp, tmp2, tmp);
         }
@@ -1344,7 +1344,7 @@
         break;
     case 0x04:
         if (UCOP_SET_S) {
-            gen_helper_add_cc(tmp, tmp, tmp2);
+            gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
         } else {
             tcg_gen_add_i32(tmp, tmp, tmp2);
         }
@@ -1352,7 +1352,7 @@
         break;
     case 0x05:
         if (UCOP_SET_S) {
-            gen_helper_adc_cc(tmp, tmp, tmp2);
+            gen_helper_adc_cc(tmp, cpu_env, tmp, tmp2);
         } else {
             gen_add_carry(tmp, tmp, tmp2);
         }
@@ -1360,7 +1360,7 @@
         break;
     case 0x06:
         if (UCOP_SET_S) {
-            gen_helper_sbc_cc(tmp, tmp, tmp2);
+            gen_helper_sbc_cc(tmp, cpu_env, tmp, tmp2);
         } else {
             gen_sub_carry(tmp, tmp, tmp2);
         }
@@ -1368,7 +1368,7 @@
         break;
     case 0x07:
         if (UCOP_SET_S) {
-            gen_helper_sbc_cc(tmp, tmp2, tmp);
+            gen_helper_sbc_cc(tmp, cpu_env, tmp2, tmp);
         } else {
             gen_sub_carry(tmp, tmp2, tmp);
         }
@@ -1390,13 +1390,13 @@
         break;
     case 0x0a:
         if (UCOP_SET_S) {
-            gen_helper_sub_cc(tmp, tmp, tmp2);
+            gen_helper_sub_cc(tmp, cpu_env, tmp, tmp2);
         }
         dead_tmp(tmp);
         break;
     case 0x0b:
         if (UCOP_SET_S) {
-            gen_helper_add_cc(tmp, tmp, tmp2);
+            gen_helper_add_cc(tmp, cpu_env, tmp, tmp2);
         }
         dead_tmp(tmp);
         break;
@@ -1536,7 +1536,7 @@
             tmp = load_cpu_field(bsr);
         } else {
             tmp = new_tmp();
-            gen_helper_asr_read(tmp);
+            gen_helper_asr_read(tmp, cpu_env);
         }
         store_reg(s, UCOP_REG_D, tmp);
         return;
@@ -1760,7 +1760,7 @@
                     gen_bx(s, tmp);
                 } else if (user) {
                     tmp2 = tcg_const_i32(reg);
-                    gen_helper_set_user_reg(tmp2, tmp);
+                    gen_helper_set_user_reg(cpu_env, tmp2, tmp);
                     tcg_temp_free_i32(tmp2);
                     dead_tmp(tmp);
                 } else if (reg == UCOP_REG_N) {
@@ -1778,7 +1778,7 @@
                 } else if (user) {
                     tmp = new_tmp();
                     tmp2 = tcg_const_i32(reg);
-                    gen_helper_get_user_reg(tmp, tmp2);
+                    gen_helper_get_user_reg(tmp, cpu_env, tmp2);
                     tcg_temp_free_i32(tmp2);
                 } else {
                     tmp = load_reg(s, reg);
@@ -1861,7 +1861,7 @@
 {
     unsigned int insn;
 
-    insn = ldl_code(s->pc);
+    insn = cpu_ldl_code(env, s->pc);
     s->pc += 4;
 
     /* UniCore instructions class:
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index cf0ca3d..aed3b53 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -176,7 +176,7 @@
            so don't use these. */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-#if defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+#if TARGET_LONG_BITS == 64
         /* If we're passing env to the helper as r0 and need a regpair
          * for the address then r2 will be overwritten as we're setting
          * up the args to the helper.
@@ -204,8 +204,7 @@
            use these. */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R1);
-#if defined(CONFIG_SOFTMMU) && \
-    defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+#if defined(CONFIG_SOFTMMU) && (TARGET_LONG_BITS == 64)
         /* Avoid clashes with registers being used for helper args */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
@@ -223,7 +222,7 @@
 #ifdef CONFIG_SOFTMMU
         /* r2 is still needed to load data_reg, so don't use it. */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R2);
-#if defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+#if TARGET_LONG_BITS == 64
         /* Avoid clashes with registers being used for helper args */
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
 #endif
@@ -954,7 +953,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -972,25 +970,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 /* Helper routines for marshalling helper function arguments into
  * the correct registers and stack.
@@ -1203,9 +1182,7 @@
      * trash by moving the earlier arguments into them.
      */
     argreg = TCG_REG_R0;
-#ifdef CONFIG_TCG_PASS_AREG0
     argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
-#endif
 #if TARGET_LONG_BITS == 64
     argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
 #else
@@ -1421,9 +1398,7 @@
      * trash by moving the earlier arguments into them.
      */
     argreg = TCG_REG_R0;
-#ifdef CONFIG_TCG_PASS_AREG0
     argreg = tcg_out_arg_reg32(s, argreg, TCG_AREG0);
-#endif
 #if TARGET_LONG_BITS == 64
     argreg = tcg_out_arg_reg64(s, argreg, addr_reg, addr_reg2);
 #else
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index f90b834..c0b8f72 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -77,7 +77,6 @@
 #define TCG_TARGET_HAS_GUEST_BASE
 
 enum {
-    /* Note: must be synced with dyngen-exec.h */
     TCG_AREG0 = TCG_REG_R6,
 };
 
diff --git a/tcg/hppa/tcg-target.c b/tcg/hppa/tcg-target.c
index 2885212..8b81b70 100644
--- a/tcg/hppa/tcg-target.c
+++ b/tcg/hppa/tcg-target.c
@@ -882,7 +882,6 @@
 #if defined(CONFIG_SOFTMMU)
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -900,25 +899,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 /* Load and compare a TLB entry, and branch if TLB miss.  OFFSET is set to
    the offset of the first ADDR_READ or ADDR_WRITE member of the appropriate
@@ -1085,7 +1065,6 @@
     }
     tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
 
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
                 tcg_target_call_iarg_regs[1]);
@@ -1093,7 +1072,6 @@
                 tcg_target_call_iarg_regs[0]);
     tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
                 TCG_AREG0);
-#endif
     tcg_out_call(s, qemu_ld_helpers[opc & 3]);
 
     switch (opc) {
@@ -1245,7 +1223,6 @@
         tcg_abort();
     }
 
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
                 tcg_target_call_iarg_regs[2]);
@@ -1255,7 +1232,6 @@
                 tcg_target_call_iarg_regs[0]);
     tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
                 TCG_AREG0);
-#endif
     tcg_out_call(s, qemu_st_helpers[opc]);
 
     /* label2: */
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index d4bf6fe..01ef960 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -104,7 +104,6 @@
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
-/* Note: must be synced with dyngen-exec.h */
 #define TCG_AREG0 TCG_REG_R17
 
 
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index da17bba..34c2df8 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -183,9 +183,7 @@
             tcg_regset_set32(ct->u.regs, 0, 0xffff);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[0]);
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[1]);
-#ifdef CONFIG_TCG_PASS_AREG0
             tcg_regset_reset_reg(ct->u.regs, tcg_target_call_iarg_regs[2]);
-#endif
         } else {
             tcg_regset_set32(ct->u.regs, 0, 0xff);
             tcg_regset_reset_reg(ct->u.regs, TCG_REG_EAX);
@@ -965,7 +963,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void *qemu_ld_helpers[4] = {
@@ -983,25 +980,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 /* Perform the TLB load and compare.
 
@@ -1220,16 +1198,13 @@
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_push(s, TCG_AREG0);
     stack_adjust += 4;
-#endif
 #else
     /* The first argument is already loaded with addrlo.  */
     arg_idx = 1;
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
                  mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
                 tcg_target_call_iarg_regs[2]);
@@ -1240,7 +1215,6 @@
     tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                 TCG_AREG0);
 #endif
-#endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
 
@@ -1436,16 +1410,13 @@
     }
     tcg_out_push(s, args[addrlo_idx]);
     stack_adjust += 4;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_push(s, TCG_AREG0);
     stack_adjust += 4;
-#endif
 #else
     tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
                 tcg_target_call_iarg_regs[1], data_reg);
     tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
     stack_adjust = 0;
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
                 tcg_target_call_iarg_regs[2]);
@@ -1456,7 +1427,6 @@
     tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                 TCG_AREG0);
 #endif
-#endif
 
     tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
 
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index c3cfe05..8be42f3 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -116,7 +116,6 @@
 
 #define TCG_TARGET_HAS_GUEST_BASE
 
-/* Note: must be synced with dyngen-exec.h */
 #if TCG_TARGET_REG_BITS == 64
 # define TCG_AREG0 TCG_REG_R14
 #else
diff --git a/tcg/ia64/tcg-target.c b/tcg/ia64/tcg-target.c
index dc588db..1745038 100644
--- a/tcg/ia64/tcg-target.c
+++ b/tcg/ia64/tcg-target.c
@@ -1452,7 +1452,6 @@
                                TCG_REG_P7, TCG_REG_R3, TCG_REG_R57));
 }
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -1461,16 +1460,6 @@
     helper_ldl_mmu,
     helper_ldq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-#endif
 
 static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1530,7 +1519,6 @@
                        tcg_opc_m1 (TCG_REG_P7, OPC_LD8_M1, TCG_REG_R1, TCG_REG_R2),
                        tcg_opc_i18(TCG_REG_P0, OPC_NOP_I18, 0));
     }
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_bundle(s, mII,
                    tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
@@ -1539,7 +1527,6 @@
                                TCG_REG_R57, 0, TCG_REG_R56),
                    tcg_opc_a4 (TCG_REG_P7, OPC_ADDS_A4,
                                TCG_REG_R56, 0, TCG_AREG0));
-#endif
     if (!bswap || s_bits == 0) {
         tcg_out_bundle(s, miB,
                        tcg_opc_m48(TCG_REG_P0, OPC_NOP_M48, 0),
@@ -1570,7 +1557,6 @@
     }
 }
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
    uintxx_t val, int mmu_idx) */
 static const void * const qemu_st_helpers[4] = {
@@ -1579,16 +1565,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
 {
@@ -1658,7 +1634,6 @@
         data_reg = TCG_REG_R2;
     }
 
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_bundle(s, mII,
                    tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R59,
@@ -1674,15 +1649,6 @@
                                TCG_REG_R56, 0, TCG_AREG0),
                    tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
                                TCG_REG_B0, TCG_REG_B6));
-#else
-    tcg_out_bundle(s, miB,
-                   tcg_opc_m4 (TCG_REG_P6, opc_st_m4[opc],
-                               data_reg, TCG_REG_R3),
-                   tcg_opc_a5 (TCG_REG_P7, OPC_ADDL_A5, TCG_REG_R58,
-                               mem_index, TCG_REG_R0),
-                   tcg_opc_b5 (TCG_REG_P7, OPC_BR_CALL_SPTK_MANY_B5,
-                               TCG_REG_B0, TCG_REG_B6));
-#endif
 }
 
 #else /* !CONFIG_SOFTMMU */
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 0631b9f..c22962a 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -140,7 +140,6 @@
 #define TCG_TARGET_HAS_not_i32          0 /* xor r1, -1, r3 */
 #define TCG_TARGET_HAS_not_i64          0 /* xor r1, -1, r3 */
 
-/* Note: must be synced with dyngen-exec.h */
 #define TCG_AREG0 TCG_REG_R7
 
 /* Guest base is supported */
diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c
index 1006e28..74db83d 100644
--- a/tcg/mips/tcg-target.c
+++ b/tcg/mips/tcg-target.c
@@ -217,7 +217,7 @@
         tcg_regset_set(ct->u.regs, 0xffffffff);
 #if defined(CONFIG_SOFTMMU)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
-# if defined(CONFIG_TCG_PASS_AREG0) && (TARGET_LONG_BITS == 64)
+# if (TARGET_LONG_BITS == 64)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
 # endif
 #endif
@@ -227,12 +227,11 @@
         tcg_regset_set(ct->u.regs, 0xffffffff);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A0);
 #if defined(CONFIG_SOFTMMU)
-# if (defined(CONFIG_TCG_PASS_AREG0) && TARGET_LONG_BITS == 32) || \
-     (!defined(CONFIG_TCG_PASS_AREG0) && TARGET_LONG_BITS == 64)
+# if (TARGET_LONG_BITS == 32)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A1);
 # endif
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A2);
-# if defined(CONFIG_TCG_PASS_AREG0) && TARGET_LONG_BITS == 64
+# if TARGET_LONG_BITS == 64
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_A3);
 # endif
 #endif
@@ -821,7 +820,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -839,25 +837,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 #endif
 
 static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
@@ -942,9 +921,7 @@
 
     /* slow path */
     arg_num = 0;
-# ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0);
-# endif
 # if TARGET_LONG_BITS == 64
     tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh);
 # else
@@ -1127,9 +1104,7 @@
 
     /* slow path */
     arg_num = 0;
-# ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_call_iarg_reg32(s, &arg_num, TCG_AREG0);
-# endif
 # if TARGET_LONG_BITS == 64
     tcg_out_call_iarg_reg64(s, &arg_num, addr_regl, addr_regh);
 # else
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index d3c804d..1c61931 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -96,7 +96,6 @@
 #define TCG_TARGET_HAS_ext8u_i32        0 /* andi rt, rs, 0xff   */
 #define TCG_TARGET_HAS_ext16u_i32       0 /* andi rt, rs, 0xffff */
 
-/* Note: must be synced with dyngen-exec.h */
 #define TCG_AREG0 TCG_REG_S0
 
 /* guest base is supported */
diff --git a/tcg/optimize.c b/tcg/optimize.c
index 9c65474..fba0ed9 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -267,6 +267,67 @@
     return res;
 }
 
+static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
+                                       TCGArg y, TCGCond c)
+{
+    switch (op_bits(op)) {
+    case 32:
+        switch (c) {
+        case TCG_COND_EQ:
+            return (uint32_t)x == (uint32_t)y;
+        case TCG_COND_NE:
+            return (uint32_t)x != (uint32_t)y;
+        case TCG_COND_LT:
+            return (int32_t)x < (int32_t)y;
+        case TCG_COND_GE:
+            return (int32_t)x >= (int32_t)y;
+        case TCG_COND_LE:
+            return (int32_t)x <= (int32_t)y;
+        case TCG_COND_GT:
+            return (int32_t)x > (int32_t)y;
+        case TCG_COND_LTU:
+            return (uint32_t)x < (uint32_t)y;
+        case TCG_COND_GEU:
+            return (uint32_t)x >= (uint32_t)y;
+        case TCG_COND_LEU:
+            return (uint32_t)x <= (uint32_t)y;
+        case TCG_COND_GTU:
+            return (uint32_t)x > (uint32_t)y;
+        }
+        break;
+    case 64:
+        switch (c) {
+        case TCG_COND_EQ:
+            return (uint64_t)x == (uint64_t)y;
+        case TCG_COND_NE:
+            return (uint64_t)x != (uint64_t)y;
+        case TCG_COND_LT:
+            return (int64_t)x < (int64_t)y;
+        case TCG_COND_GE:
+            return (int64_t)x >= (int64_t)y;
+        case TCG_COND_LE:
+            return (int64_t)x <= (int64_t)y;
+        case TCG_COND_GT:
+            return (int64_t)x > (int64_t)y;
+        case TCG_COND_LTU:
+            return (uint64_t)x < (uint64_t)y;
+        case TCG_COND_GEU:
+            return (uint64_t)x >= (uint64_t)y;
+        case TCG_COND_LEU:
+            return (uint64_t)x <= (uint64_t)y;
+        case TCG_COND_GTU:
+            return (uint64_t)x > (uint64_t)y;
+        }
+        break;
+    }
+
+    fprintf(stderr,
+            "Unrecognized bitness %d or condition %d in "
+            "do_constant_folding_cond.\n", op_bits(op), c);
+    tcg_abort();
+}
+
+
 /* Propagate constants and copies, fold constant expressions. */
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
@@ -318,11 +379,49 @@
                 args[2] = tmp;
             }
             break;
+        CASE_OP_32_64(brcond):
+            if (temps[args[0]].state == TCG_TEMP_CONST
+                && temps[args[1]].state != TCG_TEMP_CONST) {
+                tmp = args[0];
+                args[0] = args[1];
+                args[1] = tmp;
+                args[2] = tcg_swap_cond(args[2]);
+            }
+            break;
+        CASE_OP_32_64(setcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state != TCG_TEMP_CONST) {
+                tmp = args[1];
+                args[1] = args[2];
+                args[2] = tmp;
+                args[3] = tcg_swap_cond(args[3]);
+            }
+            break;
         default:
             break;
         }
 
-        /* Simplify expression if possible. */
+        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0" */
+        switch (op) {
+        CASE_OP_32_64(shl):
+        CASE_OP_32_64(shr):
+        CASE_OP_32_64(sar):
+        CASE_OP_32_64(rotl):
+        CASE_OP_32_64(rotr):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[1]].val == 0) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
+                args += 3;
+                gen_args += 2;
+                continue;
+            }
+            break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, 0 => mov r, a" cases */
         switch (op) {
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
@@ -331,6 +430,8 @@
         CASE_OP_32_64(sar):
         CASE_OP_32_64(rotl):
         CASE_OP_32_64(rotr):
+        CASE_OP_32_64(or):
+        CASE_OP_32_64(xor):
             if (temps[args[1]].state == TCG_TEMP_CONST) {
                 /* Proceed with possible constant folding. */
                 break;
@@ -340,18 +441,24 @@
                 if ((temps[args[0]].state == TCG_TEMP_COPY
                     && temps[args[0]].val == args[1])
                     || args[0] == args[1]) {
-                    args += 3;
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
                     tcg_opt_gen_mov(s, gen_args, args[0], args[1],
                                     nb_temps, nb_globals);
                     gen_args += 2;
-                    args += 3;
                 }
+                args += 3;
                 continue;
             }
             break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
+        switch (op) {
+        CASE_OP_32_64(and):
         CASE_OP_32_64(mul):
             if ((temps[args[2]].state == TCG_TEMP_CONST
                 && temps[args[2]].val == 0)) {
@@ -362,19 +469,24 @@
                 continue;
             }
             break;
+        default:
+            break;
+        }
+
+        /* Simplify expression for "op r, a, a => mov r, a" cases */
+        switch (op) {
         CASE_OP_32_64(or):
         CASE_OP_32_64(and):
             if (args[1] == args[2]) {
                 if (args[1] == args[0]) {
-                    args += 3;
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 } else {
                     gen_opc_buf[op_index] = op_to_mov(op);
                     tcg_opt_gen_mov(s, gen_args, args[0], args[1], nb_temps,
                                     nb_globals);
                     gen_args += 2;
-                    args += 3;
                 }
+                args += 3;
                 continue;
             }
             break;
@@ -424,17 +536,14 @@
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
-                gen_args += 2;
-                args += 2;
-                break;
             } else {
                 reset_temp(args[0], nb_temps, nb_globals);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
-                gen_args += 2;
-                args += 2;
-                break;
             }
+            gen_args += 2;
+            args += 2;
+            break;
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -458,17 +567,56 @@
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
                 gen_args += 2;
-                args += 3;
-                break;
             } else {
                 reset_temp(args[0], nb_temps, nb_globals);
                 gen_args[0] = args[0];
                 gen_args[1] = args[1];
                 gen_args[2] = args[2];
                 gen_args += 3;
-                args += 3;
-                break;
             }
+            args += 3;
+            break;
+        CASE_OP_32_64(setcond):
+            if (temps[args[1]].state == TCG_TEMP_CONST
+                && temps[args[2]].state == TCG_TEMP_CONST) {
+                gen_opc_buf[op_index] = op_to_movi(op);
+                tmp = do_constant_folding_cond(op, temps[args[1]].val,
+                                               temps[args[2]].val, args[3]);
+                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
+                gen_args += 2;
+            } else {
+                reset_temp(args[0], nb_temps, nb_globals);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args += 4;
+            }
+            args += 4;
+            break;
+        CASE_OP_32_64(brcond):
+            if (temps[args[0]].state == TCG_TEMP_CONST
+                && temps[args[1]].state == TCG_TEMP_CONST) {
+                if (do_constant_folding_cond(op, temps[args[0]].val,
+                                             temps[args[1]].val, args[2])) {
+                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                    gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[3];
+                    gen_args += 1;
+                } else {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+            } else {
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                reset_temp(args[0], nb_temps, nb_globals);
+                gen_args[0] = args[0];
+                gen_args[1] = args[1];
+                gen_args[2] = args[2];
+                gen_args[3] = args[3];
+                gen_args += 4;
+            }
+            args += 4;
+            break;
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
@@ -490,7 +638,6 @@
         case INDEX_op_set_label:
         case INDEX_op_jmp:
         case INDEX_op_br:
-        CASE_OP_32_64(brcond):
             memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             for (i = 0; i < def->nb_args; i++) {
                 *gen_args = *args;
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 0cff181..26c4b33 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -248,7 +248,6 @@
         tcg_regset_set32(ct->u.regs, 0, 0xffffffff);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
 #if TARGET_LONG_BITS == 64
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
@@ -256,11 +255,6 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7);
 #endif
 #endif
-#else /* !AREG0 */
-#if TARGET_LONG_BITS == 64
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
-#endif
-#endif
         break;
     case 'K':                   /* qemu_st[8..32] constraint */
         ct->ct |= TCG_CT_REG;
@@ -268,7 +262,6 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R3);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R4);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
 #if TARGET_LONG_BITS == 64
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7);
@@ -276,11 +269,6 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R8);
 #endif
 #endif
-#else /* !AREG0 */
-#if TARGET_LONG_BITS == 64
-        tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
-#endif
-#endif
         break;
     case 'M':                   /* qemu_st64 constraint */
         ct->ct |= TCG_CT_REG;
@@ -290,12 +278,10 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R5);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R6);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R7);
-#if defined(CONFIG_TCG_PASS_AREG0)
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R8);
 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_R9);
 #endif
-#endif
         break;
 #else
     case 'L':
@@ -541,7 +527,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -559,25 +544,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 #endif
 
 static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
@@ -647,9 +613,7 @@
 
     /* slow path */
     ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
-#endif
 #if TARGET_LONG_BITS == 32
     tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
 #else
@@ -849,9 +813,7 @@
 
     /* slow path */
     ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_mov (s, TCG_TYPE_I32, ir++, TCG_AREG0);
-#endif
 #if TARGET_LONG_BITS == 32
     tcg_out_mov (s, TCG_TYPE_I32, ir++, addr_reg);
 #else
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 27a0ae8..337cd41 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -235,10 +235,8 @@
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R3);
 #ifdef CONFIG_SOFTMMU
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5);
 #endif
-#endif
         break;
     case 'S':                   /* qemu_st constraint */
         ct->ct |= TCG_CT_REG;
@@ -247,10 +245,8 @@
 #ifdef CONFIG_SOFTMMU
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R4);
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R5);
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_regset_reset_reg (ct->u.regs, TCG_REG_R6);
 #endif
-#endif
         break;
     case 'Z':
         ct->ct |= TCG_CT_CONST_U32;
@@ -558,7 +554,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -576,25 +571,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 
 static void tcg_out_tlb_read (TCGContext *s, int r0, int r1, int r2,
                               int addr_reg, int s_bits, int offset)
@@ -676,9 +652,7 @@
 
     /* slow path */
     ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
-#endif
     tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
     tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
 
@@ -827,9 +801,7 @@
 
     /* slow path */
     ir = 3;
-#ifdef CONFIG_TCG_PASS_AREG0
     tcg_out_mov (s, TCG_TYPE_I64, ir++, TCG_AREG0);
-#endif
     tcg_out_mov (s, TCG_TYPE_I64, ir++, addr_reg);
     tcg_out_rld (s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
     tcg_out_movi (s, TCG_TYPE_I64, ir++, mem_index);
diff --git a/tcg/s390/tcg-target.c b/tcg/s390/tcg-target.c
index 99b5339..aac11d9 100644
--- a/tcg/s390/tcg-target.c
+++ b/tcg/s390/tcg-target.c
@@ -301,7 +301,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -319,25 +318,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static void *qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static void *qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 #endif
 
 static uint8_t *tb_ret_addr;
@@ -1507,7 +1487,6 @@
             tcg_abort();
         }
         tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R4, mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
         /* XXX/FIXME: suboptimal */
         tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3],
                     tcg_target_call_iarg_regs[2]);
@@ -1517,11 +1496,9 @@
                     tcg_target_call_iarg_regs[0]);
         tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                     TCG_AREG0);
-#endif
         tgen_calli(s, (tcg_target_ulong)qemu_st_helpers[s_bits]);
     } else {
         tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
         /* XXX/FIXME: suboptimal */
         tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
                     tcg_target_call_iarg_regs[1]);
@@ -1529,7 +1506,6 @@
                     tcg_target_call_iarg_regs[0]);
         tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0],
                     TCG_AREG0);
-#endif
         tgen_calli(s, (tcg_target_ulong)qemu_ld_helpers[s_bits]);
 
         /* sign extension */
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index d12f90b..4f7dfab 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -96,7 +96,6 @@
 #define TCG_TARGET_EXTEND_ARGS 1
 
 enum {
-    /* Note: must be synced with dyngen-exec.h */
     TCG_AREG0 = TCG_REG_R10,
 };
 
diff --git a/tcg/sparc/tcg-target.c b/tcg/sparc/tcg-target.c
index 247a278..baed3b4 100644
--- a/tcg/sparc/tcg-target.c
+++ b/tcg/sparc/tcg-target.c
@@ -59,11 +59,7 @@
 };
 #endif
 
-#ifdef CONFIG_TCG_PASS_AREG0
 #define ARG_OFFSET 1
-#else
-#define ARG_OFFSET 0
-#endif
 
 static const int tcg_target_reg_alloc_order[] = {
     TCG_REG_L0,
@@ -161,9 +157,7 @@
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O0);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O1);
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O2);
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_regset_reset_reg(ct->u.regs, TCG_REG_O3);
-#endif
         break;
     case 'I':
         ct->ct |= TCG_CT_CONST_S11;
@@ -715,7 +709,6 @@
 
 #include "../../softmmu_defs.h"
 
-#ifdef CONFIG_TCG_PASS_AREG0
 /* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
    int mmu_idx) */
 static const void * const qemu_ld_helpers[4] = {
@@ -733,25 +726,6 @@
     helper_stl_mmu,
     helper_stq_mmu,
 };
-#else
-/* legacy helper signature: __ld_mmu(target_ulong addr, int
-   mmu_idx) */
-static const void * const qemu_ld_helpers[4] = {
-    __ldb_mmu,
-    __ldw_mmu,
-    __ldl_mmu,
-    __ldq_mmu,
-};
-
-/* legacy helper signature: __st_mmu(target_ulong addr, uintxx_t val,
-   int mmu_idx) */
-static const void * const qemu_st_helpers[4] = {
-    __stb_mmu,
-    __stw_mmu,
-    __stl_mmu,
-    __stq_mmu,
-};
-#endif
 #endif
 
 #if TARGET_LONG_BITS == 32
@@ -834,7 +808,6 @@
 
     /* mov */
     tcg_out_movi(s, TCG_TYPE_I32, arg1, mem_index);
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
                 tcg_target_call_iarg_regs[2]);
@@ -844,7 +817,6 @@
                 tcg_target_call_iarg_regs[0]);
     tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
                 TCG_AREG0);
-#endif
 
     /* XXX: move that code at the end of the TB */
     /* qemu_ld_helper[s_bits](arg0, arg1) */
@@ -1061,7 +1033,6 @@
     /* mov */
     tcg_out_movi(s, TCG_TYPE_I32, arg2, mem_index);
 
-#ifdef CONFIG_TCG_PASS_AREG0
     /* XXX/FIXME: suboptimal */
     tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
                 tcg_target_call_iarg_regs[2]);
@@ -1071,7 +1042,6 @@
                 tcg_target_call_iarg_regs[0]);
     tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
                 TCG_AREG0);
-#endif
     /* XXX: move that code at the end of the TB */
     /* qemu_st_helper[s_bits](arg0, arg1, arg2) */
     tcg_out32(s, CALL | ((((tcg_target_ulong)qemu_st_helpers[s_bits]
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index ee2274d..0ea87be 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -125,7 +125,6 @@
 #define TCG_TARGET_HAS_deposit_i64      0
 #endif
 
-/* Note: must be synced with dyngen-exec.h */
 #ifdef CONFIG_SOLARIS
 #define TCG_AREG0 TCG_REG_G2
 #elif defined(__sparc_v9__)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 8386b70..a4e7f42 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -2059,22 +2059,29 @@
     }
 #endif
 
+#ifdef CONFIG_PROFILER
+    s->opt_time -= profile_getclock();
+#endif
+
 #ifdef USE_TCG_OPTIMIZATIONS
     gen_opparam_ptr =
         tcg_optimize(s, gen_opc_ptr, gen_opparam_buf, tcg_op_defs);
 #endif
 
 #ifdef CONFIG_PROFILER
+    s->opt_time += profile_getclock();
     s->la_time -= profile_getclock();
 #endif
+
     tcg_liveness_analysis(s);
+
 #ifdef CONFIG_PROFILER
     s->la_time += profile_getclock();
 #endif
 
 #ifdef DEBUG_DISAS
     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT))) {
-        qemu_log("OP after liveness analysis:\n");
+        qemu_log("OP after optimization and liveness analysis:\n");
         tcg_dump_ops(s);
         qemu_log("\n");
     }
@@ -2241,6 +2248,9 @@
                 (double)s->interm_time / tot * 100.0);
     cpu_fprintf(f, "  gen_code time     %0.1f%%\n", 
                 (double)s->code_time / tot * 100.0);
+    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
+                (double)s->opt_time / (s->code_time ? s->code_time : 1)
+                * 100.0);
     cpu_fprintf(f, "liveness/code time  %0.1f%%\n", 
                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
     cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
diff --git a/tcg/tcg.h b/tcg/tcg.h
index d710694..7a72729 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -382,6 +382,7 @@
     int64_t interm_time;
     int64_t code_time;
     int64_t la_time;
+    int64_t opt_time;
     int64_t restore_count;
     int64_t restore_time;
 #endif
diff --git a/tcg/tci/tcg-target.c b/tcg/tci/tcg-target.c
index ef8580f..003244c 100644
--- a/tcg/tci/tcg-target.c
+++ b/tcg/tci/tcg-target.c
@@ -798,9 +798,7 @@
     case INDEX_op_qemu_st8:
     case INDEX_op_qemu_st16:
     case INDEX_op_qemu_st32:
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_out_r(s, TCG_AREG0);
-#endif
         tcg_out_r(s, *args++);
         tcg_out_r(s, *args++);
 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
@@ -811,9 +809,7 @@
 #endif
         break;
     case INDEX_op_qemu_st64:
-#ifdef CONFIG_TCG_PASS_AREG0
         tcg_out_r(s, TCG_AREG0);
-#endif
         tcg_out_r(s, *args++);
 #if TCG_TARGET_REG_BITS == 32
         tcg_out_r(s, *args++);
diff --git a/tci.c b/tci.c
index c79350d..ce8a988 100644
--- a/tci.c
+++ b/tci.c
@@ -25,7 +25,6 @@
 #endif
 
 #include "qemu-common.h"
-#include "dyngen-exec.h"        /* env */
 #include "exec-all.h"           /* MAX_OPC_PARAM_IARGS */
 #include "tcg-op.h"
 
@@ -63,17 +62,6 @@
 
 static tcg_target_ulong tci_reg[TCG_TARGET_NB_REGS];
 
-#if !defined(CONFIG_TCG_PASS_AREG0)
-# define helper_ldb_mmu(env, addr, mmu_idx) __ldb_mmu(addr, mmu_idx)
-# define helper_ldw_mmu(env, addr, mmu_idx) __ldw_mmu(addr, mmu_idx)
-# define helper_ldl_mmu(env, addr, mmu_idx) __ldl_mmu(addr, mmu_idx)
-# define helper_ldq_mmu(env, addr, mmu_idx) __ldq_mmu(addr, mmu_idx)
-# define helper_stb_mmu(env, addr, val, mmu_idx) __stb_mmu(addr, val, mmu_idx)
-# define helper_stw_mmu(env, addr, val, mmu_idx) __stw_mmu(addr, val, mmu_idx)
-# define helper_stl_mmu(env, addr, val, mmu_idx) __stl_mmu(addr, val, mmu_idx)
-# define helper_stq_mmu(env, addr, val, mmu_idx) __stq_mmu(addr, val, mmu_idx)
-#endif /* !CONFIG_TCG_PASS_AREG0 */
-
 static tcg_target_ulong tci_read_reg(TCGReg index)
 {
     assert(index < ARRAY_SIZE(tci_reg));
diff --git a/trace-events b/trace-events
index 8fcbc50..b25ae1c 100644
--- a/trace-events
+++ b/trace-events
@@ -263,6 +263,9 @@
 usb_ehci_queue_action(void *q, const char *action) "q %p: %s"
 usb_ehci_packet_action(void *q, void *p, const char *action) "q %p p %p: %s"
 usb_ehci_irq(uint32_t level, uint32_t frindex, uint32_t sts, uint32_t mask) "level %d, frindex 0x%04x, sts 0x%x, mask 0x%x"
+usb_ehci_guest_bug(const char *reason) "%s"
+usb_ehci_doorbell_ring(void) ""
+usb_ehci_doorbell_ack(void) ""
 
 # hw/usb/hcd-uhci.c
 usb_uhci_reset(void) "=== RESET ==="
@@ -310,7 +313,10 @@
 usb_xhci_doorbell_write(uint32_t off, uint32_t val) "off 0x%04x, val 0x%08x"
 usb_xhci_irq_intx(uint32_t level) "level %d"
 usb_xhci_irq_msi(uint32_t nr) "nr %d"
-usb_xhci_queue_event(uint32_t idx, const char *name, uint64_t param, uint32_t status, uint32_t control) "idx %d, %s, p %016" PRIx64 ", s %08x, c 0x%08x"
+usb_xhci_irq_msix(uint32_t nr) "nr %d"
+usb_xhci_irq_msix_use(uint32_t nr) "nr %d"
+usb_xhci_irq_msix_unuse(uint32_t nr) "nr %d"
+usb_xhci_queue_event(uint32_t vector, uint32_t idx, const char *trb, const char *evt, uint64_t param, uint32_t status, uint32_t control) "v %d, idx %d, %s, %s, p %016" PRIx64 ", s %08x, c 0x%08x"
 usb_xhci_fetch_trb(uint64_t addr, const char *name, uint64_t param, uint32_t status, uint32_t control) "addr %016" PRIx64 ", %s, p %016" PRIx64 ", s %08x, c 0x%08x"
 usb_xhci_slot_enable(uint32_t slotid) "slotid %d"
 usb_xhci_slot_disable(uint32_t slotid) "slotid %d"
@@ -320,10 +326,11 @@
 usb_xhci_slot_reset(uint32_t slotid) "slotid %d"
 usb_xhci_ep_enable(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
 usb_xhci_ep_disable(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
+usb_xhci_ep_set_dequeue(uint32_t slotid, uint32_t epid, uint64_t param) "slotid %d, epid %d, ptr %016" PRIx64
 usb_xhci_ep_kick(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
 usb_xhci_ep_stop(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
 usb_xhci_ep_reset(uint32_t slotid, uint32_t epid) "slotid %d, epid %d"
-usb_xhci_xfer_start(void *xfer, uint32_t slotid, uint32_t epid, uint32_t length) "%p: slotid %d, epid %d, length %d"
+usb_xhci_xfer_start(void *xfer, uint32_t slotid, uint32_t epid) "%p: slotid %d, epid %d"
 usb_xhci_xfer_async(void *xfer) "%p"
 usb_xhci_xfer_nak(void *xfer) "%p"
 usb_xhci_xfer_retry(void *xfer) "%p"
@@ -336,6 +343,7 @@
 usb_desc_config(int addr, int index, int len, int ret) "dev %d query config %d, len %d, ret %d"
 usb_desc_other_speed_config(int addr, int index, int len, int ret) "dev %d query config %d, len %d, ret %d"
 usb_desc_string(int addr, int index, int len, int ret) "dev %d query string %d, len %d, ret %d"
+usb_desc_bos(int addr, int len, int ret) "dev %d bos, len %d, ret %d"
 usb_set_addr(int addr) "dev %d"
 usb_set_config(int addr, int config, int ret) "dev %d, config %d, ret %d"
 usb_set_interface(int addr, int iface, int alt, int ret) "dev %d, interface %d, altsetting %d, ret %d"
diff --git a/user-exec.c b/user-exec.c
index b9ea9dd..ef9b172 100644
--- a/user-exec.c
+++ b/user-exec.c
@@ -18,9 +18,6 @@
  */
 #include "config.h"
 #include "cpu.h"
-#ifndef CONFIG_TCG_PASS_AREG0
-#include "dyngen-exec.h"
-#endif
 #include "disas.h"
 #include "tcg.h"
 
@@ -60,12 +57,6 @@
     struct sigcontext *uc = puc;
 #endif
 
-#ifndef CONFIG_TCG_PASS_AREG0
-    env = env1;
-
-    /* XXX: restore cpu registers saved in host registers */
-#endif
-
     if (puc) {
         /* XXX: use siglongjmp ? */
 #ifdef __linux__
@@ -93,11 +84,6 @@
     TranslationBlock *tb;
     int ret;
 
-#ifndef CONFIG_TCG_PASS_AREG0
-    if (cpu_single_env) {
-        env = cpu_single_env; /* XXX: find a correct solution for multithread */
-    }
-#endif
 #if defined(DEBUG_SIGNAL)
     qemu_printf("qemu: SIGSEGV pc=0x%08lx address=%08lx w=%d oldset=0x%08lx\n",
                 pc, address, is_write, *(unsigned long *)old_set);