Merge remote-tracking branch 'qemu-kvm/memory/urgent' into staging

* qemu-kvm/memory/urgent:
  memory: abort if a memory region is destroyed during a transaction
  i440fx: avoid destroying memory regions within a transaction
  memory: Make eventfd adhere to device endianness
diff --git a/MAINTAINERS b/MAINTAINERS
index f1f9250..8f5681f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -641,7 +641,7 @@
 
 Network device layer
 M: Anthony Liguori <aliguori@us.ibm.com>
-M: Stefan Hajnoczi <stefanha@gmail.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Maintained
 F: net/
 T: git git://github.com/stefanha/qemu.git net
@@ -661,7 +661,7 @@
 T: git git://git.kiszka.org/qemu.git queues/slirp
 
 Tracing
-M: Stefan Hajnoczi <stefanha@gmail.com>
+M: Stefan Hajnoczi <stefanha@redhat.com>
 S: Maintained
 F: trace/
 F: scripts/tracetool.py
diff --git a/Makefile b/Makefile
index a9c22bf..88285a4 100644
--- a/Makefile
+++ b/Makefile
@@ -14,9 +14,11 @@
 	@sed -n "/.*Configured with/s/[^:]*: //p" $@ | sh
 else
 config-host.mak:
+ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 	@echo "Please call configure before running make!"
 	@exit 1
 endif
+endif
 
 GENERATED_HEADERS = config-host.h trace.h qemu-options.def
 ifeq ($(TRACE_BACKEND),dtrace)
@@ -403,7 +405,9 @@
 
 # Add a dependency on the generated files, so that they are always
 # rebuilt before other object files
+ifneq ($(filter-out %clean,$(MAKECMDGOALS)),$(if $(MAKECMDGOALS),,fail))
 Makefile: $(GENERATED_HEADERS)
+endif
 
 # Include automatically generated dependency files
 # Dependencies in Makefile.objs files come from our recursive subdir rules
diff --git a/arch_init.c b/arch_init.c
index 9904f95..e6effe8 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -31,6 +31,8 @@
 #include "config.h"
 #include "monitor.h"
 #include "sysemu.h"
+#include "bitops.h"
+#include "bitmap.h"
 #include "arch_init.h"
 #include "audio/audio.h"
 #include "hw/pc.h"
@@ -45,6 +47,7 @@
 #include "hw/pcspk.h"
 #include "qemu/page_cache.h"
 #include "qmp-commands.h"
+#include "trace.h"
 
 #ifdef DEBUG_ARCH_INIT
 #define DPRINTF(fmt, ...) \
@@ -330,6 +333,78 @@
 
 static RAMBlock *last_block;
 static ram_addr_t last_offset;
+static unsigned long *migration_bitmap;
+static uint64_t migration_dirty_pages;
+
+static inline bool migration_bitmap_test_and_reset_dirty(MemoryRegion *mr,
+                                                         ram_addr_t offset)
+{
+    /* Bit index in migration_bitmap: one bit per target page. */
+    int page = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+    bool was_dirty = test_and_clear_bit(page, migration_bitmap);
+
+    /* Only a bit that really was set leaves the dirty-page count. */
+    if (was_dirty) {
+        migration_dirty_pages--;
+    }
+    return was_dirty;
+}
+
+static inline bool migration_bitmap_set_dirty(MemoryRegion *mr,
+                                              ram_addr_t offset)
+{
+    /* Bit index in migration_bitmap: one bit per target page. */
+    int page = (mr->ram_addr + offset) >> TARGET_PAGE_BITS;
+    bool already_dirty = test_and_set_bit(page, migration_bitmap);
+
+    /* Count the page only when its bit goes from clear to set. */
+    if (!already_dirty) {
+        migration_dirty_pages++;
+    }
+    return already_dirty;
+}
+
+static void migration_bitmap_sync(void)
+{
+    RAMBlock *block;
+    ram_addr_t addr;
+    uint64_t num_dirty_pages_init = migration_dirty_pages;
+    MigrationState *s = migrate_get_current();
+    static int64_t start_time;             /* start of rate window, in ms */
+    static int64_t num_dirty_pages_period; /* pages dirtied in that window */
+    int64_t end_time;
+
+    if (!start_time) {
+        start_time = qemu_get_clock_ms(rt_clock);
+    }
+
+    trace_migration_bitmap_sync_start();
+    memory_global_sync_dirty_bitmap(get_system_memory());
+
+    QLIST_FOREACH(block, &ram_list.blocks, next) {
+        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
+            if (memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE,
+                                        DIRTY_MEMORY_MIGRATION)) {
+                migration_bitmap_set_dirty(block->mr, addr);
+            }
+        }
+        memory_region_reset_dirty(block->mr, 0, block->length,
+                                  DIRTY_MEMORY_MIGRATION);
+    }
+    trace_migration_bitmap_sync_end(migration_dirty_pages
+                                    - num_dirty_pages_init);
+    num_dirty_pages_period += migration_dirty_pages - num_dirty_pages_init;
+    end_time = qemu_get_clock_ms(rt_clock);
+
+    /* more than 1 second = 1000 milliseconds */
+    if (end_time > start_time + 1000) {
+        s->dirty_pages_rate = num_dirty_pages_period * 1000
+            / (end_time - start_time);
+        start_time = end_time;
+        num_dirty_pages_period = 0;
+    }
+}
+
 
 /*
  * ram_save_block: Writes a page of memory to the stream f
@@ -352,14 +427,10 @@
 
     do {
         mr = block->mr;
-        if (memory_region_get_dirty(mr, offset, TARGET_PAGE_SIZE,
-                                    DIRTY_MEMORY_MIGRATION)) {
+        if (migration_bitmap_test_and_reset_dirty(mr, offset)) {
             uint8_t *p;
             int cont = (block == last_block) ? RAM_SAVE_FLAG_CONTINUE : 0;
 
-            memory_region_reset_dirty(mr, offset, TARGET_PAGE_SIZE,
-                                      DIRTY_MEMORY_MIGRATION);
-
             p = memory_region_get_ram_ptr(mr) + offset;
 
             if (is_dup_page(p)) {
@@ -409,7 +480,7 @@
 
 static ram_addr_t ram_save_remaining(void)
 {
-    return ram_list.dirty_pages;
+    return migration_dirty_pages;
 }
 
 uint64_t ram_bytes_remaining(void)
@@ -481,17 +552,27 @@
     migration_end();
 }
 
+/* Clear the remembered last_block/last_offset and call sort_ram_list(). */
+static void reset_ram_globals(void)
+{
+    last_block = NULL;
+    last_offset = 0;
+    sort_ram_list();
+}
+
 #define MAX_WAIT 50 /* ms, half buffered_file limit */
 
 static int ram_save_setup(QEMUFile *f, void *opaque)
 {
-    ram_addr_t addr;
     RAMBlock *block;
+    int64_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS;
+
+    migration_bitmap = bitmap_new(ram_pages);
+    bitmap_set(migration_bitmap, 0, ram_pages); /* every page starts dirty */
+    migration_dirty_pages = ram_pages;
 
     bytes_transferred = 0;
-    last_block = NULL;
-    last_offset = 0;
-    sort_ram_list();
+    reset_ram_globals();
 
     if (migrate_use_xbzrle()) {
         XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() /
@@ -506,17 +587,8 @@
         acct_clear();
     }
 
-    /* Make sure all dirty bits are set */
-    QLIST_FOREACH(block, &ram_list.blocks, next) {
-        for (addr = 0; addr < block->length; addr += TARGET_PAGE_SIZE) {
-            if (!memory_region_get_dirty(block->mr, addr, TARGET_PAGE_SIZE,
-                                         DIRTY_MEMORY_MIGRATION)) {
-                memory_region_set_dirty(block->mr, addr, TARGET_PAGE_SIZE);
-            }
-        }
-    }
-
     memory_global_dirty_log_start();
+    migration_bitmap_sync();
 
     qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE);
 
@@ -537,7 +609,8 @@
     double bwidth = 0;
     int ret;
     int i;
-    uint64_t expected_time;
+    uint64_t expected_downtime;
+    MigrationState *s = migrate_get_current();
 
     bytes_transferred_last = bytes_transferred;
     bwidth = qemu_get_clock_ns(rt_clock);
@@ -576,31 +649,32 @@
     bwidth = qemu_get_clock_ns(rt_clock) - bwidth;
     bwidth = (bytes_transferred - bytes_transferred_last) / bwidth;
 
-    /* if we haven't transferred anything this round, force expected_time to a
-     * a very high value, but without crashing */
+    /* if we haven't transferred anything this round, force
+     * expected_downtime to a very high value, but without
+     * crashing */
     if (bwidth == 0) {
         bwidth = 0.000001;
     }
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
-    expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+    expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+    DPRINTF("ram_save_live: expected(%" PRIu64 ") <= max(%" PRIu64 ")?\n",
+            expected_downtime, migrate_max_downtime());
 
-    DPRINTF("ram_save_live: expected(%" PRIu64 ") <= max(%" PRIu64 ")?\n",
-            expected_time, migrate_max_downtime());
+    if (expected_downtime <= migrate_max_downtime()) {
+        migration_bitmap_sync();
+        expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
+        s->expected_downtime = expected_downtime / 1000000; /* ns -> ms */
 
-    if (expected_time <= migrate_max_downtime()) {
-        memory_global_sync_dirty_bitmap(get_system_memory());
-        expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
-
-        return expected_time <= migrate_max_downtime();
+        return expected_downtime <= migrate_max_downtime();
     }
     return 0;
 }
 
 static int ram_save_complete(QEMUFile *f, void *opaque)
 {
-    memory_global_sync_dirty_bitmap(get_system_memory());
+    migration_bitmap_sync();
 
     /* try transferring iterative blocks of memory */
 
@@ -619,6 +693,9 @@
 
     qemu_put_be64(f, RAM_SAVE_FLAG_EOS);
 
+    g_free(migration_bitmap);
+    migration_bitmap = NULL;
+
     return 0;
 }
 
diff --git a/block-migration.c b/block-migration.c
index ed93301..71b9601 100644
--- a/block-migration.c
+++ b/block-migration.c
@@ -423,20 +423,23 @@
 
 error:
     DPRINTF("Error reading sector %" PRId64 "\n", sector);
-    qemu_file_set_error(f, ret);
     g_free(blk->buf);
     g_free(blk);
-    return 0;
+    return ret;
 }
 
+/* return value:
+ * 0: too much data for max_downtime
+ * 1: little enough data for max_downtime
+*/
 static int blk_mig_save_dirty_block(QEMUFile *f, int is_async)
 {
     BlkMigDevState *bmds;
-    int ret = 0;
+    int ret = 1;
 
     QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) {
-        if (mig_save_device_dirty(f, bmds, is_async) == 0) {
-            ret = 1;
+        ret = mig_save_device_dirty(f, bmds, is_async);
+        if (ret <= 0) {
             break;
         }
     }
@@ -444,9 +447,10 @@
     return ret;
 }
 
-static void flush_blks(QEMUFile* f)
+static int flush_blks(QEMUFile *f)
 {
     BlkMigBlock *blk;
+    int ret = 0;
 
     DPRINTF("%s Enter submitted %d read_done %d transferred %d\n",
             __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done,
@@ -457,7 +461,7 @@
             break;
         }
         if (blk->ret < 0) {
-            qemu_file_set_error(f, blk->ret);
+            ret = blk->ret;
             break;
         }
         blk_send(f, blk);
@@ -474,6 +478,7 @@
     DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__,
             block_mig_state.submitted, block_mig_state.read_done,
             block_mig_state.transferred);
+    return ret;
 }
 
 static int64_t get_remaining_dirty(void)
@@ -555,9 +560,7 @@
     /* start track dirty blocks */
     set_dirty_tracking(1);
 
-    flush_blks(f);
-
-    ret = qemu_file_get_error(f);
+    ret = flush_blks(f);
     if (ret) {
         blk_mig_cleanup();
         return ret;
@@ -577,9 +580,7 @@
     DPRINTF("Enter save live iterate submitted %d transferred %d\n",
             block_mig_state.submitted, block_mig_state.transferred);
 
-    flush_blks(f);
-
-    ret = qemu_file_get_error(f);
+    ret = flush_blks(f);
     if (ret) {
         blk_mig_cleanup();
         return ret;
@@ -598,16 +599,19 @@
                 block_mig_state.bulk_completed = 1;
             }
         } else {
-            if (blk_mig_save_dirty_block(f, 1) == 0) {
+            ret = blk_mig_save_dirty_block(f, 1);
+            if (ret != 0) {
                 /* no more dirty blocks */
                 break;
             }
         }
     }
+    if (ret < 0) { /* 1 only means "no more dirty blocks", not an error */
+        blk_mig_cleanup();
+        return ret;
+    }
 
-    flush_blks(f);
-
-    ret = qemu_file_get_error(f);
+    ret = flush_blks(f);
     if (ret) {
         blk_mig_cleanup();
         return ret;
@@ -625,9 +629,7 @@
     DPRINTF("Enter save live complete submitted %d transferred %d\n",
             block_mig_state.submitted, block_mig_state.transferred);
 
-    flush_blks(f);
-
-    ret = qemu_file_get_error(f);
+    ret = flush_blks(f);
     if (ret) {
         blk_mig_cleanup();
         return ret;
@@ -639,18 +641,16 @@
        all async read completed */
     assert(block_mig_state.submitted == 0);
 
-    while (blk_mig_save_dirty_block(f, 0) != 0) {
-        /* Do nothing */
-    }
+    do {
+        ret = blk_mig_save_dirty_block(f, 0);
+    } while (ret == 0);
+
     blk_mig_cleanup();
-
-    /* report completion */
-    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
-
-    ret = qemu_file_get_error(f);
     if (ret) {
         return ret;
     }
+    /* report completion */
+    qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS);
 
     DPRINTF("Block migration completed\n");
 
diff --git a/buffered_file.c b/buffered_file.c
index f170aa0..ed92df1 100644
--- a/buffered_file.c
+++ b/buffered_file.c
@@ -23,11 +23,7 @@
 
 typedef struct QEMUFileBuffered
 {
-    BufferedPutFunc *put_buffer;
-    BufferedPutReadyFunc *put_ready;
-    BufferedWaitForUnfreezeFunc *wait_for_unfreeze;
-    BufferedCloseFunc *close;
-    void *opaque;
+    MigrationState *migration_state;
     QEMUFile *file;
     int freeze_output;
     size_t bytes_xfer;
@@ -50,70 +46,60 @@
                             const uint8_t *buf, size_t size)
 {
     if (size > (s->buffer_capacity - s->buffer_size)) {
-        void *tmp;
-
         DPRINTF("increasing buffer capacity from %zu by %zu\n",
                 s->buffer_capacity, size + 1024);
 
         s->buffer_capacity += size + 1024;
 
-        tmp = g_realloc(s->buffer, s->buffer_capacity);
-        if (tmp == NULL) {
-            fprintf(stderr, "qemu file buffer expansion failed\n");
-            exit(1);
-        }
-
-        s->buffer = tmp;
+        s->buffer = g_realloc(s->buffer, s->buffer_capacity);
     }
 
     memcpy(s->buffer + s->buffer_size, buf, size);
     s->buffer_size += size;
 }
 
-static void buffered_flush(QEMUFileBuffered *s)
+static ssize_t buffered_flush(QEMUFileBuffered *s)
 {
     size_t offset = 0;
-    int error;
-
-    error = qemu_file_get_error(s->file);
-    if (error != 0) {
-        DPRINTF("flush when error, bailing: %s\n", strerror(-error));
-        return;
-    }
+    ssize_t ret = 0;
 
     DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size);
 
-    while (offset < s->buffer_size) {
-        ssize_t ret;
+    while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) {
 
-        ret = s->put_buffer(s->opaque, s->buffer + offset,
-                            s->buffer_size - offset);
+        ret = migrate_fd_put_buffer(s->migration_state, s->buffer + offset,
+                                    s->buffer_size - offset);
         if (ret == -EAGAIN) {
             DPRINTF("backend not ready, freezing\n");
+            ret = 0;
             s->freeze_output = 1;
             break;
         }
 
         if (ret <= 0) {
             DPRINTF("error flushing data, %zd\n", ret);
-            qemu_file_set_error(s->file, ret);
             break;
         } else {
             DPRINTF("flushed %zd byte(s)\n", ret);
             offset += ret;
+            s->bytes_xfer += ret;
         }
     }
 
     DPRINTF("flushed %zu of %zu byte(s)\n", offset, s->buffer_size);
     memmove(s->buffer, s->buffer + offset, s->buffer_size - offset);
     s->buffer_size -= offset;
+
+    if (ret < 0) {
+        return ret;
+    }
+    return offset;
 }
 
 static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size)
 {
     QEMUFileBuffered *s = opaque;
-    int offset = 0, error;
-    ssize_t ret;
+    ssize_t error;
 
     DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos);
 
@@ -126,65 +112,54 @@
     DPRINTF("unfreezing output\n");
     s->freeze_output = 0;
 
-    buffered_flush(s);
-
-    while (!s->freeze_output && offset < size) {
-        if (s->bytes_xfer > s->xfer_limit) {
-            DPRINTF("transfer limit exceeded when putting\n");
-            break;
-        }
-
-        ret = s->put_buffer(s->opaque, buf + offset, size - offset);
-        if (ret == -EAGAIN) {
-            DPRINTF("backend not ready, freezing\n");
-            s->freeze_output = 1;
-            break;
-        }
-
-        if (ret <= 0) {
-            DPRINTF("error putting\n");
-            qemu_file_set_error(s->file, ret);
-            offset = -EINVAL;
-            break;
-        }
-
-        DPRINTF("put %zd byte(s)\n", ret);
-        offset += ret;
-        s->bytes_xfer += ret;
+    if (size > 0) {
+        DPRINTF("buffering %d bytes\n", size);
+        buffered_append(s, buf, size);
     }
 
-    if (offset >= 0) {
-        DPRINTF("buffering %d bytes\n", size - offset);
-        buffered_append(s, buf + offset, size - offset);
-        offset = size;
+    error = buffered_flush(s);
+    if (error < 0) {
+        DPRINTF("buffered flush error. bailing: %s\n", strerror(-error));
+        return error;
     }
 
     if (pos == 0 && size == 0) {
         DPRINTF("file is ready\n");
-        if (s->bytes_xfer <= s->xfer_limit) {
+        if (!s->freeze_output && s->bytes_xfer < s->xfer_limit) {
             DPRINTF("notifying client\n");
-            s->put_ready(s->opaque);
+            migrate_fd_put_ready(s->migration_state);
         }
     }
 
-    return offset;
+    return size;
 }
 
 static int buffered_close(void *opaque)
 {
     QEMUFileBuffered *s = opaque;
-    int ret;
+    ssize_t ret = 0;
+    int ret2;
 
     DPRINTF("closing\n");
 
+    s->xfer_limit = INT_MAX;
     while (!qemu_file_get_error(s->file) && s->buffer_size) {
-        buffered_flush(s);
-        if (s->freeze_output)
-            s->wait_for_unfreeze(s->opaque);
+        ret = buffered_flush(s);
+        if (ret < 0) {
+            break;
+        }
+        if (s->freeze_output) {
+            ret = migrate_fd_wait_for_unfreeze(s->migration_state);
+            if (ret < 0) {
+                break;
+            }
+        }
     }
 
-    ret = s->close(s->opaque);
-
+    ret2 = migrate_fd_close(s->migration_state);
+    if (ret >= 0) {
+        ret = ret2;
+    }
     qemu_del_timer(s->timer);
     qemu_free_timer(s->timer);
     g_free(s->buffer);
@@ -256,29 +231,17 @@
 
     s->bytes_xfer = 0;
 
-    buffered_flush(s);
-
-    /* Add some checks around this */
-    s->put_ready(s->opaque);
+    buffered_put_buffer(s, NULL, 0, 0);
 }
 
-QEMUFile *qemu_fopen_ops_buffered(void *opaque,
-                                  size_t bytes_per_sec,
-                                  BufferedPutFunc *put_buffer,
-                                  BufferedPutReadyFunc *put_ready,
-                                  BufferedWaitForUnfreezeFunc *wait_for_unfreeze,
-                                  BufferedCloseFunc *close)
+QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state)
 {
     QEMUFileBuffered *s;
 
     s = g_malloc0(sizeof(*s));
 
-    s->opaque = opaque;
-    s->xfer_limit = bytes_per_sec / 10;
-    s->put_buffer = put_buffer;
-    s->put_ready = put_ready;
-    s->wait_for_unfreeze = wait_for_unfreeze;
-    s->close = close;
+    s->migration_state = migration_state;
+    s->xfer_limit = migration_state->bandwidth_limit / 10;
 
     s->file = qemu_fopen_ops(s, buffered_put_buffer, NULL,
                              buffered_close, buffered_rate_limit,
diff --git a/buffered_file.h b/buffered_file.h
index 98d358b..ef010fe 100644
--- a/buffered_file.h
+++ b/buffered_file.h
@@ -15,16 +15,8 @@
 #define QEMU_BUFFERED_FILE_H
 
 #include "hw/hw.h"
+#include "migration.h"
 
-typedef ssize_t (BufferedPutFunc)(void *opaque, const void *data, size_t size);
-typedef void (BufferedPutReadyFunc)(void *opaque);
-typedef void (BufferedWaitForUnfreezeFunc)(void *opaque);
-typedef int (BufferedCloseFunc)(void *opaque);
-
-QEMUFile *qemu_fopen_ops_buffered(void *opaque, size_t xfer_limit,
-                                  BufferedPutFunc *put_buffer,
-                                  BufferedPutReadyFunc *put_ready,
-                                  BufferedWaitForUnfreezeFunc *wait_for_unfreeze,
-                                  BufferedCloseFunc *close);
+QEMUFile *qemu_fopen_ops_buffered(MigrationState *migration_state);
 
 #endif
diff --git a/configure b/configure
index 353d788..fa5657f 100755
--- a/configure
+++ b/configure
@@ -199,7 +199,7 @@
 softmmu="yes"
 linux_user="no"
 bsd_user="no"
-guest_base=""
+guest_base="yes"
 uname_release=""
 mixemu="no"
 aix="no"
@@ -871,63 +871,36 @@
   esac
 done
 
-host_guest_base="no"
 case "$cpu" in
     sparc)
            LDFLAGS="-m32 $LDFLAGS"
            QEMU_CFLAGS="-m32 -mcpu=ultrasparc $QEMU_CFLAGS"
-           host_guest_base="yes"
            ;;
     sparc64)
            LDFLAGS="-m64 $LDFLAGS"
            QEMU_CFLAGS="-m64 -mcpu=ultrasparc $QEMU_CFLAGS"
-           host_guest_base="yes"
            ;;
     s390)
            QEMU_CFLAGS="-m31 -march=z990 $QEMU_CFLAGS"
            LDFLAGS="-m31 $LDFLAGS"
-           host_guest_base="yes"
            ;;
     s390x)
            QEMU_CFLAGS="-m64 -march=z990 $QEMU_CFLAGS"
            LDFLAGS="-m64 $LDFLAGS"
-           host_guest_base="yes"
            ;;
     i386)
            QEMU_CFLAGS="-m32 $QEMU_CFLAGS"
            LDFLAGS="-m32 $LDFLAGS"
            cc_i386='$(CC) -m32'
-           host_guest_base="yes"
            ;;
     x86_64)
            QEMU_CFLAGS="-m64 $QEMU_CFLAGS"
            LDFLAGS="-m64 $LDFLAGS"
            cc_i386='$(CC) -m32'
-           host_guest_base="yes"
            ;;
-    arm*)
-           host_guest_base="yes"
-           ;;
-    ppc*)
-           host_guest_base="yes"
-           ;;
-    mips*)
-           host_guest_base="yes"
-           ;;
-    ia64*)
-           host_guest_base="yes"
-           ;;
-    hppa*)
-           host_guest_base="yes"
-           ;;
-    unicore32*)
-           host_guest_base="yes"
-           ;;
+    # No special flags required for other host CPUs
 esac
 
-[ -z "$guest_base" ] && guest_base="$host_guest_base"
-
-
 default_target_list=""
 
 # these targets are portable
@@ -1323,7 +1296,7 @@
 # big/little endian test
 cat > $TMPC << EOF
 #include <inttypes.h>
-int main(int argc, char ** argv){
+int main(void) {
         volatile uint32_t i=0x01234567;
         return (*((uint8_t*)(&i))) == 0x67;
 }
@@ -2896,7 +2869,7 @@
   return __sync_fetch_and_and(ptr, 0);
 }
 
-int main(int argc, char **argv)
+int main(void)
 {
   int val = 42;
   sfaa(&val);
@@ -3227,7 +3200,7 @@
 echo "qemu_datadir=$qemu_datadir" >> $config_host_mak
 echo "qemu_docdir=$qemu_docdir" >> $config_host_mak
 echo "qemu_localstatedir=$local_statedir" >> $config_host_mak
-echo "CONFIG_QEMU_HELPERDIR=\"$libexecdir\"" >> $config_host_mak
+echo "qemu_helperdir=$libexecdir" >> $config_host_mak
 
 echo "ARCH=$ARCH" >> $config_host_mak
 if test "$debug_tcg" = "yes" ; then
diff --git a/cpu-all.h b/cpu-all.h
index 2b99682..6aa7e58 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -500,7 +500,6 @@
 typedef struct RAMList {
     uint8_t *phys_dirty;
     QLIST_HEAD(, RAMBlock) blocks;
-    uint64_t dirty_pages;
 } RAMList;
 extern RAMList ram_list;
 
@@ -518,6 +517,7 @@
 #define TLB_MMIO        (1 << 5)
 
 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
+ram_addr_t last_ram_offset(void);
 #endif /* !CONFIG_USER_ONLY */
 
 int cpu_memory_rw_debug(CPUArchState *env, target_ulong addr,
diff --git a/cpus.c b/cpus.c
index 750a76f..191cbf5 100644
--- a/cpus.c
+++ b/cpus.c
@@ -898,6 +898,11 @@
     return qemu_thread_is_self(cpu->thread);
 }
 
+static bool qemu_in_vcpu_thread(void)
+{
+    return cpu_single_env && qemu_cpu_is_self(cpu_single_env);
+}
+
 void qemu_mutex_lock_iothread(void)
 {
     if (!tcg_enabled()) {
@@ -943,7 +948,7 @@
         penv = penv->next_cpu;
     }
 
-    if (!qemu_thread_is_self(&io_thread)) {
+    if (qemu_in_vcpu_thread()) {
         cpu_stop_current();
         if (!kvm_enabled()) {
             while (penv) {
@@ -1060,7 +1065,7 @@
 
 void vm_stop(RunState state)
 {
-    if (!qemu_thread_is_self(&io_thread)) {
+    if (qemu_in_vcpu_thread()) {
         qemu_system_vmstop_request(state);
         /*
          * FIXME: should not return to device code in case
diff --git a/cputlb.c b/cputlb.c
index 51b5897..9027557 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -21,11 +21,11 @@
 #include "cpu.h"
 #include "exec-all.h"
 #include "memory.h"
+#include "exec-memory.h"
 
 #include "cputlb.h"
 
-#define WANT_EXEC_OBSOLETE
-#include "exec-obsolete.h"
+#include "memory-internal.h"
 
 //#define DEBUG_TLB
 //#define DEBUG_TLB_CHECK
@@ -252,7 +252,7 @@
     if (size != TARGET_PAGE_SIZE) {
         tlb_add_large_page(env, vaddr, size);
     }
-    section = phys_page_find(paddr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, paddr >> TARGET_PAGE_BITS);
 #if defined(DEBUG_TLB)
     printf("tlb_set_page: vaddr=" TARGET_FMT_lx " paddr=0x" TARGET_FMT_plx
            " prot=%x idx=%d pd=0x%08lx\n",
diff --git a/cputlb.h b/cputlb.h
index 2dc2c96..d537b77 100644
--- a/cputlb.h
+++ b/cputlb.h
@@ -26,7 +26,8 @@
                              target_ulong vaddr);
 void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start,
                            uintptr_t length);
-MemoryRegionSection *phys_page_find(target_phys_addr_t index);
+MemoryRegionSection *phys_page_find(struct AddressSpaceDispatch *d,
+                                    target_phys_addr_t index);
 void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length);
 void tlb_set_dirty(CPUArchState *env, target_ulong vaddr);
 extern int tlb_flush_count;
diff --git a/default-configs/pci.mak b/default-configs/pci.mak
index 69e18f1..ae9d1eb 100644
--- a/default-configs/pci.mak
+++ b/default-configs/pci.mak
@@ -19,3 +19,5 @@
 CONFIG_AHCI=y
 CONFIG_ESP=y
 CONFIG_ESP_PCI=y
+CONFIG_SERIAL=y
+CONFIG_SERIAL_PCI=y
diff --git a/dma-helpers.c b/dma-helpers.c
index 433d8b2..3f09dcb 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -14,7 +14,8 @@
 
 /* #define DEBUG_IOMMU */
 
-static void do_dma_memory_set(dma_addr_t addr, uint8_t c, dma_addr_t len)
+static void do_dma_memory_set(AddressSpace *as,
+                              dma_addr_t addr, uint8_t c, dma_addr_t len)
 {
 #define FILLBUF_SIZE 512
     uint8_t fillbuf[FILLBUF_SIZE];
@@ -23,7 +24,7 @@
     memset(fillbuf, c, FILLBUF_SIZE);
     while (len > 0) {
         l = len < FILLBUF_SIZE ? len : FILLBUF_SIZE;
-        cpu_physical_memory_rw(addr, fillbuf, l, true);
+        address_space_rw(as, addr, fillbuf, l, true);
         len -= l;
         addr += l;
     }
@@ -36,7 +37,7 @@
     if (dma_has_iommu(dma)) {
         return iommu_dma_memory_set(dma, addr, c, len);
     }
-    do_dma_memory_set(addr, c, len);
+    do_dma_memory_set(dma->as, addr, c, len);
 
     return 0;
 }
@@ -332,8 +333,7 @@
             plen = len;
         }
 
-        cpu_physical_memory_rw(paddr, buf, plen,
-                               dir == DMA_DIRECTION_FROM_DEVICE);
+        address_space_rw(dma->as, paddr, buf, plen, dir == DMA_DIRECTION_FROM_DEVICE);
 
         len -= plen;
         addr += plen;
@@ -366,7 +366,7 @@
             plen = len;
         }
 
-        do_dma_memory_set(paddr, c, plen);
+        do_dma_memory_set(dma->as, paddr, c, plen);
 
         len -= plen;
         addr += plen;
@@ -375,13 +375,14 @@
     return 0;
 }
 
-void dma_context_init(DMAContext *dma, DMATranslateFunc translate,
+void dma_context_init(DMAContext *dma, AddressSpace *as, DMATranslateFunc translate,
                       DMAMapFunc map, DMAUnmapFunc unmap)
 {
 #ifdef DEBUG_IOMMU
     fprintf(stderr, "dma_context_init(%p, %p, %p, %p)\n",
             dma, translate, map, unmap);
 #endif
+    dma->as = as;
     dma->translate = translate;
     dma->map = map;
     dma->unmap = unmap;
@@ -407,14 +408,13 @@
     /*
      * If this is true, the virtual region is contiguous,
      * but the translated physical region isn't. We just
-     * clamp *len, much like cpu_physical_memory_map() does.
+     * clamp *len, much like address_space_map() does.
      */
     if (plen < *len) {
         *len = plen;
     }
 
-    buf = cpu_physical_memory_map(paddr, &plen,
-                                  dir == DMA_DIRECTION_FROM_DEVICE);
+    buf = address_space_map(dma->as, paddr, &plen, dir == DMA_DIRECTION_FROM_DEVICE);
     *len = plen;
 
     return buf;
@@ -428,8 +428,7 @@
         return;
     }
 
-    cpu_physical_memory_unmap(buffer, len,
-                              dir == DMA_DIRECTION_FROM_DEVICE,
-                              access_len);
+    address_space_unmap(dma->as, buffer, len, dir == DMA_DIRECTION_FROM_DEVICE,
+                        access_len);
 
 }
diff --git a/dma.h b/dma.h
index 1a33603..1bd6f4a 100644
--- a/dma.h
+++ b/dma.h
@@ -11,6 +11,7 @@
 #define DMA_H
 
 #include <stdio.h>
+#include "memory.h"
 #include "hw/hw.h"
 #include "block.h"
 #include "kvm.h"
@@ -61,6 +62,7 @@
                           dma_addr_t access_len);
 
 struct DMAContext {
+    AddressSpace *as;
     DMATranslateFunc *translate;
     DMAMapFunc *map;
     DMAUnmapFunc *unmap;
@@ -93,7 +95,7 @@
 
 static inline bool dma_has_iommu(DMAContext *dma)
 {
-    return !!dma;
+    return dma && dma->translate;
 }
 
 /* Checks that the given range of addresses is valid for DMA.  This is
@@ -120,8 +122,7 @@
 {
     if (!dma_has_iommu(dma)) {
         /* Fast-path for no IOMMU */
-        cpu_physical_memory_rw(addr, buf, len,
-                               dir == DMA_DIRECTION_FROM_DEVICE);
+        address_space_rw(dma->as, addr, buf, len, dir == DMA_DIRECTION_FROM_DEVICE);
         return 0;
     } else {
         return iommu_dma_memory_rw(dma, addr, buf, len, dir);
@@ -179,8 +180,7 @@
         target_phys_addr_t xlen = *len;
         void *p;
 
-        p = cpu_physical_memory_map(addr, &xlen,
-                                    dir == DMA_DIRECTION_FROM_DEVICE);
+        p = address_space_map(dma->as, addr, &xlen, dir == DMA_DIRECTION_FROM_DEVICE);
         *len = xlen;
         return p;
     } else {
@@ -196,9 +196,8 @@
                                     DMADirection dir, dma_addr_t access_len)
 {
     if (!dma_has_iommu(dma)) {
-        cpu_physical_memory_unmap(buffer, (target_phys_addr_t)len,
-                                  dir == DMA_DIRECTION_FROM_DEVICE,
-                                  access_len);
+        address_space_unmap(dma->as, buffer, (target_phys_addr_t)len,
+                            dir == DMA_DIRECTION_FROM_DEVICE, access_len);
     } else {
         iommu_dma_memory_unmap(dma, buffer, len, dir, access_len);
     }
@@ -242,7 +241,7 @@
 
 #undef DEFINE_LDST_DMA
 
-void dma_context_init(DMAContext *dma, DMATranslateFunc translate,
+void dma_context_init(DMAContext *dma, AddressSpace *as, DMATranslateFunc translate,
                       DMAMapFunc map, DMAUnmapFunc unmap);
 
 struct ScatterGatherEntry {
diff --git a/docs/qemupciserial.inf b/docs/qemupciserial.inf
new file mode 100644
index 0000000..3474310
--- /dev/null
+++ b/docs/qemupciserial.inf
@@ -0,0 +1,109 @@
+; qemupciserial.inf for QEMU, based on MSPORTS.INF
+
+; The driver itself is shipped with Windows (serial.sys).  This is
+; just an inf file to tell windows which pci id the serial pci card
+; emulated by qemu has, and to apply a name tag to it which windows
+; will show in the device manager.
+
+; Installing the driver: Go to device manager.  You should find a "pci
+; serial card" tagged with a yellow question mark.  Open properties.
+; Pick "update driver".  Then "select driver manually".  Pick "Ports
+; (Com+Lpt)" from the list.  Click "Have a disk".  Select this file.
+; Procedure may vary a bit depending on the windows version.
+
+; FIXME: This file covers the single port version only.
+
+[Version]
+Signature="$CHICAGO$"
+Class=Ports
+ClassGuid={4D36E978-E325-11CE-BFC1-08002BE10318}
+Provider=%QEMU%
+DriverVer=09/24/2012,1.3.0
+
+[SourceDisksNames]
+3426=windows cd
+
+[SourceDisksFiles]
+serial.sys 		= 3426
+serenum.sys 		= 3426
+
+[DestinationDirs]
+DefaultDestDir  = 11        ;LDID_SYS
+ComPort.NT.Copy = 12        ;DIRID_DRIVERS
+SerialEnumerator.NT.Copy=12 ;DIRID_DRIVERS
+
+; Drivers
+;----------------------------------------------------------
+[Manufacturer]
+%QEMU%=QEMU,NTx86
+
+[QEMU.NTx86]
+%QEMU-PCI_SERIAL.DeviceDesc% = ComPort, "PCI\VEN_1b36&DEV_0002&CC_0700"
+
+; COM sections
+;----------------------------------------------------------
+[ComPort.AddReg]
+HKR,,PortSubClass,1,01
+
+[ComPort.NT]
+AddReg=ComPort.AddReg, ComPort.NT.AddReg
+LogConfig=caa
+SyssetupPnPFlags = 1
+
+[ComPort.NT.HW]
+AddReg=ComPort.NT.HW.AddReg
+
+[ComPort.NT.AddReg]
+HKR,,EnumPropPages32,,"MsPorts.dll,SerialPortPropPageProvider"
+
+[ComPort.NT.HW.AddReg]
+HKR,,"UpperFilters",0x00010000,"serenum"
+
+;-------------- Service installation
+; Port Driver (function driver for this device)
+[ComPort.NT.Services]
+AddService = Serial, 0x00000002, Serial_Service_Inst, Serial_EventLog_Inst
+AddService = Serenum,,Serenum_Service_Inst
+
+; -------------- Serial Port Driver install sections
+[Serial_Service_Inst]
+DisplayName    = %Serial.SVCDESC%
+ServiceType    = 1               ; SERVICE_KERNEL_DRIVER
+StartType      = 1               ; SERVICE_SYSTEM_START (this driver may do detection)
+ErrorControl   = 0               ; SERVICE_ERROR_IGNORE
+ServiceBinary  = %12%\serial.sys
+LoadOrderGroup = Extended base
+
+; -------------- Serenum Driver install section
+[Serenum_Service_Inst]
+DisplayName    = %Serenum.SVCDESC%
+ServiceType    = 1               ; SERVICE_KERNEL_DRIVER
+StartType      = 3               ; SERVICE_DEMAND_START
+ErrorControl   = 1               ; SERVICE_ERROR_NORMAL
+ServiceBinary  = %12%\serenum.sys
+LoadOrderGroup = PNP Filter
+
+[Serial_EventLog_Inst]
+AddReg = Serial_EventLog_AddReg
+
+[Serial_EventLog_AddReg]
+HKR,,EventMessageFile,0x00020000,"%%SystemRoot%%\System32\IoLogMsg.dll;%%SystemRoot%%\System32\drivers\serial.sys"
+HKR,,TypesSupported,0x00010001,7
+
+; The following sections are COM port resource configs.
+; Section name format means:
+; Char 1 = c (COM port)
+; Char 2 = I/O config: 1 (3f8), 2 (2f8), 3 (3e8), 4 (2e8), a (any)
+; Char 3 = IRQ config: #, a (any)
+
+[caa]                   ; Any base, any IRQ
+ConfigPriority=HARDRECONFIG
+IOConfig=8@100-ffff%fff8(3ff::)
+IRQConfig=S:3,4,5,7,9,10,11,12,14,15
+
+[Strings]
+QEMU="QEMU"
+QEMU-PCI_SERIAL.DeviceDesc="QEMU Serial PCI Card"
+
+Serial.SVCDESC   = "Serial port driver"
+Serenum.SVCDESC = "Serenum Filter Driver"
diff --git a/docs/specs/pci-serial.txt b/docs/specs/pci-serial.txt
new file mode 100644
index 0000000..66c761f
--- /dev/null
+++ b/docs/specs/pci-serial.txt
@@ -0,0 +1,34 @@
+
+QEMU pci serial devices
+=======================
+
+There is one single-port variant and two multiport variants.  Linux
+guests work out-of-the-box with all cards.  There is a Windows inf file
+(docs/qemupciserial.inf) to setup the single-port card in Windows
+guests.
+
+
+single-port card
+----------------
+
+Name:   pci-serial
+PCI ID: 1b36:0002
+
+PCI Region 0:
+   IO bar, 8 bytes long, with the 16550 uart mapped to it.
+   Interrupt is wired to pin A.
+
+
+multiport cards
+---------------
+
+Name:   pci-serial-2x
+PCI ID: 1b36:0003
+
+Name:   pci-serial-4x
+PCI ID: 1b36:0004
+
+PCI Region 0:
+   IO bar, with two/four 16550 uarts mapped after each other.
+   The first is at offset 0, second at offset 8, ...
+   Interrupt is wired to pin A.
diff --git a/docs/specs/standard-vga.txt b/docs/specs/standard-vga.txt
new file mode 100644
index 0000000..1cecccd
--- /dev/null
+++ b/docs/specs/standard-vga.txt
@@ -0,0 +1,64 @@
+
+QEMU Standard VGA
+=================
+
+Exists in two variants, for isa and pci.
+
+command line switches:
+    -vga std            [ picks isa for -M isapc, otherwise pci ]
+    -device VGA         [ pci variant ]
+    -device isa-vga     [ isa variant ]
+
+
+PCI spec
+--------
+
+Applies to the pci variant only for obvious reasons.
+
+PCI ID: 1234:1111
+
+PCI Region 0:
+   Framebuffer memory, 16 MB in size (by default).
+   Size is tunable via vga_mem_mb property.
+
+PCI Region 1:
+   Reserved (so we have the option to make the framebuffer bar 64bit).
+
+PCI Region 2:
+   MMIO bar, 4096 bytes in size (qemu 1.3+)
+
+PCI ROM Region:
+   Holds the vgabios (qemu 0.14+).
+
+
+IO ports used
+-------------
+
+03c0 - 03df : standard vga ports
+01ce        : bochs vbe interface index port
+01cf        : bochs vbe interface data port
+
+
+Memory regions used
+-------------------
+
+0xe0000000 : Framebuffer memory, isa variant only.
+
+The pci variant used to mirror the framebuffer bar here, qemu 0.14+
+stops doing that (except when in -M pc-$old compat mode).
+
+
+MMIO area spec
+--------------
+
+Likewise applies to the pci variant only for obvious reasons.
+
+0000 - 03ff : reserved, for possible virtio extension.
+0400 - 041f : vga ioports (0x3c0 -> 0x3df), remapped 1:1.
+              word access is supported, bytes are written
+              in little endian order (aka index port first),
+              so indexed registers can be updated with a
+              single mmio write (and thus only one vmexit).
+0500 - 0515 : bochs dispi interface registers, mapped flat
+              without index/data ports.  Use (index << 1)
+              as offset for (16bit) register access.
diff --git a/exec-all.h b/exec-all.h
index 6516da0..16caf49 100644
--- a/exec-all.h
+++ b/exec-all.h
@@ -121,8 +121,6 @@
 #define CODE_GEN_PHYS_HASH_BITS     15
 #define CODE_GEN_PHYS_HASH_SIZE     (1 << CODE_GEN_PHYS_HASH_BITS)
 
-#define MIN_CODE_GEN_BUFFER_SIZE     (1024 * 1024)
-
 /* estimated block size for TB allocation */
 /* XXX: use a per code average code fragment size and modulate it
    according to the host CPU */
@@ -296,7 +294,8 @@
 #if defined(CONFIG_TCG_INTERPRETER)
 /* Alpha and SH4 user mode emulations and Softmmu call GETPC().
    For all others, GETPC remains undefined (which makes TCI a little faster. */
-# if defined(CONFIG_SOFTMMU) || defined(TARGET_ALPHA) || defined(TARGET_SH4)
+# if defined(CONFIG_SOFTMMU) || defined(TARGET_ALPHA) || defined(TARGET_SH4) \
+     || defined(TARGET_SPARC)
 extern uintptr_t tci_tb_ptr;
 #  define GETPC() tci_tb_ptr
 # endif
diff --git a/exec-memory.h b/exec-memory.h
index 1cd92ee..ac1d07d 100644
--- a/exec-memory.h
+++ b/exec-memory.h
@@ -33,11 +33,8 @@
  */
 MemoryRegion *get_system_io(void);
 
-/* Set the root memory region.  This region is the system memory map. */
-void set_system_memory_map(MemoryRegion *mr);
-
-/* Set the I/O memory region.  This region is the I/O memory map. */
-void set_system_io_map(MemoryRegion *mr);
+extern AddressSpace address_space_memory;
+extern AddressSpace address_space_io;
 
 #endif
 
diff --git a/exec.c b/exec.c
index 7899042..750008c 100644
--- a/exec.c
+++ b/exec.c
@@ -59,8 +59,7 @@
 
 #include "cputlb.h"
 
-#define WANT_EXEC_OBSOLETE
-#include "exec-obsolete.h"
+#include "memory-internal.h"
 
 //#define DEBUG_TB_INVALIDATE
 //#define DEBUG_FLUSH
@@ -86,26 +85,11 @@
 /* any access to the tbs or the page table must use this lock */
 spinlock_t tb_lock = SPIN_LOCK_UNLOCKED;
 
-#if defined(__arm__) || defined(__sparc__)
-/* The prologue must be reachable with a direct jump. ARM and Sparc64
- have limited branch ranges (possibly also PPC) so place it in a
- section close to code segment. */
-#define code_gen_section                                \
-    __attribute__((__section__(".gen_code")))           \
-    __attribute__((aligned (32)))
-#elif defined(_WIN32) && !defined(_WIN64)
-#define code_gen_section                                \
-    __attribute__((aligned (16)))
-#else
-#define code_gen_section                                \
-    __attribute__((aligned (32)))
-#endif
-
-uint8_t code_gen_prologue[1024] code_gen_section;
+uint8_t *code_gen_prologue;
 static uint8_t *code_gen_buffer;
-static unsigned long code_gen_buffer_size;
+static size_t code_gen_buffer_size;
 /* threshold to flush the translated code buffer */
-static unsigned long code_gen_buffer_max_size;
+static size_t code_gen_buffer_max_size;
 static uint8_t *code_gen_ptr;
 
 #if !defined(CONFIG_USER_ONLY)
@@ -117,6 +101,9 @@
 static MemoryRegion *system_memory;
 static MemoryRegion *system_io;
 
+AddressSpace address_space_io;
+AddressSpace address_space_memory;
+
 MemoryRegion io_mem_ram, io_mem_rom, io_mem_unassigned, io_mem_notdirty;
 static MemoryRegion io_mem_subpage_ram;
 
@@ -185,7 +172,6 @@
 static void *l1_map[V_L1_SIZE];
 
 #if !defined(CONFIG_USER_ONLY)
-typedef struct PhysPageEntry PhysPageEntry;
 
 static MemoryRegionSection *phys_sections;
 static unsigned phys_sections_nb, phys_sections_nb_alloc;
@@ -194,22 +180,12 @@
 static uint16_t phys_section_rom;
 static uint16_t phys_section_watch;
 
-struct PhysPageEntry {
-    uint16_t is_leaf : 1;
-     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
-    uint16_t ptr : 15;
-};
-
 /* Simple allocator for PhysPageEntry nodes */
 static PhysPageEntry (*phys_map_nodes)[L2_SIZE];
 static unsigned phys_map_nodes_nb, phys_map_nodes_nb_alloc;
 
 #define PHYS_MAP_NODE_NIL (((uint16_t)~0) >> 1)
 
-/* This is a multi-level map on the physical address space.
-   The bottom level has pointers to MemoryRegionSections.  */
-static PhysPageEntry phys_map = { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
-
 static void io_mem_init(void);
 static void memory_map_init(void);
 
@@ -221,7 +197,7 @@
 static int tb_phys_invalidate_count;
 
 #ifdef _WIN32
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
 {
     DWORD old_protect;
     VirtualProtect(addr, size,
@@ -229,7 +205,7 @@
     
 }
 #else
-static void map_exec(void *addr, long size)
+static inline void map_exec(void *addr, long size)
 {
     unsigned long start, end, page_size;
     
@@ -457,18 +433,19 @@
     }
 }
 
-static void phys_page_set(target_phys_addr_t index, target_phys_addr_t nb,
+static void phys_page_set(AddressSpaceDispatch *d,
+                          target_phys_addr_t index, target_phys_addr_t nb,
                           uint16_t leaf)
 {
     /* Wildly overreserve - it doesn't matter much. */
     phys_map_node_reserve(3 * P_L2_LEVELS);
 
-    phys_page_set_level(&phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
+    phys_page_set_level(&d->phys_map, &index, &nb, leaf, P_L2_LEVELS - 1);
 }
 
-MemoryRegionSection *phys_page_find(target_phys_addr_t index)
+MemoryRegionSection *phys_page_find(AddressSpaceDispatch *d, target_phys_addr_t index)
 {
-    PhysPageEntry lp = phys_map;
+    PhysPageEntry lp = d->phys_map;
     PhysPageEntry *p;
     int i;
     uint16_t s_index = phys_section_unassigned;
@@ -497,111 +474,142 @@
 #define mmap_unlock() do { } while(0)
 #endif
 
-#define DEFAULT_CODE_GEN_BUFFER_SIZE (32 * 1024 * 1024)
-
 #if defined(CONFIG_USER_ONLY)
 /* Currently it is not recommended to allocate big chunks of data in
-   user mode. It will change when a dedicated libc will be used */
+   user mode. It will change when a dedicated libc will be used.  */
+/* ??? 64-bit hosts ought to have no problem mmapping data outside the
+   region in which the guest needs to run.  Revisit this.  */
 #define USE_STATIC_CODE_GEN_BUFFER
 #endif
 
+/* ??? Should configure for this, not list operating systems here.  */
+#if (defined(__linux__) \
+    || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
+    || defined(__DragonFly__) || defined(__OpenBSD__) \
+    || defined(__NetBSD__))
+# define USE_MMAP
+#endif
+
+/* Minimum size of the code gen buffer.  This number is randomly chosen,
+   but not so small that we can't have a fair number of TB's live.  */
+#define MIN_CODE_GEN_BUFFER_SIZE     (1024u * 1024)
+
+/* Maximum size of the code gen buffer we'd like to use.  Unless otherwise
+   indicated, this is constrained by the range of direct branches on the
+   host cpu, as used by the TCG implementation of goto_tb.  */
+#if defined(__x86_64__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__sparc__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (2ul * 1024 * 1024 * 1024)
+#elif defined(__arm__)
+# define MAX_CODE_GEN_BUFFER_SIZE  (16u * 1024 * 1024)
+#elif defined(__s390x__)
+  /* We have a +- 4GB range on the branches; leave some slop.  */
+# define MAX_CODE_GEN_BUFFER_SIZE  (3ul * 1024 * 1024 * 1024)
+#else
+# define MAX_CODE_GEN_BUFFER_SIZE  ((size_t)-1)
+#endif
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+
+#define DEFAULT_CODE_GEN_BUFFER_SIZE \
+  (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
+   ? DEFAULT_CODE_GEN_BUFFER_SIZE_1 : MAX_CODE_GEN_BUFFER_SIZE)
+
+static inline size_t size_code_gen_buffer(size_t tb_size)
+{
+    /* Size the buffer.  */
+    if (tb_size == 0) {
+#ifdef USE_STATIC_CODE_GEN_BUFFER
+        tb_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
+#else
+        /* ??? Needs adjustments.  */
+        /* ??? If we relax the requirement that CONFIG_USER_ONLY use the
+           static buffer, we could size this on RESERVED_VA, on the text
+           segment size of the executable, or continue to use the default.  */
+        tb_size = (unsigned long)(ram_size / 4);
+#endif
+    }
+    if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MIN_CODE_GEN_BUFFER_SIZE;
+    }
+    if (tb_size > MAX_CODE_GEN_BUFFER_SIZE) {
+        tb_size = MAX_CODE_GEN_BUFFER_SIZE;
+    }
+    code_gen_buffer_size = tb_size;
+    return tb_size;
+}
+
 #ifdef USE_STATIC_CODE_GEN_BUFFER
 static uint8_t static_code_gen_buffer[DEFAULT_CODE_GEN_BUFFER_SIZE]
-               __attribute__((aligned (CODE_GEN_ALIGN)));
-#endif
+    __attribute__((aligned(CODE_GEN_ALIGN)));
 
-static void code_gen_alloc(unsigned long tb_size)
+static inline void *alloc_code_gen_buffer(void)
 {
-#ifdef USE_STATIC_CODE_GEN_BUFFER
-    code_gen_buffer = static_code_gen_buffer;
-    code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#else
-    code_gen_buffer_size = tb_size;
-    if (code_gen_buffer_size == 0) {
-#if defined(CONFIG_USER_ONLY)
-        code_gen_buffer_size = DEFAULT_CODE_GEN_BUFFER_SIZE;
-#else
-        /* XXX: needs adjustments */
-        code_gen_buffer_size = (unsigned long)(ram_size / 4);
-#endif
-    }
-    if (code_gen_buffer_size < MIN_CODE_GEN_BUFFER_SIZE)
-        code_gen_buffer_size = MIN_CODE_GEN_BUFFER_SIZE;
-    /* The code gen buffer location may have constraints depending on
-       the host cpu and OS */
-#if defined(__linux__) 
-    {
-        int flags;
-        void *start = NULL;
+    map_exec(static_code_gen_buffer, code_gen_buffer_size);
+    return static_code_gen_buffer;
+}
+#elif defined(USE_MMAP)
+static inline void *alloc_code_gen_buffer(void)
+{
+    int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+    uintptr_t start = 0;
+    void *buf;
 
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        flags |= MAP_32BIT;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        start = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024))
-            code_gen_buffer_size = (512 * 1024 * 1024);
-#elif defined(__arm__)
-        /* Keep the buffer no bigger than 16MB to branch between blocks */
-        if (code_gen_buffer_size > 16 * 1024 * 1024)
-            code_gen_buffer_size = 16 * 1024 * 1024;
-#elif defined(__s390x__)
-        /* Map the buffer so that we can use direct calls and branches.  */
-        /* We have a +- 4GB range on the branches; leave some slop.  */
-        if (code_gen_buffer_size > (3ul * 1024 * 1024 * 1024)) {
-            code_gen_buffer_size = 3ul * 1024 * 1024 * 1024;
-        }
-        start = (void *)0x90000000UL;
-#endif
-        code_gen_buffer = mmap(start, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC,
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
+    /* Constrain the position of the buffer based on the host cpu.
+       Note that these addresses are chosen in concert with the
+       addresses assigned in the relevant linker script file.  */
+# if defined(__PIE__) || defined(__PIC__)
+    /* Don't bother setting a preferred location if we're building
+       a position-independent executable.  We're more likely to get
+       an address near the main executable if we let the kernel
+       choose the address.  */
+# elif defined(__x86_64__) && defined(MAP_32BIT)
+    /* Force the memory down into low memory with the executable.
+       Leave the choice of exact location with the kernel.  */
+    flags |= MAP_32BIT;
+    /* Cannot expect to map more than 800MB in low memory.  */
+    if (code_gen_buffer_size > 800u * 1024 * 1024) {
+        code_gen_buffer_size = 800u * 1024 * 1024;
     }
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \
-    || defined(__DragonFly__) || defined(__OpenBSD__) \
-    || defined(__NetBSD__)
-    {
-        int flags;
-        void *addr = NULL;
-        flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__x86_64__)
-        /* FreeBSD doesn't have MAP_32BIT, use MAP_FIXED and assume
-         * 0x40000000 is free */
-        flags |= MAP_FIXED;
-        addr = (void *)0x40000000;
-        /* Cannot map more than that */
-        if (code_gen_buffer_size > (800 * 1024 * 1024))
-            code_gen_buffer_size = (800 * 1024 * 1024);
-#elif defined(__sparc__) && HOST_LONG_BITS == 64
-        // Map the buffer below 2G, so we can use direct calls and branches
-        addr = (void *) 0x40000000UL;
-        if (code_gen_buffer_size > (512 * 1024 * 1024)) {
-            code_gen_buffer_size = (512 * 1024 * 1024);
-        }
-#endif
-        code_gen_buffer = mmap(addr, code_gen_buffer_size,
-                               PROT_WRITE | PROT_READ | PROT_EXEC, 
-                               flags, -1, 0);
-        if (code_gen_buffer == MAP_FAILED) {
-            fprintf(stderr, "Could not allocate dynamic translator buffer\n");
-            exit(1);
-        }
-    }
+# elif defined(__sparc__)
+    start = 0x40000000ul;
+# elif defined(__s390x__)
+    start = 0x90000000ul;
+# endif
+
+    buf = mmap((void *)start, code_gen_buffer_size,
+               PROT_WRITE | PROT_READ | PROT_EXEC, flags, -1, 0);
+    return buf == MAP_FAILED ? NULL : buf;
+}
 #else
-    code_gen_buffer = g_malloc(code_gen_buffer_size);
-    map_exec(code_gen_buffer, code_gen_buffer_size);
-#endif
-#endif /* !USE_STATIC_CODE_GEN_BUFFER */
-    map_exec(code_gen_prologue, sizeof(code_gen_prologue));
+static inline void *alloc_code_gen_buffer(void)
+{
+    void *buf = g_malloc(code_gen_buffer_size);
+    if (buf) {
+        map_exec(buf, code_gen_buffer_size);
+    }
+    return buf;
+}
+#endif /* USE_STATIC_CODE_GEN_BUFFER, USE_MMAP */
+
+static inline void code_gen_alloc(size_t tb_size)
+{
+    code_gen_buffer_size = size_code_gen_buffer(tb_size);
+    code_gen_buffer = alloc_code_gen_buffer();
+    if (code_gen_buffer == NULL) {
+        fprintf(stderr, "Could not allocate dynamic translator buffer\n");
+        exit(1);
+    }
+
+    /* Steal room for the prologue at the end of the buffer.  This ensures
+       (via the MAX_CODE_GEN_BUFFER_SIZE limits above) that direct branches
+       from TB's to the prologue are going to be in range.  It also means
+       that we don't need to mark (additional) portions of the data segment
+       as executable.  */
+    code_gen_prologue = code_gen_buffer + code_gen_buffer_size - 1024;
+    code_gen_buffer_size -= 1024;
+
     code_gen_buffer_max_size = code_gen_buffer_size -
         (TCG_MAX_OP_SIZE * OPC_BUF_SIZE);
     code_gen_max_blocks = code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
@@ -1470,7 +1478,7 @@
     ram_addr_t ram_addr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
     if (!(memory_region_is_ram(section->mr)
           || (section->mr->rom_device && section->mr->readable))) {
         return;
@@ -2208,9 +2216,9 @@
     lp->ptr = PHYS_MAP_NODE_NIL;
 }
 
-static void destroy_all_mappings(void)
+static void destroy_all_mappings(AddressSpaceDispatch *d)
 {
-    destroy_l2_mapping(&phys_map, P_L2_LEVELS - 1);
+    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
     phys_map_nodes_reset();
 }
 
@@ -2230,12 +2238,12 @@
     phys_sections_nb = 0;
 }
 
-static void register_subpage(MemoryRegionSection *section)
+static void register_subpage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 {
     subpage_t *subpage;
     target_phys_addr_t base = section->offset_within_address_space
         & TARGET_PAGE_MASK;
-    MemoryRegionSection *existing = phys_page_find(base >> TARGET_PAGE_BITS);
+    MemoryRegionSection *existing = phys_page_find(d, base >> TARGET_PAGE_BITS);
     MemoryRegionSection subsection = {
         .offset_within_address_space = base,
         .size = TARGET_PAGE_SIZE,
@@ -2247,7 +2255,7 @@
     if (!(existing->mr->subpage)) {
         subpage = subpage_init(base);
         subsection.mr = &subpage->iomem;
-        phys_page_set(base >> TARGET_PAGE_BITS, 1,
+        phys_page_set(d, base >> TARGET_PAGE_BITS, 1,
                       phys_section_add(&subsection));
     } else {
         subpage = container_of(existing->mr, subpage_t, iomem);
@@ -2258,7 +2266,7 @@
 }
 
 
-static void register_multipage(MemoryRegionSection *section)
+static void register_multipage(AddressSpaceDispatch *d, MemoryRegionSection *section)
 {
     target_phys_addr_t start_addr = section->offset_within_address_space;
     ram_addr_t size = section->size;
@@ -2268,13 +2276,13 @@
     assert(size);
 
     addr = start_addr;
-    phys_page_set(addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
+    phys_page_set(d, addr >> TARGET_PAGE_BITS, size >> TARGET_PAGE_BITS,
                   section_index);
 }
 
-void cpu_register_physical_memory_log(MemoryRegionSection *section,
-                                      bool readonly)
+static void mem_add(MemoryListener *listener, MemoryRegionSection *section)
 {
+    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
     MemoryRegionSection now = *section, remain = *section;
 
     if ((now.offset_within_address_space & ~TARGET_PAGE_MASK)
@@ -2282,7 +2290,7 @@
         now.size = MIN(TARGET_PAGE_ALIGN(now.offset_within_address_space)
                        - now.offset_within_address_space,
                        now.size);
-        register_subpage(&now);
+        register_subpage(d, &now);
         remain.size -= now.size;
         remain.offset_within_address_space += now.size;
         remain.offset_within_region += now.size;
@@ -2291,10 +2299,10 @@
         now = remain;
         if (remain.offset_within_region & ~TARGET_PAGE_MASK) {
             now.size = TARGET_PAGE_SIZE;
-            register_subpage(&now);
+            register_subpage(d, &now);
         } else {
             now.size &= TARGET_PAGE_MASK;
-            register_multipage(&now);
+            register_multipage(d, &now);
         }
         remain.size -= now.size;
         remain.offset_within_address_space += now.size;
@@ -2302,23 +2310,10 @@
     }
     now = remain;
     if (now.size) {
-        register_subpage(&now);
+        register_subpage(d, &now);
     }
 }
 
-
-void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
-{
-    if (kvm_enabled())
-        kvm_coalesce_mmio_region(addr, size);
-}
-
-void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size)
-{
-    if (kvm_enabled())
-        kvm_uncoalesce_mmio_region(addr, size);
-}
-
 void qemu_flush_coalesced_mmio_buffer(void)
 {
     if (kvm_enabled())
@@ -2454,7 +2449,7 @@
     return offset;
 }
 
-static ram_addr_t last_ram_offset(void)
+ram_addr_t last_ram_offset(void)
 {
     RAMBlock *block;
     ram_addr_t last = 0;
@@ -2576,6 +2571,7 @@
     cpu_physical_memory_set_dirty_range(new_block->offset, size, 0xff);
 
     qemu_ram_setup_dump(new_block->host, size);
+    qemu_madvise(new_block->host, size, QEMU_MADV_HUGEPAGE);
 
     if (kvm_enabled())
         kvm_setup_guest_memory(new_block->host, size);
@@ -3166,18 +3162,24 @@
                           "watch", UINT64_MAX);
 }
 
+static void mem_begin(MemoryListener *listener)
+{
+    AddressSpaceDispatch *d = container_of(listener, AddressSpaceDispatch, listener);
+
+    destroy_all_mappings(d);
+    d->phys_map.ptr = PHYS_MAP_NODE_NIL;
+}
+
 static void core_begin(MemoryListener *listener)
 {
-    destroy_all_mappings();
     phys_sections_clear();
-    phys_map.ptr = PHYS_MAP_NODE_NIL;
     phys_section_unassigned = dummy_section(&io_mem_unassigned);
     phys_section_notdirty = dummy_section(&io_mem_notdirty);
     phys_section_rom = dummy_section(&io_mem_rom);
     phys_section_watch = dummy_section(&io_mem_watch);
 }
 
-static void core_commit(MemoryListener *listener)
+static void tcg_commit(MemoryListener *listener)
 {
     CPUArchState *env;
 
@@ -3189,38 +3191,6 @@
     }
 }
 
-static void core_region_add(MemoryListener *listener,
-                            MemoryRegionSection *section)
-{
-    cpu_register_physical_memory_log(section, section->readonly);
-}
-
-static void core_region_del(MemoryListener *listener,
-                            MemoryRegionSection *section)
-{
-}
-
-static void core_region_nop(MemoryListener *listener,
-                            MemoryRegionSection *section)
-{
-    cpu_register_physical_memory_log(section, section->readonly);
-}
-
-static void core_log_start(MemoryListener *listener,
-                           MemoryRegionSection *section)
-{
-}
-
-static void core_log_stop(MemoryListener *listener,
-                          MemoryRegionSection *section)
-{
-}
-
-static void core_log_sync(MemoryListener *listener,
-                          MemoryRegionSection *section)
-{
-}
-
 static void core_log_global_start(MemoryListener *listener)
 {
     cpu_physical_memory_set_dirty_tracking(1);
@@ -3231,26 +3201,6 @@
     cpu_physical_memory_set_dirty_tracking(0);
 }
 
-static void core_eventfd_add(MemoryListener *listener,
-                             MemoryRegionSection *section,
-                             bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
-static void core_eventfd_del(MemoryListener *listener,
-                             MemoryRegionSection *section,
-                             bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
-static void io_begin(MemoryListener *listener)
-{
-}
-
-static void io_commit(MemoryListener *listener)
-{
-}
-
 static void io_region_add(MemoryListener *listener,
                           MemoryRegionSection *section)
 {
@@ -3269,90 +3219,63 @@
     isa_unassign_ioport(section->offset_within_address_space, section->size);
 }
 
-static void io_region_nop(MemoryListener *listener,
-                          MemoryRegionSection *section)
-{
-}
-
-static void io_log_start(MemoryListener *listener,
-                         MemoryRegionSection *section)
-{
-}
-
-static void io_log_stop(MemoryListener *listener,
-                        MemoryRegionSection *section)
-{
-}
-
-static void io_log_sync(MemoryListener *listener,
-                        MemoryRegionSection *section)
-{
-}
-
-static void io_log_global_start(MemoryListener *listener)
-{
-}
-
-static void io_log_global_stop(MemoryListener *listener)
-{
-}
-
-static void io_eventfd_add(MemoryListener *listener,
-                           MemoryRegionSection *section,
-                           bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
-static void io_eventfd_del(MemoryListener *listener,
-                           MemoryRegionSection *section,
-                           bool match_data, uint64_t data, EventNotifier *e)
-{
-}
-
 static MemoryListener core_memory_listener = {
     .begin = core_begin,
-    .commit = core_commit,
-    .region_add = core_region_add,
-    .region_del = core_region_del,
-    .region_nop = core_region_nop,
-    .log_start = core_log_start,
-    .log_stop = core_log_stop,
-    .log_sync = core_log_sync,
     .log_global_start = core_log_global_start,
     .log_global_stop = core_log_global_stop,
-    .eventfd_add = core_eventfd_add,
-    .eventfd_del = core_eventfd_del,
-    .priority = 0,
+    .priority = 1,
 };
 
 static MemoryListener io_memory_listener = {
-    .begin = io_begin,
-    .commit = io_commit,
     .region_add = io_region_add,
     .region_del = io_region_del,
-    .region_nop = io_region_nop,
-    .log_start = io_log_start,
-    .log_stop = io_log_stop,
-    .log_sync = io_log_sync,
-    .log_global_start = io_log_global_start,
-    .log_global_stop = io_log_global_stop,
-    .eventfd_add = io_eventfd_add,
-    .eventfd_del = io_eventfd_del,
     .priority = 0,
 };
 
+static MemoryListener tcg_memory_listener = {
+    .commit = tcg_commit,
+};
+
+void address_space_init_dispatch(AddressSpace *as)
+{
+    AddressSpaceDispatch *d = g_new(AddressSpaceDispatch, 1);
+
+    d->phys_map  = (PhysPageEntry) { .ptr = PHYS_MAP_NODE_NIL, .is_leaf = 0 };
+    d->listener = (MemoryListener) {
+        .begin = mem_begin,
+        .region_add = mem_add,
+        .region_nop = mem_add,
+        .priority = 0,
+    };
+    as->dispatch = d;
+    memory_listener_register(&d->listener, as);
+}
+
+void address_space_destroy_dispatch(AddressSpace *as)
+{
+    AddressSpaceDispatch *d = as->dispatch;
+
+    memory_listener_unregister(&d->listener);
+    destroy_l2_mapping(&d->phys_map, P_L2_LEVELS - 1);
+    g_free(d);
+    as->dispatch = NULL;
+}
+
 static void memory_map_init(void)
 {
     system_memory = g_malloc(sizeof(*system_memory));
     memory_region_init(system_memory, "system", INT64_MAX);
-    set_system_memory_map(system_memory);
+    address_space_init(&address_space_memory, system_memory);
+    address_space_memory.name = "memory";
 
     system_io = g_malloc(sizeof(*system_io));
     memory_region_init(system_io, "io", 65536);
-    set_system_io_map(system_io);
+    address_space_init(&address_space_io, system_io);
+    address_space_io.name = "I/O";
 
-    memory_listener_register(&core_memory_listener, system_memory);
-    memory_listener_register(&io_memory_listener, system_io);
+    memory_listener_register(&core_memory_listener, &address_space_memory);
+    memory_listener_register(&io_memory_listener, &address_space_io);
+    memory_listener_register(&tcg_memory_listener, &address_space_memory);
 }
 
 MemoryRegion *get_system_memory(void)
@@ -3422,9 +3345,10 @@
     xen_modified_memory(addr, length);
 }
 
-void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
-                            int len, int is_write)
+void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
+                      int len, bool is_write)
 {
+    AddressSpaceDispatch *d = as->dispatch;
     int l;
     uint8_t *ptr;
     uint32_t val;
@@ -3436,7 +3360,7 @@
         l = (page + TARGET_PAGE_SIZE) - addr;
         if (l > len)
             l = len;
-        section = phys_page_find(page >> TARGET_PAGE_BITS);
+        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
 
         if (is_write) {
             if (!memory_region_is_ram(section->mr)) {
@@ -3507,10 +3431,36 @@
     }
 }
 
+void address_space_write(AddressSpace *as, target_phys_addr_t addr,
+                         const uint8_t *buf, int len)
+{   /* Write wrapper around address_space_rw(); the cast only drops const. */
+    address_space_rw(as, addr, (uint8_t *)buf, len, true);  /* is_write */
+}
+
+/**
+ * address_space_read: read from an address space.
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @len: number of bytes to transfer
+ */
+void address_space_read(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf, int len)
+{
+    address_space_rw(as, addr, buf, len, false);  /* is_write */
+}
+
+/* Same as address_space_rw() on the global address_space_memory. */
+void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
+                            int len, int is_write)
+{
+    address_space_rw(&address_space_memory, addr, buf, len, is_write);
+}
+
 /* used for ROM loading : can write in RAM and ROM */
 void cpu_physical_memory_write_rom(target_phys_addr_t addr,
                                    const uint8_t *buf, int len)
 {
+    AddressSpaceDispatch *d = address_space_memory.dispatch;
     int l;
     uint8_t *ptr;
     target_phys_addr_t page;
@@ -3521,7 +3471,7 @@
         l = (page + TARGET_PAGE_SIZE) - addr;
         if (l > len)
             l = len;
-        section = phys_page_find(page >> TARGET_PAGE_BITS);
+        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
 
         if (!(memory_region_is_ram(section->mr) ||
               memory_region_is_romd(section->mr))) {
@@ -3595,10 +3545,12 @@
  * Use cpu_register_map_client() to know when retrying the map operation is
  * likely to succeed.
  */
-void *cpu_physical_memory_map(target_phys_addr_t addr,
-                              target_phys_addr_t *plen,
-                              int is_write)
+void *address_space_map(AddressSpace *as,
+                        target_phys_addr_t addr,
+                        target_phys_addr_t *plen,
+                        bool is_write)
 {
+    AddressSpaceDispatch *d = as->dispatch;
     target_phys_addr_t len = *plen;
     target_phys_addr_t todo = 0;
     int l;
@@ -3613,7 +3565,7 @@
         l = (page + TARGET_PAGE_SIZE) - addr;
         if (l > len)
             l = len;
-        section = phys_page_find(page >> TARGET_PAGE_BITS);
+        section = phys_page_find(d, page >> TARGET_PAGE_BITS);
 
         if (!(memory_region_is_ram(section->mr) && !section->readonly)) {
             if (todo || bounce.buffer) {
@@ -3623,7 +3575,7 @@
             bounce.addr = addr;
             bounce.len = l;
             if (!is_write) {
-                cpu_physical_memory_read(addr, bounce.buffer, l);
+                address_space_read(as, addr, bounce.buffer, l);
             }
 
             *plen = l;
@@ -3644,12 +3596,12 @@
     return ret;
 }
 
-/* Unmaps a memory region previously mapped by cpu_physical_memory_map().
+/* Unmaps a memory region previously mapped by address_space_map().
  * Will also mark the memory as dirty if is_write == 1.  access_len gives
  * the amount of memory that was actually read or written by the caller.
  */
-void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
-                               int is_write, target_phys_addr_t access_len)
+void address_space_unmap(AddressSpace *as, void *buffer, target_phys_addr_t len,
+                         int is_write, target_phys_addr_t access_len)
 {
     if (buffer != bounce.buffer) {
         if (is_write) {
@@ -3670,13 +3622,26 @@
         return;
     }
     if (is_write) {
-        cpu_physical_memory_write(bounce.addr, bounce.buffer, access_len);
+        address_space_write(as, bounce.addr, bounce.buffer, access_len);
     }
     qemu_vfree(bounce.buffer);
     bounce.buffer = NULL;
     cpu_notify_map_clients();
 }
 
+void *cpu_physical_memory_map(target_phys_addr_t addr,
+                              target_phys_addr_t *plen,
+                              int is_write)
+{   /* Same as address_space_map() on the global address_space_memory. */
+    return address_space_map(&address_space_memory, addr, plen, is_write);
+}
+
+void cpu_physical_memory_unmap(void *buffer, target_phys_addr_t len,
+                               int is_write, target_phys_addr_t access_len)
+{   /* Same as address_space_unmap() on the global address_space_memory. */
+    address_space_unmap(&address_space_memory, buffer, len, is_write, access_len);
+}
+
 /* warning: addr must be aligned */
 static inline uint32_t ldl_phys_internal(target_phys_addr_t addr,
                                          enum device_endian endian)
@@ -3685,7 +3650,7 @@
     uint32_t val;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!(memory_region_is_ram(section->mr) ||
           memory_region_is_romd(section->mr))) {
@@ -3744,7 +3709,7 @@
     uint64_t val;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!(memory_region_is_ram(section->mr) ||
           memory_region_is_romd(section->mr))) {
@@ -3811,7 +3776,7 @@
     uint64_t val;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!(memory_region_is_ram(section->mr) ||
           memory_region_is_romd(section->mr))) {
@@ -3870,7 +3835,7 @@
     uint8_t *ptr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         addr = memory_region_section_addr(section, addr);
@@ -3902,7 +3867,7 @@
     uint8_t *ptr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         addr = memory_region_section_addr(section, addr);
@@ -3931,7 +3896,7 @@
     uint8_t *ptr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         addr = memory_region_section_addr(section, addr);
@@ -3998,7 +3963,7 @@
     uint8_t *ptr;
     MemoryRegionSection *section;
 
-    section = phys_page_find(addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch, addr >> TARGET_PAGE_BITS);
 
     if (!memory_region_is_ram(section->mr) || section->readonly) {
         addr = memory_region_section_addr(section, addr);
@@ -4188,7 +4153,7 @@
     }
     /* XXX: avoid using doubles ? */
     cpu_fprintf(f, "Translation buffer state:\n");
-    cpu_fprintf(f, "gen code size       %td/%ld\n",
+    cpu_fprintf(f, "gen code size       %td/%zd\n",
                 code_gen_ptr - code_gen_buffer, code_gen_buffer_max_size);
     cpu_fprintf(f, "TB count            %d/%d\n", 
                 nb_tbs, code_gen_max_blocks);
@@ -4234,7 +4199,8 @@
 {
     MemoryRegionSection *section;
 
-    section = phys_page_find(phys_addr >> TARGET_PAGE_BITS);
+    section = phys_page_find(address_space_memory.dispatch,
+                             phys_addr >> TARGET_PAGE_BITS);
 
     return !(memory_region_is_ram(section->mr) ||
              memory_region_is_romd(section->mr));
diff --git a/hmp.c b/hmp.c
index 70bdec2..2b97982 100644
--- a/hmp.c
+++ b/hmp.c
@@ -152,6 +152,14 @@
         monitor_printf(mon, "Migration status: %s\n", info->status);
         monitor_printf(mon, "total time: %" PRIu64 " milliseconds\n",
                        info->total_time);
+        if (info->has_expected_downtime) {
+            monitor_printf(mon, "expected downtime: %" PRIu64 " milliseconds\n",
+                           info->expected_downtime);
+        }
+        if (info->has_downtime) {
+            monitor_printf(mon, "downtime: %" PRIu64 " milliseconds\n",
+                           info->downtime);
+        }
     }
 
     if (info->has_ram) {
@@ -167,6 +175,10 @@
                        info->ram->normal);
         monitor_printf(mon, "normal bytes: %" PRIu64 " kbytes\n",
                        info->ram->normal_bytes >> 10);
+        if (info->ram->dirty_pages_rate) {
+            monitor_printf(mon, "dirty pages rate: %" PRIu64 " pages\n",
+                           info->ram->dirty_pages_rate);
+        }
     }
 
     if (info->has_disk) {
diff --git a/hw/Makefile.objs b/hw/Makefile.objs
index 854faa9..af4ab0c 100644
--- a/hw/Makefile.objs
+++ b/hw/Makefile.objs
@@ -20,7 +20,8 @@
 common-obj-$(CONFIG_ESCC) += escc.o
 common-obj-$(CONFIG_EMPTY_SLOT) += empty_slot.o
 
-common-obj-$(CONFIG_SERIAL) += serial.o
+common-obj-$(CONFIG_SERIAL) += serial.o serial-isa.o
+common-obj-$(CONFIG_SERIAL_PCI) += serial-pci.o
 common-obj-$(CONFIG_PARALLEL) += parallel.o
 common-obj-$(CONFIG_I8254) += i8254_common.o i8254.o
 common-obj-$(CONFIG_PCSPK) += pcspk.o
diff --git a/hw/alpha_dp264.c b/hw/alpha_dp264.c
index 5ea04c7..76d8ae8 100644
--- a/hw/alpha_dp264.c
+++ b/hw/alpha_dp264.c
@@ -15,6 +15,7 @@
 #include "mc146818rtc.h"
 #include "ide.h"
 #include "i8254.h"
+#include "serial.h"
 
 #define MAX_IDE_BUS 2
 
@@ -42,13 +43,13 @@
     return (slot + 1) * 4 + irq_num;
 }
 
-static void clipper_init(ram_addr_t ram_size,
-                         const char *boot_device,
-                         const char *kernel_filename,
-                         const char *kernel_cmdline,
-                         const char *initrd_filename,
-                         const char *cpu_model)
+static void clipper_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     CPUAlphaState *cpus[4];
     PCIBus *pci_bus;
     ISABus *isa_bus;
diff --git a/hw/an5206.c b/hw/an5206.c
index 25407c0..042c5fc 100644
--- a/hw/an5206.c
+++ b/hw/an5206.c
@@ -19,11 +19,11 @@
 
 /* Board init.  */
 
-static void an5206_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void an5206_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     CPUM68KState *env;
     int kernel_size;
     uint64_t elf_entry;
diff --git a/hw/axis_dev88.c b/hw/axis_dev88.c
index eab6327..2fd7356 100644
--- a/hw/axis_dev88.c
+++ b/hw/axis_dev88.c
@@ -242,11 +242,12 @@
 static struct cris_load_info li;
 
 static
-void axisdev88_init (ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+void axisdev88_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
     CRISCPU *cpu;
     CPUCRISState *env;
     DeviceState *dev;
diff --git a/hw/boards.h b/hw/boards.h
index a2e0a54..813d0e5 100644
--- a/hw/boards.h
+++ b/hw/boards.h
@@ -5,12 +5,16 @@
 
 #include "qdev.h"
 
-typedef void QEMUMachineInitFunc(ram_addr_t ram_size,
-                                 const char *boot_device,
-                                 const char *kernel_filename,
-                                 const char *kernel_cmdline,
-                                 const char *initrd_filename,
-                                 const char *cpu_model);
+typedef struct QEMUMachineInitArgs {  /* arguments for QEMUMachineInitFunc */
+    ram_addr_t ram_size;
+    const char *boot_device;
+    const char *kernel_filename;
+    const char *kernel_cmdline;
+    const char *initrd_filename;
+    const char *cpu_model;
+} QEMUMachineInitArgs;
+
+typedef void QEMUMachineInitFunc(QEMUMachineInitArgs *args);
 
 typedef void QEMUMachineResetFunc(void);
 
diff --git a/hw/collie.c b/hw/collie.c
index 56f89a9..695982a 100644
--- a/hw/collie.c
+++ b/hw/collie.c
@@ -23,11 +23,12 @@
     .ram_size = 0x20000000,
 };
 
-static void collie_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void collie_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     StrongARMState *s;
     DriveInfo *dinfo;
     MemoryRegion *sysmem = get_system_memory();
diff --git a/hw/dummy_m68k.c b/hw/dummy_m68k.c
index 7cc7a99..f436a0c 100644
--- a/hw/dummy_m68k.c
+++ b/hw/dummy_m68k.c
@@ -16,11 +16,11 @@
 
 /* Board init.  */
 
-static void dummy_m68k_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void dummy_m68k_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     CPUM68KState *env;
     MemoryRegion *address_space_mem =  get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/exynos4_boards.c b/hw/exynos4_boards.c
index 4bb0a60..4951064 100644
--- a/hw/exynos4_boards.c
+++ b/hw/exynos4_boards.c
@@ -130,22 +130,22 @@
             exynos4_board_ram_size[board_type]);
 }
 
-static void nuri_init(ram_addr_t ram_size,
-        const char *boot_device,
-        const char *kernel_filename, const char *kernel_cmdline,
-        const char *initrd_filename, const char *cpu_model)
+static void nuri_init(QEMUMachineInitArgs *args)
 {
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     exynos4_boards_init_common(kernel_filename, kernel_cmdline,
                 initrd_filename, EXYNOS4_BOARD_NURI);
 
     arm_load_kernel(arm_env_get_cpu(first_cpu), &exynos4_board_binfo);
 }
 
-static void smdkc210_init(ram_addr_t ram_size,
-        const char *boot_device,
-        const char *kernel_filename, const char *kernel_cmdline,
-        const char *initrd_filename, const char *cpu_model)
+static void smdkc210_init(QEMUMachineInitArgs *args)
 {
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     Exynos4210State *s = exynos4_boards_init_common(kernel_filename,
             kernel_cmdline, initrd_filename, EXYNOS4_BOARD_SMDKC210);
 
diff --git a/hw/gumstix.c b/hw/gumstix.c
index 13a36ea..4103a88 100644
--- a/hw/gumstix.c
+++ b/hw/gumstix.c
@@ -45,10 +45,7 @@
 
 static const int sector_len = 128 * 1024;
 
-static void connex_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void connex_init(QEMUMachineInitArgs *args)
 {
     PXA2xxState *cpu;
     DriveInfo *dinfo;
@@ -84,11 +81,9 @@
                     qdev_get_gpio_in(cpu->gpio, 36));
 }
 
-static void verdex_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void verdex_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
     PXA2xxState *cpu;
     DriveInfo *dinfo;
     int be;
diff --git a/hw/highbank.c b/hw/highbank.c
index 11aa131..15036b6 100644
--- a/hw/highbank.c
+++ b/hw/highbank.c
@@ -187,11 +187,13 @@
  * 32-bit host, set the reg value of memory to 0xf7ff00000 in the
  * device tree and pass -m 2047 to QEMU.
  */
-static void highbank_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void highbank_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     DeviceState *dev;
     SysBusDevice *busdev;
     qemu_irq *irqp;
diff --git a/hw/hw.h b/hw/hw.h
index 16101de..b337ee3 100644
--- a/hw/hw.h
+++ b/hw/hw.h
@@ -12,6 +12,7 @@
 #include "irq.h"
 #include "qemu-file.h"
 #include "vmstate.h"
+#include "qemu-log.h"
 
 #ifdef NEED_CPU_H
 #if TARGET_LONG_BITS == 64
diff --git a/hw/integratorcp.c b/hw/integratorcp.c
index d0e2e90..ac0ea83 100644
--- a/hw/integratorcp.c
+++ b/hw/integratorcp.c
@@ -438,11 +438,13 @@
     .board_id = 0x113,
 };
 
-static void integratorcp_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void integratorcp_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     ARMCPU *cpu;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/kzm.c b/hw/kzm.c
index 68cd1b4..687daf3 100644
--- a/hw/kzm.c
+++ b/hw/kzm.c
@@ -21,7 +21,7 @@
 #include "net.h"
 #include "sysemu.h"
 #include "boards.h"
-#include "pc.h" /* for the FPGA UART that emulates a 16550 */
+#include "serial.h"
 #include "imx.h"
 
     /* Memory map for Kzm Emulation Baseboard:
@@ -70,11 +70,13 @@
     .board_id = 1722,
 };
 
-static void kzm_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void kzm_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     ARMCPU *cpu;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/leon3.c b/hw/leon3.c
index 7a9729d..7742738 100644
--- a/hw/leon3.c
+++ b/hw/leon3.c
@@ -94,13 +94,11 @@
     }
 }
 
-static void leon3_generic_hw_init(ram_addr_t  ram_size,
-                                  const char *boot_device,
-                                  const char *kernel_filename,
-                                  const char *kernel_cmdline,
-                                  const char *initrd_filename,
-                                  const char *cpu_model)
+static void leon3_generic_hw_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     SPARCCPU *cpu;
     CPUSPARCState   *env;
     MemoryRegion *address_space_mem = get_system_memory();
diff --git a/hw/lm32_boards.c b/hw/lm32_boards.c
index b76d800..c5a62c8 100644
--- a/hw/lm32_boards.c
+++ b/hw/lm32_boards.c
@@ -69,12 +69,10 @@
     env->deba = reset_info->flash_base;
 }
 
-static void lm32_evr_init(ram_addr_t ram_size_not_used,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+static void lm32_evr_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     LM32CPU *cpu;
     CPULM32State *env;
     DriveInfo *dinfo;
@@ -159,12 +157,12 @@
     qemu_register_reset(main_cpu_reset, reset_info);
 }
 
-static void lm32_uclinux_init(ram_addr_t ram_size_not_used,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+static void lm32_uclinux_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     LM32CPU *cpu;
     CPULM32State *env;
     DriveInfo *dinfo;
diff --git a/hw/mainstone.c b/hw/mainstone.c
index 97687b6..c0d6034 100644
--- a/hw/mainstone.c
+++ b/hw/mainstone.c
@@ -171,11 +171,13 @@
     arm_load_kernel(mpu->cpu, &mainstone_binfo);
 }
 
-static void mainstone_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void mainstone_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     mainstone_common_init(get_system_memory(), ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, mainstone, 0x196);
 }
diff --git a/hw/mcf5208.c b/hw/mcf5208.c
index ee25b1b..688bc3c 100644
--- a/hw/mcf5208.c
+++ b/hw/mcf5208.c
@@ -187,11 +187,11 @@
     }
 }
 
-static void mcf5208evb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void mcf5208evb_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     CPUM68KState *env;
     int kernel_size;
     uint64_t elf_entry;
diff --git a/hw/milkymist.c b/hw/milkymist.c
index 2e7235b..ca9ed43 100644
--- a/hw/milkymist.c
+++ b/hw/milkymist.c
@@ -73,12 +73,12 @@
 }
 
 static void
-milkymist_init(ram_addr_t ram_size_not_used,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+milkymist_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     LM32CPU *cpu;
     CPULM32State *env;
     int kernel_size;
diff --git a/hw/mips_fulong2e.c b/hw/mips_fulong2e.c
index d4a8672..5fcf900 100644
--- a/hw/mips_fulong2e.c
+++ b/hw/mips_fulong2e.c
@@ -20,6 +20,7 @@
 
 #include "hw.h"
 #include "pc.h"
+#include "serial.h"
 #include "fdc.h"
 #include "net.h"
 #include "boards.h"
@@ -256,10 +257,13 @@
     }
 }
 
-static void mips_fulong2e_init(ram_addr_t ram_size, const char *boot_device,
-                        const char *kernel_filename, const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void mips_fulong2e_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/mips_jazz.c b/hw/mips_jazz.c
index db927f1..6bd231d 100644
--- a/hw/mips_jazz.c
+++ b/hw/mips_jazz.c
@@ -26,6 +26,7 @@
 #include "mips.h"
 #include "mips_cpudevs.h"
 #include "pc.h"
+#include "serial.h"
 #include "isa.h"
 #include "fdc.h"
 #include "sysemu.h"
@@ -302,21 +303,19 @@
 }
 
 static
-void mips_magnum_init (ram_addr_t ram_size,
-                       const char *boot_device,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+void mips_magnum_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
         mips_jazz_init(get_system_memory(), get_system_io(),
                        ram_size, cpu_model, JAZZ_MAGNUM);
 }
 
 static
-void mips_pica61_init (ram_addr_t ram_size,
-                       const char *boot_device,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+void mips_pica61_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
     mips_jazz_init(get_system_memory(), get_system_io(),
                    ram_size, cpu_model, JAZZ_PICA61);
 }
diff --git a/hw/mips_malta.c b/hw/mips_malta.c
index 632b466..22ec8b9 100644
--- a/hw/mips_malta.c
+++ b/hw/mips_malta.c
@@ -24,6 +24,7 @@
 
 #include "hw.h"
 #include "pc.h"
+#include "serial.h"
 #include "fdc.h"
 #include "net.h"
 #include "boards.h"
@@ -775,11 +776,13 @@
 }
 
 static
-void mips_malta_init (ram_addr_t ram_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+void mips_malta_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     pflash_t *fl;
     MemoryRegion *system_memory = get_system_memory();
diff --git a/hw/mips_mipssim.c b/hw/mips_mipssim.c
index 830f635..a95a3c1 100644
--- a/hw/mips_mipssim.c
+++ b/hw/mips_mipssim.c
@@ -27,7 +27,7 @@
 #include "hw.h"
 #include "mips.h"
 #include "mips_cpudevs.h"
-#include "pc.h"
+#include "serial.h"
 #include "isa.h"
 #include "net.h"
 #include "sysemu.h"
@@ -131,11 +131,13 @@
 }
 
 static void
-mips_mipssim_init (ram_addr_t ram_size,
-                   const char *boot_device,
-                   const char *kernel_filename, const char *kernel_cmdline,
-                   const char *initrd_filename, const char *cpu_model)
+mips_mipssim_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/mips_r4k.c b/hw/mips_r4k.c
index 967a76e..539577b 100644
--- a/hw/mips_r4k.c
+++ b/hw/mips_r4k.c
@@ -11,6 +11,7 @@
 #include "mips.h"
 #include "mips_cpudevs.h"
 #include "pc.h"
+#include "serial.h"
 #include "isa.h"
 #include "net.h"
 #include "sysemu.h"
@@ -151,11 +152,13 @@
 
 static const int sector_len = 32 * 1024;
 static
-void mips_r4k_init (ram_addr_t ram_size,
-                    const char *boot_device,
-                    const char *kernel_filename, const char *kernel_cmdline,
-                    const char *initrd_filename, const char *cpu_model)
+void mips_r4k_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/musicpal.c b/hw/musicpal.c
index f305e21..159d3c3 100644
--- a/hw/musicpal.c
+++ b/hw/musicpal.c
@@ -15,7 +15,7 @@
 #include "net.h"
 #include "sysemu.h"
 #include "boards.h"
-#include "pc.h"
+#include "serial.h"
 #include "qemu-timer.h"
 #include "ptimer.h"
 #include "block.h"
@@ -1508,11 +1508,12 @@
     .board_id = 0x20e,
 };
 
-static void musicpal_init(ram_addr_t ram_size,
-               const char *boot_device,
-               const char *kernel_filename, const char *kernel_cmdline,
-               const char *initrd_filename, const char *cpu_model)
+static void musicpal_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     ARMCPU *cpu;
     qemu_irq *cpu_pic;
     qemu_irq pic[32];
diff --git a/hw/nseries.c b/hw/nseries.c
index 6df71eb..7ada90d 100644
--- a/hw/nseries.c
+++ b/hw/nseries.c
@@ -1397,21 +1397,27 @@
     .atag_board = n810_atag_setup,
 };
 
-static void n800_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void n800_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     return n8x0_init(ram_size, boot_device,
                     kernel_filename, kernel_cmdline, initrd_filename,
                     cpu_model, &n800_binfo, 800);
 }
 
-static void n810_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void n810_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     return n8x0_init(ram_size, boot_device,
                     kernel_filename, kernel_cmdline, initrd_filename,
                     cpu_model, &n810_binfo, 810);
diff --git a/hw/null-machine.c b/hw/null-machine.c
index 69910d3..d813c08 100644
--- a/hw/null-machine.c
+++ b/hw/null-machine.c
@@ -15,12 +15,7 @@
 #include "hw/hw.h"
 #include "hw/boards.h"
 
-static void machine_none_init(ram_addr_t ram_size,
-                              const char *boot_device,
-                              const char *kernel_filename,
-                              const char *kernel_cmdline,
-                              const char *initrd_filename,
-                              const char *cpu_model)
+static void machine_none_init(QEMUMachineInitArgs *args)
 {
 }
 
diff --git a/hw/omap_sx1.c b/hw/omap_sx1.c
index abca341..ad17487 100644
--- a/hw/omap_sx1.c
+++ b/hw/omap_sx1.c
@@ -209,20 +209,26 @@
     //~ qemu_console_resize(ds, 640, 480);
 }
 
-static void sx1_init_v1(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void sx1_init_v1(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sx1_init(ram_size, boot_device, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, 1);
 }
 
-static void sx1_init_v2(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void sx1_init_v2(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sx1_init(ram_size, boot_device, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, 2);
 }
diff --git a/hw/omap_uart.c b/hw/omap_uart.c
index 167d5c4..1c16a54 100644
--- a/hw/omap_uart.c
+++ b/hw/omap_uart.c
@@ -20,8 +20,7 @@
 #include "qemu-char.h"
 #include "hw.h"
 #include "omap.h"
-/* We use pc-style serial ports.  */
-#include "pc.h"
+#include "serial.h"
 #include "exec-memory.h"
 
 /* UARTs */
diff --git a/hw/openrisc_sim.c b/hw/openrisc_sim.c
index 55e97f0..7327740 100644
--- a/hw/openrisc_sim.c
+++ b/hw/openrisc_sim.c
@@ -21,7 +21,8 @@
 #include "hw.h"
 #include "boards.h"
 #include "elf.h"
-#include "pc.h"
+#include "serial.h"
+#include "net.h"
 #include "loader.h"
 #include "exec-memory.h"
 #include "sysemu.h"
@@ -90,13 +91,11 @@
     cpu->env.pc = entry;
 }
 
-static void openrisc_sim_init(ram_addr_t ram_size,
-                              const char *boot_device,
-                              const char *kernel_filename,
-                              const char *kernel_cmdline,
-                              const char *initrd_filename,
-                              const char *cpu_model)
+static void openrisc_sim_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
    OpenRISCCPU *cpu = NULL;
     MemoryRegion *ram;
     int n;
diff --git a/hw/palm.c b/hw/palm.c
index bacdc90..032b8d6 100644
--- a/hw/palm.c
+++ b/hw/palm.c
@@ -190,11 +190,12 @@
     .board_id = 0x331,
 };
 
-static void palmte_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void palmte_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     MemoryRegion *address_space_mem = get_system_memory();
     struct omap_mpu_state_s *mpu;
     int flash_size = 0x00800000;
diff --git a/hw/pc.c b/hw/pc.c
index 6c0722d..805e8ca 100644
--- a/hw/pc.c
+++ b/hw/pc.c
@@ -23,6 +23,7 @@
  */
 #include "hw.h"
 #include "pc.h"
+#include "serial.h"
 #include "apic.h"
 #include "fdc.h"
 #include "ide.h"
diff --git a/hw/pc.h b/hw/pc.h
index 9923d96..6cba7ce 100644
--- a/hw/pc.h
+++ b/hw/pc.h
@@ -12,33 +12,6 @@
 
 /* PC-style peripherals (also used by other machines).  */
 
-/* serial.c */
-
-SerialState *serial_init(int base, qemu_irq irq, int baudbase,
-                         CharDriverState *chr);
-SerialState *serial_mm_init(MemoryRegion *address_space,
-                            target_phys_addr_t base, int it_shift,
-                            qemu_irq irq, int baudbase,
-                            CharDriverState *chr, enum device_endian);
-static inline bool serial_isa_init(ISABus *bus, int index,
-                                   CharDriverState *chr)
-{
-    ISADevice *dev;
-
-    dev = isa_try_create(bus, "isa-serial");
-    if (!dev) {
-        return false;
-    }
-    qdev_prop_set_uint32(&dev->qdev, "index", index);
-    qdev_prop_set_chr(&dev->qdev, "chardev", chr);
-    if (qdev_init(&dev->qdev) < 0) {
-        return false;
-    }
-    return true;
-}
-
-void serial_set_frequency(SerialState *s, uint32_t frequency);
-
 /* parallel.c */
 static inline bool parallel_init(ISABus *bus, int index, CharDriverState *chr)
 {
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index 82364ab..bf04a42 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -287,13 +287,14 @@
     }
 }
 
-static void pc_init_pci(ram_addr_t ram_size,
-                        const char *boot_device,
-                        const char *kernel_filename,
-                        const char *kernel_cmdline,
-                        const char *initrd_filename,
-                        const char *cpu_model)
+static void pc_init_pci(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     pc_init1(get_system_memory(),
              get_system_io(),
              ram_size, boot_device,
@@ -301,13 +302,14 @@
              initrd_filename, cpu_model, 1, 1);
 }
 
-static void pc_init_pci_no_kvmclock(ram_addr_t ram_size,
-                                    const char *boot_device,
-                                    const char *kernel_filename,
-                                    const char *kernel_cmdline,
-                                    const char *initrd_filename,
-                                    const char *cpu_model)
+static void pc_init_pci_no_kvmclock(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     pc_init1(get_system_memory(),
              get_system_io(),
              ram_size, boot_device,
@@ -315,13 +317,14 @@
              initrd_filename, cpu_model, 1, 0);
 }
 
-static void pc_init_isa(ram_addr_t ram_size,
-                        const char *boot_device,
-                        const char *kernel_filename,
-                        const char *kernel_cmdline,
-                        const char *initrd_filename,
-                        const char *cpu_model)
+static void pc_init_isa(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (cpu_model == NULL)
         cpu_model = "486";
     pc_init1(get_system_memory(),
@@ -332,19 +335,12 @@
 }
 
 #ifdef CONFIG_XEN
-static void pc_xen_hvm_init(ram_addr_t ram_size,
-                            const char *boot_device,
-                            const char *kernel_filename,
-                            const char *kernel_cmdline,
-                            const char *initrd_filename,
-                            const char *cpu_model)
+static void pc_xen_hvm_init(QEMUMachineInitArgs *args)
 {
     if (xen_hvm_init() != 0) {
         hw_error("xen hardware virtual machine initialisation failed");
     }
-    pc_init_pci_no_kvmclock(ram_size, boot_device,
-                            kernel_filename, kernel_cmdline,
-                            initrd_filename, cpu_model);
+    pc_init_pci_no_kvmclock(args);
     xen_vcpu_init();
 }
 #endif
@@ -379,6 +375,10 @@
             .driver   = "qxl-vga",\
             .property = "revision",\
             .value    = stringify(3),\
+        },{\
+            .driver   = "VGA",\
+            .property = "mmio",\
+            .value    = "off",\
         }
 
 static QEMUMachine pc_machine_v1_2 = {
diff --git a/hw/pci.c b/hw/pci.c
index 2ca6ff6..7eeaac0 100644
--- a/hw/pci.c
+++ b/hw/pci.c
@@ -33,6 +33,7 @@
 #include "qmp-commands.h"
 #include "msi.h"
 #include "msix.h"
+#include "exec-memory.h"
 
 //#define DEBUG_PCI
 #ifdef DEBUG_PCI
@@ -777,6 +778,17 @@
     pci_dev->bus = bus;
     if (bus->dma_context_fn) {
         pci_dev->dma = bus->dma_context_fn(bus, bus->dma_context_opaque, devfn);
+    } else {
+        /* FIXME: Make dma_context_fn use MemoryRegions instead, so this path is
+         * taken unconditionally */
+        /* FIXME: inherit memory region from bus creator */
+        memory_region_init_alias(&pci_dev->bus_master_enable_region, "bus master",
+                                 get_system_memory(), 0,
+                                 memory_region_size(get_system_memory()));
+        memory_region_set_enabled(&pci_dev->bus_master_enable_region, false);
+        address_space_init(&pci_dev->bus_master_as, &pci_dev->bus_master_enable_region);
+        pci_dev->dma = g_new(DMAContext, 1);
+        dma_context_init(pci_dev->dma, &pci_dev->bus_master_as, NULL, NULL, NULL);
     }
     pci_dev->devfn = devfn;
     pstrcpy(pci_dev->name, sizeof(pci_dev->name), name);
@@ -830,6 +842,13 @@
     qemu_free_irqs(pci_dev->irq);
     pci_dev->bus->devices[pci_dev->devfn] = NULL;
     pci_config_free(pci_dev);
+
+    if (!pci_dev->bus->dma_context_fn) {
+        address_space_destroy(&pci_dev->bus_master_as);
+        memory_region_destroy(&pci_dev->bus_master_enable_region);
+        g_free(pci_dev->dma);
+        pci_dev->dma = NULL;
+    }
 }
 
 static void pci_unregister_io_regions(PCIDevice *pci_dev)
@@ -1051,8 +1070,17 @@
         range_covers_byte(addr, l, PCI_COMMAND))
         pci_update_mappings(d);
 
-    if (range_covers_byte(addr, l, PCI_COMMAND))
+    if (range_covers_byte(addr, l, PCI_COMMAND)) {
         pci_update_irq_disabled(d, was_irq_disabled);
+        /* bus_master_enable_region is only set up at registration time
+         * when the bus provides no dma_context_fn, and is only torn down
+         * under the same condition; do not touch it otherwise. */
+        if (!d->bus->dma_context_fn) {
+            memory_region_set_enabled(&d->bus_master_enable_region,
+                                      pci_get_word(d->config + PCI_COMMAND)
+                                        & PCI_COMMAND_MASTER);
+        }
+    }
 
     msi_write_config(d, addr, val, l);
     msix_write_config(d, addr, val, l);
diff --git a/hw/pci.h b/hw/pci.h
index d50d26c..1f902f5 100644
--- a/hw/pci.h
+++ b/hw/pci.h
@@ -211,6 +211,8 @@
     int32_t devfn;
     char name[64];
     PCIIORegion io_regions[PCI_NUM_REGIONS];
+    AddressSpace bus_master_as;
+    MemoryRegion bus_master_enable_region;
     DMAContext *dma;
 
     /* do not access the following fields */
diff --git a/hw/pci_ids.h b/hw/pci_ids.h
index 301bf1c..c017a79 100644
--- a/hw/pci_ids.h
+++ b/hw/pci_ids.h
@@ -37,6 +37,7 @@
 #define PCI_CLASS_BRIDGE_PCI             0x0604
 #define PCI_CLASS_BRIDGE_OTHER           0x0680
 
+#define PCI_CLASS_COMMUNICATION_SERIAL   0x0700
 #define PCI_CLASS_COMMUNICATION_OTHER    0x0780
 
 #define PCI_CLASS_PROCESSOR_CO           0x0b40
diff --git a/hw/petalogix_ml605_mmu.c b/hw/petalogix_ml605_mmu.c
index b9bfbed..5b45809 100644
--- a/hw/petalogix_ml605_mmu.c
+++ b/hw/petalogix_ml605_mmu.c
@@ -34,7 +34,7 @@
 #include "boards.h"
 #include "xilinx.h"
 #include "blockdev.h"
-#include "pc.h"
+#include "serial.h"
 #include "exec-memory.h"
 #include "ssi.h"
 
@@ -73,12 +73,10 @@
 }
 
 static void
-petalogix_ml605_init(ram_addr_t ram_size,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+petalogix_ml605_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
     MemoryRegion *address_space_mem = get_system_memory();
     DeviceState *dev, *dma, *eth0;
     MicroBlazeCPU *cpu;
diff --git a/hw/petalogix_s3adsp1800_mmu.c b/hw/petalogix_s3adsp1800_mmu.c
index 2cf6882..71c32ce 100644
--- a/hw/petalogix_s3adsp1800_mmu.c
+++ b/hw/petalogix_s3adsp1800_mmu.c
@@ -57,12 +57,10 @@
 }
 
 static void
-petalogix_s3adsp1800_init(ram_addr_t ram_size,
-                          const char *boot_device,
-                          const char *kernel_filename,
-                          const char *kernel_cmdline,
-                          const char *initrd_filename, const char *cpu_model)
+petalogix_s3adsp1800_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
     DeviceState *dev;
     MicroBlazeCPU *cpu;
     CPUMBState *env;
diff --git a/hw/pl011.c b/hw/pl011.c
index 3245702..fb22736 100644
--- a/hw/pl011.c
+++ b/hw/pl011.c
@@ -107,7 +107,8 @@
     case 18: /* UARTDMACR */
         return s->dmacr;
     default:
-        hw_error("pl011_read: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl011_read: Bad offset %x\n", (int)offset);
         return 0;
     }
 }
@@ -178,11 +179,13 @@
         break;
     case 18: /* UARTDMACR */
         s->dmacr = value;
-        if (value & 3)
-            hw_error("PL011: DMA not implemented\n");
+        if (value & 3) {
+            qemu_log_mask(LOG_UNIMP, "pl011: DMA not implemented\n");
+        }
         break;
     default:
-        hw_error("pl011_write: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl011_write: Bad offset %x\n", (int)offset);
     }
 }
 
diff --git a/hw/pl022.c b/hw/pl022.c
index 60e35da..e2ae315 100644
--- a/hw/pl022.c
+++ b/hw/pl022.c
@@ -168,7 +168,8 @@
         /* Not implemented.  */
         return 0;
     default:
-        hw_error("pl022_read: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl022_read: Bad offset %x\n", (int)offset);
         return 0;
     }
 }
@@ -211,11 +212,12 @@
         break;
     case 0x20: /* DMACR */
         if (value) {
-            hw_error("pl022: DMA not implemented\n");
+            qemu_log_mask(LOG_UNIMP, "pl022: DMA not implemented\n");
         }
         break;
     default:
-        hw_error("pl022_write: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl022_write: Bad offset %x\n", (int)offset);
     }
 }
 
diff --git a/hw/pl031.c b/hw/pl031.c
index 9602664..6cbaf23 100644
--- a/hw/pl031.c
+++ b/hw/pl031.c
@@ -120,11 +120,13 @@
     case RTC_MIS:
         return s->is & s->im;
     case RTC_ICR:
-        fprintf(stderr, "qemu: pl031_read: Unexpected offset 0x%x\n",
-                (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl031: read of write-only register at offset 0x%x\n",
+                      (int)offset);
         break;
     default:
-        hw_error("pl031_read: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl031_read: Bad offset 0x%x\n", (int)offset);
         break;
     }
 
@@ -167,12 +169,14 @@
     case RTC_DR:
     case RTC_MIS:
     case RTC_RIS:
-        fprintf(stderr, "qemu: pl031_write: Unexpected offset 0x%x\n",
-                (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl031: write to read-only register at offset 0x%x\n",
+                      (int)offset);
         break;
 
     default:
-        hw_error("pl031_write: Bad offset 0x%x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl031_write: Bad offset 0x%x\n", (int)offset);
         break;
     }
 }
diff --git a/hw/pl041.c b/hw/pl041.c
index b6723be..9a6db1b 100644
--- a/hw/pl041.c
+++ b/hw/pl041.c
@@ -536,8 +536,9 @@
     default:
         /* NC FIFO depth of 16 is not allowed because its id bits in
            AACIPERIPHID3 overlap with the id for the default NC FIFO depth */
-        fprintf(stderr, "pl041: unsupported non-compact fifo depth [%i]\n",
-                s->fifo_depth);
+        qemu_log_mask(LOG_UNIMP,
+                      "pl041: unsupported non-compact fifo depth [%i]\n",
+                      s->fifo_depth);
         return -1;
     }
 
diff --git a/hw/pl181.c b/hw/pl181.c
index 7d91fbb..5a73473 100644
--- a/hw/pl181.c
+++ b/hw/pl181.c
@@ -352,7 +352,7 @@
     case 0xa0: case 0xa4: case 0xa8: case 0xac:
     case 0xb0: case 0xb4: case 0xb8: case 0xbc:
         if (s->fifo_len == 0) {
-            fprintf(stderr, "pl181: Unexpected FIFO read\n");
+            qemu_log_mask(LOG_GUEST_ERROR, "pl181: Unexpected FIFO read\n");
             return 0;
         } else {
             uint32_t value;
@@ -363,7 +363,8 @@
             return value;
         }
     default:
-        hw_error("pl181_read: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl181_read: Bad offset %x\n", (int)offset);
         return 0;
     }
 }
@@ -387,11 +388,11 @@
         s->cmd = value;
         if (s->cmd & PL181_CMD_ENABLE) {
             if (s->cmd & PL181_CMD_INTERRUPT) {
-                fprintf(stderr, "pl181: Interrupt mode not implemented\n");
-                abort();
+                qemu_log_mask(LOG_UNIMP,
+                              "pl181: Interrupt mode not implemented\n");
             } if (s->cmd & PL181_CMD_PENDING) {
-                fprintf(stderr, "pl181: Pending commands not implemented\n");
-                abort();
+                qemu_log_mask(LOG_UNIMP,
+                              "pl181: Pending commands not implemented\n");
             } else {
                 pl181_send_command(s);
                 pl181_fifo_run(s);
@@ -427,14 +428,15 @@
     case 0xa0: case 0xa4: case 0xa8: case 0xac:
     case 0xb0: case 0xb4: case 0xb8: case 0xbc:
         if (s->datacnt == 0) {
-            fprintf(stderr, "pl181: Unexpected FIFO write\n");
+            qemu_log_mask(LOG_GUEST_ERROR, "pl181: Unexpected FIFO write\n");
         } else {
             pl181_fifo_push(s, value);
             pl181_fifo_run(s);
         }
         break;
     default:
-        hw_error("pl181_write: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl181_write: Bad offset %x\n", (int)offset);
     }
     pl181_update(s);
 }
diff --git a/hw/pl190.c b/hw/pl190.c
index 7332f4d..961da5b 100644
--- a/hw/pl190.c
+++ b/hw/pl190.c
@@ -143,7 +143,8 @@
     case 13: /* DEFVECTADDR */
         return s->vect_addr[16];
     default:
-        hw_error("pl190_read: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl190_read: Bad offset %x\n", (int)offset);
         return 0;
     }
 }
@@ -202,7 +203,8 @@
         }
         break;
     default:
-        hw_error("pl190_write: Bad offset %x\n", (int)offset);
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "pl190_write: Bad offset %x\n", (int)offset);
         return;
     }
     pl190_update(s);
diff --git a/hw/ppc/e500.c b/hw/ppc/e500.c
index d23f9b2..846f53a 100644
--- a/hw/ppc/e500.c
+++ b/hw/ppc/e500.c
@@ -19,7 +19,7 @@
 #include "e500.h"
 #include "net.h"
 #include "hw/hw.h"
-#include "hw/pc.h"
+#include "hw/serial.h"
 #include "hw/pci.h"
 #include "hw/boards.h"
 #include "sysemu.h"
diff --git a/hw/ppc/e500plat.c b/hw/ppc/e500plat.c
index 60a5cb3..4cfb940 100644
--- a/hw/ppc/e500plat.c
+++ b/hw/ppc/e500plat.c
@@ -25,13 +25,14 @@
                          sizeof(compatible));
 }
 
-static void e500plat_init(ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void e500plat_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *boot_device = args->boot_device;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     PPCE500Params params = {
         .ram_size = ram_size,
         .boot_device = boot_device,
diff --git a/hw/ppc/mpc8544ds.c b/hw/ppc/mpc8544ds.c
index 984d21c..e651661 100644
--- a/hw/ppc/mpc8544ds.c
+++ b/hw/ppc/mpc8544ds.c
@@ -25,13 +25,14 @@
                          sizeof(compatible));
 }
 
-static void mpc8544ds_init(ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void mpc8544ds_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *boot_device = args->boot_device;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     PPCE500Params params = {
         .ram_size = ram_size,
         .boot_device = boot_device,
diff --git a/hw/ppc405_boards.c b/hw/ppc405_boards.c
index 476775d..e848cb0 100644
--- a/hw/ppc405_boards.c
+++ b/hw/ppc405_boards.c
@@ -158,7 +158,7 @@
     fpga->reg1 = 0x0F;
 }
 
-static void ref405ep_fpga_init (MemoryRegion *sysmem, uint32_t base)
+static void ref405ep_fpga_init(MemoryRegion *sysmem, uint32_t base)
 {
     ref405ep_fpga_t *fpga;
     MemoryRegion *fpga_memory = g_new(MemoryRegion, 1);
@@ -170,13 +170,12 @@
     qemu_register_reset(&ref405ep_fpga_reset, fpga);
 }
 
-static void ref405ep_init (ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void ref405ep_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     ppc4xx_bd_info_t bd;
     CPUPPCState *env;
@@ -484,7 +483,7 @@
     cpld->reg1 = 0x80;
 }
 
-static void taihu_cpld_init (MemoryRegion *sysmem, uint32_t base)
+static void taihu_cpld_init(MemoryRegion *sysmem, uint32_t base)
 {
     taihu_cpld_t *cpld;
     MemoryRegion *cpld_memory = g_new(MemoryRegion, 1);
@@ -495,13 +494,11 @@
     qemu_register_reset(&taihu_cpld_reset, cpld);
 }
 
-static void taihu_405ep_init(ram_addr_t ram_size,
-                             const char *boot_device,
-                             const char *kernel_filename,
-                             const char *kernel_cmdline,
-                             const char *initrd_filename,
-                             const char *cpu_model)
+static void taihu_405ep_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *kernel_filename = args->kernel_filename;
+    const char *initrd_filename = args->initrd_filename;
     char *filename;
     qemu_irq *pic;
     MemoryRegion *sysmem = get_system_memory();
diff --git a/hw/ppc405_uc.c b/hw/ppc405_uc.c
index b52ab2f..e81409d 100644
--- a/hw/ppc405_uc.c
+++ b/hw/ppc405_uc.c
@@ -24,7 +24,7 @@
 #include "hw.h"
 #include "ppc.h"
 #include "ppc405.h"
-#include "pc.h"
+#include "serial.h"
 #include "qemu-timer.h"
 #include "sysemu.h"
 #include "qemu-log.h"
diff --git a/hw/ppc440_bamboo.c b/hw/ppc440_bamboo.c
index c198071..5616a26 100644
--- a/hw/ppc440_bamboo.c
+++ b/hw/ppc440_bamboo.c
@@ -23,7 +23,7 @@
 #include "loader.h"
 #include "elf.h"
 #include "exec-memory.h"
-#include "pc.h"
+#include "serial.h"
 #include "ppc.h"
 #include "ppc405.h"
 #include "sysemu.h"
@@ -157,13 +157,13 @@
     mmubooke_create_initial_mapping(env, 0, 0);
 }
 
-static void bamboo_init(ram_addr_t ram_size,
-                        const char *boot_device,
-                        const char *kernel_filename,
-                        const char *kernel_cmdline,
-                        const char *initrd_filename,
-                        const char *cpu_model)
+static void bamboo_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     unsigned int pci_irq_nrs[4] = { 28, 27, 26, 25 };
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ram_memories
diff --git a/hw/ppc_newworld.c b/hw/ppc_newworld.c
index b8d3c9c..a265445 100644
--- a/hw/ppc_newworld.c
+++ b/hw/ppc_newworld.c
@@ -128,13 +128,14 @@
 }
 
 /* PowerPC Mac99 hardware initialisation */
-static void ppc_core99_init (ram_addr_t ram_size,
-                             const char *boot_device,
-                             const char *kernel_filename,
-                             const char *kernel_cmdline,
-                             const char *initrd_filename,
-                             const char *cpu_model)
+static void ppc_core99_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     PowerPCCPU *cpu = NULL;
     CPUPPCState *env = NULL;
     char *filename;
diff --git a/hw/ppc_oldworld.c b/hw/ppc_oldworld.c
index 2c4a478..de33408 100644
--- a/hw/ppc_oldworld.c
+++ b/hw/ppc_oldworld.c
@@ -71,13 +71,14 @@
     cpu_reset(CPU(cpu));
 }
 
-static void ppc_heathrow_init (ram_addr_t ram_size,
-                               const char *boot_device,
-                               const char *kernel_filename,
-                               const char *kernel_cmdline,
-                               const char *initrd_filename,
-                               const char *cpu_model)
+static void ppc_heathrow_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     MemoryRegion *sysmem = get_system_memory();
     PowerPCCPU *cpu = NULL;
     CPUPPCState *env = NULL;
diff --git a/hw/ppc_prep.c b/hw/ppc_prep.c
index 1544430..a0d1c3d 100644
--- a/hw/ppc_prep.c
+++ b/hw/ppc_prep.c
@@ -24,6 +24,7 @@
 #include "hw.h"
 #include "nvram.h"
 #include "pc.h"
+#include "serial.h"
 #include "fdc.h"
 #include "net.h"
 #include "sysemu.h"
@@ -447,13 +448,14 @@
 }
 
 /* PowerPC PREP hardware initialisation */
-static void ppc_prep_init (ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void ppc_prep_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     MemoryRegion *sysmem = get_system_memory();
     PowerPCCPU *cpu = NULL;
     CPUPPCState *env = NULL;
diff --git a/hw/puv3.c b/hw/puv3.c
index 43f7216..764799c 100644
--- a/hw/puv3.c
+++ b/hw/puv3.c
@@ -91,10 +91,12 @@
     graphic_console_init(NULL, NULL, NULL, NULL, NULL);
 }
 
-static void puv3_init(ram_addr_t ram_size, const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void puv3_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *initrd_filename = args->initrd_filename;
     CPUUniCore32State *env;
 
     if (initrd_filename) {
diff --git a/hw/pxa2xx.c b/hw/pxa2xx.c
index d5f1420..4ec904f 100644
--- a/hw/pxa2xx.c
+++ b/hw/pxa2xx.c
@@ -10,7 +10,7 @@
 #include "sysbus.h"
 #include "pxa.h"
 #include "sysemu.h"
-#include "pc.h"
+#include "serial.h"
 #include "i2c.h"
 #include "ssi.h"
 #include "qemu-char.h"
diff --git a/hw/r2d.c b/hw/r2d.c
index 1bc191f..3cb6942 100644
--- a/hw/r2d.c
+++ b/hw/r2d.c
@@ -219,11 +219,12 @@
     char kernel_cmdline[256];
 } boot_params;
 
-static void r2d_init(ram_addr_t ram_size,
-              const char *boot_device,
-	      const char *kernel_filename, const char *kernel_cmdline,
-	      const char *initrd_filename, const char *cpu_model)
+static void r2d_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     SuperHCPU *cpu;
     CPUSH4State *env;
     ResetData *reset_info;
diff --git a/hw/realview.c b/hw/realview.c
index 19db4d0..8dc4be6 100644
--- a/hw/realview.c
+++ b/hw/realview.c
@@ -330,11 +330,14 @@
     arm_load_kernel(arm_env_get_cpu(first_cpu), &realview_binfo);
 }
 
-static void realview_eb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void realview_eb_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = "arm926";
     }
@@ -342,11 +345,14 @@
                   initrd_filename, cpu_model, BOARD_EB);
 }
 
-static void realview_eb_mpcore_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void realview_eb_mpcore_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = "arm11mpcore";
     }
@@ -354,11 +360,14 @@
                   initrd_filename, cpu_model, BOARD_EB_MPCORE);
 }
 
-static void realview_pb_a8_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void realview_pb_a8_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = "cortex-a8";
     }
@@ -366,11 +375,14 @@
                   initrd_filename, cpu_model, BOARD_PB_A8);
 }
 
-static void realview_pbx_a9_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void realview_pbx_a9_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = "cortex-a9";
     }
diff --git a/hw/s390-virtio.c b/hw/s390-virtio.c
index 47eed35..39ff178 100644
--- a/hw/s390-virtio.c
+++ b/hw/s390-virtio.c
@@ -151,13 +151,14 @@
 }
 
 /* PC hardware initialisation */
-static void s390_init(ram_addr_t my_ram_size,
-                      const char *boot_device,
-                      const char *kernel_filename,
-                      const char *kernel_cmdline,
-                      const char *initrd_filename,
-                      const char *cpu_model)
+static void s390_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t my_ram_size = args->ram_size;
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     CPUS390XState *env = NULL;
     MemoryRegion *sysmem = get_system_memory();
     MemoryRegion *ram = g_new(MemoryRegion, 1);
diff --git a/hw/serial-isa.c b/hw/serial-isa.c
new file mode 100644
index 0000000..96c78f7
--- /dev/null
+++ b/hw/serial-isa.c
@@ -0,0 +1,130 @@
+/*
+ * QEMU 16550A UART emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "serial.h"
+#include "isa.h"
+
+typedef struct ISASerialState {
+    ISADevice dev;      /* ISA parent object; DO_UPCAST() target */
+    uint32_t index;     /* "index" property; -1 = use initfn's counter */
+    uint32_t iobase;    /* "iobase" property; -1 = isa_serial_io[index] */
+    uint32_t isairq;    /* "irq" property; -1 = isa_serial_irq[index] */
+    SerialState state;  /* embedded UART core state */
+} ISASerialState;
+
+static const int isa_serial_io[MAX_SERIAL_PORTS] = {
+    0x3f8, 0x2f8, 0x3e8, 0x2e8  /* default I/O base per port index */
+};
+static const int isa_serial_irq[MAX_SERIAL_PORTS] = {
+    4, 3, 4, 3                  /* default ISA IRQ per port index */
+};
+
+static int serial_isa_initfn(ISADevice *dev)
+{
+    static int next_index;  /* bumped once per device that gets this far */
+    ISASerialState *card = DO_UPCAST(ISASerialState, dev, dev);
+    SerialState *uart = &card->state;
+
+    if (card->index == -1) {        /* -1 is the property default */
+        card->index = next_index;
+    }
+    if (card->index >= MAX_SERIAL_PORTS) {
+        return -1;
+    }
+    if (card->iobase == -1) {
+        card->iobase = isa_serial_io[card->index];
+    }
+    if (card->isairq == -1) {
+        card->isairq = isa_serial_irq[card->index];
+    }
+    next_index++;
+
+    uart->baudbase = 115200;
+    isa_init_irq(dev, &uart->irq, card->isairq);
+    serial_init_core(uart);
+    qdev_set_legacy_instance_id(&dev->qdev, card->iobase, 3);
+    /* Map an 8-byte port I/O window for the UART at the chosen base. */
+    memory_region_init_io(&uart->io, &serial_io_ops, uart, "serial", 8);
+    isa_register_ioport(dev, &uart->io, card->iobase);
+    return 0;
+}
+
+static const VMStateDescription vmstate_isa_serial = {
+    .name = "serial",           /* same name as vmstate_serial in serial.c */
+    .version_id = 3,            /* versions also match vmstate_serial */
+    .minimum_version_id = 2,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT(state, ISASerialState, 0, vmstate_serial, SerialState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property serial_isa_properties[] = { /* -1 resolved in initfn */
+    DEFINE_PROP_UINT32("index",  ISASerialState, index,   -1),
+    DEFINE_PROP_HEX32("iobase",  ISASerialState, iobase,  -1),
+    DEFINE_PROP_UINT32("irq",    ISASerialState, isairq,  -1),
+    DEFINE_PROP_CHR("chardev",   ISASerialState, state.chr),
+    DEFINE_PROP_UINT32("wakeup", ISASerialState, state.wakeup, 0),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void serial_isa_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    ISADeviceClass *isa_class = ISA_DEVICE_CLASS(klass);
+    dev_class->props = serial_isa_properties;   /* "index", "iobase", ... */
+    dev_class->vmsd = &vmstate_isa_serial;
+    isa_class->init = serial_isa_initfn;
+}
+
+static TypeInfo serial_isa_info = {
+    .name          = "isa-serial",  /* name used by serial_isa_init() */
+    .parent        = TYPE_ISA_DEVICE,
+    .instance_size = sizeof(ISASerialState),
+    .class_init    = serial_isa_class_initfn,
+};
+
+static void serial_register_types(void)
+{
+    type_register_static(&serial_isa_info);  /* registers "isa-serial" */
+}
+
+type_init(serial_register_types)
+
+bool serial_isa_init(ISABus *bus, int index, CharDriverState *chr)
+{
+    /* Create the device; a NULL result is reported as failure. */
+    ISADevice *isadev = isa_try_create(bus, "isa-serial");
+
+    if (isadev == NULL) {
+        return false;
+    }
+
+    /* Select the port slot and backend before initialising the device. */
+    qdev_prop_set_uint32(&isadev->qdev, "index", index);
+    qdev_prop_set_chr(&isadev->qdev, "chardev", chr);
+
+    return qdev_init(&isadev->qdev) >= 0;
+}
diff --git a/hw/serial-pci.c b/hw/serial-pci.c
new file mode 100644
index 0000000..95dc5c8
--- /dev/null
+++ b/hw/serial-pci.c
@@ -0,0 +1,252 @@
+/*
+ * QEMU 16550A UART emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* see docs/specs/pci-serial.txt */
+
+#include "serial.h"
+#include "pci.h"
+
+#define PCI_SERIAL_MAX_PORTS 4
+
+typedef struct PCISerialState {
+    PCIDevice dev;      /* PCI parent object; DO_UPCAST() target */
+    SerialState state;  /* the single embedded UART core */
+} PCISerialState;
+
+typedef struct PCIMultiSerialState {
+    PCIDevice    dev;                         /* PCI parent object */
+    MemoryRegion iobar;                       /* BAR 0, 8 bytes per port */
+    uint32_t     ports;                       /* 2 or 4, from device id */
+    char         *name[PCI_SERIAL_MAX_PORTS]; /* allocated region names */
+    SerialState  state[PCI_SERIAL_MAX_PORTS]; /* per-port UART cores */
+    uint32_t     level[PCI_SERIAL_MAX_PORTS]; /* last IRQ level per port */
+    qemu_irq     *irqs;                       /* per-port IRQ mux inputs */
+} PCIMultiSerialState;
+
+static int serial_pci_init(PCIDevice *dev)
+{
+    PCISerialState *card = DO_UPCAST(PCISerialState, dev, dev);
+    SerialState *uart = &card->state;
+
+    uart->baudbase = 115200;
+    serial_init_core(uart);
+    /* Set interrupt pin 0x01 and route the UART to the first IRQ line. */
+    card->dev.config[PCI_INTERRUPT_PIN] = 0x01;
+    uart->irq = card->dev.irq[0];
+    /* BAR 0 is an 8-byte I/O region backed by the shared serial_io_ops. */
+    memory_region_init_io(&uart->io, &serial_io_ops, uart, "serial", 8);
+    pci_register_bar(&card->dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &uart->io);
+    return 0;
+}
+
+static void multi_serial_irq_mux(void *opaque, int n, int level)
+{
+    PCIMultiSerialState *card = opaque;
+    int port;
+    int any_raised = 0;
+
+    /* Record this port's level, then OR all ports onto the one PCI IRQ. */
+    card->level[n] = level;
+    for (port = 0; port < card->ports; port++) {
+        any_raised |= (card->level[port] != 0);
+    }
+    qemu_set_irq(card->dev.irq[0], any_raised);
+}
+
+static int multi_serial_pci_init(PCIDevice *dev)
+{
+    PCIDeviceClass *klass = PCI_DEVICE_GET_CLASS(dev);
+    PCIMultiSerialState *card = DO_UPCAST(PCIMultiSerialState, dev, dev);
+    int port;
+
+    /* The port count follows from which multi-port device id this is. */
+    if (klass->device_id == 0x0003) {
+        card->ports = 2;
+    } else if (klass->device_id == 0x0004) {
+        card->ports = 4;
+    }
+    assert(card->ports > 0);
+    assert(card->ports <= PCI_SERIAL_MAX_PORTS);
+
+    card->dev.config[PCI_INTERRUPT_PIN] = 0x01;
+    /* BAR 0 spans 8 bytes per port; each UART is mapped as a subregion. */
+    memory_region_init(&card->iobar, "multiserial", 8 * card->ports);
+    pci_register_bar(&card->dev, 0, PCI_BASE_ADDRESS_SPACE_IO, &card->iobar);
+    /* One mux input per port; the mux drives the card's first IRQ line. */
+    card->irqs = qemu_allocate_irqs(multi_serial_irq_mux, card,
+                                    card->ports);
+
+    for (port = 0; port < card->ports; port++) {
+        SerialState *uart = &card->state[port];
+        uart->baudbase = 115200;
+        serial_init_core(uart);
+        uart->irq = card->irqs[port];
+        card->name[port] = g_strdup_printf("uart #%d", port + 1);
+        memory_region_init_io(&uart->io, &serial_io_ops, uart,
+                              card->name[port], 8);
+        memory_region_add_subregion(&card->iobar, 8 * port, &uart->io);
+    }
+    return 0;
+}
+
+static void serial_pci_exit(PCIDevice *dev)
+{
+    PCISerialState *card = DO_UPCAST(PCISerialState, dev, dev);
+
+    /* Undo serial_pci_init(): stop the core, then drop its I/O region. */
+    serial_exit_core(&card->state);
+    memory_region_destroy(&card->state.io);
+}
+
+static void multi_serial_pci_exit(PCIDevice *dev)
+{
+    PCIMultiSerialState *card = DO_UPCAST(PCIMultiSerialState, dev, dev);
+    int port;
+
+    /* Per port: stop the core, drop its region, free the region name. */
+    for (port = 0; port < card->ports; port++) {
+        SerialState *uart = &card->state[port];
+        serial_exit_core(uart);
+        memory_region_destroy(&uart->io);
+        g_free(card->name[port]);
+    }
+    memory_region_destroy(&card->iobar);
+    qemu_free_irqs(card->irqs);
+}
+
+static const VMStateDescription vmstate_pci_serial = {
+    .name = "pci-serial",       /* wired up in serial_pci_class_initfn() */
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields      = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(dev, PCISerialState),    /* PCI parent state */
+        VMSTATE_STRUCT(state, PCISerialState, 0, vmstate_serial, SerialState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription vmstate_pci_multi_serial = {
+    .name = "pci-serial-multi",     /* shared by the 2x and 4x variants */
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields      = (VMStateField[]) {   /* arrays cover all MAX_PORTS slots */
+        VMSTATE_PCI_DEVICE(dev, PCIMultiSerialState),
+        VMSTATE_STRUCT_ARRAY(state, PCIMultiSerialState, PCI_SERIAL_MAX_PORTS,
+                             0, vmstate_serial, SerialState),
+        VMSTATE_UINT32_ARRAY(level, PCIMultiSerialState, PCI_SERIAL_MAX_PORTS),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static Property serial_pci_properties[] = {
+    DEFINE_PROP_CHR("chardev",  PCISerialState, state.chr), /* the one port */
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static Property multi_2x_serial_pci_properties[] = { /* N -> state[N-1] */
+    DEFINE_PROP_CHR("chardev1",  PCIMultiSerialState, state[0].chr),
+    DEFINE_PROP_CHR("chardev2",  PCIMultiSerialState, state[1].chr),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static Property multi_4x_serial_pci_properties[] = { /* N -> state[N-1] */
+    DEFINE_PROP_CHR("chardev1",  PCIMultiSerialState, state[0].chr),
+    DEFINE_PROP_CHR("chardev2",  PCIMultiSerialState, state[1].chr),
+    DEFINE_PROP_CHR("chardev3",  PCIMultiSerialState, state[2].chr),
+    DEFINE_PROP_CHR("chardev4",  PCIMultiSerialState, state[3].chr),
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void serial_pci_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    pci_class->vendor_id = 0x1b36; /* Red Hat */
+    pci_class->device_id = 0x0002; /* single-port variant */
+    pci_class->revision = 1;
+    pci_class->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
+    pci_class->init = serial_pci_init;
+    pci_class->exit = serial_pci_exit;
+    dev_class->props = serial_pci_properties;
+    dev_class->vmsd = &vmstate_pci_serial;
+}
+
+static void multi_2x_serial_pci_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    pci_class->vendor_id = 0x1b36; /* Red Hat */
+    pci_class->device_id = 0x0003; /* 2 ports, see multi_serial_pci_init() */
+    pci_class->revision = 1;
+    pci_class->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
+    pci_class->init = multi_serial_pci_init;
+    pci_class->exit = multi_serial_pci_exit;
+    dev_class->props = multi_2x_serial_pci_properties;
+    dev_class->vmsd = &vmstate_pci_multi_serial;
+}
+
+static void multi_4x_serial_pci_class_initfn(ObjectClass *klass, void *data)
+{
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    pci_class->vendor_id = 0x1b36; /* Red Hat */
+    pci_class->device_id = 0x0004; /* 4 ports, see multi_serial_pci_init() */
+    pci_class->revision = 1;
+    pci_class->class_id = PCI_CLASS_COMMUNICATION_SERIAL;
+    pci_class->init = multi_serial_pci_init;
+    pci_class->exit = multi_serial_pci_exit;
+    dev_class->props = multi_4x_serial_pci_properties;
+    dev_class->vmsd = &vmstate_pci_multi_serial;
+}
+
+static TypeInfo serial_pci_info = {
+    .name          = "pci-serial",      /* single-port device */
+    .parent        = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(PCISerialState),
+    .class_init    = serial_pci_class_initfn,
+};
+
+static TypeInfo multi_2x_serial_pci_info = {
+    .name          = "pci-serial-2x",   /* class sets device id 0x0003 */
+    .parent        = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(PCIMultiSerialState),
+    .class_init    = multi_2x_serial_pci_class_initfn,
+};
+
+static TypeInfo multi_4x_serial_pci_info = {
+    .name          = "pci-serial-4x",   /* class sets device id 0x0004 */
+    .parent        = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(PCIMultiSerialState),
+    .class_init    = multi_4x_serial_pci_class_initfn,
+};
+
+static void serial_pci_register_types(void)
+{
+    type_register_static(&serial_pci_info);          /* "pci-serial" */
+    type_register_static(&multi_2x_serial_pci_info); /* "pci-serial-2x" */
+    type_register_static(&multi_4x_serial_pci_info); /* "pci-serial-4x" */
+}
+
+type_init(serial_pci_register_types)
diff --git a/hw/serial.c b/hw/serial.c
index a421d1e..5adbfaf 100644
--- a/hw/serial.c
+++ b/hw/serial.c
@@ -22,12 +22,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include "hw.h"
+
+#include "serial.h"
 #include "qemu-char.h"
-#include "isa.h"
-#include "pc.h"
 #include "qemu-timer.h"
-#include "sysemu.h"
 
 //#define DEBUG_SERIAL
 
@@ -93,8 +91,6 @@
 #define UART_FCR_RFR        0x02    /* RCVR Fifo Reset */
 #define UART_FCR_FE         0x01    /* FIFO Enable */
 
-#define UART_FIFO_LENGTH    16      /* 16550A Fifo Length */
-
 #define XMIT_FIFO           0
 #define RECV_FIFO           1
 #define MAX_XMIT_RETRY      4
@@ -107,64 +103,6 @@
 do {} while (0)
 #endif
 
-typedef struct SerialFIFO {
-    uint8_t data[UART_FIFO_LENGTH];
-    uint8_t count;
-    uint8_t itl;                        /* Interrupt Trigger Level */
-    uint8_t tail;
-    uint8_t head;
-} SerialFIFO;
-
-struct SerialState {
-    uint16_t divider;
-    uint8_t rbr; /* receive register */
-    uint8_t thr; /* transmit holding register */
-    uint8_t tsr; /* transmit shift register */
-    uint8_t ier;
-    uint8_t iir; /* read only */
-    uint8_t lcr;
-    uint8_t mcr;
-    uint8_t lsr; /* read only */
-    uint8_t msr; /* read only */
-    uint8_t scr;
-    uint8_t fcr;
-    uint8_t fcr_vmstate; /* we can't write directly this value
-                            it has side effects */
-    /* NOTE: this hidden state is necessary for tx irq generation as
-       it can be reset while reading iir */
-    int thr_ipending;
-    qemu_irq irq;
-    CharDriverState *chr;
-    int last_break_enable;
-    int it_shift;
-    int baudbase;
-    int tsr_retry;
-    uint32_t wakeup;
-
-    uint64_t last_xmit_ts;              /* Time when the last byte was successfully sent out of the tsr */
-    SerialFIFO recv_fifo;
-    SerialFIFO xmit_fifo;
-
-    struct QEMUTimer *fifo_timeout_timer;
-    int timeout_ipending;                   /* timeout interrupt pending state */
-    struct QEMUTimer *transmit_timer;
-
-
-    uint64_t char_transmit_time;               /* time to transmit a char in ticks*/
-    int poll_msl;
-
-    struct QEMUTimer *modem_status_poll;
-    MemoryRegion io;
-};
-
-typedef struct ISASerialState {
-    ISADevice dev;
-    uint32_t index;
-    uint32_t iobase;
-    uint32_t isairq;
-    SerialState state;
-} ISASerialState;
-
 static void serial_receive1(void *opaque, const uint8_t *buf, int size);
 
 static void fifo_clear(SerialState *s, int fifo)
@@ -687,7 +625,7 @@
     return 0;
 }
 
-static const VMStateDescription vmstate_serial = {
+const VMStateDescription vmstate_serial = {
     .name = "serial",
     .version_id = 3,
     .minimum_version_id = 2,
@@ -736,7 +674,7 @@
     qemu_irq_lower(s->irq);
 }
 
-static void serial_init_core(SerialState *s)
+void serial_init_core(SerialState *s)
 {
     if (!s->chr) {
         fprintf(stderr, "Can't create serial device, empty char device\n");
@@ -754,6 +692,12 @@
                           serial_event, s);
 }
 
+void serial_exit_core(SerialState *s)
+{
+    qemu_chr_add_handlers(s->chr, NULL, NULL, NULL, NULL); /* all NULL */
+    qemu_unregister_reset(serial_reset, s); /* drop the serial_reset hook */
+}
+
 /* Change the main reference oscillator frequency. */
 void serial_set_frequency(SerialState *s, uint32_t frequency)
 {
@@ -761,54 +705,15 @@
     serial_update_parameters(s);
 }
 
-static const int isa_serial_io[MAX_SERIAL_PORTS] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8 };
-static const int isa_serial_irq[MAX_SERIAL_PORTS] = { 4, 3, 4, 3 };
-
 static const MemoryRegionPortio serial_portio[] = {
     { 0, 8, 1, .read = serial_ioport_read, .write = serial_ioport_write },
     PORTIO_END_OF_LIST()
 };
 
-static const MemoryRegionOps serial_io_ops = {
+const MemoryRegionOps serial_io_ops = {
     .old_portio = serial_portio
 };
 
-static int serial_isa_initfn(ISADevice *dev)
-{
-    static int index;
-    ISASerialState *isa = DO_UPCAST(ISASerialState, dev, dev);
-    SerialState *s = &isa->state;
-
-    if (isa->index == -1)
-        isa->index = index;
-    if (isa->index >= MAX_SERIAL_PORTS)
-        return -1;
-    if (isa->iobase == -1)
-        isa->iobase = isa_serial_io[isa->index];
-    if (isa->isairq == -1)
-        isa->isairq = isa_serial_irq[isa->index];
-    index++;
-
-    s->baudbase = 115200;
-    isa_init_irq(dev, &s->irq, isa->isairq);
-    serial_init_core(s);
-    qdev_set_legacy_instance_id(&dev->qdev, isa->iobase, 3);
-
-    memory_region_init_io(&s->io, &serial_io_ops, s, "serial", 8);
-    isa_register_ioport(dev, &s->io, isa->iobase);
-    return 0;
-}
-
-static const VMStateDescription vmstate_isa_serial = {
-    .name = "serial",
-    .version_id = 3,
-    .minimum_version_id = 2,
-    .fields      = (VMStateField []) {
-        VMSTATE_STRUCT(state, ISASerialState, 0, vmstate_serial, SerialState),
-        VMSTATE_END_OF_LIST()
-    }
-};
-
 SerialState *serial_init(int base, qemu_irq irq, int baudbase,
                          CharDriverState *chr)
 {
@@ -886,35 +791,3 @@
     serial_update_msl(s);
     return s;
 }
-
-static Property serial_isa_properties[] = {
-    DEFINE_PROP_UINT32("index", ISASerialState, index,   -1),
-    DEFINE_PROP_HEX32("iobase", ISASerialState, iobase,  -1),
-    DEFINE_PROP_UINT32("irq",   ISASerialState, isairq,  -1),
-    DEFINE_PROP_CHR("chardev",  ISASerialState, state.chr),
-    DEFINE_PROP_UINT32("wakeup", ISASerialState, state.wakeup, 0),
-    DEFINE_PROP_END_OF_LIST(),
-};
-
-static void serial_isa_class_initfn(ObjectClass *klass, void *data)
-{
-    DeviceClass *dc = DEVICE_CLASS(klass);
-    ISADeviceClass *ic = ISA_DEVICE_CLASS(klass);
-    ic->init = serial_isa_initfn;
-    dc->vmsd = &vmstate_isa_serial;
-    dc->props = serial_isa_properties;
-}
-
-static TypeInfo serial_isa_info = {
-    .name          = "isa-serial",
-    .parent        = TYPE_ISA_DEVICE,
-    .instance_size = sizeof(ISASerialState),
-    .class_init    = serial_isa_class_initfn,
-};
-
-static void serial_register_types(void)
-{
-    type_register_static(&serial_isa_info);
-}
-
-type_init(serial_register_types)
diff --git a/hw/serial.h b/hw/serial.h
new file mode 100644
index 0000000..55a1ac5
--- /dev/null
+++ b/hw/serial.h
@@ -0,0 +1,99 @@
+/*
+ * QEMU 16550A UART emulation
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "hw.h"
+#include "sysemu.h"
+#include "memory.h"
+
+#define UART_FIFO_LENGTH    16      /* 16550A Fifo Length */
+
+typedef struct SerialFIFO {
+    uint8_t data[UART_FIFO_LENGTH];     /* UART_FIFO_LENGTH (16) slots */
+    uint8_t count;      /* NOTE(review): presumably bytes queued -- confirm */
+    uint8_t itl;                        /* Interrupt Trigger Level */
+    uint8_t tail;       /* NOTE(review): head/tail look like ring indices */
+    uint8_t head;
+} SerialFIFO;
+
+struct SerialState {
+    uint16_t divider;
+    uint8_t rbr; /* receive register */
+    uint8_t thr; /* transmit holding register */
+    uint8_t tsr; /* transmit shift register */
+    uint8_t ier;
+    uint8_t iir; /* read only */
+    uint8_t lcr;
+    uint8_t mcr;
+    uint8_t lsr; /* read only */
+    uint8_t msr; /* read only */
+    uint8_t scr;
+    uint8_t fcr;
+    uint8_t fcr_vmstate; /* we can't write this value directly;
+                            it has side effects */
+    /* NOTE: this hidden state is necessary for tx irq generation as
+       it can be reset while reading iir */
+    int thr_ipending;
+    qemu_irq irq;           /* assigned by the ISA/PCI front end */
+    CharDriverState *chr;   /* serial_init_core() reports an error if NULL */
+    int last_break_enable;
+    int it_shift;
+    int baudbase;           /* 115200 in the ISA and PCI front ends */
+    int tsr_retry;
+    uint32_t wakeup;        /* "wakeup" property of isa-serial, default 0 */
+
+    /* Time when the last byte was successfully sent out of the tsr */
+    uint64_t last_xmit_ts;
+    SerialFIFO recv_fifo;
+    SerialFIFO xmit_fifo;
+
+    struct QEMUTimer *fifo_timeout_timer;
+    int timeout_ipending;           /* timeout interrupt pending state */
+    struct QEMUTimer *transmit_timer;
+
+
+    uint64_t char_transmit_time;    /* time to transmit a char in ticks */
+    int poll_msl;
+
+    struct QEMUTimer *modem_status_poll;
+    MemoryRegion io;        /* 8-byte region initialised by the front ends */
+};
+
+extern const VMStateDescription vmstate_serial;  /* defined in serial.c */
+extern const MemoryRegionOps serial_io_ops;      /* defined in serial.c */
+
+void serial_init_core(SerialState *s);  /* s->chr must already be set */
+void serial_exit_core(SerialState *s);  /* counterpart used by PCI exit */
+void serial_set_frequency(SerialState *s, uint32_t frequency);
+
+/* Legacy (pre-QOM) constructors */
+SerialState *serial_init(int base, qemu_irq irq, int baudbase,
+                         CharDriverState *chr);
+SerialState *serial_mm_init(MemoryRegion *address_space,
+                            target_phys_addr_t base, int it_shift,
+                            qemu_irq irq, int baudbase,
+                            CharDriverState *chr, enum device_endian end);
+
+/* serial-isa.c: returns false if the device can't be created or inited */
+bool serial_isa_init(ISABus *bus, int index, CharDriverState *chr);
diff --git a/hw/shix.c b/hw/shix.c
index dd9ce17..b56dd54 100644
--- a/hw/shix.c
+++ b/hw/shix.c
@@ -37,11 +37,9 @@
 #define BIOS_FILENAME "shix_bios.bin"
 #define BIOS_ADDRESS 0xA0000000
 
-static void shix_init(ram_addr_t ram_size,
-               const char *boot_device,
-	       const char *kernel_filename, const char *kernel_cmdline,
-	       const char *initrd_filename, const char *cpu_model)
+static void shix_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
     int ret;
     CPUSH4State *env;
     struct SH7750State *s;
diff --git a/hw/sm501.c b/hw/sm501.c
index 786e076..050d096 100644
--- a/hw/sm501.c
+++ b/hw/sm501.c
@@ -24,7 +24,7 @@
 
 #include <stdio.h>
 #include "hw.h"
-#include "pc.h"
+#include "serial.h"
 #include "console.h"
 #include "devices.h"
 #include "sysbus.h"
diff --git a/hw/spapr.c b/hw/spapr.c
index 09b8e99..637b3fb 100644
--- a/hw/spapr.c
+++ b/hw/spapr.c
@@ -665,13 +665,14 @@
 }
 
 /* pSeries LPAR / sPAPR hardware init */
-static void ppc_spapr_init(ram_addr_t ram_size,
-                           const char *boot_device,
-                           const char *kernel_filename,
-                           const char *kernel_cmdline,
-                           const char *initrd_filename,
-                           const char *cpu_model)
+static void ppc_spapr_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     PowerPCCPU *cpu;
     CPUPPCState *env;
     PCIHostState *phb;
diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
index 38034c0..33f84e2 100644
--- a/hw/spapr_iommu.c
+++ b/hw/spapr_iommu.c
@@ -21,6 +21,7 @@
 #include "qdev.h"
 #include "kvm_ppc.h"
 #include "dma.h"
+#include "exec-memory.h"
 
 #include "hw/spapr.h"
 
@@ -124,7 +125,7 @@
     }
 
     tcet = g_malloc0(sizeof(*tcet));
-    dma_context_init(&tcet->dma, spapr_tce_translate, NULL, NULL);
+    dma_context_init(&tcet->dma, &address_space_memory, spapr_tce_translate, NULL, NULL);
 
     tcet->liobn = liobn;
     tcet->window_size = window_size;
diff --git a/hw/spitz.c b/hw/spitz.c
index 24346dc..2942626 100644
--- a/hw/spitz.c
+++ b/hw/spitz.c
@@ -936,38 +936,46 @@
     sl_bootparam_write(SL_PXA_PARAM_BASE);
 }
 
-static void spitz_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void spitz_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     spitz_common_init(ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, spitz, 0x2c9);
 }
 
-static void borzoi_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void borzoi_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     spitz_common_init(ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, borzoi, 0x33f);
 }
 
-static void akita_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void akita_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     spitz_common_init(ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, akita, 0x2e8);
 }
 
-static void terrier_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void terrier_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     spitz_common_init(ram_size, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, terrier, 0x33f);
 }
diff --git a/hw/stellaris.c b/hw/stellaris.c
index 353ca4c..bfb18b0 100644
--- a/hw/stellaris.c
+++ b/hw/stellaris.c
@@ -1313,19 +1313,17 @@
 }
 
 /* FIXME: Figure out how to generate these from stellaris_boards.  */
-static void lm3s811evb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void lm3s811evb_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     stellaris_init(kernel_filename, cpu_model, &stellaris_boards[0]);
 }
 
-static void lm3s6965evb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void lm3s6965evb_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
     stellaris_init(kernel_filename, cpu_model, &stellaris_boards[1]);
 }
 
diff --git a/hw/sun4m.c b/hw/sun4m.c
index a04b485..dbe93f9 100644
--- a/hw/sun4m.c
+++ b/hw/sun4m.c
@@ -1306,92 +1306,118 @@
 };
 
 /* SPARCstation 5 hardware initialisation */
-static void ss5_init(ram_addr_t RAM_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void ss5_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[0], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation 10 hardware initialisation */
-static void ss10_init(ram_addr_t RAM_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+static void ss10_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[1], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCserver 600MP hardware initialisation */
-static void ss600mp_init(ram_addr_t RAM_size,
-                         const char *boot_device,
-                         const char *kernel_filename,
-                         const char *kernel_cmdline,
-                         const char *initrd_filename, const char *cpu_model)
+static void ss600mp_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[2], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation 20 hardware initialisation */
-static void ss20_init(ram_addr_t RAM_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+static void ss20_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[3], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation Voyager hardware initialisation */
-static void vger_init(ram_addr_t RAM_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+static void vger_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[4], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation LX hardware initialisation */
-static void ss_lx_init(ram_addr_t RAM_size,
-                       const char *boot_device,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+static void ss_lx_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[5], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCstation 4 hardware initialisation */
-static void ss4_init(ram_addr_t RAM_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void ss4_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[6], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCClassic hardware initialisation */
-static void scls_init(ram_addr_t RAM_size,
-                      const char *boot_device,
-                      const char *kernel_filename, const char *kernel_cmdline,
-                      const char *initrd_filename, const char *cpu_model)
+static void scls_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[7], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCbook hardware initialisation */
-static void sbook_init(ram_addr_t RAM_size,
-                       const char *boot_device,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+static void sbook_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4m_hw_init(&sun4m_hwdefs[8], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
@@ -1654,21 +1680,27 @@
 }
 
 /* SPARCserver 1000 hardware initialisation */
-static void ss1000_init(ram_addr_t RAM_size,
-                        const char *boot_device,
-                        const char *kernel_filename, const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void ss1000_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4d_hw_init(&sun4d_hwdefs[0], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
 
 /* SPARCcenter 2000 hardware initialisation */
-static void ss2000_init(ram_addr_t RAM_size,
-                        const char *boot_device,
-                        const char *kernel_filename, const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void ss2000_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4d_hw_init(&sun4d_hwdefs[1], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
@@ -1848,11 +1880,14 @@
 }
 
 /* SPARCstation 2 hardware initialisation */
-static void ss2_init(ram_addr_t RAM_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void ss2_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     sun4c_hw_init(&sun4c_hwdefs[0], RAM_size, boot_device, kernel_filename,
                   kernel_cmdline, initrd_filename, cpu_model);
 }
diff --git a/hw/sun4u.c b/hw/sun4u.c
index 940db33..eeb6496 100644
--- a/hw/sun4u.c
+++ b/hw/sun4u.c
@@ -25,6 +25,7 @@
 #include "pci.h"
 #include "apb_pci.h"
 #include "pc.h"
+#include "serial.h"
 #include "nvram.h"
 #include "fdc.h"
 #include "net.h"
@@ -933,31 +934,40 @@
 };
 
 /* Sun4u hardware initialisation */
-static void sun4u_init(ram_addr_t RAM_size,
-                       const char *boot_devices,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+static void sun4u_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_devices = args->boot_device;
     sun4uv_init(get_system_memory(), RAM_size, boot_devices, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, &hwdefs[0]);
 }
 
 /* Sun4v hardware initialisation */
-static void sun4v_init(ram_addr_t RAM_size,
-                       const char *boot_devices,
-                       const char *kernel_filename, const char *kernel_cmdline,
-                       const char *initrd_filename, const char *cpu_model)
+static void sun4v_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_devices = args->boot_device;
     sun4uv_init(get_system_memory(), RAM_size, boot_devices, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, &hwdefs[1]);
 }
 
 /* Niagara hardware initialisation */
-static void niagara_init(ram_addr_t RAM_size,
-                         const char *boot_devices,
-                         const char *kernel_filename, const char *kernel_cmdline,
-                         const char *initrd_filename, const char *cpu_model)
+static void niagara_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t RAM_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_devices = args->boot_device;
     sun4uv_init(get_system_memory(), RAM_size, boot_devices, kernel_filename,
                 kernel_cmdline, initrd_filename, cpu_model, &hwdefs[2]);
 }
diff --git a/hw/tosa.c b/hw/tosa.c
index 297a8c2..512278c 100644
--- a/hw/tosa.c
+++ b/hw/tosa.c
@@ -205,11 +205,12 @@
     .ram_size = 0x04000000,
 };
 
-static void tosa_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void tosa_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *rom = g_new(MemoryRegion, 1);
     PXA2xxState *mpu;
diff --git a/hw/usb/dev-serial.c b/hw/usb/dev-serial.c
index 69b6e48..a466f99 100644
--- a/hw/usb/dev-serial.c
+++ b/hw/usb/dev-serial.c
@@ -421,12 +421,16 @@
 {
     USBSerialState *s = (USBSerialState *)dev;
 
-    qemu_chr_delete(s->cs);
+    qemu_chr_add_handlers(s->cs, NULL, NULL, NULL, NULL);
 }
 
 static int usb_serial_can_read(void *opaque)
 {
     USBSerialState *s = opaque;
+
+    if (!s->dev.attached) { /* detached: report no room for chardev data */
+        return 0;
+    }
     return RECV_BUF - s->recv_used;
 }
 
@@ -469,8 +473,14 @@
         case CHR_EVENT_FOCUS:
             break;
         case CHR_EVENT_OPENED:
-            usb_serial_reset(s);
-            /* TODO: Reset USB port */
+            if (!s->dev.attached) {
+                usb_device_attach(&s->dev);
+            }
+            break;
+        case CHR_EVENT_CLOSED:
+            if (s->dev.attached) {
+                usb_device_detach(&s->dev);
+            }
             break;
     }
 }
@@ -481,6 +491,7 @@
 
     usb_desc_create_serial(dev);
     usb_desc_init(dev);
+    dev->auto_attach = 0;
 
     if (!s->cs) {
         error_report("Property chardev is required");
@@ -490,6 +501,10 @@
     qemu_chr_add_handlers(s->cs, usb_serial_can_read, usb_serial_read,
                           usb_serial_event, s);
     usb_serial_handle_reset(dev);
+
+    if (s->cs->opened && !dev->attached) {
+        usb_device_attach(dev);
+    }
     return 0;
 }
 
diff --git a/hw/versatilepb.c b/hw/versatilepb.c
index 7b1b025..756ec29 100644
--- a/hw/versatilepb.c
+++ b/hw/versatilepb.c
@@ -348,22 +348,28 @@
     arm_load_kernel(cpu, &versatile_binfo);
 }
 
-static void vpb_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void vpb_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     versatile_init(ram_size,
                    boot_device,
                    kernel_filename, kernel_cmdline,
                    initrd_filename, cpu_model, 0x183);
 }
 
-static void vab_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void vab_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     versatile_init(ram_size,
                    boot_device,
                    kernel_filename, kernel_cmdline,
diff --git a/hw/vexpress.c b/hw/vexpress.c
index 3596d1e..36503d6 100644
--- a/hw/vexpress.c
+++ b/hw/vexpress.c
@@ -467,25 +467,27 @@
     arm_load_kernel(arm_env_get_cpu(first_cpu), &vexpress_binfo);
 }
 
-static void vexpress_a9_init(ram_addr_t ram_size,
-                             const char *boot_device,
-                             const char *kernel_filename,
-                             const char *kernel_cmdline,
-                             const char *initrd_filename,
-                             const char *cpu_model)
+static void vexpress_a9_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     vexpress_common_init(&a9_daughterboard,
                          ram_size, boot_device, kernel_filename,
                          kernel_cmdline, initrd_filename, cpu_model);
 }
 
-static void vexpress_a15_init(ram_addr_t ram_size,
-                              const char *boot_device,
-                              const char *kernel_filename,
-                              const char *kernel_cmdline,
-                              const char *initrd_filename,
-                              const char *cpu_model)
+static void vexpress_a15_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     vexpress_common_init(&a15_daughterboard,
                          ram_size, boot_device, kernel_filename,
                          kernel_cmdline, initrd_filename, cpu_model);
diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index 639371e..e49de0d 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -639,9 +639,8 @@
 
     vfio_disable_msi_common(vdev);
 
-    DPRINTF("%s(%04x:%02x:%02x.%x, msi%s)\n", __func__,
-            vdev->host.domain, vdev->host.bus, vdev->host.slot,
-            vdev->host.function, msix ? "x" : "");
+    DPRINTF("%s(%04x:%02x:%02x.%x)\n", __func__, vdev->host.domain,
+            vdev->host.bus, vdev->host.slot, vdev->host.function);
 }
 
 static void vfio_disable_msi(VFIODevice *vdev)
@@ -930,25 +929,6 @@
     return -errno;
 }
 
-static void vfio_listener_dummy1(MemoryListener *listener)
-{
-    /* We don't do batching (begin/commit) or care about logging */
-}
-
-static void vfio_listener_dummy2(MemoryListener *listener,
-                                 MemoryRegionSection *section)
-{
-    /* We don't do logging or care about nops */
-}
-
-static void vfio_listener_dummy3(MemoryListener *listener,
-                                 MemoryRegionSection *section,
-                                 bool match_data, uint64_t data,
-                                 EventNotifier *e)
-{
-    /* We don't care about eventfds */
-}
-
 static bool vfio_listener_skipped_section(MemoryRegionSection *section)
 {
     return !memory_region_is_ram(section->mr);
@@ -1040,18 +1020,8 @@
 }
 
 static MemoryListener vfio_memory_listener = {
-    .begin = vfio_listener_dummy1,
-    .commit = vfio_listener_dummy1,
     .region_add = vfio_listener_region_add,
     .region_del = vfio_listener_region_del,
-    .region_nop = vfio_listener_dummy2,
-    .log_start = vfio_listener_dummy2,
-    .log_stop = vfio_listener_dummy2,
-    .log_sync = vfio_listener_dummy2,
-    .log_global_start = vfio_listener_dummy1,
-    .log_global_stop = vfio_listener_dummy1,
-    .eventfd_add = vfio_listener_dummy3,
-    .eventfd_del = vfio_listener_dummy3,
 };
 
 static void vfio_listener_release(VFIOContainer *container)
@@ -1536,8 +1506,7 @@
         container->iommu_data.listener = vfio_memory_listener;
         container->iommu_data.release = vfio_listener_release;
 
-        memory_listener_register(&container->iommu_data.listener,
-                                 get_system_memory());
+        memory_listener_register(&container->iommu_data.listener, &address_space_memory);
     } else {
         error_report("vfio: No available IOMMU models\n");
         g_free(container);
@@ -1947,6 +1916,10 @@
     DEFINE_PROP_END_OF_LIST(),
 };
 
+static const VMStateDescription vfio_pci_vmstate = {
+    .name = "vfio-pci",
+    .unmigratable = 1, /* NOTE(review): presumably blocks migration; confirm */
+};
 
 static void vfio_pci_dev_class_init(ObjectClass *klass, void *data)
 {
@@ -1955,6 +1928,8 @@
 
     dc->reset = vfio_pci_reset;
     dc->props = vfio_pci_dev_properties;
+    dc->vmsd = &vfio_pci_vmstate;
+    dc->desc = "VFIO-based PCI device assignment";
     pdc->init = vfio_initfn;
     pdc->exit = vfio_exitfn;
     pdc->config_read = vfio_pci_read_config;
diff --git a/hw/vga-isa.c b/hw/vga-isa.c
index d290473..046602b 100644
--- a/hw/vga-isa.c
+++ b/hw/vga-isa.c
@@ -1,6 +1,8 @@
 /*
  * QEMU ISA VGA Emulator.
  *
+ * see docs/specs/standard-vga.txt for virtual hardware specs.
+ *
  * Copyright (c) 2003 Fabrice Bellard
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
diff --git a/hw/vga-pci.c b/hw/vga-pci.c
index 996d47f..5c4daee 100644
--- a/hw/vga-pci.c
+++ b/hw/vga-pci.c
@@ -1,6 +1,8 @@
 /*
  * QEMU PCI VGA Emulator.
  *
+ * see docs/specs/standard-vga.txt for virtual hardware specs.
+ *
  * Copyright (c) 2003 Fabrice Bellard
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -29,9 +31,23 @@
 #include "qemu-timer.h"
 #include "loader.h"
 
+#define PCI_VGA_IOPORT_OFFSET 0x400
+#define PCI_VGA_IOPORT_SIZE   (0x3e0 - 0x3c0)
+#define PCI_VGA_BOCHS_OFFSET  0x500
+#define PCI_VGA_BOCHS_SIZE    (0x0b * 2)
+#define PCI_VGA_MMIO_SIZE     0x1000
+
+enum vga_pci_flags {
+    PCI_VGA_FLAG_ENABLE_MMIO = 1,   /* bit number in flags, not a mask */
+};
+
 typedef struct PCIVGAState {
     PCIDevice dev;
     VGACommonState vga;
+    uint32_t flags;      /* bits indexed by enum vga_pci_flags */
+    MemoryRegion mmio;   /* container region registered as PCI BAR 2 */
+    MemoryRegion ioport; /* VGA port window at PCI_VGA_IOPORT_OFFSET */
+    MemoryRegion bochs;  /* bochs dispi window at PCI_VGA_BOCHS_OFFSET */
 } PCIVGAState;
 
 static const VMStateDescription vmstate_vga_pci = {
@@ -46,31 +62,125 @@
     }
 };
 
+/* Read from the MMIO window that remaps legacy VGA ports 0x3c0-0x3df. */
+static uint64_t pci_vga_ioport_read(void *ptr, target_phys_addr_t addr,
+                                    unsigned size)
+{
+    PCIVGAState *d = ptr;
+    uint64_t ret = 0;   /* returned as-is for sizes other than 1 and 2 */
+    switch (size) {
+    case 1:   /* addr is window-relative; vga_ioport_read() wants the port */
+        ret = vga_ioport_read(&d->vga, addr + 0x3c0);
+        break;
+    case 2:   /* assemble the word little-endian from two byte reads */
+        ret  = vga_ioport_read(&d->vga, addr + 0x3c0);
+        ret |= vga_ioport_read(&d->vga, addr + 0x3c1) << 8;
+        break;
+    }
+    return ret;
+}
+
+/* Write to the MMIO window that remaps legacy VGA ports 0x3c0-0x3df. */
+static void pci_vga_ioport_write(void *ptr, target_phys_addr_t addr,
+                                 uint64_t val, unsigned size)
+{
+    PCIVGAState *d = ptr;
+    switch (size) {
+    case 1:   /* addr is window-relative; vga_ioport_write() wants the port */
+        vga_ioport_write(&d->vga, addr + 0x3c0, val);
+        break;
+    case 2:
+        /*
+         * Write the low byte first, then the high byte, so a single
+         * word write can set an index register and then its data.
+         */
+        vga_ioport_write(&d->vga, addr + 0x3c0, val & 0xff);
+        vga_ioport_write(&d->vga, addr + 0x3c1, (val >> 8) & 0xff);
+        break;
+    }
+}
+
+/* Window is little-endian; .valid limits guest access sizes to 1-4 bytes, */
+/* while .impl declares that the callbacks handle 1- and 2-byte accesses.  */
+static const MemoryRegionOps pci_vga_ioport_ops = {
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = { .min_access_size = 1, .max_access_size = 4 },
+    .impl  = { .min_access_size = 1, .max_access_size = 2 },
+    .read  = pci_vga_ioport_read,
+    .write = pci_vga_ioport_write,
+};
+
+/* Read the bochs dispi register selected by the 16-bit slot at addr. */
+static uint64_t pci_vga_bochs_read(void *ptr, target_phys_addr_t addr,
+                                   unsigned size)
+{
+    PCIVGAState *pci_vga = ptr;
+    int reg = addr >> 1;   /* one register per two bytes of the window */
+    vbe_ioport_write_index(&pci_vga->vga, 0, reg);
+    return vbe_ioport_read_data(&pci_vga->vga, 0);
+}
+
+/* Store val into the bochs dispi register selected by the slot at addr. */
+static void pci_vga_bochs_write(void *ptr, target_phys_addr_t addr,
+                                uint64_t val, unsigned size)
+{
+    PCIVGAState *pci_vga = ptr;
+    int reg = addr >> 1;   /* one register per two bytes of the window */
+    vbe_ioport_write_index(&pci_vga->vga, 0, reg);
+    vbe_ioport_write_data(&pci_vga->vga, 0, val);
+}
+
+/* Window is little-endian; .valid limits guest access sizes to 1-4 bytes, */
+/* while .impl declares that the callbacks handle 2-byte accesses only.    */
+static const MemoryRegionOps pci_vga_bochs_ops = {
+    .endianness = DEVICE_LITTLE_ENDIAN,
+    .valid = { .min_access_size = 1, .max_access_size = 4 },
+    .impl  = { .min_access_size = 2, .max_access_size = 2 },
+    .read  = pci_vga_bochs_read,
+    .write = pci_vga_bochs_write,
+};
+
 static int pci_std_vga_initfn(PCIDevice *dev)
 {
-     PCIVGAState *d = DO_UPCAST(PCIVGAState, dev, dev);
-     VGACommonState *s = &d->vga;
+    PCIVGAState *d = DO_UPCAST(PCIVGAState, dev, dev);
+    VGACommonState *s = &d->vga;
 
-     // vga + console init
-     vga_common_init(s);
-     vga_init(s, pci_address_space(dev), pci_address_space_io(dev), true);
+    /* common VGA state, legacy memory/io mappings, then the console */
+    vga_common_init(s);
+    vga_init(s, pci_address_space(dev), pci_address_space_io(dev), true);
 
-     s->ds = graphic_console_init(s->update, s->invalidate,
-                                  s->screen_dump, s->text_update, s);
+    s->ds = graphic_console_init(s->update, s->invalidate,
+                                 s->screen_dump, s->text_update, s);
 
-     /* XXX: VGA_RAM_SIZE must be a power of two */
-     pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, &s->vram);
+    /* XXX: VGA_RAM_SIZE must be a power of two */
+    pci_register_bar(&d->dev, 0, PCI_BASE_ADDRESS_MEM_PREFETCH, &s->vram);
 
-     if (!dev->rom_bar) {
-         /* compatibility with pc-0.13 and older */
-         vga_init_vbe(s, pci_address_space(dev));
-     }
+    /* BAR 2: MMIO window exposing the VGA ports and bochs dispi registers */
+    if (d->flags & (1 << PCI_VGA_FLAG_ENABLE_MMIO)) {
+        memory_region_init(&d->mmio, "vga.mmio", PCI_VGA_MMIO_SIZE);
+        memory_region_init_io(&d->ioport, &pci_vga_ioport_ops, d,
+                              "vga ioports remapped", PCI_VGA_IOPORT_SIZE);
+        memory_region_init_io(&d->bochs, &pci_vga_bochs_ops, d,
+                              "bochs dispi interface", PCI_VGA_BOCHS_SIZE);
 
-     return 0;
+        memory_region_add_subregion(&d->mmio, PCI_VGA_IOPORT_OFFSET,
+                                    &d->ioport);
+        memory_region_add_subregion(&d->mmio, PCI_VGA_BOCHS_OFFSET,
+                                    &d->bochs);
+        pci_register_bar(&d->dev, 2, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
+    }
+
+    if (!dev->rom_bar) {
+        /* compatibility with pc-0.13 and older */
+        vga_init_vbe(s, pci_address_space(dev));
+    }
+
+    return 0;
 }
 
 static Property vga_pci_properties[] = {
     DEFINE_PROP_UINT32("vgamem_mb", PCIVGAState, vga.vram_size_mb, 16),
+    DEFINE_PROP_BIT("mmio", PCIVGAState, flags, PCI_VGA_FLAG_ENABLE_MMIO, true),
     DEFINE_PROP_END_OF_LIST(),
 };
 
diff --git a/hw/vga.c b/hw/vga.c
index afaef0d..a07a6fb 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -582,7 +582,6 @@
     }
 }
 
-#ifdef CONFIG_BOCHS_VBE
 static uint32_t vbe_ioport_read_index(void *opaque, uint32_t addr)
 {
     VGACommonState *s = opaque;
@@ -591,7 +590,7 @@
     return val;
 }
 
-static uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr)
+uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr)
 {
     VGACommonState *s = opaque;
     uint32_t val;
@@ -627,13 +626,13 @@
     return val;
 }
 
-static void vbe_ioport_write_index(void *opaque, uint32_t addr, uint32_t val)
+void vbe_ioport_write_index(void *opaque, uint32_t addr, uint32_t val)
 {
     VGACommonState *s = opaque;
     s->vbe_index = val;
 }
 
-static void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
+void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val)
 {
     VGACommonState *s = opaque;
 
@@ -784,7 +783,6 @@
         }
     }
 }
-#endif
 
 /* called for accesses between 0xa0000 and 0xc0000 */
 uint32_t vga_mem_readb(VGACommonState *s, target_phys_addr_t addr)
@@ -1129,14 +1127,12 @@
                             uint32_t *pline_compare)
 {
     uint32_t start_addr, line_offset, line_compare;
-#ifdef CONFIG_BOCHS_VBE
+
     if (s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED) {
         line_offset = s->vbe_line_offset;
         start_addr = s->vbe_start_addr;
         line_compare = 65535;
-    } else
-#endif
-    {
+    } else {
         /* compute line_offset in bytes */
         line_offset = s->cr[VGA_CRTC_OFFSET];
         line_offset <<= 3;
@@ -1572,12 +1568,10 @@
 static int vga_get_bpp(VGACommonState *s)
 {
     int ret;
-#ifdef CONFIG_BOCHS_VBE
+
     if (s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED) {
         ret = s->vbe_regs[VBE_DISPI_INDEX_BPP];
-    } else
-#endif
-    {
+    } else {
         ret = 0;
     }
     return ret;
@@ -1587,13 +1581,10 @@
 {
     int width, height;
 
-#ifdef CONFIG_BOCHS_VBE
     if (s->vbe_regs[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED) {
         width = s->vbe_regs[VBE_DISPI_INDEX_XRES];
         height = s->vbe_regs[VBE_DISPI_INDEX_YRES];
-    } else
-#endif
-    {
+    } else {
         width = (s->cr[VGA_CRTC_H_DISP] + 1) * 8;
         height = s->cr[VGA_CRTC_V_DISP_END] |
             ((s->cr[VGA_CRTC_OVERFLOW] & 0x02) << 7) |
@@ -1948,14 +1939,12 @@
     s->dac_8bit = 0;
     memset(s->palette, '\0', sizeof(s->palette));
     s->bank_offset = 0;
-#ifdef CONFIG_BOCHS_VBE
     s->vbe_index = 0;
     memset(s->vbe_regs, '\0', sizeof(s->vbe_regs));
     s->vbe_regs[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5;
     s->vbe_start_addr = 0;
     s->vbe_line_offset = 0;
     s->vbe_bank_mask = (s->vram_size >> 16) - 1;
-#endif
     memset(s->font_offsets, '\0', sizeof(s->font_offsets));
     s->graphic_mode = -1; /* force full update */
     s->shift_control = 0;
@@ -2229,13 +2218,11 @@
 
         VMSTATE_INT32(bank_offset, VGACommonState),
         VMSTATE_UINT8_EQUAL(is_vbe_vmstate, VGACommonState),
-#ifdef CONFIG_BOCHS_VBE
         VMSTATE_UINT16(vbe_index, VGACommonState),
         VMSTATE_UINT16_ARRAY(vbe_regs, VGACommonState, VBE_DISPI_INDEX_NB),
         VMSTATE_UINT32(vbe_start_addr, VGACommonState),
         VMSTATE_UINT32(vbe_line_offset, VGACommonState),
         VMSTATE_UINT32(vbe_bank_mask, VGACommonState),
-#endif
         VMSTATE_END_OF_LIST()
     }
 };
@@ -2275,11 +2262,7 @@
     }
     s->vram_size_mb = s->vram_size >> 20;
 
-#ifdef CONFIG_BOCHS_VBE
     s->is_vbe_vmstate = 1;
-#else
-    s->is_vbe_vmstate = 0;
-#endif
     memory_region_init_ram(&s->vram, "vga.vram", s->vram_size);
     vmstate_register_ram_global(&s->vram);
     xen_register_framebuffer(&s->vram);
@@ -2314,7 +2297,6 @@
     PORTIO_END_OF_LIST(),
 };
 
-#ifdef CONFIG_BOCHS_VBE
 static const MemoryRegionPortio vbe_portio_list[] = {
     { 0, 1, 2, .read = vbe_ioport_read_index, .write = vbe_ioport_write_index },
 # ifdef TARGET_I386
@@ -2324,7 +2306,6 @@
 # endif
     PORTIO_END_OF_LIST(),
 };
-#endif /* CONFIG_BOCHS_VBE */
 
 /* Used by both ISA and PCI */
 MemoryRegion *vga_init_io(VGACommonState *s,
@@ -2334,10 +2315,7 @@
     MemoryRegion *vga_mem;
 
     *vga_ports = vga_portio_list;
-    *vbe_ports = NULL;
-#ifdef CONFIG_BOCHS_VBE
     *vbe_ports = vbe_portio_list;
-#endif
 
     vga_mem = g_malloc(sizeof(*vga_mem));
     memory_region_init_io(vga_mem, &vga_mem_ops, s,
@@ -2379,7 +2357,6 @@
 
 void vga_init_vbe(VGACommonState *s, MemoryRegion *system_memory)
 {
-#ifdef CONFIG_BOCHS_VBE
     /* With pc-0.12 and below we map both the PCI BAR and the fixed VBE region,
      * so use an alias to avoid double-mapping the same region.
      */
@@ -2390,7 +2367,6 @@
                                 VBE_DISPI_LFB_PHYSICAL_ADDRESS,
                                 &s->vram_vbe);
     s->vbe_mapped = 1;
-#endif 
 }
 /********************************************************/
 /* vga screen dump */
diff --git a/hw/vga_int.h b/hw/vga_int.h
index 330a32f..144e7d3 100644
--- a/hw/vga_int.h
+++ b/hw/vga_int.h
@@ -29,9 +29,6 @@
 #define ST01_V_RETRACE      0x08
 #define ST01_DISP_ENABLE    0x01
 
-/* bochs VBE support */
-#define CONFIG_BOCHS_VBE
-
 #define VBE_DISPI_MAX_XRES              16000
 #define VBE_DISPI_MAX_YRES              12000
 #define VBE_DISPI_MAX_BPP               32
@@ -65,21 +62,6 @@
 
 #define VBE_DISPI_LFB_PHYSICAL_ADDRESS  0xE0000000
 
-#ifdef CONFIG_BOCHS_VBE
-
-#define VGA_STATE_COMMON_BOCHS_VBE              \
-    uint16_t vbe_index;                         \
-    uint16_t vbe_regs[VBE_DISPI_INDEX_NB];      \
-    uint32_t vbe_start_addr;                    \
-    uint32_t vbe_line_offset;                   \
-    uint32_t vbe_bank_mask;			\
-    int vbe_mapped;
-#else
-
-#define VGA_STATE_COMMON_BOCHS_VBE
-
-#endif /* !CONFIG_BOCHS_VBE */
-
 #define CH_ATTR_SIZE (160 * 100)
 #define VGA_MAX_HEIGHT 2048
 
@@ -140,7 +122,13 @@
     void (*get_resolution)(struct VGACommonState *s,
                         int *pwidth,
                         int *pheight);
-    VGA_STATE_COMMON_BOCHS_VBE
+    /* bochs vbe state */
+    uint16_t vbe_index;
+    uint16_t vbe_regs[VBE_DISPI_INDEX_NB];
+    uint32_t vbe_start_addr;
+    uint32_t vbe_line_offset;
+    uint32_t vbe_bank_mask;
+    int vbe_mapped;
     /* display refresh support */
     DisplayState *ds;
     uint32_t font_offsets[2];
@@ -208,7 +196,11 @@
 void ppm_save(const char *filename, struct DisplaySurface *ds, Error **errp);
 
 int vga_ioport_invalid(VGACommonState *s, uint32_t addr);
+
 void vga_init_vbe(VGACommonState *s, MemoryRegion *address_space);
+uint32_t vbe_ioport_read_data(void *opaque, uint32_t addr);
+void vbe_ioport_write_index(void *opaque, uint32_t addr, uint32_t val);
+void vbe_ioport_write_data(void *opaque, uint32_t addr, uint32_t val);
 
 extern const uint8_t sr_mask[8];
 extern const uint8_t gr_mask[16];
diff --git a/hw/vhost.c b/hw/vhost.c
index d0ce5aa..0b4ac3f 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -434,8 +434,7 @@
 
 static bool vhost_section(MemoryRegionSection *section)
 {
-    return section->address_space == get_system_memory()
-        && memory_region_is_ram(section->mr);
+    return memory_region_is_ram(section->mr);
 }
 
 static void vhost_begin(MemoryListener *listener)
@@ -793,7 +792,7 @@
     hdev->log_size = 0;
     hdev->log_enabled = false;
     hdev->started = false;
-    memory_listener_register(&hdev->memory_listener, NULL);
+    memory_listener_register(&hdev->memory_listener, &address_space_memory);
     hdev->force = force;
     return 0;
 fail:
diff --git a/hw/virtex_ml507.c b/hw/virtex_ml507.c
index 79bc0d1..c59e1cb 100644
--- a/hw/virtex_ml507.c
+++ b/hw/virtex_ml507.c
@@ -24,7 +24,7 @@
 
 #include "sysbus.h"
 #include "hw.h"
-#include "pc.h"
+#include "serial.h"
 #include "net.h"
 #include "flash.h"
 #include "sysemu.h"
@@ -183,12 +183,12 @@
     return fdt_size;
 }
 
-static void virtex_init(ram_addr_t ram_size,
-                        const char *boot_device,
-                        const char *kernel_filename,
-                        const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void virtex_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
     MemoryRegion *address_space_mem = get_system_memory();
     DeviceState *dev;
     PowerPCCPU *cpu;
diff --git a/hw/virtio-net.c b/hw/virtio-net.c
index 8342391..50ba728 100644
--- a/hw/virtio-net.c
+++ b/hw/virtio-net.c
@@ -921,7 +921,9 @@
             qemu_get_buffer(f, n->mac_table.macs,
                             n->mac_table.in_use * ETH_ALEN);
         } else if (n->mac_table.in_use) {
-            qemu_fseek(f, n->mac_table.in_use * ETH_ALEN, SEEK_CUR);
+            uint8_t *buf = g_malloc0(n->mac_table.in_use * ETH_ALEN);
+            qemu_get_buffer(f, buf, n->mac_table.in_use * ETH_ALEN);
+            g_free(buf);
             n->mac_table.multi_overflow = n->mac_table.uni_overflow = 1;
             n->mac_table.in_use = 0;
         }
diff --git a/hw/xen_machine_pv.c b/hw/xen_machine_pv.c
index 4b72aa7..4264703 100644
--- a/hw/xen_machine_pv.c
+++ b/hw/xen_machine_pv.c
@@ -29,13 +29,12 @@
 #include "xen_domainbuild.h"
 #include "blockdev.h"
 
-static void xen_init_pv(ram_addr_t ram_size,
-			const char *boot_device,
-			const char *kernel_filename,
-			const char *kernel_cmdline,
-			const char *initrd_filename,
-			const char *cpu_model)
+static void xen_init_pv(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     X86CPU *cpu;
     CPUX86State *env;
     DriveInfo *dinfo;
diff --git a/hw/xen_pt.c b/hw/xen_pt.c
index 838bcea..d3d7c8b 100644
--- a/hw/xen_pt.c
+++ b/hw/xen_pt.c
@@ -59,6 +59,7 @@
 #include "xen_backend.h"
 #include "xen_pt.h"
 #include "range.h"
+#include "exec-memory.h"
 
 #define XEN_PT_NR_IRQS (256)
 static uint8_t xen_pt_mapped_machine_irq[XEN_PT_NR_IRQS] = {0};
@@ -600,14 +601,6 @@
     }
 }
 
-static void xen_pt_begin(MemoryListener *l)
-{
-}
-
-static void xen_pt_commit(MemoryListener *l)
-{
-}
-
 static void xen_pt_region_add(MemoryListener *l, MemoryRegionSection *sec)
 {
     XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
@@ -624,36 +617,31 @@
     xen_pt_region_update(s, sec, false);
 }
 
-static void xen_pt_region_nop(MemoryListener *l, MemoryRegionSection *s)
+static void xen_pt_io_region_add(MemoryListener *l, MemoryRegionSection *sec)
 {
+    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
+                                             io_listener);
+
+    xen_pt_region_update(s, sec, true);
 }
 
-static void xen_pt_log_fns(MemoryListener *l, MemoryRegionSection *s)
+static void xen_pt_io_region_del(MemoryListener *l, MemoryRegionSection *sec)
 {
-}
+    XenPCIPassthroughState *s = container_of(l, XenPCIPassthroughState,
+                                             io_listener);
 
-static void xen_pt_log_global_fns(MemoryListener *l)
-{
-}
-
-static void xen_pt_eventfd_fns(MemoryListener *l, MemoryRegionSection *s,
-                               bool match_data, uint64_t data, EventNotifier *n)
-{
+    xen_pt_region_update(s, sec, false);
 }
 
 static const MemoryListener xen_pt_memory_listener = {
-    .begin = xen_pt_begin,
-    .commit = xen_pt_commit,
     .region_add = xen_pt_region_add,
-    .region_nop = xen_pt_region_nop,
     .region_del = xen_pt_region_del,
-    .log_start = xen_pt_log_fns,
-    .log_stop = xen_pt_log_fns,
-    .log_sync = xen_pt_log_fns,
-    .log_global_start = xen_pt_log_global_fns,
-    .log_global_stop = xen_pt_log_global_fns,
-    .eventfd_add = xen_pt_eventfd_fns,
-    .eventfd_del = xen_pt_eventfd_fns,
+    .priority = 10,
+};
+
+static const MemoryListener xen_pt_io_listener = {
+    .region_add = xen_pt_io_region_add,
+    .region_del = xen_pt_io_region_del,
     .priority = 10,
 };
 
@@ -694,6 +682,7 @@
     }
 
     s->memory_listener = xen_pt_memory_listener;
+    s->io_listener = xen_pt_io_listener;
 
     /* Handle real device's MMIO/PIO BARs */
     xen_pt_register_regions(s);
@@ -760,7 +749,8 @@
     }
 
 out:
-    memory_listener_register(&s->memory_listener, NULL);
+    memory_listener_register(&s->memory_listener, &address_space_memory);
+    memory_listener_register(&s->io_listener, &address_space_io);
     XEN_PT_LOG(d, "Real physical device %02x:%02x.%d registered successfuly!\n",
                bus, slot, func);
 
@@ -815,6 +805,7 @@
 
     xen_pt_unregister_regions(s);
     memory_listener_unregister(&s->memory_listener);
+    memory_listener_unregister(&s->io_listener);
 
     xen_host_pci_device_put(&s->real_device);
 }
diff --git a/hw/xen_pt.h b/hw/xen_pt.h
index 112477a..f15e69a 100644
--- a/hw/xen_pt.h
+++ b/hw/xen_pt.h
@@ -209,6 +209,7 @@
     MemoryRegion rom;
 
     MemoryListener memory_listener;
+    MemoryListener io_listener;
 };
 
 int xen_pt_config_init(XenPCIPassthroughState *s);
diff --git a/hw/xilinx_zynq.c b/hw/xilinx_zynq.c
index fd46ba2..c55dafb 100644
--- a/hw/xilinx_zynq.c
+++ b/hw/xilinx_zynq.c
@@ -77,10 +77,13 @@
 
 }
 
-static void zynq_init(ram_addr_t ram_size, const char *boot_device,
-                        const char *kernel_filename, const char *kernel_cmdline,
-                        const char *initrd_filename, const char *cpu_model)
+static void zynq_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     ARMCPU *cpu;
     MemoryRegion *address_space_mem = get_system_memory();
     MemoryRegion *ext_ram = g_new(MemoryRegion, 1);
diff --git a/hw/xtensa_lx60.c b/hw/xtensa_lx60.c
index 3653f65..0c407d3 100644
--- a/hw/xtensa_lx60.c
+++ b/hw/xtensa_lx60.c
@@ -31,7 +31,8 @@
 #include "elf.h"
 #include "memory.h"
 #include "exec-memory.h"
-#include "pc.h"
+#include "serial.h"
+#include "net.h"
 #include "sysbus.h"
 #include "flash.h"
 #include "blockdev.h"
@@ -268,11 +269,14 @@
     }
 }
 
-static void xtensa_lx60_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void xtensa_lx60_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     static const LxBoardDesc lx60_board = {
         .flash_size = 0x400000,
         .flash_sector_size = 0x10000,
@@ -283,11 +287,14 @@
             initrd_filename, cpu_model);
 }
 
-static void xtensa_lx200_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void xtensa_lx200_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     static const LxBoardDesc lx200_board = {
         .flash_size = 0x1000000,
         .flash_sector_size = 0x20000,
diff --git a/hw/xtensa_sim.c b/hw/xtensa_sim.c
index 831460b..2e846d8 100644
--- a/hw/xtensa_sim.c
+++ b/hw/xtensa_sim.c
@@ -96,11 +96,14 @@
     }
 }
 
-static void xtensa_sim_init(ram_addr_t ram_size,
-                     const char *boot_device,
-                     const char *kernel_filename, const char *kernel_cmdline,
-                     const char *initrd_filename, const char *cpu_model)
+static void xtensa_sim_init(QEMUMachineInitArgs *args)
 {
+    ram_addr_t ram_size = args->ram_size;
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
+    const char *boot_device = args->boot_device;
     if (!cpu_model) {
         cpu_model = XTENSA_DEFAULT_CPU_MODEL;
     }
diff --git a/hw/z2.c b/hw/z2.c
index 076fad2..f62b806 100644
--- a/hw/z2.c
+++ b/hw/z2.c
@@ -295,11 +295,12 @@
     .class_init    = aer915_class_init,
 };
 
-static void z2_init(ram_addr_t ram_size,
-                const char *boot_device,
-                const char *kernel_filename, const char *kernel_cmdline,
-                const char *initrd_filename, const char *cpu_model)
+static void z2_init(QEMUMachineInitArgs *args)
 {
+    const char *cpu_model = args->cpu_model;
+    const char *kernel_filename = args->kernel_filename;
+    const char *kernel_cmdline = args->kernel_cmdline;
+    const char *initrd_filename = args->initrd_filename;
     MemoryRegion *address_space_mem = get_system_memory();
     uint32_t sector_len = 0x10000;
     PXA2xxState *mpu;
diff --git a/kvm-all.c b/kvm-all.c
index 92a7137..c2c6909 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -454,9 +454,10 @@
     return ret;
 }
 
-int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
+static void kvm_coalesce_mmio_region(MemoryListener *listener,
+                                     MemoryRegionSection *section,
+                                     target_phys_addr_t start, target_phys_addr_t size)
 {
-    int ret = -ENOSYS;
     KVMState *s = kvm_state;
 
     if (s->coalesced_mmio) {
@@ -466,15 +467,14 @@
         zone.size = size;
         zone.pad = 0;
 
-        ret = kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
+        (void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
     }
-
-    return ret;
 }
 
-int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
+static void kvm_uncoalesce_mmio_region(MemoryListener *listener,
+                                       MemoryRegionSection *section,
+                                       target_phys_addr_t start, target_phys_addr_t size)
 {
-    int ret = -ENOSYS;
     KVMState *s = kvm_state;
 
     if (s->coalesced_mmio) {
@@ -484,10 +484,8 @@
         zone.size = size;
         zone.pad = 0;
 
-        ret = kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
+        (void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
     }
-
-    return ret;
 }
 
 int kvm_check_extension(KVMState *s, unsigned int extension)
@@ -703,14 +701,6 @@
     }
 }
 
-static void kvm_begin(MemoryListener *listener)
-{
-}
-
-static void kvm_commit(MemoryListener *listener)
-{
-}
-
 static void kvm_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
@@ -723,11 +713,6 @@
     kvm_set_phys_mem(section, false);
 }
 
-static void kvm_region_nop(MemoryListener *listener,
-                           MemoryRegionSection *section)
-{
-}
-
 static void kvm_log_sync(MemoryListener *listener,
                          MemoryRegionSection *section)
 {
@@ -755,9 +740,12 @@
     assert(r >= 0);
 }
 
-static void kvm_mem_ioeventfd_add(MemoryRegionSection *section,
-                                  bool match_data, uint64_t data, int fd)
+static void kvm_mem_ioeventfd_add(MemoryListener *listener,
+                                  MemoryRegionSection *section,
+                                  bool match_data, uint64_t data,
+                                  EventNotifier *e)
 {
+    int fd = event_notifier_get_fd(e);
     int r;
 
     assert(match_data && section->size <= 8);
@@ -769,9 +757,12 @@
     }
 }
 
-static void kvm_mem_ioeventfd_del(MemoryRegionSection *section,
-                                  bool match_data, uint64_t data, int fd)
+static void kvm_mem_ioeventfd_del(MemoryListener *listener,
+                                  MemoryRegionSection *section,
+                                  bool match_data, uint64_t data,
+                                  EventNotifier *e)
 {
+    int fd = event_notifier_get_fd(e);
     int r;
 
     r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
@@ -781,9 +772,12 @@
     }
 }
 
-static void kvm_io_ioeventfd_add(MemoryRegionSection *section,
-                                 bool match_data, uint64_t data, int fd)
+static void kvm_io_ioeventfd_add(MemoryListener *listener,
+                                 MemoryRegionSection *section,
+                                 bool match_data, uint64_t data,
+                                 EventNotifier *e)
 {
+    int fd = event_notifier_get_fd(e);
     int r;
 
     assert(match_data && section->size == 2);
@@ -795,10 +789,13 @@
     }
 }
 
-static void kvm_io_ioeventfd_del(MemoryRegionSection *section,
-                                 bool match_data, uint64_t data, int fd)
+static void kvm_io_ioeventfd_del(MemoryListener *listener,
+                                 MemoryRegionSection *section,
+                                 bool match_data, uint64_t data,
+                                 EventNotifier *e)
 
 {
+    int fd = event_notifier_get_fd(e);
     int r;
 
     r = kvm_set_ioeventfd_pio_word(fd, section->offset_within_address_space,
@@ -808,47 +805,24 @@
     }
 }
 
-static void kvm_eventfd_add(MemoryListener *listener,
-                            MemoryRegionSection *section,
-                            bool match_data, uint64_t data,
-                            EventNotifier *e)
-{
-    if (section->address_space == get_system_memory()) {
-        kvm_mem_ioeventfd_add(section, match_data, data,
-			      event_notifier_get_fd(e));
-    } else {
-        kvm_io_ioeventfd_add(section, match_data, data,
-			     event_notifier_get_fd(e));
-    }
-}
-
-static void kvm_eventfd_del(MemoryListener *listener,
-                            MemoryRegionSection *section,
-                            bool match_data, uint64_t data,
-                            EventNotifier *e)
-{
-    if (section->address_space == get_system_memory()) {
-        kvm_mem_ioeventfd_del(section, match_data, data,
-			      event_notifier_get_fd(e));
-    } else {
-        kvm_io_ioeventfd_del(section, match_data, data,
-			     event_notifier_get_fd(e));
-    }
-}
-
 static MemoryListener kvm_memory_listener = {
-    .begin = kvm_begin,
-    .commit = kvm_commit,
     .region_add = kvm_region_add,
     .region_del = kvm_region_del,
-    .region_nop = kvm_region_nop,
     .log_start = kvm_log_start,
     .log_stop = kvm_log_stop,
     .log_sync = kvm_log_sync,
     .log_global_start = kvm_log_global_start,
     .log_global_stop = kvm_log_global_stop,
-    .eventfd_add = kvm_eventfd_add,
-    .eventfd_del = kvm_eventfd_del,
+    .eventfd_add = kvm_mem_ioeventfd_add,
+    .eventfd_del = kvm_mem_ioeventfd_del,
+    .coalesced_mmio_add = kvm_coalesce_mmio_region,
+    .coalesced_mmio_del = kvm_uncoalesce_mmio_region,
+    .priority = 10,
+};
+
+static MemoryListener kvm_io_listener = {
+    .eventfd_add = kvm_io_ioeventfd_add,
+    .eventfd_del = kvm_io_ioeventfd_del,
     .priority = 10,
 };
 
@@ -1401,7 +1375,8 @@
     }
 
     kvm_state = s;
-    memory_listener_register(&kvm_memory_listener, NULL);
+    memory_listener_register(&kvm_memory_listener, &address_space_memory);
+    memory_listener_register(&kvm_io_listener, &address_space_io);
 
     s->many_ioeventfds = kvm_check_many_ioeventfds();
 
diff --git a/kvm-stub.c b/kvm-stub.c
index 3c52eb5..a3455e2 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -29,16 +29,6 @@
     return -ENOSYS;
 }
 
-int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
-{
-    return -ENOSYS;
-}
-
-int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
-{
-    return -ENOSYS;
-}
-
 int kvm_init(void)
 {
     return -ENOSYS;
diff --git a/kvm.h b/kvm.h
index dea2998..eefcb49 100644
--- a/kvm.h
+++ b/kvm.h
@@ -129,8 +129,6 @@
 void *kvm_arch_vmalloc(ram_addr_t size);
 void kvm_setup_guest_memory(void *start, size_t size);
 
-int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
-int kvm_uncoalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
 void kvm_flush_coalesced_mmio_buffer(void);
 #endif
 
diff --git a/linux-user/alpha/target_signal.h b/linux-user/alpha/target_signal.h
index 94f15f6..d3822da 100644
--- a/linux-user/alpha/target_signal.h
+++ b/linux-user/alpha/target_signal.h
@@ -6,9 +6,10 @@
 /* this struct defines a stack used during syscall handling */
 
 typedef struct target_sigaltstack {
-	abi_ulong ss_sp;
-	abi_long ss_flags;
-	abi_ulong ss_size;
+    abi_ulong ss_sp;
+    int32_t ss_flags;
+    int32_t dummy;
+    abi_ulong ss_size;
 } target_stack_t;
 
 
diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c
index b47025f..381ab89 100644
--- a/linux-user/linuxload.c
+++ b/linux-user/linuxload.c
@@ -140,8 +140,9 @@
     bprm->p = TARGET_PAGE_SIZE*MAX_ARG_PAGES-sizeof(unsigned int);
     memset(bprm->page, 0, sizeof(bprm->page));
     retval = open(filename, O_RDONLY);
-    if (retval < 0)
-        return retval;
+    if (retval < 0) {
+        return -errno;
+    }
     bprm->fd = retval;
     bprm->filename = (char *)filename;
     bprm->argc = count(argv);
@@ -165,8 +166,7 @@
             retval = load_flt_binary(bprm,regs,infop);
 #endif
         } else {
-            fprintf(stderr, "Unknown binary format\n");
-            return -1;
+            return -ENOEXEC;
         }
     }
 
diff --git a/linux-user/main.c b/linux-user/main.c
index f4bbe69..5d20abd 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -2527,6 +2527,7 @@
         case EXCP_BREAK:
             /* Return address is 4 bytes after the call.  */
             env->regs[14] += 4;
+            env->sregs[SR_PC] = env->regs[14];
             ret = do_syscall(env, 
                              env->regs[12], 
                              env->regs[5], 
@@ -2537,7 +2538,6 @@
                              env->regs[10],
                              0, 0);
             env->regs[3] = ret;
-            env->sregs[SR_PC] = env->regs[14];
             break;
         case EXCP_HW_EXCP:
             env->regs[17] = env->sregs[SR_PC] + 4;
@@ -3574,7 +3574,7 @@
     ret = loader_exec(filename, target_argv, target_environ, regs,
         info, &bprm);
     if (ret != 0) {
-        printf("Error %d while loading %s\n", ret, filename);
+        printf("Error while loading %s: %s\n", filename, strerror(-ret));
         _exit(1);
     }
 
diff --git a/linux-user/qemu.h b/linux-user/qemu.h
index fc4cc00..5e53dca 100644
--- a/linux-user/qemu.h
+++ b/linux-user/qemu.h
@@ -219,6 +219,9 @@
 
 #include "qemu-log.h"
 
+/* syscall.c */
+int host_to_target_waitstatus(int status);
+
 /* strace.c */
 void print_syscall(int num,
                    abi_long arg1, abi_long arg2, abi_long arg3,
diff --git a/linux-user/signal.c b/linux-user/signal.c
index 15bc4e8..95e2ffa 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -202,46 +202,67 @@
 static inline void host_to_target_siginfo_noswap(target_siginfo_t *tinfo,
                                                  const siginfo_t *info)
 {
-    int sig;
-    sig = host_to_target_signal(info->si_signo);
+    int sig = host_to_target_signal(info->si_signo);
     tinfo->si_signo = sig;
     tinfo->si_errno = 0;
     tinfo->si_code = info->si_code;
-    if (sig == SIGILL || sig == SIGFPE || sig == SIGSEGV ||
-        sig == SIGBUS || sig == SIGTRAP) {
-        /* should never come here, but who knows. The information for
-           the target is irrelevant */
+
+    if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV
+        || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) {
+        /* Should never come here, but who knows. The information for
+           the target is irrelevant.  */
         tinfo->_sifields._sigfault._addr = 0;
-    } else if (sig == SIGIO) {
+    } else if (sig == TARGET_SIGIO) {
+        tinfo->_sifields._sigpoll._band = info->si_band;
 	tinfo->_sifields._sigpoll._fd = info->si_fd;
+    } else if (sig == TARGET_SIGCHLD) {
+        tinfo->_sifields._sigchld._pid = info->si_pid;
+        tinfo->_sifields._sigchld._uid = info->si_uid;
+        tinfo->_sifields._sigchld._status
+            = host_to_target_waitstatus(info->si_status);
+        tinfo->_sifields._sigchld._utime = info->si_utime;
+        tinfo->_sifields._sigchld._stime = info->si_stime;
     } else if (sig >= TARGET_SIGRTMIN) {
         tinfo->_sifields._rt._pid = info->si_pid;
         tinfo->_sifields._rt._uid = info->si_uid;
         /* XXX: potential problem if 64 bit */
-        tinfo->_sifields._rt._sigval.sival_ptr =
-            (abi_ulong)(unsigned long)info->si_value.sival_ptr;
+        tinfo->_sifields._rt._sigval.sival_ptr
+            = (abi_ulong)(unsigned long)info->si_value.sival_ptr;
     }
 }
 
 static void tswap_siginfo(target_siginfo_t *tinfo,
                           const target_siginfo_t *info)
 {
-    int sig;
-    sig = info->si_signo;
+    int sig = info->si_signo;
     tinfo->si_signo = tswap32(sig);
     tinfo->si_errno = tswap32(info->si_errno);
     tinfo->si_code = tswap32(info->si_code);
-    if (sig == SIGILL || sig == SIGFPE || sig == SIGSEGV ||
-        sig == SIGBUS || sig == SIGTRAP) {
-        tinfo->_sifields._sigfault._addr =
-            tswapal(info->_sifields._sigfault._addr);
-    } else if (sig == SIGIO) {
-	tinfo->_sifields._sigpoll._fd = tswap32(info->_sifields._sigpoll._fd);
+
+    if (sig == TARGET_SIGILL || sig == TARGET_SIGFPE || sig == TARGET_SIGSEGV
+        || sig == TARGET_SIGBUS || sig == TARGET_SIGTRAP) {
+        tinfo->_sifields._sigfault._addr
+            = tswapal(info->_sifields._sigfault._addr);
+    } else if (sig == TARGET_SIGIO) {
+        tinfo->_sifields._sigpoll._band
+            = tswap32(info->_sifields._sigpoll._band);
+        tinfo->_sifields._sigpoll._fd = tswap32(info->_sifields._sigpoll._fd);
+    } else if (sig == TARGET_SIGCHLD) {
+        tinfo->_sifields._sigchld._pid
+            = tswap32(info->_sifields._sigchld._pid);
+        tinfo->_sifields._sigchld._uid
+            = tswap32(info->_sifields._sigchld._uid);
+        tinfo->_sifields._sigchld._status
+            = tswap32(info->_sifields._sigchld._status);
+        tinfo->_sifields._sigchld._utime
+            = tswapal(info->_sifields._sigchld._utime);
+        tinfo->_sifields._sigchld._stime
+            = tswapal(info->_sifields._sigchld._stime);
     } else if (sig >= TARGET_SIGRTMIN) {
         tinfo->_sifields._rt._pid = tswap32(info->_sifields._rt._pid);
         tinfo->_sifields._rt._uid = tswap32(info->_sifields._rt._uid);
-        tinfo->_sifields._rt._sigval.sival_ptr =
-            tswapal(info->_sifields._rt._sigval.sival_ptr);
+        tinfo->_sifields._rt._sigval.sival_ptr
+            = tswapal(info->_sifields._rt._sigval.sival_ptr);
     }
 }
 
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 471d060..e4291ed 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -587,12 +587,17 @@
 extern int setgroups(int, gid_t *);
 
 /* ARM EABI and MIPS expect 64bit types aligned even on pairs or registers */
-#ifdef TARGET_ARM 
+#ifdef TARGET_ARM
 static inline int regpairs_aligned(void *cpu_env) {
     return ((((CPUARMState *)cpu_env)->eabi) == 1) ;
 }
 #elif defined(TARGET_MIPS)
 static inline int regpairs_aligned(void *cpu_env) { return 1; }
+#elif defined(TARGET_PPC) && !defined(TARGET_PPC64)
+/* SysV ABI for PPC32 expects 64bit parameters to be passed on odd/even pairs
+ * of registers which translates to the same as ARM/MIPS, because we start with
+ * r3 as arg1 */
+static inline int regpairs_aligned(void *cpu_env) { return 1; }
 #else
 static inline int regpairs_aligned(void *cpu_env) { return 0; }
 #endif
@@ -1744,55 +1749,96 @@
     return ret;
 }
 
-/* FIXME
- * lock_iovec()/unlock_iovec() have a return code of 0 for success where
- * other lock functions have a return code of 0 for failure.
- */
-static abi_long lock_iovec(int type, struct iovec *vec, abi_ulong target_addr,
-                           int count, int copy)
+static struct iovec *lock_iovec(int type, abi_ulong target_addr,
+                                int count, int copy)
 {
     struct target_iovec *target_vec;
-    abi_ulong base;
+    struct iovec *vec;
+    abi_ulong total_len, max_len;
     int i;
 
-    target_vec = lock_user(VERIFY_READ, target_addr, count * sizeof(struct target_iovec), 1);
-    if (!target_vec)
-        return -TARGET_EFAULT;
-    for(i = 0;i < count; i++) {
-        base = tswapal(target_vec[i].iov_base);
-        vec[i].iov_len = tswapal(target_vec[i].iov_len);
-        if (vec[i].iov_len != 0) {
-            vec[i].iov_base = lock_user(type, base, vec[i].iov_len, copy);
-            /* Don't check lock_user return value. We must call writev even
-               if a element has invalid base address. */
-        } else {
-            /* zero length pointer is ignored */
-            vec[i].iov_base = NULL;
-        }
+    if (count == 0) {
+        errno = 0;
+        return NULL;
     }
-    unlock_user (target_vec, target_addr, 0);
-    return 0;
+    if (count < 0 || count > IOV_MAX) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    vec = calloc(count, sizeof(struct iovec));
+    if (vec == NULL) {
+        errno = ENOMEM;
+        return NULL;
+    }
+
+    target_vec = lock_user(VERIFY_READ, target_addr,
+                           count * sizeof(struct target_iovec), 1);
+    if (target_vec == NULL) {
+        errno = EFAULT;
+        goto fail2;
+    }
+
+    /* ??? If host page size > target page size, this will result in a
+       value larger than what we can actually support.  */
+    max_len = 0x7fffffff & TARGET_PAGE_MASK;
+    total_len = 0;
+
+    for (i = 0; i < count; i++) {
+        abi_ulong base = tswapal(target_vec[i].iov_base);
+        abi_long len = tswapal(target_vec[i].iov_len);
+
+        if (len < 0) {
+            errno = EINVAL;
+            goto fail;
+        } else if (len == 0) {
+            /* Zero length pointer is ignored.  */
+            vec[i].iov_base = 0;
+        } else {
+            vec[i].iov_base = lock_user(type, base, len, copy);
+            if (!vec[i].iov_base) {
+                errno = EFAULT;
+                goto fail;
+            }
+            if (len > max_len - total_len) {
+                len = max_len - total_len;
+            }
+        }
+        vec[i].iov_len = len;
+        total_len += len;
+    }
+
+    unlock_user(target_vec, target_addr, 0);
+    return vec;
+
+ fail:
+    unlock_user(target_vec, target_addr, 0);
+ fail2:
+    free(vec);
+    return NULL;
 }
 
-static abi_long unlock_iovec(struct iovec *vec, abi_ulong target_addr,
-                             int count, int copy)
+static void unlock_iovec(struct iovec *vec, abi_ulong target_addr,
+                         int count, int copy)
 {
     struct target_iovec *target_vec;
-    abi_ulong base;
     int i;
 
-    target_vec = lock_user(VERIFY_READ, target_addr, count * sizeof(struct target_iovec), 1);
-    if (!target_vec)
-        return -TARGET_EFAULT;
-    for(i = 0;i < count; i++) {
-        if (target_vec[i].iov_base) {
-            base = tswapal(target_vec[i].iov_base);
+    target_vec = lock_user(VERIFY_READ, target_addr,
+                           count * sizeof(struct target_iovec), 1);
+    if (target_vec) {
+        for (i = 0; i < count; i++) {
+            abi_ulong base = tswapal(target_vec[i].iov_base);
+            abi_long len = tswapal(target_vec[i].iov_len);
+            if (len < 0) {
+                break;
+            }
             unlock_user(vec[i].iov_base, base, copy ? vec[i].iov_len : 0);
         }
+        unlock_user(target_vec, target_addr, 0);
     }
-    unlock_user (target_vec, target_addr, 0);
 
-    return 0;
+    free(vec);
 }
 
 /* do_socket() Must return target values and target errnos. */
@@ -1888,8 +1934,7 @@
         ret = target_to_host_sockaddr(msg.msg_name, tswapal(msgp->msg_name),
                                 msg.msg_namelen);
         if (ret) {
-            unlock_user_struct(msgp, target_msg, send ? 0 : 1);
-            return ret;
+            goto out2;
         }
     } else {
         msg.msg_name = NULL;
@@ -1900,9 +1945,13 @@
     msg.msg_flags = tswap32(msgp->msg_flags);
 
     count = tswapal(msgp->msg_iovlen);
-    vec = alloca(count * sizeof(struct iovec));
     target_vec = tswapal(msgp->msg_iov);
-    lock_iovec(send ? VERIFY_READ : VERIFY_WRITE, vec, target_vec, count, send);
+    vec = lock_iovec(send ? VERIFY_READ : VERIFY_WRITE,
+                     target_vec, count, send);
+    if (vec == NULL) {
+        ret = -host_to_target_errno(errno);
+        goto out2;
+    }
     msg.msg_iovlen = count;
     msg.msg_iov = vec;
 
@@ -1932,6 +1981,7 @@
 
 out:
     unlock_iovec(vec, target_vec, count, !send);
+out2:
     unlock_user_struct(msgp, target_msg, send ? 0 : 1);
     return ret;
 }
@@ -4873,7 +4923,7 @@
 
 /* Map host to target signal numbers for the wait family of syscalls.
    Assume all other status bits are the same.  */
-static int host_to_target_waitstatus(int status)
+int host_to_target_waitstatus(int status)
 {
     if (WIFSIGNALED(status)) {
         return host_to_target_signal(WTERMSIG(status)) | (status & ~0x7f);
@@ -4962,8 +5012,8 @@
 #if defined(TARGET_ARM) || defined(TARGET_M68K) || defined(TARGET_UNICORE32)
     dprintf(fd, "%08llx-%08llx rw-p %08llx 00:00 0          [stack]\n",
                 (unsigned long long)ts->info->stack_limit,
-                (unsigned long long)(ts->stack_base + (TARGET_PAGE_SIZE - 1))
-                                     & TARGET_PAGE_MASK,
+                (unsigned long long)(ts->info->start_stack +
+                                     (TARGET_PAGE_SIZE - 1)) & TARGET_PAGE_MASK,
                 (unsigned long long)0);
 #endif
 
@@ -6529,6 +6579,8 @@
             __put_user(stfs.f_fsid.__val[0], &target_stfs->f_fsid.val[0]);
             __put_user(stfs.f_fsid.__val[1], &target_stfs->f_fsid.val[1]);
             __put_user(stfs.f_namelen, &target_stfs->f_namelen);
+            __put_user(stfs.f_frsize, &target_stfs->f_frsize);
+            memset(target_stfs->f_spare, 0, sizeof(target_stfs->f_spare));
             unlock_user_struct(target_stfs, arg2, 1);
         }
         break;
@@ -6557,6 +6609,8 @@
             __put_user(stfs.f_fsid.__val[0], &target_stfs->f_fsid.val[0]);
             __put_user(stfs.f_fsid.__val[1], &target_stfs->f_fsid.val[1]);
             __put_user(stfs.f_namelen, &target_stfs->f_namelen);
+            __put_user(stfs.f_frsize, &target_stfs->f_frsize);
+            memset(target_stfs->f_spare, 0, sizeof(target_stfs->f_spare));
             unlock_user_struct(target_stfs, arg3, 1);
         }
         break;
@@ -6888,6 +6942,8 @@
         ret = get_errno(do_fork(cpu_env, arg1, arg2, arg3, arg5, arg4));
 #elif defined(TARGET_CRIS)
         ret = get_errno(do_fork(cpu_env, arg2, arg1, arg3, arg4, arg5));
+#elif defined(TARGET_MICROBLAZE)
+        ret = get_errno(do_fork(cpu_env, arg1, arg2, arg4, arg6, arg5));
 #elif defined(TARGET_S390X)
         ret = get_errno(do_fork(cpu_env, arg2, arg1, arg3, arg5, arg4));
 #else
@@ -7184,26 +7240,24 @@
         break;
     case TARGET_NR_readv:
         {
-            int count = arg3;
-            struct iovec *vec;
-
-            vec = alloca(count * sizeof(struct iovec));
-            if (lock_iovec(VERIFY_WRITE, vec, arg2, count, 0) < 0)
-                goto efault;
-            ret = get_errno(readv(arg1, vec, count));
-            unlock_iovec(vec, arg2, count, 1);
+            struct iovec *vec = lock_iovec(VERIFY_WRITE, arg2, arg3, 0);
+            if (vec != NULL) {
+                ret = get_errno(readv(arg1, vec, arg3));
+                unlock_iovec(vec, arg2, arg3, 1);
+            } else {
+                ret = -host_to_target_errno(errno);
+            }
         }
         break;
     case TARGET_NR_writev:
         {
-            int count = arg3;
-            struct iovec *vec;
-
-            vec = alloca(count * sizeof(struct iovec));
-            if (lock_iovec(VERIFY_READ, vec, arg2, count, 1) < 0)
-                goto efault;
-            ret = get_errno(writev(arg1, vec, count));
-            unlock_iovec(vec, arg2, count, 0);
+            struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1);
+            if (vec != NULL) {
+                ret = get_errno(writev(arg1, vec, arg3));
+                unlock_iovec(vec, arg2, arg3, 0);
+            } else {
+                ret = -host_to_target_errno(errno);
+            }
         }
         break;
     case TARGET_NR_getsid:
@@ -7415,12 +7469,20 @@
 #endif
 #ifdef TARGET_NR_pread64
     case TARGET_NR_pread64:
+        if (regpairs_aligned(cpu_env)) {
+            arg4 = arg5;
+            arg5 = arg6;
+        }
         if (!(p = lock_user(VERIFY_WRITE, arg2, arg3, 0)))
             goto efault;
         ret = get_errno(pread64(arg1, p, arg3, target_offset64(arg4, arg5)));
         unlock_user(p, arg2, ret);
         break;
     case TARGET_NR_pwrite64:
+        if (regpairs_aligned(cpu_env)) {
+            arg4 = arg5;
+            arg5 = arg6;
+        }
         if (!(p = lock_user(VERIFY_READ, arg2, arg3, 1)))
             goto efault;
         ret = get_errno(pwrite64(arg1, p, arg3, target_offset64(arg4, arg5)));
@@ -8628,14 +8690,13 @@
 #ifdef TARGET_NR_vmsplice
 	case TARGET_NR_vmsplice:
         {
-            int count = arg3;
-            struct iovec *vec;
-
-            vec = alloca(count * sizeof(struct iovec));
-            if (lock_iovec(VERIFY_READ, vec, arg2, count, 1) < 0)
-                goto efault;
-            ret = get_errno(vmsplice(arg1, vec, count, arg4));
-            unlock_iovec(vec, arg2, count, 0);
+            struct iovec *vec = lock_iovec(VERIFY_READ, arg2, arg3, 1);
+            if (vec != NULL) {
+                ret = get_errno(vmsplice(arg1, vec, arg3, arg4));
+                unlock_iovec(vec, arg2, arg3, 0);
+            } else {
+                ret = -host_to_target_errno(errno);
+            }
         }
         break;
 #endif
@@ -8822,6 +8883,19 @@
         break;
     }
 #endif
+#ifdef TARGET_NR_gethostname
+    case TARGET_NR_gethostname:
+    {
+        char *name = lock_user(VERIFY_WRITE, arg1, arg2, 0);
+        if (name) {
+            ret = get_errno(gethostname(name, arg2));
+            unlock_user(name, arg1, arg2);
+        } else {
+            ret = -TARGET_EFAULT;
+        }
+        break;
+    }
+#endif
     default:
     unimplemented:
         gemu_log("qemu: Unsupported syscall: %d\n", num);
diff --git a/exec-obsolete.h b/memory-internal.h
similarity index 84%
rename from exec-obsolete.h
rename to memory-internal.h
index 286e2f7..b33a99d 100644
--- a/exec-obsolete.h
+++ b/memory-internal.h
@@ -16,16 +16,33 @@
  * The functions declared here will be removed soon.
  */
 
-#ifndef EXEC_OBSOLETE_H
-#define EXEC_OBSOLETE_H
-
-#ifndef WANT_EXEC_OBSOLETE
-#error Do not include exec-obsolete.h
-#endif
+#ifndef MEMORY_INTERNAL_H
+#define MEMORY_INTERNAL_H
 
 #ifndef CONFIG_USER_ONLY
 #include "hw/xen.h"
 
+typedef struct PhysPageEntry PhysPageEntry;
+
+struct PhysPageEntry {
+    uint16_t is_leaf : 1;
+     /* index into phys_sections (is_leaf) or phys_map_nodes (!is_leaf) */
+    uint16_t ptr : 15;
+};
+
+typedef struct AddressSpaceDispatch AddressSpaceDispatch;
+
+struct AddressSpaceDispatch {
+    /* This is a multi-level map on the physical address space.
+     * The bottom level has pointers to MemoryRegionSections.
+     */
+    PhysPageEntry phys_map;
+    MemoryListener listener;
+};
+
+void address_space_init_dispatch(AddressSpace *as);
+void address_space_destroy_dispatch(AddressSpace *as);
+
 ram_addr_t qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                    MemoryRegion *mr);
 ram_addr_t qemu_ram_alloc(ram_addr_t size, MemoryRegion *mr);
@@ -34,8 +51,6 @@
 
 struct MemoryRegion;
 struct MemoryRegionSection;
-void cpu_register_physical_memory_log(struct MemoryRegionSection *section,
-                                      bool readonly);
 
 void qemu_register_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
 void qemu_unregister_coalesced_mmio(target_phys_addr_t addr, ram_addr_t size);
@@ -75,11 +90,6 @@
 static inline int cpu_physical_memory_set_dirty_flags(ram_addr_t addr,
                                                       int dirty_flags)
 {
-    if ((dirty_flags & MIGRATION_DIRTY_FLAG) &&
-        !cpu_physical_memory_get_dirty(addr, TARGET_PAGE_SIZE,
-                                       MIGRATION_DIRTY_FLAG)) {
-        ram_list.dirty_pages++;
-    }
     return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] |= dirty_flags;
 }
 
@@ -93,11 +103,6 @@
 {
     int mask = ~dirty_flags;
 
-    if ((dirty_flags & MIGRATION_DIRTY_FLAG) &&
-        cpu_physical_memory_get_dirty(addr, TARGET_PAGE_SIZE,
-                                      MIGRATION_DIRTY_FLAG)) {
-        ram_list.dirty_pages--;
-    }
     return ram_list.phys_dirty[addr >> TARGET_PAGE_BITS] &= mask;
 }
 
diff --git a/memory.c b/memory.c
index 94049a7..714ec43 100644
--- a/memory.c
+++ b/memory.c
@@ -20,8 +20,7 @@
 #include "kvm.h"
 #include <assert.h>
 
-#define WANT_EXEC_OBSOLETE
-#include "exec-obsolete.h"
+#include "memory-internal.h"
 
 unsigned memory_region_transaction_depth = 0;
 static bool global_dirty_log = false;
@@ -29,6 +28,9 @@
 static QTAILQ_HEAD(memory_listeners, MemoryListener) memory_listeners
     = QTAILQ_HEAD_INITIALIZER(memory_listeners);
 
+static QTAILQ_HEAD(, AddressSpace) address_spaces
+    = QTAILQ_HEAD_INITIALIZER(address_spaces);
+
 typedef struct AddrRange AddrRange;
 
 /*
@@ -97,13 +99,17 @@
         switch (_direction) {                                           \
         case Forward:                                                   \
             QTAILQ_FOREACH(_listener, &memory_listeners, link) {        \
-                _listener->_callback(_listener, ##_args);               \
+                if (_listener->_callback) {                             \
+                    _listener->_callback(_listener, ##_args);           \
+                }                                                       \
             }                                                           \
             break;                                                      \
         case Reverse:                                                   \
             QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners,        \
                                    memory_listeners, link) {            \
-                _listener->_callback(_listener, ##_args);               \
+                if (_listener->_callback) {                             \
+                    _listener->_callback(_listener, ##_args);           \
+                }                                                       \
             }                                                           \
             break;                                                      \
         default:                                                        \
@@ -118,7 +124,8 @@
         switch (_direction) {                                           \
         case Forward:                                                   \
             QTAILQ_FOREACH(_listener, &memory_listeners, link) {        \
-                if (memory_listener_match(_listener, _section)) {       \
+                if (_listener->_callback                                \
+                    && memory_listener_match(_listener, _section)) {    \
                     _listener->_callback(_listener, _section, ##_args); \
                 }                                                       \
             }                                                           \
@@ -126,7 +133,8 @@
         case Reverse:                                                   \
             QTAILQ_FOREACH_REVERSE(_listener, &memory_listeners,        \
                                    memory_listeners, link) {            \
-                if (memory_listener_match(_listener, _section)) {       \
+                if (_listener->_callback                                \
+                    && memory_listener_match(_listener, _section)) {    \
                     _listener->_callback(_listener, _section, ##_args); \
                 }                                                       \
             }                                                           \
@@ -139,7 +147,7 @@
 #define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback)            \
     MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) {       \
         .mr = (fr)->mr,                                                 \
-        .address_space = (as)->root,                                    \
+        .address_space = (as),                                          \
         .offset_within_region = (fr)->offset_in_region,                 \
         .size = int128_get64((fr)->addr.size),                          \
         .offset_within_address_space = int128_get64((fr)->addr.start),  \
@@ -217,17 +225,8 @@
     unsigned nr_allocated;
 };
 
-typedef struct AddressSpace AddressSpace;
 typedef struct AddressSpaceOps AddressSpaceOps;
 
-/* A system address space - I/O, memory, etc. */
-struct AddressSpace {
-    MemoryRegion *root;
-    FlatView current_map;
-    int ioeventfd_nb;
-    MemoryRegionIoeventfd *ioeventfds;
-};
-
 #define FOR_EACH_FLAT_RANGE(var, view)          \
     for (var = (view)->ranges; var < (view)->ranges + (view)->nr; ++var)
 
@@ -365,8 +364,6 @@
     }
 }
 
-static AddressSpace address_space_memory;
-
 static const MemoryRegionPortio *find_portio(MemoryRegion *mr, uint64_t offset,
                                              unsigned width, bool write)
 {
@@ -455,18 +452,17 @@
     .destructor = memory_region_iorange_destructor,
 };
 
-static AddressSpace address_space_io;
-
 static AddressSpace *memory_region_to_address_space(MemoryRegion *mr)
 {
+    AddressSpace *as;
+
     while (mr->parent) {
         mr = mr->parent;
     }
-    if (mr == address_space_memory.root) {
-        return &address_space_memory;
-    }
-    if (mr == address_space_io.root) {
-        return &address_space_io;
+    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+        if (mr == as->root) {
+            return as;
+        }
     }
     abort();
 }
@@ -568,8 +564,10 @@
 
     flatview_init(&view);
 
-    render_memory_region(&view, mr, int128_zero(),
-                         addrrange_make(int128_zero(), int128_2_64()), false);
+    if (mr) {
+        render_memory_region(&view, mr, int128_zero(),
+                             addrrange_make(int128_zero(), int128_2_64()), false);
+    }
     flatview_simplify(&view);
 
     return view;
@@ -597,7 +595,7 @@
                                                   fds_new[inew]))) {
             fd = &fds_old[iold];
             section = (MemoryRegionSection) {
-                .address_space = as->root,
+                .address_space = as,
                 .offset_within_address_space = int128_get64(fd->addr.start),
                 .size = int128_get64(fd->addr.size),
             };
@@ -610,7 +608,7 @@
                                                          fds_old[iold]))) {
             fd = &fds_new[inew];
             section = (MemoryRegionSection) {
-                .address_space = as->root,
+                .address_space = as,
                 .offset_within_address_space = int128_get64(fd->addr.start),
                 .size = int128_get64(fd->addr.size),
             };
@@ -632,7 +630,7 @@
     AddrRange tmp;
     unsigned i;
 
-    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
+    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
         for (i = 0; i < fr->mr->ioeventfd_nb; ++i) {
             tmp = addrrange_shift(fr->mr->ioeventfds[i].addr,
                                   int128_sub(fr->addr.start,
@@ -720,13 +718,13 @@
 
 static void address_space_update_topology(AddressSpace *as)
 {
-    FlatView old_view = as->current_map;
+    FlatView old_view = *as->current_map;
     FlatView new_view = generate_memory_topology(as->root);
 
     address_space_update_topology_pass(as, old_view, new_view, false);
     address_space_update_topology_pass(as, old_view, new_view, true);
 
-    as->current_map = new_view;
+    *as->current_map = new_view;
     flatview_destroy(&old_view);
     address_space_update_ioeventfds(as);
 }
@@ -739,16 +737,15 @@
 
 void memory_region_transaction_commit(void)
 {
+    AddressSpace *as;
+
     assert(memory_region_transaction_depth);
     --memory_region_transaction_depth;
     if (!memory_region_transaction_depth) {
         MEMORY_LISTENER_CALL_GLOBAL(begin, Forward);
 
-        if (address_space_memory.root) {
-            address_space_update_topology(&address_space_memory);
-        }
-        if (address_space_io.root) {
-            address_space_update_topology(&address_space_io);
+        QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+            address_space_update_topology(as);
         }
 
         MEMORY_LISTENER_CALL_GLOBAL(commit, Forward);
@@ -1083,12 +1080,14 @@
 
 void memory_region_sync_dirty_bitmap(MemoryRegion *mr)
 {
+    AddressSpace *as;
     FlatRange *fr;
 
-    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
-        if (fr->mr == mr) {
-            MEMORY_LISTENER_UPDATE_REGION(fr, &address_space_memory,
-                                          Forward, log_sync);
+    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+        FOR_EACH_FLAT_RANGE(fr, as->current_map) {
+            if (fr->mr == mr) {
+                MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
+            }
         }
     }
 }
@@ -1131,16 +1130,24 @@
     return qemu_get_ram_ptr(mr->ram_addr & TARGET_PAGE_MASK);
 }
 
-static void memory_region_update_coalesced_range(MemoryRegion *mr)
+static void memory_region_update_coalesced_range_as(MemoryRegion *mr, AddressSpace *as)
 {
     FlatRange *fr;
     CoalescedMemoryRange *cmr;
     AddrRange tmp;
+    MemoryRegionSection section;
 
-    FOR_EACH_FLAT_RANGE(fr, &address_space_memory.current_map) {
+    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
         if (fr->mr == mr) {
-            qemu_unregister_coalesced_mmio(int128_get64(fr->addr.start),
-                                           int128_get64(fr->addr.size));
+            section = (MemoryRegionSection) {
+                .address_space = as,
+                .offset_within_address_space = int128_get64(fr->addr.start),
+                .size = int128_get64(fr->addr.size),
+            };
+
+            MEMORY_LISTENER_CALL(coalesced_mmio_del, Reverse, &section,
+                                 int128_get64(fr->addr.start),
+                                 int128_get64(fr->addr.size));
             QTAILQ_FOREACH(cmr, &mr->coalesced, link) {
                 tmp = addrrange_shift(cmr->addr,
                                       int128_sub(fr->addr.start,
@@ -1149,13 +1156,23 @@
                     continue;
                 }
                 tmp = addrrange_intersection(tmp, fr->addr);
-                qemu_register_coalesced_mmio(int128_get64(tmp.start),
-                                             int128_get64(tmp.size));
+                MEMORY_LISTENER_CALL(coalesced_mmio_add, Forward, &section,
+                                     int128_get64(tmp.start),
+                                     int128_get64(tmp.size));
             }
         }
     }
 }
 
+static void memory_region_update_coalesced_range(MemoryRegion *mr)
+{
+    AddressSpace *as;
+
+    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+        memory_region_update_coalesced_range_as(mr, as);
+    }
+}
+
 void memory_region_set_coalescing(MemoryRegion *mr)
 {
     memory_region_clear_coalescing(mr);
@@ -1403,7 +1420,7 @@
 
 static FlatRange *address_space_lookup(AddressSpace *as, AddrRange addr)
 {
-    return bsearch(&addr, as->current_map.ranges, as->current_map.nr,
+    return bsearch(&addr, as->current_map->ranges, as->current_map->nr,
                    sizeof(FlatRange), cmp_flatrange_addr);
 }
 
@@ -1420,7 +1437,7 @@
         return ret;
     }
 
-    while (fr > as->current_map.ranges
+    while (fr > as->current_map->ranges
            && addrrange_intersects(fr[-1].addr, range)) {
         --fr;
     }
@@ -1441,7 +1458,7 @@
     AddressSpace *as = memory_region_to_address_space(address_space);
     FlatRange *fr;
 
-    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
+    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
         MEMORY_LISTENER_UPDATE_REGION(fr, as, Forward, log_sync);
     }
 }
@@ -1464,29 +1481,35 @@
     FlatRange *fr;
 
     if (listener->address_space_filter
-        && listener->address_space_filter != as->root) {
+        && listener->address_space_filter != as) {
         return;
     }
 
     if (global_dirty_log) {
-        listener->log_global_start(listener);
+        if (listener->log_global_start) {
+            listener->log_global_start(listener);
+        }
     }
-    FOR_EACH_FLAT_RANGE(fr, &as->current_map) {
+
+    FOR_EACH_FLAT_RANGE(fr, as->current_map) {
         MemoryRegionSection section = {
             .mr = fr->mr,
-            .address_space = as->root,
+            .address_space = as,
             .offset_within_region = fr->offset_in_region,
             .size = int128_get64(fr->addr.size),
             .offset_within_address_space = int128_get64(fr->addr.start),
             .readonly = fr->readonly,
         };
-        listener->region_add(listener, &section);
+        if (listener->region_add) {
+            listener->region_add(listener, &section);
+        }
     }
 }
 
-void memory_listener_register(MemoryListener *listener, MemoryRegion *filter)
+void memory_listener_register(MemoryListener *listener, AddressSpace *filter)
 {
     MemoryListener *other = NULL;
+    AddressSpace *as;
 
     listener->address_space_filter = filter;
     if (QTAILQ_EMPTY(&memory_listeners)
@@ -1501,8 +1524,10 @@
         }
         QTAILQ_INSERT_BEFORE(other, listener, link);
     }
-    listener_add_address_space(listener, &address_space_memory);
-    listener_add_address_space(listener, &address_space_io);
+
+    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+        listener_add_address_space(listener, as);
+    }
 }
 
 void memory_listener_unregister(MemoryListener *listener)
@@ -1510,18 +1535,28 @@
     QTAILQ_REMOVE(&memory_listeners, listener, link);
 }
 
-void set_system_memory_map(MemoryRegion *mr)
+void address_space_init(AddressSpace *as, MemoryRegion *root)
 {
     memory_region_transaction_begin();
-    address_space_memory.root = mr;
+    as->root = root;
+    as->current_map = g_new(FlatView, 1);
+    flatview_init(as->current_map);
+    QTAILQ_INSERT_TAIL(&address_spaces, as, address_spaces_link);
+    as->name = NULL;
     memory_region_transaction_commit();
+    address_space_init_dispatch(as);
 }
 
-void set_system_io_map(MemoryRegion *mr)
+void address_space_destroy(AddressSpace *as)
 {
+    /* Flush out anything from MemoryListeners listening in on this */
     memory_region_transaction_begin();
-    address_space_io.root = mr;
+    as->root = NULL;
     memory_region_transaction_commit();
+    QTAILQ_REMOVE(&address_spaces, as, address_spaces_link);
+    address_space_destroy_dispatch(as);
+    flatview_destroy(as->current_map);
+    g_free(as->current_map);
 }
 
 uint64_t io_mem_read(MemoryRegion *mr, target_phys_addr_t addr, unsigned size)
@@ -1641,16 +1676,16 @@
 {
     MemoryRegionListHead ml_head;
     MemoryRegionList *ml, *ml2;
+    AddressSpace *as;
 
     QTAILQ_INIT(&ml_head);
 
-    mon_printf(f, "memory\n");
-    mtree_print_mr(mon_printf, f, address_space_memory.root, 0, 0, &ml_head);
-
-    if (address_space_io.root &&
-        !QTAILQ_EMPTY(&address_space_io.root->subregions)) {
-        mon_printf(f, "I/O\n");
-        mtree_print_mr(mon_printf, f, address_space_io.root, 0, 0, &ml_head);
+    QTAILQ_FOREACH(as, &address_spaces, address_spaces_link) {
+        if (!as->name) {
+            continue;
+        }
+        mon_printf(f, "%s\n", as->name);
+        mtree_print_mr(mon_printf, f, as->root, 0, 0, &ml_head);
     }
 
     mon_printf(f, "aliases\n");
diff --git a/memory.h b/memory.h
index 37ce151..79393f1 100644
--- a/memory.h
+++ b/memory.h
@@ -157,6 +157,22 @@
 
 #define PORTIO_END_OF_LIST() { }
 
+typedef struct AddressSpace AddressSpace;
+
+/**
+ * AddressSpace: describes a mapping of addresses to #MemoryRegion objects
+ */
+struct AddressSpace {
+    /* All fields are private. */
+    const char *name;
+    MemoryRegion *root;
+    struct FlatView *current_map;
+    int ioeventfd_nb;
+    struct MemoryRegionIoeventfd *ioeventfds;
+    struct AddressSpaceDispatch *dispatch;
+    QTAILQ_ENTRY(AddressSpace) address_spaces_link;
+};
+
 typedef struct MemoryRegionSection MemoryRegionSection;
 
 /**
@@ -172,7 +188,7 @@
  */
 struct MemoryRegionSection {
     MemoryRegion *mr;
-    MemoryRegion *address_space;
+    AddressSpace *address_space;
     target_phys_addr_t offset_within_region;
     uint64_t size;
     target_phys_addr_t offset_within_address_space;
@@ -202,9 +218,13 @@
                         bool match_data, uint64_t data, EventNotifier *e);
     void (*eventfd_del)(MemoryListener *listener, MemoryRegionSection *section,
                         bool match_data, uint64_t data, EventNotifier *e);
+    void (*coalesced_mmio_add)(MemoryListener *listener, MemoryRegionSection *section,
+                               target_phys_addr_t addr, target_phys_addr_t len);
+    void (*coalesced_mmio_del)(MemoryListener *listener, MemoryRegionSection *section,
+                               target_phys_addr_t addr, target_phys_addr_t len);
     /* Lower = earlier (during add), later (during del) */
     unsigned priority;
-    MemoryRegion *address_space_filter;
+    AddressSpace *address_space_filter;
     QTAILQ_ENTRY(MemoryListener) link;
 };
 
@@ -755,7 +775,7 @@
  * @listener: an object containing the callbacks to be called
  * @filter: if non-%NULL, only regions in this address space will be observed
  */
-void memory_listener_register(MemoryListener *listener, MemoryRegion *filter);
+void memory_listener_register(MemoryListener *listener, AddressSpace *filter);
 
 /**
  * memory_listener_unregister: undo the effect of memory_listener_register()
@@ -776,6 +796,87 @@
 
 void mtree_info(fprintf_function mon_printf, void *f);
 
+/**
+ * address_space_init: initializes an address space
+ *
+ * @as: an uninitialized #AddressSpace
+ * @root: a #MemoryRegion that routes addresses for the address space
+ */
+void address_space_init(AddressSpace *as, MemoryRegion *root);
+
+
+/**
+ * address_space_destroy: destroy an address space
+ *
+ * Releases all resources associated with an address space.  After an address space
+ * is destroyed, its root memory region (given by address_space_init()) may be destroyed
+ * as well.
+ *
+ * @as: address space to be destroyed
+ */
+void address_space_destroy(AddressSpace *as);
+
+/**
+ * address_space_rw: read from or write to an address space.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ * @is_write: indicates the transfer direction
+ */
+void address_space_rw(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf,
+                      int len, bool is_write);
+
+/**
+ * address_space_write: write to address space.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ */
+void address_space_write(AddressSpace *as, target_phys_addr_t addr,
+                         const uint8_t *buf, int len);
+
+/**
+ * address_space_read: read from an address space.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @buf: buffer with the data transferred
+ */
+void address_space_read(AddressSpace *as, target_phys_addr_t addr, uint8_t *buf, int len);
+
+/* address_space_map: map a physical memory region into a host virtual address
+ *
+ * May map a subset of the requested range, given by and returned in @plen.
+ * May return %NULL if resources needed to perform the mapping are exhausted.
+ * Use only for reads OR writes - not for read-modify-write operations.
+ * Use cpu_register_map_client() to know when retrying the map operation is
+ * likely to succeed.
+ *
+ * @as: #AddressSpace to be accessed
+ * @addr: address within that address space
+ * @plen: pointer to length of buffer; updated on return
+ * @is_write: indicates the transfer direction
+ */
+void *address_space_map(AddressSpace *as, target_phys_addr_t addr,
+                        target_phys_addr_t *plen, bool is_write);
+
+/* address_space_unmap: Unmaps a memory region previously mapped by address_space_map()
+ *
+ * Will also mark the memory as dirty if @is_write == %true.  @access_len gives
+ * the amount of memory that was actually read or written by the caller.
+ *
+ * @as: #AddressSpace used
+ * @buffer: host pointer as returned by address_space_map()
+ * @len: buffer length as returned by address_space_map()
+ * @access_len: amount of data actually transferred
+ * @is_write: indicates the transfer direction
+ */
+void address_space_unmap(AddressSpace *as, void *buffer, target_phys_addr_t len,
+                         int is_write, target_phys_addr_t access_len);
+
+
 #endif
 
 #endif
diff --git a/migration.c b/migration.c
index 22a05c4..62e0304 100644
--- a/migration.c
+++ b/migration.c
@@ -53,7 +53,7 @@
    migrations at once.  For now we don't need to add
    dynamic creation of migration */
 
-static MigrationState *migrate_get_current(void)
+MigrationState *migrate_get_current(void)
 {
     static MigrationState current_migration = {
         .state = MIG_STATE_SETUP,
@@ -169,6 +169,8 @@
         info->has_total_time = true;
         info->total_time = qemu_get_clock_ms(rt_clock)
             - s->total_time;
+        info->has_expected_downtime = true;
+        info->expected_downtime = s->expected_downtime;
 
         info->has_ram = true;
         info->ram = g_malloc0(sizeof(*info->ram));
@@ -178,6 +180,8 @@
         info->ram->duplicate = dup_mig_pages_transferred();
         info->ram->normal = norm_mig_pages_transferred();
         info->ram->normal_bytes = norm_mig_bytes_transferred();
+        info->ram->dirty_pages_rate = s->dirty_pages_rate;
+
 
         if (blk_mig_active()) {
             info->has_disk = true;
@@ -195,6 +199,8 @@
         info->has_status = true;
         info->status = g_strdup("completed");
         info->total_time = s->total_time;
+        info->has_downtime = true;
+        info->downtime = s->downtime;
 
         info->has_ram = true;
         info->ram = g_malloc0(sizeof(*info->ram));
@@ -281,18 +287,18 @@
 static void migrate_fd_put_notify(void *opaque)
 {
     MigrationState *s = opaque;
+    int ret;
 
     qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
-    qemu_file_put_notify(s->file);
-    if (s->file && qemu_file_get_error(s->file)) {
+    ret = qemu_file_put_notify(s->file);
+    if (ret) {
         migrate_fd_error(s);
     }
 }
 
-static ssize_t migrate_fd_put_buffer(void *opaque, const void *data,
-                                     size_t size)
+ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
+                              size_t size)
 {
-    MigrationState *s = opaque;
     ssize_t ret;
 
     if (s->state != MIG_STATE_ACTIVE) {
@@ -313,9 +319,8 @@
     return ret;
 }
 
-static void migrate_fd_put_ready(void *opaque)
+void migrate_fd_put_ready(MigrationState *s)
 {
-    MigrationState *s = opaque;
     int ret;
 
     if (s->state != MIG_STATE_ACTIVE) {
@@ -329,8 +334,10 @@
         migrate_fd_error(s);
     } else if (ret == 1) {
         int old_vm_running = runstate_is_running();
+        int64_t start_time, end_time;
 
         DPRINTF("done iterating\n");
+        start_time = qemu_get_clock_ms(rt_clock);
         qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER);
         vm_stop_force_state(RUN_STATE_FINISH_MIGRATE);
 
@@ -339,7 +346,9 @@
         } else {
             migrate_fd_completed(s);
         }
-        s->total_time = qemu_get_clock_ms(rt_clock) - s->total_time;
+        end_time = qemu_get_clock_ms(rt_clock);
+        s->total_time = end_time - s->total_time;
+        s->downtime = end_time - start_time;
         if (s->state != MIG_STATE_COMPLETED) {
             if (old_vm_running) {
                 vm_start();
@@ -362,14 +371,13 @@
     migrate_fd_cleanup(s);
 }
 
-static void migrate_fd_wait_for_unfreeze(void *opaque)
+int migrate_fd_wait_for_unfreeze(MigrationState *s)
 {
-    MigrationState *s = opaque;
     int ret;
 
     DPRINTF("wait for unfreeze\n");
     if (s->state != MIG_STATE_ACTIVE)
-        return;
+        return -EINVAL;
 
     do {
         fd_set wfds;
@@ -381,14 +389,13 @@
     } while (ret == -1 && (s->get_error(s)) == EINTR);
 
     if (ret == -1) {
-        qemu_file_set_error(s->file, -s->get_error(s));
+        return -s->get_error(s);
     }
+    return 0;
 }
 
-static int migrate_fd_close(void *opaque)
+int migrate_fd_close(MigrationState *s)
 {
-    MigrationState *s = opaque;
-
     qemu_set_fd_handler2(s->fd, NULL, NULL, NULL, NULL);
     return s->close(s);
 }
@@ -424,12 +431,7 @@
     int ret;
 
     s->state = MIG_STATE_ACTIVE;
-    s->file = qemu_fopen_ops_buffered(s,
-                                      s->bandwidth_limit,
-                                      migrate_fd_put_buffer,
-                                      migrate_fd_put_ready,
-                                      migrate_fd_wait_for_unfreeze,
-                                      migrate_fd_close);
+    s->file = qemu_fopen_ops_buffered(s);
 
     DPRINTF("beginning savevm\n");
     ret = qemu_savevm_state_begin(s->file, &s->params);
diff --git a/migration.h b/migration.h
index a9852fc..1c3e9b7 100644
--- a/migration.h
+++ b/migration.h
@@ -40,6 +40,9 @@
     void *opaque;
     MigrationParams params;
     int64_t total_time;
+    int64_t downtime;
+    int64_t expected_downtime;
+    int64_t dirty_pages_rate;
     bool enabled_capabilities[MIGRATION_CAPABILITY_MAX];
     int64_t xbzrle_cache_size;
 };
@@ -75,11 +78,18 @@
 
 void migrate_fd_connect(MigrationState *s);
 
+ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data,
+                              size_t size);
+void migrate_fd_put_ready(MigrationState *s);
+int migrate_fd_wait_for_unfreeze(MigrationState *s);
+int migrate_fd_close(MigrationState *s);
+
 void add_migration_state_change_notifier(Notifier *notify);
 void remove_migration_state_change_notifier(Notifier *notify);
 bool migration_is_active(MigrationState *);
 bool migration_has_finished(MigrationState *);
 bool migration_has_failed(MigrationState *);
+MigrationState *migrate_get_current(void);
 
 uint64_t ram_bytes_remaining(void);
 uint64_t ram_bytes_transferred(void);
diff --git a/net/tap-win32.c b/net/tap-win32.c
index f1801e2..22dad3f 100644
--- a/net/tap-win32.c
+++ b/net/tap-win32.c
@@ -29,6 +29,7 @@
 #include "tap.h"
 
 #include "qemu-common.h"
+#include "clients.h"            /* net_init_tap */
 #include "net.h"
 #include "sysemu.h"
 #include "qemu-error.h"
diff --git a/osdep.h b/osdep.h
index cb213e0..c5fd3d9 100644
--- a/osdep.h
+++ b/osdep.h
@@ -108,6 +108,11 @@
 #else
 #define QEMU_MADV_DONTDUMP QEMU_MADV_INVALID
 #endif
+#ifdef MADV_HUGEPAGE
+#define QEMU_MADV_HUGEPAGE MADV_HUGEPAGE
+#else
+#define QEMU_MADV_HUGEPAGE QEMU_MADV_INVALID
+#endif
 
 #elif defined(CONFIG_POSIX_MADVISE)
 
diff --git a/qapi-schema.json b/qapi-schema.json
index f9dbdae..c615ee2 100644
--- a/qapi-schema.json
+++ b/qapi-schema.json
@@ -383,13 +383,17 @@
 #
 # @normal : number of normal pages (since 1.2)
 #
-# @normal-bytes : number of normal bytes sent (since 1.2)
+# @normal-bytes: number of normal bytes sent (since 1.2)
+#
+# @dirty-pages-rate: number of pages dirtied per second by the
+#        guest (since 1.3)
 #
 # Since: 0.14.0
 ##
 { 'type': 'MigrationStats',
   'data': {'transferred': 'int', 'remaining': 'int', 'total': 'int' ,
-           'duplicate': 'int', 'normal': 'int', 'normal-bytes': 'int' } }
+           'duplicate': 'int', 'normal': 'int', 'normal-bytes': 'int',
+           'dirty-pages-rate' : 'int' } }
 
 ##
 # @XBZRLECacheStats
@@ -438,13 +442,23 @@
 #        If migration has ended, it returns the total migration
 #        time. (since 1.2)
 #
+# @downtime: #optional only present when migration finishes correctly;
+#        total downtime in milliseconds for the guest.
+#        (since 1.3)
+#
+# @expected-downtime: #optional only present while migration is active;
+#        expected downtime in milliseconds for the guest in the last walk
+#        of the dirty bitmap. (since 1.3)
+#
 # Since: 0.14.0
 ##
 { 'type': 'MigrationInfo',
   'data': {'*status': 'str', '*ram': 'MigrationStats',
            '*disk': 'MigrationStats',
            '*xbzrle-cache': 'XBZRLECacheStats',
-           '*total-time': 'int'} }
+           '*total-time': 'int',
+           '*expected-downtime': 'int',
+           '*downtime': 'int'} }
 
 ##
 # @query-migrate
diff --git a/qemu-file.h b/qemu-file.h
index 31b83f6..9c8985b 100644
--- a/qemu-file.h
+++ b/qemu-file.h
@@ -71,7 +71,6 @@
 QEMUFile *qemu_popen(FILE *popen_file, const char *mode);
 QEMUFile *qemu_popen_cmd(const char *command, const char *mode);
 int qemu_stdio_fd(QEMUFile *f);
-void qemu_fflush(QEMUFile *f);
 int qemu_fclose(QEMUFile *f);
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size);
 void qemu_put_byte(QEMUFile *f, int v);
@@ -104,12 +103,11 @@
 int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate);
 int64_t qemu_file_get_rate_limit(QEMUFile *f);
 int qemu_file_get_error(QEMUFile *f);
-void qemu_file_set_error(QEMUFile *f, int error);
 
 /* Try to send any outstanding data.  This function is useful when output is
  * halted due to rate limiting or EAGAIN errors occur as it can be used to
  * resume output. */
-void qemu_file_put_notify(QEMUFile *f);
+int qemu_file_put_notify(QEMUFile *f);
 
 static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv)
 {
@@ -231,8 +229,4 @@
 {
     qemu_get_be64s(f, (uint64_t *)pv);
 }
-
-int64_t qemu_ftell(QEMUFile *f);
-int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence);
-
 #endif
diff --git a/qemu-log.c b/qemu-log.c
index 396aafd..a4c3d1f 100644
--- a/qemu-log.c
+++ b/qemu-log.c
@@ -116,6 +116,9 @@
       "show all i/o ports accesses" },
     { LOG_UNIMP, "unimp",
       "log unimplemented functionality" },
+    { LOG_GUEST_ERROR, "guest_errors",
+      "log when the guest OS does something invalid (eg accessing a\n"
+      "non-existent register)" },
     { 0, NULL, NULL },
 };
 
diff --git a/qemu-log.h b/qemu-log.h
index 5ccecf3..ce6bb09 100644
--- a/qemu-log.h
+++ b/qemu-log.h
@@ -35,6 +35,7 @@
 #define CPU_LOG_TB_CPU     (1 << 8)
 #define CPU_LOG_RESET      (1 << 9)
 #define LOG_UNIMP          (1 << 10)
+#define LOG_GUEST_ERROR    (1 << 11)
 
 /* Returns true if a bit is set in the current loglevel mask
  */
diff --git a/qemu-options.hx b/qemu-options.hx
index 7d97f96..46f0539 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -6,10 +6,6 @@
 HXCOMM architectures.
 HXCOMM HXCOMM can be used for comments, discarded from both texi and C
 
-HXCOMM TODO : when we are able to change -help output without breaking
-HXCOMM libvirt we should update the help options which refer to -cpu ?,
-HXCOMM -driver ?, etc to use the preferred -cpu help etc instead.
-
 DEFHEADING(Standard options:)
 STEXI
 @table @option
@@ -33,7 +29,7 @@
 
 DEF("machine", HAS_ARG, QEMU_OPTION_machine, \
     "-machine [type=]name[,prop[=value][,...]]\n"
-    "                selects emulated machine (-machine ? for list)\n"
+    "                selects emulated machine ('-machine help' for list)\n"
     "                property accel=accel1[:accel2[:...]] selects accelerator\n"
     "                supported accelerators are kvm, xen, tcg (default: tcg)\n"
     "                kernel_irqchip=on|off controls accelerated irqchip support\n"
@@ -44,7 +40,7 @@
 STEXI
 @item -machine [type=]@var{name}[,prop=@var{value}[,...]]
 @findex -machine
-Select the emulated machine by @var{name}. Use @code{-machine ?} to list
+Select the emulated machine by @var{name}. Use @code{-machine help} to list
 available machines. Supported machine properties are:
 @table @option
 @item accel=@var{accels1}[:@var{accels2}[:...]]
@@ -69,11 +65,11 @@
 DEF("M", HAS_ARG, QEMU_OPTION_M, "", QEMU_ARCH_ALL)
 
 DEF("cpu", HAS_ARG, QEMU_OPTION_cpu,
-    "-cpu cpu        select CPU (-cpu ? for list)\n", QEMU_ARCH_ALL)
+    "-cpu cpu        select CPU ('-cpu help' for list)\n", QEMU_ARCH_ALL)
 STEXI
 @item -cpu @var{model}
 @findex -cpu
-Select CPU model (-cpu ? for list and additional feature selection)
+Select CPU model (@code{-cpu help} for list and additional feature selection)
 ETEXI
 
 DEF("smp", HAS_ARG, QEMU_OPTION_smp,
@@ -463,12 +459,12 @@
 DEF("soundhw", HAS_ARG, QEMU_OPTION_soundhw,
     "-soundhw c1,... enable audio support\n"
     "                and only specified sound cards (comma separated list)\n"
-    "                use -soundhw ? to get the list of supported cards\n"
-    "                use -soundhw all to enable all of them\n", QEMU_ARCH_ALL)
+    "                use '-soundhw help' to get the list of supported cards\n"
+    "                use '-soundhw all' to enable all of them\n", QEMU_ARCH_ALL)
 STEXI
 @item -soundhw @var{card1}[,@var{card2},...] or -soundhw all
 @findex -soundhw
-Enable audio and selected sound hardware. Use ? to print all
+Enable audio and selected sound hardware. Use 'help' to print all
 available sound hardware.
 
 @example
@@ -477,7 +473,7 @@
 qemu-system-i386 -soundhw ac97 disk.img
 qemu-system-i386 -soundhw hda disk.img
 qemu-system-i386 -soundhw all disk.img
-qemu-system-i386 -soundhw ?
+qemu-system-i386 -soundhw help
 @end example
 
 Note that Linux's i810_audio OSS kernel (for AC97) module might
@@ -566,16 +562,16 @@
     "-device driver[,prop[=value][,...]]\n"
     "                add device (based on driver)\n"
     "                prop=value,... sets driver properties\n"
-    "                use -device ? to print all possible drivers\n"
-    "                use -device driver,? to print all possible properties\n",
+    "                use '-device help' to print all possible drivers\n"
+    "                use '-device driver,help' to print all possible properties\n",
     QEMU_ARCH_ALL)
 STEXI
 @item -device @var{driver}[,@var{prop}[=@var{value}][,...]]
 @findex -device
 Add device @var{driver}.  @var{prop}=@var{value} sets driver
 properties.  Valid properties depend on the driver.  To get help on
-possible drivers and properties, use @code{-device ?} and
-@code{-device @var{driver},?}.
+possible drivers and properties, use @code{-device help} and
+@code{-device @var{driver},help}.
 ETEXI
 
 DEFHEADING()
@@ -1365,7 +1361,7 @@
 @code{virtio}, @code{i82551}, @code{i82557b}, @code{i82559er},
 @code{ne2k_pci}, @code{ne2k_isa}, @code{pcnet}, @code{rtl8139},
 @code{e1000}, @code{smc91c111}, @code{lance} and @code{mcf_fec}.
-Not all devices are supported on all targets.  Use -net nic,model=?
+Not all devices are supported on all targets.  Use @code{-net nic,model=help}
 for a list of available devices for your target.
 
 @item -netdev user,id=@var{id}[,@var{option}][,@var{option}][,...]
@@ -2398,7 +2394,7 @@
 ETEXI
 
 DEF("d", HAS_ARG, QEMU_OPTION_d, \
-    "-d item1,...    output log to /tmp/qemu.log (use -d ? for a list of log items)\n",
+    "-d item1,...    output log to /tmp/qemu.log (use '-d help' for a list of log items)\n",
     QEMU_ARCH_ALL)
 STEXI
 @item -d
@@ -2533,13 +2529,13 @@
 
 DEF("clock", HAS_ARG, QEMU_OPTION_clock, \
     "-clock          force the use of the given methods for timer alarm.\n" \
-    "                To see what timers are available use -clock ?\n",
+    "                To see what timers are available use '-clock help'\n",
     QEMU_ARCH_ALL)
 STEXI
 @item -clock @var{method}
 @findex -clock
 Force the use of the given methods for timer alarm. To see what timers
-are available use -clock ?.
+are available use @code{-clock help}.
 ETEXI
 
 HXCOMM Options deprecated by -rtc
@@ -2608,7 +2604,7 @@
 controller hub) which is a much more featureful PCI-based dual-timer
 watchdog.  Choose a model for which your guest has drivers.
 
-Use @code{-watchdog ?} to list available hardware models.  Only one
+Use @code{-watchdog help} to list available hardware models.  Only one
 watchdog can be enabled for a guest.
 ETEXI
 
diff --git a/qmp-commands.hx b/qmp-commands.hx
index 2f8477e..5ba8c48 100644
--- a/qmp-commands.hx
+++ b/qmp-commands.hx
@@ -2304,6 +2304,11 @@
 - "total-time": total amount of ms since migration started.  If
                 migration has ended, it returns the total migration
 		 time (json-int)
+- "downtime": only present when migration has finished correctly;
+              total guest downtime in ms (json-int)
+- "expected-downtime": only present while migration is active;
+                expected guest downtime in ms, as calculated on
+                the last bitmap round (json-int)
 - "ram": only present if "status" is "active", it is a json-object with the
   following RAM information (in bytes):
          - "transferred": amount transferred (json-int)
@@ -2341,6 +2346,7 @@
           "remaining":123,
           "total":246,
           "total-time":12345,
+          "downtime":12345,
           "duplicate":123,
           "normal":123,
           "normal-bytes":123456
@@ -2364,6 +2370,7 @@
             "remaining":123,
             "total":246,
             "total-time":12345,
+            "expected-downtime":12345,
             "duplicate":123,
             "normal":123,
             "normal-bytes":123456
@@ -2382,6 +2389,7 @@
             "remaining":1053304,
             "transferred":3720,
             "total-time":12345,
+            "expected-downtime":12345,
             "duplicate":123,
             "normal":123,
             "normal-bytes":123456
@@ -2406,6 +2414,7 @@
             "remaining":1053304,
             "transferred":3720,
             "total-time":12345,
+            "expected-downtime":12345,
             "duplicate":10,
             "normal":3333,
             "normal-bytes":3412992
diff --git a/savevm.c b/savevm.c
index 31fd2e0..b080d37 100644
--- a/savevm.c
+++ b/savevm.c
@@ -440,42 +440,29 @@
     return f->last_error;
 }
 
-void qemu_file_set_error(QEMUFile *f, int ret)
+static void qemu_file_set_error(QEMUFile *f, int ret)
 {
     f->last_error = ret;
 }
 
-/** Sets last_error conditionally
- *
- * Sets last_error only if ret is negative _and_ no error
- * was set before.
- */
-static void qemu_file_set_if_error(QEMUFile *f, int ret)
-{
-    if (ret < 0 && !f->last_error) {
-        qemu_file_set_error(f, ret);
-    }
-}
-
 /** Flushes QEMUFile buffer
  *
- * In case of error, last_error is set.
  */
-void qemu_fflush(QEMUFile *f)
+static int qemu_fflush(QEMUFile *f)
 {
+    int ret = 0;
+
     if (!f->put_buffer)
-        return;
+        return 0;
 
     if (f->is_write && f->buf_index > 0) {
-        int len;
-
-        len = f->put_buffer(f->opaque, f->buf, f->buf_offset, f->buf_index);
-        if (len > 0)
+        ret = f->put_buffer(f->opaque, f->buf, f->buf_offset, f->buf_index);
+        if (ret >= 0) {
             f->buf_offset += f->buf_index;
-        else
-            qemu_file_set_error(f, -EINVAL);
+        }
         f->buf_index = 0;
     }
+    return ret;
 }
 
 static void qemu_fill_buffer(QEMUFile *f)
@@ -502,27 +489,11 @@
         f->buf_size += len;
         f->buf_offset += len;
     } else if (len == 0) {
-        f->last_error = -EIO;
+        qemu_file_set_error(f, -EIO);
     } else if (len != -EAGAIN)
         qemu_file_set_error(f, len);
 }
 
-/** Calls close function and set last_error if needed
- *
- * Internal function. qemu_fflush() must be called before this.
- *
- * Returns f->close() return value, or 0 if close function is not set.
- */
-static int qemu_fclose_internal(QEMUFile *f)
-{
-    int ret = 0;
-    if (f->close) {
-        ret = f->close(f->opaque);
-        qemu_file_set_if_error(f, ret);
-    }
-    return ret;
-}
-
 /** Closes the file
  *
  * Returns negative error value if any error happened on previous operations or
@@ -534,8 +505,14 @@
 int qemu_fclose(QEMUFile *f)
 {
     int ret;
-    qemu_fflush(f);
-    ret = qemu_fclose_internal(f);
+    ret = qemu_fflush(f);
+
+    if (f->close) {
+        int ret2 = f->close(f->opaque);
+        if (ret >= 0) {
+            ret = ret2;
+        }
+    }
     /* If any error was spotted before closing, we should report it
      * instead of the close() return value.
      */
@@ -546,22 +523,26 @@
     return ret;
 }
 
-void qemu_file_put_notify(QEMUFile *f)
+int qemu_file_put_notify(QEMUFile *f)
 {
-    f->put_buffer(f->opaque, NULL, 0, 0);
+    return f->put_buffer(f->opaque, NULL, 0, 0);
 }
 
 void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size)
 {
     int l;
 
-    if (!f->last_error && f->is_write == 0 && f->buf_index > 0) {
+    if (f->last_error) {
+        return;
+    }
+
+    if (f->is_write == 0 && f->buf_index > 0) {
         fprintf(stderr,
                 "Attempted to write to buffer while read buffer is not empty\n");
         abort();
     }
 
-    while (!f->last_error && size > 0) {
+    while (size > 0) {
         l = IO_BUF_SIZE - f->buf_index;
         if (l > size)
             l = size;
@@ -570,14 +551,23 @@
         f->buf_index += l;
         buf += l;
         size -= l;
-        if (f->buf_index >= IO_BUF_SIZE)
-            qemu_fflush(f);
+        if (f->buf_index >= IO_BUF_SIZE) {
+            int ret = qemu_fflush(f);
+            if (ret < 0) {
+                qemu_file_set_error(f, ret);
+                break;
+            }
+        }
     }
 }
 
 void qemu_put_byte(QEMUFile *f, int v)
 {
-    if (!f->last_error && f->is_write == 0 && f->buf_index > 0) {
+    if (f->last_error) {
+        return;
+    }
+
+    if (f->is_write == 0 && f->buf_index > 0) {
         fprintf(stderr,
                 "Attempted to write to buffer while read buffer is not empty\n");
         abort();
@@ -585,8 +575,12 @@
 
     f->buf[f->buf_index++] = v;
     f->is_write = 1;
-    if (f->buf_index >= IO_BUF_SIZE)
-        qemu_fflush(f);
+    if (f->buf_index >= IO_BUF_SIZE) {
+        int ret = qemu_fflush(f);
+        if (ret < 0) {
+            qemu_file_set_error(f, ret);
+        }
+    }
 }
 
 static void qemu_file_skip(QEMUFile *f, int size)
@@ -671,32 +665,11 @@
     return result;
 }
 
-int64_t qemu_ftell(QEMUFile *f)
+static int64_t qemu_ftell(QEMUFile *f)
 {
     return f->buf_offset - f->buf_size + f->buf_index;
 }
 
-int64_t qemu_fseek(QEMUFile *f, int64_t pos, int whence)
-{
-    if (whence == SEEK_SET) {
-        /* nothing to do */
-    } else if (whence == SEEK_CUR) {
-        pos += qemu_ftell(f);
-    } else {
-        /* SEEK_END not supported */
-        return -1;
-    }
-    if (f->put_buffer) {
-        qemu_fflush(f);
-        f->buf_offset = pos;
-    } else {
-        f->buf_offset = pos;
-        f->buf_index = 0;
-        f->buf_size = 0;
-    }
-    return pos;
-}
-
 int qemu_file_rate_limit(QEMUFile *f)
 {
     if (f->rate_limit)
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 8bb5129..9aa920d 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -788,7 +788,6 @@
     return helper_neon_qshl_u64(env, valop, shiftop);
 }
 
-/* FIXME: This is wrong.  */
 #define NEON_FN(dest, src1, src2) do { \
     int8_t tmp; \
     tmp = (int8_t)src2; \
diff --git a/target-arm/translate.c b/target-arm/translate.c
index c6840b7..daccb15 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -516,10 +516,10 @@
             tcg_gen_rotri_i32(var, var, shift); break;
         } else {
             TCGv tmp = tcg_temp_new_i32();
+            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             if (flags)
                 shifter_out_im(var, 0);
             tcg_gen_shri_i32(var, var, 1);
-            tcg_gen_shli_i32(tmp, cpu_CF, 31);
             tcg_gen_or_i32(var, var, tmp);
             tcg_temp_free_i32(tmp);
         }
diff --git a/target-microblaze/cpu.h b/target-microblaze/cpu.h
index 4968c24..88430b5 100644
--- a/target-microblaze/cpu.h
+++ b/target-microblaze/cpu.h
@@ -345,6 +345,7 @@
 
 static inline void cpu_set_tls(CPUMBState *env, target_ulong newtls)
 {
+    env->regs[21] = newtls;
 }
 
 static inline int cpu_interrupts_enabled(CPUMBState *env)
diff --git a/target-s390x/misc_helper.c b/target-s390x/misc_helper.c
index e9b3cae..fdccd58 100644
--- a/target-s390x/misc_helper.c
+++ b/target-s390x/misc_helper.c
@@ -20,7 +20,6 @@
 
 #include "cpu.h"
 #include "memory.h"
-#include "cputlb.h"
 #include "host-utils.h"
 #include "helper.h"
 #include <string.h>
@@ -81,7 +80,7 @@
 #endif
 
     /* basic checks */
-    if (!memory_region_is_ram(phys_page_find(sccb >> TARGET_PAGE_BITS)->mr)) {
+    if (cpu_physical_memory_is_io(sccb)) {
         return -PGM_ADDRESSING;
     }
     if (sccb & ~0x7ffffff8ul) {
diff --git a/target-sparc/translate.c b/target-sparc/translate.c
index 6cef96b..4321393 100644
--- a/target-sparc/translate.c
+++ b/target-sparc/translate.c
@@ -48,7 +48,7 @@
 #ifndef CONFIG_USER_ONLY
 static TCGv cpu_tbr;
 #endif
-static TCGv cpu_cond, cpu_dst, cpu_addr, cpu_val;
+static TCGv cpu_cond;
 #ifdef TARGET_SPARC64
 static TCGv_i32 cpu_xcc, cpu_asi, cpu_fprs;
 static TCGv cpu_gsr;
@@ -58,10 +58,6 @@
 #else
 static TCGv cpu_wim;
 #endif
-/* local register indexes (only used inside old micro ops) */
-static TCGv cpu_tmp0;
-static TCGv_i32 cpu_tmp32;
-static TCGv_i64 cpu_tmp64;
 /* Floating point registers */
 static TCGv_i64 cpu_fpr[TARGET_DPREGS];
 
@@ -83,7 +79,9 @@
     struct TranslationBlock *tb;
     sparc_def_t *def;
     TCGv_i32 t32[3];
+    TCGv ttl[5];
     int n_t32;
+    int n_ttl;
 } DisasContext;
 
 typedef struct {
@@ -123,6 +121,22 @@
 
 #define IS_IMM (insn & (1<<13))
 
+static inline TCGv_i32 get_temp_i32(DisasContext *dc)
+{
+    TCGv_i32 t;
+    assert(dc->n_t32 < ARRAY_SIZE(dc->t32));
+    dc->t32[dc->n_t32++] = t = tcg_temp_new_i32();
+    return t;
+}
+
+static inline TCGv get_temp_tl(DisasContext *dc)
+{
+    TCGv t;
+    assert(dc->n_ttl < ARRAY_SIZE(dc->ttl));
+    dc->ttl[dc->n_ttl++] = t = tcg_temp_new();
+    return t;
+}
+
 static inline void gen_update_fprs_dirty(int rd)
 {
 #if defined(TARGET_SPARC64)
@@ -143,16 +157,13 @@
     if (src & 1) {
         return MAKE_TCGV_I32(GET_TCGV_I64(cpu_fpr[src / 2]));
     } else {
-        TCGv_i32 ret = tcg_temp_new_i32();
+        TCGv_i32 ret = get_temp_i32(dc);
         TCGv_i64 t = tcg_temp_new_i64();
 
         tcg_gen_shri_i64(t, cpu_fpr[src / 2], 32);
         tcg_gen_trunc_i64_i32(ret, t);
         tcg_temp_free_i64(t);
 
-        dc->t32[dc->n_t32++] = ret;
-        assert(dc->n_t32 <= ARRAY_SIZE(dc->t32));
-
         return ret;
     }
 #endif
@@ -174,9 +185,9 @@
     gen_update_fprs_dirty(dst);
 }
 
-static TCGv_i32 gen_dest_fpr_F(void)
+static TCGv_i32 gen_dest_fpr_F(DisasContext *dc)
 {
-    return cpu_tmp32;
+    return get_temp_i32(dc);
 }
 
 static TCGv_i64 gen_load_fpr_D(DisasContext *dc, unsigned int src)
@@ -192,9 +203,9 @@
     gen_update_fprs_dirty(dst);
 }
 
-static TCGv_i64 gen_dest_fpr_D(void)
+static TCGv_i64 gen_dest_fpr_D(DisasContext *dc, unsigned int dst)
 {
-    return cpu_tmp64;
+    return cpu_fpr[DFPREG(dst) / 2];
 }
 
 static void gen_op_load_fpr_QT0(unsigned int src)
@@ -263,25 +274,38 @@
 #endif
 }
 
-static inline void gen_movl_reg_TN(int reg, TCGv tn)
+static inline TCGv gen_load_gpr(DisasContext *dc, int reg)
 {
-    if (reg == 0)
-        tcg_gen_movi_tl(tn, 0);
-    else if (reg < 8)
-        tcg_gen_mov_tl(tn, cpu_gregs[reg]);
-    else {
-        tcg_gen_ld_tl(tn, cpu_regwptr, (reg - 8) * sizeof(target_ulong));
+    if (reg == 0 || reg >= 8) {
+        TCGv t = get_temp_tl(dc);
+        if (reg == 0) {
+            tcg_gen_movi_tl(t, 0);
+        } else {
+            tcg_gen_ld_tl(t, cpu_regwptr, (reg - 8) * sizeof(target_ulong));
+        }
+        return t;
+    } else {
+        return cpu_gregs[reg];
     }
 }
 
-static inline void gen_movl_TN_reg(int reg, TCGv tn)
+static inline void gen_store_gpr(DisasContext *dc, int reg, TCGv v)
 {
-    if (reg == 0)
-        return;
-    else if (reg < 8)
-        tcg_gen_mov_tl(cpu_gregs[reg], tn);
-    else {
-        tcg_gen_st_tl(tn, cpu_regwptr, (reg - 8) * sizeof(target_ulong));
+    if (reg > 0) {
+        if (reg < 8) {
+            tcg_gen_mov_tl(cpu_gregs[reg], v);
+        } else {
+            tcg_gen_st_tl(v, cpu_regwptr, (reg - 8) * sizeof(target_ulong));
+        }
+    }
+}
+
+static inline TCGv gen_dest_gpr(DisasContext *dc, int reg)
+{
+    if (reg == 0 || reg >= 8) {
+        return get_temp_tl(dc);
+    } else {
+        return cpu_gregs[reg];
     }
 }
 
@@ -582,9 +606,10 @@
 
 static inline void gen_op_mulscc(TCGv dst, TCGv src1, TCGv src2)
 {
-    TCGv r_temp, zero;
+    TCGv r_temp, zero, t0;
 
     r_temp = tcg_temp_new();
+    t0 = tcg_temp_new();
 
     /* old op:
     if (!(env->y & 1))
@@ -602,22 +627,23 @@
     // env->y = (b2 << 31) | (env->y >> 1);
     tcg_gen_andi_tl(r_temp, cpu_cc_src, 0x1);
     tcg_gen_shli_tl(r_temp, r_temp, 31);
-    tcg_gen_shri_tl(cpu_tmp0, cpu_y, 1);
-    tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0x7fffffff);
-    tcg_gen_or_tl(cpu_tmp0, cpu_tmp0, r_temp);
-    tcg_gen_andi_tl(cpu_y, cpu_tmp0, 0xffffffff);
+    tcg_gen_shri_tl(t0, cpu_y, 1);
+    tcg_gen_andi_tl(t0, t0, 0x7fffffff);
+    tcg_gen_or_tl(t0, t0, r_temp);
+    tcg_gen_andi_tl(cpu_y, t0, 0xffffffff);
 
     // b1 = N ^ V;
-    gen_mov_reg_N(cpu_tmp0, cpu_psr);
+    gen_mov_reg_N(t0, cpu_psr);
     gen_mov_reg_V(r_temp, cpu_psr);
-    tcg_gen_xor_tl(cpu_tmp0, cpu_tmp0, r_temp);
+    tcg_gen_xor_tl(t0, t0, r_temp);
     tcg_temp_free(r_temp);
 
     // T0 = (b1 << 31) | (T0 >> 1);
     // src1 = T0;
-    tcg_gen_shli_tl(cpu_tmp0, cpu_tmp0, 31);
+    tcg_gen_shli_tl(t0, t0, 31);
     tcg_gen_shri_tl(cpu_cc_src, cpu_cc_src, 1);
-    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, cpu_tmp0);
+    tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t0);
+    tcg_temp_free(t0);
 
     tcg_gen_add_tl(cpu_cc_dst, cpu_cc_src, cpu_cc_src2);
 
@@ -649,9 +675,9 @@
     tcg_gen_mul_i64(r_temp2, r_temp, r_temp2);
 
     tcg_gen_shri_i64(r_temp, r_temp2, 32);
-    tcg_gen_trunc_i64_tl(cpu_tmp0, r_temp);
+    tcg_gen_trunc_i64_tl(cpu_y, r_temp);
     tcg_temp_free_i64(r_temp);
-    tcg_gen_andi_tl(cpu_y, cpu_tmp0, 0xffffffff);
+    tcg_gen_andi_tl(cpu_y, cpu_y, 0xffffffff);
 
     tcg_gen_trunc_i64_tl(dst, r_temp2);
 
@@ -688,27 +714,33 @@
 // Z | (N ^ V)
 static inline void gen_op_eval_ble(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_N(cpu_tmp0, src);
+    TCGv t0 = tcg_temp_new();
+    gen_mov_reg_N(t0, src);
     gen_mov_reg_V(dst, src);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
-    gen_mov_reg_Z(cpu_tmp0, src);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    tcg_gen_xor_tl(dst, dst, t0);
+    gen_mov_reg_Z(t0, src);
+    tcg_gen_or_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // N ^ V
 static inline void gen_op_eval_bl(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_V(cpu_tmp0, src);
+    TCGv t0 = tcg_temp_new();
+    gen_mov_reg_V(t0, src);
     gen_mov_reg_N(dst, src);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
+    tcg_gen_xor_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // C | Z
 static inline void gen_op_eval_bleu(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_Z(cpu_tmp0, src);
+    TCGv t0 = tcg_temp_new();
+    gen_mov_reg_Z(t0, src);
     gen_mov_reg_C(dst, src);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    tcg_gen_or_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // C
@@ -745,29 +777,21 @@
 // !(Z | (N ^ V))
 static inline void gen_op_eval_bg(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_N(cpu_tmp0, src);
-    gen_mov_reg_V(dst, src);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
-    gen_mov_reg_Z(cpu_tmp0, src);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    gen_op_eval_ble(dst, src);
     tcg_gen_xori_tl(dst, dst, 0x1);
 }
 
 // !(N ^ V)
 static inline void gen_op_eval_bge(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_V(cpu_tmp0, src);
-    gen_mov_reg_N(dst, src);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
+    gen_op_eval_bl(dst, src);
     tcg_gen_xori_tl(dst, dst, 0x1);
 }
 
 // !(C | Z)
 static inline void gen_op_eval_bgu(TCGv dst, TCGv_i32 src)
 {
-    gen_mov_reg_Z(cpu_tmp0, src);
-    gen_mov_reg_C(dst, src);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    gen_op_eval_bleu(dst, src);
     tcg_gen_xori_tl(dst, dst, 0x1);
 }
 
@@ -817,18 +841,22 @@
 static inline void gen_op_eval_fbne(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_or_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // 1 or 2: FCC0 ^ FCC1
 static inline void gen_op_eval_fblg(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_xor_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // 1 or 3: FCC0
@@ -842,10 +870,11 @@
 static inline void gen_op_eval_fbl(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_xori_tl(cpu_tmp0, cpu_tmp0, 0x1);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_andc_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // 2 or 3: FCC1
@@ -859,39 +888,46 @@
 static inline void gen_op_eval_fbg(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    tcg_gen_xori_tl(dst, dst, 0x1);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_andc_tl(dst, t0, dst);
+    tcg_temp_free(t0);
 }
 
 // 3: FCC0 & FCC1
 static inline void gen_op_eval_fbu(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_and_tl(dst, dst, t0);
+    tcg_temp_free(t0);
 }
 
 // 0: !(FCC0 | FCC1)
 static inline void gen_op_eval_fbe(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_or_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_or_tl(dst, dst, t0);
     tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 // 0 or 3: !(FCC0 ^ FCC1)
 static inline void gen_op_eval_fbue(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_xor_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_xor_tl(dst, dst, t0);
     tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 // 0 or 2: !FCC0
@@ -906,11 +942,12 @@
 static inline void gen_op_eval_fbuge(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_xori_tl(cpu_tmp0, cpu_tmp0, 0x1);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_andc_tl(dst, dst, t0);
     tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 // 0 or 1: !FCC1
@@ -925,21 +962,24 @@
 static inline void gen_op_eval_fbule(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_andc_tl(dst, t0, dst);
     tcg_gen_xori_tl(dst, dst, 0x1);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
-    tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 // !3: !(FCC0 & FCC1)
 static inline void gen_op_eval_fbo(TCGv dst, TCGv src,
                                     unsigned int fcc_offset)
 {
+    TCGv t0 = tcg_temp_new();
     gen_mov_reg_FCC0(dst, src, fcc_offset);
-    gen_mov_reg_FCC1(cpu_tmp0, src, fcc_offset);
-    tcg_gen_and_tl(dst, dst, cpu_tmp0);
+    gen_mov_reg_FCC1(t0, src, fcc_offset);
+    tcg_gen_and_tl(dst, dst, t0);
     tcg_gen_xori_tl(dst, dst, 0x1);
+    tcg_temp_free(t0);
 }
 
 static inline void gen_branch2(DisasContext *dc, target_ulong pc1,
@@ -1675,7 +1715,7 @@
     TCGv_i32 dst, src;
 
     src = gen_load_fpr_F(dc, rs);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, cpu_env, src);
 
@@ -1688,7 +1728,7 @@
     TCGv_i32 dst, src;
 
     src = gen_load_fpr_F(dc, rs);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, src);
 
@@ -1702,7 +1742,7 @@
 
     src1 = gen_load_fpr_F(dc, rs1);
     src2 = gen_load_fpr_F(dc, rs2);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, cpu_env, src1, src2);
 
@@ -1717,7 +1757,7 @@
 
     src1 = gen_load_fpr_F(dc, rs1);
     src2 = gen_load_fpr_F(dc, rs2);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, src1, src2);
 
@@ -1731,7 +1771,7 @@
     TCGv_i64 dst, src;
 
     src = gen_load_fpr_D(dc, rs);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src);
 
@@ -1745,7 +1785,7 @@
     TCGv_i64 dst, src;
 
     src = gen_load_fpr_D(dc, rs);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, src);
 
@@ -1760,7 +1800,7 @@
 
     src1 = gen_load_fpr_D(dc, rs1);
     src2 = gen_load_fpr_D(dc, rs2);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src1, src2);
 
@@ -1775,7 +1815,7 @@
 
     src1 = gen_load_fpr_D(dc, rs1);
     src2 = gen_load_fpr_D(dc, rs2);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, src1, src2);
 
@@ -1789,7 +1829,7 @@
 
     src1 = gen_load_fpr_D(dc, rs1);
     src2 = gen_load_fpr_D(dc, rs2);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_gsr, src1, src2);
 
@@ -1804,7 +1844,7 @@
     src1 = gen_load_fpr_D(dc, rs1);
     src2 = gen_load_fpr_D(dc, rs2);
     src0 = gen_load_fpr_D(dc, rd);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, src0, src1, src2);
 
@@ -1856,7 +1896,7 @@
 
     src1 = gen_load_fpr_F(dc, rs1);
     src2 = gen_load_fpr_F(dc, rs2);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src1, src2);
 
@@ -1885,7 +1925,7 @@
     TCGv_i32 src;
 
     src = gen_load_fpr_F(dc, rs);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src);
 
@@ -1900,7 +1940,7 @@
     TCGv_i32 src;
 
     src = gen_load_fpr_F(dc, rs);
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env, src);
 
@@ -1914,7 +1954,7 @@
     TCGv_i64 src;
 
     src = gen_load_fpr_D(dc, rs);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, cpu_env, src);
 
@@ -1927,7 +1967,7 @@
     TCGv_i32 dst;
 
     gen_op_load_fpr_QT1(QFPREG(rs));
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
 
     gen(dst, cpu_env);
 
@@ -1940,7 +1980,7 @@
     TCGv_i64 dst;
 
     gen_op_load_fpr_QT1(QFPREG(rs));
-    dst = gen_dest_fpr_D();
+    dst = gen_dest_fpr_D(dc, rd);
 
     gen(dst, cpu_env);
 
@@ -2041,22 +2081,25 @@
     tcg_temp_free_i32(r_asi);
 }
 
-static inline void gen_swap_asi(TCGv dst, TCGv addr, int insn)
+static inline void gen_swap_asi(TCGv dst, TCGv src, TCGv addr, int insn)
 {
     TCGv_i32 r_asi, r_size, r_sign;
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
     r_asi = gen_get_asi(insn, addr);
     r_size = tcg_const_i32(4);
     r_sign = tcg_const_i32(0);
-    gen_helper_ld_asi(cpu_tmp64, cpu_env, addr, r_asi, r_size, r_sign);
+    gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_size, r_sign);
     tcg_temp_free_i32(r_sign);
-    gen_helper_st_asi(cpu_env, addr, dst, r_asi, r_size);
+    gen_helper_st_asi(cpu_env, addr, src, r_asi, r_size);
     tcg_temp_free_i32(r_size);
     tcg_temp_free_i32(r_asi);
-    tcg_gen_trunc_i64_tl(dst, cpu_tmp64);
+    tcg_gen_trunc_i64_tl(dst, t64);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_ldda_asi(TCGv hi, TCGv addr, int insn, int rd)
+static inline void gen_ldda_asi(DisasContext *dc, TCGv hi, TCGv addr,
+                                int insn, int rd)
 {
     TCGv_i32 r_asi, r_rd;
 
@@ -2067,42 +2110,44 @@
     tcg_temp_free_i32(r_asi);
 }
 
-static inline void gen_stda_asi(TCGv hi, TCGv addr, int insn, int rd)
+static inline void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
+                                int insn, int rd)
 {
     TCGv_i32 r_asi, r_size;
+    TCGv lo = gen_load_gpr(dc, rd + 1);
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
-    gen_movl_reg_TN(rd + 1, cpu_tmp0);
-    tcg_gen_concat_tl_i64(cpu_tmp64, cpu_tmp0, hi);
+    tcg_gen_concat_tl_i64(t64, lo, hi);
     r_asi = gen_get_asi(insn, addr);
     r_size = tcg_const_i32(8);
-    gen_helper_st_asi(cpu_env, addr, cpu_tmp64, r_asi, r_size);
+    gen_helper_st_asi(cpu_env, addr, t64, r_asi, r_size);
     tcg_temp_free_i32(r_size);
     tcg_temp_free_i32(r_asi);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_cas_asi(TCGv dst, TCGv addr, TCGv val2, int insn,
-                               int rd)
+static inline void gen_cas_asi(DisasContext *dc, TCGv addr,
+                               TCGv val2, int insn, int rd)
 {
-    TCGv r_val1;
-    TCGv_i32 r_asi;
+    TCGv val1 = gen_load_gpr(dc, rd);
+    TCGv dst = gen_dest_gpr(dc, rd);
+    TCGv_i32 r_asi = gen_get_asi(insn, addr);
 
-    r_val1 = tcg_temp_new();
-    gen_movl_reg_TN(rd, r_val1);
-    r_asi = gen_get_asi(insn, addr);
-    gen_helper_cas_asi(dst, cpu_env, addr, r_val1, val2, r_asi);
+    gen_helper_cas_asi(dst, cpu_env, addr, val1, val2, r_asi);
     tcg_temp_free_i32(r_asi);
-    tcg_temp_free(r_val1);
+    gen_store_gpr(dc, rd, dst);
 }
 
-static inline void gen_casx_asi(TCGv dst, TCGv addr, TCGv val2, int insn,
-                                int rd)
+static inline void gen_casx_asi(DisasContext *dc, TCGv addr,
+                                TCGv val2, int insn, int rd)
 {
-    TCGv_i32 r_asi;
+    TCGv val1 = gen_load_gpr(dc, rd);
+    TCGv dst = gen_dest_gpr(dc, rd);
+    TCGv_i32 r_asi = gen_get_asi(insn, addr);
 
-    gen_movl_reg_TN(rd, cpu_tmp64);
-    r_asi = gen_get_asi(insn, addr);
-    gen_helper_casx_asi(dst, cpu_env, addr, cpu_tmp64, val2, r_asi);
+    gen_helper_casx_asi(dst, cpu_env, addr, val1, val2, r_asi);
     tcg_temp_free_i32(r_asi);
+    gen_store_gpr(dc, rd, dst);
 }
 
 #elif !defined(CONFIG_USER_ONLY)
@@ -2111,77 +2156,94 @@
                               int sign)
 {
     TCGv_i32 r_asi, r_size, r_sign;
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(size);
     r_sign = tcg_const_i32(sign);
-    gen_helper_ld_asi(cpu_tmp64, cpu_env, addr, r_asi, r_size, r_sign);
-    tcg_temp_free(r_sign);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
-    tcg_gen_trunc_i64_tl(dst, cpu_tmp64);
+    gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_size, r_sign);
+    tcg_temp_free_i32(r_sign);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+    tcg_gen_trunc_i64_tl(dst, t64);
+    tcg_temp_free_i64(t64);
 }
 
 static inline void gen_st_asi(TCGv src, TCGv addr, int insn, int size)
 {
     TCGv_i32 r_asi, r_size;
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
-    tcg_gen_extu_tl_i64(cpu_tmp64, src);
+    tcg_gen_extu_tl_i64(t64, src);
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(size);
-    gen_helper_st_asi(cpu_env, addr, cpu_tmp64, r_asi, r_size);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
+    gen_helper_st_asi(cpu_env, addr, t64, r_asi, r_size);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_swap_asi(TCGv dst, TCGv addr, int insn)
+static inline void gen_swap_asi(TCGv dst, TCGv src, TCGv addr, int insn)
 {
     TCGv_i32 r_asi, r_size, r_sign;
-    TCGv_i64 r_val;
+    TCGv_i64 r_val, t64;
 
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(4);
     r_sign = tcg_const_i32(0);
-    gen_helper_ld_asi(cpu_tmp64, cpu_env, addr, r_asi, r_size, r_sign);
+    t64 = tcg_temp_new_i64();
+    gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_size, r_sign);
     tcg_temp_free(r_sign);
     r_val = tcg_temp_new_i64();
-    tcg_gen_extu_tl_i64(r_val, dst);
+    tcg_gen_extu_tl_i64(r_val, src);
     gen_helper_st_asi(cpu_env, addr, r_val, r_asi, r_size);
     tcg_temp_free_i64(r_val);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
-    tcg_gen_trunc_i64_tl(dst, cpu_tmp64);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+    tcg_gen_trunc_i64_tl(dst, t64);
+    tcg_temp_free_i64(t64);
 }
 
-static inline void gen_ldda_asi(TCGv hi, TCGv addr, int insn, int rd)
+static inline void gen_ldda_asi(DisasContext *dc, TCGv hi, TCGv addr,
+                                int insn, int rd)
 {
     TCGv_i32 r_asi, r_size, r_sign;
+    TCGv t;
+    TCGv_i64 t64;
 
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(8);
     r_sign = tcg_const_i32(0);
-    gen_helper_ld_asi(cpu_tmp64, cpu_env, addr, r_asi, r_size, r_sign);
-    tcg_temp_free(r_sign);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
-    tcg_gen_trunc_i64_tl(cpu_tmp0, cpu_tmp64);
-    gen_movl_TN_reg(rd + 1, cpu_tmp0);
-    tcg_gen_shri_i64(cpu_tmp64, cpu_tmp64, 32);
-    tcg_gen_trunc_i64_tl(hi, cpu_tmp64);
-    gen_movl_TN_reg(rd, hi);
+    t64 = tcg_temp_new_i64();
+    gen_helper_ld_asi(t64, cpu_env, addr, r_asi, r_size, r_sign);
+    tcg_temp_free_i32(r_sign);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+
+    t = gen_dest_gpr(dc, rd + 1);
+    tcg_gen_trunc_i64_tl(t, t64);
+    gen_store_gpr(dc, rd + 1, t);
+
+    tcg_gen_shri_i64(t64, t64, 32);
+    tcg_gen_trunc_i64_tl(hi, t64);
+    tcg_temp_free_i64(t64);
+    gen_store_gpr(dc, rd, hi);
 }
 
-static inline void gen_stda_asi(TCGv hi, TCGv addr, int insn, int rd)
+static inline void gen_stda_asi(DisasContext *dc, TCGv hi, TCGv addr,
+                                int insn, int rd)
 {
     TCGv_i32 r_asi, r_size;
+    TCGv lo = gen_load_gpr(dc, rd + 1);
+    TCGv_i64 t64 = tcg_temp_new_i64();
 
-    gen_movl_reg_TN(rd + 1, cpu_tmp0);
-    tcg_gen_concat_tl_i64(cpu_tmp64, cpu_tmp0, hi);
+    tcg_gen_concat_tl_i64(t64, lo, hi);
     r_asi = tcg_const_i32(GET_FIELD(insn, 19, 26));
     r_size = tcg_const_i32(8);
-    gen_helper_st_asi(cpu_env, addr, cpu_tmp64, r_asi, r_size);
-    tcg_temp_free(r_size);
-    tcg_temp_free(r_asi);
+    gen_helper_st_asi(cpu_env, addr, t64, r_asi, r_size);
+    tcg_temp_free_i32(r_size);
+    tcg_temp_free_i32(r_asi);
+    tcg_temp_free_i64(t64);
 }
 #endif
 
@@ -2203,40 +2265,23 @@
 }
 #endif
 
-static inline TCGv get_src1(unsigned int insn, TCGv def)
+static TCGv get_src1(DisasContext *dc, unsigned int insn)
 {
-    TCGv r_rs1 = def;
-    unsigned int rs1;
-
-    rs1 = GET_FIELD(insn, 13, 17);
-    if (rs1 == 0) {
-        tcg_gen_movi_tl(def, 0);
-    } else if (rs1 < 8) {
-        r_rs1 = cpu_gregs[rs1];
-    } else {
-        tcg_gen_ld_tl(def, cpu_regwptr, (rs1 - 8) * sizeof(target_ulong));
-    }
-    return r_rs1;
+    unsigned int rs1 = GET_FIELD(insn, 13, 17);
+    return gen_load_gpr(dc, rs1);
 }
 
-static inline TCGv get_src2(unsigned int insn, TCGv def)
+static TCGv get_src2(DisasContext *dc, unsigned int insn)
 {
-    TCGv r_rs2 = def;
-
     if (IS_IMM) { /* immediate */
         target_long simm = GET_FIELDs(insn, 19, 31);
-        tcg_gen_movi_tl(def, simm);
-    } else { /* register */
+        TCGv t = get_temp_tl(dc);
+        tcg_gen_movi_tl(t, simm);
+        return t;
+    } else {      /* register */
         unsigned int rs2 = GET_FIELD(insn, 27, 31);
-        if (rs2 == 0) {
-            tcg_gen_movi_tl(def, 0);
-        } else if (rs2 < 8) {
-            r_rs2 = cpu_gregs[rs2];
-        } else {
-            tcg_gen_ld_tl(def, cpu_regwptr, (rs2 - 8) * sizeof(target_ulong));
-        }
+        return gen_load_gpr(dc, rs2);
     }
-    return r_rs2;
 }
 
 #ifdef TARGET_SPARC64
@@ -2259,7 +2304,7 @@
 
     s1 = gen_load_fpr_F(dc, rs);
     s2 = gen_load_fpr_F(dc, rd);
-    dst = gen_dest_fpr_F();
+    dst = gen_dest_fpr_F(dc);
     zero = tcg_const_i32(0);
 
     tcg_gen_movcond_i32(TCG_COND_NE, dst, c32, zero, s1, s2);
@@ -2271,7 +2316,7 @@
 
 static void gen_fmovd(DisasContext *dc, DisasCompare *cmp, int rd, int rs)
 {
-    TCGv_i64 dst = gen_dest_fpr_D();
+    TCGv_i64 dst = gen_dest_fpr_D(dc, rd);
     tcg_gen_movcond_i64(cmp->cond, dst, cmp->c1, cmp->c2,
                         gen_load_fpr_D(dc, rs),
                         gen_load_fpr_D(dc, rd));
@@ -2470,7 +2515,7 @@
 static void disas_sparc_insn(DisasContext * dc, unsigned int insn)
 {
     unsigned int opc, rs1, rs2, rd;
-    TCGv cpu_src1, cpu_src2, cpu_tmp1, cpu_tmp2;
+    TCGv cpu_src1, cpu_src2;
     TCGv_i32 cpu_src1_32, cpu_src2_32, cpu_dst_32;
     TCGv_i64 cpu_src1_64, cpu_src2_64, cpu_dst_64;
     target_long simm;
@@ -2480,12 +2525,8 @@
     }
 
     opc = GET_FIELD(insn, 0, 1);
-
     rd = GET_FIELD(insn, 2, 6);
 
-    cpu_tmp1 = cpu_src1 = tcg_temp_new();
-    cpu_tmp2 = cpu_src2 = tcg_temp_new();
-
     switch (opc) {
     case 0:                     /* branches/sethi */
         {
@@ -2515,7 +2556,7 @@
                         (GET_FIELD_SP(insn, 20, 21) << 14);
                     target = sign_extend(target, 16);
                     target <<= 2;
-                    cpu_src1 = get_src1(insn, cpu_src1);
+                    cpu_src1 = get_src1(dc, insn);
                     do_branch_reg(dc, target, insn, cpu_src1);
                     goto jmp_insn;
                 }
@@ -2557,13 +2598,12 @@
                     goto jmp_insn;
                 }
             case 0x4:           /* SETHI */
-                if (rd) { // nop
+                /* Special-case %g0 because that's the canonical nop.  */
+                if (rd) {
                     uint32_t value = GET_FIELD(insn, 10, 31);
-                    TCGv r_const;
-
-                    r_const = tcg_const_tl(value << 10);
-                    gen_movl_TN_reg(rd, r_const);
-                    tcg_temp_free(r_const);
+                    TCGv t = gen_dest_gpr(dc, rd);
+                    tcg_gen_movi_tl(t, value << 10);
+                    gen_store_gpr(dc, rd, t);
                 }
                 break;
             case 0x0:           /* UNIMPL */
@@ -2576,11 +2616,10 @@
     case 1:                     /*CALL*/
         {
             target_long target = GET_FIELDs(insn, 2, 31) << 2;
-            TCGv r_const;
+            TCGv o7 = gen_dest_gpr(dc, 15);
 
-            r_const = tcg_const_tl(dc->pc);
-            gen_movl_TN_reg(15, r_const);
-            tcg_temp_free(r_const);
+            tcg_gen_movi_tl(o7, dc->pc);
+            gen_store_gpr(dc, 15, o7);
             target += dc->pc;
             gen_mov_pc_npc(dc);
 #ifdef TARGET_SPARC64
@@ -2594,6 +2633,9 @@
     case 2:                     /* FPU & Logical Operations */
         {
             unsigned int xop = GET_FIELD(insn, 7, 12);
+            TCGv cpu_dst = gen_dest_gpr(dc, rd);
+            TCGv cpu_tmp0;
+
             if (xop == 0x3a) {  /* generate trap */
                 int cond = GET_FIELD(insn, 3, 6);
                 TCGv_i32 trap;
@@ -2644,22 +2686,17 @@
                         /* Signal that the trap value is fully constant.  */
                         mask = 0;
                     } else {
-                        TCGv t1 = tcg_temp_new();
-                        gen_movl_reg_TN(rs1, t1);
+                        TCGv t1 = gen_load_gpr(dc, rs1);
                         tcg_gen_trunc_tl_i32(trap, t1);
-                        tcg_temp_free(t1);
                         tcg_gen_addi_i32(trap, trap, rs2);
                     }
                 } else {
-                    TCGv t1 = tcg_temp_new();
-                    TCGv t2 = tcg_temp_new();
+                    TCGv t1, t2;
                     rs2 = GET_FIELD_SP(insn, 0, 4);
-                    gen_movl_reg_TN(rs1, t1);
-                    gen_movl_reg_TN(rs2, t2);
+                    t1 = gen_load_gpr(dc, rs1);
+                    t2 = gen_load_gpr(dc, rs2);
                     tcg_gen_add_tl(t1, t1, t2);
                     tcg_gen_trunc_tl_i32(trap, t1);
-                    tcg_temp_free(t1);
-                    tcg_temp_free(t2);
                 }
                 if (mask != 0) {
                     tcg_gen_andi_i32(trap, trap, mask);
@@ -2693,27 +2730,24 @@
                                        microSPARC II */
                     /* Read Asr17 */
                     if (rs1 == 0x11 && dc->def->features & CPU_FEATURE_ASR17) {
-                        TCGv r_const;
-
+                        TCGv t = gen_dest_gpr(dc, rd);
                         /* Read Asr17 for a Leon3 monoprocessor */
-                        r_const = tcg_const_tl((1 << 8)
-                                               | (dc->def->nwindows - 1));
-                        gen_movl_TN_reg(rd, r_const);
-                        tcg_temp_free(r_const);
+                        tcg_gen_movi_tl(t, (1 << 8) | (dc->def->nwindows - 1));
+                        gen_store_gpr(dc, rd, t);
                         break;
                     }
 #endif
-                    gen_movl_TN_reg(rd, cpu_y);
+                    gen_store_gpr(dc, rd, cpu_y);
                     break;
 #ifdef TARGET_SPARC64
                 case 0x2: /* V9 rdccr */
                     update_psr(dc);
                     gen_helper_rdccr(cpu_dst, cpu_env);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x3: /* V9 rdasi */
                     tcg_gen_ext_i32_tl(cpu_dst, cpu_asi);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x4: /* V9 rdtick */
                     {
@@ -2724,25 +2758,23 @@
                                        offsetof(CPUSPARCState, tick));
                         gen_helper_tick_get_count(cpu_dst, r_tickptr);
                         tcg_temp_free_ptr(r_tickptr);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                     }
                     break;
                 case 0x5: /* V9 rdpc */
                     {
-                        TCGv r_const;
-
+                        TCGv t = gen_dest_gpr(dc, rd);
                         if (unlikely(AM_CHECK(dc))) {
-                            r_const = tcg_const_tl(dc->pc & 0xffffffffULL);
+                            tcg_gen_movi_tl(t, dc->pc & 0xffffffffULL);
                         } else {
-                           r_const = tcg_const_tl(dc->pc);
+                            tcg_gen_movi_tl(t, dc->pc);
                         }
-                        gen_movl_TN_reg(rd, r_const);
-                        tcg_temp_free(r_const);
+                        gen_store_gpr(dc, rd, t);
                     }
                     break;
                 case 0x6: /* V9 rdfprs */
                     tcg_gen_ext_i32_tl(cpu_dst, cpu_fprs);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0xf: /* V9 membar */
                     break; /* no effect */
@@ -2750,14 +2782,14 @@
                     if (gen_trap_ifnofpu(dc)) {
                         goto jmp_insn;
                     }
-                    gen_movl_TN_reg(rd, cpu_gsr);
+                    gen_store_gpr(dc, rd, cpu_gsr);
                     break;
                 case 0x16: /* Softint */
                     tcg_gen_ext_i32_tl(cpu_dst, cpu_softint);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x17: /* Tick compare */
-                    gen_movl_TN_reg(rd, cpu_tick_cmpr);
+                    gen_store_gpr(dc, rd, cpu_tick_cmpr);
                     break;
                 case 0x18: /* System tick */
                     {
@@ -2768,11 +2800,11 @@
                                        offsetof(CPUSPARCState, stick));
                         gen_helper_tick_get_count(cpu_dst, r_tickptr);
                         tcg_temp_free_ptr(r_tickptr);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                     }
                     break;
                 case 0x19: /* System tick compare */
-                    gen_movl_TN_reg(rd, cpu_stick_cmpr);
+                    gen_store_gpr(dc, rd, cpu_stick_cmpr);
                     break;
                 case 0x10: /* Performance Control */
                 case 0x11: /* Performance Instrumentation Counter */
@@ -2819,11 +2851,13 @@
                     goto illegal_insn;
                 }
 #endif
-                gen_movl_TN_reg(rd, cpu_dst);
+                gen_store_gpr(dc, rd, cpu_dst);
                 break;
             } else if (xop == 0x2a) { /* rdwim / V9 rdpr */
-                if (!supervisor(dc))
+                if (!supervisor(dc)) {
                     goto priv_insn;
+                }
+                cpu_tmp0 = get_temp_tl(dc);
 #ifdef TARGET_SPARC64
                 rs1 = GET_FIELD(insn, 13, 17);
                 switch (rs1) {
@@ -2862,14 +2896,12 @@
                     break;
                 case 3: // tt
                     {
-                        TCGv_ptr r_tsptr;
+                        TCGv_ptr r_tsptr = tcg_temp_new_ptr();
 
-                        r_tsptr = tcg_temp_new_ptr();
                         gen_load_trap_state_at_tl(r_tsptr, cpu_env);
-                        tcg_gen_ld_i32(cpu_tmp32, r_tsptr,
-                                       offsetof(trap_state, tt));
+                        tcg_gen_ld32s_tl(cpu_tmp0, r_tsptr,
+                                         offsetof(trap_state, tt));
                         tcg_temp_free_ptr(r_tsptr);
-                        tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
                     }
                     break;
                 case 4: // tick
@@ -2880,7 +2912,6 @@
                         tcg_gen_ld_ptr(r_tickptr, cpu_env,
                                        offsetof(CPUSPARCState, tick));
                         gen_helper_tick_get_count(cpu_tmp0, r_tickptr);
-                        gen_movl_TN_reg(rd, cpu_tmp0);
                         tcg_temp_free_ptr(r_tickptr);
                     }
                     break;
@@ -2888,53 +2919,44 @@
                     tcg_gen_mov_tl(cpu_tmp0, cpu_tbr);
                     break;
                 case 6: // pstate
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, pstate));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, pstate));
                     break;
                 case 7: // tl
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, tl));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, tl));
                     break;
                 case 8: // pil
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, psrpil));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, psrpil));
                     break;
                 case 9: // cwp
                     gen_helper_rdcwp(cpu_tmp0, cpu_env);
                     break;
                 case 10: // cansave
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, cansave));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, cansave));
                     break;
                 case 11: // canrestore
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, canrestore));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, canrestore));
                     break;
                 case 12: // cleanwin
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, cleanwin));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, cleanwin));
                     break;
                 case 13: // otherwin
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, otherwin));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, otherwin));
                     break;
                 case 14: // wstate
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, wstate));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, wstate));
                     break;
                 case 16: // UA2005 gl
                     CHECK_IU_FEATURE(dc, GL);
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env,
-                                   offsetof(CPUSPARCState, gl));
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_tmp32);
+                    tcg_gen_ld32s_tl(cpu_tmp0, cpu_env,
+                                     offsetof(CPUSPARCState, gl));
                     break;
                 case 26: // UA2005 strand status
                     CHECK_IU_FEATURE(dc, HYPV);
@@ -2952,7 +2974,7 @@
 #else
                 tcg_gen_ext_i32_tl(cpu_tmp0, cpu_wim);
 #endif
-                gen_movl_TN_reg(rd, cpu_tmp0);
+                gen_store_gpr(dc, rd, cpu_tmp0);
                 break;
             } else if (xop == 0x2b) { /* rdtbr / V9 flushw */
 #ifdef TARGET_SPARC64
@@ -2961,7 +2983,7 @@
 #else
                 if (!supervisor(dc))
                     goto priv_insn;
-                gen_movl_TN_reg(rd, cpu_tbr);
+                gen_store_gpr(dc, rd, cpu_tbr);
 #endif
                 break;
 #endif
@@ -3154,8 +3176,8 @@
 #define FMOVR(sz)                                                  \
                 do {                                               \
                     DisasCompare cmp;                              \
-                    cond = GET_FIELD_SP(insn, 14, 17);             \
-                    cpu_src1 = get_src1(insn, cpu_src1);           \
+                    cond = GET_FIELD_SP(insn, 10, 12);             \
+                    cpu_src1 = get_src1(dc, insn);                 \
                     gen_compare_reg(&cmp, cond, cpu_src1);         \
                     gen_fmov##sz(dc, &cmp, rd, rs2);               \
                     free_compare(&cmp);                            \
@@ -3293,43 +3315,45 @@
                         goto illegal_insn;
                 }
             } else if (xop == 0x2) {
-                // clr/mov shortcut
-
+                TCGv dst = gen_dest_gpr(dc, rd);
                 rs1 = GET_FIELD(insn, 13, 17);
                 if (rs1 == 0) {
-                    // or %g0, x, y -> mov T0, x; mov y, T0
+                    /* clr/mov shortcut : or %g0, x, y -> mov x, y */
                     if (IS_IMM) {       /* immediate */
-                        TCGv r_const;
-
                         simm = GET_FIELDs(insn, 19, 31);
-                        r_const = tcg_const_tl(simm);
-                        gen_movl_TN_reg(rd, r_const);
-                        tcg_temp_free(r_const);
+                        tcg_gen_movi_tl(dst, simm);
+                        gen_store_gpr(dc, rd, dst);
                     } else {            /* register */
                         rs2 = GET_FIELD(insn, 27, 31);
-                        gen_movl_reg_TN(rs2, cpu_dst);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        if (rs2 == 0) {
+                            tcg_gen_movi_tl(dst, 0);
+                            gen_store_gpr(dc, rd, dst);
+                        } else {
+                            cpu_src2 = gen_load_gpr(dc, rs2);
+                            gen_store_gpr(dc, rd, cpu_src2);
+                        }
                     }
                 } else {
-                    cpu_src1 = get_src1(insn, cpu_src1);
+                    cpu_src1 = get_src1(dc, insn);
                     if (IS_IMM) {       /* immediate */
                         simm = GET_FIELDs(insn, 19, 31);
-                        tcg_gen_ori_tl(cpu_dst, cpu_src1, simm);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        tcg_gen_ori_tl(dst, cpu_src1, simm);
+                        gen_store_gpr(dc, rd, dst);
                     } else {            /* register */
-                        // or x, %g0, y -> mov T1, x; mov y, T1
                         rs2 = GET_FIELD(insn, 27, 31);
-                        if (rs2 != 0) {
-                            gen_movl_reg_TN(rs2, cpu_src2);
-                            tcg_gen_or_tl(cpu_dst, cpu_src1, cpu_src2);
-                            gen_movl_TN_reg(rd, cpu_dst);
-                        } else
-                            gen_movl_TN_reg(rd, cpu_src1);
+                        if (rs2 == 0) {
+                            /* mov shortcut:  or x, %g0, y -> mov x, y */
+                            gen_store_gpr(dc, rd, cpu_src1);
+                        } else {
+                            cpu_src2 = gen_load_gpr(dc, rs2);
+                            tcg_gen_or_tl(dst, cpu_src1, cpu_src2);
+                            gen_store_gpr(dc, rd, dst);
+                        }
                     }
                 }
 #ifdef TARGET_SPARC64
             } else if (xop == 0x25) { /* sll, V9 sllx */
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 20, 31);
                     if (insn & (1 << 12)) {
@@ -3339,7 +3363,8 @@
                     }
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    cpu_tmp0 = get_temp_tl(dc);
                     if (insn & (1 << 12)) {
                         tcg_gen_andi_i64(cpu_tmp0, cpu_src2, 0x3f);
                     } else {
@@ -3347,9 +3372,9 @@
                     }
                     tcg_gen_shl_i64(cpu_dst, cpu_src1, cpu_tmp0);
                 }
-                gen_movl_TN_reg(rd, cpu_dst);
+                gen_store_gpr(dc, rd, cpu_dst);
             } else if (xop == 0x26) { /* srl, V9 srlx */
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 20, 31);
                     if (insn & (1 << 12)) {
@@ -3360,7 +3385,8 @@
                     }
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    cpu_tmp0 = get_temp_tl(dc);
                     if (insn & (1 << 12)) {
                         tcg_gen_andi_i64(cpu_tmp0, cpu_src2, 0x3f);
                         tcg_gen_shr_i64(cpu_dst, cpu_src1, cpu_tmp0);
@@ -3370,65 +3396,48 @@
                         tcg_gen_shr_i64(cpu_dst, cpu_dst, cpu_tmp0);
                     }
                 }
-                gen_movl_TN_reg(rd, cpu_dst);
+                gen_store_gpr(dc, rd, cpu_dst);
             } else if (xop == 0x27) { /* sra, V9 srax */
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 20, 31);
                     if (insn & (1 << 12)) {
                         tcg_gen_sari_i64(cpu_dst, cpu_src1, simm & 0x3f);
                     } else {
-                        tcg_gen_andi_i64(cpu_dst, cpu_src1, 0xffffffffULL);
-                        tcg_gen_ext32s_i64(cpu_dst, cpu_dst);
+                        tcg_gen_ext32s_i64(cpu_dst, cpu_src1);
                         tcg_gen_sari_i64(cpu_dst, cpu_dst, simm & 0x1f);
                     }
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    cpu_tmp0 = get_temp_tl(dc);
                     if (insn & (1 << 12)) {
                         tcg_gen_andi_i64(cpu_tmp0, cpu_src2, 0x3f);
                         tcg_gen_sar_i64(cpu_dst, cpu_src1, cpu_tmp0);
                     } else {
                         tcg_gen_andi_i64(cpu_tmp0, cpu_src2, 0x1f);
-                        tcg_gen_andi_i64(cpu_dst, cpu_src1, 0xffffffffULL);
-                        tcg_gen_ext32s_i64(cpu_dst, cpu_dst);
+                        tcg_gen_ext32s_i64(cpu_dst, cpu_src1);
                         tcg_gen_sar_i64(cpu_dst, cpu_dst, cpu_tmp0);
                     }
                 }
-                gen_movl_TN_reg(rd, cpu_dst);
+                gen_store_gpr(dc, rd, cpu_dst);
 #endif
             } else if (xop < 0x36) {
                 if (xop < 0x20) {
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    cpu_src2 = get_src2(insn, cpu_src2);
+                    cpu_src1 = get_src1(dc, insn);
+                    cpu_src2 = get_src2(dc, insn);
                     switch (xop & ~0x10) {
                     case 0x0: /* add */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            if (xop & 0x10) {
-                                gen_op_addi_cc(cpu_dst, cpu_src1, simm);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
-                                dc->cc_op = CC_OP_ADD;
-                            } else {
-                                tcg_gen_addi_tl(cpu_dst, cpu_src1, simm);
-                            }
+                        if (xop & 0x10) {
+                            gen_op_add_cc(cpu_dst, cpu_src1, cpu_src2);
+                            tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
+                            dc->cc_op = CC_OP_ADD;
                         } else {
-                            if (xop & 0x10) {
-                                gen_op_add_cc(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
-                                dc->cc_op = CC_OP_ADD;
-                            } else {
-                                tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
-                            }
+                            tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
                         }
                         break;
                     case 0x1: /* and */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_andi_tl(cpu_dst, cpu_src1, simm);
-                        } else {
-                            tcg_gen_and_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_and_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3436,12 +3445,7 @@
                         }
                         break;
                     case 0x2: /* or */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_ori_tl(cpu_dst, cpu_src1, simm);
-                        } else {
-                            tcg_gen_or_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_or_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3449,12 +3453,7 @@
                         }
                         break;
                     case 0x3: /* xor */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_xori_tl(cpu_dst, cpu_src1, simm);
-                        } else {
-                            tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3462,30 +3461,16 @@
                         }
                         break;
                     case 0x4: /* sub */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            if (xop & 0x10) {
-                                gen_op_subi_cc(cpu_dst, cpu_src1, simm, dc);
-                            } else {
-                                tcg_gen_subi_tl(cpu_dst, cpu_src1, simm);
-                            }
+                        if (xop & 0x10) {
+                            gen_op_sub_cc(cpu_dst, cpu_src1, cpu_src2);
+                            tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUB);
+                            dc->cc_op = CC_OP_SUB;
                         } else {
-                            if (xop & 0x10) {
-                                gen_op_sub_cc(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_movi_i32(cpu_cc_op, CC_OP_SUB);
-                                dc->cc_op = CC_OP_SUB;
-                            } else {
-                                tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_src2);
-                            }
+                            tcg_gen_sub_tl(cpu_dst, cpu_src1, cpu_src2);
                         }
                         break;
                     case 0x5: /* andn */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_andi_tl(cpu_dst, cpu_src1, ~simm);
-                        } else {
-                            tcg_gen_andc_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_andc_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3493,12 +3478,7 @@
                         }
                         break;
                     case 0x6: /* orn */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_ori_tl(cpu_dst, cpu_src1, ~simm);
-                        } else {
-                            tcg_gen_orc_tl(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_orc_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3506,13 +3486,7 @@
                         }
                         break;
                     case 0x7: /* xorn */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_xori_tl(cpu_dst, cpu_src1, ~simm);
-                        } else {
-                            tcg_gen_not_tl(cpu_tmp0, cpu_src2);
-                            tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_tmp0);
-                        }
+                        tcg_gen_eqv_tl(cpu_dst, cpu_src1, cpu_src2);
                         if (xop & 0x10) {
                             tcg_gen_mov_tl(cpu_cc_dst, cpu_dst);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_LOGIC);
@@ -3525,12 +3499,7 @@
                         break;
 #ifdef TARGET_SPARC64
                     case 0x9: /* V9 mulx */
-                        if (IS_IMM) {
-                            simm = GET_FIELDs(insn, 19, 31);
-                            tcg_gen_muli_i64(cpu_dst, cpu_src1, simm);
-                        } else {
-                            tcg_gen_mul_i64(cpu_dst, cpu_src1, cpu_src2);
-                        }
+                        tcg_gen_mul_i64(cpu_dst, cpu_src1, cpu_src2);
                         break;
 #endif
                     case 0xa: /* umul */
@@ -3585,39 +3554,39 @@
                     default:
                         goto illegal_insn;
                     }
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                 } else {
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    cpu_src2 = get_src2(insn, cpu_src2);
+                    cpu_src1 = get_src1(dc, insn);
+                    cpu_src2 = get_src2(dc, insn);
                     switch (xop) {
                     case 0x20: /* taddcc */
                         gen_op_add_cc(cpu_dst, cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TADD);
                         dc->cc_op = CC_OP_TADD;
                         break;
                     case 0x21: /* tsubcc */
                         gen_op_sub_cc(cpu_dst, cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_TSUB);
                         dc->cc_op = CC_OP_TSUB;
                         break;
                     case 0x22: /* taddcctv */
                         gen_helper_taddcctv(cpu_dst, cpu_env,
                                             cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         dc->cc_op = CC_OP_TADDTV;
                         break;
                     case 0x23: /* tsubcctv */
                         gen_helper_tsubcctv(cpu_dst, cpu_env,
                                             cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         dc->cc_op = CC_OP_TSUBTV;
                         break;
                     case 0x24: /* mulscc */
                         update_psr(dc);
                         gen_op_mulscc(cpu_dst, cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         tcg_gen_movi_i32(cpu_cc_op, CC_OP_ADD);
                         dc->cc_op = CC_OP_ADD;
                         break;
@@ -3627,34 +3596,38 @@
                             simm = GET_FIELDs(insn, 20, 31);
                             tcg_gen_shli_tl(cpu_dst, cpu_src1, simm & 0x1f);
                         } else { /* register */
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_andi_tl(cpu_tmp0, cpu_src2, 0x1f);
                             tcg_gen_shl_tl(cpu_dst, cpu_src1, cpu_tmp0);
                         }
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         break;
                     case 0x26:  /* srl */
                         if (IS_IMM) { /* immediate */
                             simm = GET_FIELDs(insn, 20, 31);
                             tcg_gen_shri_tl(cpu_dst, cpu_src1, simm & 0x1f);
                         } else { /* register */
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_andi_tl(cpu_tmp0, cpu_src2, 0x1f);
                             tcg_gen_shr_tl(cpu_dst, cpu_src1, cpu_tmp0);
                         }
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         break;
                     case 0x27:  /* sra */
                         if (IS_IMM) { /* immediate */
                             simm = GET_FIELDs(insn, 20, 31);
                             tcg_gen_sari_tl(cpu_dst, cpu_src1, simm & 0x1f);
                         } else { /* register */
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_andi_tl(cpu_tmp0, cpu_src2, 0x1f);
                             tcg_gen_sar_tl(cpu_dst, cpu_src1, cpu_tmp0);
                         }
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         break;
 #endif
                     case 0x30:
                         {
+                            cpu_tmp0 = get_temp_tl(dc);
                             switch(rd) {
                             case 0: /* wry */
                                 tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
@@ -3672,19 +3645,19 @@
                                 break;
 #else
                             case 0x2: /* V9 wrccr */
-                                tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                                gen_helper_wrccr(cpu_env, cpu_dst);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                gen_helper_wrccr(cpu_env, cpu_tmp0);
                                 tcg_gen_movi_i32(cpu_cc_op, CC_OP_FLAGS);
                                 dc->cc_op = CC_OP_FLAGS;
                                 break;
                             case 0x3: /* V9 wrasi */
-                                tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_andi_tl(cpu_dst, cpu_dst, 0xff);
-                                tcg_gen_trunc_tl_i32(cpu_asi, cpu_dst);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xff);
+                                tcg_gen_trunc_tl_i32(cpu_asi, cpu_tmp0);
                                 break;
                             case 0x6: /* V9 wrfprs */
-                                tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                                tcg_gen_trunc_tl_i32(cpu_fprs, cpu_dst);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                tcg_gen_trunc_tl_i32(cpu_fprs, cpu_tmp0);
                                 save_state(dc);
                                 gen_op_next_insn();
                                 tcg_gen_exit_tb(0);
@@ -3706,20 +3679,20 @@
                             case 0x14: /* Softint set */
                                 if (!supervisor(dc))
                                     goto illegal_insn;
-                                tcg_gen_xor_tl(cpu_tmp64, cpu_src1, cpu_src2);
-                                gen_helper_set_softint(cpu_env, cpu_tmp64);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                gen_helper_set_softint(cpu_env, cpu_tmp0);
                                 break;
                             case 0x15: /* Softint clear */
                                 if (!supervisor(dc))
                                     goto illegal_insn;
-                                tcg_gen_xor_tl(cpu_tmp64, cpu_src1, cpu_src2);
-                                gen_helper_clear_softint(cpu_env, cpu_tmp64);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                gen_helper_clear_softint(cpu_env, cpu_tmp0);
                                 break;
                             case 0x16: /* Softint write */
                                 if (!supervisor(dc))
                                     goto illegal_insn;
-                                tcg_gen_xor_tl(cpu_tmp64, cpu_src1, cpu_src2);
-                                gen_helper_write_softint(cpu_env, cpu_tmp64);
+                                tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                                gen_helper_write_softint(cpu_env, cpu_tmp0);
                                 break;
                             case 0x17: /* Tick compare */
 #if !defined(CONFIG_USER_ONLY)
@@ -3747,13 +3720,13 @@
                                 {
                                     TCGv_ptr r_tickptr;
 
-                                    tcg_gen_xor_tl(cpu_dst, cpu_src1,
+                                    tcg_gen_xor_tl(cpu_tmp0, cpu_src1,
                                                    cpu_src2);
                                     r_tickptr = tcg_temp_new_ptr();
                                     tcg_gen_ld_ptr(r_tickptr, cpu_env,
                                                    offsetof(CPUSPARCState, stick));
                                     gen_helper_tick_set_count(r_tickptr,
-                                                              cpu_dst);
+                                                              cpu_tmp0);
                                     tcg_temp_free_ptr(r_tickptr);
                                 }
                                 break;
@@ -3808,8 +3781,9 @@
                                 goto illegal_insn;
                             }
 #else
-                            tcg_gen_xor_tl(cpu_dst, cpu_src1, cpu_src2);
-                            gen_helper_wrpsr(cpu_env, cpu_dst);
+                            cpu_tmp0 = get_temp_tl(dc);
+                            tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                            gen_helper_wrpsr(cpu_env, cpu_tmp0);
                             tcg_gen_movi_i32(cpu_cc_op, CC_OP_FLAGS);
                             dc->cc_op = CC_OP_FLAGS;
                             save_state(dc);
@@ -3823,6 +3797,7 @@
                         {
                             if (!supervisor(dc))
                                 goto priv_insn;
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
 #ifdef TARGET_SPARC64
                             switch (rd) {
@@ -3866,9 +3841,8 @@
 
                                     r_tsptr = tcg_temp_new_ptr();
                                     gen_load_trap_state_at_tl(r_tsptr, cpu_env);
-                                    tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                    tcg_gen_st_i32(cpu_tmp32, r_tsptr,
-                                                   offsetof(trap_state, tt));
+                                    tcg_gen_st32_tl(cpu_tmp0, r_tsptr,
+                                                    offsetof(trap_state, tt));
                                     tcg_temp_free_ptr(r_tsptr);
                                 }
                                 break;
@@ -3894,8 +3868,7 @@
                                 break;
                             case 7: // tl
                                 save_state(dc);
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
                                                offsetof(CPUSPARCState, tl));
                                 dc->npc = DYNAMIC_PC;
                                 break;
@@ -3906,40 +3879,34 @@
                                 gen_helper_wrcwp(cpu_env, cpu_tmp0);
                                 break;
                             case 10: // cansave
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        cansave));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         cansave));
                                 break;
                             case 11: // canrestore
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        canrestore));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         canrestore));
                                 break;
                             case 12: // cleanwin
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        cleanwin));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         cleanwin));
                                 break;
                             case 13: // otherwin
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        otherwin));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         otherwin));
                                 break;
                             case 14: // wstate
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState,
-                                                        wstate));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState,
+                                                         wstate));
                                 break;
                             case 16: // UA2005 gl
                                 CHECK_IU_FEATURE(dc, GL);
-                                tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                                tcg_gen_st_i32(cpu_tmp32, cpu_env,
-                                               offsetof(CPUSPARCState, gl));
+                                tcg_gen_st32_tl(cpu_tmp0, cpu_env,
+                                                offsetof(CPUSPARCState, gl));
                                 break;
                             case 26: // UA2005 strand status
                                 CHECK_IU_FEATURE(dc, HYPV);
@@ -3951,11 +3918,11 @@
                                 goto illegal_insn;
                             }
 #else
-                            tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                            if (dc->def->nwindows != 32)
-                                tcg_gen_andi_tl(cpu_tmp32, cpu_tmp32,
+                            tcg_gen_trunc_tl_i32(cpu_wim, cpu_tmp0);
+                            if (dc->def->nwindows != 32) {
+                                tcg_gen_andi_tl(cpu_wim, cpu_wim,
                                                 (1 << dc->def->nwindows) - 1);
-                            tcg_gen_mov_i32(cpu_wim, cpu_tmp32);
+                            }
 #endif
                         }
                         break;
@@ -3969,6 +3936,7 @@
                             CHECK_IU_FEATURE(dc, HYPV);
                             if (!hypervisor(dc))
                                 goto priv_insn;
+                            cpu_tmp0 = get_temp_tl(dc);
                             tcg_gen_xor_tl(cpu_tmp0, cpu_src1, cpu_src2);
                             switch (rd) {
                             case 0: // hpstate
@@ -4014,6 +3982,7 @@
                             int cc = GET_FIELD_SP(insn, 11, 12);
                             int cond = GET_FIELD_SP(insn, 14, 17);
                             DisasCompare cmp;
+                            TCGv dst;
 
                             if (insn & (1 << 18)) {
                                 if (cc == 0) {
@@ -4035,28 +4004,27 @@
                                 tcg_gen_movi_tl(cpu_src2, simm);
                             }
 
-                            gen_movl_reg_TN(rd, cpu_dst);
-                            tcg_gen_movcond_tl(cmp.cond, cpu_dst,
+                            dst = gen_load_gpr(dc, rd);
+                            tcg_gen_movcond_tl(cmp.cond, dst,
                                                cmp.c1, cmp.c2,
-                                               cpu_src2, cpu_dst);
+                                               cpu_src2, dst);
                             free_compare(&cmp);
-                            gen_movl_TN_reg(rd, cpu_dst);
+                            gen_store_gpr(dc, rd, dst);
                             break;
                         }
                     case 0x2d: /* V9 sdivx */
                         gen_helper_sdivx(cpu_dst, cpu_env, cpu_src1, cpu_src2);
-                        gen_movl_TN_reg(rd, cpu_dst);
+                        gen_store_gpr(dc, rd, cpu_dst);
                         break;
                     case 0x2e: /* V9 popc */
-                        {
-                            cpu_src2 = get_src2(insn, cpu_src2);
-                            gen_helper_popc(cpu_dst, cpu_src2);
-                            gen_movl_TN_reg(rd, cpu_dst);
-                        }
+                        gen_helper_popc(cpu_dst, cpu_src2);
+                        gen_store_gpr(dc, rd, cpu_dst);
+                        break;
                     case 0x2f: /* V9 movr */
                         {
                             int cond = GET_FIELD_SP(insn, 10, 12);
                             DisasCompare cmp;
+                            TCGv dst;
 
                             gen_compare_reg(&cmp, cond, cpu_src1);
 
@@ -4068,12 +4036,12 @@
                                 tcg_gen_movi_tl(cpu_src2, simm);
                             }
 
-                            gen_movl_reg_TN(rd, cpu_dst);
-                            tcg_gen_movcond_tl(cmp.cond, cpu_dst,
+                            dst = gen_load_gpr(dc, rd);
+                            tcg_gen_movcond_tl(cmp.cond, dst,
                                                cmp.c1, cmp.c2,
-                                               cpu_src2, cpu_dst);
+                                               cpu_src2, dst);
                             free_compare(&cmp);
-                            gen_movl_TN_reg(rd, cpu_dst);
+                            gen_store_gpr(dc, rd, dst);
                             break;
                         }
 #endif
@@ -4093,188 +4061,188 @@
                 switch (opf) {
                 case 0x000: /* VIS I edge8cc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 1, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x001: /* VIS II edge8n */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 0, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x002: /* VIS I edge8lcc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 1, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x003: /* VIS II edge8ln */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 8, 0, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x004: /* VIS I edge16cc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 1, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x005: /* VIS II edge16n */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 0, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x006: /* VIS I edge16lcc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 1, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x007: /* VIS II edge16ln */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 16, 0, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x008: /* VIS I edge32cc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 1, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x009: /* VIS II edge32n */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 0, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x00a: /* VIS I edge32lcc */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 1, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x00b: /* VIS II edge32ln */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    gen_movl_reg_TN(rs1, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_edge(dc, cpu_dst, cpu_src1, cpu_src2, 32, 0, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x010: /* VIS I array8 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x012: /* VIS I array16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_shli_i64(cpu_dst, cpu_dst, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x014: /* VIS I array32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_helper_array8(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_shli_i64(cpu_dst, cpu_dst, 2);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x018: /* VIS I alignaddr */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_alignaddr(cpu_dst, cpu_src1, cpu_src2, 0);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x01a: /* VIS I alignaddrl */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    gen_movl_reg_TN(rs2, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     gen_alignaddr(cpu_dst, cpu_src1, cpu_src2, 1);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x019: /* VIS II bmask */
                     CHECK_FPU_FEATURE(dc, VIS2);
-                    cpu_src1 = get_src1(insn, cpu_src1);
-                    cpu_src2 = get_src1(insn, cpu_src2);
+                    cpu_src1 = gen_load_gpr(dc, rs1);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
                     tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
                     tcg_gen_deposit_tl(cpu_gsr, cpu_gsr, cpu_dst, 32, 32);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x020: /* VIS I fcmple16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmple16(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x022: /* VIS I fcmpne16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpne16(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x024: /* VIS I fcmple32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmple32(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x026: /* VIS I fcmpne32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpne32(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x028: /* VIS I fcmpgt16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpgt16(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x02a: /* VIS I fcmpeq16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpeq16(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x02c: /* VIS I fcmpgt32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpgt32(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x02e: /* VIS I fcmpeq32 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs1);
                     cpu_src2_64 = gen_load_fpr_D(dc, rs2);
                     gen_helper_fcmpeq32(cpu_dst, cpu_src1_64, cpu_src2_64);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_dst);
                     break;
                 case 0x031: /* VIS I fmul8x16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
@@ -4311,14 +4279,14 @@
                 case 0x03b: /* VIS I fpack16 */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs2);
-                    cpu_dst_32 = gen_dest_fpr_F();
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
                     gen_helper_fpack16(cpu_dst_32, cpu_gsr, cpu_src1_64);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x03d: /* VIS I fpackfix */
                     CHECK_FPU_FEATURE(dc, VIS1);
                     cpu_src1_64 = gen_load_fpr_D(dc, rs2);
-                    cpu_dst_32 = gen_dest_fpr_F();
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
                     gen_helper_fpackfix(cpu_dst_32, cpu_gsr, cpu_src1_64);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
@@ -4376,13 +4344,13 @@
                     break;
                 case 0x060: /* VIS I fzero */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_dst_64 = gen_dest_fpr_D();
+                    cpu_dst_64 = gen_dest_fpr_D(dc, rd);
                     tcg_gen_movi_i64(cpu_dst_64, 0);
                     gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 case 0x061: /* VIS I fzeros */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_dst_32 = gen_dest_fpr_F();
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
                     tcg_gen_movi_i32(cpu_dst_32, 0);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
@@ -4504,13 +4472,13 @@
                     break;
                 case 0x07e: /* VIS I fone */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_dst_64 = gen_dest_fpr_D();
+                    cpu_dst_64 = gen_dest_fpr_D(dc, rd);
                     tcg_gen_movi_i64(cpu_dst_64, -1);
                     gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
                 case 0x07f: /* VIS I fones */
                     CHECK_FPU_FEATURE(dc, VIS1);
-                    cpu_dst_32 = gen_dest_fpr_F();
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
                     tcg_gen_movi_i32(cpu_dst_32, -1);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
@@ -4535,55 +4503,59 @@
                 TCGv_i32 r_const;
 
                 save_state(dc);
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
+                cpu_tmp0 = get_temp_tl(dc);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 19, 31);
-                    tcg_gen_addi_tl(cpu_dst, cpu_src1, simm);
+                    tcg_gen_addi_tl(cpu_tmp0, cpu_src1, simm);
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
                     if (rs2) {
-                        gen_movl_reg_TN(rs2, cpu_src2);
-                        tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
-                    } else
-                        tcg_gen_mov_tl(cpu_dst, cpu_src1);
+                        cpu_src2 = gen_load_gpr(dc, rs2);
+                        tcg_gen_add_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                    } else {
+                        tcg_gen_mov_tl(cpu_tmp0, cpu_src1);
+                    }
                 }
                 gen_helper_restore(cpu_env);
                 gen_mov_pc_npc(dc);
                 r_const = tcg_const_i32(3);
-                gen_helper_check_align(cpu_env, cpu_dst, r_const);
+                gen_helper_check_align(cpu_env, cpu_tmp0, r_const);
                 tcg_temp_free_i32(r_const);
-                tcg_gen_mov_tl(cpu_npc, cpu_dst);
+                tcg_gen_mov_tl(cpu_npc, cpu_tmp0);
                 dc->npc = DYNAMIC_PC;
                 goto jmp_insn;
 #endif
             } else {
-                cpu_src1 = get_src1(insn, cpu_src1);
+                cpu_src1 = get_src1(dc, insn);
+                cpu_tmp0 = get_temp_tl(dc);
                 if (IS_IMM) {   /* immediate */
                     simm = GET_FIELDs(insn, 19, 31);
-                    tcg_gen_addi_tl(cpu_dst, cpu_src1, simm);
+                    tcg_gen_addi_tl(cpu_tmp0, cpu_src1, simm);
                 } else {                /* register */
                     rs2 = GET_FIELD(insn, 27, 31);
                     if (rs2) {
-                        gen_movl_reg_TN(rs2, cpu_src2);
-                        tcg_gen_add_tl(cpu_dst, cpu_src1, cpu_src2);
-                    } else
-                        tcg_gen_mov_tl(cpu_dst, cpu_src1);
+                        cpu_src2 = gen_load_gpr(dc, rs2);
+                        tcg_gen_add_tl(cpu_tmp0, cpu_src1, cpu_src2);
+                    } else {
+                        tcg_gen_mov_tl(cpu_tmp0, cpu_src1);
+                    }
                 }
                 switch (xop) {
                 case 0x38:      /* jmpl */
                     {
-                        TCGv r_pc;
+                        TCGv t;
                         TCGv_i32 r_const;
 
-                        r_pc = tcg_const_tl(dc->pc);
-                        gen_movl_TN_reg(rd, r_pc);
-                        tcg_temp_free(r_pc);
+                        t = gen_dest_gpr(dc, rd);
+                        tcg_gen_movi_tl(t, dc->pc);
+                        gen_store_gpr(dc, rd, t);
                         gen_mov_pc_npc(dc);
                         r_const = tcg_const_i32(3);
-                        gen_helper_check_align(cpu_env, cpu_dst, r_const);
+                        gen_helper_check_align(cpu_env, cpu_tmp0, r_const);
                         tcg_temp_free_i32(r_const);
-                        gen_address_mask(dc, cpu_dst);
-                        tcg_gen_mov_tl(cpu_npc, cpu_dst);
+                        gen_address_mask(dc, cpu_tmp0);
+                        tcg_gen_mov_tl(cpu_npc, cpu_tmp0);
                         dc->npc = DYNAMIC_PC;
                     }
                     goto jmp_insn;
@@ -4596,9 +4568,9 @@
                             goto priv_insn;
                         gen_mov_pc_npc(dc);
                         r_const = tcg_const_i32(3);
-                        gen_helper_check_align(cpu_env, cpu_dst, r_const);
+                        gen_helper_check_align(cpu_env, cpu_tmp0, r_const);
                         tcg_temp_free_i32(r_const);
-                        tcg_gen_mov_tl(cpu_npc, cpu_dst);
+                        tcg_gen_mov_tl(cpu_npc, cpu_tmp0);
                         dc->npc = DYNAMIC_PC;
                         gen_helper_rett(cpu_env);
                     }
@@ -4612,12 +4584,12 @@
                 case 0x3c:      /* save */
                     save_state(dc);
                     gen_helper_save(cpu_env);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_tmp0);
                     break;
                 case 0x3d:      /* restore */
                     save_state(dc);
                     gen_helper_restore(cpu_env);
-                    gen_movl_TN_reg(rd, cpu_dst);
+                    gen_store_gpr(dc, rd, cpu_tmp0);
                     break;
 #if !defined(CONFIG_USER_ONLY) && defined(TARGET_SPARC64)
                 case 0x3e:      /* V9 done/retry */
@@ -4653,26 +4625,29 @@
     case 3:                     /* load/store instructions */
         {
             unsigned int xop = GET_FIELD(insn, 7, 12);
+            /* ??? gen_address_mask prevents us from using a source
+               register directly.  Always generate a temporary.  */
+            TCGv cpu_addr = get_temp_tl(dc);
 
-            cpu_src1 = get_src1(insn, cpu_src1);
-            if (xop == 0x3c || xop == 0x3e) { // V9 casa/casxa
-                rs2 = GET_FIELD(insn, 27, 31);
-                gen_movl_reg_TN(rs2, cpu_src2);
-                tcg_gen_mov_tl(cpu_addr, cpu_src1);
+            tcg_gen_mov_tl(cpu_addr, get_src1(dc, insn));
+            if (xop == 0x3c || xop == 0x3e) {
+                /* V9 casa/casxa : no offset */
             } else if (IS_IMM) {     /* immediate */
                 simm = GET_FIELDs(insn, 19, 31);
-                tcg_gen_addi_tl(cpu_addr, cpu_src1, simm);
+                if (simm != 0) {
+                    tcg_gen_addi_tl(cpu_addr, cpu_addr, simm);
+                }
             } else {            /* register */
                 rs2 = GET_FIELD(insn, 27, 31);
                 if (rs2 != 0) {
-                    gen_movl_reg_TN(rs2, cpu_src2);
-                    tcg_gen_add_tl(cpu_addr, cpu_src1, cpu_src2);
-                } else
-                    tcg_gen_mov_tl(cpu_addr, cpu_src1);
+                    tcg_gen_add_tl(cpu_addr, cpu_addr, gen_load_gpr(dc, rs2));
+                }
             }
             if (xop < 4 || (xop > 7 && xop < 0x14 && xop != 0x0e) ||
                 (xop > 0x17 && xop <= 0x1d ) ||
                 (xop > 0x2c && xop <= 0x33) || xop == 0x1f || xop == 0x3d) {
+                TCGv cpu_val = gen_dest_gpr(dc, rd);
+
                 switch (xop) {
                 case 0x0:       /* ld, V9 lduw, load unsigned word */
                     gen_address_mask(dc, cpu_addr);
@@ -4691,6 +4666,7 @@
                         goto illegal_insn;
                     else {
                         TCGv_i32 r_const;
+                        TCGv_i64 t64;
 
                         save_state(dc);
                         r_const = tcg_const_i32(7);
@@ -4698,13 +4674,15 @@
                         gen_helper_check_align(cpu_env, cpu_addr, r_const);
                         tcg_temp_free_i32(r_const);
                         gen_address_mask(dc, cpu_addr);
-                        tcg_gen_qemu_ld64(cpu_tmp64, cpu_addr, dc->mem_idx);
-                        tcg_gen_trunc_i64_tl(cpu_tmp0, cpu_tmp64);
-                        tcg_gen_andi_tl(cpu_tmp0, cpu_tmp0, 0xffffffffULL);
-                        gen_movl_TN_reg(rd + 1, cpu_tmp0);
-                        tcg_gen_shri_i64(cpu_tmp64, cpu_tmp64, 32);
-                        tcg_gen_trunc_i64_tl(cpu_val, cpu_tmp64);
-                        tcg_gen_andi_tl(cpu_val, cpu_val, 0xffffffffULL);
+                        t64 = tcg_temp_new_i64();
+                        tcg_gen_qemu_ld64(t64, cpu_addr, dc->mem_idx);
+                        tcg_gen_trunc_i64_tl(cpu_val, t64);
+                        tcg_gen_ext32u_tl(cpu_val, cpu_val);
+                        gen_store_gpr(dc, rd + 1, cpu_val);
+                        tcg_gen_shri_i64(t64, t64, 32);
+                        tcg_gen_trunc_i64_tl(cpu_val, t64);
+                        tcg_temp_free_i64(t64);
+                        tcg_gen_ext32u_tl(cpu_val, cpu_val);
                     }
                     break;
                 case 0x9:       /* ldsb, load signed byte */
@@ -4726,14 +4704,17 @@
                         tcg_temp_free(r_const);
                     }
                     break;
-                case 0x0f:      /* swap, swap register with memory. Also
-                                   atomically */
-                    CHECK_IU_FEATURE(dc, SWAP);
-                    gen_movl_reg_TN(rd, cpu_val);
-                    gen_address_mask(dc, cpu_addr);
-                    tcg_gen_qemu_ld32u(cpu_tmp0, cpu_addr, dc->mem_idx);
-                    tcg_gen_qemu_st32(cpu_val, cpu_addr, dc->mem_idx);
-                    tcg_gen_mov_tl(cpu_val, cpu_tmp0);
+                case 0x0f:
+                    /* swap, swap register with memory. Also atomically */
+                    {
+                        TCGv t0 = get_temp_tl(dc);
+                        CHECK_IU_FEATURE(dc, SWAP);
+                        cpu_src1 = gen_load_gpr(dc, rd);
+                        gen_address_mask(dc, cpu_addr);
+                        tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
+                        tcg_gen_qemu_st32(cpu_src1, cpu_addr, dc->mem_idx);
+                        tcg_gen_mov_tl(cpu_val, t0);
+                    }
                     break;
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
                 case 0x10:      /* lda, V9 lduwa, load word alternate */
@@ -4776,7 +4757,7 @@
                     if (rd & 1)
                         goto illegal_insn;
                     save_state(dc);
-                    gen_ldda_asi(cpu_val, cpu_addr, insn, rd);
+                    gen_ldda_asi(dc, cpu_val, cpu_addr, insn, rd);
                     goto skip_move;
                 case 0x19:      /* ldsba, load signed byte alternate */
 #ifndef TARGET_SPARC64
@@ -4818,8 +4799,8 @@
                         goto priv_insn;
 #endif
                     save_state(dc);
-                    gen_movl_reg_TN(rd, cpu_val);
-                    gen_swap_asi(cpu_val, cpu_addr, insn);
+                    cpu_src1 = gen_load_gpr(dc, rd);
+                    gen_swap_asi(cpu_val, cpu_src1, cpu_addr, insn);
                     break;
 
 #ifndef TARGET_SPARC64
@@ -4879,11 +4860,13 @@
                 default:
                     goto illegal_insn;
                 }
-                gen_movl_TN_reg(rd, cpu_val);
+                gen_store_gpr(dc, rd, cpu_val);
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
             skip_move: ;
 #endif
             } else if (xop >= 0x20 && xop < 0x24) {
+                TCGv t0;
+
                 if (gen_trap_ifnofpu(dc)) {
                     goto jmp_insn;
                 }
@@ -4891,28 +4874,28 @@
                 switch (xop) {
                 case 0x20:      /* ldf, load fpreg */
                     gen_address_mask(dc, cpu_addr);
-                    tcg_gen_qemu_ld32u(cpu_tmp0, cpu_addr, dc->mem_idx);
-                    cpu_dst_32 = gen_dest_fpr_F();
-                    tcg_gen_trunc_tl_i32(cpu_dst_32, cpu_tmp0);
+                    t0 = get_temp_tl(dc);
+                    tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
+                    cpu_dst_32 = gen_dest_fpr_F(dc);
+                    tcg_gen_trunc_tl_i32(cpu_dst_32, t0);
                     gen_store_fpr_F(dc, rd, cpu_dst_32);
                     break;
                 case 0x21:      /* ldfsr, V9 ldxfsr */
 #ifdef TARGET_SPARC64
                     gen_address_mask(dc, cpu_addr);
                     if (rd == 1) {
-                        tcg_gen_qemu_ld64(cpu_tmp64, cpu_addr, dc->mem_idx);
-                        gen_helper_ldxfsr(cpu_env, cpu_tmp64);
-                    } else {
-                        tcg_gen_qemu_ld32u(cpu_tmp0, cpu_addr, dc->mem_idx);
-                        tcg_gen_trunc_tl_i32(cpu_tmp32, cpu_tmp0);
-                        gen_helper_ldfsr(cpu_env, cpu_tmp32);
-                    }
-#else
-                    {
-                        tcg_gen_qemu_ld32u(cpu_tmp32, cpu_addr, dc->mem_idx);
-                        gen_helper_ldfsr(cpu_env, cpu_tmp32);
+                        TCGv_i64 t64 = tcg_temp_new_i64();
+                        tcg_gen_qemu_ld64(t64, cpu_addr, dc->mem_idx);
+                        gen_helper_ldxfsr(cpu_env, t64);
+                        tcg_temp_free_i64(t64);
+                        break;
                     }
 #endif
+                    cpu_dst_32 = get_temp_i32(dc);
+                    t0 = get_temp_tl(dc);
+                    tcg_gen_qemu_ld32u(t0, cpu_addr, dc->mem_idx);
+                    tcg_gen_trunc_tl_i32(cpu_dst_32, t0);
+                    gen_helper_ldfsr(cpu_env, cpu_dst_32);
                     break;
                 case 0x22:      /* ldqf, load quad fpreg */
                     {
@@ -4929,7 +4912,7 @@
                     break;
                 case 0x23:      /* lddf, load double fpreg */
                     gen_address_mask(dc, cpu_addr);
-                    cpu_dst_64 = gen_dest_fpr_D();
+                    cpu_dst_64 = gen_dest_fpr_D(dc, rd);
                     tcg_gen_qemu_ld64(cpu_dst_64, cpu_addr, dc->mem_idx);
                     gen_store_fpr_D(dc, rd, cpu_dst_64);
                     break;
@@ -4938,7 +4921,8 @@
                 }
             } else if (xop < 8 || (xop >= 0x14 && xop < 0x18) ||
                        xop == 0xe || xop == 0x1e) {
-                gen_movl_reg_TN(rd, cpu_val);
+                TCGv cpu_val = gen_load_gpr(dc, rd);
+
                 switch (xop) {
                 case 0x4: /* st, store word */
                     gen_address_mask(dc, cpu_addr);
@@ -4957,6 +4941,8 @@
                         goto illegal_insn;
                     else {
                         TCGv_i32 r_const;
+                        TCGv_i64 t64;
+                        TCGv lo;
 
                         save_state(dc);
                         gen_address_mask(dc, cpu_addr);
@@ -4964,9 +4950,12 @@
                         /* XXX remove alignment check */
                         gen_helper_check_align(cpu_env, cpu_addr, r_const);
                         tcg_temp_free_i32(r_const);
-                        gen_movl_reg_TN(rd + 1, cpu_tmp0);
-                        tcg_gen_concat_tl_i64(cpu_tmp64, cpu_tmp0, cpu_val);
-                        tcg_gen_qemu_st64(cpu_tmp64, cpu_addr, dc->mem_idx);
+                        lo = gen_load_gpr(dc, rd + 1);
+
+                        t64 = tcg_temp_new_i64();
+                        tcg_gen_concat_tl_i64(t64, lo, cpu_val);
+                        tcg_gen_qemu_st64(t64, cpu_addr, dc->mem_idx);
+                        tcg_temp_free_i64(t64);
                     }
                     break;
 #if !defined(CONFIG_USER_ONLY) || defined(TARGET_SPARC64)
@@ -5014,7 +5003,7 @@
                         goto illegal_insn;
                     else {
                         save_state(dc);
-                        gen_stda_asi(cpu_val, cpu_addr, insn, rd);
+                        gen_stda_asi(dc, cpu_val, cpu_addr, insn, rd);
                     }
                     break;
 #endif
@@ -5039,23 +5028,28 @@
                 save_state(dc);
                 switch (xop) {
                 case 0x24: /* stf, store fpreg */
-                    gen_address_mask(dc, cpu_addr);
-                    cpu_src1_32 = gen_load_fpr_F(dc, rd);
-                    tcg_gen_ext_i32_tl(cpu_tmp0, cpu_src1_32);
-                    tcg_gen_qemu_st32(cpu_tmp0, cpu_addr, dc->mem_idx);
+                    {
+                        TCGv t = get_temp_tl(dc);
+                        gen_address_mask(dc, cpu_addr);
+                        cpu_src1_32 = gen_load_fpr_F(dc, rd);
+                        tcg_gen_ext_i32_tl(t, cpu_src1_32);
+                        tcg_gen_qemu_st32(t, cpu_addr, dc->mem_idx);
+                    }
                     break;
                 case 0x25: /* stfsr, V9 stxfsr */
+                    {
+                        TCGv t = get_temp_tl(dc);
+
+                        tcg_gen_ld_tl(t, cpu_env, offsetof(CPUSPARCState, fsr));
 #ifdef TARGET_SPARC64
-                    gen_address_mask(dc, cpu_addr);
-                    tcg_gen_ld_i64(cpu_tmp64, cpu_env, offsetof(CPUSPARCState, fsr));
-                    if (rd == 1)
-                        tcg_gen_qemu_st64(cpu_tmp64, cpu_addr, dc->mem_idx);
-                    else
-                        tcg_gen_qemu_st32(cpu_tmp64, cpu_addr, dc->mem_idx);
-#else
-                    tcg_gen_ld_i32(cpu_tmp32, cpu_env, offsetof(CPUSPARCState, fsr));
-                    tcg_gen_qemu_st32(cpu_tmp32, cpu_addr, dc->mem_idx);
+                        gen_address_mask(dc, cpu_addr);
+                        if (rd == 1) {
+                            tcg_gen_qemu_st64(t, cpu_addr, dc->mem_idx);
+                            break;
+                        }
 #endif
+                        tcg_gen_qemu_st32(t, cpu_addr, dc->mem_idx);
+                    }
                     break;
                 case 0x26:
 #ifdef TARGET_SPARC64
@@ -5123,12 +5117,14 @@
                     gen_stf_asi(cpu_addr, insn, 8, DFPREG(rd));
                     break;
                 case 0x3c: /* V9 casa */
-                    gen_cas_asi(cpu_val, cpu_addr, cpu_src2, insn, rd);
-                    gen_movl_TN_reg(rd, cpu_val);
+                    rs2 = GET_FIELD(insn, 27, 31);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    gen_cas_asi(dc, cpu_addr, cpu_src2, insn, rd);
                     break;
                 case 0x3e: /* V9 casxa */
-                    gen_casx_asi(cpu_val, cpu_addr, cpu_src2, insn, rd);
-                    gen_movl_TN_reg(rd, cpu_val);
+                    rs2 = GET_FIELD(insn, 27, 31);
+                    cpu_src2 = gen_load_gpr(dc, rs2);
+                    gen_casx_asi(dc, cpu_addr, cpu_src2, insn, rd);
                     break;
 #else
                 case 0x34: /* stc */
@@ -5140,8 +5136,9 @@
                 default:
                     goto illegal_insn;
                 }
-            } else
+            } else {
                 goto illegal_insn;
+            }
         }
         break;
     }
@@ -5220,8 +5217,6 @@
     goto egress;
 #endif
  egress:
-    tcg_temp_free(cpu_tmp1);
-    tcg_temp_free(cpu_tmp2);
     if (dc->n_t32 != 0) {
         int i;
         for (i = dc->n_t32 - 1; i >= 0; --i) {
@@ -5229,6 +5224,13 @@
         }
         dc->n_t32 = 0;
     }
+    if (dc->n_ttl != 0) {
+        int i;
+        for (i = dc->n_ttl - 1; i >= 0; --i) {
+            tcg_temp_free(dc->ttl[i]);
+        }
+        dc->n_ttl = 0;
+    }
 }
 
 static inline void gen_intermediate_code_internal(TranslationBlock * tb,
@@ -5293,23 +5295,9 @@
         last_pc = dc->pc;
         insn = cpu_ldl_code(env, dc->pc);
 
-        cpu_tmp0 = tcg_temp_new();
-        cpu_tmp32 = tcg_temp_new_i32();
-        cpu_tmp64 = tcg_temp_new_i64();
-        cpu_dst = tcg_temp_new();
-        cpu_val = tcg_temp_new();
-        cpu_addr = tcg_temp_new();
-
         disas_sparc_insn(dc, insn);
         num_insns++;
 
-        tcg_temp_free(cpu_addr);
-        tcg_temp_free(cpu_val);
-        tcg_temp_free(cpu_dst);
-        tcg_temp_free_i64(cpu_tmp64);
-        tcg_temp_free_i32(cpu_tmp32);
-        tcg_temp_free(cpu_tmp0);
-
         if (dc->is_br)
             break;
         /* if the next PC is different, we abort now */
diff --git a/targphys.h b/targphys.h
index 08cade9..50911fd 100644
--- a/targphys.h
+++ b/targphys.h
@@ -3,6 +3,8 @@
 #ifndef TARGPHYS_H
 #define TARGPHYS_H
 
+#ifndef CONFIG_USER_ONLY
+
 #define TARGET_PHYS_ADDR_BITS 64
 /* target_phys_addr_t is the type of a physical address (its size can
    be different from 'target_ulong').  */
@@ -18,3 +20,5 @@
 #define TARGET_PRIXPHYS PRIX64
 
 #endif
+
+#endif
diff --git a/tcg/arm/tcg-target.h b/tcg/arm/tcg-target.h
index 0df3352..98fa11b 100644
--- a/tcg/arm/tcg-target.h
+++ b/tcg/arm/tcg-target.h
@@ -75,8 +75,6 @@
 #define TCG_TARGET_HAS_deposit_i32      0
 #define TCG_TARGET_HAS_movcond_i32      1
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 enum {
     TCG_AREG0 = TCG_REG_R6,
 };
diff --git a/tcg/hppa/tcg-target.h b/tcg/hppa/tcg-target.h
index 5351353..f43fb41 100644
--- a/tcg/hppa/tcg-target.h
+++ b/tcg/hppa/tcg-target.h
@@ -103,8 +103,6 @@
 #define TCG_TARGET_HAS_ext8u_i32        0 /* and rd, rs, 0xff */
 #define TCG_TARGET_HAS_ext16u_i32       0 /* and rd, rs, 0xffff */
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #define TCG_AREG0 TCG_REG_R17
 
 
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index ace63ba..dbc6756 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -125,8 +125,6 @@
      ((ofs) == 0 && (len) == 16))
 #define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #if TCG_TARGET_REG_BITS == 64
 # define TCG_AREG0 TCG_REG_R14
 #else
diff --git a/tcg/ia64/tcg-target.h b/tcg/ia64/tcg-target.h
index 4255ca5..91fe7a3 100644
--- a/tcg/ia64/tcg-target.h
+++ b/tcg/ia64/tcg-target.h
@@ -147,9 +147,6 @@
 
 #define TCG_AREG0 TCG_REG_R7
 
-/* Guest base is supported */
-#define TCG_TARGET_HAS_GUEST_BASE
-
 static inline void flush_icache_range(tcg_target_ulong start,
                                       tcg_target_ulong stop)
 {
diff --git a/tcg/mips/tcg-target.h b/tcg/mips/tcg-target.h
index 7020d65..65b5c59 100644
--- a/tcg/mips/tcg-target.h
+++ b/tcg/mips/tcg-target.h
@@ -116,9 +116,6 @@
 
 #define TCG_AREG0 TCG_REG_S0
 
-/* guest base is supported */
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #ifdef __OpenBSD__
 #include <machine/sysarch.h>
 #else
diff --git a/tcg/optimize.c b/tcg/optimize.c
index edb2b0e..a06c8eb 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -292,6 +292,82 @@
     return res;
 }
 
+static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
+{
+    /* Evaluate "x C y" on 32-bit constants.  The signed conditions
+       reinterpret the same bit patterns as int32_t.  */
+    const int32_t sx = (int32_t)x;
+    const int32_t sy = (int32_t)y;
+
+    switch (c) {
+    /* Equality does not depend on signedness.  */
+    case TCG_COND_EQ:  return x == y;
+    case TCG_COND_NE:  return x != y;
+
+    /* Signed orderings.  */
+    case TCG_COND_LT:  return sx < sy;
+    case TCG_COND_GE:  return sx >= sy;
+    case TCG_COND_LE:  return sx <= sy;
+    case TCG_COND_GT:  return sx > sy;
+
+    /* Unsigned orderings.  */
+    case TCG_COND_LTU: return x < y;
+    case TCG_COND_GEU: return x >= y;
+    case TCG_COND_LEU: return x <= y;
+    case TCG_COND_GTU: return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
+{
+    /* Evaluate "x C y" on 64-bit constants.  The signed conditions
+       reinterpret the same bit patterns as int64_t.  */
+    const int64_t sx = (int64_t)x;
+    const int64_t sy = (int64_t)y;
+
+    switch (c) {
+    /* Equality does not depend on signedness.  */
+    case TCG_COND_EQ:  return x == y;
+    case TCG_COND_NE:  return x != y;
+
+    /* Signed orderings.  */
+    case TCG_COND_LT:  return sx < sy;
+    case TCG_COND_GE:  return sx >= sy;
+    case TCG_COND_LE:  return sx <= sy;
+    case TCG_COND_GT:  return sx > sy;
+
+    /* Unsigned orderings.  */
+    case TCG_COND_LTU: return x < y;
+    case TCG_COND_GEU: return x >= y;
+    case TCG_COND_LEU: return x <= y;
+    case TCG_COND_GTU: return x > y;
+    default:
+        tcg_abort();
+    }
+}
+
+static bool do_constant_folding_cond_eq(TCGCond c)
+{
+    switch (c) {
+    case TCG_COND_EQ:
+    case TCG_COND_LE:
+    case TCG_COND_LEU:
+    case TCG_COND_GE:
+    case TCG_COND_GEU:
+        return true;    /* x OP x holds whenever OP admits equality */
+    case TCG_COND_NE:
+    case TCG_COND_LT:
+    case TCG_COND_LTU:
+    case TCG_COND_GT:
+    case TCG_COND_GTU:
+        return false;   /* strict orderings and NE never hold for x OP x */
+    default:
+        tcg_abort();
+    }
+}
+
 /* Return 2 if the condition can't be simplified, and the result
    of the condition (0 or 1) if it can */
 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
@@ -300,75 +376,14 @@
     if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
         switch (op_bits(op)) {
         case 32:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint32_t)temps[x].val == (uint32_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint32_t)temps[x].val != (uint32_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int32_t)temps[x].val < (int32_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int32_t)temps[x].val >= (int32_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int32_t)temps[x].val <= (int32_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int32_t)temps[x].val > (int32_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint32_t)temps[x].val < (uint32_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint32_t)temps[x].val >= (uint32_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint32_t)temps[x].val <= (uint32_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint32_t)temps[x].val > (uint32_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
+            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
         case 64:
-            switch (c) {
-            case TCG_COND_EQ:
-                return (uint64_t)temps[x].val == (uint64_t)temps[y].val;
-            case TCG_COND_NE:
-                return (uint64_t)temps[x].val != (uint64_t)temps[y].val;
-            case TCG_COND_LT:
-                return (int64_t)temps[x].val < (int64_t)temps[y].val;
-            case TCG_COND_GE:
-                return (int64_t)temps[x].val >= (int64_t)temps[y].val;
-            case TCG_COND_LE:
-                return (int64_t)temps[x].val <= (int64_t)temps[y].val;
-            case TCG_COND_GT:
-                return (int64_t)temps[x].val > (int64_t)temps[y].val;
-            case TCG_COND_LTU:
-                return (uint64_t)temps[x].val < (uint64_t)temps[y].val;
-            case TCG_COND_GEU:
-                return (uint64_t)temps[x].val >= (uint64_t)temps[y].val;
-            case TCG_COND_LEU:
-                return (uint64_t)temps[x].val <= (uint64_t)temps[y].val;
-            case TCG_COND_GTU:
-                return (uint64_t)temps[x].val > (uint64_t)temps[y].val;
-            default:
-                break;
-            }
-            break;
+            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
+        default:
+            tcg_abort();
         }
     } else if (temps_are_copies(x, y)) {
-        switch (c) {
-        case TCG_COND_GT:
-        case TCG_COND_LTU:
-        case TCG_COND_LT:
-        case TCG_COND_GTU:
-        case TCG_COND_NE:
-            return 0;
-        case TCG_COND_GE:
-        case TCG_COND_GEU:
-        case TCG_COND_LE:
-        case TCG_COND_LEU:
-        case TCG_COND_EQ:
-            return 1;
-        default:
-            break;
-        }
+        return do_constant_folding_cond_eq(c);
     } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
         switch (c) {
         case TCG_COND_LTU:
@@ -381,11 +396,73 @@
     } else {
         return 2;
     }
+}
 
-    fprintf(stderr,
-            "Unrecognized bitness %d or condition %d in "
-            "do_constant_folding_cond.\n", op_bits(op), c);
-    tcg_abort();
+/* Double-word (2 x 32-bit) variant: return 2 if the condition can't be
+   simplified, and the result of the condition (0 or 1) if it can.  */
+static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
+{
+    const TCGArg al = p1[0], ah = p1[1];
+    const TCGArg bl = p2[0], bh = p2[1];
+    const bool a_const = temps[al].state == TCG_TEMP_CONST
+                         && temps[ah].state == TCG_TEMP_CONST;
+    const bool b_const = temps[bl].state == TCG_TEMP_CONST
+                         && temps[bh].state == TCG_TEMP_CONST;
+
+    if (b_const) {
+        /* Glue the high and low halves back into one 64-bit constant.  */
+        const uint64_t b = ((uint64_t)temps[bh].val << 32)
+                           | (uint32_t)temps[bl].val;
+        if (a_const) {
+            const uint64_t a = ((uint64_t)temps[ah].val << 32)
+                               | (uint32_t)temps[al].val;
+            return do_constant_folding_cond_64(a, b, c);
+        }
+        /* Nothing is unsigned-below zero; everything is at-or-above.  */
+        if (b == 0 && c == TCG_COND_LTU) {
+            return 0;
+        }
+        if (b == 0 && c == TCG_COND_GEU) {
+            return 1;
+        }
+    }
+    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
+        return do_constant_folding_cond_eq(c);
+    }
+    return 2;
+}
+
+static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
+{
+    /* Canonicalize operand order: a lone constant goes second; failing
+       that, prefer "op a, a, b", which suits non-RISC hosts better.  */
+    const TCGArg first = *p1, second = *p2;
+    const bool first_const = temps[first].state == TCG_TEMP_CONST;
+    const bool second_const = temps[second].state == TCG_TEMP_CONST;
+
+    if (first_const == second_const ? dest != second : !first_const) {
+        return false;   /* already in the preferred order */
+    }
+
+    *p1 = second;
+    *p2 = first;
+    return true;        /* operands were exchanged */
+}
+
+static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
+{
+    /* Exchange the word pairs when P1 holds more constants than P2.  */
+    int n1 = (temps[p1[0]].state == TCG_TEMP_CONST)
+             + (temps[p1[1]].state == TCG_TEMP_CONST);
+    int n2 = (temps[p2[0]].state == TCG_TEMP_CONST)
+             + (temps[p2[1]].state == TCG_TEMP_CONST);
+    TCGArg held;
+    if (n1 <= n2) {
+        return false;
+    }
+    held = p1[0], p1[0] = p2[0], p2[0] = held;
+    held = p1[1], p1[1] = p2[1], p2[1] = held;
+    return true;
 }
 
 /* Propagate constants and copies, fold constant expressions. */
@@ -397,7 +474,6 @@
     const TCGOpDef *def;
     TCGArg *gen_args;
     TCGArg tmp;
-    TCGCond cond;
 
     /* Array VALS has an element for each temp.
        If this temp holds a constant then its value is kept in VALS' element.
@@ -440,52 +516,46 @@
         CASE_OP_32_64(eqv):
         CASE_OP_32_64(nand):
         CASE_OP_32_64(nor):
-            /* Prefer the constant in second argument, and then the form
-               op a, a, b, which is better handled on non-RISC hosts. */
-            if (temps[args[1]].state == TCG_TEMP_CONST || (args[0] == args[2]
-                && temps[args[2]].state != TCG_TEMP_CONST)) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-            }
+            swap_commutative(args[0], &args[1], &args[2]);
             break;
         CASE_OP_32_64(brcond):
-            if (temps[args[0]].state == TCG_TEMP_CONST
-                && temps[args[1]].state != TCG_TEMP_CONST) {
-                tmp = args[0];
-                args[0] = args[1];
-                args[1] = tmp;
+            if (swap_commutative(-1, &args[0], &args[1])) {
                 args[2] = tcg_swap_cond(args[2]);
             }
             break;
         CASE_OP_32_64(setcond):
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
+            if (swap_commutative(args[0], &args[1], &args[2])) {
                 args[3] = tcg_swap_cond(args[3]);
             }
             break;
         CASE_OP_32_64(movcond):
-            cond = args[5];
-            if (temps[args[1]].state == TCG_TEMP_CONST
-                && temps[args[2]].state != TCG_TEMP_CONST) {
-                tmp = args[1];
-                args[1] = args[2];
-                args[2] = tmp;
-                cond = tcg_swap_cond(cond);
+            if (swap_commutative(-1, &args[1], &args[2])) {
+                args[5] = tcg_swap_cond(args[5]);
             }
             /* For movcond, we canonicalize the "false" input reg to match
                the destination reg so that the tcg backend can implement
                a "move if true" operation.  */
-            if (args[0] == args[3]) {
-                tmp = args[3];
-                args[3] = args[4];
-                args[4] = tmp;
-                cond = tcg_invert_cond(cond);
+            if (swap_commutative(args[0], &args[4], &args[3])) {
+                args[5] = tcg_invert_cond(args[5]);
             }
-            args[5] = cond;
+            break;
+        case INDEX_op_add2_i32:
+            swap_commutative(args[0], &args[2], &args[4]);
+            swap_commutative(args[1], &args[3], &args[5]);
+            break;
+        case INDEX_op_mulu2_i32:
+            swap_commutative(args[0], &args[2], &args[3]);
+            break;
+        case INDEX_op_brcond2_i32:
+            if (swap_commutative2(&args[0], &args[2])) {
+                args[4] = tcg_swap_cond(args[4]);
+            }
+            break;
+        case INDEX_op_setcond2_i32:
+            if (swap_commutative2(&args[1], &args[3])) {
+                args[5] = tcg_swap_cond(args[5]);
+            }
+            break;
         default:
             break;
         }
@@ -622,6 +692,7 @@
             gen_args += 2;
             args += 2;
             break;
+
         CASE_OP_32_64(not):
         CASE_OP_32_64(neg):
         CASE_OP_32_64(ext8s):
@@ -634,14 +705,12 @@
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
+                gen_args += 2;
+                args += 2;
+                break;
             }
-            gen_args += 2;
-            args += 2;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(add):
         CASE_OP_32_64(sub):
         CASE_OP_32_64(mul):
@@ -665,15 +734,11 @@
                                           temps[args[2]].val);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args += 3;
+                args += 3;
+                break;
             }
-            args += 3;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(deposit):
             if (temps[args[1]].state == TCG_TEMP_CONST
                 && temps[args[2]].state == TCG_TEMP_CONST) {
@@ -683,33 +748,22 @@
                       | ((temps[args[2]].val & tmp) << args[3]);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args += 5;
+                args += 5;
+                break;
             }
-            args += 5;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(setcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
             if (tmp != 2) {
                 gen_opc_buf[op_index] = op_to_movi(op);
                 tcg_opt_gen_movi(gen_args, args[0], tmp);
                 gen_args += 2;
-            } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(brcond):
             tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
             if (tmp != 2) {
@@ -721,17 +775,11 @@
                 } else {
                     gen_opc_buf[op_index] = INDEX_op_nop;
                 }
-            } else {
-                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args += 4;
+                args += 4;
+                break;
             }
-            args += 4;
-            break;
+            goto do_default;
+
         CASE_OP_32_64(movcond):
             tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
             if (tmp != 2) {
@@ -746,18 +794,125 @@
                     tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
                     gen_args += 2;
                 }
+                args += 6;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST
+                && temps[args[4]].state == TCG_TEMP_CONST
+                && temps[args[5]].state == TCG_TEMP_CONST) {
+                uint32_t al = temps[args[2]].val;
+                uint32_t ah = temps[args[3]].val;
+                uint32_t bl = temps[args[4]].val;
+                uint32_t bh = temps[args[5]].val;
+                uint64_t a = ((uint64_t)ah << 32) | al;
+                uint64_t b = ((uint64_t)bh << 32) | bl;
+                TCGArg rl, rh;
+
+                if (op == INDEX_op_add2_i32) {
+                    a += b;
+                } else {
+                    a -= b;
+                }
+
+                /* We emit the extra nop when we emit the add2/sub2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
+                gen_args += 4;
+                args += 6;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_mulu2_i32:
+            if (temps[args[2]].state == TCG_TEMP_CONST
+                && temps[args[3]].state == TCG_TEMP_CONST) {
+                uint32_t a = temps[args[2]].val;
+                uint32_t b = temps[args[3]].val;
+                uint64_t r = (uint64_t)a * b;
+                TCGArg rl, rh;
+
+                /* We emit the extra nop when we emit the mulu2.  */
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+
+                rl = args[0];
+                rh = args[1];
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                gen_opc_buf[++op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
+                tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
+                gen_args += 4;
+                args += 4;
+                break;
+            }
+            goto do_default;
+
+        case INDEX_op_brcond2_i32:
+            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
+            if (tmp != 2) {
+                if (tmp) {
+                    memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                    gen_opc_buf[op_index] = INDEX_op_br;
+                    gen_args[0] = args[5];
+                    gen_args += 1;
+                } else {
+                    gen_opc_buf[op_index] = INDEX_op_nop;
+                }
+            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
+                       && temps[args[2]].state == TCG_TEMP_CONST
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[2]].val == 0
+                       && temps[args[3]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
+                gen_opc_buf[op_index] = INDEX_op_brcond_i32;
+                gen_args[0] = args[1];
+                gen_args[1] = args[3];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
             } else {
-                reset_temp(args[0]);
-                gen_args[0] = args[0];
-                gen_args[1] = args[1];
-                gen_args[2] = args[2];
-                gen_args[3] = args[3];
-                gen_args[4] = args[4];
-                gen_args[5] = args[5];
-                gen_args += 6;
+                goto do_default;
             }
             args += 6;
             break;
+
+        case INDEX_op_setcond2_i32:
+            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
+            if (tmp != 2) {
+                gen_opc_buf[op_index] = INDEX_op_movi_i32;
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
+                gen_args += 2;
+            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
+                       && temps[args[3]].state == TCG_TEMP_CONST
+                       && temps[args[4]].state == TCG_TEMP_CONST
+                       && temps[args[3]].val == 0
+                       && temps[args[4]].val == 0) {
+                /* Simplify LT/GE comparisons vs zero to a single compare
+                   vs the high word of the input.  */
+                gen_opc_buf[op_index] = INDEX_op_setcond_i32;
+                gen_args[0] = args[0];
+                gen_args[1] = args[2];
+                gen_args[2] = args[4];
+                gen_args[3] = args[5];
+                gen_args += 4;
+            } else {
+                goto do_default;
+            }
+            args += 6;
+            break;
+
         case INDEX_op_call:
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
@@ -776,11 +931,13 @@
                 i--;
             }
             break;
+
         default:
-            /* Default case: we do know nothing about operation so no
-               propagation is done.  We trash everything if the operation
-               is the end of a basic block, otherwise we only trash the
-               output args.  */
+        do_default:
+            /* Default case: we know nothing about the operation (or were
+               unable to compute its result), so no propagation is done.
+               We trash everything if the operation is the end of a basic
+               block, otherwise we only trash the output args.  */
             if (def->flags & TCG_OPF_BB_END) {
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
             } else {
diff --git a/tcg/ppc/tcg-target.h b/tcg/ppc/tcg-target.h
index 3259d89..ad433ae 100644
--- a/tcg/ppc/tcg-target.h
+++ b/tcg/ppc/tcg-target.h
@@ -96,8 +96,6 @@
 
 #define TCG_AREG0 TCG_REG_R27
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #define tcg_qemu_tb_exec(env, tb_ptr) \
     ((long __attribute__ ((longcall)) \
       (*)(void *, void *))code_gen_prologue)(env, tb_ptr)
diff --git a/tcg/ppc64/tcg-target.h b/tcg/ppc64/tcg-target.h
index 57569e8..97fc5c9 100644
--- a/tcg/ppc64/tcg-target.h
+++ b/tcg/ppc64/tcg-target.h
@@ -108,5 +108,4 @@
 
 #define TCG_AREG0 TCG_REG_R27
 
-#define TCG_TARGET_HAS_GUEST_BASE
 #define TCG_TARGET_EXTEND_ARGS 1
diff --git a/tcg/s390/tcg-target.h b/tcg/s390/tcg-target.h
index ed55c33..a0181ae 100644
--- a/tcg/s390/tcg-target.h
+++ b/tcg/s390/tcg-target.h
@@ -88,8 +88,6 @@
 #define TCG_TARGET_HAS_movcond_i64      0
 #endif
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 /* used for function call generation */
 #define TCG_REG_CALL_STACK		TCG_REG_R15
 #define TCG_TARGET_STACK_ALIGN		8
diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h
index c2fbb23..0e7d398 100644
--- a/tcg/sparc/tcg-target.h
+++ b/tcg/sparc/tcg-target.h
@@ -125,8 +125,6 @@
 #define TCG_TARGET_HAS_movcond_i64      1
 #endif
 
-#define TCG_TARGET_HAS_GUEST_BASE
-
 #define TCG_AREG0 TCG_REG_I0
 
 static inline void flush_icache_range(tcg_target_ulong start,
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 5518458..8100a5a 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -25,6 +25,11 @@
 
 int gen_new_label(void);
 
+static inline void tcg_gen_op0(TCGOpcode opc)
+{
+    *gen_opc_ptr++ = opc;   /* store the opcode and advance; no arguments */
+}
+
 static inline void tcg_gen_op1_i32(TCGOpcode opc, TCGv_i32 arg1)
 {
     *gen_opc_ptr++ = opc;
@@ -886,6 +891,8 @@
     tcg_gen_op6_i32(INDEX_op_add2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace add2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_sub_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -893,6 +900,8 @@
     tcg_gen_op6_i32(INDEX_op_sub2_i32, TCGV_LOW(ret), TCGV_HIGH(ret),
                     TCGV_LOW(arg1), TCGV_HIGH(arg1), TCGV_LOW(arg2),
                     TCGV_HIGH(arg2));
+    /* Allow the optimizer room to replace sub2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 }
 
 static inline void tcg_gen_and_i64(TCGv_i64 ret, TCGv_i64 arg1, TCGv_i64 arg2)
@@ -1018,6 +1027,8 @@
 
     tcg_gen_op4_i32(INDEX_op_mulu2_i32, TCGV_LOW(t0), TCGV_HIGH(t0),
                     TCGV_LOW(arg1), TCGV_LOW(arg2));
+    /* Allow the optimizer room to replace mulu2 with two moves.  */
+    tcg_gen_op0(INDEX_op_nop);
 
     tcg_gen_mul_i32(t1, TCGV_LOW(arg1), TCGV_HIGH(arg2));
     tcg_gen_add_i32(TCGV_HIGH(t0), TCGV_HIGH(t0), t1);
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 32cd0c6..5faaca5 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -62,10 +62,6 @@
 
 #include "elf.h"
 
-#if defined(CONFIG_USE_GUEST_BASE) && !defined(TCG_TARGET_HAS_GUEST_BASE)
-#error GUEST_BASE not supported on this host.
-#endif
-
 /* Forward declarations for functions declared in tcg-target.c and used here. */
 static void tcg_target_init(TCGContext *s);
 static void tcg_target_qemu_prologue(TCGContext *s);
@@ -1307,8 +1303,58 @@
             break;
         case INDEX_op_end:
             break;
-            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
+
+        case INDEX_op_add2_i32:
+        case INDEX_op_sub2_i32:
+            args -= 6;
+            nb_iargs = 4;
+            nb_oargs = 2;
+            /* Test if the high part of the operation is dead, but not
+               the low part.  The result can be optimized to a simple
+               add or sub.  This happens often for x86_64 guest when the
+               cpu mode is set to 32 bit.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                /* Create the single operation plus nop.  */
+                if (op == INDEX_op_add2_i32) {
+                    op = INDEX_op_add_i32;
+                } else {
+                    op = INDEX_op_sub_i32;
+                }
+                gen_opc_buf[op_index] = op;
+                args[1] = args[2];
+                args[2] = args[4];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 3);
+                /* Fall through and mark the single-word operation live.  */
+                nb_iargs = 2;
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
+        case INDEX_op_mulu2_i32:
+            args -= 4;
+            nb_iargs = 2;
+            nb_oargs = 2;
+            /* Likewise, test for the high part of the operation dead.  */
+            if (dead_temps[args[1]]) {
+                if (dead_temps[args[0]]) {
+                    goto do_remove;
+                }
+                gen_opc_buf[op_index] = op = INDEX_op_mul_i32;
+                args[1] = args[2];
+                args[2] = args[3];
+                assert(gen_opc_buf[op_index + 1] == INDEX_op_nop);
+                tcg_set_nop(s, gen_opc_buf + op_index + 1, args + 3, 1);
+                /* Fall through and mark the single-word operation live.  */
+                nb_oargs = 1;
+            }
+            goto do_not_remove;
+
         default:
+            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
             args -= def->nb_args;
             nb_iargs = def->nb_iargs;
             nb_oargs = def->nb_oargs;
@@ -1322,6 +1368,7 @@
                     if (!dead_temps[arg])
                         goto do_not_remove;
                 }
+            do_remove:
                 tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args);
 #ifdef CONFIG_PROFILER
                 s->del_op_count++;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 7bafe0e..45e94f5 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -616,7 +616,7 @@
 TCGv_i32 tcg_const_local_i32(int32_t val);
 TCGv_i64 tcg_const_local_i64(int64_t val);
 
-extern uint8_t code_gen_prologue[];
+extern uint8_t *code_gen_prologue;
 
 /* TCG targets may use a different definition of tcg_qemu_tb_exec. */
 #if !defined(tcg_qemu_tb_exec)
diff --git a/tcg/tci/tcg-target.h b/tcg/tci/tcg-target.h
index 6d89495..37f28c0 100644
--- a/tcg/tci/tcg-target.h
+++ b/tcg/tci/tcg-target.h
@@ -102,9 +102,6 @@
 #define TCG_TARGET_HAS_movcond_i64      0
 #endif /* TCG_TARGET_REG_BITS == 64 */
 
-/* Offset to user memory in user mode. */
-#define TCG_TARGET_HAS_GUEST_BASE
-
 /* Number of registers available.
    For 32 bit hosts, we need more than 8 registers (call arguments). */
 /* #define TCG_TARGET_NB_REGS 8 */
diff --git a/tests/tcg/Makefile b/tests/tcg/Makefile
index 15e36a2..80b1a4b 100644
--- a/tests/tcg/Makefile
+++ b/tests/tcg/Makefile
@@ -1,13 +1,13 @@
--include ../config-host.mak
+-include ../../config-host.mak
 -include $(SRC_PATH)/rules.mak
 
-$(call set-vpath, $(SRC_PATH)/tests)
+$(call set-vpath, $(SRC_PATH)/tests/tcg)
 
-QEMU=../i386-linux-user/qemu-i386
-QEMU_X86_64=../x86_64-linux-user/qemu-x86_64
+QEMU=../../i386-linux-user/qemu-i386
+QEMU_X86_64=../../x86_64-linux-user/qemu-x86_64
 CC_X86_64=$(CC_I386) -m64
 
-QEMU_INCLUDES += -I..
+QEMU_INCLUDES += -I../..
 CFLAGS=-Wall -O2 -g -fno-strict-aliasing
 #CFLAGS+=-msse2
 LDFLAGS=
@@ -36,6 +36,7 @@
 endif
 
 all: $(patsubst %,run-%,$(TESTS))
+test: all
 
 # rules to run tests
 
@@ -74,7 +75,10 @@
 # rules to compile tests
 
 test_path: test_path.o
+	$(CC_I386) $(LDFLAGS) -o $@ $^ $(LIBS)
+
 test_path.o: test_path.c
+	$(CC_I386) $(QEMU_INCLUDES) $(GLIB_CFLAGS) $(CFLAGS) -c -o $@ $^
 
 hello-i386: hello-i386.c
 	$(CC_I386) -nostdlib $(CFLAGS) -static $(LDFLAGS) -o $@ $<
@@ -86,12 +90,12 @@
 # i386/x86_64 emulation test (test various opcodes) */
 test-i386: test-i386.c test-i386-code16.S test-i386-vm86.S \
            test-i386.h test-i386-shift.h test-i386-muldiv.h
-	$(CC_I386) $(CFLAGS) $(LDFLAGS) -o $@ \
+	$(CC_I386) $(QEMU_INCLUDES) $(CFLAGS) $(LDFLAGS) -o $@ \
               $(<D)/test-i386.c $(<D)/test-i386-code16.S $(<D)/test-i386-vm86.S -lm
 
 test-x86_64: test-i386.c \
            test-i386.h test-i386-shift.h test-i386-muldiv.h
-	$(CC_X86_64) $(CFLAGS) $(LDFLAGS) -o $@ $(<D)/test-i386.c -lm
+	$(CC_X86_64) $(QEMU_INCLUDES) $(CFLAGS) $(LDFLAGS) -o $@ $(<D)/test-i386.c -lm
 
 # generic Linux and CPU test
 linux-test: linux-test.c
diff --git a/tests/tcg/linux-test.c b/tests/tcg/linux-test.c
index 2e4a746..83cb32d 100644
--- a/tests/tcg/linux-test.c
+++ b/tests/tcg/linux-test.c
@@ -16,6 +16,7 @@
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
+#define _GNU_SOURCE
 #include <stdarg.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -38,6 +39,7 @@
 #include <dirent.h>
 #include <setjmp.h>
 #include <sys/shm.h>
+#include <sched.h>
 
 #define TESTPATH "/tmp/linux-test.tmp"
 #define TESTPORT 7654
diff --git a/tests/tcg/test-i386.c b/tests/tcg/test-i386.c
index 8e64bba..64d929e 100644
--- a/tests/tcg/test-i386.c
+++ b/tests/tcg/test-i386.c
@@ -17,6 +17,7 @@
  *  along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 #define _GNU_SOURCE
+#include "compiler.h"
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
@@ -1827,7 +1828,7 @@
     printf("lock nop exception:\n");
     if (setjmp(jmp_env) == 0) {
         /* now execute an invalid instruction */
-        asm volatile("lock nop");
+        asm volatile(".byte 0xf0, 0x90"); /* lock nop */
     }
 
     printf("INT exception:\n");
diff --git a/tests/tcg/test_path.c b/tests/tcg/test_path.c
index 7265a94..a064eea 100644
--- a/tests/tcg/test_path.c
+++ b/tests/tcg/test_path.c
@@ -1,11 +1,12 @@
 /* Test path override code */
-#include "../config-host.h"
-#include "../qemu-malloc.c"
-#include "../cutils.c"
-#include "../path.c"
-#include "../trace.c"
+#define _GNU_SOURCE
+#include "config-host.h"
+#include "iov.c"
+#include "cutils.c"
+#include "path.c"
+#include "trace.c"
 #ifdef CONFIG_TRACE_SIMPLE
-#include "../simpletrace.c"
+#include "../trace/simple.c"
 #endif
 
 #include <stdarg.h>
diff --git a/trace-events b/trace-events
index 42b66f1..e2d4580 100644
--- a/trace-events
+++ b/trace-events
@@ -921,6 +921,10 @@
 savevm_section_start(void) ""
 savevm_section_end(unsigned int section_id) "section_id %u"
 
+# arch_init.c
+migration_bitmap_sync_start(void) ""
+migration_bitmap_sync_end(uint64_t dirty_pages) "dirty_pages %" PRIu64""
+
 # hw/qxl.c
 disable qxl_interface_set_mm_time(int qid, uint32_t mm_time) "%d %d"
 disable qxl_io_write_vga(int qid, const char *mode, uint32_t addr, uint32_t val) "%d %s addr=%u val=%u"
diff --git a/ui/vnc-jobs.c b/ui/vnc-jobs.c
index 087b84d..3c592b3 100644
--- a/ui/vnc-jobs.c
+++ b/ui/vnc-jobs.c
@@ -33,21 +33,21 @@
 /*
  * Locking:
  *
- * There is three levels of locking:
+ * There are three levels of locking:
  * - jobs queue lock: for each operation on the queue (push, pop, isEmpty?)
  * - VncDisplay global lock: mainly used for framebuffer updates to avoid
  *                      screen corruption if the framebuffer is updated
- *			while the worker is doing something.
+ *                      while the worker is doing something.
  * - VncState::output lock: used to make sure the output buffer is not corrupted
- * 		   	 if two threads try to write on it at the same time
+ *                          if two threads try to write on it at the same time
  *
- * While the VNC worker thread is working, the VncDisplay global lock is hold
- * to avoid screen corruptions (this does not block vnc_refresh() because it
- * uses trylock()) but the output lock is not hold because the thread work on
+ * While the VNC worker thread is working, the VncDisplay global lock is held
+ * to avoid screen corruption (this does not block vnc_refresh() because it
+ * uses trylock()) but the output lock is not held because the thread works on
  * its own output buffer.
  * When the encoding job is done, the worker thread will hold the output lock
  * and copy its output buffer in vs->output.
-*/
+ */
 
 struct VncJobQueue {
     QemuCond cond;
@@ -62,7 +62,7 @@
 
 /*
  * We use a single global queue, but most of the functions are
- * already reetrant, so we can easilly add more than one encoding thread
+ * already reentrant, so we can easily add more than one encoding thread
  */
 static VncJobQueue *queue;
 
diff --git a/ui/vnc.c b/ui/vnc.c
index 33e6386..66ae930 100644
--- a/ui/vnc.c
+++ b/ui/vnc.c
@@ -1806,10 +1806,12 @@
             vs->features |= VNC_FEATURE_TIGHT_MASK;
             vs->vnc_encoding = enc;
             break;
+#ifdef CONFIG_VNC_PNG
         case VNC_ENCODING_TIGHT_PNG:
             vs->features |= VNC_FEATURE_TIGHT_PNG_MASK;
             vs->vnc_encoding = enc;
             break;
+#endif
         case VNC_ENCODING_ZLIB:
             vs->features |= VNC_FEATURE_ZLIB_MASK;
             vs->vnc_encoding = enc;
diff --git a/vl.c b/vl.c
index 5b357a3..ee3c43a 100644
--- a/vl.c
+++ b/vl.c
@@ -3638,8 +3638,13 @@
 
     qdev_machine_init();
 
-    machine->init(ram_size, boot_devices,
-                  kernel_filename, kernel_cmdline, initrd_filename, cpu_model);
+    QEMUMachineInitArgs args = { .ram_size = ram_size,
+                                 .boot_device = boot_devices,
+                                 .kernel_filename = kernel_filename,
+                                 .kernel_cmdline = kernel_cmdline,
+                                 .initrd_filename = initrd_filename,
+                                 .cpu_model = cpu_model };
+    machine->init(&args);
 
     cpu_synchronize_all_post_init();
 
diff --git a/xen-all.c b/xen-all.c
index bcb7ef7..9d1e168 100644
--- a/xen-all.c
+++ b/xen-all.c
@@ -454,14 +454,6 @@
     }
 }
 
-static void xen_begin(MemoryListener *listener)
-{
-}
-
-static void xen_commit(MemoryListener *listener)
-{
-}
-
 static void xen_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
@@ -474,11 +466,6 @@
     xen_set_memory(listener, section, false);
 }
 
-static void xen_region_nop(MemoryListener *listener,
-                           MemoryRegionSection *section)
-{
-}
-
 static void xen_sync_dirty_bitmap(XenIOState *state,
                                   target_phys_addr_t start_addr,
                                   ram_addr_t size)
@@ -565,33 +552,14 @@
     xen_in_migration = false;
 }
 
-static void xen_eventfd_add(MemoryListener *listener,
-                            MemoryRegionSection *section,
-                            bool match_data, uint64_t data,
-                            EventNotifier *e)
-{
-}
-
-static void xen_eventfd_del(MemoryListener *listener,
-                            MemoryRegionSection *section,
-                            bool match_data, uint64_t data,
-                            EventNotifier *e)
-{
-}
-
 static MemoryListener xen_memory_listener = {
-    .begin = xen_begin,
-    .commit = xen_commit,
     .region_add = xen_region_add,
     .region_del = xen_region_del,
-    .region_nop = xen_region_nop,
     .log_start = xen_log_start,
     .log_stop = xen_log_stop,
     .log_sync = xen_log_sync,
     .log_global_start = xen_log_global_start,
     .log_global_stop = xen_log_global_stop,
-    .eventfd_add = xen_eventfd_add,
-    .eventfd_del = xen_eventfd_del,
     .priority = 10,
 };
 
@@ -1173,7 +1141,7 @@
 
     state->memory_listener = xen_memory_listener;
     QLIST_INIT(&state->physmap);
-    memory_listener_register(&state->memory_listener, get_system_memory());
+    memory_listener_register(&state->memory_listener, &address_space_memory);
     state->log_for_dirtybit = NULL;
 
     /* Initialize backend core & drivers */